"""Plot how many rows of IMDB-Movie-Data.csv carry each genre tag.

The ``Genre`` column is treated as a comma-separated list of tags. Each row
is expanded into a one-hot indicator matrix, the columns are summed to get a
per-genre row count, and the counts are drawn as a bar chart in ascending
order.
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

df = pd.read_csv("./IMDB-Movie-Data.csv")
genre = df["Genre"]

# One indicator column per distinct genre tag, 1.0 where the row carries it.
# str.get_dummies replaces the hand-written split / set / zero-matrix /
# per-row .loc loop: it is vectorised, emits its columns in sorted order (so
# the result no longer depends on set iteration order, which can differ
# between interpreter runs), and turns a missing Genre into an all-zero row
# instead of raising while flattening.
# The cast keeps the float64 dtype the np.zeros-based matrix used to have.
zero_np = genre.str.get_dummies(sep=",").astype(np.float64)

# Number of rows per genre, smallest first so the bars rise left to right.
genre_count = zero_np.sum(axis=0).sort_values()
_x = genre_count.index
_y = genre_count.values
print(type(genre_count))

plt.figure(figsize=(20, 8), dpi=80)
# Bars are placed at integer positions and then labelled with genre names.
positions = range(len(_x))
plt.bar(positions, _y)
plt.xticks(positions, _x)
plt.show()
# Please credit the original source when reposting: https://mac.8miu.com/read-490156.html