Python 电影种类之简单数据分析

mac2024-04-21  29

"""Count how many movies fall into each genre and plot the result as a bar chart.

Reads ``IMDB-Movie-Data.csv`` from the current directory; its ``Genre``
column holds comma-separated genre labels (e.g. ``"Action,Sci-Fi"``).
"""
import numpy as np
import pandas as pd


def count_genres(genre: pd.Series) -> pd.Series:
    """Return the number of rows carrying each genre label, sorted ascending.

    Args:
        genre: Series of comma-separated genre strings. Missing values are
            allowed and contribute to no genre.

    Returns:
        Series indexed by genre label holding integer counts, ordered from
        the least to the most frequent genre. Genres with equal counts are
        kept in alphabetical order so the result is reproducible.
    """
    # One 0/1 indicator column per distinct label. This replaces the manual
    # "all-zero frame + per-row .loc assignment" loop, treats missing values
    # as "no genre", and yields alphabetically sorted columns instead of the
    # run-dependent order of a set.
    indicators = genre.str.get_dummies(sep=",")
    # A stable sort keeps the alphabetical order among tied counts.
    return indicators.sum(axis=0).sort_values(kind="stable")


def plot_genre_counts(genre_count: pd.Series) -> None:
    """Show a bar chart with one bar per genre in ``genre_count``."""
    # Imported here so count_genres() stays usable where matplotlib or a
    # display backend is unavailable.
    from matplotlib import pyplot as plt

    positions = np.arange(len(genre_count))
    plt.figure(figsize=(20, 8), dpi=80)
    plt.bar(positions, genre_count.values)
    plt.xticks(positions, genre_count.index)
    plt.show()


def main(csv_path: str = "./IMDB-Movie-Data.csv") -> None:
    """Load the movie data set, count the genres and plot them."""
    df = pd.read_csv(csv_path)
    plot_genre_counts(count_genres(df["Genre"]))


if __name__ == "__main__":
    main()
最新回复(0)