# Group-wise transformation with GroupBy.transform
import numpy as np
import pandas as pd

# Two random numeric columns plus two string key columns used for grouping.
df = pd.DataFrame({
    'data1': np.random.rand(5),
    'data2': np.random.rand(5),
    'key1': list('aabba'),
    'key2': ['one', 'two', 'one', 'two', 'one'],
})
print(df)

# Column means per key1 group. numeric_only=True keeps the string column
# key2 out of the aggregation; strings cannot be averaged, and pandas >= 2.0
# raises TypeError instead of silently dropping such columns.
k_mean = df.groupby('key1').mean(numeric_only=True)
print(k_mean)

# Group + merge: join every row with the means of its key1 group.
# Overlapping column names receive the _x / _y suffixes from merge, and
# .add_prefix('mean_') then prepends 'mean_' to every column label.
merged = pd.merge(df, k_mean, left_on='key1', right_index=True).add_prefix('mean_')
print(merged)
print('------')

# Column means per key2 group (one row per group).
key2_mean = df.groupby('key2').mean(numeric_only=True)
print(key2_mean)

# transform returns a frame aligned with df's index: every data1/data2 cell
# is replaced by the mean of its key2 group. Only the numeric columns are
# selected because the string columns cannot be averaged. The string alias
# 'mean' is used because passing np.mean is deprecated in recent pandas.
key2_transformed = df.groupby('key2')[['data1', 'data2']].transform('mean')
print(key2_transformed)
# Output:
#       data1     data2 key1 key2
# 0  0.845365  0.411704    a  one
# 1  0.300226  0.411719    a  two
# 2  0.476632  0.628493    b  one
# 3  0.985675  0.304024    b  two
# 4  0.418804  0.229940    a  one
#          data1     data2
# key1
# a     0.521465  0.351121
# b     0.731153  0.466258
#    mean_data1_x  mean_data2_x mean_key1 mean_key2  mean_data1_y  mean_data2_y
# 0      0.845365      0.411704         a       one      0.521465      0.351121
# 1      0.300226      0.411719         a       two      0.521465      0.351121
# 4      0.418804      0.229940         a       one      0.521465      0.351121
# 2      0.476632      0.628493         b       one      0.731153      0.466258
# 3      0.985675      0.304024         b       two      0.731153      0.466258
# ------
#          data1     data2
# key2
# one   0.580267  0.423379
# two   0.642951  0.357872
#       data1     data2
# 0  0.580267  0.423379
# 1  0.642951  0.357872
# 2  0.580267  0.423379
# 3  0.642951  0.357872
# 4  0.580267  0.423379