转:Python金融大数据分析——第6章 金融时间序列 笔记
# Basic DataFrame operations (notes section heading).
#
# The script is written notebook-style: a bare expression is something you
# would evaluate in a REPL, and the comment block under it shows the output.
# Rows are added with pd.concat because DataFrame.append was removed in
# pandas 2.0.
import pandas as pd
import numpy as np

df = pd.DataFrame([10, 20, 30, 40], columns=['numbers'],
                  index=['a', 'b', 'c', 'd'])
df
#    numbers
# a       10
# b       20
# c       30
# d       40

df.index
# Index(['a', 'b', 'c', 'd'], dtype='object')

df.columns
# Index(['numbers'], dtype='object')

# Label-based selection: a single label, a list of labels, a slice of the index.
df.loc['c']
# numbers    30
# Name: c, dtype: int64

df.loc[['a', 'b']]
#    numbers
# a       10
# b       20

df.loc[df.index[1:3]]
#    numbers
# b       20
# c       30

df.sum()
# numbers    100
# dtype: int64

# Element-wise square, once via apply and once via the vectorized operator.
df.apply(lambda x: x ** 2)
#    numbers
# a      100
# b      400
# c      900
# d     1600

df ** 2
#    numbers
# a      100
# b      400
# c      900
# d     1600

# Adding a column from a plain tuple: values are assigned positionally.
df['floats'] = (1.5, 2.5, 3.5, 4.5)
df
#    numbers  floats
# a       10     1.5
# b       20     2.5
# c       30     3.5
# d       40     4.5

df['floats']
# a    1.5
# b    2.5
# c    3.5
# d    4.5
# Name: floats, dtype: float64

df.floats
# a    1.5
# b    2.5
# c    3.5
# d    4.5
# Name: floats, dtype: float64

# Adding a column from an indexed pandas object: values are matched by index
# label, not by position ('Yves' is listed first but lands on row 'd').
df['names'] = pd.DataFrame(['Yves', 'Guido', 'Felix', 'Francesc'],
                           index=['d', 'a', 'b', 'c'])
df
#    numbers  floats     names
# a       10     1.5     Guido
# b       20     2.5     Felix
# c       30     3.5  Francesc
# d       40     4.5      Yves

# Appending a row from a dict while discarding the index labels.
# (Result is only displayed, df itself is not modified here.)
pd.concat(
    [df, pd.DataFrame([{'numbers': 100, 'floats': 5.75, 'names': 'Henry'}])],
    ignore_index=True,
)
#    numbers  floats     names
# 0       10    1.50     Guido
# 1       20    2.50     Felix
# 2       30    3.50  Francesc
# 3       40    4.50      Yves
# 4      100    5.75     Henry

# Appending a row that carries its own index label keeps the existing labels.
df = pd.concat(
    [df, pd.DataFrame({'numbers': 100, 'floats': 5.75, 'names': 'Henry'},
                      index=['z'])]
)
df
#    numbers  floats     names
# a       10    1.50     Guido
# b       20    2.50     Felix
# c       30    3.50  Francesc
# d       40    4.50      Yves
# z      100    5.75     Henry

# Frame to join: shares labels a-d with df, has an extra 'y' and lacks 'z'.
squares = pd.DataFrame([1, 4, 9, 16, 25],
                       index=['a', 'b', 'c', 'd', 'y'],
                       columns=['squares'])

# Default (left) join: keeps df's index, so 'y' is dropped and 'z' gets NaN.
df.join(squares)
#    numbers  floats     names  squares
# a       10    1.50     Guido      1.0
# b       20    2.50     Felix      4.0
# c       30    3.50  Francesc      9.0
# d       40    4.50      Yves     16.0
# z      100    5.75     Henry      NaN

# Outer join: union of both indexes; missing cells become NaN, which turns
# the integer 'numbers' column into floats.
df = df.join(squares, how='outer')
df
#    numbers  floats     names  squares
# a     10.0    1.50     Guido      1.0
# b     20.0    2.50     Felix      4.0
# c     30.0    3.50  Francesc      9.0
# d     40.0    4.50      Yves     16.0
# y      NaN     NaN       NaN     25.0
# z    100.0    5.75     Henry      NaN

# Aggregations skip NaN values by default.
df[['numbers', 'squares']].mean()
# numbers    40.0
# squares    11.0
# dtype: float64

df[['numbers', 'squares']].std()
# numbers    35.355339
# squares     9.669540
# dtype: float64