# 首页 > 代码库 > pandas 常用函数  (Home > Code library > Common pandas functions)

# pandas 常用函数  (Common pandas functions)

import numpy as np
from pandas import DataFrame , Series
# Demo: label-based selection on a Series whose index contains duplicates.
print("Axis indexes with duplicate values")
obj = Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
print("obj is \n", obj)
# The labels 'a' and 'b' each occur twice, so the index is not unique.
print("obj.index.is_unique is ", obj.index.is_unique)
# Selecting a duplicated label returns every matching entry (a Series),
# not a single scalar.
print("obj['a'] is \n", obj['a'])
print("obj['b'] is \n", obj['b'])

# Demo: the same duplicate-label selection on the rows of a DataFrame.
df = DataFrame(np.random.randn(4, 3), index=['a', 'a', 'b', 'b'])
print("df is \n", df)
# `.ix` was removed from pandas; `.loc` is the label-based indexer and
# returns both rows labelled 'b'.
print("df.loc['b'] is \n ", df.loc['b'])

# Demo: reductions and summary statistics on a DataFrame containing NaNs.
df = DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)
print("df is \n", df)
print("Calling dafaframe's sum method returns a Series containing column sums")
print("df.sum() is \n", df.sum())
print("passing axis=1 sums over the rows instead")
print("df.sum(axis=1) \n", df.sum(axis=1))
print("NA values are excluded unless the entire slice is NA.this can be disabled using the skipna option")
# With skipna=False any row that contains a NaN produces NaN.
print("df.mean(axis=1,skipna=False) \n ", df.mean(axis=1, skipna=False))

print("df.idxmax() return indirect statistics like the index value where the maximum values are attained \n", df.idxmax())
print("df.cumsum() return cumulative sum of values \n", df.cumsum())
print("df.describe() return multiple summary statistics in one shot \n", df.describe())
# For non-numeric data, describe() reports count/unique/top/freq instead.
obj = Series(['a', 'a', 'b', 'c'] * 4)
print("obj is \n", obj)
print("obj.describe() return alternate summary statistics \n", obj.describe())

import pandas_datareader as web

# Collect one DataReader result per ticker, keyed by ticker symbol.
# NOTE(review): this requests the 'yahoo' source for 2000-01-01..2010-01-01
# and needs network access; confirm the source is still served by the
# installed pandas_datareader version.
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.DataReader(ticker, 'yahoo', '1/1/2000', '1/1/2010')

# Build one column per ticker from each downloaded frame's 'Adj Close'
# and 'Volume' columns.  dict.items() replaces the Python 2-only
# iteritems(), which does not exist on Python 3 dicts.
price = DataFrame({tic: data['Adj Close']
                   for tic, data in all_data.items()})
volume = DataFrame({tic: data['Volume']
                    for tic, data in all_data.items()})

# Row-over-row percent change of the adjusted close prices.
returns = price.pct_change()
print("returns.tail()\n", returns.tail())

# pandas 常用函数  (Common pandas functions)