首页 > 代码库 > 大数据统计笔记

大数据统计笔记

# -*- coding: utf-8 -*-
"""Pandas quick-start notes: building, selecting, transforming and exporting data.

Running this file prints every example to stdout and writes the current
DataFrame to the hard-coded CSV path used near the end of the script.
"""
import io
import json

import numpy as np
import pandas as pd

try:
    # Nothing below references plt; it is only kept available for ad-hoc plotting.
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional, so a missing matplotlib must not stop the notes.
    plt = None

# ---------------------------- Series ----------------------------
s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)
s = pd.Series([1, 3], index=["a", "b"])
print(s)

# Date range used as the row index. The start must be a date *string*:
# pandas reads a bare integer as an offset from the epoch, not a calendar date.
dates = pd.date_range("20130101", periods=6)
print(dates)

df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
print("LIMIT:============")
print(df.head(3))
print(df.tail(3))
print("反转XY显示 :============")
# Transposed view (rows and columns swapped).
print(df.T)

# Sorting (reference only, intentionally not executed):
#   df.sort_index(axis=1, ascending=False)
#   df.sort_values(by="B")   # successor of the removed df.sort(columns="B")

# --------------------------- Selectors ---------------------------
print("指定列 :============")
# Select specific columns by label.
print(df[["A", "B"]])
print("df[行范围,列范围]:============")
# Positional row slice.
print(df[0:3])
print("指定索引主键 :============")
# Slice rows by index label; the bounds are date strings so they are matched
# against the DatetimeIndex instead of being treated as row positions.
print(df["20130103":"20130104"])
print("布尔过滤 :============")
# Boolean mask filtering.
print(df[df.A > 0.5])
# Disabled in the original notes: print(df[0:3, 0:1])
# (use df.iloc[0:3, 0:1] for a positional row/column range).

# ------------------------ Matrix operations ------------------------
print("距阵操作 :============")
print(df * 2)
print(np.exp(df))

# ------------------------- Table elements -------------------------
# Underlying 2-D array:  df.values
# Column labels:         df.columns
# Row index:             df.index
# Per-column dtypes:     df.dtypes

print("字典(JSON)转换距阵 :============")
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
df = pd.Series(sdata)
print(df)
sdata = [{"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}]
df = pd.DataFrame(sdata)
df.columns = ["Ohio", "Texas", "Oregon", "Utah"]
print(df)
df = df.fillna(0)  # replace every missing value with 0

# Getting started:
#   http://pandas.pydata.org/pandas-docs/stable/10min.html
#   http://pda.readthedocs.org/en/latest/chp5.html
# Blog:
#   http://cloga.info/
# Big data:
#   http://www.17bigdata.com/?cat=22
# Optimisation:
#   http://1.aisensiy.sinaapp.com/2014/03/%E6%9C%80%E8%BF%91%E4%BD%BF%E7%94%A8-pandas-%E7%9A%84%E6%80%BB%E7%BB%93/

# NOTE: hard-coded output path taken from the original notes.
df.to_csv("E:\\py\\foo.csv")

# ------------------------------ JSON ------------------------------
print("josn====================")
j = [{0: {"a": "a"}, 1: {"b": "b"}}]
elevations = json.dumps(j)
# read_json expects a path or file-like object, so wrap the literal JSON text.
df = pd.read_json(io.StringIO(elevations))
print(df)