首页 > 代码库 > 利用生鲜数据画图

利用生鲜数据画图

library("ggplot2")library("RODBC") #加载RODBC包,读取数据库数据library("sqldf") #sql语句处理数据library("scales")library("reshape2")library(gcookbook) # For the data setlibrary("tcltk")#### 生成数据框 ####library(RODBC)#library(plyr)channel1 <- odbcConnectExcel2007("C:/Users/Jennifer/Desktop/生鲜/生鲜二级商家.xlsx")jbp01<-sqlFetch(channel1 ,"Sheet4")head(jbp01)#,as.is=F) odbcClose(channel1)f = file("C:/Users/Jennifer/Desktop/Book1.xlsx")readLines(f,10)#head(jbp01)close(myconnect) #关闭数据库连接####数据处理#####jbp01$month <- as.character(jbp01$month) #月份处理成文本names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型head(jbp01)####线图#####用sql语句处理jbp02 <- sqldf("select * from jbp01",row.names=T)p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=sum(mrchnt_num),colour=factor(prov_name)))p+  geom_line(size=0.8)+    xlab("日期")+  ylab("销售金额 单位:K")+  labs(title="生鲜商城数据")+#设置图的标题  scale_y_continuous()#coord_flip() #横纵坐标翻转#多个线图jbp02 <- sqldf("select prov_name,categ_lvl2_name,sum(mrchnt_num) mrchnt_num from jbp01 group by prov_name,categ_lvl2_name",row.names=T)p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=mrchnt_num),fill= categ_lvl2_name)p+  geom_bar( stat = "identity")+  facet_wrap(~prov_name,scale="free_y")+scale_x_continuous(breaks=1:11)+    xlab("二级类目")+  ylab("商家数")+  labs(title="不同省份二级类目商家数") #设置图的标题####饼图####jbp02 <- sqldf("select categ_lvl2_name,sum(mrchnt_num) mrchnt_num from jbp01 where prov_name =‘上海‘ group by categ_lvl2_name",row.names=T)jbp02 <- transform(jbp02, percent=mrchnt_num/sum(mrchnt_num))p <- ggplot(jbp02,aes(x="", y=percent, fill=categ_lvl2_name)) p+  geom_bar(width = 1, stat = "identity")+    coord_polar("y")####中国地图#####library(ggmap)#library(mapproj)#map <- get_map(location=‘china‘,zoom=4)#ggmap(map) #此路不通,GOOGLE地图链接不上library(maps)library(mapdata)map("china")library(maptools)setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要x=readShapePoly(bou2_4p.shp) #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shxplot(x)plot(x,col=gray(924:0/924)); #设置分割线getColor=function(mapdata,provname,provcol,othercol){  f=function(x,y) ifelse(x %in% y,which(y==x),0)  colIndex=sapply(mapdata@data$NAME,f,provname)  col=c(othercol,provcol)[colIndex+1]  return(col)}# provname=c("北京市","天津市","上海市","重庆市") #ctr+shift+C# provcol=c("red","green","yellow","purple")# plot(x,col=getColor(x,provname,provcol,"white"))jbp03 <- sqldf("select case when mrchnt_name=‘武汉‘ then ‘湖北省‘               when mrchnt_name=济南 then 山东省               when mrchnt_name=广州 then 广东省               when mrchnt_name=福建 then 福建省               when mrchnt_name=成都 then 四川省               when mrchnt_name=北京 then 北京市               when mrchnt_name=上海 then 上海市               end prov_name,sum(sale_amt) sale_amt from jbp01 where manufacture like %宝洁%group by case when mrchnt_name=武汉 then 湖北省               when mrchnt_name=济南 then 山东省               when mrchnt_name=广州 then 广东省               when mrchnt_name=福建 then 福建省               when mrchnt_name=成都 then 四川省               when mrchnt_name=北京 then 北京市               when mrchnt_name=上海 then 上海市               end",row.names=T)prov_name <-c(jbp03$prov_name)sale_amt <- c(jbp03$sale_amt)prov_col=rgb(red=1-sale_amt/max(sale_amt)/2,green=1-sale_amt/max(sale_amt)/2,blue=0)plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")####条形图####jbp02 <- sqldf("select * from jbp01",row.names=T)p <- ggplot(jbp02,aes(x=factor(categ_lvl2_name),y=mrchnt_num))p+  geom_bar(stat="identity",colour=red,fill=blue)#堆积图myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接jbp01 <- sqlQuery(myconnect,"select  DATEPART(month, date_id) month,t1.* from temp1 t1")#head(jbp01)close(myconnect) #关闭数据库连接names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型jbp02 <- sqldf("select categ_lvl2_name,prov_name,count(mrchnt_name) mrchnt_num from jbp01 group by prov_name ",row.names=T)p <- ggplot(jbp02,aes(x=factor(categ_lvl2_name),y=mrchnt_num))p+  geom_bar(stat="identity",colour=red,fill=blue)+  facet_wrap(~prov_name)#geom_bar(position=‘dodge‘,stat="identity",colour=‘red‘,fill=‘blue‘)#dodge方式是将不同年份的数据并列放置;stack方式是将不同年份数据堆叠放置####气泡图####jbp02 <- sqldf("select prov_name,categ_lvl2_name,count(mrchnt_name) mrchnt from jbp01 group by categ_lvl2_name  ",row.names=T)p <- ggplot(jbp02,aes(x=categ_lvl2_name,y=mrchnt,colour=factor(prov_name)))p+  geom_point(aes(size = mrchnt))+  scale_size_continuous(range=c(3,10))+  scale_y_continuous(labels=comma)+xlab("一级类目")+  ylab("销售金额")+  labs(title="气泡图") #设置图的标题####广告PPT地图####library(maps)library(mapdata)library(maptools)myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from prov_vstrs")close(myconnct)jbp01 <- sqldf("select prov_name,categ_lvl2_name,count(mrchnt_name) mrchnt from jbp01 group by categ_lvl2_name  ",row.names=T)names(jbp01) <- tolower(names(jbp01))jbp01$prov_name <-as.character(jbp01$prov_name)setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要x=readShapePoly(bou2_4p.shp) #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shxgetColor=function(mapdata,provname,provcol,othercol){  f=function(x,y) ifelse(x %in% y,which(y==x),0)  colIndex=sapply(mapdata@data$NAME,f,provname)  col=c(othercol,provcol)[colIndex+1]  return(col)}prov_name <-c(jbp01$prov_name)vistrs <- c(jbp01$vistrs)prov_col=rgb(red=1-vistrs/max(vistrs)/2,green=1-vistrs/max(vistrs)/2,blue=0)plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")####广告PPT图2####myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from ad_chart")close(myconnct)names(jbp01) <- tolower(names(jbp01))jbp01$date_id<-as.Date(jbp01$date_id)jbp01 <- sqldf("select date_id,channel,sum(vistrs)-sum(invalid_vistrs) valid_vistrs from jbp01  where channel <> ‘SEM‘ group by date_id,channel  ",row.names=T)p <- ggplot(jbp01,aes(x=date_id,y=valid_vistrs,group=channel,colour=factor(channel)))p +geom_line(size=0.75)+  facet_wrap(~channel)+  scale_x_date(labels = date_format("%m/%d"),               minor_breaks = date_breaks("1 week"))+  scale_y_continuous(labels=comma)+  xlab("日期")+  ylab("有效访客Vstrs")+  labs(title="渠道日有效访客数") #设置图的标题####广告PPT 玫瑰图####myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from ad_pie")close(myconnct)names(jbp01) <- tolower(names(jbp01))names(jbp01) <- c("channel","无效访客","有效访客")mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")ggplot(mt)+geom_bar(aes(x=channel,y=visits,fill=type),stat="identity")+  coord_polar()+  ##玫瑰图  xlab("渠道")+ ylab("访客数")+  theme(text=element_text(size=12),axis.text.x=element_text(colour="black",size=12,face="bold"))+  scale_y_continuous(label=comma)+  guides(fill=guide_legend(title=NULL))+  labs(title="渠道访客差异")#ylim(0,500) #设立坐标轴的范围#guides(fill=F)#剔除标签####堆积图 玫瑰图####myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from ad_pie")close(myconnct)names(jbp01) <- tolower(names(jbp01))ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity")+  coord_polar()  ##玫瑰图 堆积图衍生# stat= bin 默认 identity 独立变量 ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity") #堆积柱状图ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity",position="fill") + #堆积柱状图  scale_y_continuous(labels=percent)#position: stack(数值) fill(百分比) identity() dodge(并排)  jitter(增加扰动)####融合####mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")names(mt)str(mt)sem_visigt, seo_vistdcast(mt, channel + dd ~ type, sum, mean)plyr splite-c-comdplyr#####ppt cluser####myconnct <- odbcConnect(dsn="jbp2014",uid="chengyingbao",pwd="Mis,111")chart01 <- sqlQuery(channel=myconnct,query="select page_categ_name,pro_vstrs_per,yhd_vstrs_per from ad_cluster")close(myconnct)names(chart01) <- tolower(names(chart01))names(chart01) <- c("page_categ_name","可乐活动访客","全站访客")mt <- melt(data=http://www.mamicode.com/chart01,id.vars="page_categ_name",value.name="percent",variable.name="type")mt <- sqldf("select page_categ_name,type,case when type=‘可乐活动访客‘ then percent*-1            else percent            end percent from mt")p = ggplot(mt)p+ geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+  coord_flip()+  theme(axis.text.x=element_blank())#p = ggplot(mt,aes(x=interaction(page_categ_name, type),y=percent))#p+ #  geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+#  coord_flip()+#  geom_text(aes(label=percent, vjust=-0.2))library("ggplot2")library("RODBC") #加载RODBC包,读取数据库数据library("sqldf") #sql语句处理数据library("scales")library("reshape2")library(gcookbook) # For the data setlibrary("tcltk")#### 生成数据框 #####myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接#jbp01 <- sqlQuery(myconnect,"select * from temp1")library(RODBC)#library(plyr)channel1 <- odbcConnectExcel2007("C:/Users/Jennifer/Desktop/Book1.xlsx")jbp01<-sqlFetch(channel1 ,"Sheet2")#,as.is=F) odbcClose(channel1)f = file("C:/Users/Jennifer/Desktop/Book1.xlsx")readLines(f,10)#head(jbp01)close(myconnect) #关闭数据库连接####数据处理#####jbp01$month <- as.character(jbp01$month) #月份处理成文本names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型head(jbp01)####线图#####用sql语句处理jbp02 <- sqldf("select date_id,categ_lvl1_name,sum(ordr_sale) sale_amt from jbp01  group by date_id,categ_lvl1_name",row.names=T)p <- ggplot(jbp02,aes(x=date_id,y=sale_amt,colour=prov_name))p+  geom_line(size=0.8)+    xlab("日期")+  ylab("销售金额 单位:K")+  labs(title="生鲜商城数据")+#设置图的标题  scale_y_continuous()#coord_flip() #横纵坐标翻转#多个线图jbp02 <- sqldf("select manufacture,date_id,categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 group by manufacture,date_id,categ_lvl1_name",row.names=T)p <- ggplot(jbp02,aes(x=date_id,y=sale_amt/1000,colour=factor(categ_lvl1_name)))p+  geom_line(size=0.8)+  facet_wrap(~manufacture)+    xlab("日期")+  ylab("销售金额 单位:K")+  labs(title="JBP日销售") #设置图的标题####饼图####jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like ‘%宝洁%‘ group by categ_lvl1_name",row.names=T)jbp02 <- transform(jbp02, percent=sale_amt/sum(sale_amt))p <- ggplot(jbp02,aes(x="", y=percent, fill=categ_lvl1_name)) p+  geom_bar(width = 1, stat = "identity")+    coord_polar("y")####中国地图#####library(ggmap)#library(mapproj)#map <- get_map(location=‘china‘,zoom=4)#ggmap(map) #此路不通,GOOGLE地图链接不上library(maps)library(mapdata)map("china")library(maptools)setwd("C:/Users/Jennifer/Documents/R/win-library/3.0/china-province-border-data") #这一步很重要x=readShapePoly(bou2_4p.shp) #运行之前要设置工作空间 bou2_4p.dbf,bou2_4p.shp,bou2_4p.shxplot(x)plot(x,col=gray(924:0/924)); #设置分割线getColor=function(mapdata,provname,provcol,othercol){  f=function(x,y) ifelse(x %in% y,which(y==x),0)  colIndex=sapply(mapdata@data$NAME,f,provname)  col=c(othercol,provcol)[colIndex+1]  return(col)}# provname=c("北京市","天津市","上海市","重庆市") #ctr+shift+C# provcol=c("red","green","yellow","purple")# plot(x,col=getColor(x,provname,provcol,"white"))jbp03 <- sqldf("select case when mrchnt_name=‘武汉‘ then ‘湖北省‘               when mrchnt_name=济南 then 山东省               when mrchnt_name=广州 then 广东省               when mrchnt_name=福建 then 福建省               when mrchnt_name=成都 then 四川省               when mrchnt_name=北京 then 北京市               when mrchnt_name=上海 then 上海市               end prov_name,sum(sale_amt) sale_amt from jbp01 where manufacture like %宝洁%group by case when mrchnt_name=武汉 then 湖北省               when mrchnt_name=济南 then 山东省               when mrchnt_name=广州 then 广东省               when mrchnt_name=福建 then 福建省               when mrchnt_name=成都 then 四川省               when mrchnt_name=北京 then 北京市               when mrchnt_name=上海 then 上海市               end",row.names=T)prov_name <-c(jbp03$prov_name)sale_amt <- c(jbp03$sale_amt)prov_col=rgb(red=1-sale_amt/max(sale_amt)/2,green=1-sale_amt/max(sale_amt)/2,blue=0)plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")####条形图####jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like ‘%宝洁%‘ group by categ_lvl1_name",row.names=T)p <- ggplot(jbp02,aes(x=factor(categ_lvl1_name),y=sale_amt/1000))p+  geom_bar(stat="identity",colour=red,fill=blue)#堆积图myconnect <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111",case="tolower") #建立数据库连接jbp01 <- sqlQuery(myconnect,"select  DATEPART(month, date_id) month,t1.* from temp1 t1")#head(jbp01)close(myconnect) #关闭数据库连接names(jbp01) <- tolower(names(jbp01))   #字段名调整为小写,易于处理jbp01$date_id <- as.Date(jbp01$date_id) #日期字段 调整为日期型jbp02 <- sqldf("select month,categ_lvl1_name,sum(sale_amt) sale_amt from jbp01 where manufacture like ‘%宝洁%‘ group by month,categ_lvl1_name",row.names=T)p <- ggplot(jbp02,aes(x=factor(categ_lvl1_name),y=sale_amt/1000))p+  geom_bar(stat="identity",colour=red,fill=blue)+  facet_wrap(~month)#geom_bar(position=‘dodge‘,stat="identity",colour=‘red‘,fill=‘blue‘)#dodge方式是将不同年份的数据并列放置;stack方式是将不同年份数据堆叠放置####气泡图####jbp02 <- sqldf("select categ_lvl1_name,sum(sale_amt) sale_amt,sum(sale_amt)/sum(sale_num) asp from jbp01 where manufacture like ‘%宝洁%‘ group by categ_lvl1_name",row.names=T)p <- ggplot(jbp02,aes(x=categ_lvl1_name,y=sale_amt/1000,colour=factor(categ_lvl1_name)))p+  geom_point(aes(size = asp))+  scale_size_continuous(range=c(3,10))+  scale_y_continuous(labels=comma)xlab("一级类目")+  ylab("销售金额 单位:K")+  labs(title="气泡图") #设置图的标题####广告PPT地图####library(maps)library(mapdata)library(maptools)myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from prov_vstrs")close(myconnct)names(jbp01) <- tolower(names(jbp01))jbp01$prov_name <-as.character(jbp01$prov_name)x=readShapePoly(bou2_4p.shp) getColor=function(mapdata,provname,provcol,othercol){  f=function(x,y) ifelse(x %in% y,which(y==x),0)  colIndex=sapply(mapdata@data$NAME,f,provname)  col=c(othercol,provcol)[colIndex+1]  return(col)}prov_name <-c(jbp01$prov_name)vistrs <- c(jbp01$vistrs)prov_col=rgb(red=1-vistrs/max(vistrs)/2,green=1-vistrs/max(vistrs)/2,blue=0)plot(x,col=getColor(x,prov_name,prov_col,"white"),xlab="",ylab="")####广告PPT图2####myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from ad_chart")close(myconnct)names(jbp01) <- tolower(names(jbp01))jbp01$date_id<-as.Date(jbp01$date_id)jbp01 <- sqldf("select date_id,channel,sum(vistrs)-sum(invalid_vistrs) valid_vistrs from jbp01  where channel <> ‘SEM‘ group by date_id,channel  ",row.names=T)p <- ggplot(jbp01,aes(x=date_id,y=valid_vistrs,group=channel,colour=factor(channel)))p +geom_line(size=0.75)+  facet_wrap(~channel)+  scale_x_date(labels = date_format("%m/%d"),               minor_breaks = date_breaks("1 week"))+  scale_y_continuous(labels=comma)+  xlab("日期")+  ylab("有效访客Vstrs")+  labs(title="渠道日有效访客数") #设置图的标题####广告PPT 玫瑰图####myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from ad_pie")close(myconnct)names(jbp01) <- tolower(names(jbp01))names(jbp01) <- c("channel","无效访客","有效访客")mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")ggplot(mt)+geom_bar(aes(x=channel,y=visits,fill=type),stat="identity")+  coord_polar()+  ##玫瑰图  xlab("渠道")+ ylab("访客数")+  theme(text=element_text(size=12),axis.text.x=element_text(colour="black",size=12,face="bold"))+  scale_y_continuous(label=comma)+  guides(fill=guide_legend(title=NULL))+  labs(title="渠道访客差异")#ylim(0,500) #设立坐标轴的范围#guides(fill=F)#剔除标签####堆积图 玫瑰图####myconnct <- odbcConnect("jbp2014",uid="chengyingbao",pwd="Mis,111")jbp01 <- sqlQuery(myconnct,"select * from ad_pie")close(myconnct)names(jbp01) <- tolower(names(jbp01))ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity")+  coord_polar()  ##玫瑰图 堆积图衍生# stat= bin 默认 identity 独立变量 ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity") #堆积柱状图ggplot(jbp01)+geom_bar(aes(x=channel,y=vstrs,fill=id),stat="identity",position="fill") + #堆积柱状图  scale_y_continuous(labels=percent)#position: stack(数值) fill(百分比) identity() dodge(并排)  jitter(增加扰动)####融合####mt <- melt(jbp01, id.vars=c("channel"), value.name="visits", variable.name="type")names(mt)str(mt)sem_visigt, seo_vistdcast(mt, channel + dd ~ type, sum, mean)plyr splite-c-comdplyr#####ppt cluser####myconnct <- odbcConnect(dsn="jbp2014",uid="chengyingbao",pwd="Mis,111")chart01 <- sqlQuery(channel=myconnct,query="select page_categ_name,pro_vstrs_per,yhd_vstrs_per from ad_cluster")close(myconnct)names(chart01) <- tolower(names(chart01))names(chart01) <- c("page_categ_name","可乐活动访客","全站访客")mt <- melt(data=http://www.mamicode.com/chart01,id.vars="page_categ_name",value.name="percent",variable.name="type")mt <- sqldf("select page_categ_name,type,case when type=‘可乐活动访客‘ then percent*-1            else percent            end percent from mt")p = ggplot(mt)p+ geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+  coord_flip()+  theme(axis.text.x=element_blank())#p = ggplot(mt,aes(x=interaction(page_categ_name, type),y=percent))#p+ #  geom_bar(aes(x=page_categ_name,y=percent,fill=type),stat="identity",position="identity")+#  coord_flip()+#  geom_text(aes(label=percent, vjust=-0.2))

 

利用生鲜数据画图