一.绘制简单的折线图
> BOD
Time demand
1 1 8.3
2 2 10.3
3 3 19.0
4 4 16.0
5 5 15.6
6 7 19.8
> ggplot(BOD,aes(x=Time,y=demand))+geom_line()
> BOD1 <- BOD
> BOD1$Time <- factor(BOD1$Time) ;;转化为因子变量
> ggplot(BOD1,aes(x=Time,y=demand,group=1))+geom_line() ;;x为因子变量时,需要通过group=1指明所有数据点属于同一组,否则无法连成折线
> ggplot(BOD,aes(x=Time,y=demand))+geom_line()+ylim(0,max(BOD$demand))
> ggplot(BOD,aes(x=Time,y=demand))+geom_line()+expand_limits(y=0)
这两个命令的效果相同,都是让y轴的范围从0开始(上限仍为demand的最大值)
> ggplot(BOD,aes(x=Time,y=demand))+geom_line()+geom_point()
;;在图像上加数据点
> library(gcookbook)
> ggplot(worldpop,aes(x=Year,y=Population))+geom_line()+geom_point()
;;绘制人口与时间的折线图
>ggplot(worldpop,aes(x=Year,y=Population))+geom_line()+geom_point()+scale_y_log10()
;;把人口作10为底的对数转化
二.绘制多重折线图
> library(plyr)
> tg <-ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len)) ;;按照supp和dose进行分组求平均值
> ggplot(tg,aes(x=dose,y=length,color=supp))+geom_line() ;;将supp映射给颜色
> ggplot(tg,aes(x=dose,y=length,linetype=supp))+geom_line() ;;将supp映射给线类型
> ggplot(tg,aes(x=factor(dose),y=length,linetype=supp,group=supp))+geom_line()
;;将dose转换为因子变量的时候,一定要把supp映射给group,告诉系统有两类。只有x为连续变量的时候用不着
>ggplot(tg,aes(x=dose,y=length,linetype=supp,shape=supp))+geom_line()+geom_point(size=4)
;;把supp映射给shape
>ggplot(tg,aes(x=dose,y=length,linetype=supp,fill=supp))+geom_line()+geom_point(size=4,shape=21)
;;把supp映射给fill,使用有颜色标记的点
> ggplot(tg,aes(x=dose,y=length,shape=supp))+
+ geom_line(position = position_dodge(0.2))+ ;;将连接线左右移动0.2
+ geom_point(position = position_dodge(0.2),size=4) ;;将数据点左右移动0.2
三.修改线条的样式
> ggplot(BOD,aes(x=Time,y=demand))+
+ geom_line(linetype="dashed",size=1,color="blue")
;;通过linetype,size和color来控制
> library(plyr)
> tg <-ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))
> ggplot(tg,aes(x=dose,y=length,color=supp))+geom_line()+scale_color_brewer(palette = "Set1")
如果两根线的属性(linetype和color)一样,需要另加group来指定分组变量
> ggplot(tg,aes(x=dose,y=length,group=supp))+geom_line(color="darkgreen",size=1.5)
>ggplot(tg,aes(x=dose,y=length,color=supp))+geom_line(linetype="dashed")+geom_point(shape=22,size=3,fill="white")
四.修改数据标记的样式
>ggplot(BOD,aes(x=Time,y=demand))+geom_line()+geom_point(size=4,shape=22,colour="darkred",fill="pink")
> ggplot(BOD,aes(x=Time,y=demand))+
+ geom_line()+
+ geom_point(size=4,shape=22,colour="darkred",fill="pink")+
+ geom_text(aes(y=demand,label=demand),vjust=-1)+ ;;添加标签
+ ylim(0,max(BOD$demand)+2) ;;调整坐标
> pd <- position_dodge(0.2)
> ggplot(tg,aes(x=dose,y=length,fill=supp))+ ;;把supp映射给fill
+ geom_line(position = pd)+
+ geom_point(shape=21,size=3,position=pd)+
+ scale_fill_manual(values=c("black","white")) ;;修改映射的颜色
四 绘制面积图
> sunspotyear <-
data.frame(Year = as.numeric(time(sunspot.year)),Sunspots =as.numeric(sunspot.year))
;;把R自带的时间序列sunspot.year转换为我们需要的数据框形式(Year和Sunspots两列)
> sunspotyear[1:5,]
Year Sunspots
1 1700 5
2 1701 11
3 1702 16
4 1703 23
5 1704 36
> ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area()
>ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area(color="black",fill="blue",alpha=0.2)
;;将图像的透明度设置为80%
>ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area(fill="blue",alpha=0.2)+geom_line()
;;先绘制不带边框的面积图,再加一个line的图层,可以避免在起点和终点处各有一条垂线(因为color是加的外框)
五 绘制堆积图
> ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup))+
;;把AgeGroup映射给fill
+ geom_area(size=0.2,alpha=0.4)+ ;;让图像透明度为60%
+ scale_fill_brewer(palette="Blues",breaks=rev(levels(uspopage$AgeGroup)))+
;;通过breaks可以让图例和图像顺序反向
+ geom_line(position="stack",size=0.2) ;;在外面加线,可以避免原图中出现两侧的竖线
> library(gcookbook)
> library(plyr)
> uspopage_prop <-ddply(uspopage,"Year",transform,Percent=Thousands/sum(Thousands)*100)
;;先把uspopage按照Year的不同拆分成多个数据框,执行transform()函数计算每个数据框对应的Percent
最后再合并成一个数据框
> ggplot(uspopage_prop,aes(x=Year,y=Percent,fill=AgeGroup))+geom_area(color="black",size=.2,alpha=0.4)+scale_fill_brewer(palette = "Blues")
六 添加置信域
> library(gcookbook)
> climate[1:5,]
Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y
1 Berkeley 1800 NA NA -0.435 0.505
2 Berkeley 1801 NA NA -0.453 0.493
3 Berkeley 1802 NA NA -0.460 0.486
4 Berkeley 1803 NA NA -0.493 0.489
5 Berkeley 1804 NA NA -0.536 0.483
;;其中 Anomaly10y是各年温度相对于1950-1980年平均水平的变异,Unc10y是95%水平下的置信区间
> clim <- subset(climate,Source=="Berkeley",select=c("Year","Anomaly10y","Unc10y"))
;; 提取climate里满足Source等于Berkeley的子集,选择Year,Anomaly10y和Unc10y三列
> ggplot(clim,aes(x=Year,y=Anomaly10y))+geom_ribbon(aes(ymin=Anomaly10y-Unc10y,ymax=Anomaly10y+Unc10y),alpha=0.2)+geom_line()
;;通过geom_ribbon()绘制置信域。注意:geom_ribbon()要放在geom_line()之前,这样折线所在的图层才会绘制在阴影区域之上
> ggplot(clim,aes(x=Year,y=Anomaly10y))+
+ geom_line(aes(y=Anomaly10y-Unc10y),color="grey50",linetype="dotted")+
+ geom_line(aes(y=Anomaly10y+Unc10y),color="grey50",linetype="dotted")+
+ geom_line()