#### Bar plots ----
# Worked examples of bar graphs and Cleveland dot plots with ggplot2, using
# the example data sets shipped in the gcookbook package.
#
# There is an important distinction to be aware of when making bar graphs:
# sometimes the bar heights represent counts of cases in the data set, and
# sometimes they represent values in the data set.
#
# Written for ggplot2 >= 2.2.0 (geom_col(), position_stack(reverse = ),
# and the stacking order assumed by the label-offset code below).

# Install the example data package only when it is missing.
if (!requireNamespace("gcookbook", quietly = TRUE)) {
  install.packages("gcookbook")
}

library(ggplot2)   # ggplot(), geoms, scales, themes; also the diamonds data
library(gcookbook) # example data sets used throughout
library(plyr)      # arrange(), ddply(), desc()

## Bar graph of values ----
# x is a factor; y gives the height of each bar.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

# Same plot: geom_col() is shorthand for geom_bar(stat = "identity").
# (The older qplot(group, weight, data = pg_mean, geom = "bar",
# stat = "identity") form is not used here because qplot()'s `stat` argument
# was deprecated in ggplot2 2.0.0.)
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_col()

# Add a fill colour and an outline.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity", fill = "lightblue", colour = "black")

## Grouped bars ----
# One categorical variable is mapped to x and a continuous variable to y.
# To split x by a second categorical variable, map it to `fill` and use
# position = "dodge" (without dodging the result is a stacked bar). Bars
# with the same x but a different fill value are then drawn side by side.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

# Copy the first five rows only; the bar for the dropped combination will be
# missing from the plot.
ce <- cabbage_exp[1:5, ]
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", colour = "black", stat = "identity") +
  scale_fill_brewer(palette = "Pastel1")

## Bar graph of counts ----
# With a categorical/discrete x, geom_bar() counts the rows per value.
ggplot(diamonds, aes(x = cut)) +
  geom_bar() # equivalent to geom_bar(stat = "count")

# With a continuous x, bin the values instead to get a histogram.
# geom_bar() no longer bins continuous data, so use geom_histogram().
ggplot(diamonds, aes(x = carat)) +
  geom_histogram()

## Colour ----
# Keep the rows whose Change ranks above 40.
upc <- subset(uspopchange, rank(Change) > 40)
ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(position = "dodge", stat = "identity")

# Reorder x by Change, set the fill colours manually, and relabel the axis.
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

## Different colours for negative and positive values ----
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
csub$pos <- csub$Anomaly10y >= 0 # logical variable used for the fill

# position = "identity" disables stacking; older ggplot2 versions warned
# that stacking is not well defined for negative values.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

# Remove the legend (guide = "none"), and change the fill colours plus the
# colour and thickness of the outline.
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for outlines; `size`
# is kept here because it also works on older versions.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity", position = "identity",
    colour = "black", size = 0.8
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

## Bar width and spacing ----
# Set `width` in geom_bar() to make the bars narrower or wider. The default
# is 0.9; larger values give wider bars, smaller values narrower bars.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)

# For grouped bars the default is no space between bars within a group. To
# add space, make `width` smaller and give position_dodge() a value larger
# than `width`. position = "dodge" is the same as position_dodge(0.9).
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(0.7))

## Stacked bars ----
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# Reverse the order of the legend entries.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

# Reverse the stacking order. The `order` aesthetic was deprecated in
# ggplot2 2.0.0, so the order is controlled through position_stack().
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE))

ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black", size = 0.8) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

## Proportional stacked bars ----
# Group-wise transform(), splitting on "Date": each weight becomes a
# percentage of its Date total.
ce <- ddply(
  cabbage_exp, "Date", transform,
  percent_weight = Weight / sum(Weight) * 100
)
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

#### Adding labels ----
# Label just below the top of each bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

# Label just above the top of each bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)

# Grouped bars: dodge the labels by the same amount as the bars.
# `size` sets the label font size.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5, colour = "white",
    position = position_dodge(.9), size = 3
  )

## Stacked bars with labels ----
# Sort by Date, then by Cultivar in descending order: ggplot2 >= 2.2.0 puts
# the last fill level at the bottom of the stack, so the running total must
# start from that level for the labels to line up with their segments.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
# Cumulative sum within each Date gives the top of each segment.
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")

# Place each label in the middle of its segment instead: top of the segment
# minus half of the segment's height.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
ce <- ddply(
  ce, "Date", transform,
  label_y = cumsum(Weight) - 0.5 * Weight
)
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")

# A nicer version: outlined bars, pastel fill, and labels with units.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

#### Cleveland dot plot ----
tophit <- tophitters2001[1:25, ] # take the first 25 rows of tophitters2001
ggplot(tophit, aes(x = avg, y = name)) +
  geom_point()

# Show the columns used in the plots below.
tophit[, c("name", "lg", "avg")]

# Sort the names by avg and use dashed horizontal guide lines.
ggplot(tophit, aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) + # use a larger dot
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

# Swap the axes: names on x (with rotated labels), dashed vertical lines.
ggplot(tophit, aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) + # use a larger dot
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )

## Grouped by league ----
# reorder() sorts by a single variable only, so build the level order by
# hand: names sorted first by lg, then by avg.
nameorder <- tophit$name[order(tophit$lg, tophit$avg)]
# Turn name into a factor with its levels in the order of nameorder.
tophit$name <- factor(tophit$name, levels = nameorder)

# NOTE: ggplot2 >= 3.5.0 prefers legend.position = "inside" together with
# legend.position.inside; the numeric form is kept because it also works on
# older versions.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(), # no horizontal grid lines
    legend.position = c(1, 0.55),         # put the legend inside the plot area
    legend.justification = c(1, 0.5)
  )