I have produced a bar chart that shows cumulative totals over periods of months for various programs using the following data structure and code:
我制作了一个条形图,显示了使用以下数据结构和代码的各种程序的累计总数。
library(dplyr)
data_totals <- data_long %>%
group_by(Period, Program) %>%
arrange(Period, Program) %>%
ungroup() %>%
group_by(Program) %>%
mutate(Running_Total = cumsum(Value))
dput(data_totals)
structure(list(Period = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L), .Label = c("2018-04", "2018-05",
"2018-06", "2018-07", "2018-08", "2018-09", "2018-10", "2018-11",
"2018-12", "2019-01", "2019-02", "2019-03", "Apr-Mar 2019"), class = "factor"),
Program = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L), .Label = c("A",
"B", "C", "D",
"E"), class = "factor"), Value = c(5597,
0, 0, 0, 1544, 0, 0, 0, 0, 1544, 0, 0, 0, 0, 1544, 0, 0,
850, 0, 1544, 0, 0, 0, 0, 1544, 0, 0, 0, 0, 1544, 0, 0, 0,
0, 1544, 0, 0, 0, 0, 1544, 0, 0, 0, 0, 1544, 0, 0, 0, 0,
1544, 0, 0, 0, 0, 1544, 0, 0, 0, 0, 1544), Running_Total = c(5597,
0, 0, 0, 1544, 5597, 0, 0, 0, 3088, 5597, 0, 0, 0, 4632,
5597, 0, 850, 0, 6176, 5597, 0, 850, 0, 7720, 5597, 0, 850,
0, 9264, 5597, 0, 850, 0, 10808, 5597, 0, 850, 0, 12352,
5597, 0, 850, 0, 13896, 5597, 0, 850, 0, 15440, 5597, 0,
850, 0, 16984, 5597, 0, 850, 0, 18528)), .Names = c("Period",
"Program", "Value", "Running_Total"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), vars = "Program", labels = structure(list(
Program = structure(1:5, .Label = c("A",
"B", "C", "D",
"E"), class = "factor")), class = "data.frame", row.names = c(NA,
-5L), vars = "Program", drop = TRUE, .Names = "Program"), indices = list(
c(0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L
), c(1L, 6L, 11L, 16L, 21L, 26L, 31L, 36L, 41L, 46L, 51L,
56L), c(2L, 7L, 12L, 17L, 22L, 27L, 32L, 37L, 42L, 47L, 52L,
57L), c(3L, 8L, 13L, 18L, 23L, 28L, 33L, 38L, 43L, 48L, 53L,
58L), c(4L, 9L, 14L, 19L, 24L, 29L, 34L, 39L, 44L, 49L, 54L,
59L)), drop = TRUE, group_sizes = c(12L, 12L, 12L, 12L, 12L
), biggest_group_size = 12L)
# reorder the groups descending so that the lowest total will be on layers from front to back
reorder(data_totals$Program, -data_totals$Running_Total)
ggplot(data = data_totals, aes(x = Period, y = Running_Total)) +
geom_bar(aes(color = Program, group = Program, fill = Program),
stat = "identity", position = "identity", alpha = 1.0)
It works in that it creates the graph with all the proper data, but the smaller Running_Totals are obscured by the larger ones.
I get the following error message as well:
它的工作原理是它创建了包含所有适当数据的图形,但较小的Running_Totals被较大的数据模糊了。我也收到以下错误消息:
Warning message:
The plyr::rename operation has created duplicates for the following name(s): (`colour`)
Even though I do not have the plyr package loaded.
即使我没有加载plyr包。
I can see all the Running_Totals if I set the alpha to 0.5
如果我将alpha设置为0.5,我可以看到所有的Running_Totals
Running_Total for each Program by Period, alpha = 0.5:
按周期运行每个程序的Running_Total,alpha = 0.5:
How can I get the layers ordered so that the smallest values are on the front most layers working back toward the highest values?
如何对图层进行排序,以使最前面的最小值回到最高值?
1 个解决方案
#1
0
The way I was trying to represent the data in the original question was flawed.
我试图在原始问题中表示数据的方式存在缺陷。
There is no advantage to having the Program with the maximum value for each Period be the top of the bar.
将具有每个Period的最大值的程序作为条形的顶部是没有优势的。
A more illustrative solution is to have a stacked bar, with labels indicating the contribution of each Program to the overall value of each Period:
更具说明性的解决方案是使用堆叠条形图,其中标签指示每个程序对每个期间的总体价值的贡献:
ggplot(data = data_totals[which(data_totals$Running_Total > 0),], aes(x = Period, y = Running_Total, fill = Program)) +
geom_bar(aes(color = Program, group = Program, fill = Program), stat = "identity", position = "stack", alpha = 1.0) +
geom_text(aes(label = Running_Total), position = position_stack(vjust = 0.5))
I used [which(data_totals$Running_Total > 0),]
to eliminate any "0" bars and labels.
我使用[which(data_totals $ Running_Total> 0),]来消除任何“0”条和标签。
#1
0
The way I was trying to represent the data in the original question was flawed.
我试图在原始问题中表示数据的方式存在缺陷。
There is no advantage to having the Program with the maximum value for each Period be the top of the bar.
将具有每个Period的最大值的程序作为条形的顶部是没有优势的。
A more illustrative solution is to have a stacked bar, with labels indicating the contribution of each Program to the overall value of each Period:
更具说明性的解决方案是使用堆叠条形图,其中标签指示每个程序对每个期间的总体价值的贡献:
ggplot(data = data_totals[which(data_totals$Running_Total > 0),], aes(x = Period, y = Running_Total, fill = Program)) +
geom_bar(aes(color = Program, group = Program, fill = Program), stat = "identity", position = "stack", alpha = 1.0) +
geom_text(aes(label = Running_Total), position = position_stack(vjust = 0.5))
I used [which(data_totals$Running_Total > 0),]
to eliminate any "0" bars and labels.
我使用[which(data_totals $ Running_Total> 0),]来消除任何“0”条和标签。