I've been looking for a solution to plot survival curves using ggplot2. I've found some nice examples, but they do not follow the whole ggplot2 aesthetics (mainly regarding shaded confidence intervals and so on). So finally I've written my own function:
我一直在寻找用ggplot2绘制生存曲线的方法。我找到了一些很好的例子,但是它们并没有遵循整个ggplot2美学(主要是关于阴影的置信区间等等)。最后我写了自己的函数
ggsurvplot<-function(s, conf.int=T, events=T, shape="|", xlab="Time",
ylab="Survival probability", zeroy=F, col=T, linetype=F){
#s: a survfit object.
#conf.int: TRUE or FALSE to plot confidence intervals.
#events: TRUE or FALSE to draw points when censoring events occur
#shape: the shape of these points
#zeroy: Force the y axis to reach 0
#col: TRUE, FALSE or a vector with colours. Colour or B/W
#linetype: TRUE, FALSE or a vector with line types.
require(ggplot2)
require(survival)
if(class(s)!="survfit") stop("Survfit object required")
#Build a data frame with all the data
sdata<-data.frame(time=s$time, surv=s$surv, lower=s$lower, upper=s$upper)
sdata$strata<-rep(names(s$strata), s$strata)
#Create a blank canvas
kmplot<-ggplot(sdata, aes(x=time, y=surv))+
geom_blank()+
xlab(xlab)+
ylab(ylab)+
theme_bw()
#Set color palette
if(is.logical(col)) ifelse(col,
kmplot<-kmplot+scale_colour_brewer(type="qual", palette=6)+scale_fill_brewer(type="qual", palette=6),
kmplot<-kmplot+scale_colour_manual(values=rep("black",length(s$strata)))+scale_fill_manual(values=rep("black",length(s$strata)))
)
else kmplot<-kmplot+scale_fill_manual(values=col)+scale_colour_manual(values=col)
#Set line types
if(is.logical(linetype)) ifelse(linetype,
kmplot<-kmplot+scale_linetype_manual(values=1:length(s$strata)),
kmplot<-kmplot+scale_linetype_manual(values=rep(1, length(s$strata)))
)
else kmplot<-kmplot+scale_linetype_manual(values=linetype)
#Force y axis to zero
if(zeroy) {
kmplot<-kmplot+ylim(0,1)
}
#Confidence intervals
if(conf.int) {
#Create a data frame with stepped lines
n <- nrow(sdata)
ys <- rep(1:n, each = 2)[-2*n] #duplicate row numbers and remove the last one
xs <- c(1, rep(2:n, each=2)) #first row 1, and then duplicate row numbers
scurve.step<-data.frame(time=sdata$time[xs], lower=sdata$lower[ys], upper=sdata$upper[ys], surv=sdata$surv[ys], strata=sdata$strata[ys])
kmplot<-kmplot+
geom_ribbon(data=scurve.step, aes(x=time,ymin=lower, ymax=upper, fill=strata), alpha=0.2)
}
#Events
if(events) {
kmplot<-kmplot+
geom_point(aes(x=time, y=surv, col=strata), shape=shape)
}
#Survival stepped line
kmplot<-kmplot+geom_step(data=sdata, aes(x=time, y=surv, col=strata, linetype=strata))
#Return the ggplot2 object
kmplot
}
I wrote a previous version using for loops for each strata, but is was slower. As I'm not a programmer, I look for advice to improve the function. Maybe adding a data table with patients at risk, or a better integration in the ggplot2 framework.
我编写了一个以前的版本,用于每个层次的循环,但速度较慢。由于我不是一个程序员,我寻求建议来改进这个功能。可能在风险中添加一个数据表,或者在ggplot2框架中更好地集成。
Thanks
谢谢
2 个解决方案
#1
6
You could try the following for something with shaded areas between CIs:
您可以尝试以下的方法,在CIs:
(I'm using the development version here as there's a flaw with the parameter alpha
in the production version (doesn't shade upper rectangles correctly for non-default values). Otherwise the functions are identical).
(我在这里使用开发版本,因为在生产版本中,参数alpha存在缺陷(不为非默认值设置正确的上矩形)。否则函数是相同的。
library(devtools)
dev_mode(TRUE) # in case you don't want a permanent install
install_github("survMisc", "dardisco")
library("survMisc", lib.loc="C:/Users/c/R-dev") # or wherever you/devtools has put it
data(kidney, package="KMsurv")
p1 <- autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
type="fill", survSize=2, palette="Pastel1",
fillLineSize=0.1, alpha=0.4)$plot
p1 + theme_classic()
dev_mode(FALSE)
giving:
给:
And for a classic plot and table:
对于一个经典的情节和桌子:
autoplot(autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
type="CI"))
See ?survMisc::autoplot.survfit
and ?survMisc::autoplot.tableAndPlot
for more options.
看到了什么? survMisc::autoplot。survfit和? survMisc::autoplot。tableAndPlot更多选项。
#2
0
I wanted to do the same thing and also got the error from the cartesian error. In addition I wanted to have numbers of censored in my code and numbers of events. So I wrote this little snippet. Still a bit raw but maybe useful for some.
我想做同样的事情,也得到了笛卡尔误差的误差。此外,我还想在我的代码和事件数量上进行审查。我写了这个小片段。还有些生疏,但可能对一些人有用。
ggsurvplot<-function(
time,
event,
event.marker=1,
marker,
tabletitle="tabletitle",
xlab="Time(months)",
ylab="Disease Specific Survival",
ystratalabs=c("High", "Low"),
pv=TRUE,
legend=TRUE,
n.risk=TRUE,
n.event=TRUE,
n.cens=TRUE,
timeby=24,
xmax=120,
panel="A")
{
require(ggplot2)
require(survival)
require(gridExtra)
s.fit=survfit(Surv(time, event==event.marker)~marker)
s.diff=survdiff(Surv(time, event=event.marker)~marker)
#Build a data frame with all the data
sdata<-data.frame(time=s.fit$time,
surv=s.fit$surv,
lower=s.fit$lower,
upper=s.fit$upper,
n.censor=s.fit$n.censor,
n.event=s.fit$n.event,
n.risk=s.fit$n.risk)
sdata$strata<-rep(names(s.fit$strata), s.fit$strata)
m <- max(nchar(ystratalabs))
if(xmax<=max(sdata$time)){
xlims=c(0, round(xmax/timeby, digits=0)*timeby)
}else{
xlims=c(0, round((max(sdata$time))/timeby, digits=0)*timeby)
}
times <- seq(0, max(xlims), by = timeby)
subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)
strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs])
time = summary(s.fit, time = times, extend = TRUE)$time
#Buidling the plot basics
p<-ggplot(data = sdata, aes(colour = strata, group = strata, shape=strata)) +
theme_classic()+
geom_step(aes(x = time, y = surv), direction = "hv")+
scale_x_continuous(breaks=times)+
scale_y_continuous(breaks=seq(0,1,by=0.1)) +
geom_ribbon(aes(x = time, ymax = upper, ymin = lower, fill = strata), directions = "hv", linetype = 0,alpha = 0.10) +
geom_point(data = subset(sdata, n.censor == 1), aes(x = time, y = surv), shape = 3) +
labs(title=tabletitle)+
theme(
plot.margin=unit(c(1,0.5,(2.5+length(levels(factor(marker)))*2),2), "lines"),
legend.title=element_blank(),
legend.background=element_blank(),
legend.position=c(0.2,0.2))+
scale_colour_discrete(
breaks=c(levels(factor(sdata$strata))),
labels=ystratalabs) +
scale_shape_discrete(
breaks=c(levels(factor(sdata$strata))),
labels=ystratalabs) +
scale_fill_discrete(
breaks=c(levels(factor(sdata$strata))),
labels=ystratalabs) +
xlab(xlab)+
ylab(ylab)+
coord_cartesian(xlim = xlims, ylim=c(0,1))
#addping the p-value
if (pv==TRUE){
pval <- 1 - pchisq(s.diff$chisq, length(s.diff$n) - 1)
pvaltxt<-if(pval>=0.001){
paste0("P = ", round(pval, digits=3))
}else{
"P < 0.001"
}
p <- p + annotate("text", x = 0.85 * max(xlims), y = 0.1, label = pvaltxt)
}
#adding information for tables
times <- seq(0, max(xlims), by = timeby)
subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)
risk.data<-data.frame(strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs]),
time = summary(s.fit, time = times, extend = TRUE)$time[subs],
n.risk = summary(s.fit,times = times,extend = TRUE)$n.risk[subs],
n.cens = summary(s.fit, times=times, extend=TRUE)$n.cens[subs],
n.event=summary(s.fit, times=times, extend=TRUE)$n.event[subs])
#adding the risk table
if(n.risk==TRUE){
p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=-0.15, label="Numbers at risk")
for (q in 1:length(levels(factor(marker)))){
p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.15+(-0.05*q)), label=paste0(ystratalabs[q]))
for(i in ((q-1)*length(times)+1):(q*length(times))){
p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.15+(-0.05*q)), label=paste0(risk.data$n.risk[i]))
}
}
}
#adding the event table
if(n.event==TRUE){
p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.20+(-0.05*length(levels(factor(marker))))), label="Number of events")
for (q in 1:length(levels(factor(marker)))){
p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(ystratalabs[q]))
for(i in ((q-1)*length(times)+1):(q*length(times))){
p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(risk.data$n.event[i]))
}
}
}
#adding the cens table
if(n.event==TRUE){
p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.25+(-0.05*length(levels(factor(marker)))*2)), label="Number of censored")
for (q in 1:length(levels(factor(marker)))){
p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(ystratalabs[q]))
for(i in ((q-1)*length(times)+1):(q*length(times))){
p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(risk.data$n.cens[i]))
}
}
}
#adding panel marker
p <- p + annotate("text", cex=10, x= -0.2*max(xlims), y=1.1, label=panel)
#drawing the plot with the tables outside the margins
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name=="panel"] <- "off"
grid.draw(gt)
}
#1
6
You could try the following for something with shaded areas between CIs:
您可以尝试以下的方法,在CIs:
(I'm using the development version here as there's a flaw with the parameter alpha
in the production version (doesn't shade upper rectangles correctly for non-default values). Otherwise the functions are identical).
(我在这里使用开发版本,因为在生产版本中,参数alpha存在缺陷(不为非默认值设置正确的上矩形)。否则函数是相同的。
library(devtools)
dev_mode(TRUE) # in case you don't want a permanent install
install_github("survMisc", "dardisco")
library("survMisc", lib.loc="C:/Users/c/R-dev") # or wherever you/devtools has put it
data(kidney, package="KMsurv")
p1 <- autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
type="fill", survSize=2, palette="Pastel1",
fillLineSize=0.1, alpha=0.4)$plot
p1 + theme_classic()
dev_mode(FALSE)
giving:
给:
And for a classic plot and table:
对于一个经典的情节和桌子:
autoplot(autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
type="CI"))
See ?survMisc::autoplot.survfit
and ?survMisc::autoplot.tableAndPlot
for more options.
看到了什么? survMisc::autoplot。survfit和? survMisc::autoplot。tableAndPlot更多选项。
#2
0
I wanted to do the same thing and also got the error from the cartesian error. In addition I wanted to have numbers of censored in my code and numbers of events. So I wrote this little snippet. Still a bit raw but maybe useful for some.
我想做同样的事情,也得到了笛卡尔误差的误差。此外,我还想在我的代码和事件数量上进行审查。我写了这个小片段。还有些生疏,但可能对一些人有用。
ggsurvplot<-function(
time,
event,
event.marker=1,
marker,
tabletitle="tabletitle",
xlab="Time(months)",
ylab="Disease Specific Survival",
ystratalabs=c("High", "Low"),
pv=TRUE,
legend=TRUE,
n.risk=TRUE,
n.event=TRUE,
n.cens=TRUE,
timeby=24,
xmax=120,
panel="A")
{
require(ggplot2)
require(survival)
require(gridExtra)
s.fit=survfit(Surv(time, event==event.marker)~marker)
s.diff=survdiff(Surv(time, event=event.marker)~marker)
#Build a data frame with all the data
sdata<-data.frame(time=s.fit$time,
surv=s.fit$surv,
lower=s.fit$lower,
upper=s.fit$upper,
n.censor=s.fit$n.censor,
n.event=s.fit$n.event,
n.risk=s.fit$n.risk)
sdata$strata<-rep(names(s.fit$strata), s.fit$strata)
m <- max(nchar(ystratalabs))
if(xmax<=max(sdata$time)){
xlims=c(0, round(xmax/timeby, digits=0)*timeby)
}else{
xlims=c(0, round((max(sdata$time))/timeby, digits=0)*timeby)
}
times <- seq(0, max(xlims), by = timeby)
subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)
strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs])
time = summary(s.fit, time = times, extend = TRUE)$time
#Buidling the plot basics
p<-ggplot(data = sdata, aes(colour = strata, group = strata, shape=strata)) +
theme_classic()+
geom_step(aes(x = time, y = surv), direction = "hv")+
scale_x_continuous(breaks=times)+
scale_y_continuous(breaks=seq(0,1,by=0.1)) +
geom_ribbon(aes(x = time, ymax = upper, ymin = lower, fill = strata), directions = "hv", linetype = 0,alpha = 0.10) +
geom_point(data = subset(sdata, n.censor == 1), aes(x = time, y = surv), shape = 3) +
labs(title=tabletitle)+
theme(
plot.margin=unit(c(1,0.5,(2.5+length(levels(factor(marker)))*2),2), "lines"),
legend.title=element_blank(),
legend.background=element_blank(),
legend.position=c(0.2,0.2))+
scale_colour_discrete(
breaks=c(levels(factor(sdata$strata))),
labels=ystratalabs) +
scale_shape_discrete(
breaks=c(levels(factor(sdata$strata))),
labels=ystratalabs) +
scale_fill_discrete(
breaks=c(levels(factor(sdata$strata))),
labels=ystratalabs) +
xlab(xlab)+
ylab(ylab)+
coord_cartesian(xlim = xlims, ylim=c(0,1))
#addping the p-value
if (pv==TRUE){
pval <- 1 - pchisq(s.diff$chisq, length(s.diff$n) - 1)
pvaltxt<-if(pval>=0.001){
paste0("P = ", round(pval, digits=3))
}else{
"P < 0.001"
}
p <- p + annotate("text", x = 0.85 * max(xlims), y = 0.1, label = pvaltxt)
}
#adding information for tables
times <- seq(0, max(xlims), by = timeby)
subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)
risk.data<-data.frame(strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs]),
time = summary(s.fit, time = times, extend = TRUE)$time[subs],
n.risk = summary(s.fit,times = times,extend = TRUE)$n.risk[subs],
n.cens = summary(s.fit, times=times, extend=TRUE)$n.cens[subs],
n.event=summary(s.fit, times=times, extend=TRUE)$n.event[subs])
#adding the risk table
if(n.risk==TRUE){
p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=-0.15, label="Numbers at risk")
for (q in 1:length(levels(factor(marker)))){
p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.15+(-0.05*q)), label=paste0(ystratalabs[q]))
for(i in ((q-1)*length(times)+1):(q*length(times))){
p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.15+(-0.05*q)), label=paste0(risk.data$n.risk[i]))
}
}
}
#adding the event table
if(n.event==TRUE){
p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.20+(-0.05*length(levels(factor(marker))))), label="Number of events")
for (q in 1:length(levels(factor(marker)))){
p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(ystratalabs[q]))
for(i in ((q-1)*length(times)+1):(q*length(times))){
p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(risk.data$n.event[i]))
}
}
}
#adding the cens table
if(n.event==TRUE){
p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.25+(-0.05*length(levels(factor(marker)))*2)), label="Number of censored")
for (q in 1:length(levels(factor(marker)))){
p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(ystratalabs[q]))
for(i in ((q-1)*length(times)+1):(q*length(times))){
p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(risk.data$n.cens[i]))
}
}
}
#adding panel marker
p <- p + annotate("text", cex=10, x= -0.2*max(xlims), y=1.1, label=panel)
#drawing the plot with the tables outside the margins
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name=="panel"] <- "off"
grid.draw(gt)
}