HLM。RCS。限制性立方条。R语言实现RCS生成限制性立方条,画图,源码

时间:2024-03-13 14:16:15

HLM。RCS。限制性立方条。R语言实现RCS生成限制性立方条,画图,源码


#HLM

library(readxl)
library(xlsx)
mydata <- as.data.frame(read_excel("mydata_2019_12.xlsx"))
View(mydata)

#create new dataframe
mydata_new_constructed <- data.frame()
k=1
for (i in 1:nrow(mydata)){
  for (j in 2:35) {
    if (!is.na(mydata[i,j])) {
      mydata_new_constructed[k,1]<-mydata[i,1]
      mydata_new_constructed[k,2]<-as.numeric(colnames(mydata[j]))
      mydata_new_constructed[k,3]<-mydata[i,j]
      mydata_new_constructed[k,4]<-mydata[i,36]
      k<-k+1
    }
  }
}

colnames(mydata_new_constructed)<-c('id','age_week','weight_gain','BMI_group')

###############################################################generate the RCS variables based on age_week
RCS=as.data.frame(rcspline.eval(mydata_new_constructed$age_week, nk=6))
mydata_new_RCS = cbind(mydata_new_constructed,RCS)
write.table(mydata_new_RCS,'mydata_new_RCS.csv')

#############################################################begin to deal with the model
library(readxl)
mydata <- as.data.frame(read_excel("mydata_new_RCS.xlsx"))
names(mydata)<-c('id','age_week','weight_gain','BMI_group','age1','age2','age3','age4')

#delete the outliers original num = 9432, 
mydata<-mydata[mydata$weight_gain<32,]#丢去后9405
mydata<-mydata[mydata$weight_gain>-30,]#丢去后9423

mydata_new_constructed<-mydata_new_constructed[mydata_new_constructed$weight_gain<32,]
mydata_new_constructed<-mydata_new_constructed[mydata_new_constructed$weight_gain>-30,]


#if need to create the dependant variable
for (i in 1:nrow(mydata)){
if (mydata[i,4]==1) {
mydata[i,'LOGWTGAINKG']<-log(mydata[i,3]+4.5)
} else if (mydata[i,4]==2) {
mydata[i,'LOGWTGAINKG']<-log(mydata[i,3]+5.9)
} else if (mydata[i,4]==3) {
mydata[i,'LOGWTGAINKG']<-log(mydata[i,3]+7)
}
}

#build up the model
library(lme4)
Model3.1 = lmer(weight_gain ~ age1+age2+age3+age4+ (1|id) +(1+age1|BMI_group)+(1+age2|BMI_group)+(1+age3|BMI_group)+(1+age4|BMI_group), data=mydata,REML=FALSE)


summary(Model3.1)
coef(Model3.1)
Model3.1
anova(Model3.1)
VarCorr(Model3.1)
diag(VarCorr(Model3.1)$index)
attr(VarCorr(Model3.1),"sc")

#var example: 0.207 + 0.0002(GA Spline term1^2)+ 2(GA Spline term1)(-0.005) + 0.013

RCS_data <- unique(mydata[c(2,5,6,7,8)])
for (i in 1:nrow(RCS_data)){
  RCS_data[i,'mean1'] <- 2.867073+ 3.597614*RCS_data[i,2] -12.54554*RCS_data[i,3] +19.79477*RCS_data[i,4] -22.73170*RCS_data[i,5]
  RCS_data[i,'mean2'] <- 2.867073+ 3.568180*RCS_data[i,2]-12.49818*RCS_data[i,3] +19.79483*RCS_data[i,4]-22.73137*RCS_data[i,5]
  RCS_data[i,'mean3'] <- 2.867073+ 3.357253*RCS_data[i,2]-12.09662*RCS_data[i,3] +19.79483*RCS_data[i,4]-22.73169*RCS_data[i,5]
  RCS_data[i,'sd1'] <- sqrt((14.71+1.578) + 0.04079*(RCS_data[i,2]^2)+4.255)
  RCS_data[i,'sd2'] <- sqrt((14.71+1.578+0.0001188) + (0.04079+0.0003057)*(RCS_data[i,2]^2)+4.255)
  RCS_data[i,'sd3'] <- sqrt((14.71+1.578+0.0001188+0.06352) + (0.04079+0.0003057+0.004636)*(RCS_data[i,2]^2)+4.255)
}

write.table(RCS_data,'mean_sd_2019_12.csv')

#BMI 1
plot(mydata$age_week[mydata$BMI_group==1], mydata$weight_gain[mydata$BMI_group==1], col = "red")
points(RCS_data$age_week, RCS_data$mean1, col = "blue", cex = 1.5)

#group1 
group1<-mydata[mydata$BMI_group==1,]

#quantiles
table1<-data.frame(matrix(NA,ncol = 5))
for (gtage in sort(unique(group1$age_week))){
  temp<-quantile(group1$weight_gain[group1$age_week==gtage],probs = c(3,10,50,90,97)/100)
  table1<-rbind(table1,temp)
}
#sd
result_SD<-matrix(ncol = 2)
result_sd <- c()
for (gtage in sort(unique(group1$age_week))){
    temp_sum<-0
    for (observation in group1$weight_gain[group1$age_week==gtage]){
    temp_sum<-temp_sum+(observation - RCS_data$mean1[RCS_data$age_week==gtage])^2
    }
    temp_sd<-sqrt(temp_sum/length(group1$weight_gain[group1$age_week==gtage]))
    result_sd<-c(result_sd,temp_sd)
}

#result1
result1<-as.data.frame(cbind(sort(unique(group1$age_week)),result_sd,NA))
i = 1
for (gtage in sort(unique(group1$age_week))){
  result1[i,3]<-RCS_data$mean1[RCS_data$age_week==gtage]
  i<-i+1
}
names(result1)<-c('age','sd','mean')

#plot1
library(ggplot2)
ggplot(data =RCS_data ,aes(x=age_week,y=mean1))+ geom_smooth(se=F,
                                                      method="loess")+
  geom_point(mapping=aes(x=age_week, y=weight_gain), data=mydata_new_constructed)+
  geom_smooth(mapping=aes(x=age_week, y=mean1+sd1), data=RCS_data,se=F,method="loess",linetype="longdash")+
  geom_smooth(mapping=aes(x=age_week, y=mean1-sd1), data=RCS_data,se=F,method="loess",linetype="longdash")+
  geom_smooth(mapping=aes(x=age_week, y=mean1+2*sd1), data=RCS_data,se=F,method="loess",linetype="dotted")+
  geom_smooth(mapping=aes(x=age_week, y=mean1-2*sd1), data=RCS_data,se=F,method="loess",linetype="dotted")

#group2
group2<-mydata[mydata$BMI_group==2,]

#n
as.data.frame(table(group2$age_week))

#mean
RCS_data[order(RCS_data$age_week),c(1,7)]

#sd
RCS_data[order(RCS_data$age_week),c(1,10)]

#quantiles
quantile2<-matrix(NA,nrow = 1,ncol = 7)
result2<-as.data.frame(RCS_data[order(RCS_data$age_week),c(1,7,10)])
for (i in 1:nrow(result2)){
  temp <- qnorm(c(0.03,0.05,0.1,0.5,0.9,0.95,0.97),mean = result2$mean2[i],sd = result2$sd2[i])
  quantile2<-rbind(quantile2,temp)
}

#result2
result2$posi_sd<-result2$mean2+result2$sd2
result2$nega_sd<-result2$mean2-result2$sd2
result2$posi2_sd<-result2$mean2+2*result2$sd2
result2$nega2_sd<-result2$mean2-2*result2$sd2
result2[4:7]

#plot2
library(ggplot2)
ggplot(data =result2 ,aes(x=age_week,y=mean2))+ geom_smooth(se=F,
                                                      method="loess",col='black')+
  geom_point(mapping=aes(x=age_week, y=weight_gain), data=group2)+
  geom_smooth(mapping=aes(x=age_week, y=mean2+sd2), data=result2,se=F,method="loess",linetype="longdash",col='black')+
  geom_smooth(mapping=aes(x=age_week, y=mean2-sd2), data=result2,se=F,method="loess",linetype="longdash",col='black')+
  geom_smooth(mapping=aes(x=age_week, y=mean2+2*sd2), data=result2,se=F,method="loess",linetype="dotted",col='black')+
  geom_smooth(mapping=aes(x=age_week, y=mean2-2*sd2), data=result2,se=F,method="loess",linetype="dotted",col='black')+ 
  xlab("Gestational age (wk)") + 
  ylab('Gestational weight gain (kg)') +theme_bw() 

#covered points
tot_point<-c()
ok_point<-c()
for (age in result2$age_week){
  tot_point_temp<-length(group2$weight_gain[group2$age_week==age])
  tot_point<-c(tot_point,tot_point_temp)
  
  ok_point_temp<-sum(group2$weight_gain[group2$age_week==age]>result2$nega_sd[result2$age_week==age] & group2$weight_gain[group2$age_week==age]<result2$posi_sd[result2$age_week==age])
  ok_point<-c(ok_point,ok_point_temp)
}

coverage<-sum(ok_point)/sum(tot_point)
coverage

#group3
group3<-mydata[mydata$BMI_group==3,]

#n
as.data.frame(table(group3$age_week))

#quantiles
table3<-data.frame(matrix(NA,ncol = 5))
for (gtage in sort(unique(group3$age_week))){
  temp<-quantile(group3$weight_gain[group3$age_week==gtage],probs = c(3,10,50,90,97)/100)
  table3<-rbind(table3,temp)
}

#mean
RCS_data[order(RCS_data$age_week),c(1,8)]

#sd
result_sd3 <- c()
for (gtage in sort(unique(group3$age_week))){
  temp_sum<-0
  for (observation in group3$weight_gain[group3$age_week==gtage]){
    temp_sum<-temp_sum+(observation - RCS_data$mean3[RCS_data$age_week==gtage])^2
  }
  temp_sd<-sqrt(temp_sum/length(group3$weight_gain[group3$age_week==gtage]))
  result_sd3<-c(result_sd3,temp_sd)
}
as.data.frame(result_sd3)

#result3
result3<-as.data.frame(cbind(sort(unique(group3$age_week)),result_sd3,NA))
i = 1
for (gtage in sort(unique(group3$age_week))){
  result3[i,3]<-RCS_data$mean3[RCS_data$age_week==gtage]
  i<-i+1
}
names(result3)<-c('age','sd','mean')

#plot3
library(ggplot2)
ggplot(data =result3 ,aes(x=age,y=mean))+ geom_smooth(se=F,
                                                      method="loess")+
  geom_point(mapping=aes(x=age_week, y=weight_gain), data=group3)+
  geom_smooth(mapping=aes(x=age, y=mean+sd), data=result3,se=F,method="loess",linetype="longdash")+
  geom_smooth(mapping=aes(x=age, y=mean-sd), data=result3,se=F,method="loess",linetype="longdash")+
  geom_smooth(mapping=aes(x=age, y=mean+2*sd), data=result3,se=F,method="loess",linetype="dotted")+
  geom_smooth(mapping=aes(x=age, y=mean-2*sd), data=result3,se=F,method="loess",linetype="dotted")


#get the quantiles for group1
a=qnorm(p = 0.03,mean = result1$mean,sd = result1$sd)
b=qnorm(p = 0.1,mean = result1$mean,sd = result1$sd)
c=qnorm(p = 0.5,mean = result1$mean,sd = result1$sd)
d=qnorm(p = 0.9,mean = result1$mean,sd = result1$sd)
e=qnorm(p = 0.97,mean = result1$mean,sd = result1$sd)
cbind(a,b,c,d,e)

a=qnorm(p = 0.03,mean = result2$mean,sd = result2$sd)
b=qnorm(p = 0.1,mean = result2$mean,sd = result2$sd)
c=qnorm(p = 0.5,mean = result2$mean,sd = result2$sd)
d=qnorm(p = 0.9,mean = result2$mean,sd = result2$sd)
e=qnorm(p = 0.97,mean = result2$mean,sd = result2$sd)
cbind(a,b,c,d,e)

a=qnorm(p = 0.03,mean = result3$mean,sd = result3$sd)
b=qnorm(p = 0.1,mean = result3$mean,sd = result3$sd)
c=qnorm(p = 0.5,mean = result3$mean,sd = result3$sd)
d=qnorm(p = 0.9,mean = result3$mean,sd = result3$sd)
e=qnorm(p = 0.97,mean = result3$mean,sd = result3$sd)
cbind(a,b,c,d,e)


#############################################################3
##############################################################
######################for BMI = 2#############################
Model3.2 = lmer(weight_gain ~ age1+age2+age3+age4+age1:id+(1+age1|id)+(1+age2|id)+(1+age3|id)+(1+age4|id), data=group2,REML=FALSE)
summary(Model3.2)
coef(Model3.2)

qqnorm(mydata$weight_gain)
qqline(mydata$weight_gain)

qqnorm(log(group2$weight_gain))
qqline(log(group2$weight_gain))

library(fBasics)
ksnormTest(mydata$weight_gain)

ks.test(mydata$weight_gain,'pnorm')


########################################################################################

#labels
test <-mydata_new_constructed[1:200,]
test$Centiles<-factor(test$BMI_group,levels = c(1,2,3),labels = c("Mean", "1SD", "2SD"))


 s

plot+theme(legend.title = element_text(color="black", size=16, face="bold"))+theme(legend.text = element_text(color=c('black'), size = 14)) + scale_fill_manual(name = 'Centiles')
  scale_fill_discrete(name="Centiles",
                    breaks=c(1, 2, 3),
                    labels=c("50th", "10th and 90th", "3rd and 97th"))


#plot1
library(ggplot2)
ggplot(data =RCS_data ,aes(x=age_week,y=mean1))+ geom_smooth(se=F,
                                                             method="loess",col = 'red')+
  geom_point(mapping=aes(x=age_week, y=weight_gain), data=mydata_new_constructed[mydata_new_constructed$BMI_group=='1',])+
  geom_smooth(mapping=aes(x=age_week, y=mean1+sd1), data=RCS_data,se=F,method="loess",linetype="dotted",col='green')+
  geom_smooth(mapping=aes(x=age_week, y=mean1-sd1), data=RCS_data,se=F,method="loess",linetype="dotted",col='green')+
  geom_smooth(mapping=aes(x=age_week, y=mean1+2*sd1), data=RCS_data,se=F,method="loess",linetype="dashed",col= 'blue')+
  geom_smooth(mapping=aes(x=age_week, y=mean1-2*sd1), data=RCS_data,se=F,method="loess",linetype="dashed",col='blue') + 
  xlab('Gestational age (weeks)') + ylab('Gestational weight gain (kg)') + theme(axis.title.x = element_text(size = 16)) + theme(axis.title.y = element_text(size = 16))


#plot2
library(ggplot2)
ggplot(data =RCS_data ,aes(x=age_week,y=mean2))+ geom_smooth(se=F,
                                                             method="loess",col = 'red')+
  geom_point(mapping=aes(x=age_week, y=weight_gain), data=mydata_new_constructed[mydata_new_constructed$BMI_group=='2',])+
  geom_smooth(mapping=aes(x=age_week, y=mean2+sd2), data=RCS_data,se=F,method="loess",linetype="dotted",col='green')+
  geom_smooth(mapping=aes(x=age_week, y=mean2-sd2), data=RCS_data,se=F,method="loess",linetype="dotted",col='green')+
  geom_smooth(mapping=aes(x=age_week, y=mean2+2*sd2), data=RCS_data,se=F,method="loess",linetype="dashed",col= 'blue')+
  geom_smooth(mapping=aes(x=age_week, y=mean2-2*sd2), data=RCS_data,se=F,method="loess",linetype="dashed",col='blue')+ 
  xlab('Gestational age (weeks)') + ylab('Gestational weight gain (kg)')+ theme(axis.title.x = element_text(size = 16)) + theme(axis.title.y = element_text(size = 16))


#plot3
library(ggplot2)
ggplot(data =RCS_data ,aes(x=age_week,y=mean3))+ geom_smooth(se=F,
                                                             method="loess",col = 'red')+
  geom_point(mapping=aes(x=age_week, y=weight_gain), data=mydata_new_constructed[mydata_new_constructed$BMI_group=='3',])+
  geom_smooth(mapping=aes(x=age_week, y=mean3+sd3), data=RCS_data,se=F,method="loess",linetype="dotted",col='green')+
  geom_smooth(mapping=aes(x=age_week, y=mean3-sd3), data=RCS_data,se=F,method="loess",linetype="dotted",col='green')+
  geom_smooth(mapping=aes(x=age_week, y=mean3+2*sd3), data=RCS_data,se=F,method="loess",linetype="dashed",col= 'blue')+
  geom_smooth(mapping=aes(x=age_week, y=mean3-2*sd3), data=RCS_data,se=F,method="loess",linetype="dashed",col='blue')+ 
  xlab('Gestational age (weeks)') + ylab('Gestational weight gain (kg)') + theme(axis.title.x = element_text(size = 16)) + theme(axis.title.y = element_text(size = 16))