# Random forest with a categorical response (classification) on iris.

# Install randomForest only when it is missing, so re-running the script does
# not reinstall the package (and need network access) on every run.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

set.seed(2019) # fix the RNG seed so the results are reproducible

# Assign every row to group 1 or 2 with probabilities 0.8 / 0.2. Rows are
# drawn independently, so the split is only approximately 80% / 20%.
index <- sample(2, nrow(iris), replace = TRUE, prob = c(0.8, 0.2))
trainData <- iris[index == 1, ] # training set
testData <- iris[index == 2, ] # test set

# Fit a 100-tree classification forest of Species on all other columns,
# also requesting the proximity matrix.
iris_rf <- randomForest(
  Species ~ .,
  data = trainData,
  ntree = 100,
  proximity = TRUE
)
iris_rf # show the fitted model summary
plot(iris_rf) # error rates level off as the number of trees grows
importance(iris_rf) # feature importance
varImpPlot(iris_rf) # mean decrease in Gini attributed to each feature

# Use the trained forest to predict the held-out rows and build a confusion
# matrix to check the result.
irispred <- predict(iris_rf, newdata = testData)
table(irispred, testData$Species)
# Random forest with a numeric response (regression): predict ozone
# concentration using R's built-in airquality data set.
set.seed(2019)
ozone.rf <- randomForest(
  Ozone ~ .,
  data = airquality,
  mtry = 3, # number of variables tried at each split
  importance = TRUE,
  na.action = na.omit # drop rows that contain missing values
)
ozone.rf
importance(ozone.rf) # variable importance
ozone.rf[["predicted"]] # prediction results stored on the fitted object
#软件结果展示
> #随机森林
> install.packages("randomForest")
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/randomForest_4.6-14.zip'
Content type 'application/zip' length 248681 bytes (242 KB)
downloaded 242 KB
package ‘randomForest’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\L3M309NJSJ\AppData\Local\Temp\RtmpmEgcnJ\downloaded_packages
> library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
Warning message:
程辑包‘randomForest’是用R版本3.5.3 来建造的
> set.seed(2019)
> index=sample(2,nrow(iris),replace = TRUE,prob=c(0.8,0.2))
> trainData=iris[index==1,]
> testData=iris[index==2,]
> iris_rf=randomForest(Species~.,data = trainData,ntree=100,proximity=TRUE)
> iris_rf
Call:
randomForest(formula = Species ~ ., data = trainData, ntree = 100, proximity = TRUE)
Type of random forest: classification
Number of trees: 100
No. of variables tried at each split: 2
OOB estimate of error rate: 6.61%
Confusion matrix:
           setosa versicolor virginica class.error
setosa         42          0         0  0.00000000
versicolor      0         36         3  0.07692308
virginica       0          5        35  0.12500000
> plot(iris_rf)
> importance(iris_rf) #特征的重要性
             MeanDecreaseGini
Sepal.Length         8.809279
Sepal.Width          1.699896
Petal.Length        34.953780
Petal.Width         34.500847
> varImpPlot(iris_rf)
> #使用训练好的随机森林来预测未知数据,并生成混淆矩阵来检验结果
> irispred=predict(iris_rf,newdata = testData)
> table(irispred,testData$Species)
irispred     setosa versicolor virginica
  setosa          8          0         0
  versicolor      0         11         0
  virginica       0          0        10
> #随机森林 数值型因变量 空气质量数据 臭氧浓度预测R自带数据
> set.seed(2019)
> ozone.rf=randomForest(Ozone~.,data=airquality,mtry=3,importance=TRUE,na.action = na.omit)
> ozone.rf
Call:
randomForest(formula = Ozone ~ ., data = airquality, mtry = 3, importance = TRUE, na.action = na.omit)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 3
Mean of squared residuals: 293.324
% Var explained: 73.27
> importance(ozone.rf)
          %IncMSE IncNodePurity
Solar.R 10.384275     11121.816
Wind    23.962426     46057.764
Temp    44.084256     53066.900
Month    2.924016      1745.013
Day      2.439734      6406.094
> ozone.rf$predicted
1 2 3 4 7 8 9
31.92477 24.28743 25.18797 22.75789 25.69679 18.59441 14.60784
12 13 14 15 16 17 18
20.11589 22.39621 18.39547 10.90543 24.73056 17.53442 19.44983
19 20 21 22 23 24 28
20.00197 10.58886 13.64592 26.28683 12.03502 14.55901 12.66509
29 30 31 38 40 41 44
44.32243 60.99080 35.38688 29.01294 71.55579 57.73960 35.34667
47 48 49 50 51 62 63
20.55833 15.25729 14.05198 18.72587 20.86117 87.43433 75.16788
64 66 67 68 69 70 71
39.63745 92.73975 41.92709 94.30635 89.31866 92.41545 83.90387
73 74 76 77 78 79 80
18.73063 39.40132 28.14739 46.06712 37.67556 78.37855 80.52711
81 82 85 86 87 88 89
56.24972 25.53385 66.66287 66.90365 33.74187 51.17563 75.98084
90 91 92 93 94 95 99
79.41957 52.32790 45.94924 33.57491 19.20294 33.36013 107.01916
100 101 104 105 106 108 109
76.09340 82.86114 55.79450 35.16647 28.71812 15.41675 50.44410
110 111 112 113 114 116 117
26.60465 35.22445 28.56995 21.37998 12.06931 45.20655 81.50062
118 120 121 122 123 124 125
82.50852 76.75741 110.16262 90.04740 85.69622 78.84773 88.85975
126 127 128 129 130 131 132
117.27908 81.18402 66.31920 46.52276 37.09219 32.90388 19.90851
133 134 135 136 137 138 139
19.12444 27.75129 18.65773 64.06027 13.12723 18.25299 36.72594
140 141 142 143 144 145 146
19.73984 12.46647 19.97326 45.54831 21.74558 13.24708 36.32414
147 148 149 151 152 153
16.55316 13.37994 30.44026 21.99341 29.15090 26.37527
attr(,"na.action")
5 6 10 11 25 26 27 32 33 34 35 36 37 39 42 43 45
5 6 10 11 25 26 27 32 33 34 35 36 37 39 42 43 45
46 52 53 54 55 56 57 58 59 60 61 65 72 75 83 84 96
46 52 53 54 55 56 57 58 59 60 61 65 72 75 83 84 96
97 98 102 103 107 115 119 150
97 98 102 103 107 115 119 150
attr(,"class")
[1] "omit"