R语言 随机森林

时间:2024-04-13 19:16:03

# Random forest with a categorical response: classify iris species.

# Install randomForest only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}

library(randomForest)

set.seed(2019)  # fix the RNG seed so the split and the forest are reproducible

# Label each row 1 or 2 independently with probability 0.8 / 0.2.
# The resulting split is only approximately 80% / 20%, not exact.
index <- sample(2, nrow(iris), replace = TRUE, prob = c(0.8, 0.2))

trainData <- iris[index == 1, ]  # training set
testData <- iris[index == 2, ]   # test set

# Fit a classification forest of 100 trees on the training rows;
# proximity = TRUE additionally requests the proximity measure between rows.
iris_rf <- randomForest(
  Species ~ .,
  data = trainData,
  ntree = 100,
  proximity = TRUE
)

iris_rf  # show the fitted model summary

plot(iris_rf)  # error rate against the number of trees grown

importance(iris_rf)  # importance of each feature

varImpPlot(iris_rf)  # plot of the mean decrease in Gini for each feature

# Use the trained forest to predict the held-out rows, then cross-tabulate
# predicted against actual species (confusion matrix) to check the result.
irispred <- predict(iris_rf, newdata = testData)

table(irispred, testData$Species)

 

# Random forest with a numeric response: predict ozone concentration from
# the airquality data set that ships with R.

set.seed(2019)  # same seed as above so the fit is reproducible

# Regress Ozone on all other columns, trying 3 variables at each split.
# Rows containing NA are dropped via na.omit before fitting.
ozone.rf <- randomForest(
  Ozone ~ .,
  data = airquality,
  mtry = 3,
  importance = TRUE,
  na.action = na.omit
)

ozone.rf  # show the fitted model summary

importance(ozone.rf)  # variable importance

ozone.rf$predicted  # predicted values stored on the fitted object

 

 

 

#软件结果展示

> #随机森林

> install.packages("randomForest")

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/randomForest_4.6-14.zip'

Content type 'application/zip' length 248681 bytes (242 KB)

downloaded 242 KB

 

package ‘randomForest’ successfully unpacked and MD5 sums checked

 

The downloaded binary packages are in

      C:\Users\L3M309NJSJ\AppData\Local\Temp\RtmpmEgcnJ\downloaded_packages

> library(randomForest)

randomForest 4.6-14

Type rfNews() to see new features/changes/bug fixes.

Warning message:

程辑包‘randomForest’是用R版本3.5.3 来建造的

> set.seed(2019)

> index=sample(2,nrow(iris),replace = TRUE,prob=c(0.8,0.2))

> trainData=iris[index==1,]

> testData=iris[index==2,]

> iris_rf=randomForest(Species~.,data = trainData,ntree=100,proximity=TRUE)

> iris_rf

 

Call:

 randomForest(formula = Species ~ ., data = trainData, ntree = 100,      proximity = TRUE)

               Type of random forest: classification

                     Number of trees: 100

No. of variables tried at each split: 2

 

        OOB estimate of  error rate: 6.61%

Confusion matrix:

           setosa versicolor virginica class.error

setosa         42          0         0  0.00000000

versicolor      0         36         3  0.07692308

virginica       0          5        35  0.12500000

> plot(iris_rf)

(图:plot(iris_rf) 输出的错误率随树的数量变化曲线)

> importance(iris_rf) #特征的重要性

             MeanDecreaseGini

Sepal.Length         8.809279

Sepal.Width          1.699896

Petal.Length        34.953780

Petal.Width         34.500847

> varImpPlot(iris_rf)

(图:varImpPlot(iris_rf) 输出的特征重要性图)

> #使用训练好的随机森林来预测未知数据,并生成混淆矩阵来检验结果

> irispred=predict(iris_rf,newdata = testData)

> table(irispred,testData$Species)

           

irispred     setosa versicolor virginica

  setosa          8          0         0

  versicolor      0         11         0

  virginica       0          0        10

> #随机森林 数值型因变量 空气质量数据 臭氧浓度预测R自带数据

> set.seed(2019)

> ozone.rf=randomForest(Ozone~.,data=airquality,mtry=3,importance=TRUE,na.action = na.omit)

> ozone.rf

 

Call:

 randomForest(formula = Ozone ~ ., data = airquality, mtry = 3,      importance = TRUE, na.action = na.omit)

               Type of random forest: regression

                     Number of trees: 500

No. of variables tried at each split: 3

 

          Mean of squared residuals: 293.324

                    % Var explained: 73.27

> importance(ozone.rf)

          %IncMSE IncNodePurity

Solar.R 10.384275     11121.816

Wind    23.962426     46057.764

Temp    44.084256     53066.900

Month    2.924016      1745.013

Day      2.439734      6406.094

> ozone.rf$predicted

        1         2         3         4         7         8         9

 31.92477  24.28743  25.18797  22.75789  25.69679  18.59441  14.60784

       12        13        14        15        16        17        18

 20.11589  22.39621  18.39547  10.90543  24.73056  17.53442  19.44983

       19        20        21        22        23        24        28

 20.00197  10.58886  13.64592  26.28683  12.03502  14.55901  12.66509

       29        30        31        38        40        41        44

 44.32243  60.99080  35.38688  29.01294  71.55579  57.73960  35.34667

       47        48        49        50        51        62        63

 20.55833  15.25729  14.05198  18.72587  20.86117  87.43433  75.16788

       64        66        67        68        69        70        71

 39.63745  92.73975  41.92709  94.30635  89.31866  92.41545  83.90387

       73        74        76        77        78        79        80

 18.73063  39.40132  28.14739  46.06712  37.67556  78.37855  80.52711

       81        82        85        86        87        88        89

 56.24972  25.53385  66.66287  66.90365  33.74187  51.17563  75.98084

       90        91        92        93        94        95        99

 79.41957  52.32790  45.94924  33.57491  19.20294  33.36013 107.01916

      100       101       104       105       106       108       109

 76.09340  82.86114  55.79450  35.16647  28.71812  15.41675  50.44410

      110       111       112       113       114       116       117

 26.60465  35.22445  28.56995  21.37998  12.06931  45.20655  81.50062

      118       120       121       122       123       124       125

 82.50852  76.75741 110.16262  90.04740  85.69622  78.84773  88.85975

      126       127       128       129       130       131       132

117.27908  81.18402  66.31920  46.52276  37.09219  32.90388  19.90851

      133       134       135       136       137       138       139

 19.12444  27.75129  18.65773  64.06027  13.12723  18.25299  36.72594

      140       141       142       143       144       145       146

 19.73984  12.46647  19.97326  45.54831  21.74558  13.24708  36.32414

      147       148       149       151       152       153

 16.55316  13.37994  30.44026  21.99341  29.15090  26.37527

attr(,"na.action")

  5   6  10  11  25  26  27  32  33  34  35  36  37  39  42  43  45

  5   6  10  11  25  26  27  32  33  34  35  36  37  39  42  43  45

 46  52  53  54  55  56  57  58  59  60  61  65  72  75  83  84  96

 46  52  53  54  55  56  57  58  59  60  61  65  72  75  83  84  96

 97  98 102 103 107 115 119 150

 97  98 102 103 107 115 119 150

attr(,"class")

[1] "omit"