原理介绍

K折交叉验证：

KFold，GroupKFold，StratifiedKFold，

『Sklearn』数据划分方法

留一法：

LeaveOneGroupOut，LeavePGroupsOut，LeaveOneOut，LeavePOut，

『Sklearn』数据划分方法

随机划分法：

ShuffleSplit，GroupShuffleSplit，StratifiedShuffleSplit，

『Sklearn』数据划分方法

代码实现

流程：

实例化分类器 -> 迭代器迭代组[.split()]

KFold(n_splits=2)

#KFold

import numpy as np

from sklearn.model_selection import KFold

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,2,3,4,5,6])

kf=KFold(n_splits=2)    # 定义分成几个组

# kf.get_n_splits(X)    # 查询分成几个组

print(kf)

for train_index,test_index in kf.split(X):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

GroupKFold(n_splits=2)

# GroupKFold，不是很懂这个划分方法

import numpy as np

from sklearn.model_selection import GroupKFold

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,2,3,4,5,6])

groups=np.array([1,2,3,4,5,6])

group_kfold=GroupKFold(n_splits=2)

group_kfold.get_n_splits(X,y,groups)

print(group_kfold)

for train_index,test_index in group_kfold.split(X,y,groups):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

#GroupKFold(n_splits=2)

#Train Index: [0 2 4] ,Test Index: [1 3 5]

#Train Index: [1 3 5] ,Test Index: [0 2 4]

StratifiedKFold(n_splits=3)

# stratifiedKFold：保证训练集中每一类的比例是相同的（尽量）

import numpy as np

from sklearn.model_selection import StratifiedKFold

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,1,1,2,2,2])

skf=StratifiedKFold(n_splits=3)

skf.get_n_splits(X,y)

print(skf)

for train_index,test_index in skf.split(X,y):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

#StratifiedKFold(n_splits=3, random_state=None, shuffle=False)

#Train Index: [1 2 4 5] ,Test Index: [0 3]

#Train Index: [0 2 3 5] ,Test Index: [1 4]

LeaveOneOut()

# leaveOneOut：测试集就留下一个

import numpy as np

from sklearn.model_selection import LeaveOneOut

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,2,3,4,5,6])

loo=LeaveOneOut()

loo.get_n_splits(X)

print(loo)

for train_index,test_index in loo.split(X,y):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

#LeaveOneOut()

#Train Index: [1 2 3 4 5] ,Test Index: [0]

#Train Index: [0 2 3 4 5] ,Test Index: [1]

#Train Index: [0 1 3 4 5] ,Test Index: [2]

#Train Index: [0 1 2 4 5] ,Test Index: [3]

#Train Index: [0 1 2 3 5] ,Test Index: [4]

#Train Index: [0 1 2 3 4] ,Test Index: [5]

LeavePOut(p=3)

LeavePOut：测试集留下P个

import numpy as np

from sklearn.model_selection import LeavePOut

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,2,3,4,5,6])

lpo=LeavePOut(p=3)

lpo.get_n_splits(X)

print(lpo)

for train_index,test_index in lpo.split(X,y):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

#LeavePOut(p=3)

#Train Index: [3 4 5] ,Test Index: [0 1 2]

#Train Index: [2 4 5] ,Test Index: [0 1 3]

#Train Index: [2 3 5] ,Test Index: [0 1 4]

#Train Index: [2 3 4] ,Test Index: [0 1 5]

#Train Index: [1 4 5] ,Test Index: [0 2 3]

#Train Index: [1 3 5] ,Test Index: [0 2 4]

#Train Index: [1 3 4] ,Test Index: [0 2 5]

#Train Index: [1 2 5] ,Test Index: [0 3 4]

#Train Index: [1 2 4] ,Test Index: [0 3 5]

#Train Index: [1 2 3] ,Test Index: [0 4 5]

#Train Index: [0 4 5] ,Test Index: [1 2 3]

#Train Index: [0 3 5] ,Test Index: [1 2 4]

#Train Index: [0 3 4] ,Test Index: [1 2 5]

#Train Index: [0 2 5] ,Test Index: [1 3 4]

#Train Index: [0 2 4] ,Test Index: [1 3 5]

#Train Index: [0 2 3] ,Test Index: [1 4 5]

#Train Index: [0 1 5] ,Test Index: [2 3 4]

#Train Index: [0 1 4] ,Test Index: [2 3 5]

#Train Index: [0 1 3] ,Test Index: [2 4 5]

#Train Index: [0 1 2] ,Test Index: [3 4 5]

ShuffleSplit(n_splits=3,test_size=.25,random_state=0)

# ShuffleSplit 把数据集打乱顺序，然后划分测试集和训练集，训练集额和测试集的比例随机选定，

# 训练集和测试集的比例的和可以小于1

import numpy as np

from sklearn.model_selection import ShuffleSplit

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,2,3,4,5,6])

rs=ShuffleSplit(n_splits=3,test_size=.25,random_state=0)

rs.get_n_splits(X)

print(rs)

for train_index,test_index in rs.split(X,y):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

print("==============================")

rs=ShuffleSplit(n_splits=3,train_size=.5,test_size=.25,random_state=0)

rs.get_n_splits(X)

print(rs)

for train_index,test_index in rs.split(X,y):

    print("Train Index:",train_index,",Test Index:",test_index)

#ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=None)

#Train Index: [1 3 0 4] ,Test Index: [5 2]

#Train Index: [4 0 2 5] ,Test Index: [1 3]

#Train Index: [1 2 4 0] ,Test Index: [3 5]

#==============================

#ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=0.5)

#Train Index: [1 3 0] ,Test Index: [5 2]

#Train Index: [4 0 2] ,Test Index: [1 3]

#Train Index: [1 2 4] ,Test Index: [3 5]

StratifiedShuffleSplit(n_splits=3,test_size=.5,random_state=0)

# StratifiedShuffleSplitShuffleSplit 把数据集打乱顺序，然后划分测试集和训练集，

# 训练集额和测试集的比例随机选定，训练集和测试集的比例的和可以小于1,但是还要保证训练集中各类所占的比例是一样的

import numpy as np

from sklearn.model_selection import StratifiedShuffleSplit

X=np.array([[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]])

y=np.array([1,2,1,2,1,2])

sss=StratifiedShuffleSplit(n_splits=3,test_size=.5,random_state=0)

sss.get_n_splits(X,y)

print(sss)

for train_index,test_index in sss.split(X,y):

    print("Train Index:",train_index,",Test Index:",test_index)

    X_train,X_test=X[train_index],X[test_index]

    y_train,y_test=y[train_index],y[test_index]

    #print(X_train,X_test,y_train,y_test)

#StratifiedShuffleSplit(n_splits=3, random_state=0, test_size=0.5,train_size=None)

#Train Index: [5 4 1] ,Test Index: [3 2 0]

#Train Index: [5 2 3] ,Test Index: [0 4 1]

#Train Index: [5 0 4] ,Test Index: [3 1 2]

『Sklearn』数据划分方法的更多相关文章

『Sklearn』特征向量化处理
『Kaggle』分类任务_决策树&集成模型&DataFrame向量化操作 1 2 3 4 5 6 7 8 9 '''特征提取器''' from sklearn.feature_extr ...
『Sklearn』框架自带数据集接口
自带数据集类型如下: # 自带小型数据集# sklearn.datasets.load_<name># 在线下载数据集# sklearn.datasets.fetch_<name&g ...
JS 中通过对象关联实现『继承』
JS 中继承其实是种委托,而不是传统面向对象中的复制父类到子类,只是通过原型链将要做的事委托给父类. 下面介绍通过对象关联来实现『继承』的方法: Foo = { // 需要提供一个 init 方法来初 ...
『Python』&lowbar;&lowbar;getattr&lowbar;&lowbar;()特殊方法
self的认识 & __getattr__()特殊方法将字典调用方式改为通过属性查询的一个小class, class Dict(dict): def __init__(self, **kw) ...
『TensorFlow』模型保存和载入方法汇总
『TensorFlow』第七弹_保存&载入会话_霸王回马一.TensorFlow常规模型加载方法保存模型 tf.train.Saver()类,.save(sess, ckpt文件目录)方法 ...
『转载』hadoop2&period;x常用端口、定义方法及默认端口
『转载』hadoop2.x常用端口.定义方法及默认端口 1.问题导读 DataNode的http服务的端口.ipc服务的端口分别是哪个? NameNode的http服务的端口.ipc服务的端口分别是哪 ...
『计算机视觉』Mask-RCNN&lowbar;推断网络终篇：使用detect方法进行推断
一.detect和build 前面多节中我们花了大量笔墨介绍build方法的inference分支,这节我们看看它是如何被调用的. 在dimo.ipynb中,涉及model的操作我们简单进行一下汇总, ...
『TensorFlow』读书笔记&lowbar;降噪自编码器
『TensorFlow』降噪自编码器设计之前学习过的代码,又敲了一遍,新的收获也还是有的,因为这次注释写的比较详尽,所以再次记录一下,具体的相关知识查阅之前写的文章即可(见上面链接). # Aut ...
『AngularJS』&dollar;location 服务
项目中关于 $location的用法简介 $location服务解析在浏览器地址栏中的URL(基于window.location)并且让URL在你的应用中可用.改变在地址栏中的URL会作用到$loc ...

随机推荐

Linux 系统使用之 VMware Tools安装
Red Hat Enterprise Linux 4系统中安装VMware Tools 1. 必须以ROOT身份进入Linux 2. 进入linux系统,然后按下 CTRL+ALT组合键,进入主操作系 ...
多线程 or 多进程 (转强力推荐)
在Unix上编程采用多线程还是多进程的争执由来已久,这种争执最常见到在C/S通讯中服务端并发技术的选型上,比如WEB服务器技术中,Apache是采用多进程的(perfork模式,每客户连接对应一个进 ...
Python学习（17）异常处理
目录 Python 异常处理 Python 标准异常异常处理使用except而不带任何异常类型使用except而带多种异常类型 try-finally 语句异常参数异常的参数用户自定义参数 ...
对 Linux 新手非常有用的20个命令
你打算从Windows换到Linux上来,还是你刚好换到Linux上来?哎哟!!!我说什么呢,是什么原因你就出现我的世界里了.从我以往的经验来说,当我刚使用Linux,命令,终端啊什么的,吓了我一跳. ...
c&num; webbrowser 随机点击链接
HtmlElementCollection hec = webBrowser1.Document.All; ; i < hec.Count; i++) { if (hec[i].GetAttri ...
BZOJ 3545&colon; [ONTAK2010]Peaks( BST + 启发式合并 + 并查集 )
这道题很好想, 离线, 按询问的x排序从小到大, 然后用并查集维护连通性, 用平衡树维护连通块的山的权值, 合并就用启发式合并.时间复杂度的话, 排序是O(mlogm + qlogq), 启发式合并是 ...
vue分类筛选方法，filer
使用computed 方法来过滤筛选数据;也可以使用methods 方式来筛选过滤数据代码如下: <body> <div id="app"> <ul ...
如何在mysql客户端即mysql提示符下执行操作系统命令
环境描述: mysql版本:5.5.57-log 操作系统版本:Red Hat Enterprise Linux Server release 6.6 (Santiago) 需求描述: 在mysql的 ...
学习 Vim —— Vimtutor 总结笔记
Lesson 2 2.1-2.3 删除 [dw] 删除从光标开始处至下一词开始前的部分,光标停在下一词的词首. [de] 删除从光标开始处至词尾的部分. [d$] 删除从光标开始处至行末的部分. 2. ...
&commat;ResponseBody注解
作用 @ResponseBody注解表示该方法的返回结果直接写入HTTP response body中原理在使用此注解之后跳过视图处理器,将返回的对象通过适当的转换器转换为指定的格式之后,直接将数 ...