吴裕雄 python 机器学习-DMT(2)

时间:2022-11-01 07:48:28
import matplotlib.pyplot as plt

decisionNode = dict(boxstyle="sawtooth", fc="0.8")
leafNode = dict(boxstyle="round4", fc="0.8")
arrow_args = dict(arrowstyle="<-") def getNumLeafs(myTree):
numLeafs = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
numLeafs += getNumLeafs(secondDict[key])
else: numLeafs +=1
return numLeafs def getTreeDepth(myTree):
maxDepth = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
thisDepth = 1 + getTreeDepth(secondDict[key])
else: thisDepth = 1
if thisDepth > maxDepth: maxDepth = thisDepth
return maxDepth def plotNode(nodeTxt, centerPt, parentPt, nodeType):
createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',xytext=centerPt, textcoords='axes fraction',va="center", ha="center", bbox=nodeType, arrowprops=arrow_args ) def plotMidText(cntrPt, parentPt, txtString):
xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]
yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]
createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30) def plotTree(myTree, parentPt, nodeTxt):
numLeafs = getNumLeafs(myTree)
depth = getTreeDepth(myTree)
for i in myTree.keys():
firstStr = i
break
cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff)
plotMidText(cntrPt, parentPt, nodeTxt)
plotNode(firstStr, cntrPt, parentPt, decisionNode)
secondDict = myTree[firstStr]
plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
plotTree(secondDict[key],cntrPt,str(key))
else:
plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD def createPlot(inTree):
fig = plt.figure(1, facecolor='white')
fig.clf()
axprops = dict(xticks=[], yticks=[])
createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
#createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses
plotTree.totalW = float(getNumLeafs(inTree))
plotTree.totalD = float(getTreeDepth(inTree))
plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
plotTree(inTree, (0.5,1.0), '')
plt.show() def retrieveTree(i):
listOfTrees =[{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
{'no surfacing': {0: 'no', 1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}}
]
return listOfTrees[i] thisTree = retrieveTree(0)
createPlot(thisTree)
thisTree = retrieveTree(1)
createPlot(thisTree)

吴裕雄 python 机器学习-DMT(2)

import numpy as np
import operator as op
from math import log def calcShannonEnt(dataSet):
labelCounts = {}
for featVec in dataSet:
currentLabel = featVec[-1]
if(currentLabel not in labelCounts.keys()):
labelCounts[currentLabel] = 0
labelCounts[currentLabel] += 1
shannonEnt = 0.0
rowNum = len(dataSet)
for key in labelCounts:
prob = float(labelCounts[key])/rowNum
shannonEnt -= prob * log(prob,2)
return shannonEnt def splitDataSet(dataSet, axis, value):
retDataSet = []
for featVec in dataSet:
if(featVec[axis] == value):
reducedFeatVec = featVec[:axis]
reducedFeatVec.extend(featVec[axis+1:])
retDataSet.append(reducedFeatVec)
return retDataSet def chooseBestFeatureToSplit(dataSet):
numFeatures = np.shape(dataSet)[1]-1
baseEntropy = calcShannonEnt(dataSet)
bestInfoGain = 0.0
bestFeature = -1
for i in range(numFeatures):
featList = [example[i] for example in dataSet]
uniqueVals = set(featList)
newEntropy = 0.0
for value in uniqueVals:
subDataSet = splitDataSet(dataSet, i, value)
prob = len(subDataSet)/float(len(dataSet))
newEntropy += prob * calcShannonEnt(subDataSet)
infoGain = baseEntropy - newEntropy
if (infoGain > bestInfoGain):
bestInfoGain = infoGain
bestFeature = i
return bestFeature def majorityCnt(classList):
classCount={}
for vote in classList:
if(vote not in classCount.keys()):
classCount[vote] = 0
classCount[vote] += 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def createTree(dataSet,labels):
classList = [example[-1] for example in dataSet]
if(classList.count(classList[0]) == len(classList)):
return classList[0]
if len(dataSet[0]) == 1:
return majorityCnt(classList)
bestFeat = chooseBestFeatureToSplit(dataSet)
bestFeatLabel = labels[bestFeat]
myTree = {bestFeatLabel:{}}
del(labels[bestFeat])
featValues = [example[bestFeat] for example in dataSet]
uniqueVals = set(featValues)
for value in uniqueVals:
subLabels = labels[:]
myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels)
return myTree def classify(inputTree,featLabels,testVec):
for i in inputTree.keys():
firstStr = i
break
secondDict = inputTree[firstStr]
featIndex = featLabels.index(firstStr)
key = testVec[featIndex]
valueOfFeat = secondDict[key]
if isinstance(valueOfFeat, dict):
classLabel = classify(valueOfFeat, featLabels, testVec)
else:
classLabel = valueOfFeat
return classLabel data = open("D:\\LearningResource\\machinelearninginaction\\Ch03\\lenses.txt")
dataSet = [inst.strip().split("\t") for inst in data.readlines()]
print(dataSet)
print(np.shape(dataSet))
labels = ["age","prescript","astigmatic","tearRate"]
tree = createTree(dataSet,labels)
print(tree) import matplotlib.pyplot as plt decisionNode = dict(boxstyle="sawtooth", fc="0.8")
leafNode = dict(boxstyle="round4", fc="0.8")
arrow_args = dict(arrowstyle="<-") def getNumLeafs(myTree):
numLeafs = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
numLeafs += getNumLeafs(secondDict[key])
else: numLeafs +=1
return numLeafs def getTreeDepth(myTree):
maxDepth = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
thisDepth = 1 + getTreeDepth(secondDict[key])
else: thisDepth = 1
if thisDepth > maxDepth: maxDepth = thisDepth
return maxDepth def plotNode(nodeTxt, centerPt, parentPt, nodeType):
createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',xytext=centerPt, textcoords='axes fraction',va="center", ha="center", bbox=nodeType, arrowprops=arrow_args ) def plotMidText(cntrPt, parentPt, txtString):
xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]
yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]
createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30) def plotTree(myTree, parentPt, nodeTxt):
numLeafs = getNumLeafs(myTree)
depth = getTreeDepth(myTree)
for i in myTree.keys():
firstStr = i
break
cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff)
plotMidText(cntrPt, parentPt, nodeTxt)
plotNode(firstStr, cntrPt, parentPt, decisionNode)
secondDict = myTree[firstStr]
plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
plotTree(secondDict[key],cntrPt,str(key))
else:
plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD def createPlot(inTree):
fig = plt.figure(1, facecolor='white')
fig.clf()
axprops = dict(xticks=[], yticks=[])
createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
#createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses
plotTree.totalW = float(getNumLeafs(inTree))
plotTree.totalD = float(getTreeDepth(inTree))
plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
plotTree(inTree, (0.5,1.0), '')
plt.show() createPlot(tree)

吴裕雄 python 机器学习-DMT(2)

吴裕雄 python 机器学习-DMT(2)的更多相关文章

  1. 吴裕雄 python 机器学习-DMT(1)

    import numpy as np import operator as op from math import log def createDataSet(): dataSet = [[1, 1, ...

  2. 吴裕雄 python 机器学习——分类决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  3. 吴裕雄 python 机器学习——回归决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  4. 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  5. 吴裕雄 python 机器学习——逻辑回归

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  6. 吴裕雄 python 机器学习——ElasticNet回归

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  7. 吴裕雄 python 机器学习——Lasso回归

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...

  8. 吴裕雄 python 机器学习——岭回归

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...

  9. 吴裕雄 python 机器学习——线性回归模型

    import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...

随机推荐

  1. Windows API 函数列表 附帮助手册

    所有Windows API函数列表,为了方便查询,也为了大家查找,所以整理一下贡献出来了. 帮助手册:700多个Windows API的函数手册 免费下载 API之网络函数 API之消息函数 API之 ...

  2. Native wifi API使用

    写于博客园,自己迁过来: 一.WlanOpenHandle打开一个客户端句柄 DWORD WINAPI WlanOpenHandle( __in DWORD dwClientVersion, __re ...

  3. 控件 UI&colon; 字体的自动继承的特性&comma; Style&comma; ControlTemplate

    字体的自动继承的特性 Style 样式 ControlTemplate 控件模板 示例1.演示字体的自动继承的特性Controls/UI/FontInherit.xaml <Page x:Cla ...

  4. findByExample&lpar;Object exampleEntity&rpar;方法得到的List判断是否为空,不可用&lpar;lis &excl;&equals; null&rpar;

    用findByExample(Object exampleEntity)方法可以应用在用户登录上面,获得有登陆名和密码的user对象进行查询. 返回两者都符合的对象列表,为空则登陆失败. 错误的方法: ...

  5. UVa 10747 - Maximum Subsequence

    题目大意:给出n个数,从中选取k个,使得乘积最大,并且尽量使和最大 分析:首先按照数的绝对值大小排序.然后就要分三大类情况讨论: (1)前k个中选到0:如果选到0的话,乘积一定是0,所以尽量选大的数, ...

  6. grunt自动化工具

    Grunt和 Grunt 插件是通过 npm 安装并管理的,npm是 Node.js 的包管理器. 安装 CLI 首先,需要先将Grunt命令行(CLI)安装到全局环境中. npm install - ...

  7. 升级NppAstyle中的AstyleLib为最高版本

    注:本文撰写时,NppAstyle的最高版本为0.10.02.14(更新于2013-04-08),Astyle的最高版本为2.05.1(更新于2014-12-11). Astyle是一个很好的代码格式 ...

  8. Linux篇---ftp服务器的搭建

    一.前述 企业中linux搭建ftp服务器还是很实用的,所以本文针对centoos7和centoos6搭建服务器教程做个总结. 二.具体 1.显示如下图则表示已安装 vsftp软件.如果未显示则需要安 ...

  9. Python&lowbar;装饰器复习&lowbar;30

    复习: # 装饰器的进阶 # functools.wraps # 带参数的装饰器 # 多个装饰器装饰同一个函数# 周末的作业 # 文件操作 # 字符串处理 # 输入输出 # 流程控制 # 装饰器# 开 ...

  10. es分词器

    1.默认的分词器 standard standard tokenizer:以单词边界进行切分standard token filter:什么都不做lowercase token filter:将所有字 ...