classList = [example[-1] for example in dataSet] 将dataSet中的数据先按行依次放入example中,然后取得example中的example[i]元素,放入列表featList中 >>> dataSet [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']] >>> i = 0 >>> featList = ...
baseEntropy=calShannoEnt(dataSet) # 计算整个数据集的初始熵,这是决策前数据的不确定性度量 bestInfoGain=0.0 # 初始化最大信息增益为0 bestFeature=-1 # 初始化最佳特征的索引为-1,表示还未找到 for i in range(numFeatures): #对数据集中的每个特征进行循环 featList=[example[i] for example in dataSet...
bestInfoGain=0; bestFeature=-1 # 遍历每个特征,计算信息增益 for i in range(numFeatures): # 取出对应特征值,即一列数据 featList=[example[i] for example in dataSet] uniqueVals=np.unique(featList) newEntropy=0 for value in uniqueVals: subDataSet=splitData(dataSet,i,value) prob=len(subDataSet)/float(dataSet) newEntropy+...
bestFeatLabel=labels[bestFeat] #初始化树,用于存储树的结构,是很多字典的嵌套结构 myTree ={bestFeatLabel: {}} #已经用过的特征删去 del(labels[bestFeatLabel]) #取出最优特征这一列的值 featVals = [example[bestFeat] for example in dataSet] #特征的取值个数 uniqueVals =np.unique(featVals) #开始递归分裂 for value in...
for featVec in dataSet: if featVec[axis] == value: # 舍弃掉这一维度上对应的值,剩余部分作为新的数据集 reducedFeatVec = featVec[:axis] reducedFeatVec.extend(featVec[axis+1:]) retDataSet.append(reducedFeatVec) return retDataSet 1. 2.
featList=[example[i] for example in dataSet] uniqueVals=set(featList) newEntropy=0.0 for value in uniqueVals: # 计算每种划分方式的信息熵,并对所有唯一特征值得到的熵求和 subDataSet=splitDataSet(dataSet,i,value) prob=len(subDataSet)/float(len(dataSet)) ...
numEntries = len(dataSet) labelCounts = {} for featVec in dataSet: #the number of unique elements and their occurrence currentLabel = featVec[-1] if currentLabel not in labelCounts.keys(): labelCounts[currentLabel] = 0 labelCounts[currentLabel] += 1 ...
40 numFeatures = len(dataSet[0])-1 41 baseEntropy = calcShannonEnt(dataSet) # 原始的熵 42 bestInfoGain = 0 43 bestFeature = -1 44 for i in range(numFeatures): 45 featList = [example[i] for example in dataSet] 46 uniqueVals = set(featList) ...
def choose_best_feature_to_split(dataset): numFeatures = len(dataset[0]) - 1 # 数据集最后一列作为标签 baseEntropy = compute_shannon_etropy(dataset) bestInfoGain = 0.0 bestFeature = -1 for i in range(numFeatures): # 遍历所有的特征 featList = [example[i] for example in dataset] # create a list of all the ...
EXECUTE sp_execute_external_script @language= N'Python', @script = N' import pandas as pd a = 1 b = 2 c = a/b s = pandas.Series(c, index =["simple math example 1", "simple math example 2"]) print(s) df = pd.DataFrame(s, index=["simple math example 1"]) Ou...