# 导入相关库 import pandas as pd import numpy as np from pandas import Series,DataFrame import re # 导入泰坦尼克号的数据集 data_train = pd.read_csv("./data/titanic/Train.csv") # 提取其中几列 data = data_train.loc[:,['PassengerId','Name']] # 提取称谓 data['Title'] = data['Name']...
iv.set_title('特征变量与IV值分布图',fontsize=(15)) iv.set_xlabel('特征变量',fontsize=(15)) iv.set_ylabel('IV',fontsize=(15)) df_new=pd.DataFrame() #新建df_new存放woe转换后的数据 def replace_data(cut,cut_woe): a=[] for i in cut.unique(): a.append(i) a.sort() for m...
DataFrame([x[1] for x in ivdict],index=[x[0] for x in ivdict],columns=['IV']) ax = iv_vs.plot(kind='barh', figsize=(12,12), title='Feature IV', fontsize=10, width=0.8, color='#00688B') ax.set_ylabel('Features') ax.set_xlabel('IV of Features') return ivdict, woe...
df[pandas.isnull(df.title)] 4.字符匹配:str.contains(pattern,na = False) df[df.title.str.contains('台电',na = False)] 5.逻辑运算:与或非&,|,~(对Series按元素取反用~,而非not) 4.随机抽样 随机抽样函数:numpy.random.randint(start,end,num) 5.记录合并:concat([dataFrame1,dataFrame2,...]) -->之前的爬取nba球员信息就可以这...
# 导入相关库 import pandas as pd import numpy as np from pandas import Series,DataFrame import re # 导入泰坦尼克号的数据集 data_train = pd.read_csv("./data/titanic/Train.csv") # 提取其中几列 data = data_train.loc[:,['PassengerId','Name']] # 提取称谓 data['Title'] = data['Name']...