import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# NOTE(review): `Imputer` was removed from sklearn.preprocessing in
# scikit-learn 0.22 (sklearn.impute.SimpleImputer replaces it); confirm the
# scikit-learn version this script targets.
from sklearn.preprocessing import Imputer
# Load the data set from a GBK-encoded CSV file.
# The path is a raw string so the backslashes of the Windows path are taken
# literally instead of being parsed as (invalid) escape sequences.
data = pd.read_csv(r'D:\statistical\data.csv', encoding='gbk')
# Names of the object-dtype columns recorded by fea_categorical_check.
# Kept at module level so other code can read it after the check has run.
cat_vars = []


def fea_categorical_check(df):
    """Print and record the names of the object-dtype columns of *df*.

    Every column whose dtype compares equal to ``'object'`` is printed and
    recorded in the module-level ``cat_vars`` list.

    Args:
        df: A pandas DataFrame (anything exposing ``columns`` and
            ``df[col].dtype``).

    Returns:
        The module-level ``cat_vars`` list (the same object on every call).
    """
    print('描述变量有:\n')
    for col in df.columns:
        if df[col].dtype == 'object':
            print(col)
            # Skip names that are already recorded so calling the function
            # more than once does not pile up duplicates in the shared list.
            if col not in cat_vars:
                cat_vars.append(col)
    return cat_vars
# Hold out 30% of the rows for testing and keep 70% for training; the random
# seed is fixed at 2018 so the split is reproducible.
# NOTE(review): `y_data` is not defined in the visible part of this file —
# confirm it is created before this line runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    data, y_data, train_size=0.7, random_state=2018
)