导入包
import numpy as np
import pandas as pd
导入数据
# Read the Social Network Ads CSV into a DataFrame.
csv_path = "./datasets/Social_Network_Ads.csv"
data = pd.read_csv(csv_path)
# Display the first rows as a quick check of the loaded columns.
data.head()
| | User ID | Gender | Age | EstimatedSalary | Purchased |
|---|---|---|---|---|---|
| 0 | 15624510 | Male | 19 | 19000 | 0 |
| 1 | 15810944 | Male | 35 | 20000 | 0 |
| 2 | 15668575 | Female | 26 | 43000 | 0 |
| 3 | 15603246 | Female | 27 | 57000 | 0 |
| 4 | 15804002 | Male | 19 | 76000 | 0 |
# Features: Age and EstimatedSalary; target: the Purchased flag.
# Columns are selected by name rather than by position (2, 3 and 4) so the
# selection is self-describing and does not depend on column order.
X = data[["Age", "EstimatedSalary"]].values
Y = data["Purchased"].values
划分训练集和测试集
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation and train on the remaining 75%.
# The split previously passed train_size=1/4, which trained on only a quarter
# of the data and evaluated on the other three quarters.
# NOTE: outputs recorded in this file (e.g. the score at the end) were produced
# with the earlier split; re-run to refresh them.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0
)
标准化
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both the training and the test split.
standardscaler = StandardScaler().fit(X_train)
X_train = standardscaler.transform(X_train)
X_test = standardscaler.transform(X_test)
训练模型
from sklearn.linear_model import LogisticRegression
# Build a logistic-regression classifier; no arguments are passed, so the
# library's default settings apply.
model = LogisticRegression()
# Fit on the training split prepared earlier in the file.
model.fit(X_train,Y_train)
LogisticRegression()
模型得分
# Evaluate the fitted model on the test split (scikit-learn classifiers'
# `score` reports mean accuracy).
model.score(X_test,Y_test)
0.7933333333333333