复杂的线性回归
目录
import pandas as pd
import numpy as np
读入数据
# Load the startups dataset from the local CSV file into a DataFrame,
# then preview its first rows to check the column layout.
data = pd.read_csv("./datasets/50_Startups.csv")
data.head()
 | R&D Spend | Administration | Marketing Spend | State | Profit |
---|---|---|---|---|---|
0 | 165349.20 | 136897.80 | 471784.10 | New York | 192261.83 |
1 | 162597.70 | 151377.59 | 443898.53 | California | 191792.06 |
2 | 153441.51 | 101145.55 | 407934.54 | Florida | 191050.39 |
3 | 144372.41 | 118671.85 | 383199.62 | New York | 182901.99 |
4 | 142107.34 | 91391.77 | 366168.42 | Florida | 166187.94 |
分开xy
# Feature matrix: every column except the last one, as a NumPy array.
X = data.iloc[:, :-1].values
# Target vector: the last column (Profit in the preview above).
Y = data.iloc[:, -1].values
编码
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
# Column 3 (State) is the only categorical feature: map its labels to integers.
labelEncoder = LabelEncoder()
X[:, 3] = labelEncoder.fit_transform(X[:, 3])

# One-hot encode ONLY the State column. Fitting a bare OneHotEncoder on the
# whole matrix would treat every numeric value as its own category.
onehotencoder = OneHotEncoder()
state_dummies = onehotencoder.fit_transform(X[:, [3]]).toarray()

# Put the State dummy columns first, followed by the three numeric features.
X = np.hstack((state_dummies, X[:, :3].astype(float)))

# Drop the first dummy column to avoid the dummy-variable trap
# (the remaining dummies already determine it).
X = X[:, 1:]
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)
from sklearn.linear_model import LinearRegression
# Fit a multiple linear regression model on the training split.
regressor = LinearRegression()
regressor.fit(X_train, Y_train)
LinearRegression()