导入包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
导入数据
# Read the student hours/scores CSV into a DataFrame and preview its first rows.
data = pd.read_csv(
    "./datasets/studentscores.csv",
)
data.head(n=5)
|   | Hours | Scores |
|---|-------|--------|
| 0 | 2.5 | 21 |
| 1 | 5.1 | 47 |
| 2 | 3.2 | 27 |
| 3 | 8.5 | 75 |
| 4 | 3.5 | 30 |
数据处理
# Separate the frame into a 2-D feature matrix (first column) and a 1-D
# target vector (second column).
X = data.iloc[:, 0:1].values
Y = data.iloc[:, 1].values

# Hold out a quarter of the rows for testing; the fixed seed keeps the
# split reproducible between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)
训练模型
# Fit a linear regression model on the training split. `fit` returns the
# estimator itself, so construction and fitting are chained here.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression().fit(X_train, Y_train)
预测
# Predict targets for the held-out test features with the fitted model.
Y_pred = regressor.predict(X=X_test)
画图
# Visualise the fitted line against the training data and, separately,
# against the held-out test data. Each view gets its own labelled figure:
# drawing both on the same axes in identical colours makes the training and
# test points indistinguishable, and without plt.show() nothing is displayed
# when this runs as a plain script.
plt.figure()
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Hours vs Scores (training set)')
plt.xlabel('Hours')
plt.ylabel('Scores')
plt.show()

plt.figure()
plt.scatter(X_test, Y_test, color='red')
plt.plot(X_test, regressor.predict(X_test), color='blue')
plt.title('Hours vs Scores (test set)')
plt.xlabel('Hours')
plt.ylabel('Scores')
plt.show()