20201120组会
对六盘山2008年6月到2015年9月的数据做了个简单的线性模型。
学习一下TensorFlow
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
# Load the station records; the path is relative to the working directory.
data = pd.read_csv('liupanshan.csv')
# Preview the first rows to check how the columns were parsed.
data.head()
Unnamed: 0 | 站名 | 年 | 月 | 日 | 气压 | 温度 | 露点温度 | 相对湿度 | 水汽压 | 主导风向 | 主导风速 | 日降雨(固原) | 日降雨(六盘山) | 日降雨(隆德) | 日降雨(泾源) | 时间 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 六盘山 | 2008 | 6 | 5 | 720.375000 | 10.445833 | 6.145833 | 77.208333 | 9.529167 | 1 | 11.312500 | 2.0 | 3.3 | 2.8 | 5.5 | 2008-06-05 00:00:00 |
1 | 1 | 六盘山 | 2008 | 6 | 6 | 723.237500 | 5.695833 | 4.816667 | 94.500000 | 8.666667 | 2 | 7.055556 | 9.0 | 8.2 | 6.9 | 11.1 | 2008-06-06 00:00:00 |
2 | 2 | 六盘山 | 2008 | 6 | 9 | 722.845833 | 10.012500 | 5.604167 | 74.791667 | 9.129167 | 6 | 6.622222 | 0.0 | 3.2 | 0.0 | 1.4 | 2008-06-09 00:00:00 |
3 | 3 | 六盘山 | 2008 | 6 | 10 | 722.700000 | 10.316667 | 6.866667 | 79.541667 | 10.000000 | 4 | 4.310000 | 0.0 | 0.0 | 0.6 | 0.2 | 2008-06-10 00:00:00 |
4 | 4 | 六盘山 | 2008 | 6 | 11 | 721.908333 | 11.066667 | 5.529167 | 69.291667 | 9.162500 | 10 | 5.840000 | 0.0 | 2.4 | 17.3 | 0.6 | 2008-06-11 00:00:00 |
# (rows, columns) of the loaded table.
data.shape
(555, 17)
# Predictors: daily rainfall at the three other stations, followed by the
# pressure, temperature, dew point, relative humidity, dominant wind
# direction and dominant wind speed columns.
feature_cols = [
    '日降雨(固原)', '日降雨(隆德)', '日降雨(泾源)',
    "气压", "温度", "露点温度", "相对湿度", "主导风向", "主导风速",
]
X = data[feature_cols]
X.head()
日降雨(固原) | 日降雨(隆德) | 日降雨(泾源) | 气压 | 温度 | 露点温度 | 相对湿度 | 主导风向 | 主导风速 | |
|---|---|---|---|---|---|---|---|---|---|
0 | 2.0 | 2.8 | 5.5 | 720.375000 | 10.445833 | 6.145833 | 77.208333 | 1 | 11.312500 |
1 | 9.0 | 6.9 | 11.1 | 723.237500 | 5.695833 | 4.816667 | 94.500000 | 2 | 7.055556 |
2 | 0.0 | 0.0 | 1.4 | 722.845833 | 10.012500 | 5.604167 | 74.791667 | 6 | 6.622222 |
3 | 0.0 | 0.6 | 0.2 | 722.700000 | 10.316667 | 6.866667 | 79.541667 | 4 | 4.310000 |
4 | 0.0 | 17.3 | 0.6 | 721.908333 | 11.066667 | 5.529167 | 69.291667 | 10 | 5.840000 |
# Target: the Liupanshan daily-rainfall column. The double brackets keep it
# as a one-column DataFrame rather than a Series.
y=data[['日降雨(六盘山)']]
y.head()
日降雨(六盘山) | |
|---|---|
0 | 3.3 |
1 | 8.2 |
2 | 3.2 |
3 | 0.0 |
4 | 2.4 |
from sklearn.model_selection import train_test_split

# Hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Report the shape of each partition: training pair first, then test pair.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)
(416, 9)
(416, 1)
(139, 9)
(139, 1)
按默认比例划分:75%作为训练集,25%作为测试集(train_test_split 默认 test_size=0.25)。
from sklearn.linear_model import LinearRegression
# Least-squares linear regression fitted on the training partition only.
linreg = LinearRegression()
# fit() is left as the last bare expression of this run of statements.
linreg.fit(X_train, y_train)
LinearRegression()
# Fitted intercept on the first line, coefficient array on the next.
print(linreg.intercept_, linreg.coef_, sep='\n')
[171.50925976]
[[ 0.17079399 0.51107104 0.34437496 -0.23550774 -0.14889083 -0.0422263
0.00797214 0.00079478 0.05113225]]
from sklearn import metrics

# Apply the fitted model to the held-out partition.
y_pred = linreg.predict(X_test)

# R^2 on the test partition, then the mean squared error of the predictions.
test_r2 = linreg.score(X_test, y_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print(test_r2)
print("MSE:", test_mse)
0.8386289278230632
MSE: 15.349302346222053
# Scatter of observed vs. predicted rainfall on the held-out partition.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)

# Reference line y = x: a perfect prediction lies exactly on it. Both axes use
# the observed range; pairing the observed range with the predicted range
# would draw a line that is not the identity and misrepresents the fit.
lo = y_test.values.min()
hi = y_test.values.max()
ax.plot([lo, hi], [lo, hi], 'k--', lw=4)

ax.set_xlabel('Real')
ax.set_ylabel('Predicted')
plt.title('Linear model effect')
plt.show()
from sklearn import metrics
from sklearn.linear_model import LinearRegression

# Feature set without the "主导风向" (dominant wind direction) column.
feature_cols = [
    '日降雨(固原)', '日降雨(隆德)', '日降雨(泾源)',
    "气压", "温度", "露点温度", "相对湿度", "主导风速",
]
X = data[feature_cols]
y = data[['日降雨(六盘山)']]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Refit on the training partition and predict the held-out rows.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
y_pred = linreg.predict(X_test)

# Test-set R^2 followed by the mean squared error from scikit-learn.
test_r2 = linreg.score(X_test, y_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print(test_r2)
print("MSE:", test_mse)
0.8386361395415285
MSE: 15.348616381596146
from sklearn import metrics
from sklearn.linear_model import LinearRegression

# Feature set without the "主导风速" (dominant wind speed) column.
X = data[[
    '日降雨(固原)', '日降雨(隆德)', '日降雨(泾源)',
    "气压", "温度", "露点温度", "相对湿度", "主导风向",
]]
y = data[['日降雨(六盘山)']]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# fit() returns the estimator itself, so construction and fitting are chained.
linreg = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = linreg.predict(X_test)

# Test-set R^2, then the mean squared error from scikit-learn.
print(linreg.score(X_test, y_test))
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
0.8390160396893542
MSE: 15.31248101884688
from sklearn import metrics
from sklearn.linear_model import LinearRegression

# Feature set without the "相对湿度" (relative humidity) column.
rain_cols = ['日降雨(固原)', '日降雨(隆德)', '日降雨(泾源)']
weather_cols = ["气压", "温度", "露点温度", "主导风向", "主导风速"]
X = data[rain_cols + weather_cols]
y = data[['日降雨(六盘山)']]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Refit on the training partition.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Predict the held-out rows and report R^2 and the scikit-learn MSE.
y_pred = linreg.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print(linreg.score(X_test, y_test))
print("MSE:", test_mse)
0.8386820882289012
MSE: 15.344245831872566
from sklearn import metrics
from sklearn.linear_model import LinearRegression

# Reduced feature set: rainfall at the three other stations plus "相对湿度"
# (relative humidity).
feature_cols = ['日降雨(固原)', '日降雨(隆德)', '日降雨(泾源)', "相对湿度"]
target_cols = ['日降雨(六盘山)']
X = data[feature_cols]
y = data[target_cols]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Refit on the training partition and predict the held-out rows.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
y_pred = linreg.predict(X_test)

# Test-set R^2, then the mean squared error.
test_r2 = linreg.score(X_test, y_test)
print(test_r2)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
0.8473383761137151
MSE: 14.520876574002957
# Scatter of observed vs. predicted rainfall on the held-out partition.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)

# Reference line y = x: a perfect prediction lies exactly on it. Both axes use
# the observed range; pairing the observed range with the predicted range
# would draw a line that is not the identity and misrepresents the fit.
lo = y_test.values.min()
hi = y_test.values.max()
ax.plot([lo, hi], [lo, hi], 'k--', lw=4)

ax.set_xlabel('Real')
ax.set_ylabel('Predicted')
plt.title('Linear model effect')
plt.show()
from sklearn.model_selection import cross_val_predict

# Rainfall at the three other stations plus relative humidity as predictors,
# Liupanshan daily rainfall as the target.
X = data[['日降雨(固原)', '日降雨(隆德)', '日降雨(泾源)',"相对湿度"]]
y = data[['日降雨(六盘山)']]

# Out-of-fold prediction for every row from 10-fold cross-validation.
predicted = cross_val_predict(linreg, X, y, cv=10)

# Score the cross-validated predictions themselves. The former
# linreg.score(X_test, y_test) call only repeated the hold-out R^2 of the
# previously fitted model and said nothing about the cross-validation run.
# NOTE: any R^2 value recorded before this change must be regenerated.
print(metrics.r2_score(y, predicted))
# MSE of the same out-of-fold predictions, computed with scikit-learn.
print ("MSE:",metrics.mean_squared_error(y, predicted))
0.8473383761137151
MSE: 16.0007942817435
# Scatter of observed rainfall against the cross-validated predictions.
fig, ax = plt.subplots()
ax.scatter(y, predicted)

# Dashed y = x reference line spanning the observed range.
bounds = [y.min(), y.max()]
ax.plot(bounds, bounds, 'k--', lw=4)

ax.set(xlabel='Real', ylabel='Predicted')
ax.set_title('Linear model effect')
plt.show()
没做残差分析
主要还是要做神经网络的。
本文章使用limfx的vscode插件快速发布