Writing Linear Regression from Scratch in Python!
Selected from towardsdatascience
Author: Kumud Lakara
Compiled by 机器之心
Editors: 小舟, 泽南
Put Scikit-learn aside for a moment; let's look at the real technique behind it.
The first step is loading the data: load_data reads the CSV file with pandas, converts it to a NumPy array, plots it, normalizes the features, and returns the features and the prices separately.

def load_data(filename):
    df = pd.read_csv(filename, sep=",", index_col=False)
    df.columns = ["housesize", "rooms", "price"]
    data = np.array(df, dtype=float)
    plot_data(data[:,:2], data[:, -1])
    normalize(data)
    return data[:,:2], data[:, -1]

Normalization uses the z-score of each feature:

Z = (x - μ) / σ

μ : mean
σ : standard deviation
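To make the formula concrete, here is a minimal sketch using made-up house sizes (not values from house_price_data.txt): after z-scoring, a column has mean roughly 0 and standard deviation roughly 1.

import numpy as np

# hypothetical house sizes, purely for illustration
sizes = np.array([1600.0, 2100.0, 2400.0, 1416.0])

z = (sizes - np.mean(sizes)) / np.std(sizes)   # Z = (x - μ) / σ
print(z)           # values centered around 0
print(z.mean())    # approximately 0
print(z.std())     # approximately 1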
normalize applies the z-score column by column and records each feature's mean and standard deviation in the module-level lists mu and std so they can be reused at prediction time. plot_data draws a scatter plot of house size against price, h is the hypothesis h(x) = xθ, and cost_function is the squared-error cost.

def normalize(data):
    for i in range(0, data.shape[1]-1):
        # record the statistics before scaling so they describe the raw feature
        mu.append(np.mean(data[:,i]))
        std.append(np.std(data[:, i]))
        data[:,i] = (data[:,i] - np.mean(data[:,i])) / np.std(data[:, i])

def plot_data(x, y):
    plt.xlabel('house size')
    plt.ylabel('price')
    plt.plot(x[:,0], y, 'bo')
    plt.show()

def h(x, theta):
    return np.matmul(x, theta)

def cost_function(x, y, theta):
    return ((h(x, theta) - y).T @ (h(x, theta) - y)) / (2*y.shape[0])
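The matrix expression in cost_function is the usual squared-error cost J(θ) = (1/2m) Σ (h(x) - y)², summed over all training examples. A minimal sketch on made-up numbers (not the housing data) confirms the vectorized form and the explicit sum agree:

import numpy as np

# made-up design matrix (bias column included) and targets, for illustration only
x = np.array([[1.0, 0.5], [1.0, 1.5], [1.0, 2.5]])
y = np.array([[1.0], [2.0], [3.0]])
theta = np.array([[0.5], [0.8]])

m = y.shape[0]
err = x @ theta - y
vectorized = (err.T @ err) / (2 * m)                   # same form as cost_function
summed = sum(v ** 2 for v in err.ravel()) / (2 * m)    # explicit sum over examples
print(float(vectorized[0, 0]), summed)                 # both print the same value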
gradient_descent runs batch gradient descent for num_epochs passes over the data, recording the cost after every update:

def gradient_descent(x, y, theta, learning_rate=0.1, num_epochs=10):
    m = x.shape[0]
    J_all = []

    for _ in range(num_epochs):
        h_x = h(x, theta)
        cost_ = (1/m)*(x.T@(h_x - y))   # gradient of the cost with respect to theta
        theta = theta - (learning_rate)*cost_
        J_all.append(cost_function(x, y, theta))

    return theta, J_all
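Each epoch applies the batch update theta = theta - learning_rate * (1/m) * x.T @ (x @ theta - y). One way to verify this gradient is to compare it with a finite-difference estimate of the cost; a minimal sketch on made-up numbers:

import numpy as np

# made-up data, used only to check the gradient (not the housing data)
x = np.array([[1.0, 0.5], [1.0, 1.5], [1.0, 2.5]])
y = np.array([[1.0], [2.0], [3.0]])
theta = np.array([[0.2], [0.4]])
m = y.shape[0]

def cost(t):
    e = x @ t - y
    return float((e.T @ e)[0, 0]) / (2 * m)

# analytic gradient, exactly the expression used inside gradient_descent
analytic = (1/m) * (x.T @ (x @ theta - y))

# central finite differences, one parameter at a time
eps = 1e-6
numeric = np.zeros_like(theta)
for j in range(theta.shape[0]):
    t_plus, t_minus = theta.copy(), theta.copy()
    t_plus[j] += eps
    t_minus[j] -= eps
    numeric[j] = (cost(t_plus) - cost(t_minus)) / (2 * eps)

print(analytic.ravel())   # analytic gradient
print(numeric.ravel())    # should match to several decimal places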
The driver code loads the data, reshapes y into a column vector, stacks a column of ones onto x, initializes theta to zeros, and runs 50 epochs of gradient descent:

x, y = load_data("house_price_data.txt")
y = np.reshape(y, (46, 1))
x = np.hstack((np.ones((x.shape[0], 1)), x))
theta = np.zeros((x.shape[1], 1))
learning_rate = 0.1
num_epochs = 50
theta, J_all = gradient_descent(x, y, theta, learning_rate, num_epochs)
J = cost_function(x, y, theta)
print("Cost:", J)
print("Parameters:", theta)

# for testing and plotting cost
n_epochs = []
jplot = []
count = 0
for i in J_all:
    jplot.append(i[0][0])
    n_epochs.append(count)
    count += 1
jplot = np.array(jplot)
n_epochs = np.array(n_epochs)
plot_cost(jplot, n_epochs)

test(theta, [1600, 2])

plot_cost plots the cost against the epoch number, and test normalizes a raw input with the stored mean and standard deviation before predicting its price:

def plot_cost(J_all, num_epochs):
    plt.xlabel('Epochs')
    plt.ylabel('Cost')
    plt.plot(num_epochs, J_all, 'm', linewidth=5)
    plt.show()

def test(theta, x):
    x[0] = (x[0] - mu[0])/std[0]
    x[1] = (x[1] - mu[1])/std[1]

    y = theta[0] + theta[1]*x[0] + theta[2]*x[1]
    print("Price of house:", y)
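Before moving on to the full program, a note on the np.hstack((np.ones(...), x)) line in the driver code above: prepending a column of ones lets the matrix product x @ theta include the intercept, so each prediction is theta[0] + theta[1]*housesize + theta[2]*rooms. A minimal sketch with made-up numbers:

import numpy as np

# hypothetical normalized features for two houses: [housesize_z, rooms_z]
feats = np.array([[0.3, -0.2], [-1.1, 0.5]])
with_bias = np.hstack((np.ones((feats.shape[0], 1)), feats))

theta = np.array([[340000.0], [110000.0], [-6000.0]])   # made-up parameters
print(with_bias @ theta)   # row i: theta[0] + theta[1]*feats[i,0] + theta[2]*feats[i,1]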
Putting everything together, here is the complete code:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# variables to store mean and standard deviation for each feature
mu = []
std = []
def load_data(filename):
    df = pd.read_csv(filename, sep=",", index_col=False)
    df.columns = ["housesize", "rooms", "price"]
    data = np.array(df, dtype=float)
    plot_data(data[:,:2], data[:, -1])
    normalize(data)
    return data[:,:2], data[:, -1]
def plot_data(x, y):
    plt.xlabel('house size')
    plt.ylabel('price')
    plt.plot(x[:,0], y, 'bo')
    plt.show()
def normalize(data):
    for i in range(0, data.shape[1]-1):
        # record the statistics before scaling so test() can reuse them
        mu.append(np.mean(data[:,i]))
        std.append(np.std(data[:, i]))
        data[:,i] = (data[:,i] - np.mean(data[:,i])) / np.std(data[:, i])
def h(x, theta):
    return np.matmul(x, theta)
def cost_function(x, y, theta):
    return ((h(x, theta) - y).T @ (h(x, theta) - y)) / (2*y.shape[0])
def gradient_descent(x, y, theta, learning_rate=0.1, num_epochs=10):
    m = x.shape[0]
    J_all = []

    for _ in range(num_epochs):
        h_x = h(x, theta)
        cost_ = (1/m)*(x.T@(h_x - y))   # gradient of the cost with respect to theta
        theta = theta - (learning_rate)*cost_
        J_all.append(cost_function(x, y, theta))

    return theta, J_all
def plot_cost(J_all, num_epochs):
    plt.xlabel('Epochs')
    plt.ylabel('Cost')
    plt.plot(num_epochs, J_all, 'm', linewidth=5)
    plt.show()
def test(theta, x):
    x[0] = (x[0] - mu[0])/std[0]
    x[1] = (x[1] - mu[1])/std[1]

    y = theta[0] + theta[1]*x[0] + theta[2]*x[1]
    print("Price of house:", y)
x, y = load_data("house_price_data.txt")
y = np.reshape(y, (46, 1))
x = np.hstack((np.ones((x.shape[0], 1)), x))
theta = np.zeros((x.shape[1], 1))
learning_rate = 0.1
num_epochs = 50
theta, J_all = gradient_descent(x, y, theta, learning_rate, num_epochs)
J = cost_function(x, y, theta)
print("Cost:", J)
print("Parameters:", theta)
# for testing and plotting cost
n_epochs = []
jplot = []
count = 0
for i in J_all:
    jplot.append(i[0][0])
    n_epochs.append(count)
    count += 1
jplot = np.array(jplot)
n_epochs = np.array(n_epochs)
plot_cost(jplot, n_epochs)
test(theta, [1600, 3])
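The article sets Scikit-learn aside for learning purposes, but it is convenient for verification. As a final sanity check, you could fit sklearn's LinearRegression on the same normalized features and confirm that it recovers roughly the same parameters; this is a sketch assuming scikit-learn is installed and that x, y and theta are the arrays produced by the complete code above (x still carrying its bias column):

from sklearn.linear_model import LinearRegression

# drop the manually added bias column; sklearn fits its own intercept
model = LinearRegression()
model.fit(x[:, 1:], y)

print("sklearn intercept:", model.intercept_)   # roughly theta[0] from gradient descent
print("sklearn weights:", model.coef_)          # roughly theta[1] and theta[2]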