Linear Regression

Author: 就是果味熊 | Published 2020-02-18 21:05

Two-parameter linear regression
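The code below fits y = k1 * x + k0 by batch gradient descent on the mean-squared-error cost. With n samples:

J(k_1, k_0) = \frac{1}{2n} \sum_{i=1}^{n} (k_1 x_i + k_0 - y_i)^2

\frac{\partial J}{\partial k_1} = \frac{1}{n} \sum_{i=1}^{n} (k_1 x_i + k_0 - y_i)\, x_i, \qquad \frac{\partial J}{\partial k_0} = \frac{1}{n} \sum_{i=1}^{n} (k_1 x_i + k_0 - y_i)

Each iteration updates k_j \leftarrow k_j - lr \cdot \partial J / \partial k_j.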

import numpy as np
import random    # needed by gen_sample_data below
#import pandas as pd


def cost_function(k1, k0, x, y):
    '''Mean-squared-error cost: J = 1/(2n) * sum((k1*x + k0 - y)^2).'''
    x = np.array(x)
    y = np.array(y)
    cost = 1/(2 * len(x)) * np.sum((k1 * x + k0 - y)**2)
    return cost

def gradient(k1, k0, x, y):
    '''Partial derivatives of the cost with respect to k1 and k0.'''
    x = np.array(x)
    y = np.array(y)
    grad_k1 = np.sum((k1 * x + k0 - y) * x) / len(x)
    grad_k0 = np.sum(k1 * x + k0 - y) / len(x)
    return grad_k0, grad_k1

def linear_regression_binary(x, y, epochs, lr, k0, k1):
    '''
    epochs: number of gradient-descent iterations
    lr: learning rate
    k0, k1: initial values of the parameters to fit
    '''
    best_k0 = None
    best_k1 = None
    best_cost = np.inf
    for epoch in range(epochs):
        cost = cost_function(k1,k0,x,y)
        print("epoch: %d, k0: %d, k1: %d, cost: %d" % (epoch,k0,k1,cost))
        if cost <= best_cost:
            best_cost = cost
            best_k0 = k0
            best_k1 = k1
        grad_k0,grad_k1 = gradient(k1,k0,x,y)
        k0 = k0 - lr * grad_k0
        k1 = k1 - lr * grad_k1
    return best_k0, best_k1, best_cost

def gen_sample_data():
    '''Generate noisy samples from a random line y = w * x + b.'''
    w = random.randint(0, 10) + random.random()     # random.random() is uniform on [0, 1)
    b = random.randint(0, 5) + random.random()
    num_samples = 100
    x_list = []
    y_list = []
    for i in range(num_samples):
        x = random.randint(0, 100) * random.random()
        y = w * x + b + random.random() * random.randint(-1, 1)
        x_list.append(x)
        y_list.append(y)
    return x_list, y_list, w, b

#%%
if __name__ == '__main__':
    
    x_list, y_list, k1, k0 = gen_sample_data()
    print(x_list)
    print(y_list)
    
    #%%
    k0, k1, cost = linear_regression_binary(x_list, y_list, 30, 0.0002, k0, k1)
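Because the script initializes gradient descent at the true (w, b), convergence is trivial. A more telling check (a minimal sketch; the zero initialization is an assumption of mine, not the author's setup) starts from k0 = k1 = 0:

x_list, y_list, w, b = gen_sample_data()
# Start from zero instead of the true parameters.
k0, k1, cost = linear_regression_binary(x_list, y_list, 30, 0.0002, 0, 0)
print("true w: %f, fitted k1: %f" % (w, k1))
print("true b: %f, fitted k0: %f" % (b, k0))

With x values as large as 100, the slope term dominates the gradient, so k1 approaches w within a few dozen epochs, while the intercept k0 converges much more slowly at this learning rate.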

Multivariate linear regression
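Once a constant column of ones is appended to X (so the intercept k0 becomes the last element of k), the cost and gradient implemented below take the matrix form:

J(k) = \frac{1}{2n} \lVert X k - y \rVert^2, \qquad \nabla J(k) = \frac{1}{n} X^{T} (X k - y)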

The implementation relies on the following NumPy operations:

x_T = np.transpose(x)           # transpose
x0 = np.ones((x.shape[0]))      # column of ones
x = np.column_stack((x, x0))    # append x0 as a new column of x
x = np.dot(x, y)                # matrix product if both are 2-D; inner (dot) product if both are 1-D vectors
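A quick shape check of the ones-column trick (values purely illustrative):

import numpy as np

x = np.array([[1.0, 2.0], [3.0, 4.0]])   # two samples, two features
x0 = np.ones((x.shape[0]))               # array([1., 1.])
x = np.column_stack((x, x0))             # [[1., 2., 1.], [3., 4., 1.]]
k = np.array([0.5, 0.5, 1.0])            # [k1, k2, k0]
print(np.dot(k, np.transpose(x)))        # predictions: [2.5 4.5]

The full implementation: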
import numpy as np
#import pandas as pd

'''
The last element of the vector k is k0, the intercept: a constant 1 is
appended to every sample in x, so a single dot product covers all
parameters, including k0.
'''
def cost_function(k, x, y):
    '''Mean-squared-error cost in matrix form: J = 1/(2n) * ||X k - y||^2.'''
    x = np.array(x)
    x0 = np.ones((x.shape[0]))
    x = np.column_stack((x, x0))    # append the constant-1 column
    x_T = np.transpose(x)
    y = np.array(y)
    k = np.array(k)
    cost = 1/(2 * len(x)) * np.sum((np.dot(k, x_T) - y)**2)
    return cost


def gradient(k, x, y):
    '''Gradient of the cost: (1/n) * (X k - y)^T X.'''
    x = np.array(x)
    x0 = np.ones((x.shape[0]))
    x = np.column_stack((x, x0))    # append the constant-1 column
    y = np.array(y)
    k = np.array(k)
    x_T = np.transpose(x)
    grad_k = np.dot((np.dot(k, x_T) - y), x) / len(x)
    return grad_k

def linear_regression_Non_binary(x, y, epochs, lr, k):
    '''
    epochs: number of gradient-descent iterations
    lr: learning rate
    k: initial parameter vector (intercept k0 as its last element)
    '''
    best_k = None
    best_cost = np.inf
    k = np.array(k)
    for epoch in range(epochs):
        cost = cost_function(k,x,y)
        print("epoch: %d, k: %d, cost: %d" % (epoch,k,cost))
        if cost <= best_cost:
            best_cost = cost
            best_k = k
        grad_k = gradient(k,x,y)
        k = k - lr * grad_k

    return best_k, best_cost
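A minimal usage sketch (synthetic data; the target coefficients and hyperparameter values here are hypothetical, chosen only for illustration):

if __name__ == '__main__':
    np.random.seed(0)
    x = np.random.uniform(0, 10, size=(100, 2))    # 100 samples, 2 features
    y = 2 * x[:, 0] - x[:, 1] + 3 + np.random.normal(0, 0.1, size=100)
    k_init = np.zeros(3)                           # [k1, k2, k0]; intercept last
    best_k, best_cost = linear_regression_Non_binary(x, y, 200, 0.01, k_init)
    print(best_k, best_cost)

k1 and k2 should move toward 2 and -1 within a couple hundred epochs; the intercept direction has the smallest curvature and converges noticeably more slowly.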
