Training a simple linear model with gradient descent using tf.GradientTape
Adding the imports
First, import TensorFlow and enable eager execution, as follows:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager
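Optionally, you can confirm that eager execution is active (a quick check, not part of the original article):
print(tf.executing_eagerly())  # should print True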
Constructing the model and sample data
Construct a data set of 1000 examples generated from weight = 3 and bias = 2, with added noise:
NUM_EXAMPLES = 1000  # number of examples
training_inputs = tf.random_normal([NUM_EXAMPLES])  # random values drawn from a normal distribution
noise = tf.random_normal([NUM_EXAMPLES])
training_outputs = training_inputs * 3 + 2 + noise  # construct the labels (weight = 3, bias = 2, plus noise)
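Optionally, you can inspect the generated tensors to confirm their shape and values (a quick sanity check, not part of the original article):
print(training_inputs.shape)         # (1000,)
print(training_outputs[:3].numpy())  # three sample labels, roughly 3*x + 2 plus noise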
Defining the functions
Define the prediction function, the loss function, and the gradient function, as follows:
# Prediction function
def prediction(input, weight, bias):
    return input * weight + bias

# Loss function using mean squared error
def loss(weights, biases):
    error = prediction(training_inputs, weights, biases) - training_outputs
    return tf.reduce_mean(tf.square(error))

# Compute and return the gradients of the loss with respect to the weight and bias
def grad(weights, biases):
    with tf.GradientTape() as tape:
        loss_value = loss(weights, biases)
    return tape.gradient(loss_value, [weights, biases])
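Note that grad returns the gradients of the loss, not updated parameters; the descent step itself happens in the training loop below. For this MSE loss L = mean((W*x + B - y)^2) the gradients also have a closed form, dL/dW = 2*mean(error*x) and dL/dB = 2*mean(error), so you can compare them against what the tape computes (a sanity-check sketch, not in the original article):
w0, b0 = tfe.Variable(5.), tfe.Variable(10.)
dW0, dB0 = grad(w0, b0)
error = prediction(training_inputs, w0, b0) - training_outputs
print(dW0.numpy(), (2 * tf.reduce_mean(error * training_inputs)).numpy())  # should match
print(dB0.numpy(), (2 * tf.reduce_mean(error)).numpy())                    # should match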
Training the model
Initialization
Initialize the number of training steps, the learning rate, the weight, and the bias:
train_steps = 200
learning_rate = 0.01
# initialize the weight and bias to arbitrary values
W = tfe.Variable(5.)
B = tfe.Variable(10.)
Training
for i in range(train_steps):
    dW, dB = grad(W, B)
    W.assign_sub(dW * learning_rate)
    B.assign_sub(dB * learning_rate)
    print("Loss at step {:03d}: {:.3f}".format(i, loss(W, B)))
This yields the trained weight and bias, which constitute our model.
print("Final loss: {:.3f}".format(loss(W, B)))
print("W = {}, B = {}".format(W.numpy(), B.numpy()))
Weight = 3.018173..., bias = 2.17251..., which closely matches the values of 3 and 2 we set.
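With the trained W and B you can make predictions on new inputs by reusing the prediction function, for example (a small usage sketch, not in the original article):
print(prediction(tf.constant([0.0, 1.0]), W, B).numpy())  # roughly [2., 5.]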
The complete example code is as follows:
Method 1:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager
NUM_EXAMPLES = 1000  # number of examples
training_inputs = tf.random_normal([NUM_EXAMPLES])  # random values drawn from a normal distribution
noise = tf.random_normal([NUM_EXAMPLES])
training_outputs = training_inputs * 3 + 2 + noise  # construct the labels (weight = 3, bias = 2, plus noise)
# Prediction function
def prediction(input, weight, bias):
    return input * weight + bias

# Loss function using mean squared error
def loss(weights, biases):
    error = prediction(training_inputs, weights, biases) - training_outputs
    return tf.reduce_mean(tf.square(error))

# Compute and return the gradients of the loss with respect to the weight and bias
def grad(weights, biases):
    with tf.GradientTape() as tape:
        loss_value = loss(weights, biases)
    return tape.gradient(loss_value, [weights, biases])
train_steps = 200
learning_rate = 0.01
# initialize the weight and bias to arbitrary values
W = tfe.Variable(5.)
B = tfe.Variable(10.)
print("Initial loss: {:.3f}".format(loss(W, B)))
for i in range(train_steps):
    dW, dB = grad(W, B)
    W.assign_sub(dW * learning_rate)
    B.assign_sub(dB * learning_rate)
    print("Loss at step {:03d}: {:.3f}".format(i, loss(W, B)))
print("Final loss: {:.3f}".format(loss(W, B)))
print("W = {}, B = {}".format(W.numpy(), B.numpy()))
Method 2 (combining tfe.Variable with tf.GradientTape encapsulates the model parameters more cleanly):
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager
class Model(tf.keras.Model):
    '''A custom linear model'''
    def __init__(self):
        super(Model, self).__init__()
        self.W = tfe.Variable(5., name='weight')
        self.B = tfe.Variable(10., name='bias')

    def call(self, inputs):
        return inputs * self.W + self.B
# Construct the sample data
NUM_EXAMPLES = 2000
training_inputs = tf.random_normal([NUM_EXAMPLES])  # random values drawn from a normal distribution
noise = tf.random_normal([NUM_EXAMPLES])  # noise
training_outputs = training_inputs * 3 + 2 + noise
# Define the loss and gradient functions
def loss(model, inputs, targets):
    error = model(inputs) - targets
    return tf.reduce_mean(tf.square(error))

def grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    return tape.gradient(loss_value, [model.W, model.B])
# Training:
# 1. Instantiate the model
# 2. Compute the gradients of the loss function
# 3. Apply an update strategy to the variables
model = Model()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)  # optimizer
print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs).numpy()))
# Training loop
for i in range(300):
    grads = grad(model, training_inputs, training_outputs)
    optimizer.apply_gradients(zip(grads, [model.W, model.B]),
                              global_step=tf.train.get_or_create_global_step())
    if i % 20 == 0:
        print('Loss at step {:03d}: {:.3f}'.format(i, loss(model, training_inputs, training_outputs).numpy()))
print('Final loss: {:.3f}'.format(loss(model, training_inputs, training_outputs).numpy()))
print('W = {}, B = {}'.format(model.W.numpy(), model.B.numpy()))
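Because Model subclasses tf.keras.Model, it tracks its variables automatically, so instead of listing [model.W, model.B] by hand you can differentiate with respect to model.variables. An equivalent sketch, assuming the same Model class as above:
def grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    return tape.gradient(loss_value, model.variables)

grads = grad(model, training_inputs, training_outputs)
optimizer.apply_gradients(zip(grads, model.variables))
This keeps the gradient code unchanged when the model gains more parameters.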