Optimizer Sample
1. Optimizer Sample
1.1. Low-level API: GradientTape
1.1.1. Computing a gradient
import tensorflow as tf
import numpy as np

x = tf.Variable(1., name="x")
with tf.GradientTape() as tape:
    y = tf.pow(x, 2) + 1
print(tape.gradient(y, x))  # dy/dx = 2x, so 2.0 at x = 1
tf.Tensor(2.0, shape=(), dtype=float32)
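GradientTape can also differentiate through plain tensors if they are watched explicitly, and tapes can be nested for higher-order derivatives. A minimal sketch (the variable names are illustrative, not from the original):

import tensorflow as tf

x = tf.constant(3.)
with tf.GradientTape() as outer:
    outer.watch(x)  # constants are not tracked automatically
    with tf.GradientTape() as inner:
        inner.watch(x)
        y = tf.pow(x, 2) + 1
    dy_dx = inner.gradient(y, x)    # 2x = 6.0
d2y_dx2 = outer.gradient(dy_dx, x)  # second derivative: 2.0
print(dy_dx, d2y_dx2)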
1.1.2. Manual gradient descent
x = tf.Variable(10., name="x")
lr = 0.01
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = tf.pow(x, 2) + 1
    dx = tape.gradient(y, x)
    x.assign(x - dx * lr)  # gradient-descent step: x <- x - lr * dy/dx
tf.print(y, x)
1 1.68296665e-08
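The update can also be written with Variable.assign_sub, which subtracts in place; a minimal variant of the loop above:

import tensorflow as tf

x = tf.Variable(10., name="x")
lr = 0.01
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = tf.pow(x, 2) + 1
    dx = tape.gradient(y, x)
    x.assign_sub(lr * dx)  # equivalent to x.assign(x - lr * dx)
tf.print(y, x)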
1.2. Mid-level API: optimizer
1.2.1. apply_gradients
x = tf.Variable(10., name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = tf.pow(x, 2) + 1
    dx = tape.gradient(y, x)
    optimizer.apply_gradients([(dx, x)])
tf.print(y, x)
1 1.68296665e-08
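apply_gradients accepts any list of (gradient, variable) pairs, so the same pattern scales to several variables by zipping the gradients returned by the tape. A sketch with two illustrative variables a and b (not from the original):

import tensorflow as tf

a = tf.Variable(2., name="a")
b = tf.Variable(3., name="b")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
for _ in range(100):
    with tf.GradientTape() as tape:
        loss = tf.pow(a, 2) + tf.pow(b, 2)  # minimum at a = b = 0
    grads = tape.gradient(loss, [a, b])     # one gradient per variable
    optimizer.apply_gradients(zip(grads, [a, b]))
tf.print(a, b)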
1.2.2. minimize
x = tf.Variable(1., name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = lambda: tf.pow(x, 2) + 1
for _ in range(1000):
    optimizer.minimize(loss, [x])
tf.print(loss(), x)
1 1.6829661e-09
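Per the TensorFlow docs, minimize simply computes the gradients with a tf.GradientTape and calls apply_gradients, so one minimize step is equivalent to one pass of the previous example:

import tensorflow as tf

x = tf.Variable(1., name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = lambda: tf.pow(x, 2) + 1

optimizer.minimize(loss, [x])  # one step ...

with tf.GradientTape() as tape:  # ... does the same as this
    y = loss()
dx = tape.gradient(y, x)
optimizer.apply_gradients([(dx, x)])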
1.3. High-level API: Keras
Fit \(f(x)=x^2\)
1.3.1. Sequential API
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, losses, metrics, optimizers, models

n = 100
X = tf.random.uniform([n, 1], minval=1, maxval=10.)
Y = tf.pow(X, 2)  # training target: y = x^2

tf.keras.backend.clear_session()
model = models.Sequential()
model.add(layers.Dense(50, input_shape=(1,), activation="relu"))
model.add(layers.Dense(1))
model.compile(optimizer="adam", loss="mse")

history = model.fit(X, Y, batch_size=20, epochs=5000, verbose=0)
print(history.history["loss"][-1])
print(model.predict([2., 10., 20.]))
0.002070189220830798
[[  4.0138993]
 [ 99.874664 ]
 [292.50418  ]]
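The same Sequential model can also be trained with the mid-level API from section 1.2 by looping over model.trainable_variables; a minimal sketch (full-batch steps instead of model.fit's mini-batches, and the step count is illustrative):

import tensorflow as tf
from tensorflow.keras import layers, models

n = 100
X = tf.random.uniform([n, 1], minval=1, maxval=10.)
Y = tf.pow(X, 2)

model = models.Sequential()
model.add(layers.Dense(50, input_shape=(1,), activation="relu"))
model.add(layers.Dense(1))

optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
for _ in range(1000):
    with tf.GradientTape() as tape:
        loss = loss_fn(Y, model(X))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
tf.print(loss)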
1.3.2. Functional API
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

n = 100
X = tf.random.uniform([n, 1], minval=1, maxval=10.)
Y = tf.pow(X, 2)  # training target: y = x^2

tf.keras.backend.clear_session()
inputs = keras.Input(shape=(1,))
dense = layers.Dense(50, activation="relu")(inputs)
outputs = layers.Dense(1)(dense)
model = keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="mse")

history = model.fit(X, Y, batch_size=20, epochs=5000, verbose=0)
print(history.history["loss"][-1])
print(model.predict([2., 10., 20.]))
0.0012873734813183546
[[  4.0098534]
 [ 99.86765  ]
 [293.89005  ]]
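Depending on the TensorFlow version, predict may reject the bare Python list; passing an explicitly shaped tensor avoids the ambiguity (a usage note, assuming the model defined above):

print(model.predict(tf.constant([[2.], [10.], [20.]])))  # shape (3, 1): three samples, one feature each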