Optimizer Sample
1. Optimizer Sample
1.1. Low-level API: GradientTape
1.1.1. Computing a gradient
import tensorflow as tf
import numpy as np

x = tf.Variable(1., name="x")
with tf.GradientTape() as tape:
    y = tf.pow(x, 2) + 1
print(tape.gradient(y, x))  # dy/dx = 2x, so 2.0 at x = 1
tf.Tensor(2.0, shape=(), dtype=float32)
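GradientTape can also differentiate through plain tensors if they are watched explicitly, and tapes can be nested for higher-order derivatives. A minimal sketch (the variable names are illustrative, not from the original):

import tensorflow as tf

x = tf.constant(3.)
with tf.GradientTape() as outer:
    outer.watch(x)  # constants are not tracked automatically
    with tf.GradientTape() as inner:
        inner.watch(x)
        y = tf.pow(x, 2) + 1
    dy_dx = inner.gradient(y, x)    # 2x = 6.0
d2y_dx2 = outer.gradient(dy_dx, x)  # second derivative: 2.0
print(dy_dx, d2y_dx2)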
1.1.2. Manual gradient descent
x = tf.Variable(10., name="x")
lr = 0.01
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = tf.pow(x, 2) + 1
    dx = tape.gradient(y, x)
    x.assign(x - dx * lr)  # gradient-descent step: x <- x - lr * dy/dx
tf.print(y, x)
1 1.68296665e-08
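The update can also be written with Variable.assign_sub, which subtracts in place; a minimal variant of the loop above:

import tensorflow as tf

x = tf.Variable(10., name="x")
lr = 0.01
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = tf.pow(x, 2) + 1
    dx = tape.gradient(y, x)
    x.assign_sub(lr * dx)  # equivalent to x.assign(x - lr * dx)
tf.print(y, x)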
1.2. Mid-level API: optimizer
1.2.1. apply_gradients
x = tf.Variable(10., name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = tf.pow(x, 2) + 1
    dx = tape.gradient(y, x)
    optimizer.apply_gradients([(dx, x)])
tf.print(y, x)
1 1.68296665e-08
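apply_gradients accepts any list of (gradient, variable) pairs, so the same pattern scales to several variables by zipping the gradients returned by the tape. A sketch with two illustrative variables a and b (not from the original):

import tensorflow as tf

a = tf.Variable(2., name="a")
b = tf.Variable(3., name="b")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
for _ in range(100):
    with tf.GradientTape() as tape:
        loss = tf.pow(a, 2) + tf.pow(b, 2)  # minimum at a = b = 0
    grads = tape.gradient(loss, [a, b])     # one gradient per variable
    optimizer.apply_gradients(zip(grads, [a, b]))
tf.print(a, b)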
1.2.2. minimize
x = tf.Variable(1., name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = lambda: tf.pow(x, 2) + 1
for _ in range(1000):
    optimizer.minimize(loss, [x])
tf.print(loss(), x)
1 1.6829661e-09
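Per the TensorFlow docs, minimize simply computes the gradients with a tf.GradientTape and calls apply_gradients, so one minimize step is equivalent to one pass of the previous example:

import tensorflow as tf

x = tf.Variable(1., name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = lambda: tf.pow(x, 2) + 1

optimizer.minimize(loss, [x])  # one step ...

with tf.GradientTape() as tape:  # ... does the same as this
    y = loss()
dx = tape.gradient(y, x)
optimizer.apply_gradients([(dx, x)])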
1.3. High-level API: Keras
Fit \(f(x)=x^2\)
1.3.1. Sequential API
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, losses, metrics, optimizers, models

n = 100
X = tf.random.uniform([n, 1], minval=1, maxval=10.)
Y = tf.pow(X, 2)  # training target: y = x^2

tf.keras.backend.clear_session()
model = models.Sequential()
model.add(layers.Dense(50, input_shape=(1,), activation="relu"))
model.add(layers.Dense(1))
model.compile(optimizer="adam", loss="mse")

history = model.fit(X, Y, batch_size=20, epochs=5000, verbose=0)
print(history.history["loss"][-1])
print(model.predict([2., 10., 20.]))
0.002070189220830798
[[  4.0138993]
 [ 99.874664 ]
 [292.50418  ]]
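The same Sequential model can also be trained with the mid-level API from section 1.2 by looping over model.trainable_variables; a minimal sketch (full-batch steps instead of model.fit's mini-batches, and the step count is illustrative):

import tensorflow as tf
from tensorflow.keras import layers, models

n = 100
X = tf.random.uniform([n, 1], minval=1, maxval=10.)
Y = tf.pow(X, 2)

model = models.Sequential()
model.add(layers.Dense(50, input_shape=(1,), activation="relu"))
model.add(layers.Dense(1))

optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
for _ in range(1000):
    with tf.GradientTape() as tape:
        loss = loss_fn(Y, model(X))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
tf.print(loss)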
1.3.2. Functional API
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

n = 100
X = tf.random.uniform([n, 1], minval=1, maxval=10.)
Y = tf.pow(X, 2)  # training target: y = x^2

tf.keras.backend.clear_session()
inputs = keras.Input(shape=(1,))
dense = layers.Dense(50, activation="relu")(inputs)
outputs = layers.Dense(1)(dense)
model = keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="mse")

history = model.fit(X, Y, batch_size=20, epochs=5000, verbose=0)
print(history.history["loss"][-1])
print(model.predict([2., 10., 20.]))
0.0012873734813183546
[[  4.0098534]
 [ 99.86765  ]
 [293.89005  ]]
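Depending on the TensorFlow version, predict may reject the bare Python list; passing an explicitly shaped tensor avoids the ambiguity (a usage note, assuming the model defined above):

print(model.predict(tf.constant([[2.], [10.], [20.]])))  # shape (3, 1): three samples, one feature each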