Universal Approximation
Table of Contents
1. Universal Approximation
Universal approximation (通用逼近理论) states that a feed-forward neural network with a linear output layer and at least one hidden layer can approximate any continuous function on a compact subset of \(\mathbb{R}^n\) to arbitrarily high precision, provided the network is given enough neurons.
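Formally (a standard statement following Cybenko and Hornik; the notation below is my own): for every continuous \(f : K \to \mathbb{R}\) on a compact set \(K \subset \mathbb{R}^n\) and every \(\varepsilon > 0\), there exist a width \(N\) and parameters \(w_i \in \mathbb{R}^n\), \(b_i, \alpha_i \in \mathbb{R}\) such that

\[F(x) = \sum_{i=1}^{N} \alpha_i \, \sigma(w_i^\top x + b_i) \quad \text{satisfies} \quad \sup_{x \in K} |F(x) - f(x)| < \varepsilon,\]

where \(\sigma\) is the activation function. The plots below make this concrete by building "bump" functions, the basic bricks of such an approximation.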
1.1. Sigmoid as the Activation Function
http://neuralnetworksanddeeplearning.com/chap4.html
import numpy as np
import matplotlib.pyplot as plt

plt.style.use("seaborn-v0_8")  # the "seaborn" style was renamed in matplotlib 3.6

epsilon = 0.001


def sigmoid(x):
    return 1. / (1. + np.exp(-x))


def bump_sigmoid(h, a, b):
    # A sigmoid with a very steep slope (1 / epsilon) approximates a step
    # function; the difference of two steps, at a and at b, is a "bump" of
    # height h on the interval [a, b].
    x = np.linspace(0, 5, 100)
    left = sigmoid((x - a) / epsilon)
    right = sigmoid((x - b) / epsilon)
    plt.plot(x, (left - right) * h)


bump_sigmoid(-10, 1, 2)
plt.show()
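Summing many narrow bumps approximates any continuous function on an interval, which is the heart of the constructive argument in the chapter linked above. A minimal sketch, reusing sigmoid and epsilon from the snippet above (the target sin(x) and the 50-interval grid are my own arbitrary choices):

def bump(x, h, a, b):
    # height-h bump on [a, b], built from two steep sigmoids
    return h * (sigmoid((x - a) / epsilon) - sigmoid((x - b) / epsilon))

x = np.linspace(0, 5, 1000)
edges = np.linspace(0, 5, 51)  # 50 narrow intervals
approx = sum(bump(x, np.sin((a + b) / 2), a, b)
             for a, b in zip(edges[:-1], edges[1:]))

plt.plot(x, np.sin(x), label="target sin(x)")
plt.plot(x, approx, label="sum of 50 sigmoid bumps")
plt.legend()
plt.show()

The more and narrower the bumps, the smaller the maximum error, matching the \(\sup_{x \in K} |F(x) - f(x)| < \varepsilon\) form of the theorem.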
1.2. ReLU as the Activation Function
https://www.quora.com/Is-a-single-layered-ReLu-network-still-a-universal-approximator
def relu(x):
    return np.maximum(0, x)


def bump_relu(h, a, b):
    # relu(x - a) - relu(x - a - epsilon), scaled by 1 / epsilon, ramps from
    # 0 up to 1 over [a, a + epsilon]; subtracting the same ramp placed at b
    # brings the value back down, leaving a bump of height h on [a, b].
    x = np.linspace(0, 5, 100)
    step_a = relu(x - a) - relu(x - a - epsilon)
    step_b = relu(x - b) - relu(x - b - epsilon)
    plt.plot(x, h / epsilon * (step_a - step_b))


bump_relu(10, 2, 4)
plt.show()
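With ReLU the bump construction is not even necessary: a single hidden layer can reproduce any piecewise-linear function exactly, one unit per kink, and piecewise-linear functions are dense in the continuous functions on a compact interval. A minimal sketch, reusing relu and the plotting setup from above (the target sin(x) and the knot grid are my own choices):

# Piecewise-linear interpolation of sin on [0, 5] as a sum of ReLU units:
# f(x) ~ f(k_0) + sum_i c_i * relu(x - k_i), where c_i is the change in
# slope at knot k_i.
knots = np.linspace(0, 5, 11)
values = np.sin(knots)
slopes = np.diff(values) / np.diff(knots)
coefs = np.diff(slopes, prepend=0.0)  # slope changes = ReLU coefficients

x = np.linspace(0, 5, 1000)
approx = values[0] + sum(c * relu(x - k) for c, k in zip(coefs, knots[:-1]))

plt.plot(x, np.sin(x), label="target sin(x)")
plt.plot(x, approx, label="sum of 10 ReLU units")
plt.legend()
plt.show()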
1.3. A Linear Function Cannot Serve as the Activation Function
With a linear activation function, each layer computes an affine map, and a composition of affine maps is itself affine. However many layers are stacked, the output is still just a linear combination of the inputs plus a bias, so the network can never approximate a non-linear function.
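A quick numerical check of this collapse (a sketch for illustration only; the layer shapes are arbitrary): two stacked linear layers agree exactly with a single pre-computed linear layer.

rng = np.random.default_rng(0)

# Two "layers" with identity activation: y = W2 @ (W1 @ x + b1) + b2
W1, b1 = rng.normal(size=(4, 3)), rng.normal(size=4)
W2, b2 = rng.normal(size=(2, 4)), rng.normal(size=2)

# The composition collapses to a single affine map y = W @ x + b
W = W2 @ W1
b = W2 @ b1 + b2

x = rng.normal(size=3)
print(np.allclose(W2 @ (W1 @ x + b1) + b2, W @ x + b))  # True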
1.4. Conditions for a Function to Serve as the Activation Function
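The classical characterization (Leshno et al., 1993) is that a continuous activation function makes a one-hidden-layer network a universal approximator if and only if it is not a polynomial. Sigmoid and ReLU both satisfy this; polynomials, including the linear functions above, do not.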
1.5. ANNs Are Not a Panacea
The premise of the universal approximation theorem is approximating a `continuous function` on a compact set, so some problems cannot be solved by an ANN, for example \(f(x)=x\pmod{K}\): a network can fit the pattern on the training range, but nothing forces it to extrapolate the periodicity beyond it. The experiment below tries to classify integers as odd or even (\(K=2\)), with a test set that extends past the training range.
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

N_CLASSES = 2

# A small MLP: 1 input (the raw integer), 10 hidden ReLU units, 2 logits.
model = torch.nn.Sequential(
    torch.nn.Linear(1, 10), torch.nn.ReLU(), torch.nn.Linear(10, N_CLASSES))


class OddsAndEvenDataset(Dataset):
    def __init__(self, low, high, size):
        X = np.random.randint(low, high, size)
        Y = X % N_CLASSES
        self.X = torch.from_numpy(X).float().view(-1, 1)
        self.Y = torch.from_numpy(Y).long().view(-1)

    def __getitem__(self, index):
        return self.X[index], self.Y[index]

    def __len__(self):
        return len(self.X)


# Train on integers in [0, 1000); test on [500, 2000), which extends
# beyond the training range.
training_set = OddsAndEvenDataset(0, 1000, 500)
training_loader = DataLoader(training_set, batch_size=100)
test_set = OddsAndEvenDataset(500, 2000, 500)
test_loader = DataLoader(test_set, batch_size=500)

# criterion = torch.nn.BCEWithLogitsLoss()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
def train():
    model.train()
    for i in range(1000):
        for x, y in training_loader:
            loss = criterion(model(x), y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # if i % 20 == 0:
        #     print("loss:", loss.item())
    print("loss:", loss.item())
def test():
    model.eval()
    with torch.no_grad():
        # accuracy on one training batch
        for x, y in training_loader:
            y_hat = torch.argmax(model(x), dim=1)
            print("train:", torch.sum(y_hat == y).item() / len(y))
            break
        # accuracy on the full test set (a single batch of 500)
        for x, y in test_loader:
            y_hat = torch.argmax(model(x), dim=1)
            print("test:", torch.sum(y_hat == y).item() / len(y))


train()
test()
loss: 0.6905694007873535
train: 0.47
test: 0.536
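Both accuracies hover around chance level (0.5): from the raw integer input, the network fails to learn parity even on the training range, let alone extrapolate it to the larger test range.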
