{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3.11 模型选择、欠拟合和过拟合\n",
"## 3.11.4 多项式函数拟合实验"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.4.1\n"
]
}
],
"source": [
"%matplotlib inline\n",
"import torch\n",
"import numpy as np\n",
"import sys\n",
"sys.path.append(\"..\") \n",
"import d2lzh_pytorch as d2l\n",
"\n",
"print(torch.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.11.4.1 生成数据集"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5\n",
"features = torch.randn((n_train + n_test, 1))\n",
"poly_features = torch.cat((features, torch.pow(features, 2), torch.pow(features, 3)), 1) \n",
"labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]\n",
" + true_w[2] * poly_features[:, 2] + true_b)\n",
"labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[0.2398],\n",
" [1.6313]]), tensor([[0.2398, 0.0575, 0.0138],\n",
" [1.6313, 2.6610, 4.3408]]), tensor([ 5.1772, 22.2243]))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"features[:2], poly_features[:2], labels[:2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.11.4.2 定义、训练和测试模型"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def semilogy(x_vals, y_vals, x_label, y_label, x2_vals=None, y2_vals=None,\n",
" legend=None, figsize=(3.5, 2.5)):\n",
" d2l.set_figsize(figsize)\n",
" d2l.plt.xlabel(x_label)\n",
" d2l.plt.ylabel(y_label)\n",
" d2l.plt.semilogy(x_vals, y_vals)\n",
" if x2_vals and y2_vals:\n",
" d2l.plt.semilogy(x2_vals, y2_vals, linestyle=':')\n",
" d2l.plt.legend(legend)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"num_epochs, loss = 100, torch.nn.MSELoss()\n",
"\n",
"def fit_and_plot(train_features, test_features, train_labels, test_labels):\n",
" net = torch.nn.Linear(train_features.shape[-1], 1)\n",
" # 通过Linear文档可知,pytorch已经将参数初始化了,所以我们这里就不手动初始化了\n",
" \n",
" batch_size = min(10, train_labels.shape[0]) \n",
" dataset = torch.utils.data.TensorDataset(train_features, train_labels)\n",
" train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)\n",
" \n",
" optimizer = torch.optim.SGD(net.parameters(), lr=0.01)\n",
" train_ls, test_ls = [], []\n",
" for _ in range(num_epochs):\n",
" for X, y in train_iter:\n",
" l = loss(net(X), y.view(-1, 1))\n",
" optimizer.zero_grad()\n",
" l.backward()\n",
" optimizer.step()\n",
" train_labels = train_labels.view(-1, 1)\n",
" test_labels = test_labels.view(-1, 1)\n",
" train_ls.append(loss(net(train_features), train_labels).item())\n",
" test_ls.append(loss(net(test_features), test_labels).item())\n",
" print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])\n",
" semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',\n",
" range(1, num_epochs + 1), test_ls, ['train', 'test'])\n",
" print('weight:', net.weight.data,\n",
" '\\nbias:', net.bias.data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.11.4.3 三阶多项式函数拟合(正常)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"final epoch: train loss 0.00010175639908993617 test loss 9.790256444830447e-05\n",
"weight: tensor([[ 1.1982, -3.3992, 5.6002]]) \n",
"bias: tensor([5.0014])\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fit_and_plot(poly_features[:n_train, :], poly_features[n_train:, :], labels[:n_train], labels[n_train:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.11.4.4 线性函数拟合(欠拟合)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"final epoch: train loss 249.35157775878906 test loss 168.37705993652344\n",
"weight: tensor([[19.4123]]) \n",
"bias: tensor([0.5805])\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fit_and_plot(features[:n_train, :], features[n_train:, :], labels[:n_train], labels[n_train:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.11.4.5 训练样本不足(过拟合)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"final epoch: train loss 1.198514699935913 test loss 166.037109375\n",
"weight: tensor([[1.4741, 2.1198, 2.5674]]) \n",
"bias: tensor([3.1207])\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fit_and_plot(poly_features[0:2, :], poly_features[n_train:, :], labels[0:2], labels[n_train:])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}