{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.3 线性回归的简洁实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4.1\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "import numpy as np\n",
    "\n",
    "# Seed both generators: torch covers parameter initialisation and DataLoader\n",
    "# shuffling, while the synthetic dataset below is sampled with numpy.\n",
    "torch.manual_seed(1)\n",
    "np.random.seed(1)\n",
    "\n",
    "print(torch.__version__)\n",
    "# Replaces the deprecated torch.set_default_tensor_type('torch.FloatTensor')\n",
    "torch.set_default_dtype(torch.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.1 生成数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Synthetic regression data: labels = features @ true_w + true_b + Gaussian noise (std 0.01)\n",
    "num_inputs = 2\n",
    "num_examples = 1000\n",
    "true_w = [2, -3.4]\n",
    "true_b = 4.2\n",
    "features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)\n",
    "labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b\n",
    "labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.2 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import torch.utils.data as Data\n",
    "\n",
    "batch_size = 10\n",
    "\n",
    "# Pair every feature row with its label\n",
    "dataset = Data.TensorDataset(features, labels)\n",
    "\n",
    "# Wrap the dataset in a DataLoader that yields mini-batches\n",
    "data_iter = Data.DataLoader(\n",
    "    dataset=dataset,        # torch TensorDataset format\n",
    "    batch_size=batch_size,  # mini-batch size\n",
    "    shuffle=True,           # shuffle the examples (recommended for SGD)\n",
    "    num_workers=2,          # load batches in 2 worker subprocesses (processes, not threads)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.0163, -1.0072],\n",
      "        [-0.3554, -0.1807],\n",
      "        [-1.2406, -2.3683],\n",
      "        [ 1.3847,  1.9209],\n",
      "        [-0.7570, -0.3135],\n",
      "        [ 0.3181, -0.8122],\n",
      "        [-0.3864,  0.0382],\n",
      "        [ 1.0939, -0.1225],\n",
      "        [ 0.7272,  0.4801],\n",
      "        [ 0.6706, -0.7972]]) \n",
      " tensor([7.6005, 4.1017, 9.7864, 0.4568, 3.7355, 7.5675, 3.2881, 6.7967, 4.0404,\n",
      "        8.2513])\n"
     ]
    }
   ],
   "source": [
    "# Peek at the first mini-batch only\n",
    "for X, y in data_iter:\n",
    "    print(X, '\\n', y)\n",
    "    break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.3 定义模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LinearNet(\n",
      "  (linear): Linear(in_features=2, out_features=1, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "class LinearNet(nn.Module):\n",
    "    \"\"\"Linear regression model: one fully connected layer with a single output.\"\"\"\n",
    "\n",
    "    def __init__(self, n_feature):\n",
    "        \"\"\"Create the layer mapping `n_feature` inputs to one output.\"\"\"\n",
    "        super().__init__()\n",
    "        self.linear = nn.Linear(n_feature, 1)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the linear layer's prediction for the input batch `x`.\"\"\"\n",
    "        return self.linear(x)\n",
    "\n",
    "net = LinearNet(num_inputs)\n",
    "print(net)  # printing a module shows the structure of the network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sequential(\n",
      "  (linear): Linear(in_features=2, out_features=1, bias=True)\n",
      ")\n",
      "Linear(in_features=2, out_features=1, bias=True)\n"
     ]
    }
   ],
   "source": [
    "# Three equivalent ways to build the same model with nn.Sequential;\n",
    "# each assignment replaces `net`, so only the last one is used afterwards.\n",
    "\n",
    "# Style 1: pass the layers positionally\n",
    "net = nn.Sequential(\n",
    "    nn.Linear(num_inputs, 1)\n",
    "    # further layers can be passed here\n",
    "    )\n",
    "\n",
    "# Style 2: start empty and add named modules\n",
    "net = nn.Sequential()\n",
    "net.add_module('linear', nn.Linear(num_inputs, 1))\n",
    "# net.add_module ......\n",
    "\n",
    "# Style 3: build from an OrderedDict of named layers\n",
    "from collections import OrderedDict\n",
    "net = nn.Sequential(OrderedDict([\n",
    "    ('linear', nn.Linear(num_inputs, 1))\n",
    "    # ......\n",
    "    ]))\n",
    "\n",
    "print(net)\n",
    "print(net[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameter containing:\n",
      "tensor([[0.5347, 0.7057]], requires_grad=True)\n",
      "Parameter containing:\n",
      "tensor([0.6873], requires_grad=True)\n"
     ]
    }
   ],
   "source": [
    "# Inspect the learnable parameters (weight, then bias) before re-initialising them\n",
    "for param in net.parameters():\n",
    "    print(param)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.4 初始化模型参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([0.], requires_grad=True)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from torch.nn import init\n",
    "\n",
    "# Weights ~ N(0, 0.01^2), bias = 0\n",
    "init.normal_(net[0].weight, mean=0.0, std=0.01)\n",
    "init.constant_(net[0].bias, val=0.0)  # alternatively, modify the bias data directly: net[0].bias.data.fill_(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameter containing:\n",
      "tensor([[-0.0142, -0.0161]], requires_grad=True)\n",
      "Parameter containing:\n",
      "tensor([0.], requires_grad=True)\n"
     ]
    }
   ],
   "source": [
    "# Confirm the re-initialised values\n",
    "for param in net.parameters():\n",
    "    print(param)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.5 定义损失函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "loss = nn.MSELoss()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.6 定义优化算法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SGD (\n",
      "Parameter Group 0\n",
      "    dampening: 0\n",
      "    lr: 0.03\n",
      "    momentum: 0\n",
      "    nesterov: False\n",
      "    weight_decay: 0\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "import torch.optim as optim\n",
    "\n",
    "optimizer = optim.SGD(net.parameters(), lr=0.03)\n",
    "print(optimizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example (not executed): use a different learning rate for each sub-network\n",
    "# optimizer = optim.SGD([\n",
    "#     # a parameter group without its own 'lr' uses the outer default learning rate\n",
    "#     {'params': net.subnet1.parameters()},  # lr=0.03\n",
    "#     {'params': net.subnet2.parameters(), 'lr': 0.01}\n",
    "# ], lr=0.03)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Example (not executed): adjust the learning rate of an existing optimizer\n",
    "# for param_group in optimizer.param_groups:\n",
    "#     param_group['lr'] *= 0.1  # learning rate becomes 0.1x its previous value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3.7 训练模型"
   ]
  },
{
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_epochs = 3\n",
    "for epoch in range(1, num_epochs + 1):\n",
    "    for X, y in data_iter:\n",
    "        output = net(X)\n",
    "        # Reshape y into a column so it has the same shape as the model output\n",
    "        batch_loss = loss(output, y.view(-1, 1))\n",
    "        optimizer.zero_grad()  # clear old gradients; equivalent to net.zero_grad()\n",
    "        batch_loss.backward()\n",
    "        optimizer.step()\n",
    "    # Report the loss over the whole dataset instead of the last mini-batch,\n",
    "    # whose value is noisy and not comparable from one epoch to the next.\n",
    "    with torch.no_grad():\n",
    "        epoch_loss = loss(net(features), labels.view(-1, 1))\n",
    "    print('epoch %d, loss: %f' % (epoch, epoch_loss.item()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2, -3.4] tensor([[ 1.9999, -3.4005]])\n",
      "4.2 tensor([4.2011])\n"
     ]
    }
   ],
   "source": [
    "# Compare the learned parameters with the ones used to generate the data\n",
    "dense = net[0]\n",
    "print(true_w, dense.weight.data)\n",
    "print(true_b, dense.bias.data)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}