{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.7 softmax回归的简洁实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4.1\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "from collections import OrderedDict\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "from torch import nn\n",
    "from torch.nn import init\n",
    "\n",
    "# d2lzh_pytorch is a local helper package, expected one directory up.\n",
    "sys.path.append(\"..\")\n",
    "import d2lzh_pytorch as d2l\n",
    "\n",
    "print(torch.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.7.1 获取和读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_size = 256\n",
    "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.7.2 定义和初始化模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_inputs = 784  # flattened size of one (1, 28, 28) input image\n",
    "num_outputs = 10\n",
    "\n",
    "\n",
    "class FlattenLayer(nn.Module):\n",
    "    \"\"\"Flatten all dimensions after the first: (batch, *, *, ...) -> (batch, -1).\"\"\"\n",
    "\n",
    "    def forward(self, x):  # x shape: (batch, *, *, ...)\n",
    "        # reshape (unlike view) also accepts non-contiguous inputs such as\n",
    "        # permuted or transposed tensors, copying only when it has to.\n",
    "        return x.reshape(x.shape[0], -1)\n",
    "\n",
    "\n",
    "# Flattening is its own layer (instead of happening inside a custom model's\n",
    "# forward) so the model can be a named nn.Sequential; the linear layer is\n",
    "# reachable afterwards as net.linear.\n",
    "net = nn.Sequential(\n",
    "    OrderedDict([\n",
    "        ('flatten', FlattenLayer()),\n",
    "        ('linear', nn.Linear(num_inputs, num_outputs)),\n",
    "    ])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Small Gaussian weights and a zero bias. The last call returns the bias\n",
    "# parameter, which is what this cell displays.\n",
    "init.normal_(net.linear.weight, mean=0, std=0.01)\n",
    "init.constant_(net.linear.bias, val=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.7.3 softmax和交叉熵损失函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# nn.CrossEntropyLoss applies log-softmax itself, so the model above ends\n",
    "# with the linear layer and outputs raw scores.\n",
    "loss = nn.CrossEntropyLoss()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.7.4 定义优化算法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "optimizer = torch.optim.SGD(net.parameters(), lr=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.7.5 训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch 1, loss 0.0031, train acc 0.748, test acc 0.785\n",
      "epoch 2, loss 0.0022, train acc 0.813, test acc 0.802\n",
      "epoch 3, loss 0.0021, train acc 0.824, test acc 0.808\n",
      "epoch 4, loss 0.0020, train acc 0.833, test acc 0.824\n",
      "epoch 5, loss 0.0019, train acc 0.837, test acc 0.806\n"
     ]
    }
   ],
   "source": [
    "num_epochs = 5\n",
    "# NOTE(review): the two None positional arguments presumably fill the\n",
    "# helper's manual params/lr slots, unused here because an optimizer is\n",
    "# supplied -- confirm against d2l.train_ch3.\n",
    "d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}