Dive-into-DL-PyTorch/code/chapter05_CNN/5.1_conv-layer.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5.1 二维卷积层\n",
    "## 5.1.1 二维互相关运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mPython 3.6.13 ('deepsort') 需要安装 ipykernel。\n",
      "Run the following command to install 'ipykernel' into the Python environment. \n",
      "Command: 'conda install -n deepsort ipykernel --update-deps --force-reinstall'"
     ]
    }
   ],
   "source": [
    "import torch \n",
    "from torch import nn\n",
    "\n",
    "print(torch.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def corr2d(X, K):  # 本函数已保存在d2lzh_pytorch包中方便以后使用\n",
    "    h, w = K.shape\n",
    "    X, K = X.float(), K.float()\n",
    "    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
    "    for i in range(Y.shape[0]):\n",
    "        for j in range(Y.shape[1]):\n",
    "            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()\n",
    "    return Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[19., 25.],\n",
       "        [37., 43.]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])\n",
    "K = torch.tensor([[0, 1], [2, 3]])\n",
    "corr2d(X, K)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.1.2 二维卷积层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class Conv2D(nn.Module):\n",
    "    def __init__(self, kernel_size):\n",
    "        super(Conv2D, self).__init__()\n",
    "        self.weight = nn.Parameter(torch.randn(kernel_size))\n",
    "        self.bias = nn.Parameter(torch.randn(1))\n",
    "\n",
    "    def forward(self, x):\n",
    "        return corr2d(x, self.weight) + self.bias"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.1.3 图像中物体边缘检测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = torch.ones(6, 8)\n",
    "X[:, 2:6] = 0\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "K = torch.tensor([[1, -1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Y = corr2d(X, K)\n",
    "Y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.1.4 通过数据学习核数组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 5, loss 1.844\n",
      "Step 10, loss 0.206\n",
      "Step 15, loss 0.023\n",
      "Step 20, loss 0.003\n"
     ]
    }
   ],
   "source": [
    "# 构造一个核数组形状是(1, 2)的二维卷积层\n",
    "conv2d = Conv2D(kernel_size=(1, 2))\n",
    "\n",
    "step = 20\n",
    "lr = 0.01\n",
    "for i in range(step):\n",
    "    Y_hat = conv2d(X)\n",
    "    l = ((Y_hat - Y) ** 2).sum()\n",
    "    l.backward()\n",
    "    \n",
    "    # 梯度下降\n",
    "    conv2d.weight.data -= lr * conv2d.weight.grad\n",
    "    conv2d.bias.data -= lr * conv2d.bias.grad\n",
    "    \n",
    "    # 梯度清0\n",
    "    conv2d.weight.grad.fill_(0)\n",
    "    conv2d.bias.grad.fill_(0)\n",
    "    if (i + 1) % 5 == 0:\n",
    "        print('Step %d, loss %.3f' % (i + 1, l.item()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "weight:  tensor([[ 0.9948, -1.0092]])\n",
      "bias:  tensor([0.0080])\n"
     ]
    }
   ],
   "source": [
    "print(\"weight: \", conv2d.weight.data)\n",
    "print(\"bias: \", conv2d.bias.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}