{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 6.5 循环神经网络的简洁实现" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0.0 cuda\n" ] } ], "source": [ "import time\n", "import math\n", "import numpy as np\n", "import torch\n", "from torch import nn, optim\n", "import torch.nn.functional as F\n", "\n", "import sys\n", "sys.path.append(\"..\") \n", "import d2lzh_pytorch as d2l\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "\n", "(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()\n", "\n", "print(torch.__version__, device)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6.5.1 定义模型" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "num_hiddens = 256\n", "# rnn_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens) # 已测试\n", "rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([35, 2, 256]) 1 torch.Size([2, 256])\n" ] } ], "source": [ "num_steps = 35\n", "batch_size = 2\n", "state = None\n", "X = torch.rand(num_steps, batch_size, vocab_size)\n", "Y, state_new = rnn_layer(X, state)\n", "print(Y.shape, len(state_new), state_new[0].shape)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 本类已保存在d2lzh_pytorch包中方便以后使用\n", "class RNNModel(nn.Module):\n", " def __init__(self, rnn_layer, vocab_size):\n", " super(RNNModel, self).__init__()\n", " self.rnn = rnn_layer\n", " self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1) \n", " self.vocab_size = vocab_size\n", " self.dense = nn.Linear(self.hidden_size, vocab_size)\n", " self.state = None\n", "\n", " def forward(self, inputs, state): # inputs: (batch, seq_len)\n", " # 获取one-hot向量表示\n", " X = d2l.to_onehot(inputs, vocab_size) # X是个list\n", " Y, self.state = self.rnn(torch.stack(X), state)\n", " # 全连接层会首先将Y的形状变成(num_steps * batch_size, num_hiddens),它的输出\n", " # 形状为(num_steps * batch_size, vocab_size)\n", " output = self.dense(Y.view(-1, Y.shape[-1]))\n", " return output, self.state" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6.5.2 训练模型" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 本函数已保存在d2lzh_pytorch包中方便以后使用\n", "def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, device, idx_to_char,\n", " char_to_idx):\n", " state = None\n", " output = [char_to_idx[prefix[0]]] # output会记录prefix加上输出\n", " for t in range(num_chars + len(prefix) - 1):\n", " X = torch.tensor([output[-1]], device=device).view(1, 1)\n", " if state is not None:\n", " if isinstance(state, tuple): # LSTM, state:(h, c) \n", " state = (state[0].to(device), state[1].to(device))\n", " else: \n", " state = state.to(device)\n", " \n", " (Y, state) = model(X, state) # 前向计算不需要传入模型参数\n", " if t < len(prefix) - 1:\n", " output.append(char_to_idx[prefix[t + 1]])\n", " else:\n", " output.append(int(Y.argmax(dim=1).item()))\n", " return ''.join([idx_to_char[i] for i in output])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'分开戏想暖迎凉想征凉征征'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = 
,
  { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'分开戏想暖迎凉想征凉征征'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = RNNModel(rnn_layer, vocab_size).to(device)\n", "predict_rnn_pytorch('分开', 10, model, vocab_size, device, idx_to_char, char_to_idx)" ] },
  { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# This function is saved in the d2lzh_pytorch package for later use\n", "def train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,\n", "                                  corpus_indices, idx_to_char, char_to_idx,\n", "                                  num_epochs, num_steps, lr, clipping_theta,\n", "                                  batch_size, pred_period, pred_len, prefixes):\n", "    loss = nn.CrossEntropyLoss()\n", "    optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n", "    model.to(device)\n", "    state = None\n", "    for epoch in range(num_epochs):\n", "        l_sum, n, start = 0.0, 0, time.time()\n", "        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size, num_steps, device) # consecutive sampling\n", "        for X, Y in data_iter:\n", "            if state is not None:\n", "                # Detach the hidden state from the computation graph so that the\n", "                # gradient computation depends only on the mini-batch sequence read\n", "                # in this iteration (otherwise it would grow too costly)\n", "                if isinstance(state, tuple): # LSTM, state: (h, c)\n", "                    state = (state[0].detach(), state[1].detach())\n", "                else:\n", "                    state = state.detach()\n", "\n", "            (output, state) = model(X, state) # output shape: (num_steps * batch_size, vocab_size)\n", "\n", "            # Y has shape (batch_size, num_steps); transpose it and flatten it into a\n", "            # vector of length batch_size * num_steps whose entries correspond\n", "            # one-to-one with the rows of output\n", "            y = torch.transpose(Y, 0, 1).contiguous().view(-1)\n", "            l = loss(output, y.long())\n", "\n", "            optimizer.zero_grad()\n", "            l.backward()\n", "            # gradient clipping\n", "            d2l.grad_clipping(model.parameters(), clipping_theta, device)\n", "            optimizer.step()\n", "            l_sum += l.item() * y.shape[0]\n", "            n += y.shape[0]\n", "\n", "        try:\n", "            perplexity = math.exp(l_sum / n)\n", "        except OverflowError:\n", "            perplexity = float('inf')\n", "        if (epoch + 1) % pred_period == 0:\n", "            print('epoch %d, perplexity %f, time %.2f sec' % (\n", "                epoch + 1, perplexity, time.time() - start))\n", "            for prefix in prefixes:\n", "                print(' -', predict_rnn_pytorch(\n", "                    prefix, pred_len, model, vocab_size, device, idx_to_char,\n", "                    char_to_idx))" ] },
  { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 50, perplexity 10.658418, time 0.05 sec\n", " - 分开始我妈 想要你 我不多 让我心到的 我妈妈 我不能再想 我不多再想 我不要再想 我不多再想 我不要\n", " - 不分开 我想要你不你 我 你不要 让我心到的 我妈人 可爱女人 坏坏的让我疯狂的可爱女人 坏坏的让我疯狂的\n", "epoch 100, perplexity 1.308539, time 0.05 sec\n", " - 分开不会痛 不要 你在黑色幽默 开始了美丽全脸的梦滴 闪烁成回忆 伤人的美丽 你的完美主义 太彻底 让我\n", " - 不分开不是我不要再想你 我不能这样牵着你的手不放开 爱可不可以简简单单没有伤害 你 靠着我的肩膀 你 在我\n", "epoch 150, perplexity 1.070370, time 0.05 sec\n", " - 分开不能去河南嵩山 学少林跟武当 快使用双截棍 哼哼哈兮 快使用双截棍 哼哼哈兮 习武之人切记 仁者无敌\n", " - 不分开 在我会想通 是谁开没有全有开始 他心今天 一切人看 我 一口令秋软语的姑娘缓缓走过外滩 消失的 旧\n", "epoch 200, perplexity 1.034663, time 0.05 sec\n", " - 分开不能去吗周杰伦 才离 没要你在一场悲剧 我的完美主义 太彻底 分手的话像语言暴力 我已无能为力再提起\n", " - 不分开 让我面到你 爱情来的太快就像龙卷风 离不开暴风圈来不及逃 我不能再想 我不能再想 我不 我不 我不\n", "epoch 250, perplexity 1.021437, time 0.05 sec\n", " - 分开 我我外的家边 你知道这 我爱不看的太 我想一个又重来不以 迷已文一只剩下回忆 让我叫带你 你你的\n", " - 不分开 我我想想和 是你听没不 我不能不想 不知不觉 你已经离开我 不知不觉 我跟了这节奏 后知后觉 \n" ] } ], "source": [ "num_epochs, batch_size, lr, clipping_theta = 250, 32, 1e-3, 1e-2 # note the learning rate setting here\n", "pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']\n", "train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,\n", "                              corpus_indices, idx_to_char, char_to_idx,\n", "                              num_epochs, num_steps, lr, clipping_theta,\n", "                              batch_size, pred_period, pred_len, prefixes)" ] }
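,
  { "cell_type": "markdown", "metadata": {}, "source": [ "Because `RNNModel`, `predict_rnn_pytorch`, and `train_and_predict_rnn_pytorch` all dispatch on the type of the state, switching to a different recurrent layer only requires changing `rnn_layer`. The cell below is a sketch added for illustration (not part of the original notebook): it trains the same pipeline with `nn.GRU`, simply reusing the hyperparameters from the experiment above, which may need retuning." ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: the same pipeline with a GRU layer instead of a vanilla RNN\n", "gru_layer = nn.GRU(input_size=vocab_size, hidden_size=num_hiddens)\n", "gru_model = RNNModel(gru_layer, vocab_size).to(device)\n", "train_and_predict_rnn_pytorch(gru_model, num_hiddens, vocab_size, device,\n", "                              corpus_indices, idx_to_char, char_to_idx,\n", "                              num_epochs, num_steps, lr, clipping_theta,\n", "                              batch_size, pred_period, pred_len, prefixes)" ] }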
"kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }