{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 6.4 循环神经网络的从零开始实现"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.4.0\n",
"cuda\n"
]
}
],
"source": [
"import time\n",
"import math\n",
"import numpy as np\n",
"import torch\n",
"from torch import nn, optim\n",
"import torch.nn.functional as F\n",
"\n",
"import sys\n",
"sys.path.append(\"..\") \n",
"import d2lzh_pytorch as d2l\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n",
"print(torch.__version__)\n",
"print(device)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.1 one-hot向量"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1., 0., 0., ..., 0., 0., 0.],\n",
" [ 0., 0., 1., ..., 0., 0., 0.]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def one_hot(x, n_class, dtype=torch.float32): \n",
" # X shape: (batch), output shape: (batch, n_class)\n",
" x = x.long()\n",
" res = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)\n",
" res.scatter_(1, x.view(-1, 1), 1)\n",
" return res\n",
" \n",
"x = torch.tensor([0, 2])\n",
"one_hot(x, vocab_size)"
]
},
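{
"cell_type": "markdown",
"metadata": {},
"source": [
"Side note: PyTorch 1.1 and later ship a built-in `torch.nn.functional.one_hot` that does the same job (it returns a long tensor, so we cast to float). This notebook targets PyTorch 0.4.0, which is why we wrote the helper above; the sketch below is guarded so it only runs on newer versions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch: torch.nn.functional.one_hot only exists in PyTorch >= 1.1\n",
"if tuple(int(v) for v in torch.__version__.split('+')[0].split('.')[:2]) >= (1, 1):\n",
"    print(F.one_hot(x, vocab_size).float())  # same result as one_hot(x, vocab_size)"
]
},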
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5 torch.Size([2, 1027])\n"
]
}
],
"source": [
"# 本函数已保存在d2lzh_pytorch包中方便以后使用\n",
"def to_onehot(X, n_class): \n",
" # X shape: (batch, seq_len), output: seq_len elements of (batch, n_class)\n",
" return [one_hot(X[:, i], n_class) for i in range(X.shape[1])]\n",
"\n",
"X = torch.arange(10).view(2, 5)\n",
"inputs = to_onehot(X, vocab_size)\n",
"print(len(inputs), inputs[0].shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.2 初始化模型参数"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"will use cuda\n"
]
}
],
"source": [
"num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size\n",
"print('will use', device)\n",
"\n",
"def get_params():\n",
" def _one(shape):\n",
" ts = torch.tensor(np.random.normal(0, 0.01, size=shape), device=device, dtype=torch.float32)\n",
" return torch.nn.Parameter(ts, requires_grad=True)\n",
"\n",
" # 隐藏层参数\n",
" W_xh = _one((num_inputs, num_hiddens))\n",
" W_hh = _one((num_hiddens, num_hiddens))\n",
" b_h = torch.nn.Parameter(torch.zeros(num_hiddens, device=device, requires_grad=True))\n",
" # 输出层参数\n",
" W_hq = _one((num_hiddens, num_outputs))\n",
" b_q = torch.nn.Parameter(torch.zeros(num_outputs, device=device, requires_grad=True))\n",
" return nn.ParameterList([W_xh, W_hh, b_h, W_hq, b_q])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.3 定义模型"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def init_rnn_state(batch_size, num_hiddens, device):\n",
" return (torch.zeros((batch_size, num_hiddens), device=device), )"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def rnn(inputs, state, params):\n",
" # inputs和outputs皆为num_steps个形状为(batch_size, vocab_size)的矩阵\n",
" W_xh, W_hh, b_h, W_hq, b_q = params\n",
" H, = state\n",
" outputs = []\n",
" for X in inputs:\n",
" H = torch.tanh(torch.matmul(X, W_xh) + torch.matmul(H, W_hh) + b_h)\n",
" Y = torch.matmul(H, W_hq) + b_q\n",
" outputs.append(Y)\n",
" return outputs, (H,)"
]
},
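{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `rnn` function above computes, at each time step $t$,\n",
"\n",
"$$\\boldsymbol{H}_t = \\tanh(\\boldsymbol{X}_t \\boldsymbol{W}_{xh} + \\boldsymbol{H}_{t-1} \\boldsymbol{W}_{hh} + \\boldsymbol{b}_h), \\qquad \\boldsymbol{O}_t = \\boldsymbol{H}_t \\boldsymbol{W}_{hq} + \\boldsymbol{b}_q,$$\n",
"\n",
"where $\\boldsymbol{X}_t$ is the one-hot input of shape (batch_size, vocab_size) and $\\boldsymbol{H}_t$ is the hidden state of shape (batch_size, num_hiddens)."
]
},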
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5 torch.Size([2, 1027]) torch.Size([2, 256])\n"
]
}
],
"source": [
"state = init_rnn_state(X.shape[0], num_hiddens, device)\n",
"inputs = to_onehot(X.to(device), vocab_size)\n",
"params = get_params()\n",
"outputs, state_new = rnn(inputs, state, params)\n",
"print(len(outputs), outputs[0].shape, state_new[0].shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.4 定义预测函数"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 本函数已保存在d2lzh_pytorch包中方便以后使用\n",
"def predict_rnn(prefix, num_chars, rnn, params, init_rnn_state,\n",
" num_hiddens, vocab_size, device, idx_to_char, char_to_idx):\n",
" state = init_rnn_state(1, num_hiddens, device)\n",
" output = [char_to_idx[prefix[0]]]\n",
" for t in range(num_chars + len(prefix) - 1):\n",
" # 将上一时间步的输出作为当前时间步的输入\n",
" X = to_onehot(torch.tensor([[output[-1]]], device=device), vocab_size)\n",
" # 计算输出和更新隐藏状态\n",
" (Y, state) = rnn(X, state, params)\n",
" # 下一个时间步的输入是prefix里的字符或者当前的最佳预测字符\n",
" if t < len(prefix) - 1:\n",
" output.append(char_to_idx[prefix[t + 1]])\n",
" else:\n",
" output.append(int(Y[0].argmax(dim=1).item()))\n",
" return ''.join([idx_to_char[i] for i in output])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'分开西圈绪升王凝瓜必客映'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict_rnn('分开', 10, rnn, params, init_rnn_state, num_hiddens, vocab_size,\n",
" device, idx_to_char, char_to_idx)"
]
},
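{
"cell_type": "markdown",
"metadata": {},
"source": [
"`predict_rnn` decodes greedily, always appending the most probable character. A common variant (a sketch of ours, not part of `d2lzh_pytorch`) samples the next character from the softmax distribution instead, which tends to produce more varied lyrics:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch (our variant, not from the book): sample the next character\n",
"# from the softmax distribution instead of taking the argmax\n",
"def predict_rnn_sampling(prefix, num_chars, rnn, params, init_rnn_state,\n",
"                         num_hiddens, vocab_size, device, idx_to_char, char_to_idx):\n",
"    state = init_rnn_state(1, num_hiddens, device)\n",
"    output = [char_to_idx[prefix[0]]]\n",
"    for t in range(num_chars + len(prefix) - 1):\n",
"        X = to_onehot(torch.tensor([[output[-1]]], device=device), vocab_size)\n",
"        (Y, state) = rnn(X, state, params)\n",
"        if t < len(prefix) - 1:\n",
"            output.append(char_to_idx[prefix[t + 1]])\n",
"        else:\n",
"            probs = F.softmax(Y[0], dim=1)  # shape (1, vocab_size)\n",
"            output.append(int(torch.multinomial(probs, 1).item()))\n",
"    return ''.join([idx_to_char[i] for i in output])\n",
"\n",
"predict_rnn_sampling('分开', 10, rnn, params, init_rnn_state, num_hiddens,\n",
"                     vocab_size, device, idx_to_char, char_to_idx)"
]
},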
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.5 裁剪梯度"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 本函数已保存在d2lzh_pytorch包中方便以后使用\n",
"def grad_clipping(params, theta, device):\n",
" norm = torch.tensor([0.0], device=device)\n",
" for param in params:\n",
" norm += (param.grad.data ** 2).sum()\n",
" norm = norm.sqrt().item()\n",
" if norm > theta:\n",
" for param in params:\n",
" param.grad.data *= (theta / norm)"
]
},
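{
"cell_type": "markdown",
"metadata": {},
"source": [
"`grad_clipping` concatenates the gradients of all model parameters into one vector $\\boldsymbol{g}$ and rescales it to\n",
"\n",
"$$\\min\\left(\\frac{\\theta}{\\|\\boldsymbol{g}\\|}, 1\\right)\\boldsymbol{g},$$\n",
"\n",
"so the norm of the clipped gradient never exceeds $\\theta$. This counters the exploding gradients that RNNs are prone to."
]
},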
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.6 困惑度\n",
"## 6.4.7 定义模型训练函数"
]
},
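{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick numeric check of the perplexity formula (the training function below reports `math.exp(l_sum / n)`), the sketch here plugs in hypothetical average cross-entropy values rather than measured ones:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical average cross-entropy losses in nats, not measured values:\n",
"# 0 nats -> a perfect model, log(vocab_size) nats -> a uniform random model\n",
"for avg_ce in [0.0, 2.0, math.log(vocab_size)]:\n",
"    print('avg cross-entropy %.4f -> perplexity %.2f' % (avg_ce, math.exp(avg_ce)))"
]
},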
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 本函数已保存在d2lzh_pytorch包中方便以后使用\n",
"def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,\n",
" vocab_size, device, corpus_indices, idx_to_char,\n",
" char_to_idx, is_random_iter, num_epochs, num_steps,\n",
" lr, clipping_theta, batch_size, pred_period,\n",
" pred_len, prefixes):\n",
" if is_random_iter:\n",
" data_iter_fn = d2l.data_iter_random\n",
" else:\n",
" data_iter_fn = d2l.data_iter_consecutive\n",
" params = get_params()\n",
" loss = nn.CrossEntropyLoss()\n",
"\n",
" for epoch in range(num_epochs):\n",
" if not is_random_iter: # 如使用相邻采样在epoch开始时初始化隐藏状态\n",
" state = init_rnn_state(batch_size, num_hiddens, device)\n",
" l_sum, n, start = 0.0, 0, time.time()\n",
" data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, device)\n",
" for X, Y in data_iter:\n",
" if is_random_iter: # 如使用随机采样,在每个小批量更新前初始化隐藏状态\n",
" state = init_rnn_state(batch_size, num_hiddens, device)\n",
" else: # 否则需要使用detach函数从计算图分离隐藏状态\n",
" for s in state:\n",
" s.detach_()\n",
" \n",
" inputs = to_onehot(X, vocab_size)\n",
" # outputs有num_steps个形状为(batch_size, vocab_size)的矩阵\n",
" (outputs, state) = rnn(inputs, state, params)\n",
" # 拼接之后形状为(num_steps * batch_size, vocab_size)\n",
" outputs = torch.cat(outputs, dim=0)\n",
" # Y的形状是(batch_size, num_steps),转置后再变成长度为\n",
" # batch * num_steps 的向量,这样跟输出的行一一对应\n",
" y = torch.transpose(Y, 0, 1).contiguous().view(-1)\n",
" # 使用交叉熵损失计算平均分类误差\n",
" l = loss(outputs, y.long())\n",
" \n",
" # 梯度清0\n",
" if params[0].grad is not None:\n",
" for param in params:\n",
" param.grad.data.zero_()\n",
" l.backward()\n",
" grad_clipping(params, clipping_theta, device) # 裁剪梯度\n",
" d2l.sgd(params, lr, 1) # 因为误差已经取过均值,梯度不用再做平均\n",
" l_sum += l.item() * y.shape[0]\n",
" n += y.shape[0]\n",
"\n",
" if (epoch + 1) % pred_period == 0:\n",
" print('epoch %d, perplexity %f, time %.2f sec' % (\n",
" epoch + 1, math.exp(l_sum / n), time.time() - start))\n",
" for prefix in prefixes:\n",
" print(' -', predict_rnn(prefix, pred_len, rnn, params, init_rnn_state,\n",
" num_hiddens, vocab_size, device, idx_to_char, char_to_idx))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.4.8 训练模型并创作歌词"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"num_epochs, num_steps, batch_size, lr, clipping_theta = 250, 35, 32, 1e2, 1e-2\n",
"pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 50, perplexity 70.039647, time 0.11 sec\n",
" - 分开 我不要再想 我不能 想你的让我 我的可 你怎么 一颗四 一颗四 我不要 一颗两 一颗四 一颗四 我\n",
" - 不分开 我不要再 你你的外 在人 别你的让我 狂的可 语人两 我不要 一颗两 一颗四 一颗四 我不要 一\n",
"epoch 100, perplexity 9.726828, time 0.12 sec\n",
" - 分开 一直的美栈人 一起看 我不要好生活 你知不觉 我已好好生活 我知道好生活 后知不觉 我跟了这生活 \n",
" - 不分开堡 我不要再想 我不 我不 我不要再想你 不知不觉 你已经离开我 不知不觉 我跟了好生活 我知道好生\n",
"epoch 150, perplexity 2.864874, time 0.11 sec\n",
" - 分开 一只会停留 有不它元羞 这蝪什么奇怪的事都有 包括像猫的狗 印地安老斑鸠 平常话不多 除非是乌鸦抢\n",
" - 不分开扫 我不你再想 我不能再想 我不 我不 我不要再想你 不知不觉 你已经离开我 不知不觉 我跟了这节奏\n",
"epoch 200, perplexity 1.597790, time 0.11 sec\n",
" - 分开 有杰伦 干 载颗拳满的让空美空主 相爱还有个人 再狠狠忘记 你爱过我的证 有晶莹的手滴 让说些人\n",
" - 不分开扫 我叫你爸 你打我妈 这样对吗干嘛这样 何必让它牵鼻子走 瞎 说底牵打我妈要 难道球耳 快使用双截\n",
"epoch 250, perplexity 1.303903, time 0.12 sec\n",
" - 分开 有杰人开留 仙唱它怕羞 蜥蝪横著走 这里什么奇怪的事都有 包括像猫的狗 印地安老斑鸠 平常话不多 \n",
" - 不分开简 我不能再想 我不 我不 我不能 爱情走的太快就像龙卷风 不能承受我已无处可躲 我不要再想 我不能\n"
]
}
],
"source": [
"train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,\n",
" vocab_size, device, corpus_indices, idx_to_char,\n",
" char_to_idx, True, num_epochs, num_steps, lr,\n",
" clipping_theta, batch_size, pred_period, pred_len,\n",
" prefixes)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 50, perplexity 59.514416, time 0.11 sec\n",
" - 分开 我想要这 我想了空 我想了空 我想了空 我想了空 我想了空 我想了空 我想了空 我想了空 我想了空\n",
" - 不分开 我不要这 全使了双 我想了这 我想了空 我想了空 我想了空 我想了空 我想了空 我想了空 我想了空\n",
"epoch 100, perplexity 6.801417, time 0.11 sec\n",
" - 分开 我说的这样笑 想你都 不着我 我想就这样牵 你你的回不笑多难的 它在云实 有一条事 全你了空 \n",
" - 不分开觉 你已经离开我 不知不觉 我跟好这节活 我该好好生活 不知不觉 你跟了离开我 不知不觉 我跟好这节\n",
"epoch 150, perplexity 2.063730, time 0.16 sec\n",
" - 分开 我有到这样牵着你的手不放开 爱可不可以简简单单没有伤 古有你烦 我有多烦恼向 你知带悄 回我的外\n",
" - 不分开觉 你已经很个我 不知不觉 我跟了这节奏 后知后觉 又过了一个秋 后哼哈兮 快使用双截棍 哼哼哈兮 \n",
"epoch 200, perplexity 1.300031, time 0.11 sec\n",
" - 分开 我想要这样牵着你的手不放开 爱能不能够永远单甜没有伤害 你 靠着我的肩膀 你 在我胸口睡著 像这样\n",
" - 不分开觉 你已经离开我 不知不觉 我跟了这节奏 后知后觉 又过了一个秋 后知后觉 我该好好生活 我该好好生\n",
"epoch 250, perplexity 1.164455, time 0.11 sec\n",
" - 分开 我有一这样布 对你依依不舍 连隔壁邻居都猜到我现在的感受 河边的风 在吹着头发飘动 牵着你的手 一\n",
" - 不分开觉 你已经离开我 不知不觉 我跟了这节奏 后知后觉 又过了一个秋 后知后觉 我该好好生活 我该好好生\n"
]
}
],
"source": [
"train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,\n",
" vocab_size, device, corpus_indices, idx_to_char,\n",
" char_to_idx, False, num_epochs, num_steps, lr,\n",
" clipping_theta, batch_size, pred_period, pred_len,\n",
" prefixes)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}