{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 7.2 梯度下降和随机梯度下降"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import torch\n",
"import math\n",
"import sys\n",
"sys.path.append(\"..\") \n",
"import d2lzh_pytorch as d2l"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7.2.1 一维梯度下降"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 10, x: 0.06046617599999997\n"
]
}
],
"source": [
"def gd(eta):\n",
" x = 10\n",
" results = [x]\n",
" for i in range(10):\n",
" x -= eta * 2 * x # f(x) = x * x的导数为f'(x) = 2 * x\n",
" results.append(x)\n",
" print('epoch 10, x:', x)\n",
" return results\n",
"\n",
"res = gd(0.2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def show_trace(res):\n",
" n = max(abs(min(res)), abs(max(res)), 10)\n",
" f_line = np.arange(-n, n, 0.1)\n",
" d2l.set_figsize()\n",
" d2l.plt.plot(f_line, [x * x for x in f_line])\n",
" d2l.plt.plot(res, [x * x for x in res], '-o')\n",
" d2l.plt.xlabel('x')\n",
" d2l.plt.ylabel('f(x)')\n",
"\n",
"show_trace(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7.2.2 学习率"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 10, x: 3.4867844009999995\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"show_trace(gd(0.05))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 10, x: 61.917364224000096\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"show_trace(gd(1.1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7.2.3 多维梯度下降"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def train_2d(trainer): # 本函数将保存在d2lzh_pytorch包中方便以后使用\n",
" x1, x2, s1, s2 = -5, -2, 0, 0 # s1和s2是自变量状态,本章后续几节会使用\n",
" results = [(x1, x2)]\n",
" for i in range(20):\n",
" x1, x2, s1, s2 = trainer(x1, x2, s1, s2)\n",
" results.append((x1, x2))\n",
" print('epoch %d, x1 %f, x2 %f' % (i + 1, x1, x2))\n",
" return results\n",
"\n",
"def show_trace_2d(f, results): # 本函数将保存在d2lzh_pytorch包中方便以后使用\n",
" d2l.plt.plot(*zip(*results), '-o', color='#ff7f0e')\n",
" x1, x2 = np.meshgrid(np.arange(-5.5, 1.0, 0.1), np.arange(-3.0, 1.0, 0.1))\n",
" d2l.plt.contour(x1, x2, f(x1, x2), colors='#1f77b4')\n",
" d2l.plt.xlabel('x1')\n",
" d2l.plt.ylabel('x2')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 20, x1 -0.057646, x2 -0.000073\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"eta = 0.1\n",
"\n",
"def f_2d(x1, x2): # 目标函数\n",
" return x1 ** 2 + 2 * x2 ** 2\n",
"\n",
"def gd_2d(x1, x2, s1, s2):\n",
" return (x1 - eta * 2 * x1, x2 - eta * 4 * x2, 0, 0)\n",
"\n",
"show_trace_2d(f_2d, train_2d(gd_2d))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7.2.4 随机梯度下降"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 20, x1 -0.047150, x2 -0.075628\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def sgd_2d(x1, x2, s1, s2):\n",
" return (x1 - eta * (2 * x1 + np.random.normal(0.1)),\n",
" x2 - eta * (4 * x2 + np.random.normal(0.1)), 0, 0)\n",
"\n",
"show_trace_2d(f_2d, train_2d(sgd_2d))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}