From f5edc2b1f1b1e8a8a4f107462004e6fed7d10126 Mon Sep 17 00:00:00 2001
From: dxm
Date: Mon, 18 Mar 2019 18:27:15 +0800
Subject: [PATCH 1/3] update to fastai 1.0.48

---
 Cyclical LR and momentums.ipynb | 168 ++++++++++++++++++++++----------
 1 file changed, 114 insertions(+), 54 deletions(-)

diff --git a/Cyclical LR and momentums.ipynb b/Cyclical LR and momentums.ipynb
index 2a33e32..475009b 100644
--- a/Cyclical LR and momentums.ipynb
+++ b/Cyclical LR and momentums.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# run this cell if you are using Google Colab\n",
+    "!curl -s https://course.fast.ai/setup/colab | bash"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -11,6 +21,17 @@
     "%autoreload 2"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# check that the installed fastai version is >= 1.0.48\n",
+    "import fastai\n",
+    "print(fastai.__version__)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -24,8 +45,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from fastai.conv_learner import *\n",
-    "PATH = Path(\"../data/cifar/\")"
+    "from fastai.vision import *\n",
+    "path = untar_data(URLs.CIFAR, dest='../data/cifar10'); path"
    ]
   },
   {
@@ -58,9 +79,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def get_data(sz,bs):\n",
-    "    tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)\n",
-    "    return ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)"
+    "def get_data(bs):\n",
+    "    ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])\n",
+    "    data = ImageDataBunch.from_folder(path, valid='test', classes=classes, ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)\n",
+    "    return data\n"
    ]
   },
   {
@@ -79,7 +101,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data = get_data(size,batch_size)"
+    "data = get_data(batch_size)"
    ]
   },
   {
@@ -171,13 +193,51 @@
     "Let's wrap our model into the fastai library to get all the functions it can offer."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Have a look!\n",
+    "\n",
+    "Because the model architecture passed to the learner must be callable, you may need to add code like this:\n",
+    "\n",
+    "```python\n",
+    "def resnet18(pretrained=False, **kwargs):\n",
+    "    \"\"\"Constructs a ResNet-18 model.\n",
+    "    Args:\n",
+    "        pretrained (bool): If True, returns a model pre-trained on ImageNet\n",
+    "    \"\"\"\n",
+    "    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)\n",
+    "    if pretrained:\n",
+    "        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))\n",
+    "    return model\n",
+    "\n",
+    "```\n",
+    "\n",
+    "reference: [conv2d TypeError](https://forums.fast.ai/t/solved-error-while-creating-learner-for-senet-typeerror-conv2d-argument-input-position-1-must-be-tensor-not-bool/34203)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_your_model_arch(pretrained=False, **kwargs):\n",
+    "    model_arch = ResNet([9,9,9])\n",
+    "    return model_arch\n",
+    "model_arch = get_your_model_arch\n",
+    "# model2 = models.resnet18()  # passing an instance raises an exception\n",
+    "model2 = models.resnet18"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
-    "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n",
+    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n",
     "learn.crit = F.nll_loss"
    ]
   },
@@ -251,7 +311,7 @@
    }
   ],
   "source": [
-    "learn.sched.plot(10,1)"
+    "learn.recorder.plot(10,1)"
    ]
   },
   {
@@ -453,7 +513,7 @@
    }
   ],
   "source": [
-    "learn.fit(0.8,1,cycle_len=95,use_clr_beta=(10,13.68,0.95,0.85),wds=1e-4)"
+    "learn.fit_one_cycle(cyc_len=95, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.95, 0.85), wd=1e-4)"
    ]
   },
   {
@@ -480,7 +540,7 @@
    }
   ],
   "source": [
-    "learn.sched.plot_lr()"
+    "learn.recorder.plot_lr()"
    ]
   },
   {
@@ -518,12 +578,12 @@
   ],
   "source": [
    "fig,ax = plt.subplots(2,1,figsize=(8,12))\n",
-    "ax[0].plot(list(range(95)),learn.sched.val_losses, label='Validation loss')\n",
-    "ax[0].plot(list(range(95)),[learn.sched.losses[i] for i in range(97,95*98,98)], label='Training loss')\n",
+    "ax[0].plot(list(range(95)),learn.recorder.val_losses, label='Validation loss')\n",
+    "ax[0].plot(list(range(95)),[learn.recorder.losses[i] for i in 
range(97,95*98,98)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(95)),learn.sched.rec_metrics)\n", + "ax[1].plot(list(range(95)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -876,9 +936,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit(3,1,cycle_len=50,use_clr_beta=(20,10,0.95,0.85),wds=1e-4)" + "learn.fit_one_cycle(cyc_len=50, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" ] }, { @@ -898,7 +958,7 @@ } ], "source": [ - "learn.sched.plot_lr()" + "learn.recorder.plot_lr()" ] }, { @@ -929,12 +989,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(50)),learn.sched.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(50)),[learn.sched.losses[i] for i in range(97,50*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(50)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(50)),[learn.recorder.losses[i] for i in range(97,50*98,98)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(50)),learn.sched.rec_metrics)\n", + "ax[1].plot(list(range(50)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -1070,9 +1130,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit(3,1,cycle_len=70,use_clr_beta=(20,10,0.95,0.85),wds=1e-4)" + "learn.fit_one_cycle(cyc_len=70, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" ] }, { @@ -1110,12 +1170,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(70)),learn.sched.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(70)),[learn.sched.losses[i] for i in range(97,70*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(70)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(70)),[learn.recorder.losses[i] for i in range(97,70*98,98)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(70)),learn.sched.rec_metrics)\n", + "ax[1].plot(list(range(70)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -1278,9 +1338,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit(0.8,1,cycle_len=95,use_clr_beta=(10,13.68,0.9,0.9),wds=1e-4)" + "learn.fit_one_cycle(cyc_len=95, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.9, 0.9), wd=1e-4)" ] }, { @@ -1307,7 +1367,7 @@ } ], "source": [ - "learn.sched.plot_lr()" + "learn.recorder.plot_lr()" ] }, { @@ -1338,12 +1398,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(95)),learn.sched.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(95)),[learn.sched.losses[i] for i in range(97,95*98,98)], label='Training loss')\n", + 
"ax[0].plot(list(range(95)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(95)),[learn.recorder.losses[i] for i in range(97,95*98,98)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(95)),learn.sched.rec_metrics)\n", + "ax[1].plot(list(range(95)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -1459,9 +1519,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit(3,1,cycle_len=50,use_clr_beta=(20,10,0.9,0.9),wds=1e-4)" + "learn.fit_one_cycle(cyc_len=50, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.9, 0.9), wd=1e-4)" ] }, { @@ -1551,9 +1611,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.lr_find2(wds=1e-2,start_lr=0.01,end_lr=100,num_it=100)" + "learn.lr_find(wd=1e-2,start_lr=0.01,end_lr=100,num_it=100)" ] }, { @@ -1562,9 +1622,9 @@ "metadata": {}, "outputs": [], "source": [ - "trn_lossesw2 = learn.sched.losses\n", - "val_lossesw2 = learn.sched.val_losses\n", - "lrsw2 = learn.sched.lrs" + "trn_lossesw2 = learn.recorder.losses\n", + "val_lossesw2 = learn.recorder.val_losses\n", + "lrsw2 = learn.recorder.lrs" ] }, { @@ -1633,9 +1693,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.lr_find2(wds=1e-3,start_lr=0.01,end_lr=100,num_it=100)" + "learn.lr_find(wd=1e-3,start_lr=0.01,end_lr=100,num_it=100)" ] }, { @@ -1644,9 +1704,9 @@ "metadata": {}, "outputs": [], "source": [ - "trn_lossesw3 = learn.sched.losses\n", - "val_lossesw3 = learn.sched.val_losses\n", - "lrsw3 = learn.sched.lrs" + "trn_lossesw3 = learn.recorder.losses\n", + "val_lossesw3 = learn.recorder.val_losses\n", + "lrsw3 = learn.recorder.lrs" ] }, { @@ -1692,9 +1752,9 @@ } ], "source": [ - "learn = ConvLearner.from_model_data(ResNet([9,9,9]), data)\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.lr_find2(wds=1e-4,start_lr=0.01,end_lr=100,num_it=100)" + "learn.lr_find(wd=1e-2,start_lr=0.01,end_lr=100,num_it=100)" ] }, { @@ -1703,9 +1763,9 @@ "metadata": {}, "outputs": [], "source": [ - "trn_lossesw4 = learn.sched.losses\n", - "val_lossesw4 = learn.sched.val_losses\n", - "lrsw4 = learn.sched.lrs" + "trn_lossesw4 = learn.recorder.losses\n", + "val_lossesw4 = learn.recorder.val_losses\n", + "lrsw4 = learn.recorder.lrs" ] }, { @@ -1774,7 +1834,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.6.3" } }, "nbformat": 4, From c48aba34de39125699e6e6c2d30f7c0d3cf192bf Mon Sep 17 00:00:00 2001 From: lai-bluejay Date: Mon, 18 Mar 2019 23:07:09 +0800 Subject: [PATCH 2/3] update resnet --- Cyclical LR and momentums.ipynb | 178 +++++++++++++++++++++----------- 1 file changed, 115 insertions(+), 63 deletions(-) diff --git a/Cyclical LR and momentums.ipynb b/Cyclical LR and momentums.ipynb index 475009b..e955c27 100644 --- a/Cyclical LR and momentums.ipynb +++ b/Cyclical LR and momentums.ipynb @@ -115,36 +115,64 @@ 
"cell_type": "markdown", "metadata": {}, "source": [ - "Basic bloc of the resnet version for cifar10 (no bottlenecks)." + "Basic bloc of the resnet version for cifar10 (no bottlenecks).\n", + "Reference to pytorch's BasicBlock()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "def conv3x3(in_planes, out_planes, stride=1):\n", + " \"\"\"3x3 convolution with padding\"\"\"\n", + " return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n", + " padding=1, bias=False)\n", + "\n", + "\n", + "def conv1x1(in_planes, out_planes, stride=1):\n", + " \"\"\"1x1 convolution\"\"\"\n", + " return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)\n", + " \n", "class BasicBlock(nn.Module):\n", - " \n", - " def __init__(self, ch_in, ch_out, stride=1):\n", - " super().__init__()\n", - " self.bn1 = nn.BatchNorm2d(ch_out)\n", - " self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1, bias=False)\n", - " self.bn2 = nn.BatchNorm2d(ch_out)\n", - " self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1, bias=False)\n", + " expansion = 1\n", "\n", - " if stride != 1 or ch_in != ch_out:\n", - " self.shortcut = nn.Sequential(\n", - " nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride, bias=False),\n", - " nn.BatchNorm2d(ch_out)\n", - " )\n", + " def __init__(self, inplanes, planes, stride=1, downsample=None):\n", + " super(BasicBlock, self).__init__()\n", + " # Both self.conv1 and self.downsample layers downsample the input when stride != 1\n", + " self.conv1 = conv3x3(inplanes, planes, stride)\n", + " self.bn1 = nn.BatchNorm2d(planes)\n", + " self.relu = nn.ReLU(inplace=True)\n", + " self.conv2 = conv3x3(planes, planes)\n", + " self.bn2 = nn.BatchNorm2d(planes)\n", + " self.downsample = downsample\n", + " self.stride = stride\n", "\n", " def forward(self, x):\n", - " shortcut = self.shortcut(x) if hasattr(self, 'shortcut') else x\n", + " identity = x\n", + "\n", " out = self.conv1(x)\n", - " out = self.bn2(self.conv2(F.relu(self.bn1(out))))\n", - " out += shortcut\n", - " return F.relu(out)" + " out = self.bn1(out)\n", + " out = self.relu(out)\n", + "\n", + " out = self.conv2(out)\n", + " out = self.bn2(out)\n", + "\n", + " if self.downsample is not None:\n", + " identity = self.downsample(x)\n", + "\n", + " out += identity\n", + " out = self.relu(out)\n", + "\n", + " return out" ] }, { @@ -154,36 +182,43 @@ "Resnet for cifar10 with 56 convolutional layers." 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We use `models.ResNet` imported from PyTorch instead."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
-    "class ResNet(nn.Module):\n",
-    "    def __init__(self, num_blocks, num_classes=10):\n",
-    "        super().__init__()\n",
-    "        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)\n",
-    "        self.bn1 = nn.BatchNorm2d(16)\n",
-    "        self.layer1 = self.make_group_layer(16, 16, num_blocks[0], stride=1)\n",
-    "        self.layer2 = self.make_group_layer(16, 32, num_blocks[1], stride=2)\n",
-    "        self.layer3 = self.make_group_layer(32, 64, num_blocks[2], stride=2)\n",
-    "        self.linear = nn.Linear(64, num_classes)\n",
+    "# class ResNet(nn.Module):\n",
+    "#     def __init__(self, num_blocks, num_classes=10):\n",
+    "#         super().__init__()\n",
+    "#         self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)\n",
+    "#         self.bn1 = nn.BatchNorm2d(16)\n",
+    "#         self.layer1 = self.make_group_layer(16, 16, num_blocks[0], stride=1)\n",
+    "#         self.layer2 = self.make_group_layer(16, 32, num_blocks[1], stride=2)\n",
+    "#         self.layer3 = self.make_group_layer(32, 64, num_blocks[2], stride=2)\n",
+    "#         self.linear = nn.Linear(64, num_classes)\n",
     "\n",
-    "    def make_group_layer(self,ch_in, ch_out, num_blocks, stride):\n",
-    "        layers = [BasicBlock(ch_in, ch_out, stride)]\n",
-    "        for i in range(num_blocks-1):\n",
-    "            layers.append(BasicBlock(ch_out, ch_out, stride=1))\n",
-    "        return nn.Sequential(*layers)\n",
+    "#     def make_group_layer(self,ch_in, ch_out, num_blocks, stride):\n",
+    "#         layers = [BasicBlock(ch_in, ch_out, stride)]\n",
+    "#         for i in range(num_blocks-1):\n",
+    "#             layers.append(BasicBlock(ch_out, ch_out, stride=1))\n",
+    "#         return nn.Sequential(*layers)\n",
     "\n",
-    "    def forward(self, x):\n",
-    "        out = F.relu(self.bn1(self.conv1(x)))\n",
-    "        out = self.layer1(out)\n",
-    "        out = self.layer2(out)\n",
-    "        out = self.layer3(out)\n",
-    "        out = F.adaptive_avg_pool2d(out, 1)\n",
-    "        out = out.view(out.size(0), -1)\n",
-    "        return F.log_softmax(self.linear(out))"
+    "#     def forward(self, x):\n",
+    "#         out = F.relu(self.bn1(self.conv1(x)))\n",
+    "#         out = self.layer1(out)\n",
+    "#         out = self.layer2(out)\n",
+    "#         out = self.layer3(out)\n",
+    "#         out = F.adaptive_avg_pool2d(out, 1)\n",
+    "#         out = out.view(out.size(0), -1)\n",
+    "#         return F.log_softmax(self.linear(out))"
    ]
   },
@@ -214,7 +249,9 @@
     "\n",
     "```\n",
     "\n",
-    "reference: [conv2d TypeError](https://forums.fast.ai/t/solved-error-while-creating-learner-for-senet-typeerror-conv2d-argument-input-position-1-must-be-tensor-not-bool/34203)\n"
+    "reference: [conv2d TypeError](https://forums.fast.ai/t/solved-error-while-creating-learner-for-senet-typeerror-conv2d-argument-input-position-1-must-be-tensor-not-bool/34203)\n",
+    "\n",
+    "We use `models.ResNet` imported from PyTorch instead.\n"
    ]
   },
@@ -224,20 +261,29 @@
    "outputs": [],
    "source": [
     "def get_your_model_arch(pretrained=False, **kwargs):\n",
-    "    model_arch = ResNet([9,9,9])\n",
+    "    model_arch = models.ResNet(BasicBlock, [9,9,9,1])\n",
     "    return model_arch\n",
     "model_arch = get_your_model_arch\n",
     "# model2 = models.resnet18()  # passing an instance raises an exception\n",
     "model2 = models.resnet18"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fastai import metrics"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
     "learn = 
cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n", "learn.crit = F.nll_loss" ] }, @@ -513,7 +559,8 @@ } ], "source": [ - "learn.fit_one_cycle(cyc_len=95, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.95, 0.85), wd=1e-4)" + "cyc_len=95\n", + "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.95, 0.85), wd=1e-4)" ] }, { @@ -578,12 +625,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(95)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(95)),[learn.recorder.losses[i] for i in range(97,95*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(95)),learn.recorder.metrics)\n", + "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -755,7 +802,8 @@ "source": [ "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit_one_cycle(cyc_len=95, max_lr=0.8,div_factor=10, pct_start=0.5,moms=(0.95, 0.85), wd=1e-4)" + "cyc_len=95\n", + "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=0.8,div_factor=10, pct_start=0.5,moms=(0.95, 0.85), wd=1e-4)" ] }, { @@ -813,12 +861,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(95)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(95)),[learn.recorder.losses[i] for i in range(97,95*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(95)),learn.recorder.metrics)\n", + "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -938,7 +986,8 @@ "source": [ "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit_one_cycle(cyc_len=50, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" + "cyc_len=50\n", + "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" ] }, { @@ -989,12 +1038,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(50)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(50)),[learn.recorder.losses[i] for i in range(97,50*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(50)),learn.recorder.metrics)\n", + "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", 
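+    "# learn.recorder.metrics holds one accuracy value per epoch\n",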
"ax[1].set_ylabel('Accuracy')" ] @@ -1132,7 +1181,8 @@ "source": [ "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit_one_cycle(cyc_len=70, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" + "cyc_len=70\n", + "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" ] }, { @@ -1170,12 +1220,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(70)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(70)),[learn.recorder.losses[i] for i in range(97,70*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(70)),learn.recorder.metrics)\n", + "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -1340,7 +1390,8 @@ "source": [ "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit_one_cycle(cyc_len=95, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.9, 0.9), wd=1e-4)" + "cyc_len=96\n", + "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.9, 0.9), wd=1e-4)" ] }, { @@ -1398,12 +1449,12 @@ ], "source": [ "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(95)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(95)),[learn.recorder.losses[i] for i in range(97,95*98,98)], label='Training loss')\n", + "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", + "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", "ax[0].set_xlabel('Epoch')\n", "ax[0].set_ylabel('Loss')\n", "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(95)),learn.recorder.metrics)\n", + "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", "ax[1].set_xlabel('Epoch')\n", "ax[1].set_ylabel('Accuracy')" ] @@ -1521,7 +1572,8 @@ "source": [ "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", "learn.crit = F.nll_loss\n", - "learn.fit_one_cycle(cyc_len=50, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.9, 0.9), wd=1e-4)" + "cyc_len=50\n", + "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.9, 0.9), wd=1e-4)" ] }, { From d5e9c611bf0372ab904e0ff243f29d8d2a0058e1 Mon Sep 17 00:00:00 2001 From: lai-bluejay Date: Mon, 18 Mar 2019 23:25:24 +0800 Subject: [PATCH 3/3] plot loss&acc func --- Cyclical LR and momentums.ipynb | 80 +++++++++++++++------------------ 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/Cyclical LR and momentums.ipynb b/Cyclical LR and momentums.ipynb index e955c27..fe573be 100644 --- a/Cyclical LR and momentums.ipynb +++ b/Cyclical LR and momentums.ipynb @@ -800,7 +800,7 @@ } ], "source": [ - "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n", "learn.crit = F.nll_loss\n", "cyc_len=95\n", 
"learn.fit_one_cycle(cyc_len=cyc_len, max_lr=0.8,div_factor=10, pct_start=0.5,moms=(0.95, 0.85), wd=1e-4)" @@ -860,15 +860,18 @@ } ], "source": [ - "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", - "ax[0].set_xlabel('Epoch')\n", - "ax[0].set_ylabel('Loss')\n", - "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", - "ax[1].set_xlabel('Epoch')\n", - "ax[1].set_ylabel('Accuracy')" + "def plot_loss_and_acc(learn, cyc_len):\n", + " fig,ax = plt.subplots(2,1,figsize=(8,12))\n", + " ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", + " ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", + " ax[0].set_xlabel('Epoch')\n", + " ax[0].set_ylabel('Loss')\n", + " ax[0].legend(loc='upper right')\n", + " ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", + " ax[1].set_xlabel('Epoch')\n", + " ax[1].set_ylabel('Accuracy')\n", + " \n", + "plot_loss_and_acc(learn, cyc_len)" ] }, { @@ -984,7 +987,7 @@ } ], "source": [ - "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n", "learn.crit = F.nll_loss\n", "cyc_len=50\n", "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" @@ -1037,15 +1040,8 @@ } ], "source": [ - "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", - "ax[0].set_xlabel('Epoch')\n", - "ax[0].set_ylabel('Loss')\n", - "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", - "ax[1].set_xlabel('Epoch')\n", - "ax[1].set_ylabel('Accuracy')" + "\n", + "plot_loss_and_acc(learn, cyc_len)" ] }, { @@ -1179,7 +1175,7 @@ } ], "source": [ - "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n", "learn.crit = F.nll_loss\n", "cyc_len=70\n", "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.95, 0.85), wd=1e-4)" @@ -1219,15 +1215,8 @@ } ], "source": [ - "fig,ax = plt.subplots(2,1,figsize=(8,12))\n", - "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n", - "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n", - "ax[0].set_xlabel('Epoch')\n", - "ax[0].set_ylabel('Loss')\n", - "ax[0].legend(loc='upper right')\n", - "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n", - "ax[1].set_xlabel('Epoch')\n", - "ax[1].set_ylabel('Accuracy')" + "\n", + "plot_loss_and_acc(learn, cyc_len)" ] }, { @@ -1388,9 +1377,9 @@ } ], "source": [ - "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n", + "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n", "learn.crit = F.nll_loss\n", - "cyc_len=96\n", + "cyc_len=95\n", "learn.fit_one_cycle(cyc_len=cyc_len, 
     "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=0.8,div_factor=10, pct_start=0.1368,moms=(0.9, 0.9), wd=1e-4)"
    ]
   },
@@ -1448,15 +1437,8 @@
    }
   ],
   "source": [
-    "fig,ax = plt.subplots(2,1,figsize=(8,12))\n",
-    "ax[0].plot(list(range(cyc_len)),learn.recorder.val_losses, label='Validation loss')\n",
-    "ax[0].plot(list(range(cyc_len)),[learn.recorder.losses[i] for i in range(cyc_len+2,cyc_len*(cyc_len+3),cyc_len+3)], label='Training loss')\n",
-    "ax[0].set_xlabel('Epoch')\n",
-    "ax[0].set_ylabel('Loss')\n",
-    "ax[0].legend(loc='upper right')\n",
-    "ax[1].plot(list(range(cyc_len)),learn.recorder.metrics)\n",
-    "ax[1].set_xlabel('Epoch')\n",
-    "ax[1].set_ylabel('Accuracy')"
+    "\n",
+    "plot_loss_and_acc(learn, cyc_len)"
    ]
   },
   {
@@ -1570,7 +1552,8 @@
    }
   ],
   "source": [
-    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n",
+    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n",
     "learn.crit = F.nll_loss\n",
     "cyc_len=50\n",
     "learn.fit_one_cycle(cyc_len=cyc_len, max_lr=3,div_factor=20, pct_start=0.1,moms=(0.9, 0.9), wd=1e-4)"
    ]
   },
   {
@@ -1583,6 +1565,16 @@
     "Again the results aren't quite as good, but very close."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "plot_loss_and_acc(learn, cyc_len)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -1663,7 +1655,7 @@
    }
   ],
   "source": [
-    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n",
+    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n",
     "learn.crit = F.nll_loss\n",
     "learn.lr_find(wd=1e-2,start_lr=0.01,end_lr=100,num_it=100)"
    ]
   },
@@ -1804,7 +1796,7 @@
    }
   ],
   "source": [
-    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy).to_fp16()\n",
+    "learn = cnn_learner(data, base_arch=model_arch, metrics=metrics.accuracy)\n",
     "learn.crit = F.nll_loss\n",
     "learn.lr_find(wd=1e-4,start_lr=0.01,end_lr=100,num_it=100)"
    ]