diff --git "a/en-af/siyavula-baseline/herman_en_af_masakhane2.ipynb" "b/en-af/siyavula-baseline/herman_en_af_masakhane2.ipynb" new file mode 100644--- /dev/null +++ "b/en-af/siyavula-baseline/herman_en_af_masakhane2.ipynb" @@ -0,0 +1,1685 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "herman_en-af_masakhane2", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Igc5itf-xMGj" + }, + "source": [ + "# Masakhane - Machine Translation for African Languages (Using JoeyNMT)\n", + "\n", + "Languages: English-Afrikaans\n", + "\n", + "Author: Herman Kamper" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "l929HimrxS0a" + }, + "source": [ + "## Retrieve data and make a parallel corpus" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "oGRmDELn7Az0", + "outputId": "7c779786-db5d-4c8b-fc66-f4610f9328e5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 127 + } + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n", + "\n", + "Enter your authorization code:\n", + "··········\n", + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "Cn3tgQLzUxwn", + "colab": {} + }, + "source": [ + "# TODO: Set your source and target languages. 
Keep in mind, these traditionally use language codes as found here:\n",
+        "# These will also become the suffixes of all vocab and corpus files used throughout\n",
+        "import os\n",
+        "source_language = \"en\"\n",
+        "target_language = \"af\"\n",
+        "tag = \"baseline\" # Give a unique name to your folder - this is to ensure you don't overwrite any models you've already submitted\n",
+        "\n",
+        "os.environ[\"src\"] = source_language # Sets them in bash as well, since we often use bash scripts\n",
+        "os.environ[\"tgt\"] = target_language\n",
+        "os.environ[\"tag\"] = tag\n",
+        "\n",
+        "# This will save it to a folder in our gdrive instead!\n",
+        "!mkdir -p \"/content/drive/My Drive/colab/masakhane/$src-$tgt-$tag\"\n",
+        "os.environ[\"gdrive_path\"] = \"/content/drive/My Drive/colab/masakhane/%s-%s-%s\" % (source_language, target_language, tag)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "kBSgJHEw7Nvx",
+        "outputId": "612f08e7-8708-45cf-e75f-6ca76ded9089",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 35
+        }
+      },
+      "source": [
+        "!echo $gdrive_path"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "/content/drive/My Drive/colab/masakhane/en-af-baseline\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "xq-tDZVks7ZD",
+        "outputId": "a32e560a-968c-474a-9183-26ed242d5449",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 755
+        }
+      },
+      "source": [
+        "# Download the corpus\n",
+        "! wget \"https://www.kamperh.com/data/siyavula_en_af.noweb.3.zip\"\n",
+        "! unzip siyavula_en_af.noweb.3.zip\n",
+        "! ls -lah\n",
+        "! head -3 train.en\n",
+        "! head -3 train.af\n",
+        "! cat train.en | wc -l\n",
+        "! cat train.af | wc -l"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "--2019-10-14 12:40:33-- https://www.kamperh.com/data/siyavula_en_af.noweb.3.zip\n",
+            "Resolving www.kamperh.com (www.kamperh.com)... 185.199.109.153, 185.199.110.153, 185.199.111.153, ...\n",
+            "Connecting to www.kamperh.com (www.kamperh.com)|185.199.109.153|:443... connected.\n",
+            "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 303271 (296K) [application/zip]\n", + "Saving to: ‘siyavula_en_af.noweb.3.zip’\n", + "\n", + "\r siyavula_ 0%[ ] 0 --.-KB/s \rsiyavula_en_af.nowe 100%[===================>] 296.16K --.-KB/s in 0.05s \n", + "\n", + "2019-10-14 12:40:34 (5.89 MB/s) - ‘siyavula_en_af.noweb.3.zip’ saved [303271/303271]\n", + "\n", + "Archive: siyavula_en_af.noweb.3.zip\n", + " inflating: dev.af \n", + " inflating: dev.en \n", + " inflating: readme.md \n", + " inflating: test.af \n", + " inflating: test.en \n", + " inflating: train.af \n", + " inflating: train.en \n", + "total 1.4M\n", + "drwxr-xr-x 1 root root 4.0K Oct 14 12:40 .\n", + "drwxr-xr-x 1 root root 4.0K Oct 14 11:55 ..\n", + "drwxr-xr-x 1 root root 4.0K Oct 8 20:06 .config\n", + "-rw-rw-r-- 1 root root 29K Oct 14 12:17 dev.af\n", + "-rw-rw-r-- 1 root root 28K Oct 14 12:17 dev.en\n", + "drwx------ 3 root root 4.0K Oct 14 12:40 drive\n", + "-rw-rw-r-- 1 root root 310 Oct 11 11:46 readme.md\n", + "drwxr-xr-x 1 root root 4.0K Aug 27 16:17 sample_data\n", + "-rw-r--r-- 1 root root 297K Oct 14 10:18 siyavula_en_af.noweb.3.zip\n", + "-rw-rw-r-- 1 root root 36K Oct 14 12:17 test.af\n", + "-rw-rw-r-- 1 root root 34K Oct 14 12:17 test.en\n", + "-rw-rw-r-- 1 root root 458K Oct 14 12:17 train.af\n", + "-rw-rw-r-- 1 root root 442K Oct 14 12:17 train.en\n", + "how to introduce this topic\n", + "remind them of the lessons in the last term of gr. 4 when they learnt about the earth sun moon and planets .\n", + "use figure 1 to start them thinking about what is on the surface of the earth and under the surface of the earth .\n", + "hoe om hierdie onderwerp bekend te stel\n", + "herinner die leerders aan die lesse in die laaste kwartaal van graad 4 toe hulle van die aarde son maan en planete geleer het .\n", + "gebruik figuur 1 om hulle aan die dink te kry oor wat op die oppervlakte van die aarde en onder die oppervlakte van die aarde is .\n", + "6535\n", + "6535\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "epeCydmCyS8X" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "## Installation of JoeyNMT\n", + "\n", + "JoeyNMT is a simple, minimalist NMT package which is useful for learning and teaching. Check out the documentation for JoeyNMT [here](https://joeynmt.readthedocs.io) " + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "iBRMm4kMxZ8L", + "outputId": "4787b1c1-77a3-4004-8c1b-75a258b7b52c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + } + }, + "source": [ + "# Install JoeyNMT\n", + "! git clone https://github.com/joeynmt/joeynmt.git\n", + "! cd joeynmt; pip3 install ." 
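,
+        "\n",
+        "# Optional sanity check (our addition, not part of the original run): confirm that\n",
+        "# the freshly installed package can be imported; 'joeynmt' is its import name\n",
+        "! python3 -c \"import joeynmt; print('joeynmt OK')\""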
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Cloning into 'joeynmt'...\n",
+            "remote: Enumerating objects: 52, done.\u001b[K\n",
+            "remote: Counting objects: 100% (52/52), done.\u001b[K\n",
+            "remote: Compressing objects: 100% (40/40), done.\u001b[K\n",
+            "remote: Total 2058 (delta 29), reused 24 (delta 12), pack-reused 2006\u001b[K\n",
+            "Receiving objects: 100% (2058/2058), 2.40 MiB | 2.53 MiB/s, done.\n",
+            "Resolving deltas: 100% (1418/1418), done.\n",
+            "Processing /content/joeynmt\n",
+            "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.16.0)\n",
+            "Requirement already satisfied: pillow in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (4.3.0)\n",
+            "Requirement already satisfied: numpy<2.0,>=1.14.5 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.16.5)\n",
+            "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (41.2.0)\n",
+            "Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.6/dist-packages 
(from joeynmt==0.0.1) (1.2.0)\n", + "Requirement already satisfied: tensorflow>=1.14 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.15.0rc3)\n", + "Requirement already satisfied: torchtext in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.3.1)\n", + "Collecting sacrebleu>=1.3.6 (from joeynmt==0.0.1)\n", + " Downloading https://files.pythonhosted.org/packages/0e/e5/93d252182f7cbd4b59bb3ec5797e2ce33cfd6f5aadaf327db170cf4b7887/sacrebleu-1.4.2-py3-none-any.whl\n", + "Collecting subword-nmt (from joeynmt==0.0.1)\n", + " Downloading https://files.pythonhosted.org/packages/26/08/58267cb3ac00f5f895457777ed9e0d106dbb5e6388fa7923d8663b04b849/subword_nmt-0.3.6-py2.py3-none-any.whl\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (3.0.3)\n", + "Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.9.0)\n", + "Collecting pyyaml>=5.1 (from joeynmt==0.0.1)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849582fe/PyYAML-5.1.2.tar.gz (265kB)\n", + "\u001b[K |████████████████████████████████| 266kB 7.8MB/s \n", + "\u001b[?25hCollecting pylint (from joeynmt==0.0.1)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ef/ed/1cb8e7b85a31807aa0bff8b3e60935370bed7e141df8b530aac6352bddff/pylint-2.4.2-py3-none-any.whl (302kB)\n", + "\u001b[K |████████████████████████████████| 307kB 44.6MB/s \n", + "\u001b[?25hRequirement already satisfied: six>=1.12 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.12.0)\n", + "Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow->joeynmt==0.0.1) (0.46)\n", + "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.11.2)\n", + "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.8.0)\n", + "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.8.0)\n", + "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (3.7.1)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (3.1.0)\n", + "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.1.7)\n", + "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.0.8)\n", + "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.1.0)\n", + "Requirement already satisfied: tensorboard<1.16.0,>=1.15.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.0)\n", + "Requirement already satisfied: tensorflow-estimator==1.15.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.1)\n", + "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.0)\n", + "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from 
tensorflow>=1.14->joeynmt==0.0.1) (0.33.6)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.1.0)\n", + "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.2.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from torchtext->joeynmt==0.0.1) (4.28.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from torchtext->joeynmt==0.0.1) (2.21.0)\n", + "Collecting portalocker (from sacrebleu>=1.3.6->joeynmt==0.0.1)\n", + " Downloading https://files.pythonhosted.org/packages/60/ec/836a494dbaa72541f691ec4e66f29fdc8db9bcc7f49e1c2d457ba13ced42/portalocker-1.5.1-py2.py3-none-any.whl\n", + "Collecting typing (from sacrebleu>=1.3.6->joeynmt==0.0.1)\n", + " Downloading https://files.pythonhosted.org/packages/fe/2e/b480ee1b75e6d17d2993738670e75c1feeb9ff7f64452153cf018051cc92/typing-3.7.4.1-py3-none-any.whl\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (2.4.2)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (2.5.3)\n", + "Requirement already satisfied: scipy>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from seaborn->joeynmt==0.0.1) (1.3.1)\n", + "Requirement already satisfied: pandas>=0.15.2 in /usr/local/lib/python3.6/dist-packages (from seaborn->joeynmt==0.0.1) (0.24.2)\n", + "Collecting astroid<2.4,>=2.3.0 (from pylint->joeynmt==0.0.1)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/13/e1/74a63c85c501c29c52da5be604c025e368f4dd77daf1fa13c878a33e5a36/astroid-2.3.1-py3-none-any.whl (205kB)\n", + "\u001b[K |████████████████████████████████| 215kB 50.2MB/s \n", + "\u001b[?25hCollecting isort<5,>=4.2.5 (from pylint->joeynmt==0.0.1)\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e5/b0/c121fd1fa3419ea9bfd55c7f9c4fedfec5143208d8c7ad3ce3db6c623c21/isort-4.3.21-py2.py3-none-any.whl (42kB)\n", + "\u001b[K |████████████████████████████████| 51kB 24.6MB/s \n", + "\u001b[?25hCollecting mccabe<0.7,>=0.6 (from pylint->joeynmt==0.0.1)\n", + " Downloading https://files.pythonhosted.org/packages/87/89/479dc97e18549e21354893e4ee4ef36db1d237534982482c3681ee6e7b57/mccabe-0.6.1-py2.py3-none-any.whl\n", + "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow>=1.14->joeynmt==0.0.1) (2.8.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow>=1.14->joeynmt==0.0.1) (3.1.1)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow>=1.14->joeynmt==0.0.1) (0.16.0)\n", + "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (1.24.3)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from 
requests->torchtext->joeynmt==0.0.1) (2.8)\n",
+            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (3.0.4)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (2019.9.11)\n",
+            "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas>=0.15.2->seaborn->joeynmt==0.0.1) (2018.9)\n",
+            "Collecting lazy-object-proxy==1.4.* (from astroid<2.4,>=2.3.0->pylint->joeynmt==0.0.1)\n",
+            "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0e/26/534a6d32572a9dbca11619321535c0a7ab34688545d9d67c2c204b9e3a3d/lazy_object_proxy-1.4.2-cp36-cp36m-manylinux1_x86_64.whl (49kB)\n",
+            "\u001b[K     |████████████████████████████████| 51kB 24.0MB/s \n",
+            "\u001b[?25hCollecting typed-ast<1.5,>=1.4.0; implementation_name == \"cpython\" and python_version < \"3.8\" (from astroid<2.4,>=2.3.0->pylint->joeynmt==0.0.1)\n",
+            "\u001b[?25l Downloading https://files.pythonhosted.org/packages/31/d3/9d1802c161626d0278bafb1ffb32f76b9d01e123881bbf9d91e8ccf28e18/typed_ast-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (736kB)\n",
+            "\u001b[K     |████████████████████████████████| 737kB 47.0MB/s \n",
+            "\u001b[?25hBuilding wheels for collected packages: joeynmt, pyyaml\n",
+            "  Building wheel for joeynmt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for joeynmt: filename=joeynmt-0.0.1-cp36-none-any.whl size=69430 sha256=369004e99a7050461c10fb79c76bb63b36d4c8aefcc5f53990fd2ce279095e05\n",
+            "  Stored in directory: /tmp/pip-ephem-wheel-cache-2493nseq/wheels/db/01/db/751cc9f3e7f6faec127c43644ba250a3ea7ad200594aeda70a\n",
+            "  Building wheel for pyyaml (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for pyyaml: filename=PyYAML-5.1.2-cp36-cp36m-linux_x86_64.whl size=44104 sha256=8140f9abc28ffeef011a7c489be5f1176541a7e7a3762277b0428ff3c954d1b0\n",
+            "  Stored in directory: /root/.cache/pip/wheels/d9/45/dd/65f0b38450c47cf7e5312883deb97d065e030c5cca0a365030\n",
+            "Successfully built joeynmt pyyaml\n",
+            "Installing collected packages: portalocker, typing, sacrebleu, subword-nmt, pyyaml, lazy-object-proxy, typed-ast, astroid, isort, mccabe, pylint, joeynmt\n",
+            "  Found existing installation: PyYAML 3.13\n",
+            "    Uninstalling PyYAML-3.13:\n",
+            "      Successfully uninstalled PyYAML-3.13\n",
+            "Successfully installed astroid-2.3.1 isort-4.3.21 joeynmt-0.0.1 lazy-object-proxy-1.4.2 mccabe-0.6.1 portalocker-1.5.1 pylint-2.4.2 pyyaml-5.1.2 sacrebleu-1.4.2 subword-nmt-0.3.6 typed-ast-1.4.0 typing-3.7.4.1\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "AaE77Tcppex9"
+      },
+      "source": [
+        "# Preprocessing the Data into Subword BPE Tokens\n",
+        "\n",
+        "- One of the most powerful improvements for agglutinative languages (a feature of most Bantu languages) is using BPE tokenization [(Sennrich, 2015)](https://arxiv.org/abs/1508.07909).\n",
+        "\n",
+        "- It was also shown that by optimizing the number of BPE codes we significantly improve results for low-resourced languages [(Sennrich, 2019)](https://www.aclweb.org/anthology/P19-1021) [(Martinus, 2019)](https://arxiv.org/abs/1906.05685).\n",
+        "\n",
+        "- Below we have the scripts for doing BPE tokenization of our data. We use 4000 BPE merge operations, as recommended by [(Sennrich, 2019)](https://www.aclweb.org/anthology/P19-1021). You do not need to change anything; simply running the cells below will be suitable. A toy demonstration of how BPE builds up its merges is shown in the next cell."
+      ]
+    },
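+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "bpe_toy_demo"
+      },
+      "source": [
+        "# (Added illustration, a minimal sketch; the cell id and toy data are ours, not from\n",
+        "# the original run.) BPE starts from characters and greedily merges the most frequent\n",
+        "# symbol pairs, so common subwords like 'low' and 'est' end up as single tokens.\n",
+        "import io\n",
+        "from subword_nmt.learn_bpe import learn_bpe\n",
+        "from subword_nmt.apply_bpe import BPE\n",
+        "\n",
+        "toy_corpus = io.StringIO(\"low lower lowest new newer newest\\n\" * 100)\n",
+        "toy_codes = io.StringIO()\n",
+        "learn_bpe(toy_corpus, toy_codes, num_symbols=10)  # 10 merge operations (the real run below uses 4000)\n",
+        "toy_codes.seek(0)\n",
+        "bpe = BPE(toy_codes)\n",
+        "print(bpe.process_line(\"lowest newest\"))  # e.g. 'low@@ est new@@ est'; '@@' marks a split point\n"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },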
" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "H-TyjtmXB1mL", + "outputId": "6258182f-8f84-417a-b8cd-88e1c4160304", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 451 + } + }, + "source": [ + "# One of the huge boosts in NMT performance was to use a different method of tokenizing. \n", + "# Usually, NMT would tokenize by words. However, using a method called BPE gave amazing boosts to performance\n", + "\n", + "# Do subword NMT\n", + "from os import path\n", + "\n", + "os.environ[\"data_path\"] = path.join(\"joeynmt\", \"data\", source_language + target_language) # Herman! \n", + "! subword-nmt learn-joint-bpe-and-vocab --input train.$src train.$tgt -s 4000 -o bpe.codes.4000 --write-vocabulary vocab.$src vocab.$tgt\n", + "\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < train.$src > train.bpe.$src\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < train.$tgt > train.bpe.$tgt\n", + "\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < dev.$src > dev.bpe.$src\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < dev.$tgt > dev.bpe.$tgt\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < test.$src > test.bpe.$src\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < test.$tgt > test.bpe.$tgt\n", + "\n", + "# Create directory, move everyone we care about to the correct location\n", + "! mkdir -p $data_path\n", + "! cp train.* $data_path\n", + "! cp test.* $data_path\n", + "! cp dev.* $data_path\n", + "! cp bpe.codes.4000 $data_path\n", + "! ls $data_path\n", + "\n", + "# Also move everything we care about to a mounted location in google drive (relevant if running in colab) at gdrive_path\n", + "! cp train.* \"$gdrive_path\"\n", + "! cp test.* \"$gdrive_path\"\n", + "! cp dev.* \"$gdrive_path\"\n", + "! cp bpe.codes.4000 \"$gdrive_path\"\n", + "! ls \"$gdrive_path\"\n", + "\n", + "# Create that vocab using build_vocab\n", + "! sudo chmod 777 joeynmt/scripts/build_vocab.py\n", + "! joeynmt/scripts/build_vocab.py joeynmt/data/$src$tgt/train.bpe.$src joeynmt/data/$src$tgt/train.bpe.$tgt --output_path joeynmt/data/$src$tgt/vocab.txt\n", + "\n", + "# Some output\n", + "! echo \"BPE Afrikaans Sentences\"\n", + "! tail -n 5 test.bpe.$tgt\n", + "! echo \"Combined BPE Vocab\"\n", + "! 
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "bpe.codes.4000\tdev.bpe.en test.bpe.af train.af train.en\n",
+            "dev.af\t\tdev.en\t test.bpe.en train.bpe.af\n",
+            "dev.bpe.af\ttest.af test.en\t train.bpe.en\n",
+            "bpe.codes.4000\tdev.bpe.en test.af\t test.en train.bpe.en\n",
+            "dev.af\t\tdev.en\t test.bpe.af train.af train.en\n",
+            "dev.bpe.af\tmodels\t test.bpe.en train.bpe.af\n",
+            "BPE Afrikaans Sentences\n",
+            "wat is 'n on@@ we@@ t@@ tige elektriese skak@@ el@@ ings ?\n",
+            "hoe dink jy kan die plaas@@ like reg@@ ering dit keer of die hoeveelheid on@@ we@@ t@@ tige skak@@ el@@ ings ver@@ minder .\n",
+            "'n on@@ we@@ t@@ tige skak@@ eling is wanneer ie@@ mand toe@@ gan@@ g kry tot elektrisiteit deur 'n kra@@ gl@@ yn te sny en 'n ander l@@ yn daaraan te verbind sonder om daar@@ voor te be@@ taal .\n",
+            "die plaas@@ like reg@@ ering kan dit probeer stop deur eer@@ st@@ ens te probeer om die ar@@ mer geb@@ ie@@ de met genoeg elektriese toe@@ g@@ ang@@ sp@@ unte te voorsien rond te gaan en te kyk of daar ge@@ vaar@@ like skak@@ el@@ ings is be@@ w@@ us@@ theid oor die gev@@ are van on@@ we@@ t@@ tige skak@@ el@@ ings te verbe@@ ter deur ad@@ ver@@ ten@@ sie@@ bor@@ de radi@@ o die ko@@ er@@ ant ens .\n",
+            "asses@@ seer enige ander rele@@ van@@ te antwoorde wat die leerder mag hê .\n",
+            "Combined BPE Vocab\n",
+            "sende\n",
+            "stly\n",
+            "secon@@\n",
+            "desc@@\n",
+            "sedimen@@\n",
+            "vers\n",
+            "interdepen@@\n",
+            "sour@@\n",
+            "plastie@@\n",
+            "pollut@@\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "IlMitUHR8Qy-",
+        "outputId": "e739d5d2-ac18-4886-f4be-7698424271aa",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 71
+        }
+      },
+      "source": [
+        "# Also move everything we care about to a mounted location in google drive (relevant if running in colab) at gdrive_path\n",
+        "! cp train.* \"$gdrive_path\"\n",
+        "! cp test.* \"$gdrive_path\"\n",
+        "! cp dev.* \"$gdrive_path\"\n",
+        "! cp bpe.codes.4000 \"$gdrive_path\"\n",
+        "! ls \"$gdrive_path\""
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "bpe.codes.4000\tdev.bpe.en test.af\t test.en train.bpe.en\n",
+            "dev.af\t\tdev.en\t test.bpe.af train.af train.en\n",
+            "dev.bpe.af\tmodels\t test.bpe.en train.bpe.af\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
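+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "bpe_segment_check"
+      },
+      "source": [
+        "# (Added sketch; the cell id and the sample word are ours.) With the joint BPE codes\n",
+        "# and per-language vocabularies learned above, any new text can be segmented into\n",
+        "# in-vocabulary subwords, so the model never sees an out-of-vocabulary token.\n",
+        "! echo \"fotosintese\" | subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.af\n",
+        "! wc -l vocab.en vocab.af joeynmt/data/$src$tgt/vocab.txt"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },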
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "Ixmzi60WsUZ8"
+      },
+      "source": [
+        "# Creating the JoeyNMT Config\n",
+        "\n",
+        "JoeyNMT requires a YAML config. We provide a template below, with a number of defaults already set that you may play with!\n",
+        "\n",
+        "- We use the Transformer architecture\n",
+        "- We set the dropout reasonably high: 0.3 (recommended in [(Sennrich, 2019)](https://www.aclweb.org/anthology/P19-1021))\n",
+        "\n",
+        "Things worth playing with:\n",
+        "- The batch size (also recommended to change for low-resourced languages)\n",
+        "- The number of epochs (the template below sets 200; around 30 runs in roughly an hour and is enough for testing)\n",
+        "- The decoder options (beam_size, alpha)\n",
+        "- Evaluation metrics (BLEU versus chrF)\n",
+        "\n",
+        "A short added cell after the config cell below checks that the generated file parses."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "PIs1lY2hxMsl",
+        "colab": {}
+      },
+      "source": [
+        "# This creates the config file for our JoeyNMT system. It might seem overwhelming, so we've marked a couple of useful parameters that you'll want to update\n",
+        "# (You can of course play with all the parameters if you'd like!)\n",
+        "\n",
+        "name = '%s%s' % (source_language, target_language)\n",
+        "gdrive_path = os.environ[\"gdrive_path\"]\n",
+        "\n",
+        "# Create the config\n",
+        "config = \"\"\"\n",
+        "name: \"{name}_transformer\"\n",
+        "\n",
+        "data:\n",
+        "    src: \"{source_language}\"\n",
+        "    trg: \"{target_language}\"\n",
+        "    train: \"data/{name}/train.bpe\"\n",
+        "    dev: \"data/{name}/dev.bpe\"\n",
+        "    test: \"data/{name}/test.bpe\"\n",
+        "    level: \"bpe\"\n",
+        "    lowercase: False\n",
+        "    max_sent_length: 100\n",
+        "    src_vocab: \"data/{name}/vocab.txt\"\n",
+        "    trg_vocab: \"data/{name}/vocab.txt\"\n",
+        "\n",
+        "testing:\n",
+        "    beam_size: 5\n",
+        "    alpha: 1.0\n",
+        "\n",
+        "training:\n",
+        "    #load_model: \"{gdrive_path}/models/{name}_transformer/1.ckpt\" # if uncommented, load a pre-trained model from this checkpoint\n",
+        "    random_seed: 42\n",
+        "    optimizer: \"adam\"\n",
+        "    normalization: \"tokens\"\n",
+        "    adam_betas: [0.9, 0.999] \n",
+        "    scheduling: \"noam\"            # Try switching from plateau to Noam scheduling\n",
+        "    learning_rate_factor: 0.5     # factor for Noam scheduler (used with Transformer)\n",
+        "    learning_rate_warmup: 1000    # warmup steps for Noam scheduler (used with Transformer)\n",
+        "    patience: 8\n",
+        "    decrease_factor: 0.7\n",
+        "    loss: \"crossentropy\"\n",
+        "    learning_rate: 0.0002\n",
+        "    learning_rate_min: 0.00000001\n",
+        "    weight_decay: 0.0\n",
+        "    label_smoothing: 0.1\n",
+        "    batch_size: 8192    # 4096 # Herman\n",
+        "    batch_type: \"token\"\n",
+        "    eval_batch_size: 1000    # 3600 # Herman\n",
+        "    eval_batch_type: \"token\"\n",
+        "    batch_multiplier: 1\n",
+        "    early_stopping_metric: \"eval_metric\"    # \"ppl\" # Herman\n",
+        "    epochs: 200    # TODO: Decrease when playing around and checking whether things work. Around 30 epochs is sufficient to check that training works at all\n",
+        "    validation_freq: 500    # 4000 # Decrease this for testing # Herman\n",
+        "    logging_freq: 50    # 100 # Herman\n",
+        "    eval_metric: \"bleu\"\n",
+        "    model_dir: \"models/{name}_transformer\"\n",
+        "    overwrite: True\n",
+        "    shuffle: True\n",
+        "    use_cuda: True\n",
+        "    max_output_length: 100\n",
+        "    print_valid_sents: [0, 1, 2, 3]\n",
+        "    keep_last_ckpts: 3\n",
+        "\n",
+        "model:\n",
+        "    initializer: \"xavier\"\n",
+        "    bias_initializer: \"zeros\"\n",
+        "    init_gain: 1.0\n",
+        "    embed_initializer: \"xavier\"\n",
+        "    embed_init_gain: 1.0\n",
+        "    tied_embeddings: True\n",
+        "    tied_softmax: True\n",
+        "    encoder:\n",
+        "        type: \"transformer\"\n",
+        "        num_layers: 6\n",
+        "        num_heads: 8\n",
+        "        embeddings:\n",
+        "            embedding_dim: 512\n",
+        "            scale: True\n",
+        "            dropout: 0.\n",
+        "        # typically ff_size = 4 x hidden_size\n",
+        "        hidden_size: 512\n",
+        "        ff_size: 2048\n",
+        "        dropout: 0.3\n",
+        "    decoder:\n",
+        "        type: \"transformer\"\n",
+        "        num_layers: 6\n",
+        "        num_heads: 8\n",
+        "        embeddings:\n",
+        "            embedding_dim: 512\n",
+        "            scale: True\n",
+        "            dropout: 0.\n",
+        "        # typically ff_size = 4 x hidden_size\n",
+        "        hidden_size: 512\n",
+        "        ff_size: 2048\n",
+        "        dropout: 0.3\n",
+        "\"\"\".format(name=name, gdrive_path=os.environ[\"gdrive_path\"], source_language=source_language, target_language=target_language)\n",
+        "with open(\"joeynmt/configs/transformer_{name}.yaml\".format(name=name),'w') as f:\n",
+        "    f.write(config)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
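+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "config_check_demo"
+      },
+      "source": [
+        "# (Added sketch; the cell id is ours.) PyYAML was installed as a JoeyNMT dependency,\n",
+        "# so we can parse the file we just wrote and confirm it is valid YAML with the\n",
+        "# values we expect, before spending GPU time on training.\n",
+        "import yaml\n",
+        "\n",
+        "with open(\"joeynmt/configs/transformer_{name}.yaml\".format(name=name)) as f:\n",
+        "    cfg = yaml.safe_load(f)\n",
+        "print(cfg[\"training\"][\"epochs\"], cfg[\"training\"][\"eval_metric\"], cfg[\"model\"][\"encoder\"][\"hidden_size\"])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },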
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "pIifxE3Qzuvs"
+      },
+      "source": [
+        "# Train the Model\n",
+        "\n",
+        "This single line of JoeyNMT runs the training using the config we made above."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "6ZBPFwT94WpI",
+        "outputId": "afc7b4fb-ae9a-4068-8f64-13dd42103643",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        }
+      },
+      "source": [
+        "# Train the model\n",
+        "# You can press Ctrl-C to stop, and then run the next cell to save your checkpoints!\n",
+        "!cd joeynmt; python3 -m joeynmt train configs/transformer_$src$tgt.yaml"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "2019-10-14 12:43:11,392 Hello! 
This is Joey-NMT.\n", + "2019-10-14 12:43:12,910 Total params: 46140928\n", + "2019-10-14 12:43:12,912 Trainable parameters: ['decoder.layer_norm.bias', 'decoder.layer_norm.weight', 'decoder.layers.0.dec_layer_norm.bias', 'decoder.layers.0.dec_layer_norm.weight', 'decoder.layers.0.feed_forward.layer_norm.bias', 'decoder.layers.0.feed_forward.layer_norm.weight', 'decoder.layers.0.feed_forward.pwff_layer.0.bias', 'decoder.layers.0.feed_forward.pwff_layer.0.weight', 'decoder.layers.0.feed_forward.pwff_layer.3.bias', 'decoder.layers.0.feed_forward.pwff_layer.3.weight', 'decoder.layers.0.src_trg_att.k_layer.bias', 'decoder.layers.0.src_trg_att.k_layer.weight', 'decoder.layers.0.src_trg_att.output_layer.bias', 'decoder.layers.0.src_trg_att.output_layer.weight', 'decoder.layers.0.src_trg_att.q_layer.bias', 'decoder.layers.0.src_trg_att.q_layer.weight', 'decoder.layers.0.src_trg_att.v_layer.bias', 'decoder.layers.0.src_trg_att.v_layer.weight', 'decoder.layers.0.trg_trg_att.k_layer.bias', 'decoder.layers.0.trg_trg_att.k_layer.weight', 'decoder.layers.0.trg_trg_att.output_layer.bias', 'decoder.layers.0.trg_trg_att.output_layer.weight', 'decoder.layers.0.trg_trg_att.q_layer.bias', 'decoder.layers.0.trg_trg_att.q_layer.weight', 'decoder.layers.0.trg_trg_att.v_layer.bias', 'decoder.layers.0.trg_trg_att.v_layer.weight', 'decoder.layers.0.x_layer_norm.bias', 'decoder.layers.0.x_layer_norm.weight', 'decoder.layers.1.dec_layer_norm.bias', 'decoder.layers.1.dec_layer_norm.weight', 'decoder.layers.1.feed_forward.layer_norm.bias', 'decoder.layers.1.feed_forward.layer_norm.weight', 'decoder.layers.1.feed_forward.pwff_layer.0.bias', 'decoder.layers.1.feed_forward.pwff_layer.0.weight', 'decoder.layers.1.feed_forward.pwff_layer.3.bias', 'decoder.layers.1.feed_forward.pwff_layer.3.weight', 'decoder.layers.1.src_trg_att.k_layer.bias', 'decoder.layers.1.src_trg_att.k_layer.weight', 'decoder.layers.1.src_trg_att.output_layer.bias', 'decoder.layers.1.src_trg_att.output_layer.weight', 'decoder.layers.1.src_trg_att.q_layer.bias', 'decoder.layers.1.src_trg_att.q_layer.weight', 'decoder.layers.1.src_trg_att.v_layer.bias', 'decoder.layers.1.src_trg_att.v_layer.weight', 'decoder.layers.1.trg_trg_att.k_layer.bias', 'decoder.layers.1.trg_trg_att.k_layer.weight', 'decoder.layers.1.trg_trg_att.output_layer.bias', 'decoder.layers.1.trg_trg_att.output_layer.weight', 'decoder.layers.1.trg_trg_att.q_layer.bias', 'decoder.layers.1.trg_trg_att.q_layer.weight', 'decoder.layers.1.trg_trg_att.v_layer.bias', 'decoder.layers.1.trg_trg_att.v_layer.weight', 'decoder.layers.1.x_layer_norm.bias', 'decoder.layers.1.x_layer_norm.weight', 'decoder.layers.2.dec_layer_norm.bias', 'decoder.layers.2.dec_layer_norm.weight', 'decoder.layers.2.feed_forward.layer_norm.bias', 'decoder.layers.2.feed_forward.layer_norm.weight', 'decoder.layers.2.feed_forward.pwff_layer.0.bias', 'decoder.layers.2.feed_forward.pwff_layer.0.weight', 'decoder.layers.2.feed_forward.pwff_layer.3.bias', 'decoder.layers.2.feed_forward.pwff_layer.3.weight', 'decoder.layers.2.src_trg_att.k_layer.bias', 'decoder.layers.2.src_trg_att.k_layer.weight', 'decoder.layers.2.src_trg_att.output_layer.bias', 'decoder.layers.2.src_trg_att.output_layer.weight', 'decoder.layers.2.src_trg_att.q_layer.bias', 'decoder.layers.2.src_trg_att.q_layer.weight', 'decoder.layers.2.src_trg_att.v_layer.bias', 'decoder.layers.2.src_trg_att.v_layer.weight', 'decoder.layers.2.trg_trg_att.k_layer.bias', 'decoder.layers.2.trg_trg_att.k_layer.weight', 'decoder.layers.2.trg_trg_att.output_layer.bias', 
'decoder.layers.2.trg_trg_att.output_layer.weight', 'decoder.layers.2.trg_trg_att.q_layer.bias', 'decoder.layers.2.trg_trg_att.q_layer.weight', 'decoder.layers.2.trg_trg_att.v_layer.bias', 'decoder.layers.2.trg_trg_att.v_layer.weight', 'decoder.layers.2.x_layer_norm.bias', 'decoder.layers.2.x_layer_norm.weight', 'decoder.layers.3.dec_layer_norm.bias', 'decoder.layers.3.dec_layer_norm.weight', 'decoder.layers.3.feed_forward.layer_norm.bias', 'decoder.layers.3.feed_forward.layer_norm.weight', 'decoder.layers.3.feed_forward.pwff_layer.0.bias', 'decoder.layers.3.feed_forward.pwff_layer.0.weight', 'decoder.layers.3.feed_forward.pwff_layer.3.bias', 'decoder.layers.3.feed_forward.pwff_layer.3.weight', 'decoder.layers.3.src_trg_att.k_layer.bias', 'decoder.layers.3.src_trg_att.k_layer.weight', 'decoder.layers.3.src_trg_att.output_layer.bias', 'decoder.layers.3.src_trg_att.output_layer.weight', 'decoder.layers.3.src_trg_att.q_layer.bias', 'decoder.layers.3.src_trg_att.q_layer.weight', 'decoder.layers.3.src_trg_att.v_layer.bias', 'decoder.layers.3.src_trg_att.v_layer.weight', 'decoder.layers.3.trg_trg_att.k_layer.bias', 'decoder.layers.3.trg_trg_att.k_layer.weight', 'decoder.layers.3.trg_trg_att.output_layer.bias', 'decoder.layers.3.trg_trg_att.output_layer.weight', 'decoder.layers.3.trg_trg_att.q_layer.bias', 'decoder.layers.3.trg_trg_att.q_layer.weight', 'decoder.layers.3.trg_trg_att.v_layer.bias', 'decoder.layers.3.trg_trg_att.v_layer.weight', 'decoder.layers.3.x_layer_norm.bias', 'decoder.layers.3.x_layer_norm.weight', 'decoder.layers.4.dec_layer_norm.bias', 'decoder.layers.4.dec_layer_norm.weight', 'decoder.layers.4.feed_forward.layer_norm.bias', 'decoder.layers.4.feed_forward.layer_norm.weight', 'decoder.layers.4.feed_forward.pwff_layer.0.bias', 'decoder.layers.4.feed_forward.pwff_layer.0.weight', 'decoder.layers.4.feed_forward.pwff_layer.3.bias', 'decoder.layers.4.feed_forward.pwff_layer.3.weight', 'decoder.layers.4.src_trg_att.k_layer.bias', 'decoder.layers.4.src_trg_att.k_layer.weight', 'decoder.layers.4.src_trg_att.output_layer.bias', 'decoder.layers.4.src_trg_att.output_layer.weight', 'decoder.layers.4.src_trg_att.q_layer.bias', 'decoder.layers.4.src_trg_att.q_layer.weight', 'decoder.layers.4.src_trg_att.v_layer.bias', 'decoder.layers.4.src_trg_att.v_layer.weight', 'decoder.layers.4.trg_trg_att.k_layer.bias', 'decoder.layers.4.trg_trg_att.k_layer.weight', 'decoder.layers.4.trg_trg_att.output_layer.bias', 'decoder.layers.4.trg_trg_att.output_layer.weight', 'decoder.layers.4.trg_trg_att.q_layer.bias', 'decoder.layers.4.trg_trg_att.q_layer.weight', 'decoder.layers.4.trg_trg_att.v_layer.bias', 'decoder.layers.4.trg_trg_att.v_layer.weight', 'decoder.layers.4.x_layer_norm.bias', 'decoder.layers.4.x_layer_norm.weight', 'decoder.layers.5.dec_layer_norm.bias', 'decoder.layers.5.dec_layer_norm.weight', 'decoder.layers.5.feed_forward.layer_norm.bias', 'decoder.layers.5.feed_forward.layer_norm.weight', 'decoder.layers.5.feed_forward.pwff_layer.0.bias', 'decoder.layers.5.feed_forward.pwff_layer.0.weight', 'decoder.layers.5.feed_forward.pwff_layer.3.bias', 'decoder.layers.5.feed_forward.pwff_layer.3.weight', 'decoder.layers.5.src_trg_att.k_layer.bias', 'decoder.layers.5.src_trg_att.k_layer.weight', 'decoder.layers.5.src_trg_att.output_layer.bias', 'decoder.layers.5.src_trg_att.output_layer.weight', 'decoder.layers.5.src_trg_att.q_layer.bias', 'decoder.layers.5.src_trg_att.q_layer.weight', 'decoder.layers.5.src_trg_att.v_layer.bias', 'decoder.layers.5.src_trg_att.v_layer.weight', 
'decoder.layers.5.trg_trg_att.k_layer.bias', 'decoder.layers.5.trg_trg_att.k_layer.weight', 'decoder.layers.5.trg_trg_att.output_layer.bias', 'decoder.layers.5.trg_trg_att.output_layer.weight', 'decoder.layers.5.trg_trg_att.q_layer.bias', 'decoder.layers.5.trg_trg_att.q_layer.weight', 'decoder.layers.5.trg_trg_att.v_layer.bias', 'decoder.layers.5.trg_trg_att.v_layer.weight', 'decoder.layers.5.x_layer_norm.bias', 'decoder.layers.5.x_layer_norm.weight', 'encoder.layer_norm.bias', 'encoder.layer_norm.weight', 'encoder.layers.0.feed_forward.layer_norm.bias', 'encoder.layers.0.feed_forward.layer_norm.weight', 'encoder.layers.0.feed_forward.pwff_layer.0.bias', 'encoder.layers.0.feed_forward.pwff_layer.0.weight', 'encoder.layers.0.feed_forward.pwff_layer.3.bias', 'encoder.layers.0.feed_forward.pwff_layer.3.weight', 'encoder.layers.0.layer_norm.bias', 'encoder.layers.0.layer_norm.weight', 'encoder.layers.0.src_src_att.k_layer.bias', 'encoder.layers.0.src_src_att.k_layer.weight', 'encoder.layers.0.src_src_att.output_layer.bias', 'encoder.layers.0.src_src_att.output_layer.weight', 'encoder.layers.0.src_src_att.q_layer.bias', 'encoder.layers.0.src_src_att.q_layer.weight', 'encoder.layers.0.src_src_att.v_layer.bias', 'encoder.layers.0.src_src_att.v_layer.weight', 'encoder.layers.1.feed_forward.layer_norm.bias', 'encoder.layers.1.feed_forward.layer_norm.weight', 'encoder.layers.1.feed_forward.pwff_layer.0.bias', 'encoder.layers.1.feed_forward.pwff_layer.0.weight', 'encoder.layers.1.feed_forward.pwff_layer.3.bias', 'encoder.layers.1.feed_forward.pwff_layer.3.weight', 'encoder.layers.1.layer_norm.bias', 'encoder.layers.1.layer_norm.weight', 'encoder.layers.1.src_src_att.k_layer.bias', 'encoder.layers.1.src_src_att.k_layer.weight', 'encoder.layers.1.src_src_att.output_layer.bias', 'encoder.layers.1.src_src_att.output_layer.weight', 'encoder.layers.1.src_src_att.q_layer.bias', 'encoder.layers.1.src_src_att.q_layer.weight', 'encoder.layers.1.src_src_att.v_layer.bias', 'encoder.layers.1.src_src_att.v_layer.weight', 'encoder.layers.2.feed_forward.layer_norm.bias', 'encoder.layers.2.feed_forward.layer_norm.weight', 'encoder.layers.2.feed_forward.pwff_layer.0.bias', 'encoder.layers.2.feed_forward.pwff_layer.0.weight', 'encoder.layers.2.feed_forward.pwff_layer.3.bias', 'encoder.layers.2.feed_forward.pwff_layer.3.weight', 'encoder.layers.2.layer_norm.bias', 'encoder.layers.2.layer_norm.weight', 'encoder.layers.2.src_src_att.k_layer.bias', 'encoder.layers.2.src_src_att.k_layer.weight', 'encoder.layers.2.src_src_att.output_layer.bias', 'encoder.layers.2.src_src_att.output_layer.weight', 'encoder.layers.2.src_src_att.q_layer.bias', 'encoder.layers.2.src_src_att.q_layer.weight', 'encoder.layers.2.src_src_att.v_layer.bias', 'encoder.layers.2.src_src_att.v_layer.weight', 'encoder.layers.3.feed_forward.layer_norm.bias', 'encoder.layers.3.feed_forward.layer_norm.weight', 'encoder.layers.3.feed_forward.pwff_layer.0.bias', 'encoder.layers.3.feed_forward.pwff_layer.0.weight', 'encoder.layers.3.feed_forward.pwff_layer.3.bias', 'encoder.layers.3.feed_forward.pwff_layer.3.weight', 'encoder.layers.3.layer_norm.bias', 'encoder.layers.3.layer_norm.weight', 'encoder.layers.3.src_src_att.k_layer.bias', 'encoder.layers.3.src_src_att.k_layer.weight', 'encoder.layers.3.src_src_att.output_layer.bias', 'encoder.layers.3.src_src_att.output_layer.weight', 'encoder.layers.3.src_src_att.q_layer.bias', 'encoder.layers.3.src_src_att.q_layer.weight', 'encoder.layers.3.src_src_att.v_layer.bias', 'encoder.layers.3.src_src_att.v_layer.weight', 
'encoder.layers.4.feed_forward.layer_norm.bias', 'encoder.layers.4.feed_forward.layer_norm.weight', 'encoder.layers.4.feed_forward.pwff_layer.0.bias', 'encoder.layers.4.feed_forward.pwff_layer.0.weight', 'encoder.layers.4.feed_forward.pwff_layer.3.bias', 'encoder.layers.4.feed_forward.pwff_layer.3.weight', 'encoder.layers.4.layer_norm.bias', 'encoder.layers.4.layer_norm.weight', 'encoder.layers.4.src_src_att.k_layer.bias', 'encoder.layers.4.src_src_att.k_layer.weight', 'encoder.layers.4.src_src_att.output_layer.bias', 'encoder.layers.4.src_src_att.output_layer.weight', 'encoder.layers.4.src_src_att.q_layer.bias', 'encoder.layers.4.src_src_att.q_layer.weight', 'encoder.layers.4.src_src_att.v_layer.bias', 'encoder.layers.4.src_src_att.v_layer.weight', 'encoder.layers.5.feed_forward.layer_norm.bias', 'encoder.layers.5.feed_forward.layer_norm.weight', 'encoder.layers.5.feed_forward.pwff_layer.0.bias', 'encoder.layers.5.feed_forward.pwff_layer.0.weight', 'encoder.layers.5.feed_forward.pwff_layer.3.bias', 'encoder.layers.5.feed_forward.pwff_layer.3.weight', 'encoder.layers.5.layer_norm.bias', 'encoder.layers.5.layer_norm.weight', 'encoder.layers.5.src_src_att.k_layer.bias', 'encoder.layers.5.src_src_att.k_layer.weight', 'encoder.layers.5.src_src_att.output_layer.bias', 'encoder.layers.5.src_src_att.output_layer.weight', 'encoder.layers.5.src_src_att.q_layer.bias', 'encoder.layers.5.src_src_att.q_layer.weight', 'encoder.layers.5.src_src_att.v_layer.bias', 'encoder.layers.5.src_src_att.v_layer.weight', 'src_embed.lut.weight']\n", + "2019-10-14 12:43:18,657 cfg.name : enaf_transformer\n", + "2019-10-14 12:43:18,657 cfg.data.src : en\n", + "2019-10-14 12:43:18,657 cfg.data.trg : af\n", + "2019-10-14 12:43:18,657 cfg.data.train : data/enaf/train.bpe\n", + "2019-10-14 12:43:18,657 cfg.data.dev : data/enaf/dev.bpe\n", + "2019-10-14 12:43:18,657 cfg.data.test : data/enaf/test.bpe\n", + "2019-10-14 12:43:18,658 cfg.data.level : bpe\n", + "2019-10-14 12:43:18,658 cfg.data.lowercase : False\n", + "2019-10-14 12:43:18,658 cfg.data.max_sent_length : 100\n", + "2019-10-14 12:43:18,658 cfg.data.src_vocab : data/enaf/vocab.txt\n", + "2019-10-14 12:43:18,658 cfg.data.trg_vocab : data/enaf/vocab.txt\n", + "2019-10-14 12:43:18,658 cfg.testing.beam_size : 5\n", + "2019-10-14 12:43:18,658 cfg.testing.alpha : 1.0\n", + "2019-10-14 12:43:18,658 cfg.training.random_seed : 42\n", + "2019-10-14 12:43:18,658 cfg.training.optimizer : adam\n", + "2019-10-14 12:43:18,658 cfg.training.normalization : tokens\n", + "2019-10-14 12:43:18,659 cfg.training.adam_betas : [0.9, 0.999]\n", + "2019-10-14 12:43:18,659 cfg.training.scheduling : noam\n", + "2019-10-14 12:43:18,659 cfg.training.learning_rate_factor : 0.5\n", + "2019-10-14 12:43:18,659 cfg.training.learning_rate_warmup : 1000\n", + "2019-10-14 12:43:18,659 cfg.training.patience : 8\n", + "2019-10-14 12:43:18,659 cfg.training.decrease_factor : 0.7\n", + "2019-10-14 12:43:18,659 cfg.training.loss : crossentropy\n", + "2019-10-14 12:43:18,659 cfg.training.learning_rate : 0.0002\n", + "2019-10-14 12:43:18,659 cfg.training.learning_rate_min : 1e-08\n", + "2019-10-14 12:43:18,660 cfg.training.weight_decay : 0.0\n", + "2019-10-14 12:43:18,660 cfg.training.label_smoothing : 0.1\n", + "2019-10-14 12:43:18,660 cfg.training.batch_size : 8192\n", + "2019-10-14 12:43:18,660 cfg.training.batch_type : token\n", + "2019-10-14 12:43:18,660 cfg.training.eval_batch_size : 1000\n", + "2019-10-14 12:43:18,660 cfg.training.eval_batch_type : token\n", + "2019-10-14 12:43:18,660 
cfg.training.batch_multiplier : 1\n", + "2019-10-14 12:43:18,660 cfg.training.early_stopping_metric : eval_metric\n", + "2019-10-14 12:43:18,661 cfg.training.epochs : 200\n", + "2019-10-14 12:43:18,661 cfg.training.validation_freq : 500\n", + "2019-10-14 12:43:18,661 cfg.training.logging_freq : 50\n", + "2019-10-14 12:43:18,661 cfg.training.eval_metric : bleu\n", + "2019-10-14 12:43:18,661 cfg.training.model_dir : models/enaf_transformer\n", + "2019-10-14 12:43:18,661 cfg.training.overwrite : True\n", + "2019-10-14 12:43:18,661 cfg.training.shuffle : True\n", + "2019-10-14 12:43:18,661 cfg.training.use_cuda : True\n", + "2019-10-14 12:43:18,661 cfg.training.max_output_length : 100\n", + "2019-10-14 12:43:18,661 cfg.training.print_valid_sents : [0, 1, 2, 3]\n", + "2019-10-14 12:43:18,662 cfg.training.keep_last_ckpts : 3\n", + "2019-10-14 12:43:18,662 cfg.model.initializer : xavier\n", + "2019-10-14 12:43:18,662 cfg.model.bias_initializer : zeros\n", + "2019-10-14 12:43:18,662 cfg.model.init_gain : 1.0\n", + "2019-10-14 12:43:18,662 cfg.model.embed_initializer : xavier\n", + "2019-10-14 12:43:18,662 cfg.model.embed_init_gain : 1.0\n", + "2019-10-14 12:43:18,662 cfg.model.tied_embeddings : True\n", + "2019-10-14 12:43:18,662 cfg.model.tied_softmax : True\n", + "2019-10-14 12:43:18,662 cfg.model.encoder.type : transformer\n", + "2019-10-14 12:43:18,662 cfg.model.encoder.num_layers : 6\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.num_heads : 8\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.embeddings.embedding_dim : 512\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.embeddings.scale : True\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.embeddings.dropout : 0.0\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.hidden_size : 512\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.ff_size : 2048\n", + "2019-10-14 12:43:18,663 cfg.model.encoder.dropout : 0.3\n", + "2019-10-14 12:43:18,663 cfg.model.decoder.type : transformer\n", + "2019-10-14 12:43:18,663 cfg.model.decoder.num_layers : 6\n", + "2019-10-14 12:43:18,663 cfg.model.decoder.num_heads : 8\n", + "2019-10-14 12:43:18,664 cfg.model.decoder.embeddings.embedding_dim : 512\n", + "2019-10-14 12:43:18,664 cfg.model.decoder.embeddings.scale : True\n", + "2019-10-14 12:43:18,664 cfg.model.decoder.embeddings.dropout : 0.0\n", + "2019-10-14 12:43:18,664 cfg.model.decoder.hidden_size : 512\n", + "2019-10-14 12:43:18,664 cfg.model.decoder.ff_size : 2048\n", + "2019-10-14 12:43:18,664 cfg.model.decoder.dropout : 0.3\n", + "2019-10-14 12:43:18,664 Data set sizes: \n", + "\ttrain 6532,\n", + "\tvalid 380,\n", + "\ttest 417\n", + "2019-10-14 12:43:18,665 First training example:\n", + "\t[SRC] how to introduce this topic\n", + "\t[TRG] hoe om hierdie onderwerp bekend te stel\n", + "2019-10-14 12:43:18,665 First 10 words (src): (0) (1) (2) (3) (4) . (5) die (6) the (7) is (8) in (9) of\n", + "2019-10-14 12:43:18,665 First 10 words (trg): (0) (1) (2) (3) (4) . 
(5) die (6) the (7) is (8) in (9) of\n", + "2019-10-14 12:43:18,666 Number of Src words (types): 3907\n", + "2019-10-14 12:43:18,666 Number of Trg words (types): 3907\n", + "2019-10-14 12:43:18,666 Model(\n", + "\tencoder=TransformerEncoder(num_layers=6, num_heads=8),\n", + "\tdecoder=TransformerDecoder(num_layers=6, num_heads=8),\n", + "\tsrc_embed=Embeddings(embedding_dim=512, vocab_size=3907),\n", + "\ttrg_embed=Embeddings(embedding_dim=512, vocab_size=3907))\n", + "2019-10-14 12:43:18,670 EPOCH 1\n", + "2019-10-14 12:44:01,255 Epoch 1: total training loss 244.26\n", + "2019-10-14 12:44:01,255 EPOCH 2\n", + "2019-10-14 12:44:14,551 Epoch 2 Step: 50 Batch Loss: 5.978227 Tokens per Sec: 2607, Lr: 0.000035\n", + "2019-10-14 12:44:43,529 Epoch 2: total training loss 224.33\n", + "2019-10-14 12:44:43,529 EPOCH 3\n", + "2019-10-14 12:45:11,673 Epoch 3 Step: 100 Batch Loss: 5.400428 Tokens per Sec: 2707, Lr: 0.000070\n", + "2019-10-14 12:45:26,552 Epoch 3: total training loss 206.17\n", + "2019-10-14 12:45:26,553 EPOCH 4\n", + "2019-10-14 12:46:07,512 Epoch 4 Step: 150 Batch Loss: 5.548982 Tokens per Sec: 2680, Lr: 0.000105\n", + "2019-10-14 12:46:08,579 Epoch 4: total training loss 192.08\n", + "2019-10-14 12:46:08,579 EPOCH 5\n", + "2019-10-14 12:46:50,694 Epoch 5: total training loss 186.20\n", + "2019-10-14 12:46:50,694 EPOCH 6\n", + "2019-10-14 12:47:04,711 Epoch 6 Step: 200 Batch Loss: 5.020994 Tokens per Sec: 2688, Lr: 0.000140\n", + "2019-10-14 12:47:32,907 Epoch 6: total training loss 182.43\n", + "2019-10-14 12:47:32,907 EPOCH 7\n", + "2019-10-14 12:48:00,308 Epoch 7 Step: 250 Batch Loss: 4.471223 Tokens per Sec: 2591, Lr: 0.000175\n", + "2019-10-14 12:48:15,246 Epoch 7: total training loss 181.59\n", + "2019-10-14 12:48:15,246 EPOCH 8\n", + "2019-10-14 12:48:56,949 Epoch 8 Step: 300 Batch Loss: 4.548144 Tokens per Sec: 2648, Lr: 0.000210\n", + "2019-10-14 12:48:57,974 Epoch 8: total training loss 179.61\n", + "2019-10-14 12:48:57,974 EPOCH 9\n", + "2019-10-14 12:49:40,238 Epoch 9: total training loss 168.50\n", + "2019-10-14 12:49:40,238 EPOCH 10\n", + "2019-10-14 12:49:53,519 Epoch 10 Step: 350 Batch Loss: 4.674281 Tokens per Sec: 2692, Lr: 0.000245\n", + "2019-10-14 12:50:22,528 Epoch 10: total training loss 168.05\n", + "2019-10-14 12:50:22,528 EPOCH 11\n", + "2019-10-14 12:50:49,320 Epoch 11 Step: 400 Batch Loss: 4.504136 Tokens per Sec: 2676, Lr: 0.000280\n", + "2019-10-14 12:51:04,883 Epoch 11: total training loss 163.38\n", + "2019-10-14 12:51:04,883 EPOCH 12\n", + "2019-10-14 12:51:45,286 Epoch 12 Step: 450 Batch Loss: 4.119469 Tokens per Sec: 2645, Lr: 0.000314\n", + "2019-10-14 12:51:47,541 Epoch 12: total training loss 157.61\n", + "2019-10-14 12:51:47,541 EPOCH 13\n", + "2019-10-14 12:52:29,667 Epoch 13: total training loss 147.60\n", + "2019-10-14 12:52:29,667 EPOCH 14\n", + "2019-10-14 12:52:41,707 Epoch 14 Step: 500 Batch Loss: 3.357079 Tokens per Sec: 2702, Lr: 0.000349\n", + "2019-10-14 12:54:17,443 Hooray! 
New best validation result [eval_metric]!\n", + "2019-10-14 12:54:17,444 Saving new checkpoint.\n", + "2019-10-14 12:54:19,027 Example #0\n", + "2019-10-14 12:54:19,027 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 12:54:19,027 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 12:54:19,027 \tHypothesis: hoe kan jy die water van die water ?\n", + "2019-10-14 12:54:19,028 Example #1\n", + "2019-10-14 12:54:19,028 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 12:54:19,028 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 12:54:19,028 \tHypothesis: jy kan jy die water in die water in die water in die water ?\n", + "2019-10-14 12:54:19,028 Example #2\n", + "2019-10-14 12:54:19,029 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 12:54:19,029 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 12:54:19,029 \tHypothesis: dit is 'n paar paar paar paar paar paar materiaal .\n", + "2019-10-14 12:54:19,029 Example #3\n", + "2019-10-14 12:54:19,029 \tSource: how could you improve your design ?\n", + "2019-10-14 12:54:19,029 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 12:54:19,029 \tHypothesis: hoe kan jy die volgende maantuig ?\n", + "2019-10-14 12:54:19,029 Validation result at epoch 14, step 500: bleu: 0.79, loss: 29601.4746, ppl: 67.6085, duration: 97.3222s\n", + "2019-10-14 12:54:50,340 Epoch 14: total training loss 150.84\n", + "2019-10-14 12:54:50,341 EPOCH 15\n", + "2019-10-14 12:55:14,755 Epoch 15 Step: 550 Batch Loss: 3.614363 Tokens per Sec: 2627, Lr: 0.000384\n", + "2019-10-14 12:55:32,574 Epoch 15: total training loss 146.25\n", + "2019-10-14 12:55:32,574 EPOCH 16\n", + "2019-10-14 12:56:11,323 Epoch 16 Step: 600 Batch Loss: 3.674654 Tokens per Sec: 2684, Lr: 0.000419\n", + "2019-10-14 12:56:14,683 Epoch 16: total training loss 134.65\n", + "2019-10-14 12:56:14,683 EPOCH 17\n", + "2019-10-14 12:56:57,512 Epoch 17: total training loss 132.37\n", + "2019-10-14 12:56:57,512 EPOCH 18\n", + "2019-10-14 12:57:07,695 Epoch 18 Step: 650 Batch Loss: 3.671648 Tokens per Sec: 2392, Lr: 0.000454\n", + "2019-10-14 12:57:40,189 Epoch 18: total training loss 122.92\n", + "2019-10-14 12:57:40,190 EPOCH 19\n", + "2019-10-14 12:58:04,285 Epoch 19 Step: 700 Batch Loss: 3.902515 Tokens per Sec: 2553, Lr: 0.000489\n", + "2019-10-14 12:58:22,967 Epoch 19: total training loss 118.08\n", + "2019-10-14 12:58:22,968 EPOCH 20\n", + "2019-10-14 12:59:00,420 Epoch 20 Step: 750 Batch Loss: 2.831395 Tokens per Sec: 2677, Lr: 0.000524\n", + "2019-10-14 12:59:04,987 Epoch 20: total training loss 112.01\n", + "2019-10-14 12:59:04,987 EPOCH 21\n", + "2019-10-14 12:59:47,422 Epoch 21: total training loss 106.90\n", + "2019-10-14 12:59:47,423 EPOCH 22\n", + "2019-10-14 12:59:56,598 Epoch 22 Step: 800 Batch Loss: 3.423971 Tokens per Sec: 2571, Lr: 0.000559\n", + "2019-10-14 13:00:30,123 Epoch 22: total training loss 99.38\n", + "2019-10-14 13:00:30,123 EPOCH 23\n", + "2019-10-14 13:00:52,852 Epoch 23 Step: 850 Batch Loss: 2.306886 Tokens per Sec: 2943, Lr: 0.000594\n", + "2019-10-14 13:01:12,274 Epoch 23: total training loss 95.09\n", + "2019-10-14 13:01:12,275 EPOCH 24\n", + "2019-10-14 13:01:47,395 Epoch 24 Step: 900 Batch Loss: 3.138384 Tokens per Sec: 2630, Lr: 0.000629\n", + "2019-10-14 13:01:54,577 Epoch 24: total training loss 88.29\n", + "2019-10-14 
13:01:54,577 EPOCH 25\n", + "2019-10-14 13:02:37,321 Epoch 25: total training loss 81.18\n", + "2019-10-14 13:02:37,321 EPOCH 26\n", + "2019-10-14 13:02:43,928 Epoch 26 Step: 950 Batch Loss: 1.868362 Tokens per Sec: 2742, Lr: 0.000664\n", + "2019-10-14 13:03:19,550 Epoch 26: total training loss 80.46\n", + "2019-10-14 13:03:19,550 EPOCH 27\n", + "2019-10-14 13:03:41,126 Epoch 27 Step: 1000 Batch Loss: 2.968389 Tokens per Sec: 2822, Lr: 0.000699\n", + "2019-10-14 13:05:16,804 Hooray! New best validation result [eval_metric]!\n", + "2019-10-14 13:05:16,804 Saving new checkpoint.\n", + "2019-10-14 13:05:18,951 Example #0\n", + "2019-10-14 13:05:18,952 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 13:05:18,952 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 13:05:18,952 \tHypothesis: as jy hoe kan dit verhoek kry ?\n", + "2019-10-14 13:05:18,952 Example #1\n", + "2019-10-14 13:05:18,952 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 13:05:18,952 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 13:05:18,952 \tHypothesis: dink jy dat jy die water deur die water te los deur dit deur jou filter te los ?\n", + "2019-10-14 13:05:18,953 Example #2\n", + "2019-10-14 13:05:18,953 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 13:05:18,953 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 13:05:18,953 \tHypothesis: probeer dit en dan as dit met 'n aktiwiteit maak .\n", + "2019-10-14 13:05:18,953 Example #3\n", + "2019-10-14 13:05:18,953 \tSource: how could you improve your design ?\n", + "2019-10-14 13:05:18,954 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:05:18,954 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:05:18,954 Validation result at epoch 27, step 1000: bleu: 13.47, loss: 23326.1074, ppl: 27.6726, duration: 97.8270s\n", + "2019-10-14 13:05:39,334 Epoch 27: total training loss 72.92\n", + "2019-10-14 13:05:39,334 EPOCH 28\n", + "2019-10-14 13:06:16,085 Epoch 28 Step: 1050 Batch Loss: 1.593412 Tokens per Sec: 2656, Lr: 0.000682\n", + "2019-10-14 13:06:21,639 Epoch 28: total training loss 67.38\n", + "2019-10-14 13:06:21,639 EPOCH 29\n", + "2019-10-14 13:07:03,784 Epoch 29: total training loss 62.05\n", + "2019-10-14 13:07:03,784 EPOCH 30\n", + "2019-10-14 13:07:12,184 Epoch 30 Step: 1100 Batch Loss: 1.236794 Tokens per Sec: 2804, Lr: 0.000666\n", + "2019-10-14 13:07:46,124 Epoch 30: total training loss 62.43\n", + "2019-10-14 13:07:46,124 EPOCH 31\n", + "2019-10-14 13:08:09,436 Epoch 31 Step: 1150 Batch Loss: 2.306559 Tokens per Sec: 2739, Lr: 0.000652\n", + "2019-10-14 13:08:29,001 Epoch 31: total training loss 57.30\n", + "2019-10-14 13:08:29,001 EPOCH 32\n", + "2019-10-14 13:09:04,729 Epoch 32 Step: 1200 Batch Loss: 1.965446 Tokens per Sec: 2616, Lr: 0.000638\n", + "2019-10-14 13:09:11,791 Epoch 32: total training loss 52.97\n", + "2019-10-14 13:09:11,792 EPOCH 33\n", + "2019-10-14 13:09:53,262 Epoch 33: total training loss 47.08\n", + "2019-10-14 13:09:53,262 EPOCH 34\n", + "2019-10-14 13:10:02,434 Epoch 34 Step: 1250 Batch Loss: 0.589475 Tokens per Sec: 2552, Lr: 0.000625\n", + "2019-10-14 13:10:36,168 Epoch 34: total training loss 45.60\n", + "2019-10-14 13:10:36,168 EPOCH 35\n", + "2019-10-14 13:10:59,672 Epoch 35 Step: 1300 Batch Loss: 1.279803 Tokens per Sec: 2816, Lr: 0.000613\n", + 
"2019-10-14 13:11:19,286 Epoch 35: total training loss 42.86\n", + "2019-10-14 13:11:19,287 EPOCH 36\n", + "2019-10-14 13:11:54,857 Epoch 36 Step: 1350 Batch Loss: 1.528362 Tokens per Sec: 2678, Lr: 0.000601\n", + "2019-10-14 13:12:01,510 Epoch 36: total training loss 40.04\n", + "2019-10-14 13:12:01,511 EPOCH 37\n", + "2019-10-14 13:12:44,000 Epoch 37: total training loss 37.40\n", + "2019-10-14 13:12:44,000 EPOCH 38\n", + "2019-10-14 13:12:49,693 Epoch 38 Step: 1400 Batch Loss: 0.699236 Tokens per Sec: 2253, Lr: 0.000591\n", + "2019-10-14 13:13:25,735 Epoch 38: total training loss 37.23\n", + "2019-10-14 13:13:25,735 EPOCH 39\n", + "2019-10-14 13:13:45,608 Epoch 39 Step: 1450 Batch Loss: 0.727600 Tokens per Sec: 2463, Lr: 0.000580\n", + "2019-10-14 13:14:08,322 Epoch 39: total training loss 34.42\n", + "2019-10-14 13:14:08,323 EPOCH 40\n", + "2019-10-14 13:14:42,296 Epoch 40 Step: 1500 Batch Loss: 0.550756 Tokens per Sec: 2637, Lr: 0.000571\n", + "2019-10-14 13:16:17,769 Hooray! New best validation result [eval_metric]!\n", + "2019-10-14 13:16:17,769 Saving new checkpoint.\n", + "2019-10-14 13:16:19,392 Example #0\n", + "2019-10-14 13:16:19,393 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 13:16:19,393 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 13:16:19,393 \tHypothesis: indien jy so verhoek kan jy dit in die water oplos ?\n", + "2019-10-14 13:16:19,393 Example #1\n", + "2019-10-14 13:16:19,393 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 13:16:19,393 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 13:16:19,393 \tHypothesis: dink jy dat jy verder kan suiwer deur die water gesuiwer kan word deur dit weer die filter te gebruik ?\n", + "2019-10-14 13:16:19,394 Example #2\n", + "2019-10-14 13:16:19,394 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 13:16:19,394 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 13:16:19,394 \tHypothesis: probeer dit en kyk of 'n verskil 'n verskil verskil .\n", + "2019-10-14 13:16:19,394 Example #3\n", + "2019-10-14 13:16:19,394 \tSource: how could you improve your design ?\n", + "2019-10-14 13:16:19,394 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:16:19,394 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:16:19,394 Validation result at epoch 40, step 1500: bleu: 17.52, loss: 23168.0332, ppl: 27.0569, duration: 97.0980s\n", + "2019-10-14 13:16:27,724 Epoch 40: total training loss 30.99\n", + "2019-10-14 13:16:27,724 EPOCH 41\n", + "2019-10-14 13:17:10,016 Epoch 41: total training loss 29.16\n", + "2019-10-14 13:17:10,016 EPOCH 42\n", + "2019-10-14 13:17:16,590 Epoch 42 Step: 1550 Batch Loss: 0.864469 Tokens per Sec: 2310, Lr: 0.000561\n", + "2019-10-14 13:17:52,810 Epoch 42: total training loss 27.85\n", + "2019-10-14 13:17:52,811 EPOCH 43\n", + "2019-10-14 13:18:13,481 Epoch 43 Step: 1600 Batch Loss: 0.502151 Tokens per Sec: 2681, Lr: 0.000552\n", + "2019-10-14 13:18:35,395 Epoch 43: total training loss 25.32\n", + "2019-10-14 13:18:35,395 EPOCH 44\n", + "2019-10-14 13:19:09,322 Epoch 44 Step: 1650 Batch Loss: 0.357009 Tokens per Sec: 2491, Lr: 0.000544\n", + "2019-10-14 13:19:18,049 Epoch 44: total training loss 24.82\n", + "2019-10-14 13:19:18,050 EPOCH 45\n", + "2019-10-14 13:20:00,030 Epoch 45: total training loss 24.90\n", + "2019-10-14 13:20:00,030 
EPOCH 46\n", + "2019-10-14 13:20:05,353 Epoch 46 Step: 1700 Batch Loss: 0.509773 Tokens per Sec: 2230, Lr: 0.000536\n", + "2019-10-14 13:20:42,493 Epoch 46: total training loss 22.97\n", + "2019-10-14 13:20:42,493 EPOCH 47\n", + "2019-10-14 13:21:01,650 Epoch 47 Step: 1750 Batch Loss: 0.740150 Tokens per Sec: 2504, Lr: 0.000528\n", + "2019-10-14 13:21:24,222 Epoch 47: total training loss 21.81\n", + "2019-10-14 13:21:24,222 EPOCH 48\n", + "2019-10-14 13:21:58,169 Epoch 48 Step: 1800 Batch Loss: 0.870140 Tokens per Sec: 2663, Lr: 0.000521\n", + "2019-10-14 13:22:07,053 Epoch 48: total training loss 26.55\n", + "2019-10-14 13:22:07,053 EPOCH 49\n", + "2019-10-14 13:22:48,748 Epoch 49: total training loss 21.67\n", + "2019-10-14 13:22:48,749 EPOCH 50\n", + "2019-10-14 13:22:54,819 Epoch 50 Step: 1850 Batch Loss: 0.840069 Tokens per Sec: 2535, Lr: 0.000514\n", + "2019-10-14 13:23:30,577 Epoch 50: total training loss 19.63\n", + "2019-10-14 13:23:30,577 EPOCH 51\n", + "2019-10-14 13:23:53,289 Epoch 51 Step: 1900 Batch Loss: 0.312580 Tokens per Sec: 2814, Lr: 0.000507\n", + "2019-10-14 13:24:12,748 Epoch 51: total training loss 17.41\n", + "2019-10-14 13:24:12,748 EPOCH 52\n", + "2019-10-14 13:24:48,503 Epoch 52 Step: 1950 Batch Loss: 0.355698 Tokens per Sec: 2590, Lr: 0.000500\n", + "2019-10-14 13:24:55,549 Epoch 52: total training loss 17.01\n", + "2019-10-14 13:24:55,549 EPOCH 53\n", + "2019-10-14 13:25:37,796 Epoch 53: total training loss 17.39\n", + "2019-10-14 13:25:37,796 EPOCH 54\n", + "2019-10-14 13:25:44,859 Epoch 54 Step: 2000 Batch Loss: 0.708449 Tokens per Sec: 3057, Lr: 0.000494\n", + "2019-10-14 13:27:20,375 Example #0\n", + "2019-10-14 13:27:20,375 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 13:27:20,375 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 13:27:20,376 \tHypothesis: as jy kan verhoed van dit akkaat?\n", + "2019-10-14 13:27:20,376 Example #1\n", + "2019-10-14 13:27:20,376 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 13:27:20,376 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 13:27:20,376 \tHypothesis: dink jy dat jy die water deur die gesuiwer kan word deur die filter gegooi ?\n", + "2019-10-14 13:27:20,376 Example #2\n", + "2019-10-14 13:27:20,377 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 13:27:20,377 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 13:27:20,377 \tHypothesis: probeer dit op en kyk of dit 'n verskil sien .\n", + "2019-10-14 13:27:20,377 Example #3\n", + "2019-10-14 13:27:20,377 \tSource: how could you improve your design ?\n", + "2019-10-14 13:27:20,377 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:27:20,377 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:27:20,378 Validation result at epoch 54, step 2000: bleu: 16.99, loss: 24336.3438, ppl: 31.9524, duration: 95.5183s\n", + "2019-10-14 13:27:55,572 Epoch 54: total training loss 16.50\n", + "2019-10-14 13:27:55,572 EPOCH 55\n", + "2019-10-14 13:28:16,651 Epoch 55 Step: 2050 Batch Loss: 0.215570 Tokens per Sec: 2692, Lr: 0.000488\n", + "2019-10-14 13:28:38,249 Epoch 55: total training loss 15.59\n", + "2019-10-14 13:28:38,250 EPOCH 56\n", + "2019-10-14 13:29:12,406 Epoch 56 Step: 2100 Batch Loss: 0.415079 Tokens per Sec: 2700, Lr: 0.000482\n", + "2019-10-14 13:29:20,423 Epoch 56: total training 
loss 14.83\n", + "2019-10-14 13:29:20,423 EPOCH 57\n", + "2019-10-14 13:30:03,068 Epoch 57: total training loss 13.85\n", + "2019-10-14 13:30:03,069 EPOCH 58\n", + "2019-10-14 13:30:08,604 Epoch 58 Step: 2150 Batch Loss: 0.424672 Tokens per Sec: 2335, Lr: 0.000477\n", + "2019-10-14 13:30:45,748 Epoch 58: total training loss 13.42\n", + "2019-10-14 13:30:45,748 EPOCH 59\n", + "2019-10-14 13:31:05,153 Epoch 59 Step: 2200 Batch Loss: 0.479001 Tokens per Sec: 2905, Lr: 0.000471\n", + "2019-10-14 13:31:27,490 Epoch 59: total training loss 12.58\n", + "2019-10-14 13:31:27,491 EPOCH 60\n", + "2019-10-14 13:32:02,216 Epoch 60 Step: 2250 Batch Loss: 0.338876 Tokens per Sec: 2750, Lr: 0.000466\n", + "2019-10-14 13:32:09,794 Epoch 60: total training loss 11.78\n", + "2019-10-14 13:32:09,794 EPOCH 61\n", + "2019-10-14 13:32:52,017 Epoch 61: total training loss 12.11\n", + "2019-10-14 13:32:52,018 EPOCH 62\n", + "2019-10-14 13:32:56,769 Epoch 62 Step: 2300 Batch Loss: 0.220975 Tokens per Sec: 1681, Lr: 0.000461\n", + "2019-10-14 13:33:33,964 Epoch 62: total training loss 14.47\n", + "2019-10-14 13:33:33,964 EPOCH 63\n", + "2019-10-14 13:33:53,694 Epoch 63 Step: 2350 Batch Loss: 0.380454 Tokens per Sec: 2739, Lr: 0.000456\n", + "2019-10-14 13:34:16,460 Epoch 63: total training loss 13.72\n", + "2019-10-14 13:34:16,460 EPOCH 64\n", + "2019-10-14 13:34:50,164 Epoch 64 Step: 2400 Batch Loss: 0.254034 Tokens per Sec: 2638, Lr: 0.000451\n", + "2019-10-14 13:34:59,227 Epoch 64: total training loss 12.56\n", + "2019-10-14 13:34:59,227 EPOCH 65\n", + "2019-10-14 13:35:41,891 Epoch 65: total training loss 11.52\n", + "2019-10-14 13:35:41,891 EPOCH 66\n", + "2019-10-14 13:35:46,436 Epoch 66 Step: 2450 Batch Loss: 0.359930 Tokens per Sec: 3065, Lr: 0.000446\n", + "2019-10-14 13:36:23,305 Epoch 66: total training loss 10.27\n", + "2019-10-14 13:36:23,305 EPOCH 67\n", + "2019-10-14 13:36:42,483 Epoch 67 Step: 2500 Batch Loss: 0.197312 Tokens per Sec: 2530, Lr: 0.000442\n", + "2019-10-14 13:38:17,908 Hooray! 
New best validation result [eval_metric]!\n", + "2019-10-14 13:38:17,908 Saving new checkpoint.\n", + "2019-10-14 13:38:19,640 Example #0\n", + "2019-10-14 13:38:19,640 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 13:38:19,641 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 13:38:19,641 \tHypothesis: indien jy kan verhoed van dit watervrieswater ?\n", + "2019-10-14 13:38:19,641 Example #1\n", + "2019-10-14 13:38:19,641 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 13:38:19,641 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 13:38:19,641 \tHypothesis: dink jy kan die water gesuiwer kan word deur die vinnige filter te verwyder ?\n", + "2019-10-14 13:38:19,641 Example #2\n", + "2019-10-14 13:38:19,641 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 13:38:19,641 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 13:38:19,641 \tHypothesis: probeer dit en kyk of dit 'n verskil waaruit 'n verskil .\n", + "2019-10-14 13:38:19,641 Example #3\n", + "2019-10-14 13:38:19,642 \tSource: how could you improve your design ?\n", + "2019-10-14 13:38:19,642 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:38:19,642 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:38:19,642 Validation result at epoch 67, step 2500: bleu: 18.67, loss: 24009.4551, ppl: 30.4997, duration: 97.1581s\n", + "2019-10-14 13:38:43,156 Epoch 67: total training loss 10.00\n", + "2019-10-14 13:38:43,157 EPOCH 68\n", + "2019-10-14 13:39:16,718 Epoch 68 Step: 2550 Batch Loss: 0.202484 Tokens per Sec: 2620, Lr: 0.000438\n", + "2019-10-14 13:39:25,771 Epoch 68: total training loss 10.04\n", + "2019-10-14 13:39:25,771 EPOCH 69\n", + "2019-10-14 13:40:08,296 Epoch 69: total training loss 10.42\n", + "2019-10-14 13:40:08,296 EPOCH 70\n", + "2019-10-14 13:40:13,103 Epoch 70 Step: 2600 Batch Loss: 0.330738 Tokens per Sec: 3438, Lr: 0.000433\n", + "2019-10-14 13:40:50,804 Epoch 70: total training loss 10.82\n", + "2019-10-14 13:40:50,804 EPOCH 71\n", + "2019-10-14 13:41:09,713 Epoch 71 Step: 2650 Batch Loss: 0.404237 Tokens per Sec: 2610, Lr: 0.000429\n", + "2019-10-14 13:41:33,656 Epoch 71: total training loss 9.97\n", + "2019-10-14 13:41:33,656 EPOCH 72\n", + "2019-10-14 13:42:06,690 Epoch 72 Step: 2700 Batch Loss: 0.202149 Tokens per Sec: 2622, Lr: 0.000425\n", + "2019-10-14 13:42:16,002 Epoch 72: total training loss 9.23\n", + "2019-10-14 13:42:16,002 EPOCH 73\n", + "2019-10-14 13:42:58,534 Epoch 73: total training loss 9.40\n", + "2019-10-14 13:42:58,534 EPOCH 74\n", + "2019-10-14 13:43:02,796 Epoch 74 Step: 2750 Batch Loss: 0.351751 Tokens per Sec: 2436, Lr: 0.000421\n", + "2019-10-14 13:43:40,780 Epoch 74: total training loss 9.55\n", + "2019-10-14 13:43:40,781 EPOCH 75\n", + "2019-10-14 13:43:57,967 Epoch 75 Step: 2800 Batch Loss: 0.194954 Tokens per Sec: 2414, Lr: 0.000418\n", + "2019-10-14 13:44:23,007 Epoch 75: total training loss 9.31\n", + "2019-10-14 13:44:23,008 EPOCH 76\n", + "2019-10-14 13:44:54,230 Epoch 76 Step: 2850 Batch Loss: 0.300233 Tokens per Sec: 2649, Lr: 0.000414\n", + "2019-10-14 13:45:05,609 Epoch 76: total training loss 8.73\n", + "2019-10-14 13:45:05,609 EPOCH 77\n", + "2019-10-14 13:45:48,617 Epoch 77: total training loss 8.41\n", + "2019-10-14 13:45:48,617 EPOCH 78\n", + "2019-10-14 13:45:50,484 Epoch 78 Step: 2900 Batch 
Loss: 0.131869 Tokens per Sec: 1625, Lr: 0.000410\n", + "2019-10-14 13:46:31,375 Epoch 78: total training loss 8.38\n", + "2019-10-14 13:46:31,375 EPOCH 79\n", + "2019-10-14 13:46:47,337 Epoch 79 Step: 2950 Batch Loss: 0.196720 Tokens per Sec: 2779, Lr: 0.000407\n", + "2019-10-14 13:47:14,218 Epoch 79: total training loss 8.05\n", + "2019-10-14 13:47:14,218 EPOCH 80\n", + "2019-10-14 13:47:43,682 Epoch 80 Step: 3000 Batch Loss: 0.177025 Tokens per Sec: 2766, Lr: 0.000403\n", + "2019-10-14 13:49:19,192 Example #0\n", + "2019-10-14 13:49:19,192 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 13:49:19,193 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 13:49:19,193 \tHypothesis: as jy kan verhoed van dit watervrieswater ?\n", + "2019-10-14 13:49:19,193 Example #1\n", + "2019-10-14 13:49:19,193 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 13:49:19,193 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 13:49:19,193 \tHypothesis: dink jy dat jy die water gesuiwer kan word deur jou filter gegooi word ?\n", + "2019-10-14 13:49:19,193 Example #2\n", + "2019-10-14 13:49:19,194 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 13:49:19,194 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 13:49:19,194 \tHypothesis: probeer dit sien en as dit 'n verskil is .\n", + "2019-10-14 13:49:19,194 Example #3\n", + "2019-10-14 13:49:19,194 \tSource: how could you improve your design ?\n", + "2019-10-14 13:49:19,194 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:49:19,194 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 13:49:19,195 Validation result at epoch 80, step 3000: bleu: 17.61, loss: 23779.2090, ppl: 29.5162, duration: 95.5121s\n", + "2019-10-14 13:49:31,864 Epoch 80: total training loss 8.00\n", + "2019-10-14 13:49:31,865 EPOCH 81\n", + "2019-10-14 13:50:14,088 Epoch 81 Step: 3050 Batch Loss: 0.274788 Tokens per Sec: 2658, Lr: 0.000400\n", + "2019-10-14 13:50:14,089 Epoch 81: total training loss 8.38\n", + "2019-10-14 13:50:14,089 EPOCH 82\n", + "2019-10-14 13:50:55,671 Epoch 82: total training loss 8.24\n", + "2019-10-14 13:50:55,671 EPOCH 83\n", + "2019-10-14 13:51:11,083 Epoch 83 Step: 3100 Batch Loss: 0.196188 Tokens per Sec: 2979, Lr: 0.000397\n", + "2019-10-14 13:51:37,830 Epoch 83: total training loss 7.34\n", + "2019-10-14 13:51:37,830 EPOCH 84\n", + "2019-10-14 13:52:07,124 Epoch 84 Step: 3150 Batch Loss: 0.199934 Tokens per Sec: 2795, Lr: 0.000394\n", + "2019-10-14 13:52:20,097 Epoch 84: total training loss 7.67\n", + "2019-10-14 13:52:20,097 EPOCH 85\n", + "2019-10-14 13:53:02,868 Epoch 85 Step: 3200 Batch Loss: 0.152920 Tokens per Sec: 2624, Lr: 0.000391\n", + "2019-10-14 13:53:02,868 Epoch 85: total training loss 7.51\n", + "2019-10-14 13:53:02,869 EPOCH 86\n", + "2019-10-14 13:53:45,270 Epoch 86: total training loss 7.43\n", + "2019-10-14 13:53:45,271 EPOCH 87\n", + "2019-10-14 13:53:59,125 Epoch 87 Step: 3250 Batch Loss: 0.253703 Tokens per Sec: 2731, Lr: 0.000388\n", + "2019-10-14 13:54:27,365 Epoch 87: total training loss 9.07\n", + "2019-10-14 13:54:27,365 EPOCH 88\n", + "2019-10-14 13:54:53,512 Epoch 88 Step: 3300 Batch Loss: 0.200083 Tokens per Sec: 2637, Lr: 0.000385\n", + "2019-10-14 13:55:09,850 Epoch 88: total training loss 8.31\n", + "2019-10-14 13:55:09,851 EPOCH 89\n", + "2019-10-14 
13:55:50,036 Epoch 89 Step: 3350 Batch Loss: 0.285257 Tokens per Sec: 2665, Lr: 0.000382\n", + "2019-10-14 13:55:52,249 Epoch 89: total training loss 7.43\n", + "2019-10-14 13:55:52,250 EPOCH 90\n", + "2019-10-14 13:56:34,941 Epoch 90: total training loss 7.23\n", + "2019-10-14 13:56:34,941 EPOCH 91\n", + "2019-10-14 13:56:45,804 Epoch 91 Step: 3400 Batch Loss: 0.151101 Tokens per Sec: 2442, Lr: 0.000379\n", + "2019-10-14 13:57:17,732 Epoch 91: total training loss 7.13\n", + "2019-10-14 13:57:17,733 EPOCH 92\n", + "2019-10-14 13:57:42,551 Epoch 92 Step: 3450 Batch Loss: 0.238364 Tokens per Sec: 2702, Lr: 0.000376\n", + "2019-10-14 13:58:00,074 Epoch 92: total training loss 7.13\n", + "2019-10-14 13:58:00,074 EPOCH 93\n", + "2019-10-14 13:58:38,428 Epoch 93 Step: 3500 Batch Loss: 0.156168 Tokens per Sec: 2574, Lr: 0.000374\n", + "2019-10-14 14:00:14,020 Hooray! New best validation result [eval_metric]!\n", + "2019-10-14 14:00:14,020 Saving new checkpoint.\n", + "2019-10-14 14:00:15,696 Example #0\n", + "2019-10-14 14:00:15,696 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 14:00:15,696 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 14:00:15,697 \tHypothesis: as jy kan verhoed dat dit nwatervrieswater ?\n", + "2019-10-14 14:00:15,697 Example #1\n", + "2019-10-14 14:00:15,697 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 14:00:15,697 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 14:00:15,697 \tHypothesis: dink jy jy kan die water gesuiwer deur die filter te suiwer word deur jou filter .\n", + "2019-10-14 14:00:15,697 Example #2\n", + "2019-10-14 14:00:15,697 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 14:00:15,697 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 14:00:15,698 \tHypothesis: probeer dit kyk en kyk of dit 'n verskil waaruit 'n verskil .\n", + "2019-10-14 14:00:15,698 Example #3\n", + "2019-10-14 14:00:15,698 \tSource: how could you improve your design ?\n", + "2019-10-14 14:00:15,698 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:00:15,698 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:00:15,698 Validation result at epoch 93, step 3500: bleu: 18.91, loss: 23638.4180, ppl: 28.9306, duration: 97.2703s\n", + "2019-10-14 14:00:20,121 Epoch 93: total training loss 6.92\n", + "2019-10-14 14:00:20,121 EPOCH 94\n", + "2019-10-14 14:01:02,420 Epoch 94: total training loss 6.41\n", + "2019-10-14 14:01:02,420 EPOCH 95\n", + "2019-10-14 14:01:12,494 Epoch 95 Step: 3550 Batch Loss: 0.199882 Tokens per Sec: 2254, Lr: 0.000371\n", + "2019-10-14 14:01:45,432 Epoch 95: total training loss 6.56\n", + "2019-10-14 14:01:45,432 EPOCH 96\n", + "2019-10-14 14:02:09,582 Epoch 96 Step: 3600 Batch Loss: 0.212894 Tokens per Sec: 2708, Lr: 0.000368\n", + "2019-10-14 14:02:27,804 Epoch 96: total training loss 6.28\n", + "2019-10-14 14:02:27,804 EPOCH 97\n", + "2019-10-14 14:03:06,199 Epoch 97 Step: 3650 Batch Loss: 0.126355 Tokens per Sec: 2621, Lr: 0.000366\n", + "2019-10-14 14:03:10,681 Epoch 97: total training loss 6.23\n", + "2019-10-14 14:03:10,681 EPOCH 98\n", + "2019-10-14 14:03:52,402 Epoch 98: total training loss 5.97\n", + "2019-10-14 14:03:52,402 EPOCH 99\n", + "2019-10-14 14:04:02,672 Epoch 99 Step: 3700 Batch Loss: 0.119305 Tokens per Sec: 2693, Lr: 0.000363\n", + "2019-10-14 14:04:34,213 
Epoch 99: total training loss 5.99\n", + "2019-10-14 14:04:34,213 EPOCH 100\n", + "2019-10-14 14:04:59,032 Epoch 100 Step: 3750 Batch Loss: 0.150591 Tokens per Sec: 2597, Lr: 0.000361\n", + "2019-10-14 14:05:16,652 Epoch 100: total training loss 6.17\n", + "2019-10-14 14:05:16,652 EPOCH 101\n", + "2019-10-14 14:05:55,422 Epoch 101 Step: 3800 Batch Loss: 0.141966 Tokens per Sec: 2635, Lr: 0.000358\n", + "2019-10-14 14:05:59,029 Epoch 101: total training loss 5.96\n", + "2019-10-14 14:05:59,029 EPOCH 102\n", + "2019-10-14 14:06:41,278 Epoch 102: total training loss 5.75\n", + "2019-10-14 14:06:41,278 EPOCH 103\n", + "2019-10-14 14:06:52,467 Epoch 103 Step: 3850 Batch Loss: 0.160515 Tokens per Sec: 2275, Lr: 0.000356\n", + "2019-10-14 14:07:23,817 Epoch 103: total training loss 5.90\n", + "2019-10-14 14:07:23,818 EPOCH 104\n", + "2019-10-14 14:07:49,366 Epoch 104 Step: 3900 Batch Loss: 0.192392 Tokens per Sec: 2688, Lr: 0.000354\n", + "2019-10-14 14:08:06,294 Epoch 104: total training loss 5.76\n", + "2019-10-14 14:08:06,294 EPOCH 105\n", + "2019-10-14 14:08:44,341 Epoch 105 Step: 3950 Batch Loss: 0.129470 Tokens per Sec: 2555, Lr: 0.000352\n", + "2019-10-14 14:08:49,102 Epoch 105: total training loss 5.88\n", + "2019-10-14 14:08:49,103 EPOCH 106\n", + "2019-10-14 14:09:31,311 Epoch 106: total training loss 5.78\n", + "2019-10-14 14:09:31,311 EPOCH 107\n", + "2019-10-14 14:09:40,304 Epoch 107 Step: 4000 Batch Loss: 0.183306 Tokens per Sec: 2589, Lr: 0.000349\n", + "2019-10-14 14:11:15,886 Hooray! New best validation result [eval_metric]!\n", + "2019-10-14 14:11:15,887 Saving new checkpoint.\n", + "2019-10-14 14:11:17,395 Example #0\n", + "2019-10-14 14:11:17,395 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 14:11:17,395 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 14:11:17,395 \tHypothesis: as jy so verhoed dat dit van watervroue kon maak ?\n", + "2019-10-14 14:11:17,395 Example #1\n", + "2019-10-14 14:11:17,395 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 14:11:17,395 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 14:11:17,395 \tHypothesis: dink jy kan die water gesuiwer het deur die filter te gesuiwer deur jou filter op jou filter gaan ?\n", + "2019-10-14 14:11:17,396 Example #2\n", + "2019-10-14 14:11:17,396 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 14:11:17,396 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 14:11:17,396 \tHypothesis: probeer dit uitkyk en as dit 'n verskil is .\n", + "2019-10-14 14:11:17,396 Example #3\n", + "2019-10-14 14:11:17,396 \tSource: how could you improve your design ?\n", + "2019-10-14 14:11:17,396 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:11:17,396 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:11:17,396 Validation result at epoch 107, step 4000: bleu: 19.69, loss: 23474.0020, ppl: 28.2613, duration: 97.0915s\n", + "2019-10-14 14:11:50,786 Epoch 107: total training loss 5.62\n", + "2019-10-14 14:11:50,786 EPOCH 108\n", + "2019-10-14 14:12:14,833 Epoch 108 Step: 4050 Batch Loss: 0.197383 Tokens per Sec: 2695, Lr: 0.000347\n", + "2019-10-14 14:12:33,790 Epoch 108: total training loss 5.56\n", + "2019-10-14 14:12:33,791 EPOCH 109\n", + "2019-10-14 14:13:11,618 Epoch 109 Step: 4100 Batch Loss: 0.163495 Tokens per Sec: 2669, Lr: 0.000345\n", + 
"2019-10-14 14:13:16,015 Epoch 109: total training loss 5.36\n", + "2019-10-14 14:13:16,015 EPOCH 110\n", + "2019-10-14 14:13:57,927 Epoch 110: total training loss 5.58\n", + "2019-10-14 14:13:57,928 EPOCH 111\n", + "2019-10-14 14:14:07,250 Epoch 111 Step: 4150 Batch Loss: 0.117560 Tokens per Sec: 2564, Lr: 0.000343\n", + "2019-10-14 14:14:39,416 Epoch 111: total training loss 5.19\n", + "2019-10-14 14:14:39,416 EPOCH 112\n", + "2019-10-14 14:15:04,200 Epoch 112 Step: 4200 Batch Loss: 0.138829 Tokens per Sec: 2694, Lr: 0.000341\n", + "2019-10-14 14:15:21,704 Epoch 112: total training loss 5.17\n", + "2019-10-14 14:15:21,704 EPOCH 113\n", + "2019-10-14 14:16:01,083 Epoch 113 Step: 4250 Batch Loss: 0.181514 Tokens per Sec: 2607, Lr: 0.000339\n", + "2019-10-14 14:16:04,541 Epoch 113: total training loss 5.40\n", + "2019-10-14 14:16:04,541 EPOCH 114\n", + "2019-10-14 14:16:47,336 Epoch 114: total training loss 5.60\n", + "2019-10-14 14:16:47,336 EPOCH 115\n", + "2019-10-14 14:16:56,394 Epoch 115 Step: 4300 Batch Loss: 0.130552 Tokens per Sec: 2567, Lr: 0.000337\n", + "2019-10-14 14:17:29,756 Epoch 115: total training loss 5.44\n", + "2019-10-14 14:17:29,756 EPOCH 116\n", + "2019-10-14 14:17:52,787 Epoch 116 Step: 4350 Batch Loss: 0.123957 Tokens per Sec: 2612, Lr: 0.000335\n", + "2019-10-14 14:18:11,981 Epoch 116: total training loss 5.18\n", + "2019-10-14 14:18:11,982 EPOCH 117\n", + "2019-10-14 14:18:49,066 Epoch 117 Step: 4400 Batch Loss: 0.174190 Tokens per Sec: 2633, Lr: 0.000333\n", + "2019-10-14 14:18:54,430 Epoch 117: total training loss 5.31\n", + "2019-10-14 14:18:54,430 EPOCH 118\n", + "2019-10-14 14:19:36,729 Epoch 118: total training loss 5.43\n", + "2019-10-14 14:19:36,729 EPOCH 119\n", + "2019-10-14 14:19:44,827 Epoch 119 Step: 4450 Batch Loss: 0.113445 Tokens per Sec: 2421, Lr: 0.000331\n", + "2019-10-14 14:20:18,533 Epoch 119: total training loss 5.30\n", + "2019-10-14 14:20:18,534 EPOCH 120\n", + "2019-10-14 14:20:41,879 Epoch 120 Step: 4500 Batch Loss: 0.117656 Tokens per Sec: 2725, Lr: 0.000329\n", + "2019-10-14 14:22:17,244 Hooray! 
New best validation result [eval_metric]!\n", + "2019-10-14 14:22:17,244 Saving new checkpoint.\n", + "2019-10-14 14:22:18,845 Example #0\n", + "2019-10-14 14:22:18,846 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 14:22:18,846 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 14:22:18,846 \tHypothesis: as jy so verhoed hoe dit van water ?\n", + "2019-10-14 14:22:18,846 Example #1\n", + "2019-10-14 14:22:18,846 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 14:22:18,846 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 14:22:18,846 \tHypothesis: dink jy jy kan die water gesuiwer word deur jou filter te gefiltreer ?\n", + "2019-10-14 14:22:18,846 Example #2\n", + "2019-10-14 14:22:18,847 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 14:22:18,847 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 14:22:18,847 \tHypothesis: probeer dit uit en kyk as dit 'n verskil maak .\n", + "2019-10-14 14:22:18,847 Example #3\n", + "2019-10-14 14:22:18,847 \tSource: how could you improve your design ?\n", + "2019-10-14 14:22:18,847 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:22:18,847 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:22:18,847 Validation result at epoch 120, step 4500: bleu: 19.82, loss: 23306.6621, ppl: 27.5961, duration: 96.9675s\n", + "2019-10-14 14:22:37,625 Epoch 120: total training loss 5.20\n", + "2019-10-14 14:22:37,625 EPOCH 121\n", + "2019-10-14 14:23:13,791 Epoch 121 Step: 4550 Batch Loss: 0.174481 Tokens per Sec: 2583, Lr: 0.000328\n", + "2019-10-14 14:23:20,700 Epoch 121: total training loss 5.47\n", + "2019-10-14 14:23:20,701 EPOCH 122\n", + "2019-10-14 14:24:02,569 Epoch 122: total training loss 5.27\n", + "2019-10-14 14:24:02,570 EPOCH 123\n", + "2019-10-14 14:24:10,970 Epoch 123 Step: 4600 Batch Loss: 0.217661 Tokens per Sec: 2786, Lr: 0.000326\n", + "2019-10-14 14:24:45,008 Epoch 123: total training loss 5.62\n", + "2019-10-14 14:24:45,008 EPOCH 124\n", + "2019-10-14 14:25:07,490 Epoch 124 Step: 4650 Batch Loss: 0.158564 Tokens per Sec: 2841, Lr: 0.000324\n", + "2019-10-14 14:25:26,989 Epoch 124: total training loss 5.68\n", + "2019-10-14 14:25:26,989 EPOCH 125\n", + "2019-10-14 14:26:03,930 Epoch 125 Step: 4700 Batch Loss: 0.124293 Tokens per Sec: 2575, Lr: 0.000322\n", + "2019-10-14 14:26:09,665 Epoch 125: total training loss 5.24\n", + "2019-10-14 14:26:09,665 EPOCH 126\n", + "2019-10-14 14:26:51,925 Epoch 126: total training loss 4.93\n", + "2019-10-14 14:26:51,925 EPOCH 127\n", + "2019-10-14 14:27:01,634 Epoch 127 Step: 4750 Batch Loss: 0.171232 Tokens per Sec: 3068, Lr: 0.000321\n", + "2019-10-14 14:27:34,062 Epoch 127: total training loss 4.83\n", + "2019-10-14 14:27:34,062 EPOCH 128\n", + "2019-10-14 14:27:56,389 Epoch 128 Step: 4800 Batch Loss: 0.124279 Tokens per Sec: 2422, Lr: 0.000319\n", + "2019-10-14 14:28:16,404 Epoch 128: total training loss 4.82\n", + "2019-10-14 14:28:16,405 EPOCH 129\n", + "2019-10-14 14:28:53,031 Epoch 129 Step: 4850 Batch Loss: 0.101875 Tokens per Sec: 2577, Lr: 0.000317\n", + "2019-10-14 14:28:58,831 Epoch 129: total training loss 4.87\n", + "2019-10-14 14:28:58,831 EPOCH 130\n", + "2019-10-14 14:29:41,890 Epoch 130: total training loss 4.84\n", + "2019-10-14 14:29:41,890 EPOCH 131\n", + "2019-10-14 14:29:48,409 Epoch 131 Step: 4900 Batch 
Loss: 0.141378 Tokens per Sec: 2225, Lr: 0.000316\n", + "2019-10-14 14:30:24,052 Epoch 131: total training loss 4.69\n", + "2019-10-14 14:30:24,052 EPOCH 132\n", + "2019-10-14 14:30:44,513 Epoch 132 Step: 4950 Batch Loss: 0.157539 Tokens per Sec: 2599, Lr: 0.000314\n", + "2019-10-14 14:31:07,026 Epoch 132: total training loss 4.77\n", + "2019-10-14 14:31:07,027 EPOCH 133\n", + "2019-10-14 14:31:40,728 Epoch 133 Step: 5000 Batch Loss: 0.133076 Tokens per Sec: 2766, Lr: 0.000313\n", + "2019-10-14 14:33:16,232 Example #0\n", + "2019-10-14 14:33:16,232 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 14:33:16,232 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 14:33:16,233 \tHypothesis: as jy so verhoed hoe dit met water ?\n", + "2019-10-14 14:33:16,233 Example #1\n", + "2019-10-14 14:33:16,233 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 14:33:16,233 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 14:33:16,233 \tHypothesis: dink jy jy kan die water gesuiwer deur jou filter te gefiltreer deur die filter .\n", + "2019-10-14 14:33:16,233 Example #2\n", + "2019-10-14 14:33:16,233 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 14:33:16,234 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 14:33:16,234 \tHypothesis: probeer dit uitdaging en as 'n verskil is .\n", + "2019-10-14 14:33:16,234 Example #3\n", + "2019-10-14 14:33:16,234 \tSource: how could you improve your design ?\n", + "2019-10-14 14:33:16,234 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:33:16,234 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:33:16,234 Validation result at epoch 133, step 5000: bleu: 19.27, loss: 23490.8320, ppl: 28.3291, duration: 95.5061s\n", + "2019-10-14 14:33:24,597 Epoch 133: total training loss 4.64\n", + "2019-10-14 14:33:24,597 EPOCH 134\n", + "2019-10-14 14:34:07,206 Epoch 134: total training loss 4.63\n", + "2019-10-14 14:34:07,206 EPOCH 135\n", + "2019-10-14 14:34:11,491 Epoch 135 Step: 5050 Batch Loss: 0.119887 Tokens per Sec: 2513, Lr: 0.000311\n", + "2019-10-14 14:34:49,656 Epoch 135: total training loss 4.67\n", + "2019-10-14 14:34:49,656 EPOCH 136\n", + "2019-10-14 14:35:06,831 Epoch 136 Step: 5100 Batch Loss: 0.136887 Tokens per Sec: 2509, Lr: 0.000309\n", + "2019-10-14 14:35:32,280 Epoch 136: total training loss 4.62\n", + "2019-10-14 14:35:32,281 EPOCH 137\n", + "2019-10-14 14:36:04,516 Epoch 137 Step: 5150 Batch Loss: 0.131575 Tokens per Sec: 2615, Lr: 0.000308\n", + "2019-10-14 14:36:14,826 Epoch 137: total training loss 4.42\n", + "2019-10-14 14:36:14,827 EPOCH 138\n", + "2019-10-14 14:36:56,906 Epoch 138: total training loss 4.43\n", + "2019-10-14 14:36:56,906 EPOCH 139\n", + "2019-10-14 14:37:00,621 Epoch 139 Step: 5200 Batch Loss: 0.123772 Tokens per Sec: 2996, Lr: 0.000306\n", + "2019-10-14 14:37:39,468 Epoch 139: total training loss 4.50\n", + "2019-10-14 14:37:39,468 EPOCH 140\n", + "2019-10-14 14:37:56,629 Epoch 140 Step: 5250 Batch Loss: 0.130182 Tokens per Sec: 2530, Lr: 0.000305\n", + "2019-10-14 14:38:22,912 Epoch 140: total training loss 4.46\n", + "2019-10-14 14:38:22,912 EPOCH 141\n", + "2019-10-14 14:38:51,822 Epoch 141 Step: 5300 Batch Loss: 0.100073 Tokens per Sec: 2450, Lr: 0.000304\n", + "2019-10-14 14:39:05,272 Epoch 141: total training loss 4.46\n", + "2019-10-14 14:39:05,272 
EPOCH 142\n", + "2019-10-14 14:39:48,031 Epoch 142: total training loss 4.38\n", + "2019-10-14 14:39:48,031 EPOCH 143\n", + "2019-10-14 14:39:48,870 Epoch 143 Step: 5350 Batch Loss: 0.088209 Tokens per Sec: 2229, Lr: 0.000302\n", + "2019-10-14 14:40:30,954 Epoch 143: total training loss 4.30\n", + "2019-10-14 14:40:30,954 EPOCH 144\n", + "2019-10-14 14:40:46,005 Epoch 144 Step: 5400 Batch Loss: 0.136266 Tokens per Sec: 2839, Lr: 0.000301\n", + "2019-10-14 14:41:13,647 Epoch 144: total training loss 4.28\n", + "2019-10-14 14:41:13,647 EPOCH 145\n", + "2019-10-14 14:41:41,768 Epoch 145 Step: 5450 Batch Loss: 0.137936 Tokens per Sec: 2748, Lr: 0.000299\n", + "2019-10-14 14:41:55,878 Epoch 145: total training loss 4.36\n", + "2019-10-14 14:41:55,879 EPOCH 146\n", + "2019-10-14 14:42:38,410 Epoch 146 Step: 5500 Batch Loss: 0.085756 Tokens per Sec: 2639, Lr: 0.000298\n", + "2019-10-14 14:44:13,921 Example #0\n", + "2019-10-14 14:44:13,922 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 14:44:13,922 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 14:44:13,922 \tHypothesis: as jy so verhoed hoe dit waterdreineer ?\n", + "2019-10-14 14:44:13,922 Example #1\n", + "2019-10-14 14:44:13,923 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 14:44:13,923 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 14:44:13,923 \tHypothesis: dink jy dat jy die water nie kan suiwer deur jou filter gefiltreer word nie .\n", + "2019-10-14 14:44:13,923 Example #2\n", + "2019-10-14 14:44:13,923 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 14:44:13,923 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 14:44:13,923 \tHypothesis: probeer uitvind as dit te sien as dit 'n verskil is .\n", + "2019-10-14 14:44:13,923 Example #3\n", + "2019-10-14 14:44:13,924 \tSource: how could you improve your design ?\n", + "2019-10-14 14:44:13,924 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:44:13,924 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:44:13,924 Validation result at epoch 146, step 5500: bleu: 19.25, loss: 23279.6992, ppl: 27.4904, duration: 95.5132s\n", + "2019-10-14 14:44:13,925 Epoch 146: total training loss 4.24\n", + "2019-10-14 14:44:13,925 EPOCH 147\n", + "2019-10-14 14:44:56,256 Epoch 147: total training loss 4.11\n", + "2019-10-14 14:44:56,256 EPOCH 148\n", + "2019-10-14 14:45:10,586 Epoch 148 Step: 5550 Batch Loss: 0.078974 Tokens per Sec: 2438, Lr: 0.000297\n", + "2019-10-14 14:45:38,994 Epoch 148: total training loss 4.21\n", + "2019-10-14 14:45:38,994 EPOCH 149\n", + "2019-10-14 14:46:08,082 Epoch 149 Step: 5600 Batch Loss: 0.082520 Tokens per Sec: 2693, Lr: 0.000295\n", + "2019-10-14 14:46:21,574 Epoch 149: total training loss 4.02\n", + "2019-10-14 14:46:21,575 EPOCH 150\n", + "2019-10-14 14:47:03,818 Epoch 150 Step: 5650 Batch Loss: 0.096497 Tokens per Sec: 2657, Lr: 0.000294\n", + "2019-10-14 14:47:03,818 Epoch 150: total training loss 4.14\n", + "2019-10-14 14:47:03,819 EPOCH 151\n", + "2019-10-14 14:47:45,635 Epoch 151: total training loss 4.13\n", + "2019-10-14 14:47:45,636 EPOCH 152\n", + "2019-10-14 14:47:59,173 Epoch 152 Step: 5700 Batch Loss: 0.092602 Tokens per Sec: 2526, Lr: 0.000293\n", + "2019-10-14 14:48:28,080 Epoch 152: total training loss 4.02\n", + "2019-10-14 14:48:28,080 EPOCH 153\n", + "2019-10-14 
14:48:56,395 Epoch 153 Step: 5750 Batch Loss: 0.124726 Tokens per Sec: 2647, Lr: 0.000291\n", + "2019-10-14 14:49:10,637 Epoch 153: total training loss 3.95\n", + "2019-10-14 14:49:10,637 EPOCH 154\n", + "2019-10-14 14:49:52,910 Epoch 154: total training loss 3.89\n", + "2019-10-14 14:49:52,910 EPOCH 155\n", + "2019-10-14 14:49:54,092 Epoch 155 Step: 5800 Batch Loss: 0.094692 Tokens per Sec: 2686, Lr: 0.000290\n", + "2019-10-14 14:50:34,843 Epoch 155: total training loss 3.90\n", + "2019-10-14 14:50:34,843 EPOCH 156\n", + "2019-10-14 14:50:50,854 Epoch 156 Step: 5850 Batch Loss: 0.130135 Tokens per Sec: 2700, Lr: 0.000289\n", + "2019-10-14 14:51:17,100 Epoch 156: total training loss 3.96\n", + "2019-10-14 14:51:17,101 EPOCH 157\n", + "2019-10-14 14:51:47,377 Epoch 157 Step: 5900 Batch Loss: 0.135181 Tokens per Sec: 2735, Lr: 0.000288\n", + "2019-10-14 14:51:59,568 Epoch 157: total training loss 3.95\n", + "2019-10-14 14:51:59,568 EPOCH 158\n", + "2019-10-14 14:52:42,174 Epoch 158: total training loss 4.03\n", + "2019-10-14 14:52:42,175 EPOCH 159\n", + "2019-10-14 14:52:43,244 Epoch 159 Step: 5950 Batch Loss: 0.081408 Tokens per Sec: 1891, Lr: 0.000286\n", + "2019-10-14 14:53:24,426 Epoch 159: total training loss 4.05\n", + "2019-10-14 14:53:24,427 EPOCH 160\n", + "2019-10-14 14:53:39,417 Epoch 160 Step: 6000 Batch Loss: 0.115625 Tokens per Sec: 2704, Lr: 0.000285\n", + "2019-10-14 14:55:14,808 Hooray! New best validation result [eval_metric]!\n", + "2019-10-14 14:55:14,809 Saving new checkpoint.\n", + "2019-10-14 14:55:16,434 Example #0\n", + "2019-10-14 14:55:16,435 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 14:55:16,435 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 14:55:16,435 \tHypothesis: as jy so verhoed dat dit waterdig is ?\n", + "2019-10-14 14:55:16,435 Example #1\n", + "2019-10-14 14:55:16,436 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 14:55:16,436 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 14:55:16,436 \tHypothesis: dink jy jy kan die water gesuiwer word deur die filter te gefiltreer deur dit te filtreer ?\n", + "2019-10-14 14:55:16,436 Example #2\n", + "2019-10-14 14:55:16,436 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 14:55:16,436 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 14:55:16,437 \tHypothesis: probeer dit sien en as dit 'n verskil is .\n", + "2019-10-14 14:55:16,437 Example #3\n", + "2019-10-14 14:55:16,437 \tSource: how could you improve your design ?\n", + "2019-10-14 14:55:16,437 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:55:16,437 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 14:55:16,437 Validation result at epoch 160, step 6000: bleu: 20.27, loss: 23328.7344, ppl: 27.6829, duration: 97.0199s\n", + "2019-10-14 14:55:44,007 Epoch 160: total training loss 4.00\n", + "2019-10-14 14:55:44,008 EPOCH 161\n", + "2019-10-14 14:56:13,277 Epoch 161 Step: 6050 Batch Loss: 0.128801 Tokens per Sec: 2708, Lr: 0.000284\n", + "2019-10-14 14:56:27,051 Epoch 161: total training loss 3.96\n", + "2019-10-14 14:56:27,052 EPOCH 162\n", + "2019-10-14 14:57:08,731 Epoch 162: total training loss 3.65\n", + "2019-10-14 14:57:08,731 EPOCH 163\n", + "2019-10-14 14:57:11,512 Epoch 163 Step: 6100 Batch Loss: 0.127656 Tokens per Sec: 3648, Lr: 0.000283\n", + "2019-10-14 
14:57:50,865 Epoch 163: total training loss 3.82\n", + "2019-10-14 14:57:50,865 EPOCH 164\n", + "2019-10-14 14:58:06,017 Epoch 164 Step: 6150 Batch Loss: 0.111806 Tokens per Sec: 2435, Lr: 0.000282\n", + "2019-10-14 14:58:32,562 Epoch 164: total training loss 3.93\n", + "2019-10-14 14:58:32,562 EPOCH 165\n", + "2019-10-14 14:59:04,040 Epoch 165 Step: 6200 Batch Loss: 0.089149 Tokens per Sec: 2749, Lr: 0.000281\n", + "2019-10-14 14:59:14,987 Epoch 165: total training loss 3.73\n", + "2019-10-14 14:59:14,988 EPOCH 166\n", + "2019-10-14 14:59:58,162 Epoch 166: total training loss 3.89\n", + "2019-10-14 14:59:58,162 EPOCH 167\n", + "2019-10-14 15:00:00,791 Epoch 167 Step: 6250 Batch Loss: 0.135432 Tokens per Sec: 3149, Lr: 0.000280\n", + "2019-10-14 15:00:40,359 Epoch 167: total training loss 3.85\n", + "2019-10-14 15:00:40,359 EPOCH 168\n", + "2019-10-14 15:00:56,583 Epoch 168 Step: 6300 Batch Loss: 0.132239 Tokens per Sec: 2878, Lr: 0.000278\n", + "2019-10-14 15:01:22,409 Epoch 168: total training loss 3.89\n", + "2019-10-14 15:01:22,409 EPOCH 169\n", + "2019-10-14 15:01:51,398 Epoch 169 Step: 6350 Batch Loss: 0.094800 Tokens per Sec: 2633, Lr: 0.000277\n", + "2019-10-14 15:02:04,529 Epoch 169: total training loss 3.77\n", + "2019-10-14 15:02:04,530 EPOCH 170\n", + "2019-10-14 15:02:46,805 Epoch 170: total training loss 3.85\n", + "2019-10-14 15:02:46,806 EPOCH 171\n", + "2019-10-14 15:02:47,752 Epoch 171 Step: 6400 Batch Loss: 0.113079 Tokens per Sec: 643, Lr: 0.000276\n", + "2019-10-14 15:03:28,899 Epoch 171: total training loss 3.85\n", + "2019-10-14 15:03:28,899 EPOCH 172\n", + "2019-10-14 15:03:43,995 Epoch 172 Step: 6450 Batch Loss: 0.102424 Tokens per Sec: 2813, Lr: 0.000275\n", + "2019-10-14 15:04:10,910 Epoch 172: total training loss 4.36\n", + "2019-10-14 15:04:10,911 EPOCH 173\n", + "2019-10-14 15:04:39,502 Epoch 173 Step: 6500 Batch Loss: 0.103793 Tokens per Sec: 2556, Lr: 0.000274\n", + "2019-10-14 15:06:14,983 Example #0\n", + "2019-10-14 15:06:14,984 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 15:06:14,984 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 15:06:14,984 \tHypothesis: as jy so verhoed dat dit watervalge ?\n", + "2019-10-14 15:06:14,984 Example #1\n", + "2019-10-14 15:06:14,984 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 15:06:14,984 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 15:06:14,985 \tHypothesis: dink jy kan jy die water gesuiwer kan jy die filter gegooi word deur dit weer blink te filtreer ?\n", + "2019-10-14 15:06:14,985 Example #2\n", + "2019-10-14 15:06:14,985 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 15:06:14,985 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 15:06:14,985 \tHypothesis: probeer dit so en kyk of dit 'n verskil is .\n", + "2019-10-14 15:06:14,985 Example #3\n", + "2019-10-14 15:06:14,985 \tSource: how could you improve your design ?\n", + "2019-10-14 15:06:14,985 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 15:06:14,986 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 15:06:14,986 Validation result at epoch 173, step 6500: bleu: 19.71, loss: 23496.4551, ppl: 28.3518, duration: 95.4832s\n", + "2019-10-14 15:06:29,240 Epoch 173: total training loss 6.11\n", + "2019-10-14 15:06:29,240 EPOCH 174\n", + "2019-10-14 15:07:11,017 Epoch 174 
Step: 6550 Batch Loss: 0.146055 Tokens per Sec: 2672, Lr: 0.000273\n", + "2019-10-14 15:07:11,945 Epoch 174: total training loss 5.35\n", + "2019-10-14 15:07:11,945 EPOCH 175\n", + "2019-10-14 15:07:54,279 Epoch 175: total training loss 4.67\n", + "2019-10-14 15:07:54,279 EPOCH 176\n", + "2019-10-14 15:08:06,498 Epoch 176 Step: 6600 Batch Loss: 0.142089 Tokens per Sec: 2373, Lr: 0.000272\n", + "2019-10-14 15:08:37,168 Epoch 176: total training loss 4.18\n", + "2019-10-14 15:08:37,169 EPOCH 177\n", + "2019-10-14 15:09:04,209 Epoch 177 Step: 6650 Batch Loss: 0.113586 Tokens per Sec: 2835, Lr: 0.000271\n", + "2019-10-14 15:09:19,590 Epoch 177: total training loss 4.05\n", + "2019-10-14 15:09:19,590 EPOCH 178\n", + "2019-10-14 15:10:00,919 Epoch 178 Step: 6700 Batch Loss: 0.080085 Tokens per Sec: 2699, Lr: 0.000270\n", + "2019-10-14 15:10:01,865 Epoch 178: total training loss 3.84\n", + "2019-10-14 15:10:01,866 EPOCH 179\n", + "2019-10-14 15:10:44,116 Epoch 179: total training loss 3.85\n", + "2019-10-14 15:10:44,116 EPOCH 180\n", + "2019-10-14 15:10:57,191 Epoch 180 Step: 6750 Batch Loss: 0.106096 Tokens per Sec: 2939, Lr: 0.000269\n", + "2019-10-14 15:11:26,836 Epoch 180: total training loss 3.76\n", + "2019-10-14 15:11:26,836 EPOCH 181\n", + "2019-10-14 15:11:53,274 Epoch 181 Step: 6800 Batch Loss: 0.097475 Tokens per Sec: 2850, Lr: 0.000268\n", + "2019-10-14 15:12:09,548 Epoch 181: total training loss 3.65\n", + "2019-10-14 15:12:09,549 EPOCH 182\n", + "2019-10-14 15:12:49,181 Epoch 182 Step: 6850 Batch Loss: 0.079030 Tokens per Sec: 2539, Lr: 0.000267\n", + "2019-10-14 15:12:52,858 Epoch 182: total training loss 3.57\n", + "2019-10-14 15:12:52,858 EPOCH 183\n", + "2019-10-14 15:13:35,346 Epoch 183: total training loss 3.66\n", + "2019-10-14 15:13:35,346 EPOCH 184\n", + "2019-10-14 15:13:43,913 Epoch 184 Step: 6900 Batch Loss: 0.093704 Tokens per Sec: 2404, Lr: 0.000266\n", + "2019-10-14 15:14:18,351 Epoch 184: total training loss 3.50\n", + "2019-10-14 15:14:18,351 EPOCH 185\n", + "2019-10-14 15:14:41,398 Epoch 185 Step: 6950 Batch Loss: 0.096410 Tokens per Sec: 2681, Lr: 0.000265\n", + "2019-10-14 15:15:01,099 Epoch 185: total training loss 3.45\n", + "2019-10-14 15:15:01,099 EPOCH 186\n", + "2019-10-14 15:15:38,145 Epoch 186 Step: 7000 Batch Loss: 0.082676 Tokens per Sec: 2685, Lr: 0.000264\n", + "2019-10-14 15:17:13,775 Hooray! 
New best validation result [eval_metric]!\n", + "2019-10-14 15:17:13,775 Saving new checkpoint.\n", + "2019-10-14 15:17:15,334 Example #0\n", + "2019-10-14 15:17:15,335 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 15:17:15,335 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 15:17:15,335 \tHypothesis: as jy so verhoed dat dit soos 'n waterdreineer ?\n", + "2019-10-14 15:17:15,335 Example #1\n", + "2019-10-14 15:17:15,335 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 15:17:15,335 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 15:17:15,335 \tHypothesis: dink jy jy kan die water gesuiwer kan jy dit deur die filter gegooi word ?\n", + "2019-10-14 15:17:15,335 Example #2\n", + "2019-10-14 15:17:15,335 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 15:17:15,335 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 15:17:15,336 \tHypothesis: probeer dit op en kyk as 'n verskil is .\n", + "2019-10-14 15:17:15,336 Example #3\n", + "2019-10-14 15:17:15,336 \tSource: how could you improve your design ?\n", + "2019-10-14 15:17:15,336 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 15:17:15,336 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 15:17:15,336 Validation result at epoch 186, step 7000: bleu: 20.83, loss: 23252.3848, ppl: 27.3837, duration: 97.1910s\n", + "2019-10-14 15:17:20,836 Epoch 186: total training loss 3.35\n", + "2019-10-14 15:17:20,837 EPOCH 187\n", + "2019-10-14 15:18:03,845 Epoch 187: total training loss 3.44\n", + "2019-10-14 15:18:03,845 EPOCH 188\n", + "2019-10-14 15:18:12,022 Epoch 188 Step: 7050 Batch Loss: 0.071749 Tokens per Sec: 2664, Lr: 0.000263\n", + "2019-10-14 15:18:46,620 Epoch 188: total training loss 3.75\n", + "2019-10-14 15:18:46,620 EPOCH 189\n", + "2019-10-14 15:19:08,337 Epoch 189 Step: 7100 Batch Loss: 0.103738 Tokens per Sec: 2573, Lr: 0.000262\n", + "2019-10-14 15:19:29,403 Epoch 189: total training loss 4.28\n", + "2019-10-14 15:19:29,403 EPOCH 190\n", + "2019-10-14 15:20:04,715 Epoch 190 Step: 7150 Batch Loss: 0.125232 Tokens per Sec: 2643, Lr: 0.000261\n", + "2019-10-14 15:20:12,069 Epoch 190: total training loss 3.94\n", + "2019-10-14 15:20:12,069 EPOCH 191\n", + "2019-10-14 15:20:54,184 Epoch 191: total training loss 4.58\n", + "2019-10-14 15:20:54,184 EPOCH 192\n", + "2019-10-14 15:20:59,785 Epoch 192 Step: 7200 Batch Loss: 0.090326 Tokens per Sec: 2403, Lr: 0.000260\n", + "2019-10-14 15:21:36,528 Epoch 192: total training loss 3.77\n", + "2019-10-14 15:21:36,528 EPOCH 193\n", + "2019-10-14 15:21:56,275 Epoch 193 Step: 7250 Batch Loss: 0.195592 Tokens per Sec: 2635, Lr: 0.000260\n", + "2019-10-14 15:22:18,486 Epoch 193: total training loss 4.89\n", + "2019-10-14 15:22:18,487 EPOCH 194\n", + "2019-10-14 15:22:53,886 Epoch 194 Step: 7300 Batch Loss: 0.082657 Tokens per Sec: 2655, Lr: 0.000259\n", + "2019-10-14 15:23:00,890 Epoch 194: total training loss 4.01\n", + "2019-10-14 15:23:00,890 EPOCH 195\n", + "2019-10-14 15:23:42,916 Epoch 195: total training loss 3.74\n", + "2019-10-14 15:23:42,916 EPOCH 196\n", + "2019-10-14 15:23:49,849 Epoch 196 Step: 7350 Batch Loss: 0.120253 Tokens per Sec: 2927, Lr: 0.000258\n", + "2019-10-14 15:24:25,059 Epoch 196: total training loss 3.48\n", + "2019-10-14 15:24:25,059 EPOCH 197\n", + "2019-10-14 15:24:45,965 Epoch 197 Step: 7400 
Batch Loss: 0.082194 Tokens per Sec: 2643, Lr: 0.000257\n", + "2019-10-14 15:25:07,440 Epoch 197: total training loss 3.54\n", + "2019-10-14 15:25:07,440 EPOCH 198\n", + "2019-10-14 15:25:43,051 Epoch 198 Step: 7450 Batch Loss: 0.105848 Tokens per Sec: 2682, Lr: 0.000256\n", + "2019-10-14 15:25:49,785 Epoch 198: total training loss 3.37\n", + "2019-10-14 15:25:49,785 EPOCH 199\n", + "2019-10-14 15:26:32,946 Epoch 199: total training loss 3.40\n", + "2019-10-14 15:26:32,946 EPOCH 200\n", + "2019-10-14 15:26:39,715 Epoch 200 Step: 7500 Batch Loss: 0.067201 Tokens per Sec: 2443, Lr: 0.000255\n", + "2019-10-14 15:28:15,358 Example #0\n", + "2019-10-14 15:28:15,359 \tSource: if so how could you prevent it from leaking ?\n", + "2019-10-14 15:28:15,359 \tReference: indien wel hoe kan jy verhoed dat dit lek ?\n", + "2019-10-14 15:28:15,359 \tHypothesis: as jy so kan verhoek soos wat jy in water ?\n", + "2019-10-14 15:28:15,359 Example #1\n", + "2019-10-14 15:28:15,359 \tSource: do you think you could further purify the water you filtered by passing it through your filter again ?\n", + "2019-10-14 15:28:15,360 \tReference: dink jy jy kan die water verder skoonmaak deur dit weer deur jou filter te gooi ?\n", + "2019-10-14 15:28:15,360 \tHypothesis: dink jy jy kan die water deur die filter gesuiwer word deur jou filter te filtreer ?\n", + "2019-10-14 15:28:15,360 Example #2\n", + "2019-10-14 15:28:15,360 \tSource: try it out and see if this makes a difference .\n", + "2019-10-14 15:28:15,360 \tReference: probeer dit en kyk of dit 'n verskil maak .\n", + "2019-10-14 15:28:15,360 \tHypothesis: probeer dit uitkyk en kyk of dit 'n verskil .\n", + "2019-10-14 15:28:15,360 Example #3\n", + "2019-10-14 15:28:15,361 \tSource: how could you improve your design ?\n", + "2019-10-14 15:28:15,361 \tReference: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 15:28:15,361 \tHypothesis: hoe kan jy jou ontwerp verbeter ?\n", + "2019-10-14 15:28:15,361 Validation result at epoch 200, step 7500: bleu: 20.42, loss: 23162.3203, ppl: 27.0349, duration: 95.6454s\n", + "2019-10-14 15:28:51,013 Epoch 200: total training loss 3.35\n", + "2019-10-14 15:28:51,014 Training ended after 200 epochs.\n", + "2019-10-14 15:28:51,014 Best validation result at step 7000: 20.83 eval_metric.\n", + "2019-10-14 15:29:18,002 dev bleu: 22.06 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 15:29:18,002 Translations saved to: models/enaf_transformer/00007000.hyps.dev\n", + "2019-10-14 15:29:49,947 test bleu: 14.84 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 15:29:49,948 Translations saved to: models/enaf_transformer/00007000.hyps.test\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "MBoDS09JM807", + "outputId": "7fb80c1e-6262-452b-f6d4-85b2f980343c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + } + }, + "source": [ + "# Copy the created models from the notebook storage to google drive for persistent storage\n", + "# (see the symlink note after the record below)\n", + "!mkdir -p \"$gdrive_path/models/${src}${tgt}_transformer/\" # Herman\n", + "!cp -r joeynmt/models/${src}${tgt}_transformer/* \"$gdrive_path/models/${src}${tgt}_transformer/\"" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "cp: cannot create symbolic link '/content/drive/My
Drive/colab/masakhane/en-af-baseline/models/enaf_transformer/best.ckpt': Function not implemented\n", + "cp: cannot create symbolic link '/content/drive/My Drive/colab/masakhane/en-af-baseline/models/enaf_transformer/best.ckpt': Function not implemented\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "n94wlrCjVc17", + "outputId": "58522ee6-91dc-4de9-d708-6b52ef7757ee", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 287 + } + }, + "source": [ + "# Output our validation results (a parsing sketch follows the record below)\n", + "! cat \"$gdrive_path/models/${src}${tgt}_transformer/validations.txt\"" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Steps: 500\tLoss: 29601.47461\tPPL: 67.60847\tbleu: 0.78623\tLR: 0.00034939\t*\n", + "Steps: 1000\tLoss: 23326.10742\tPPL: 27.67259\tbleu: 13.46693\tLR: 0.00069877\t*\n", + "Steps: 1500\tLoss: 23168.03320\tPPL: 27.05686\tbleu: 17.51870\tLR: 0.00057054\t*\n", + "Steps: 2000\tLoss: 24336.34375\tPPL: 31.95243\tbleu: 16.99290\tLR: 0.00049411\t\n", + "Steps: 2500\tLoss: 24009.45508\tPPL: 30.49967\tbleu: 18.66731\tLR: 0.00044194\t*\n", + "Steps: 3000\tLoss: 23779.20898\tPPL: 29.51625\tbleu: 17.61267\tLR: 0.00040344\t\n", + "Steps: 3500\tLoss: 23638.41797\tPPL: 28.93059\tbleu: 18.91206\tLR: 0.00037351\t*\n", + "Steps: 4000\tLoss: 23474.00195\tPPL: 28.26134\tbleu: 19.68848\tLR: 0.00034939\t*\n", + "Steps: 4500\tLoss: 23306.66211\tPPL: 27.59610\tbleu: 19.81664\tLR: 0.00032940\t*\n", + "Steps: 5000\tLoss: 23490.83203\tPPL: 28.32913\tbleu: 19.27047\tLR: 0.00031250\t\n", + "Steps: 5500\tLoss: 23279.69922\tPPL: 27.49038\tbleu: 19.25319\tLR: 0.00029796\t\n", + "Steps: 6000\tLoss: 23328.73438\tPPL: 27.68294\tbleu: 20.27485\tLR: 0.00028527\t*\n", + "Steps: 6500\tLoss: 23496.45508\tPPL: 28.35181\tbleu: 19.71401\tLR: 0.00027408\t\n", + "Steps: 7000\tLoss: 23252.38477\tPPL: 27.38370\tbleu: 20.83329\tLR: 0.00026411\t*\n", + "Steps: 7500\tLoss: 23162.32031\tPPL: 27.03487\tbleu: 20.42162\tLR: 0.00025516\t\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "66WhRE9lIhoD", + "outputId": "36c76633-73c3-47e5-d09d-38857cd1e115", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 89 + } + }, + "source": [ + "# Test our model\n", + "!
cd joeynmt; python3 -m joeynmt test \"$gdrive_path/models/${src}${tgt}_transformer/config.yaml\"" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2019-10-14 15:31:18,094 - dev bleu: 22.06 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 15:31:18,094 - dev bleu: 22.06 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 15:31:50,269 - test bleu: 14.84 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 15:31:50,269 - test bleu: 14.84 [Beam search decoding with beam size = 5 and alpha = 1.0]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uPqSUwVzLLMM", + "colab_type": "text" + }, + "source": [ + "## Record\n", + "\n", + "Validation history after 200 epochs:\n", + "\n", + " Steps: 500\tLoss: 28996.02539\tPPL: 65.32051\tbleu: 0.74017\tLR: 0.00034939\t*\n", + " Steps: 1000\tLoss: 22725.31836\tPPL: 26.45606\tbleu: 12.15630\tLR: 0.00069877\t*\n", + " Steps: 1500\tLoss: 22900.86719\tPPL: 27.13401\tbleu: 17.04406\tLR: 0.00057054\t*\n", + " Steps: 2000\tLoss: 24123.17773\tPPL: 32.36132\tbleu: 17.20765\tLR: 0.00049411\t*\n", + " Steps: 2500\tLoss: 23582.63867\tPPL: 29.93578\tbleu: 18.16604\tLR: 0.00044194\t*\n", + " Steps: 3000\tLoss: 23164.73633\tPPL: 28.18586\tbleu: 19.39783\tLR: 0.00040344\t*\n", + " Steps: 3500\tLoss: 23084.53516\tPPL: 27.86192\tbleu: 19.46346\tLR: 0.00037351\t*\n", + " Steps: 4000\tLoss: 23180.01953\tPPL: 28.24801\tbleu: 19.10164\tLR: 0.00034939\t\n", + " Steps: 4500\tLoss: 22994.55078\tPPL: 27.50288\tbleu: 20.05288\tLR: 0.00032940\t*\n", + " Steps: 5000\tLoss: 22928.59961\tPPL: 27.24268\tbleu: 19.66884\tLR: 0.00031250\t\n", + " Steps: 5500\tLoss: 22814.38477\tPPL: 26.79788\tbleu: 18.71092\tLR: 0.00029796\t\n", + " Steps: 6000\tLoss: 22747.05664\tPPL: 26.53909\tbleu: 19.54311\tLR: 0.00028527\t\n", + " Steps: 6500\tLoss: 22670.42383\tPPL: 26.24757\tbleu: 19.12990\tLR: 0.00027408\t\n", + " Steps: 7000\tLoss: 22537.89453\tPPL: 25.75094\tbleu: 19.76692\tLR: 0.00026411\t\n", + " Steps: 7500\tLoss: 22478.74023\tPPL: 25.53232\tbleu: 20.04524\tLR: 0.00025516\t" + ] + },
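 + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The `cp` errors in the model-copying cell above occur because JoeyNMT stores `best.ckpt` as a symbolic link, and the mounted Google Drive filesystem does not support symlinks (hence `Function not implemented`). A minimal workaround sketch, assuming GNU `cp` as available on Colab, that dereferences the link and copies the checkpoint file itself:" + ] + },
 + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Sketch: -L dereferences symlinks, so best.ckpt is copied as a regular file\n", + "!cp -rL joeynmt/models/${src}${tgt}_transformer/* \"$gdrive_path/models/${src}${tgt}_transformer/\"" + ], + "execution_count": 0, + "outputs": [] + },
 + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text" + }, + "source": [ + "The validation history can also be checked programmatically. Below is a minimal sketch (a hypothetical helper, not part of the original run) that assumes the tab-separated `key: value` layout of `validations.txt` shown above; it should recover step 7000 with dev BLEU 20.83, matching the training log's best checkpoint." + ] + },
 + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Sketch: find the validation step with the best dev BLEU in validations.txt\n", + "import os\n", + "\n", + "path = os.path.join(os.environ[\"gdrive_path\"], \"models\",\n", + "                    \"%s%s_transformer\" % (os.environ[\"src\"], os.environ[\"tgt\"]),\n", + "                    \"validations.txt\")\n", + "best_step, best_bleu = None, -1.0\n", + "with open(path) as f:\n", + "    for line in f:\n", + "        # Each line holds tab-separated \"key: value\" fields plus a trailing * for new bests\n", + "        fields = dict(part.split(\": \", 1) for part in line.strip().split(\"\\t\") if \": \" in part)\n", + "        if float(fields[\"bleu\"]) > best_bleu:\n", + "            best_step, best_bleu = int(fields[\"Steps\"]), float(fields[\"bleu\"])\n", + "print(\"Best dev BLEU %.2f at step %d\" % (best_bleu, best_step))" + ], + "execution_count": 0, + "outputs": [] + },
 + { + "cell_type": "code", + "metadata": { + "id": "xvFGcTI4aXMZ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file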