diff --git "a/en-am/jw300-amharic-baseline/English_to_Amharic.ipynb" "b/en-am/jw300-amharic-baseline/English_to_Amharic.ipynb" new file mode 100644--- /dev/null +++ "b/en-am/jw300-amharic-baseline/English_to_Amharic.ipynb" @@ -0,0 +1,1413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "-UsLDHm-Jw5P", + "outputId": "3379e87c-18e7-4b8c-e35d-d2620c18d396" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "GT6gOLJ1apgr" + }, + "outputs": [], + "source": [ + "# TODO: Set your source and target languages. Keep in mind, these traditionally use language codes as found here:\n", + "# These will also become the suffix's of all vocab and corpus files used throughout\n", + "import os\n", + "source_language = \"en\"\n", + "target_language = \"am\"\n", + "tag = \"baseline\" # Give a unique name to your folder - this is to ensure you don't rewrite any models you've already submitted\n", + "\n", + "os.environ[\"src\"] = source_language # Sets them in bash as well, since we often use bash scripts\n", + "os.environ[\"tgt\"] = target_language\n", + "os.environ[\"tag\"] = tag\n", + "\n", + "# This will save it to a folder in our gdrive instead!\n", + "!mkdir -p \"/content/drive/My Drive/masakhane/$src-$tgt\"\n", + "os.environ[\"gdrive_path\"] = \"/content/drive/My Drive/masakhane/%s-%s\" % (source_language, target_language)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74, + "resources": { + "http://localhost:8080/nbextensions/google.colab/files.js": { + "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=", + "headers": [ + [ + "content-type", + "application/javascript" + ] + ], + "ok": true, + "status": 200, + "status_text": "" + } + } + }, + "colab_type": "code", + "id": "RhI5rCxkLOQz", + "outputId": "edb11ba6-15e8-4e54-e6c1-c7cbf6aeb71c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving en_am.csv to en_am (1).csv\n" + ] + } + ], + "source": [ + "from google.colab import files\n", + "uploaded = files.upload()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "1TsgIUEavc9K" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import io\n", + "data = pd.read_csv(io.BytesIO(uploaded['en_am.csv']))\n", + "# Dataset is now stored in a Pandas Dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "id": "66qYqydYwaSB", + "outputId": "05546c33-4d7b-4d2f-9df3-e8b19be512f8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnglishAmharic
017 When Aʹbram was 99 years old, Jehovah appe...17 አብራም ዕድሜው 99 ዓመት ሲሆን ይሖዋ ተገለጠለትና እንዲህ አለው፦...
12 I will establish my covenant between me and...2 ከአንተ ጋር ቃል ኪዳኔን እመሠርታለሁ፤+ አንተንም እጅግ አበዛሃለሁ።”+
23 At this Aʹbram fell facedown, and God conti...3 በዚህ ጊዜ አብራም በግንባሩ ተደፋ፤ አምላክም እንዲህ በማለት እሱን ...
34 “As for me, look! my covenant is with you,+...4 “በእኔ በኩል ከአንተ ጋር የገባሁት ቃል ኪዳኔ እንደጸና ነው፤+ አን...
45 Your name will no longer be Aʹbram;* your n...5 ከእንግዲህ ስምህ አብራም* አይባልም፤ የብዙ ብሔር አባት ስለማደርግህ...
\n", + "
" + ], + "text/plain": [ + " English Amharic\n", + "0 17 When Aʹbram was 99 years old, Jehovah appe... 17 አብራም ዕድሜው 99 ዓመት ሲሆን ይሖዋ ተገለጠለትና እንዲህ አለው፦...\n", + "1 2 I will establish my covenant between me and... 2 ከአንተ ጋር ቃል ኪዳኔን እመሠርታለሁ፤+ አንተንም እጅግ አበዛሃለሁ።”+ \n", + "2 3 At this Aʹbram fell facedown, and God conti... 3 በዚህ ጊዜ አብራም በግንባሩ ተደፋ፤ አምላክም እንዲህ በማለት እሱን ...\n", + "3 4 “As for me, look! my covenant is with you,+... 4 “በእኔ በኩል ከአንተ ጋር የገባሁት ቃል ኪዳኔ እንደጸና ነው፤+ አን...\n", + "4 5 Your name will no longer be Aʹbram;* your n... 5 ከእንግዲህ ስምህ አብራም* አይባልም፤ የብዙ ብሔር አባት ስለማደርግህ..." + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 824 + }, + "colab_type": "code", + "id": "XIktrj1xNVC-", + "outputId": "29fd7309-d27d-425f-e370-262e88f0a056" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnglishAmharic
359726 Jehovah spoke further to Moses, saying:26 በተጨማሪም ይሖዋ ሙሴን እንዲህ አለው፦
482717 Jehovah went on to say to Moses:17 ይሖዋ ሙሴን እንዲህ አለው፦
101984 However, the high places were not removed,+...4 ሆኖም ከፍ ያሉት የማምለኪያ ቦታዎች አልተወገዱም ነበር፤+ ሕዝቡ አሁ...
118594 “Your father made our yoke harsh.+ But if y...4 “አባትህ ቀንበራችንን አክብዶብን ነበር።+ አንተ ግን አባትህ የሰጠን...
118616 King Re·ho·boʹam then consulted with the ol...6 ከዚያም ንጉሥ ሮብዓም አባቱ ሰለሞን በሕይወት በነበረበት ጊዜ ሲያገለ...
118638 However, he rejected the advice that the ol...8 ሆኖም ሮብዓም ሽማግሌዎቹ የሰጡትን ምክር ትቶ አብሮ አደጎቹ ከነበሩት...
118649 He asked them: “What advice do you offer on...9 እሱም እንዲህ ሲል ጠየቃቸው፦ “‘አባትህ የጫነብንን ቀንበር አቅልልል...
1186712 Jer·o·boʹam and all the people came to Re·...12 ኢዮርብዓምና መላው ሕዝብ ንጉሡ “በሦስተኛው ቀን ተመልሳችሁ ኑ” ባ...
1187217 But Re·ho·boʹam continued to reign over th...17 ይሁንና ሮብዓም በይሁዳ ከተሞች በሚኖሩት እስራኤላውያን ላይ መግዛቱ...
163525 Be exalted above the heavens, O God;May yo...5 አምላክ ሆይ፣ ከሰማያት በላይ ከፍ ከፍ በል፤ክብርህ በምድር ሁሉ ላ...
183593 They said to him: “This is what Hez·e·kiʹah...3 እነሱም እንዲህ አሉት፦ “ሕዝቅያስ እንዲህ ይላል፦ ‘ይህ ቀን የጭንቀ...
183615 So the servants of King Hez·e·kiʹah went in...5 በመሆኑም የንጉሥ ሕዝቅያስ አገልጋዮች ወደ ኢሳይያስ ሄዱ፤+
1836610 “This is what you should say to King Hez·e...10 “የይሁዳን ንጉሥ ሕዝቅያስን እንዲህ በሉት፦ ‘የምትታመንበት አምላክ...
1836711 Look! You have heard what the kings of As·...11 እነሆ፣ የአሦር ነገሥታት ሌሎቹን አገሮች ሁሉ ፈጽመው በማጥፋት ያደ...
1837014 Hez·e·kiʹah took the letters out of the ha...14 ሕዝቅያስ ደብዳቤዎቹን ከመልእክተኞቹ እጅ ተቀብሎ አነበበ። ከዚያም ...
2102111 And the word of Jehovah again came to me, ...11 የይሖዋም ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦
2130916 The word of Jehovah again came to me, saying:16 የይሖዋ ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦
214592 Then the word of Jehovah came to me, saying:2 ከዚያም የይሖዋ ቃል እንዲህ ሲል ወደ እኔ መጣ፦
2163518 And the word of Jehovah again came to me, ...18 የይሖዋም ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦
2165015 The word of Jehovah again came to me, saying:15 የይሖዋ ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦
2167417 And the word of Jehovah again came to me, ...17 የይሖዋም ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦
244805 Others fell on rocky ground where there was...5 ሌሎቹ ደግሞ ብዙ አፈር በሌለው ድንጋያማ መሬት ላይ ወደቁ፤ አፈሩም ...
244816 But when the sun rose, they were scorched, ...6 ፀሐይ በወጣ ጊዜ ግን ተቃጠሉ፤ ሥር ስላልነበራቸውም ደረቁ።
292052 May you have undeserved kindness and peace ...2 አባታችን ከሆነው አምላክና ከጌታ ኢየሱስ ክርስቶስ ጸጋና ሰላም ለእና...
293602 May you have undeserved kindness and peace ...2 አባታችን ከሆነው አምላክና ከጌታ ኢየሱስ ክርስቶስ ጸጋና ሰላም ለእና...
\n", + "
" + ], + "text/plain": [ + " English Amharic\n", + "3597 26 Jehovah spoke further to Moses, saying: 26 በተጨማሪም ይሖዋ ሙሴን እንዲህ አለው፦\n", + "4827 17 Jehovah went on to say to Moses: 17 ይሖዋ ሙሴን እንዲህ አለው፦\n", + "10198 4 However, the high places were not removed,+... 4 ሆኖም ከፍ ያሉት የማምለኪያ ቦታዎች አልተወገዱም ነበር፤+ ሕዝቡ አሁ...\n", + "11859 4 “Your father made our yoke harsh.+ But if y... 4 “አባትህ ቀንበራችንን አክብዶብን ነበር።+ አንተ ግን አባትህ የሰጠን...\n", + "11861 6 King Re·ho·boʹam then consulted with the ol... 6 ከዚያም ንጉሥ ሮብዓም አባቱ ሰለሞን በሕይወት በነበረበት ጊዜ ሲያገለ...\n", + "11863 8 However, he rejected the advice that the ol... 8 ሆኖም ሮብዓም ሽማግሌዎቹ የሰጡትን ምክር ትቶ አብሮ አደጎቹ ከነበሩት...\n", + "11864 9 He asked them: “What advice do you offer on... 9 እሱም እንዲህ ሲል ጠየቃቸው፦ “‘አባትህ የጫነብንን ቀንበር አቅልልል...\n", + "11867 12 Jer·o·boʹam and all the people came to Re·... 12 ኢዮርብዓምና መላው ሕዝብ ንጉሡ “በሦስተኛው ቀን ተመልሳችሁ ኑ” ባ...\n", + "11872 17 But Re·ho·boʹam continued to reign over th... 17 ይሁንና ሮብዓም በይሁዳ ከተሞች በሚኖሩት እስራኤላውያን ላይ መግዛቱ...\n", + "16352 5 Be exalted above the heavens, O God;May yo... 5 አምላክ ሆይ፣ ከሰማያት በላይ ከፍ ከፍ በል፤ክብርህ በምድር ሁሉ ላ...\n", + "18359 3 They said to him: “This is what Hez·e·kiʹah... 3 እነሱም እንዲህ አሉት፦ “ሕዝቅያስ እንዲህ ይላል፦ ‘ይህ ቀን የጭንቀ...\n", + "18361 5 So the servants of King Hez·e·kiʹah went in... 5 በመሆኑም የንጉሥ ሕዝቅያስ አገልጋዮች ወደ ኢሳይያስ ሄዱ፤+\n", + "18366 10 “This is what you should say to King Hez·e... 10 “የይሁዳን ንጉሥ ሕዝቅያስን እንዲህ በሉት፦ ‘የምትታመንበት አምላክ...\n", + "18367 11 Look! You have heard what the kings of As·... 11 እነሆ፣ የአሦር ነገሥታት ሌሎቹን አገሮች ሁሉ ፈጽመው በማጥፋት ያደ...\n", + "18370 14 Hez·e·kiʹah took the letters out of the ha... 14 ሕዝቅያስ ደብዳቤዎቹን ከመልእክተኞቹ እጅ ተቀብሎ አነበበ። ከዚያም ...\n", + "21021 11 And the word of Jehovah again came to me, ... 11 የይሖዋም ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦\n", + "21309 16 The word of Jehovah again came to me, saying: 16 የይሖዋ ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦\n", + "21459 2 Then the word of Jehovah came to me, saying: 2 ከዚያም የይሖዋ ቃል እንዲህ ሲል ወደ እኔ መጣ፦\n", + "21635 18 And the word of Jehovah again came to me, ... 18 የይሖዋም ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦\n", + "21650 15 The word of Jehovah again came to me, saying: 15 የይሖዋ ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦\n", + "21674 17 And the word of Jehovah again came to me, ... 17 የይሖዋም ቃል ዳግመኛ እንዲህ ሲል ወደ እኔ መጣ፦\n", + "24480 5 Others fell on rocky ground where there was... 5 ሌሎቹ ደግሞ ብዙ አፈር በሌለው ድንጋያማ መሬት ላይ ወደቁ፤ አፈሩም ...\n", + "24481 6 But when the sun rose, they were scorched, ... 6 ፀሐይ በወጣ ጊዜ ግን ተቃጠሉ፤ ሥር ስላልነበራቸውም ደረቁ።\n", + "29205 2 May you have undeserved kindness and peace ... 2 አባታችን ከሆነው አምላክና ከጌታ ኢየሱስ ክርስቶስ ጸጋና ሰላም ለእና...\n", + "29360 2 May you have undeserved kindness and peace ... 2 አባታችን ከሆነው አምላክና ከጌታ ኢየሱስ ክርስቶስ ጸጋና ሰላም ለእና..." + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.duplicated()]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "eHtFaZtlwcYd" + }, + "outputs": [], + "source": [ + "data = data.rename(columns={\"English\":\"source_sentence\", \"Amharic\":\"target_sentence\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "LeILBPq8MXhs", + "outputId": "91726358-a8c9-4c8a-c1a4-48c7eaa87f7f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Length of Data before Removing duplicate: 31078\n", + "Length of Data after Removing duplicate: 31053\n" + ] + } + ], + "source": [ + "print(\"Length of Data before Removing duplicate: \",len(data))\n", + "data = data.drop_duplicates()\n", + "print(\"Length of Data after Removing duplicate: \",len(data))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tyODzTVfx5GO" + }, + "outputs": [], + "source": [ + "# Do the split between dev/test/train and create parallel corpora\n", + "num_dev_patterns = 1000\n", + "num_test_patterns = 1000\n", + "df = data\n", + "# Lower case the corpora\n", + "df[\"source_sentence\"] = df[\"source_sentence\"].str.lower()\n", + "df[\"target_sentence\"] = df[\"target_sentence\"].str.lower()\n", + "\n", + "\n", + "devtest = df.tail(num_dev_patterns + num_test_patterns)\n", + "test = devtest.tail(num_test_patterns)\n", + "dev = devtest.head(num_dev_patterns)\n", + "stripped = df.drop(df.tail(num_dev_patterns + num_test_patterns).index)\n", + "\n", + "stripped[[\"source_sentence\"]].to_csv(\"train.en\", index=False)\n", + "stripped[[\"target_sentence\"]].to_csv(\"train.am\", index=False)\n", + "\n", + "dev[[\"source_sentence\"]].to_csv(\"dev.en\", index=False)\n", + "dev[[\"target_sentence\"]].to_csv(\"dev.am\", index=False)\n", + "\n", + "test[[\"source_sentence\"]].to_csv(\"test.en\", index=False)\n", + "test[[\"target_sentence\"]].to_csv(\"test.am\", index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "myWQvGt7yXUM", + "outputId": "2eda08a4-ba95-4541-8a43-080eb7b130ae" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fatal: destination path 'joeynmt' already exists and is not an empty directory.\n", + "Processing /content/joeynmt\n", + "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.16.0)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (4.3.0)\n", + "Requirement already satisfied: numpy<2.0,>=1.14.5 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.16.5)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (41.2.0)\n", + "Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.2.0)\n", + "Requirement already satisfied: tensorflow>=1.14 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.15.0rc3)\n", + "Requirement already satisfied: torchtext in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.3.1)\n", + "Requirement already satisfied: sacrebleu>=1.3.6 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.4.2)\n", + "Requirement already satisfied: subword-nmt in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.3.6)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (3.0.3)\n", + "Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.9.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (5.1.2)\n", + "Requirement already satisfied: pylint in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (2.4.2)\n", + "Requirement already satisfied: six>=1.12 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.12.0)\n", + "Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow->joeynmt==0.0.1) (0.46)\n", + "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.0.8)\n", + "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.2.2)\n", + "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.1.7)\n", + "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.11.2)\n", + "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (3.7.1)\n", + "Requirement already satisfied: tensorboard<1.16.0,>=1.15.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.0)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (3.1.0)\n", + "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.0)\n", + "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.33.6)\n", + "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.8.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.1.0)\n", + "Requirement already satisfied: tensorflow-estimator==1.15.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.1)\n", + "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.1.0)\n", + "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.8.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from torchtext->joeynmt==0.0.1) (4.28.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from torchtext->joeynmt==0.0.1) (2.21.0)\n", + "Requirement already satisfied: typing in /usr/local/lib/python3.6/dist-packages (from sacrebleu>=1.3.6->joeynmt==0.0.1) (3.7.4.1)\n", + "Requirement already satisfied: portalocker in /usr/local/lib/python3.6/dist-packages (from sacrebleu>=1.3.6->joeynmt==0.0.1) (1.5.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (1.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (2.5.3)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (2.4.2)\n", + "Requirement already satisfied: pandas>=0.15.2 in /usr/local/lib/python3.6/dist-packages (from seaborn->joeynmt==0.0.1) (0.24.2)\n", + "Requirement already satisfied: scipy>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from seaborn->joeynmt==0.0.1) (1.3.1)\n", + "Requirement already satisfied: mccabe<0.7,>=0.6 in /usr/local/lib/python3.6/dist-packages (from pylint->joeynmt==0.0.1) (0.6.1)\n", + "Requirement already satisfied: isort<5,>=4.2.5 in /usr/local/lib/python3.6/dist-packages (from pylint->joeynmt==0.0.1) (4.3.21)\n", + "Requirement already satisfied: astroid<2.4,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from pylint->joeynmt==0.0.1) (2.3.1)\n", + "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow>=1.14->joeynmt==0.0.1) (2.8.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow>=1.14->joeynmt==0.0.1) (3.1.1)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow>=1.14->joeynmt==0.0.1) (0.16.0)\n", + "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (1.24.3)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (3.0.4)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (2019.9.11)\n", + "Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.6/dist-packages (from pandas>=0.15.2->seaborn->joeynmt==0.0.1) (2018.9)\n", + "Requirement already satisfied: typed-ast<1.5,>=1.4.0; implementation_name == \"cpython\" and python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from astroid<2.4,>=2.3.0->pylint->joeynmt==0.0.1) (1.4.0)\n", + "Requirement already satisfied: lazy-object-proxy==1.4.* in /usr/local/lib/python3.6/dist-packages (from astroid<2.4,>=2.3.0->pylint->joeynmt==0.0.1) (1.4.2)\n", + "Building wheels for collected packages: joeynmt\n", + " Building wheel for joeynmt (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for joeynmt: filename=joeynmt-0.0.1-cp36-none-any.whl size=69430 sha256=7733008144773a47708392eba04ae71d09319832da6a63cb1da06d7a64eaf97c\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-1qyjokz_/wheels/db/01/db/751cc9f3e7f6faec127c43644ba250a3ea7ad200594aeda70a\n", + "Successfully built joeynmt\n", + "Installing collected packages: joeynmt\n", + " Found existing installation: joeynmt 0.0.1\n", + " Uninstalling joeynmt-0.0.1:\n", + " Successfully uninstalled joeynmt-0.0.1\n", + "Successfully installed joeynmt-0.0.1\n" + ] + } + ], + "source": [ + "\n", + "# Install JoeyNMT\n", + "! git clone https://github.com/joeynmt/joeynmt.git\n", + "! cd joeynmt; pip3 install ." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 479 + }, + "colab_type": "code", + "id": "XoLltS6xybX3", + "outputId": "7570ef9a-d153-4069-d905-2c2bd7daaa39" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mkdir: cannot create directory ‘joeynmt/data/’: File exists\n", + "mkdir: cannot create directory ‘joeynmt/data/enam/’: File exists\n", + "cp: missing destination file operand after 'bpe.codes.4000'\n", + "Try 'cp --help' for more information.\n", + " bpe.codes.4000 dev.en\t sample_data\t train.am vocab.en\n", + " data.csv\t drive\t\t test.am\t train.bpe.am\n", + " dev.am\t\t 'en_am (1).csv' test.bpe.am\t train.bpe.en\n", + " dev.bpe.am\t en_am.csv\t test.bpe.en\t train.en\n", + " dev.bpe.en\t joeynmt\t test.en\t vocab.am\n", + "BPE amharic Sentences\n", + "17 መንፈ@@ ሱና ሙ@@ ሽ@@ ራ@@ ይ@@ ቱ@@ ም+ “@@ ና@@ !” ይላ@@ ሉ፤ የሚ@@ ሰማ@@ ም ሁሉ “@@ ና@@ !” ይ@@ በ@@ ል፤ የተ@@ ጠ@@ ማ@@ ም ሁሉ ይ@@ ምጣ@@ ፤+ የሚ@@ ፈል@@ ግ ሁሉ የ@@ ሕይወ@@ ትን ውኃ በ@@ ነፃ ይ@@ ውሰ@@ ድ@@ ።+ \n", + "18 “@@ በዚህ ጥ@@ ቅል@@ ል ላይ የሰ@@ ፈ@@ ሩትን የ@@ ትንቢት ቃ@@ ላት ለሚ@@ ሰማ ሁሉ እ@@ መሠ@@ ክ@@ ራ@@ ለሁ@@ ፦ ማንም በ@@ እነዚህ ነገሮች ላይ አንዲት ነገር ቢ@@ ጨ@@ ምር@@ + አምላክ በዚህ ጥ@@ ቅል@@ ል ውስጥ የተ@@ ጻ@@ ፉ@@ ትን መቅ@@ ሰ@@ ፍ@@ ቶች ይ@@ ጨ@@ ምር@@ በታ@@ ል፤+\n", + "19 ማንም በዚህ የ@@ ትንቢት መጽሐ@@ ፍ ጥ@@ ቅል@@ ል ላይ ከሰ@@ ፈ@@ ሩት ቃ@@ ላት አንዲት ነገር ቢያ@@ ጎ@@ ድል አምላክ በዚህ ጥ@@ ቅል@@ ል ላይ ከተ@@ ጠ@@ ቀ@@ ሱ@@ ት የ@@ ሕይወት ዛ@@ ፎ@@ ች@@ ና+ ከ@@ ቅ@@ ድ@@ ስ@@ ቲ@@ ቱ ከተማ@@ + ዕ@@ ጣ ፋ@@ ን@@ ታ@@ ውን ይወ@@ ስድ@@ በታ@@ ል። \n", + "20 “@@ ስለ እነዚህ ነገሮች የሚ@@ መሠ@@ ክ@@ ረው ‘@@ አዎ፣ ቶ@@ ሎ እ@@ መጣ@@ ለሁ@@ ’+ ይላ@@ ል።” “@@ አ@@ ሜ@@ ን@@ ! ጌታ ኢየሱ@@ ስ፣ ና@@ ።” \n", + "21 የ@@ ጌታ የ@@ ኢየሱስ ጸ@@ ጋ ከ@@ ቅዱ@@ ሳ@@ ኑ ጋር ይሁን@@ ።\n", + "Combined BPE Vocab\n", + "ኋ\n", + "isra@@\n", + "betwe@@\n", + "‘\n", + "ቾ\n", + "ኼ\n", + "ח@@\n", + "ז\n", + "“\n", + "ጱ\n" + ] + } + ], + "source": [ + "# One of the huge boosts in NMT performance was to use a different method of tokenizing. \n", + "# Usually, NMT would tokenize by words. However, using a method called BPE gave amazing boosts to performance\n", + "\n", + "# Do subword NMT\n", + "! mkdir joeynmt/data/\n", + "! mkdir joeynmt/data/enam/\n", + "! export data_path=joeynmt/data/$src$tgt/\n", + "! subword-nmt learn-joint-bpe-and-vocab --input train.$src train.$tgt -s 4000 -o bpe.codes.4000 --write-vocabulary vocab.$src vocab.$tgt\n", + "\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < train.$src > train.bpe.$src\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < train.$tgt > train.bpe.$tgt\n", + "\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < dev.$src > dev.bpe.$src\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < dev.$tgt > dev.bpe.$tgt\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < test.$src > test.bpe.$src\n", + "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < test.$tgt > test.bpe.$tgt\n", + "\n", + "# Create directory, move everyone we care about to the correct location\n", + "#! mkdir -p $data_path\n", + "! cp train.* joeynmt/data/enam/\n", + "! cp test.* joeynmt/data/enam/\n", + "! cp dev.* joeynmt/data/enam/\n", + "! cp bpe.codes.4000 $data_path\n", + "! ls $data_path\n", + "\n", + "# Create that vocab using build_vocab\n", + "! sudo chmod 777 joeynmt/scripts/build_vocab.py\n", + "! joeynmt/scripts/build_vocab.py joeynmt/data/$src$tgt/train.bpe.$src joeynmt/data/$src$tgt/train.bpe.$tgt --output_path joeynmt/data/$src$tgt/vocab.txt\n", + "\n", + "# Some output\n", + "! echo \"BPE amharic Sentences\"\n", + "! tail -n 5 test.bpe.$tgt\n", + "! echo \"Combined BPE Vocab\"\n", + "! tail -n 10 joeynmt/data/enam/vocab.txt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "X9xZAtq7KBy1", + "outputId": "21c558db-b4a6-46d5-e897-91af17e21f69" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bpe.codes.4000\tdev.bpe.en test.bpe.am train.am train.en\n", + "dev.am\t\tdev.en\t test.bpe.en train.bpe.am\n", + "dev.bpe.am\ttest.am test.en\t train.bpe.en\n" + ] + } + ], + "source": [ + "\n", + "# Also move everything we care about to a mounted location in google drive (relevant if running in colab) at gdrive_path\n", + "! cp train.* \"$gdrive_path\"\n", + "! cp test.* \"$gdrive_path\"\n", + "! cp dev.* \"$gdrive_path\"\n", + "! cp bpe.codes.4000 \"$gdrive_path\"\n", + "! ls \"$gdrive_path\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "cokx_Tjmy80O" + }, + "outputs": [], + "source": [ + "# This creates the config file for our JoeyNMT system. It might seem overwhelming so we've provided a couple of useful parameters you'll need to update\n", + "# (You can of course play with all the parameters if you'd like!)\n", + "name = '%s%s' % (source_language, target_language)\n", + "\n", + "config = \"\"\"\n", + "name: \"{name}_transformer\"\n", + "\n", + "data:\n", + " src: \"{source_language}\"\n", + " trg: \"{target_language}\"\n", + " train: \"data/{name}/train.bpe\"\n", + " dev: \"data/{name}/dev.bpe\"\n", + " test: \"data/{name}/test.bpe\"\n", + " level: \"bpe\"\n", + " lowercase: False\n", + " max_sent_length: 100\n", + " src_vocab: \"data/{name}/vocab.txt\"\n", + " trg_vocab: \"data/{name}/vocab.txt\"\n", + "\n", + "testing:\n", + " beam_size: 5\n", + " alpha: 1.0\n", + "\n", + "training:\n", + " #load_model: \"models/{name}_transformer/12000.ckpt\" # if given, load a pre-trained model from this checkpoint\n", + " random_seed: 42\n", + " optimizer: \"adam\"\n", + " normalization: \"tokens\"\n", + " adam_betas: [0.9, 0.999] \n", + " scheduling: \"noam\" # Try switching from plateau to Noam scheduling\n", + " learning_rate_factor: 0.5 # factor for Noam scheduler (used with Transformer)\n", + " learning_rate_warmup: 1000 # warmup steps for Noam scheduler (used with Transformer)\n", + " patience: 8\n", + " decrease_factor: 0.7\n", + " loss: \"crossentropy\"\n", + " learning_rate: 0.0002\n", + " learning_rate_min: 0.00000001\n", + " weight_decay: 0.0\n", + " label_smoothing: 0.1\n", + " batch_size: 4096\n", + " batch_type: \"token\"\n", + " eval_batch_size: 3600\n", + " eval_batch_type: \"token\"\n", + " batch_multiplier: 1\n", + " early_stopping_metric: \"ppl\"\n", + " epochs: 14 # TODO: Decrease for when playing around and checking of working. Around 30 is sufficient to check if its working at all\n", + " validation_freq: 400 # Decrease this for testing\n", + " logging_freq: 100\n", + " eval_metric: \"bleu\"\n", + " model_dir: \"models/{name}_transformer\"\n", + " overwrite: True\n", + " shuffle: True\n", + " use_cuda: True\n", + " max_output_length: 100\n", + " print_valid_sents: [0, 1, 2, 3]\n", + " keep_last_ckpts: 3\n", + "\n", + "model:\n", + " initializer: \"xavier\"\n", + " bias_initializer: \"zeros\"\n", + " init_gain: 1.0\n", + " embed_initializer: \"xavier\"\n", + " embed_init_gain: 1.0\n", + " tied_embeddings: True\n", + " tied_softmax: True\n", + " encoder:\n", + " type: \"transformer\"\n", + " num_layers: 6\n", + " num_heads: 8\n", + " embeddings:\n", + " embedding_dim: 512\n", + " scale: True\n", + " dropout: 0.\n", + " # typically ff_size = 4 x hidden_size\n", + " hidden_size: 512\n", + " ff_size: 2048\n", + " dropout: 0.3\n", + " decoder:\n", + " type: \"transformer\"\n", + " num_layers: 6\n", + " num_heads: 8\n", + " embeddings:\n", + " embedding_dim: 512\n", + " scale: True\n", + " dropout: 0.\n", + " # typically ff_size = 4 x hidden_size\n", + " hidden_size: 512\n", + " ff_size: 2048\n", + " dropout: 0.3\n", + "\"\"\".format(name=name, source_language=source_language, target_language=target_language)\n", + "with open(\"joeynmt/configs/transformer_{name}.yaml\".format(name=name),'w') as f:\n", + " f.write(config)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "pkxjU17f0MGf", + "outputId": "29d1a674-6d1f-4c8f-cec7-594486c43c74" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2019-10-14 08:00:29,227 Hello! This is Joey-NMT.\n", + "2019-10-14 08:00:30,950 Total params: 46475264\n", + "2019-10-14 08:00:30,952 Trainable parameters: ['decoder.layer_norm.bias', 'decoder.layer_norm.weight', 'decoder.layers.0.dec_layer_norm.bias', 'decoder.layers.0.dec_layer_norm.weight', 'decoder.layers.0.feed_forward.layer_norm.bias', 'decoder.layers.0.feed_forward.layer_norm.weight', 'decoder.layers.0.feed_forward.pwff_layer.0.bias', 'decoder.layers.0.feed_forward.pwff_layer.0.weight', 'decoder.layers.0.feed_forward.pwff_layer.3.bias', 'decoder.layers.0.feed_forward.pwff_layer.3.weight', 'decoder.layers.0.src_trg_att.k_layer.bias', 'decoder.layers.0.src_trg_att.k_layer.weight', 'decoder.layers.0.src_trg_att.output_layer.bias', 'decoder.layers.0.src_trg_att.output_layer.weight', 'decoder.layers.0.src_trg_att.q_layer.bias', 'decoder.layers.0.src_trg_att.q_layer.weight', 'decoder.layers.0.src_trg_att.v_layer.bias', 'decoder.layers.0.src_trg_att.v_layer.weight', 'decoder.layers.0.trg_trg_att.k_layer.bias', 'decoder.layers.0.trg_trg_att.k_layer.weight', 'decoder.layers.0.trg_trg_att.output_layer.bias', 'decoder.layers.0.trg_trg_att.output_layer.weight', 'decoder.layers.0.trg_trg_att.q_layer.bias', 'decoder.layers.0.trg_trg_att.q_layer.weight', 'decoder.layers.0.trg_trg_att.v_layer.bias', 'decoder.layers.0.trg_trg_att.v_layer.weight', 'decoder.layers.0.x_layer_norm.bias', 'decoder.layers.0.x_layer_norm.weight', 'decoder.layers.1.dec_layer_norm.bias', 'decoder.layers.1.dec_layer_norm.weight', 'decoder.layers.1.feed_forward.layer_norm.bias', 'decoder.layers.1.feed_forward.layer_norm.weight', 'decoder.layers.1.feed_forward.pwff_layer.0.bias', 'decoder.layers.1.feed_forward.pwff_layer.0.weight', 'decoder.layers.1.feed_forward.pwff_layer.3.bias', 'decoder.layers.1.feed_forward.pwff_layer.3.weight', 'decoder.layers.1.src_trg_att.k_layer.bias', 'decoder.layers.1.src_trg_att.k_layer.weight', 'decoder.layers.1.src_trg_att.output_layer.bias', 'decoder.layers.1.src_trg_att.output_layer.weight', 'decoder.layers.1.src_trg_att.q_layer.bias', 'decoder.layers.1.src_trg_att.q_layer.weight', 'decoder.layers.1.src_trg_att.v_layer.bias', 'decoder.layers.1.src_trg_att.v_layer.weight', 'decoder.layers.1.trg_trg_att.k_layer.bias', 'decoder.layers.1.trg_trg_att.k_layer.weight', 'decoder.layers.1.trg_trg_att.output_layer.bias', 'decoder.layers.1.trg_trg_att.output_layer.weight', 'decoder.layers.1.trg_trg_att.q_layer.bias', 'decoder.layers.1.trg_trg_att.q_layer.weight', 'decoder.layers.1.trg_trg_att.v_layer.bias', 'decoder.layers.1.trg_trg_att.v_layer.weight', 'decoder.layers.1.x_layer_norm.bias', 'decoder.layers.1.x_layer_norm.weight', 'decoder.layers.2.dec_layer_norm.bias', 'decoder.layers.2.dec_layer_norm.weight', 'decoder.layers.2.feed_forward.layer_norm.bias', 'decoder.layers.2.feed_forward.layer_norm.weight', 'decoder.layers.2.feed_forward.pwff_layer.0.bias', 'decoder.layers.2.feed_forward.pwff_layer.0.weight', 'decoder.layers.2.feed_forward.pwff_layer.3.bias', 'decoder.layers.2.feed_forward.pwff_layer.3.weight', 'decoder.layers.2.src_trg_att.k_layer.bias', 'decoder.layers.2.src_trg_att.k_layer.weight', 'decoder.layers.2.src_trg_att.output_layer.bias', 'decoder.layers.2.src_trg_att.output_layer.weight', 'decoder.layers.2.src_trg_att.q_layer.bias', 'decoder.layers.2.src_trg_att.q_layer.weight', 'decoder.layers.2.src_trg_att.v_layer.bias', 'decoder.layers.2.src_trg_att.v_layer.weight', 'decoder.layers.2.trg_trg_att.k_layer.bias', 'decoder.layers.2.trg_trg_att.k_layer.weight', 'decoder.layers.2.trg_trg_att.output_layer.bias', 'decoder.layers.2.trg_trg_att.output_layer.weight', 'decoder.layers.2.trg_trg_att.q_layer.bias', 'decoder.layers.2.trg_trg_att.q_layer.weight', 'decoder.layers.2.trg_trg_att.v_layer.bias', 'decoder.layers.2.trg_trg_att.v_layer.weight', 'decoder.layers.2.x_layer_norm.bias', 'decoder.layers.2.x_layer_norm.weight', 'decoder.layers.3.dec_layer_norm.bias', 'decoder.layers.3.dec_layer_norm.weight', 'decoder.layers.3.feed_forward.layer_norm.bias', 'decoder.layers.3.feed_forward.layer_norm.weight', 'decoder.layers.3.feed_forward.pwff_layer.0.bias', 'decoder.layers.3.feed_forward.pwff_layer.0.weight', 'decoder.layers.3.feed_forward.pwff_layer.3.bias', 'decoder.layers.3.feed_forward.pwff_layer.3.weight', 'decoder.layers.3.src_trg_att.k_layer.bias', 'decoder.layers.3.src_trg_att.k_layer.weight', 'decoder.layers.3.src_trg_att.output_layer.bias', 'decoder.layers.3.src_trg_att.output_layer.weight', 'decoder.layers.3.src_trg_att.q_layer.bias', 'decoder.layers.3.src_trg_att.q_layer.weight', 'decoder.layers.3.src_trg_att.v_layer.bias', 'decoder.layers.3.src_trg_att.v_layer.weight', 'decoder.layers.3.trg_trg_att.k_layer.bias', 'decoder.layers.3.trg_trg_att.k_layer.weight', 'decoder.layers.3.trg_trg_att.output_layer.bias', 'decoder.layers.3.trg_trg_att.output_layer.weight', 'decoder.layers.3.trg_trg_att.q_layer.bias', 'decoder.layers.3.trg_trg_att.q_layer.weight', 'decoder.layers.3.trg_trg_att.v_layer.bias', 'decoder.layers.3.trg_trg_att.v_layer.weight', 'decoder.layers.3.x_layer_norm.bias', 'decoder.layers.3.x_layer_norm.weight', 'decoder.layers.4.dec_layer_norm.bias', 'decoder.layers.4.dec_layer_norm.weight', 'decoder.layers.4.feed_forward.layer_norm.bias', 'decoder.layers.4.feed_forward.layer_norm.weight', 'decoder.layers.4.feed_forward.pwff_layer.0.bias', 'decoder.layers.4.feed_forward.pwff_layer.0.weight', 'decoder.layers.4.feed_forward.pwff_layer.3.bias', 'decoder.layers.4.feed_forward.pwff_layer.3.weight', 'decoder.layers.4.src_trg_att.k_layer.bias', 'decoder.layers.4.src_trg_att.k_layer.weight', 'decoder.layers.4.src_trg_att.output_layer.bias', 'decoder.layers.4.src_trg_att.output_layer.weight', 'decoder.layers.4.src_trg_att.q_layer.bias', 'decoder.layers.4.src_trg_att.q_layer.weight', 'decoder.layers.4.src_trg_att.v_layer.bias', 'decoder.layers.4.src_trg_att.v_layer.weight', 'decoder.layers.4.trg_trg_att.k_layer.bias', 'decoder.layers.4.trg_trg_att.k_layer.weight', 'decoder.layers.4.trg_trg_att.output_layer.bias', 'decoder.layers.4.trg_trg_att.output_layer.weight', 'decoder.layers.4.trg_trg_att.q_layer.bias', 'decoder.layers.4.trg_trg_att.q_layer.weight', 'decoder.layers.4.trg_trg_att.v_layer.bias', 'decoder.layers.4.trg_trg_att.v_layer.weight', 'decoder.layers.4.x_layer_norm.bias', 'decoder.layers.4.x_layer_norm.weight', 'decoder.layers.5.dec_layer_norm.bias', 'decoder.layers.5.dec_layer_norm.weight', 'decoder.layers.5.feed_forward.layer_norm.bias', 'decoder.layers.5.feed_forward.layer_norm.weight', 'decoder.layers.5.feed_forward.pwff_layer.0.bias', 'decoder.layers.5.feed_forward.pwff_layer.0.weight', 'decoder.layers.5.feed_forward.pwff_layer.3.bias', 'decoder.layers.5.feed_forward.pwff_layer.3.weight', 'decoder.layers.5.src_trg_att.k_layer.bias', 'decoder.layers.5.src_trg_att.k_layer.weight', 'decoder.layers.5.src_trg_att.output_layer.bias', 'decoder.layers.5.src_trg_att.output_layer.weight', 'decoder.layers.5.src_trg_att.q_layer.bias', 'decoder.layers.5.src_trg_att.q_layer.weight', 'decoder.layers.5.src_trg_att.v_layer.bias', 'decoder.layers.5.src_trg_att.v_layer.weight', 'decoder.layers.5.trg_trg_att.k_layer.bias', 'decoder.layers.5.trg_trg_att.k_layer.weight', 'decoder.layers.5.trg_trg_att.output_layer.bias', 'decoder.layers.5.trg_trg_att.output_layer.weight', 'decoder.layers.5.trg_trg_att.q_layer.bias', 'decoder.layers.5.trg_trg_att.q_layer.weight', 'decoder.layers.5.trg_trg_att.v_layer.bias', 'decoder.layers.5.trg_trg_att.v_layer.weight', 'decoder.layers.5.x_layer_norm.bias', 'decoder.layers.5.x_layer_norm.weight', 'encoder.layer_norm.bias', 'encoder.layer_norm.weight', 'encoder.layers.0.feed_forward.layer_norm.bias', 'encoder.layers.0.feed_forward.layer_norm.weight', 'encoder.layers.0.feed_forward.pwff_layer.0.bias', 'encoder.layers.0.feed_forward.pwff_layer.0.weight', 'encoder.layers.0.feed_forward.pwff_layer.3.bias', 'encoder.layers.0.feed_forward.pwff_layer.3.weight', 'encoder.layers.0.layer_norm.bias', 'encoder.layers.0.layer_norm.weight', 'encoder.layers.0.src_src_att.k_layer.bias', 'encoder.layers.0.src_src_att.k_layer.weight', 'encoder.layers.0.src_src_att.output_layer.bias', 'encoder.layers.0.src_src_att.output_layer.weight', 'encoder.layers.0.src_src_att.q_layer.bias', 'encoder.layers.0.src_src_att.q_layer.weight', 'encoder.layers.0.src_src_att.v_layer.bias', 'encoder.layers.0.src_src_att.v_layer.weight', 'encoder.layers.1.feed_forward.layer_norm.bias', 'encoder.layers.1.feed_forward.layer_norm.weight', 'encoder.layers.1.feed_forward.pwff_layer.0.bias', 'encoder.layers.1.feed_forward.pwff_layer.0.weight', 'encoder.layers.1.feed_forward.pwff_layer.3.bias', 'encoder.layers.1.feed_forward.pwff_layer.3.weight', 'encoder.layers.1.layer_norm.bias', 'encoder.layers.1.layer_norm.weight', 'encoder.layers.1.src_src_att.k_layer.bias', 'encoder.layers.1.src_src_att.k_layer.weight', 'encoder.layers.1.src_src_att.output_layer.bias', 'encoder.layers.1.src_src_att.output_layer.weight', 'encoder.layers.1.src_src_att.q_layer.bias', 'encoder.layers.1.src_src_att.q_layer.weight', 'encoder.layers.1.src_src_att.v_layer.bias', 'encoder.layers.1.src_src_att.v_layer.weight', 'encoder.layers.2.feed_forward.layer_norm.bias', 'encoder.layers.2.feed_forward.layer_norm.weight', 'encoder.layers.2.feed_forward.pwff_layer.0.bias', 'encoder.layers.2.feed_forward.pwff_layer.0.weight', 'encoder.layers.2.feed_forward.pwff_layer.3.bias', 'encoder.layers.2.feed_forward.pwff_layer.3.weight', 'encoder.layers.2.layer_norm.bias', 'encoder.layers.2.layer_norm.weight', 'encoder.layers.2.src_src_att.k_layer.bias', 'encoder.layers.2.src_src_att.k_layer.weight', 'encoder.layers.2.src_src_att.output_layer.bias', 'encoder.layers.2.src_src_att.output_layer.weight', 'encoder.layers.2.src_src_att.q_layer.bias', 'encoder.layers.2.src_src_att.q_layer.weight', 'encoder.layers.2.src_src_att.v_layer.bias', 'encoder.layers.2.src_src_att.v_layer.weight', 'encoder.layers.3.feed_forward.layer_norm.bias', 'encoder.layers.3.feed_forward.layer_norm.weight', 'encoder.layers.3.feed_forward.pwff_layer.0.bias', 'encoder.layers.3.feed_forward.pwff_layer.0.weight', 'encoder.layers.3.feed_forward.pwff_layer.3.bias', 'encoder.layers.3.feed_forward.pwff_layer.3.weight', 'encoder.layers.3.layer_norm.bias', 'encoder.layers.3.layer_norm.weight', 'encoder.layers.3.src_src_att.k_layer.bias', 'encoder.layers.3.src_src_att.k_layer.weight', 'encoder.layers.3.src_src_att.output_layer.bias', 'encoder.layers.3.src_src_att.output_layer.weight', 'encoder.layers.3.src_src_att.q_layer.bias', 'encoder.layers.3.src_src_att.q_layer.weight', 'encoder.layers.3.src_src_att.v_layer.bias', 'encoder.layers.3.src_src_att.v_layer.weight', 'encoder.layers.4.feed_forward.layer_norm.bias', 'encoder.layers.4.feed_forward.layer_norm.weight', 'encoder.layers.4.feed_forward.pwff_layer.0.bias', 'encoder.layers.4.feed_forward.pwff_layer.0.weight', 'encoder.layers.4.feed_forward.pwff_layer.3.bias', 'encoder.layers.4.feed_forward.pwff_layer.3.weight', 'encoder.layers.4.layer_norm.bias', 'encoder.layers.4.layer_norm.weight', 'encoder.layers.4.src_src_att.k_layer.bias', 'encoder.layers.4.src_src_att.k_layer.weight', 'encoder.layers.4.src_src_att.output_layer.bias', 'encoder.layers.4.src_src_att.output_layer.weight', 'encoder.layers.4.src_src_att.q_layer.bias', 'encoder.layers.4.src_src_att.q_layer.weight', 'encoder.layers.4.src_src_att.v_layer.bias', 'encoder.layers.4.src_src_att.v_layer.weight', 'encoder.layers.5.feed_forward.layer_norm.bias', 'encoder.layers.5.feed_forward.layer_norm.weight', 'encoder.layers.5.feed_forward.pwff_layer.0.bias', 'encoder.layers.5.feed_forward.pwff_layer.0.weight', 'encoder.layers.5.feed_forward.pwff_layer.3.bias', 'encoder.layers.5.feed_forward.pwff_layer.3.weight', 'encoder.layers.5.layer_norm.bias', 'encoder.layers.5.layer_norm.weight', 'encoder.layers.5.src_src_att.k_layer.bias', 'encoder.layers.5.src_src_att.k_layer.weight', 'encoder.layers.5.src_src_att.output_layer.bias', 'encoder.layers.5.src_src_att.output_layer.weight', 'encoder.layers.5.src_src_att.q_layer.bias', 'encoder.layers.5.src_src_att.q_layer.weight', 'encoder.layers.5.src_src_att.v_layer.bias', 'encoder.layers.5.src_src_att.v_layer.weight', 'src_embed.lut.weight']\n", + "2019-10-14 08:00:35,220 cfg.name : enam_transformer\n", + "2019-10-14 08:00:35,221 cfg.data.src : en\n", + "2019-10-14 08:00:35,221 cfg.data.trg : am\n", + "2019-10-14 08:00:35,221 cfg.data.train : data/enam/train.bpe\n", + "2019-10-14 08:00:35,221 cfg.data.dev : data/enam/dev.bpe\n", + "2019-10-14 08:00:35,221 cfg.data.test : data/enam/test.bpe\n", + "2019-10-14 08:00:35,221 cfg.data.level : bpe\n", + "2019-10-14 08:00:35,221 cfg.data.lowercase : False\n", + "2019-10-14 08:00:35,221 cfg.data.max_sent_length : 100\n", + "2019-10-14 08:00:35,221 cfg.data.src_vocab : data/enam/vocab.txt\n", + "2019-10-14 08:00:35,221 cfg.data.trg_vocab : data/enam/vocab.txt\n", + "2019-10-14 08:00:35,221 cfg.testing.beam_size : 5\n", + "2019-10-14 08:00:35,222 cfg.testing.alpha : 1.0\n", + "2019-10-14 08:00:35,222 cfg.training.random_seed : 42\n", + "2019-10-14 08:00:35,222 cfg.training.optimizer : adam\n", + "2019-10-14 08:00:35,222 cfg.training.normalization : tokens\n", + "2019-10-14 08:00:35,222 cfg.training.adam_betas : [0.9, 0.999]\n", + "2019-10-14 08:00:35,222 cfg.training.scheduling : noam\n", + "2019-10-14 08:00:35,222 cfg.training.learning_rate_factor : 0.5\n", + "2019-10-14 08:00:35,222 cfg.training.learning_rate_warmup : 1000\n", + "2019-10-14 08:00:35,222 cfg.training.patience : 8\n", + "2019-10-14 08:00:35,222 cfg.training.decrease_factor : 0.7\n", + "2019-10-14 08:00:35,222 cfg.training.loss : crossentropy\n", + "2019-10-14 08:00:35,222 cfg.training.learning_rate : 0.0002\n", + "2019-10-14 08:00:35,223 cfg.training.learning_rate_min : 1e-08\n", + "2019-10-14 08:00:35,223 cfg.training.weight_decay : 0.0\n", + "2019-10-14 08:00:35,223 cfg.training.label_smoothing : 0.1\n", + "2019-10-14 08:00:35,223 cfg.training.batch_size : 4096\n", + "2019-10-14 08:00:35,223 cfg.training.batch_type : token\n", + "2019-10-14 08:00:35,223 cfg.training.eval_batch_size : 3600\n", + "2019-10-14 08:00:35,223 cfg.training.eval_batch_type : token\n", + "2019-10-14 08:00:35,223 cfg.training.batch_multiplier : 1\n", + "2019-10-14 08:00:35,223 cfg.training.early_stopping_metric : ppl\n", + "2019-10-14 08:00:35,223 cfg.training.epochs : 14\n", + "2019-10-14 08:00:35,223 cfg.training.validation_freq : 400\n", + "2019-10-14 08:00:35,223 cfg.training.logging_freq : 100\n", + "2019-10-14 08:00:35,223 cfg.training.eval_metric : bleu\n", + "2019-10-14 08:00:35,223 cfg.training.model_dir : models/enam_transformer\n", + "2019-10-14 08:00:35,224 cfg.training.overwrite : True\n", + "2019-10-14 08:00:35,224 cfg.training.shuffle : True\n", + "2019-10-14 08:00:35,224 cfg.training.use_cuda : True\n", + "2019-10-14 08:00:35,224 cfg.training.max_output_length : 100\n", + "2019-10-14 08:00:35,224 cfg.training.print_valid_sents : [0, 1, 2, 3]\n", + "2019-10-14 08:00:35,224 cfg.training.keep_last_ckpts : 3\n", + "2019-10-14 08:00:35,224 cfg.model.initializer : xavier\n", + "2019-10-14 08:00:35,224 cfg.model.bias_initializer : zeros\n", + "2019-10-14 08:00:35,224 cfg.model.init_gain : 1.0\n", + "2019-10-14 08:00:35,224 cfg.model.embed_initializer : xavier\n", + "2019-10-14 08:00:35,224 cfg.model.embed_init_gain : 1.0\n", + "2019-10-14 08:00:35,224 cfg.model.tied_embeddings : True\n", + "2019-10-14 08:00:35,224 cfg.model.tied_softmax : True\n", + "2019-10-14 08:00:35,224 cfg.model.encoder.type : transformer\n", + "2019-10-14 08:00:35,224 cfg.model.encoder.num_layers : 6\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.num_heads : 8\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.embeddings.embedding_dim : 512\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.embeddings.scale : True\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.embeddings.dropout : 0.0\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.hidden_size : 512\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.ff_size : 2048\n", + "2019-10-14 08:00:35,225 cfg.model.encoder.dropout : 0.3\n", + "2019-10-14 08:00:35,225 cfg.model.decoder.type : transformer\n", + "2019-10-14 08:00:35,225 cfg.model.decoder.num_layers : 6\n", + "2019-10-14 08:00:35,225 cfg.model.decoder.num_heads : 8\n", + "2019-10-14 08:00:35,225 cfg.model.decoder.embeddings.embedding_dim : 512\n", + "2019-10-14 08:00:35,225 cfg.model.decoder.embeddings.scale : True\n", + "2019-10-14 08:00:35,225 cfg.model.decoder.embeddings.dropout : 0.0\n", + "2019-10-14 08:00:35,226 cfg.model.decoder.hidden_size : 512\n", + "2019-10-14 08:00:35,226 cfg.model.decoder.ff_size : 2048\n", + "2019-10-14 08:00:35,226 cfg.model.decoder.dropout : 0.3\n", + "2019-10-14 08:00:35,226 Data set sizes: \n", + "\ttrain 29017,\n", + "\tvalid 1001,\n", + "\ttest 1001\n", + "2019-10-14 08:00:35,226 First training example:\n", + "\t[SRC] s@@ our@@ ce@@ _@@ sen@@ ten@@ ce\n", + "\t[TRG] tar@@ ge@@ t@@ _@@ sen@@ ten@@ ce\n", + "2019-10-14 08:00:35,226 First 10 words (src): (0) (1) (2) (3) (4) the (5) and (6) of (7) to (8) በ@@ (9) የ@@\n", + "2019-10-14 08:00:35,227 First 10 words (trg): (0) (1) (2) (3) (4) the (5) and (6) of (7) to (8) በ@@ (9) የ@@\n", + "2019-10-14 08:00:35,227 Number of Src words (types): 4560\n", + "2019-10-14 08:00:35,227 Number of Trg words (types): 4560\n", + "2019-10-14 08:00:35,227 Model(\n", + "\tencoder=TransformerEncoder(num_layers=6, num_heads=8),\n", + "\tdecoder=TransformerDecoder(num_layers=6, num_heads=8),\n", + "\tsrc_embed=Embeddings(embedding_dim=512, vocab_size=4560),\n", + "\ttrg_embed=Embeddings(embedding_dim=512, vocab_size=4560))\n", + "2019-10-14 08:00:35,232 EPOCH 1\n", + "2019-10-14 08:01:49,888 Epoch 1 Step: 100 Batch Loss: 6.234140 Tokens per Sec: 3758, Lr: 0.000070\n", + "2019-10-14 08:03:05,499 Epoch 1 Step: 200 Batch Loss: 5.999924 Tokens per Sec: 7419, Lr: 0.000140\n", + "2019-10-14 08:04:21,850 Epoch 1 Step: 300 Batch Loss: 5.889366 Tokens per Sec: 11070, Lr: 0.000210\n", + "2019-10-14 08:05:18,601 Epoch 1: total training loss 2297.35\n", + "2019-10-14 08:05:18,601 EPOCH 2\n", + "2019-10-14 08:05:36,921 Epoch 2 Step: 400 Batch Loss: 5.249513 Tokens per Sec: 3649, Lr: 0.000280\n", + "2019-10-14 08:09:43,506 Hooray! New best validation result [ppl]!\n", + "2019-10-14 08:09:43,506 Saving new checkpoint.\n", + "2019-10-14 08:09:45,165 Example #0\n", + "2019-10-14 08:09:45,165 \tSource: source_sentence\n", + "2019-10-14 08:09:45,165 \tReference: target_sentence\n", + "2019-10-14 08:09:45,165 \tHypothesis: 3 ይሖዋ የየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየይን ይም ይም ላይ ላይ ላይ ላይ ይም ይም በይም ይም ይ።+\n", + "2019-10-14 08:09:45,165 Example #1\n", + "2019-10-14 08:09:45,166 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 08:09:45,166 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 08:09:45,166 \tHypothesis: 9 ከዚያም የየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየየስ፤\n", + "2019-10-14 08:09:45,166 Example #2\n", + "2019-10-14 08:09:45,166 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 08:09:45,166 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 08:09:45,166 \tHypothesis: 9 ““የየየየየየየየየየየየየየየየየየየየየየየላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ስ።\n", + "2019-10-14 08:09:45,166 Example #3\n", + "2019-10-14 08:09:45,167 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 08:09:45,167 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 08:09:45,167 \tHypothesis: 9 ““የየየየየየየየየየየየየየየየየየየየየየየየላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ላይ ስ።\n", + "2019-10-14 08:09:45,167 Validation result at epoch 2, step 400: bleu: 0.00, loss: 202366.3438, ppl: 255.8878, duration: 248.2455s\n", + "2019-10-14 08:11:01,978 Epoch 2 Step: 500 Batch Loss: 5.287745 Tokens per Sec: 4569, Lr: 0.000349\n", + "2019-10-14 08:12:17,468 Epoch 2 Step: 600 Batch Loss: 5.312285 Tokens per Sec: 8331, Lr: 0.000419\n", + "2019-10-14 08:13:33,583 Epoch 2 Step: 700 Batch Loss: 4.887832 Tokens per Sec: 11976, Lr: 0.000489\n", + "2019-10-14 08:14:12,413 Epoch 2: total training loss 1975.61\n", + "2019-10-14 08:14:12,413 EPOCH 3\n", + "2019-10-14 08:14:49,083 Epoch 3 Step: 800 Batch Loss: 4.989283 Tokens per Sec: 3726, Lr: 0.000559\n", + "2019-10-14 08:18:54,989 Hooray! New best validation result [ppl]!\n", + "2019-10-14 08:18:54,989 Saving new checkpoint.\n", + "2019-10-14 08:18:56,546 Example #0\n", + "2019-10-14 08:18:56,547 \tSource: source_sentence\n", + "2019-10-14 08:18:56,547 \tReference: target_sentence\n", + "2019-10-14 08:18:56,547 \tHypothesis: 2 “በሰው ጋር ጋር ይቆዳል።+\n", + "2019-10-14 08:18:56,547 Example #1\n", + "2019-10-14 08:18:56,547 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 08:18:56,547 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 08:18:56,547 \tHypothesis: 3 የቤን ልጅ ሆይ፣ በእኔ ሆይ፣ በእኔ በእኔ ሆይ፣ በእኔ ሆይ፣ በዚን የዚን ልጅ ሆይ፣ በእኔ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ እናገራለሁ።\n", + "2019-10-14 08:18:56,547 Example #2\n", + "2019-10-14 08:18:56,548 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 08:18:56,548 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 08:18:56,548 \tHypothesis: 7 ሆኖም ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን ግን\n", + "2019-10-14 08:18:56,548 Example #3\n", + "2019-10-14 08:18:56,548 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 08:18:56,548 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 08:18:56,548 \tHypothesis: 7 “እኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ ሆይ፣ በእኔ አልችልም።\n", + "2019-10-14 08:18:56,548 Validation result at epoch 3, step 800: bleu: 0.00, loss: 180673.2500, ppl: 141.2256, duration: 247.4652s\n", + "2019-10-14 08:20:11,702 Epoch 3 Step: 900 Batch Loss: 4.617150 Tokens per Sec: 5490, Lr: 0.000629\n", + "2019-10-14 08:21:27,377 Epoch 3 Step: 1000 Batch Loss: 4.429297 Tokens per Sec: 9117, Lr: 0.000699\n", + "2019-10-14 08:22:43,675 Epoch 3 Step: 1100 Batch Loss: 4.203452 Tokens per Sec: 12760, Lr: 0.000666\n", + "2019-10-14 08:23:05,433 Epoch 3: total training loss 1692.23\n", + "2019-10-14 08:23:05,434 EPOCH 4\n", + "2019-10-14 08:23:59,745 Epoch 4 Step: 1200 Batch Loss: 3.949697 Tokens per Sec: 3728, Lr: 0.000638\n", + "2019-10-14 08:28:05,125 Hooray! New best validation result [ppl]!\n", + "2019-10-14 08:28:05,125 Saving new checkpoint.\n", + "2019-10-14 08:28:06,685 Example #0\n", + "2019-10-14 08:28:06,686 \tSource: source_sentence\n", + "2019-10-14 08:28:06,686 \tReference: target_sentence\n", + "2019-10-14 08:28:06,686 \tHypothesis: 2 በብሔራት መካከል የሚሸከውን ሁሉ ይዛል።+\n", + "2019-10-14 08:28:06,686 Example #1\n", + "2019-10-14 08:28:06,686 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 08:28:06,686 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 08:28:06,686 \tHypothesis: 6 እኔም በሰማርያ ላይ የተሸከኝ ሰዎች መካከል የሰማርያ ሰዎች ሁሉ የሰማርያ ቃል ኪዳን ቃል ኪዳን አስወግዳለሁ።+\n", + "2019-10-14 08:28:06,686 Example #2\n", + "2019-10-14 08:28:06,687 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 08:28:06,687 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 08:28:06,687 \tHypothesis: 6 ሆኖም የያዕቆብ ልጅ ሆይ፣ የያዕቆብ ልጅ ሆይ፣ የያዕቆብ ልጅ ሆይ፣ የያዕቆብ ልጅ ሆይ፣ የያዕቆብ ልጅ ሆይ፣ የሰጠኝና በአባቶቼም ላይ ተተተተተተተተተተተተተተተላለች።+\n", + "2019-10-14 08:28:06,687 Example #3\n", + "2019-10-14 08:28:06,687 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 08:28:06,687 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 08:28:06,687 \tHypothesis: 6 የቃል ኪዳን ልጅ የሰጠኝ ልጅ የሰጠኝ ቃል ኪዳን ቃል ኪዳን አስወግዳለሁ።+\n", + "2019-10-14 08:28:06,687 Validation result at epoch 4, step 1200: bleu: 0.00, loss: 155360.5781, ppl: 70.5840, duration: 246.9424s\n", + "2019-10-14 08:29:22,344 Epoch 4 Step: 1300 Batch Loss: 3.567261 Tokens per Sec: 6378, Lr: 0.000613\n", + "2019-10-14 08:30:38,552 Epoch 4 Step: 1400 Batch Loss: 3.827760 Tokens per Sec: 10045, Lr: 0.000591\n", + "2019-10-14 08:31:54,866 Epoch 4 Step: 1500 Batch Loss: 3.823745 Tokens per Sec: 13743, Lr: 0.000571\n", + "2019-10-14 08:31:56,320 Epoch 4: total training loss 1427.33\n", + "2019-10-14 08:31:56,320 EPOCH 5\n", + "2019-10-14 08:33:10,934 Epoch 5 Step: 1600 Batch Loss: 3.546507 Tokens per Sec: 3693, Lr: 0.000552\n", + "2019-10-14 08:37:16,343 Hooray! New best validation result [ppl]!\n", + "2019-10-14 08:37:16,344 Saving new checkpoint.\n", + "2019-10-14 08:37:18,134 Example #0\n", + "2019-10-14 08:37:18,134 \tSource: source_sentence\n", + "2019-10-14 08:37:18,134 \tReference: target_sentence\n", + "2019-10-14 08:37:18,134 \tHypothesis: 11\n", + "2019-10-14 08:37:18,135 Example #1\n", + "2019-10-14 08:37:18,135 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 08:37:18,135 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 08:37:18,135 \tHypothesis: 11 በጌድዮስ ላይ የተረፈውን ነገር አደረግኩ፤ የሕዝቤ አባት የሆነው የየበኩር ልጅ የየበኩር ልጅ የየየበኩር ልጅ የየበኩር ልጅ ነበረች።+\n", + "2019-10-14 08:37:18,135 Example #2\n", + "2019-10-14 08:37:18,135 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 08:37:18,135 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 08:37:18,136 \tHypothesis: 11 ሆኖም አምላክ ከእናቴ ጋር እንከን እንከን እናቱ እናቱ እናቱ እናቴ ጋር እንከን እንድንቅ እናውቀላለሁ።+\n", + "2019-10-14 08:37:18,136 Example #3\n", + "2019-10-14 08:37:18,136 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 08:37:18,136 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 08:37:18,136 \tHypothesis: 11 የልጅ ልጅ ልጅ ልጅ ሆይ፣ ይህን ሲሰማ እየመጣ ዘንድ የፈጸምኩት ሰው እንደሆንኩ ያውቃሉ፤ምክንያቱም እኔ የሕዝቦችም ሁሉ ይነጻል።\n", + "2019-10-14 08:37:18,136 Validation result at epoch 5, step 1600: bleu: 0.14, loss: 142127.5312, ppl: 49.1180, duration: 247.2014s\n", + "2019-10-14 08:38:34,687 Epoch 5 Step: 1700 Batch Loss: 3.334598 Tokens per Sec: 7312, Lr: 0.000536\n", + "2019-10-14 08:39:50,953 Epoch 5 Step: 1800 Batch Loss: 3.345548 Tokens per Sec: 11060, Lr: 0.000521\n", + "2019-10-14 08:40:48,250 Epoch 5: total training loss 1271.16\n", + "2019-10-14 08:40:48,250 EPOCH 6\n", + "2019-10-14 08:41:06,020 Epoch 6 Step: 1900 Batch Loss: 3.088701 Tokens per Sec: 3766, Lr: 0.000507\n", + "2019-10-14 08:42:22,057 Epoch 6 Step: 2000 Batch Loss: 3.342481 Tokens per Sec: 4579, Lr: 0.000494\n", + "2019-10-14 08:46:26,927 Hooray! New best validation result [ppl]!\n", + "2019-10-14 08:46:26,927 Saving new checkpoint.\n", + "2019-10-14 08:46:28,748 Example #0\n", + "2019-10-14 08:46:28,749 \tSource: source_sentence\n", + "2019-10-14 08:46:28,749 \tReference: target_sentence\n", + "2019-10-14 08:46:28,749 \tHypothesis: 38\n", + "2019-10-14 08:46:28,749 Example #1\n", + "2019-10-14 08:46:28,749 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 08:46:28,749 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 08:46:28,749 \tHypothesis: 14 እኔም በብሔራት መካከል በብሔራት መካከል ስሜ ተአምራዊ ምልክቶች ነበር፤ ምክንያቱም በአባቶቻቸው ፊት በስተቀር በሕዝቤ መካከል ተሸሸንኩ።+\n", + "2019-10-14 08:46:28,749 Example #2\n", + "2019-10-14 08:46:28,750 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 08:46:28,750 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 08:46:28,750 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ጋር ለመገናኘት ፈቃደኛ አልሆነም፤ ምክንያቱም በጌታዬ+ ላይ በስተቀር በስተቀር በስተቀር በስተቀር በስተቀር በስተቀር በስተቀር በአንድነት ተከተለኝ+\n", + "2019-10-14 08:46:28,750 Example #3\n", + "2019-10-14 08:46:28,751 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 08:46:28,751 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 08:46:28,751 \tHypothesis: 16 እኔ የምናገረው ቃል ኪዳን መሠረት ለእነሱ ስሜ ስል ስል ስል ስል ስል እችላለሁ፤ለእነሱም ሆነ ለእነሱ አልፎ አልፎ አልችልም።+\n", + "2019-10-14 08:46:28,751 Validation result at epoch 6, step 2000: bleu: 0.00, loss: 131049.6797, ppl: 36.2593, duration: 246.6934s\n", + "2019-10-14 08:47:44,397 Epoch 6 Step: 2100 Batch Loss: 2.853915 Tokens per Sec: 8311, Lr: 0.000482\n", + "2019-10-14 08:49:01,053 Epoch 6 Step: 2200 Batch Loss: 3.146314 Tokens per Sec: 11926, Lr: 0.000471\n", + "2019-10-14 08:49:38,597 Epoch 6: total training loss 1132.80\n", + "2019-10-14 08:49:38,597 EPOCH 7\n", + "2019-10-14 08:50:16,519 Epoch 7 Step: 2300 Batch Loss: 2.925962 Tokens per Sec: 3732, Lr: 0.000461\n", + "2019-10-14 08:51:32,220 Epoch 7 Step: 2400 Batch Loss: 2.730455 Tokens per Sec: 5582, Lr: 0.000451\n", + "2019-10-14 08:55:36,725 Hooray! New best validation result [ppl]!\n", + "2019-10-14 08:55:36,725 Saving new checkpoint.\n", + "2019-10-14 08:55:38,352 Example #0\n", + "2019-10-14 08:55:38,353 \tSource: source_sentence\n", + "2019-10-14 08:55:38,353 \tReference: target_sentence\n", + "2019-10-14 08:55:38,353 \tHypothesis: 16\n", + "2019-10-14 08:55:38,353 Example #1\n", + "2019-10-14 08:55:38,353 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 08:55:38,353 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 08:55:38,354 \tHypothesis: 14 እኔም በእነዚህ ብሔራት መካከል በብዙ ብሔራትን በሐዋርያት በብዙ ብሔራት መካከል እጅግ ብዙ ብዙ ብዙ ብዙ ብዙ ሀብት እንደሆኑ በብዙ ብሔራት መካከል እጅግ ብዙ ናቸው።+\n", + "2019-10-14 08:55:38,354 Example #2\n", + "2019-10-14 08:55:38,354 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 08:55:38,354 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 08:55:38,354 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ከእናቴ ጋር በተያያዘ ጊዜ በእናቴ በእናቴ ላይ በነበረበት ጊዜ ሁሉ ለጌታችን ሰጠኝ።+\n", + "2019-10-14 08:55:38,354 Example #3\n", + "2019-10-14 08:55:38,355 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 08:55:38,355 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 08:55:38,355 \tHypothesis: 16 ለሰው ልጅ ለሆነው ለሰዎች የሚገባውን ነገር ለማስነሳት+ ለሌሎች ብሔራት እንዲሰጥ ለማድረግ የላከኝ ሰው ነበር፤*ከሌሎች አማልክት መካከል አንድም ሰው አልነበረምና።+\n", + "2019-10-14 08:55:38,355 Validation result at epoch 7, step 2400: bleu: 0.00, loss: 126511.4062, ppl: 32.0197, duration: 246.1343s\n", + "2019-10-14 08:56:53,689 Epoch 7 Step: 2500 Batch Loss: 2.120047 Tokens per Sec: 9324, Lr: 0.000442\n", + "2019-10-14 08:58:08,805 Epoch 7 Step: 2600 Batch Loss: 2.384480 Tokens per Sec: 13045, Lr: 0.000433\n", + "2019-10-14 08:58:28,834 Epoch 7: total training loss 1055.87\n", + "2019-10-14 08:58:28,835 EPOCH 8\n", + "2019-10-14 08:59:24,271 Epoch 8 Step: 2700 Batch Loss: 2.453546 Tokens per Sec: 3712, Lr: 0.000425\n", + "2019-10-14 09:00:39,646 Epoch 8 Step: 2800 Batch Loss: 2.913447 Tokens per Sec: 6448, Lr: 0.000418\n", + "2019-10-14 09:04:44,054 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:04:44,054 Saving new checkpoint.\n", + "2019-10-14 09:04:45,897 Example #0\n", + "2019-10-14 09:04:45,897 \tSource: source_sentence\n", + "2019-10-14 09:04:45,898 \tReference: target_sentence\n", + "2019-10-14 09:04:45,898 \tHypothesis: 16\n", + "2019-10-14 09:04:45,898 Example #1\n", + "2019-10-14 09:04:45,898 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:04:45,898 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካ��ት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:04:45,898 \tHypothesis: 14 ደግሞም በሕያውነቴ እምላለሁ፣ በብዙ ብሔር በሆኑ ብሔራት መካከል እንደ እነሱ ያለ ነገር ተደርጎ ነበር፤ ምክንያቱም እኔ ራሴ ራሴ ራሴ ራሴ ራሴ ራሴ ራሴ ነበር።+\n", + "2019-10-14 09:04:45,898 Example #2\n", + "2019-10-14 09:04:45,898 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:04:45,898 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:04:45,899 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ጋር በተያያዘ ጊዜ የእናቱን እናት ነበር፤ ደግሞም ለእሱ ጸሎታ+ ተጣለልኝ፤+\n", + "2019-10-14 09:04:45,899 Example #3\n", + "2019-10-14 09:04:45,899 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:04:45,899 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:04:45,899 \tHypothesis: 16 የሕይወት ዘመኔ የሰጠኝን ልጅ እንዲጠብቅ ለማድረግ ነው፤+ እኔም በብሔራት መካከል እንዳላደርግ* አደርገዋለሁ፤* ነገር ግን አልሰማሁም፤*\n", + "2019-10-14 09:04:45,899 Validation result at epoch 8, step 2800: bleu: 0.78, loss: 121347.4766, ppl: 27.7951, duration: 246.2530s\n", + "2019-10-14 09:06:02,283 Epoch 8 Step: 2900 Batch Loss: 2.546571 Tokens per Sec: 10070, Lr: 0.000410\n", + "2019-10-14 09:07:18,562 Epoch 8 Step: 3000 Batch Loss: 2.732360 Tokens per Sec: 13806, Lr: 0.000403\n", + "2019-10-14 09:07:18,697 Epoch 8: total training loss 977.82\n", + "2019-10-14 09:07:18,698 EPOCH 9\n", + "2019-10-14 09:08:33,588 Epoch 9 Step: 3100 Batch Loss: 2.216206 Tokens per Sec: 3720, Lr: 0.000397\n", + "2019-10-14 09:09:49,814 Epoch 9 Step: 3200 Batch Loss: 2.877482 Tokens per Sec: 7388, Lr: 0.000391\n", + "2019-10-14 09:13:54,195 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:13:54,195 Saving new checkpoint.\n", + "2019-10-14 09:13:56,306 Example #0\n", + "2019-10-14 09:13:56,306 \tSource: source_sentence\n", + "2019-10-14 09:13:56,307 \tReference: target_sentence\n", + "2019-10-14 09:13:56,307 \tHypothesis: 16 ሌሎቹ ደግሞ ሰላም ሰላም ሰላም ሰላም ሰላም ይላሉ።\n", + "2019-10-14 09:13:56,307 Example #1\n", + "2019-10-14 09:13:56,307 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:13:56,307 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:13:56,307 \tHypothesis: 14 እኔም በብሔራት መካከል እንደ ንግሥናው እንደ ንግሥናው እንደ ንግሥናው በኖሮት ነበር።+\n", + "2019-10-14 09:13:56,307 Example #2\n", + "2019-10-14 09:13:56,308 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:13:56,308 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:13:56,308 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ከእናቴ ከእናቴ ከእናቴ ጋር በተያያዘ ሳለ የእናቴን ስም ከልቤ ወደ እኔ መጣ፤+\n", + "2019-10-14 09:13:56,308 Example #3\n", + "2019-10-14 09:13:56,308 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:13:56,308 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:13:56,309 \tHypothesis: 16 እኔም ለሰው ምሥራች የሚሰጠኝ ነገር እንዲ��ጸም አደርጋለሁ፤+ በመሆኑም በብሔራት መካከል እንዲጠፋ ለማድረግ ለሌሎች ብሔራት ይነግራል፤*\n", + "2019-10-14 09:13:56,309 Validation result at epoch 9, step 3200: bleu: 0.69, loss: 118366.3984, ppl: 25.6151, duration: 246.4949s\n", + "2019-10-14 09:15:11,804 Epoch 9 Step: 3300 Batch Loss: 2.753889 Tokens per Sec: 11157, Lr: 0.000385\n", + "2019-10-14 09:16:08,737 Epoch 9: total training loss 916.72\n", + "2019-10-14 09:16:08,738 EPOCH 10\n", + "2019-10-14 09:16:27,584 Epoch 10 Step: 3400 Batch Loss: 2.347569 Tokens per Sec: 3676, Lr: 0.000379\n", + "2019-10-14 09:17:43,095 Epoch 10 Step: 3500 Batch Loss: 2.163630 Tokens per Sec: 4624, Lr: 0.000374\n", + "2019-10-14 09:18:59,631 Epoch 10 Step: 3600 Batch Loss: 2.231137 Tokens per Sec: 8261, Lr: 0.000368\n", + "2019-10-14 09:23:04,227 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:23:04,228 Saving new checkpoint.\n", + "2019-10-14 09:23:05,906 Example #0\n", + "2019-10-14 09:23:05,906 \tSource: source_sentence\n", + "2019-10-14 09:23:05,906 \tReference: target_sentence\n", + "2019-10-14 09:23:05,906 \tHypothesis: 16 አብርሃም እንዲህ ሲል አዘዛቸው፦\n", + "2019-10-14 09:23:05,907 Example #1\n", + "2019-10-14 09:23:05,907 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:23:05,907 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:23:05,907 \tHypothesis: 14 በብሔራት መካከል በንግሥና በዝርዝር የተነሳ በብዙ ብሔራት መካከል ታላቅ ቁጣ ተሞልቻለሁ፤ ምክንያቱም አባቴ ከአባቴ ጋር ግንኙነት ፈጸመ።+\n", + "2019-10-14 09:23:05,907 Example #2\n", + "2019-10-14 09:23:05,907 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:23:05,908 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:23:05,908 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ከእናቴ ከእናቴ ከሆነው ከእናቴ ከሆነው ከእናቴ ከሆነው ከእናቴ ጋር በተያያዘ ጊዜ+\n", + "2019-10-14 09:23:05,908 Example #3\n", + "2019-10-14 09:23:05,908 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:23:05,908 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:23:05,908 \tHypothesis: 16 እኔ በብሔራት መካከል ይህን ምሥራች እንዳለኝ አወቀኝ፤+ እኔም በብሔራት መካከል ምንም ነገር አልሰጠሁም፤*\n", + "2019-10-14 09:23:05,908 Validation result at epoch 10, step 3600: bleu: 1.05, loss: 116329.0000, ppl: 24.2243, duration: 246.2766s\n", + "2019-10-14 09:24:22,059 Epoch 10 Step: 3700 Batch Loss: 2.388584 Tokens per Sec: 12050, Lr: 0.000363\n", + "2019-10-14 09:24:58,716 Epoch 10: total training loss 856.58\n", + "2019-10-14 09:24:58,717 EPOCH 11\n", + "2019-10-14 09:25:37,936 Epoch 11 Step: 3800 Batch Loss: 1.489956 Tokens per Sec: 3692, Lr: 0.000358\n", + "2019-10-14 09:26:53,523 Epoch 11 Step: 3900 Batch Loss: 2.285307 Tokens per Sec: 5625, Lr: 0.000354\n", + "2019-10-14 09:28:08,744 Epoch 11 Step: 4000 Batch Loss: 2.457195 Tokens per Sec: 9368, Lr: 0.000349\n", + "2019-10-14 09:32:13,156 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:32:13,156 Saving new checkpoint.\n", + "2019-10-14 09:32:14,791 Example #0\n", + "2019-10-14 09:32:14,792 \tSource: source_sentence\n", + "2019-10-14 09:32:14,792 \tReference: target_sentence\n", + "2019-10-14 09:32:14,792 \tHypothesis: 42\n", + "2019-10-14 09:32:14,792 Example #1\n", + "2019-10-14 09:32:14,793 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:32:14,793 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በ���ኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:32:14,793 \tHypothesis: 14 በብሔራት መካከል እጅግ ብዙ ተአምራት እንደነበረች በብዙ ብሔራት ይበልጥ ታላቅ ደስታ ተቀበሉ፤ ምክንያቱም እኔ ራሴ ከአባቴ ጋር ነኝ።+\n", + "2019-10-14 09:32:14,793 Example #2\n", + "2019-10-14 09:32:14,793 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:32:14,793 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:32:14,794 \tHypothesis: 15 ሆኖም የእናቴን ስም ከእናት ከእናቱ ጋር በተያያዘ ጊዜ+ የወለደችለትን የልዑሉ አምላክ አስታውስ፤+\n", + "2019-10-14 09:32:14,794 Example #3\n", + "2019-10-14 09:32:14,794 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:32:14,794 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:32:14,794 \tHypothesis: 16 እኔ ግን እላችኋለሁ፣ በብሔራት መካከል እንደ እኔ ያለ ምሥራች እንዳላስነሳት+ ሁሉን ነገር እንዳላስነሳኝ፣*ከዚህ ይልቅ በአንድነት አልሰጠኝም፤*@@\n", + "2019-10-14 09:32:14,795 Validation result at epoch 11, step 4000: bleu: 1.28, loss: 114728.0703, ppl: 23.1847, duration: 246.0503s\n", + "2019-10-14 09:33:30,951 Epoch 11 Step: 4100 Batch Loss: 2.131005 Tokens per Sec: 12983, Lr: 0.000345\n", + "2019-10-14 09:33:48,687 Epoch 11: total training loss 816.33\n", + "2019-10-14 09:33:48,687 EPOCH 12\n", + "2019-10-14 09:34:46,741 Epoch 12 Step: 4200 Batch Loss: 1.735204 Tokens per Sec: 3733, Lr: 0.000341\n", + "2019-10-14 09:36:02,689 Epoch 12 Step: 4300 Batch Loss: 2.138577 Tokens per Sec: 6556, Lr: 0.000337\n", + "2019-10-14 09:37:19,002 Epoch 12 Step: 4400 Batch Loss: 1.867105 Tokens per Sec: 10241, Lr: 0.000333\n", + "2019-10-14 09:41:23,480 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:41:23,480 Saving new checkpoint.\n", + "2019-10-14 09:41:25,169 Example #0\n", + "2019-10-14 09:41:25,169 \tSource: source_sentence\n", + "2019-10-14 09:41:25,169 \tReference: target_sentence\n", + "2019-10-14 09:41:25,169 \tHypothesis: 16 አብርሃምም እንዲህ ይባላል።\n", + "2019-10-14 09:41:25,169 Example #1\n", + "2019-10-14 09:41:25,170 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:41:25,170 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:41:25,170 \tHypothesis: 14 እኔም በብዙ ብሔራት ፊት ታላቅ ከሆነው ከበርካታ ይበልጥ ታላቅ የከበረ ታላቅ ምክር ነበረብኩ፤ ምክንያቱም አባቴ ከአባቴ ጋር ነበር።+\n", + "2019-10-14 09:41:25,170 Example #2\n", + "2019-10-14 09:41:25,170 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:41:25,170 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:41:25,170 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ማህፀን ከወለደ በኋላ በፍጹም ተልእኮኛል፤+\n", + "2019-10-14 09:41:25,170 Example #3\n", + "2019-10-14 09:41:25,171 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:41:25,171 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:41:25,171 \tHypothesis: 16 እኔ ግን እላችኋለሁ፣ በብሔራት መካከል ምንም መልስ መስጠት እንዳላውቅ ለማድረግ ቆርጬ ነበር፤+ ሌላው ቀርቶ ሌሎችን ከሌላው ሰው ጋር አልቀራም፤*\n", + "2019-10-14 09:41:25,171 Validation result at epoch 12, step 4400: bleu: 1.35, loss: 113117.1172, ppl: 22.1836, duration: 246.1682s\n", + "2019-10-14 09:42:38,603 Epoch 12: total training loss 769.72\n", + "2019-10-14 09:42:38,603 EPOCH 13\n", + "2019-10-14 09:42:40,204 Epoch 13 Step: 4500 Batch Loss: 2.054061 Tokens per Sec: 3700, Lr: 0.000329\n", + "2019-10-14 09:43:55,041 Epoch 13 Step: 4600 Batch Loss: 2.091669 Tokens per Sec: 3841, Lr: 0.000326\n", + "2019-10-14 09:45:09,512 Epoch 13 Step: 4700 Batch Loss: 1.962294 Tokens per Sec: 7611, Lr: 0.000322\n", + "2019-10-14 09:46:24,581 Epoch 13 Step: 4800 Batch Loss: 2.237689 Tokens per Sec: 11288, Lr: 0.000319\n", + "2019-10-14 09:50:26,860 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:50:26,861 Saving new checkpoint.\n", + "2019-10-14 09:50:28,541 Example #0\n", + "2019-10-14 09:50:28,541 \tSource: source_sentence\n", + "2019-10-14 09:50:28,541 \tReference: target_sentence\n", + "2019-10-14 09:50:28,541 \tHypothesis: 16 አብርሃም የሚናገረው ነገር ይስማስማ፤\n", + "2019-10-14 09:50:28,542 Example #1\n", + "2019-10-14 09:50:28,542 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:50:28,542 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በአይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:50:28,542 \tHypothesis: 14 እኔ በብሔራት መካከል እጅግ ብዙ ብሔር ከሆኑት ከአባቶቻቸው ይበልጥ ይበልጥ ታላቅ ሆኜ እንደኖርኩ በሚገባ አውቃለሁ።+\n", + "2019-10-14 09:50:28,542 Example #2\n", + "2019-10-14 09:50:28,542 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:50:28,542 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:50:28,542 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ማህፀን ከወለደ በኋላ ስእለቴን አክብር፤+\n", + "2019-10-14 09:50:28,543 Example #3\n", + "2019-10-14 09:50:28,543 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:50:28,543 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:50:28,543 \tHypothesis: 16 እኔ ለአሕዛብ እንደምታዩት ሁሉ እኔም ከብሔራት ጋር እንደምታወሱ+ እኔም ወዲያውኑ እንደምታዩት አደርገዋለሁ።*+\n", + "2019-10-14 09:50:28,543 Validation result at epoch 13, step 4800: bleu: 1.69, loss: 112920.9766, ppl: 22.0647, duration: 243.9612s\n", + "2019-10-14 09:51:23,334 Epoch 13: total training loss 738.98\n", + "2019-10-14 09:51:23,334 EPOCH 14\n", + "2019-10-14 09:51:42,738 Epoch 14 Step: 4900 Batch Loss: 1.462516 Tokens per Sec: 3754, Lr: 0.000316\n", + "2019-10-14 09:52:57,904 Epoch 14 Step: 5000 Batch Loss: 1.738421 Tokens per Sec: 4713, Lr: 0.000313\n", + "2019-10-14 09:54:12,915 Epoch 14 Step: 5100 Batch Loss: 2.116813 Tokens per Sec: 8492, Lr: 0.000309\n", + "2019-10-14 09:55:27,565 Epoch 14 Step: 5200 Batch Loss: 1.625122 Tokens per Sec: 12277, Lr: 0.000306\n", + "2019-10-14 09:59:30,538 Hooray! New best validation result [ppl]!\n", + "2019-10-14 09:59:30,538 Saving new checkpoint.\n", + "2019-10-14 09:59:32,237 Example #0\n", + "2019-10-14 09:59:32,238 \tSource: source_sentence\n", + "2019-10-14 09:59:32,238 \tReference: target_sentence\n", + "2019-10-14 09:59:32,238 \tHypothesis: 42 ያህን ይወዳል።\n", + "2019-10-14 09:59:32,238 Example #1\n", + "2019-10-14 09:59:32,238 \tSource: \"14 and i was making greater progress in juʹda·ism than many of my own age in my nation, as i was far more zealous for the traditions of my fathers.+\"\n", + "2019-10-14 09:59:32,238 \tReference: 14 ለአባቶቼ ወግ ከፍተኛ ቅንዓት ስለነበረኝ ከወገኖቼ መካከል በእኔ ዕድሜ ካሉት ከብዙዎቹ የበለጠ በ���ይሁዳውያን ሃይማኖት የላቀ እድገት እያደረግኩ ነበር።+\n", + "2019-10-14 09:59:32,238 \tHypothesis: 14 እኔም በአባቶቼ ከከብሔራት ይበልጥ ብዙ ብሔር የሆነ እጅግ ብዙ መሆናቸውን እጅግ አስተዋይ ነበር።+\n", + "2019-10-14 09:59:32,239 Example #2\n", + "2019-10-14 09:59:32,239 \tSource: \"15 but when god, who separated me from my mother’s womb and called me through his undeserved kindness,+ thought good\"\n", + "2019-10-14 09:59:32,239 \tReference: 15 ሆኖም ከእናቴ ማህፀን እንድለይ* ያደረገኝና በጸጋው አማካኝነት የጠራኝ አምላክ+\n", + "2019-10-14 09:59:32,239 \tHypothesis: 15 ሆኖም አምላክ ከእናቴ ማህፀን በሰማች ጊዜ የእናቴን ፈቃድ አወቀ፤+\n", + "2019-10-14 09:59:32,239 Example #3\n", + "2019-10-14 09:59:32,239 \tSource: \"16 to reveal his son through me so that i might declare the good news about him to the nations,+ i did not immediately consult with any human;*\"\n", + "2019-10-14 09:59:32,239 \tReference: 16 ስለ ክርስቶስ የሚገልጸውን ምሥራች ለአሕዛብ እንዳውጅ+ ልጁን በእኔ አማካኝነት ለመግለጥ በወደደ ጊዜ ከማንም ሰው* ጋር ወዲያው አልተማከርኩም፤\n", + "2019-10-14 09:59:32,239 \tHypothesis: 16 እኔ በብሔራት መካከል ልዩነት እንዳለኝ ለማድረግ ነው፤+ ይህም ቢሆን ከሰው ጋር በአንድነት እንዳላጣለኝ ተፈቀደልኝ፤ከሰው ጋር በሌላ ሰው ላይ ምንም ነገር አልነካም፤*\n", + "2019-10-14 09:59:32,239 Validation result at epoch 14, step 5200: bleu: 1.66, loss: 111556.8594, ppl: 21.2552, duration: 244.6738s\n", + "2019-10-14 10:00:09,071 Epoch 14: total training loss 700.47\n", + "2019-10-14 10:00:09,071 Training ended after 14 epochs.\n", + "2019-10-14 10:00:09,072 Best validation result at step 5200: 21.26 ppl.\n", + "2019-10-14 10:03:07,050 dev bleu: 2.18 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 10:03:07,051 Translations saved to: models/enam_transformer/00005200.hyps.dev\n", + "2019-10-14 10:06:49,668 test bleu: 2.03 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 10:06:49,669 Translations saved to: models/enam_transformer/00005200.hyps.test\n" + ] + } + ], + "source": [ + "!cd joeynmt; python3 -m joeynmt train configs/transformer_$src$tgt.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "colab_type": "code", + "id": "6wSyOBpksEOv", + "outputId": "cb6388a6-bd5c-4bd1-e374-d57049e11430" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Steps: 400\tLoss: 202366.34375\tPPL: 255.88783\tbleu: 0.00000\tLR: 0.00027951\t*\n", + "Steps: 800\tLoss: 180673.25000\tPPL: 141.22563\tbleu: 0.00000\tLR: 0.00055902\t*\n", + "Steps: 1200\tLoss: 155360.57812\tPPL: 70.58404\tbleu: 0.00000\tLR: 0.00063789\t*\n", + "Steps: 1600\tLoss: 142127.53125\tPPL: 49.11797\tbleu: 0.13719\tLR: 0.00055243\t*\n", + "Steps: 2000\tLoss: 131049.67969\tPPL: 36.25935\tbleu: 0.00000\tLR: 0.00049411\t*\n", + "Steps: 2400\tLoss: 126511.40625\tPPL: 32.01968\tbleu: 0.00000\tLR: 0.00045105\t*\n", + "Steps: 2800\tLoss: 121347.47656\tPPL: 27.79515\tbleu: 0.77889\tLR: 0.00041760\t*\n", + "Steps: 3200\tLoss: 118366.39844\tPPL: 25.61509\tbleu: 0.69353\tLR: 0.00039063\t*\n", + "Steps: 3600\tLoss: 116329.00000\tPPL: 24.22434\tbleu: 1.05430\tLR: 0.00036828\t*\n", + "Steps: 4000\tLoss: 114728.07031\tPPL: 23.18472\tbleu: 1.28224\tLR: 0.00034939\t*\n", + "Steps: 4400\tLoss: 113117.11719\tPPL: 22.18361\tbleu: 1.35120\tLR: 0.00033313\t*\n", + "Steps: 4800\tLoss: 112920.97656\tPPL: 22.06471\tbleu: 1.68777\tLR: 0.00031894\t*\n", + "Steps: 5200\tLoss: 111556.85938\tPPL: 21.25524\tbleu: 1.66150\tLR: 0.00030643\t*\n" + ] + } + ], + "source": [ + "! cat joeynmt/models/enam_transformer/validations.txt" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "1ZJGEqJdnPsh", + "outputId": "bb18e1b2-ad95-4750-b92c-04d0aaf247ee" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cp: cannot create symbolic link '/content/drive/My Drive/masakhane/en-am/models/enam_transformer/best.ckpt': Function not implemented\n" + ] + } + ], + "source": [ + "# Copy the created models from the notebook storage to google drive for persistant storage \n", + "!mkdir \"$gdrive_path/models/\"\n", + "!cp -r joeynmt/models/* \"$gdrive_path/models/${src}${tgt}_transformer/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "colab_type": "code", + "id": "vDaExTv7KhWD", + "outputId": "d1fc5627-1e1e-40d4-8f70-0f18114eb0c8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Steps: 400\tLoss: 202366.34375\tPPL: 255.88783\tbleu: 0.00000\tLR: 0.00027951\t*\n", + "Steps: 800\tLoss: 180673.25000\tPPL: 141.22563\tbleu: 0.00000\tLR: 0.00055902\t*\n", + "Steps: 1200\tLoss: 155360.57812\tPPL: 70.58404\tbleu: 0.00000\tLR: 0.00063789\t*\n", + "Steps: 1600\tLoss: 142127.53125\tPPL: 49.11797\tbleu: 0.13719\tLR: 0.00055243\t*\n", + "Steps: 2000\tLoss: 131049.67969\tPPL: 36.25935\tbleu: 0.00000\tLR: 0.00049411\t*\n", + "Steps: 2400\tLoss: 126511.40625\tPPL: 32.01968\tbleu: 0.00000\tLR: 0.00045105\t*\n", + "Steps: 2800\tLoss: 121347.47656\tPPL: 27.79515\tbleu: 0.77889\tLR: 0.00041760\t*\n", + "Steps: 3200\tLoss: 118366.39844\tPPL: 25.61509\tbleu: 0.69353\tLR: 0.00039063\t*\n", + "Steps: 3600\tLoss: 116329.00000\tPPL: 24.22434\tbleu: 1.05430\tLR: 0.00036828\t*\n", + "Steps: 4000\tLoss: 114728.07031\tPPL: 23.18472\tbleu: 1.28224\tLR: 0.00034939\t*\n", + "Steps: 4400\tLoss: 113117.11719\tPPL: 22.18361\tbleu: 1.35120\tLR: 0.00033313\t*\n", + "Steps: 4800\tLoss: 112920.97656\tPPL: 22.06471\tbleu: 1.68777\tLR: 0.00031894\t*\n", + "Steps: 5200\tLoss: 111556.85938\tPPL: 21.25524\tbleu: 1.66150\tLR: 0.00030643\t*\n" + ] + } + ], + "source": [ + "! cat \"$gdrive_path/models/${src}${tgt}_transformer/validations.txt\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "MrehNFeVwNzZ", + "outputId": "66c738a6-69b5-4520-ed17-53e7e0e9d85e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2019-10-14 10:11:08,078 - dev bleu: 2.18 [Beam search decoding with beam size = 5 and alpha = 1.0]\n", + "2019-10-14 10:14:50,952 - test bleu: 2.03 [Beam search decoding with beam size = 5 and alpha = 1.0]\n" + ] + } + ], + "source": [ + "! cd joeynmt; python3 -m joeynmt test models/enam_transformer/config.yaml\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "English_to_Amharic.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}