# |export
import gradio as gr
import pandas as pd

# |export
# Published-to-web CSV export of the spreadsheet that backs the landscape table.
SHEET_CSV_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSC40sszorOjHfozmNqJT9lFiJhG94u3fbr3Ss_7fzcU3xqqJQuW1Ie_SNcWEB-uIsBi9NBUK7-ddet/pub?output=csv"

# skiprows=1 discards the first line of the export, so the column names
# ("Model", "Lab", ...) are read from the line that follows it.
raw_df = pd.read_csv(SHEET_CSV_URL, skiprows=1)

# |export
# Drop footers: rows without a model name.
has_model = raw_df["Model"].notna()
# Drop TBA models: rows whose parameter count is still the literal "TBA".
size_known = raw_df["Parameters \n(B)"] != "TBA"

# Filter first and copy afterwards. Copying before the mask (`df.copy()[mask]`)
# duplicates rows that are about to be thrown away and still leaves `df` as a
# filtered slice, which pandas may flag with SettingWithCopyWarning once
# columns are reassigned further down. The original row labels are kept on
# purpose (no reset_index), matching the previous behaviour.
df = raw_df[has_model & size_known].copy()

# (notebook-only cell) Preview the first rows of the cleaned table.
df.head()
# (notebook-only cell) Preview the last rows of the cleaned table.
df.tail()

# |export
import html
import re

# First http(s) URL inside a cell; stops at whitespace, quotes and angle
# brackets so the match can be placed inside an href attribute.
_URL_PATTERN = re.compile(r"""https?://[^\s<>"']+""")


def make_clickable_cell(cell):
    """Render one table cell as an HTML snippet, linking it when it holds a URL.

    Args:
        cell: Raw cell value; may be missing (NaN/None), a bare URL, free text
            that contains a URL (e.g. "Weights leaked: https://..."), or a
            plain label with no URL at all (e.g. "Hugging Face").

    Returns:
        "" for missing values. When a URL is found, an
        ``<a target="_blank" href="URL">TEXT</a>`` anchor whose href is the
        first URL in the cell and whose label is the full cell text. Otherwise
        the HTML-escaped cell text without any anchor.
    """
    if pd.isnull(cell):
        return ""

    text = str(cell)
    found = _URL_PATTERN.search(text)
    if found is None:
        # A label is not a link target; wrapping it would produce a dead link.
        return html.escape(text)

    # Sentence punctuation glued to the end of a URL is not part of it.
    url = found.group(0).rstrip(".,;")
    href = html.escape(url, quote=True)
    label = html.escape(text)
    return f'<a target="_blank" href="{href}">{label}</a>'


# |export
columns_to_click = ["Paper / Repo", "Selected \nplaygrounds"]
# Overwrites the raw values in place, so this is meant to run once per load of
# `df`; running it again would process the generated markup as if it were data.
for col in columns_to_click:
    df[col] = df[col].apply(make_clickable_cell)
# |export
# NOTE(review): the markup around the heading text was lost from the notebook
# source as received; a plain <h1> is assumed here. Confirm the original tag
# and attributes before publishing.
title = """<h1>The Large Language Models Landscape</h1>"""
description = """Large Language Models (LLMs) today come in a variety of architectures and capabilities. This interactive landscape provides a visual overview of the most important LLMs, including their training data, size, release date, and whether they are openly accessible or not. It also includes notes on each model to provide additional context. This landscape is derived from data compiled by Dr. Alan D. Thompson at [lifearchitect.ai](https://lifearchitect.ai).
"""

# |export
# One gradio datatype per column, in column order: the link columns carry
# markup and are declared as "markdown"; every other column is plain "str".
dtypes = [
    "markdown" if column in columns_to_click else "str" for column in df.columns
]

# |export
def value_func():
    """Return the prepared table; handed to gradio as the DataFrame's `value` callable."""
    return df


with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.components.DataFrame(value=value_func, datatype=dtypes)

demo.launch()

# (notebook-only cell) Shut the local server down again after trying the app.
demo.close()

# (notebook-only cell) Write every `# |export` cell of this notebook to ./app.py.
from nbdev.export import nb_export

nb_export("app.ipynb", lib_path=".", name="app")