{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0562d8a6-e8e3-4659-ab21-e99d76adcf3c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Preview the first 10 lines of the score file (fields: user_id,article_id,score).\n",
    "# The file is opened once; zip() with range(10) stops after ten lines without\n",
    "# reading the rest of the file.\n",
    "with open(\"test_set.txt\") as f:\n",
    "    for _, line in zip(range(10), f):\n",
    "        # Lines already end with a newline, so do not let print() add another.\n",
    "        print(line, end=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e72123e-5a81-4fd1-a07b-f847aee5a590",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import polars as pl\n",
    "\n",
    "# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.\n",
    "test_behavior_path = \"/work/Blue/ebnerd/ebnerd_testset/test/behaviors.parquet\"\n",
    "\n",
    "test_behavior_df = pl.read_parquet(test_behavior_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c337f1c-8a0e-4a61-9916-0c86887f320e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import zipfile\n",
    "from pathlib import Path\n",
    "from typing import Optional\n",
    "\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "\n",
    "\n",
    "def transform_list(input_list):\n",
    "    \"\"\"Turn a list of scores into 1-based ranks (highest score gets rank 1).\n",
    "\n",
    "    Args:\n",
    "        input_list: Numeric scores, one per candidate.\n",
    "\n",
    "    Returns:\n",
    "        list[int]: ``ranks[i]`` is the rank of ``input_list[i]``. Tied scores are\n",
    "        ranked in input order, so the result is deterministic.\n",
    "    \"\"\"\n",
    "    # Convert the input to a NumPy array.\n",
    "    arr = np.asarray(input_list)\n",
    "\n",
    "    # Indices that sort the scores in descending order; the stable sort keeps\n",
    "    # tied scores in their original order.\n",
    "    sorted_indices = np.argsort(-arr, kind=\"stable\")\n",
    "\n",
    "    # Assign ranks starting at 1 by inverting the sort permutation.\n",
    "    ranks = np.empty_like(sorted_indices)\n",
    "    ranks[sorted_indices] = np.arange(1, len(arr) + 1)\n",
    "\n",
    "    return ranks.tolist()\n",
    "\n",
    "\n",
    "def zip_submission_file(\n",
    "    path: Path,\n",
    "    filename_zip: Optional[str] = None,\n",
    "    verbose: bool = True,\n",
    "    rm_file: bool = True,\n",
    ") -> None:\n",
    "    \"\"\"\n",
    "    Compress a single file into a ZIP archive located in the same directory.\n",
    "\n",
    "    Args:\n",
    "        path (Path): Path of the file to compress.\n",
    "        filename_zip (str, optional): Name of the output ZIP file, created next to ``path``.\n",
    "            Defaults to the name of ``path`` with its suffix replaced by \".zip\".\n",
    "        verbose (bool, optional): If True, print the source and destination paths. Defaults to True.\n",
    "        rm_file (bool, optional): If True, delete the original file after compression. Defaults to True.\n",
    "\n",
    "    Returns:\n",
    "        None: This function does not return any value.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the archive name does not end in \".zip\", or if the archive\n",
    "            would be written over the input file itself.\n",
    "    \"\"\"\n",
    "    path = Path(path)\n",
    "    if filename_zip:\n",
    "        path_zip = path.parent.joinpath(filename_zip)\n",
    "    else:\n",
    "        path_zip = path.with_suffix(\".zip\")\n",
    "\n",
    "    if path_zip.suffix != \".zip\":\n",
    "        raise ValueError(f\"suffix for {path_zip.name} has to be '.zip'\")\n",
    "    if path_zip == path:\n",
    "        # Opening the archive for writing would truncate the input before it is read.\n",
    "        raise ValueError(f\"zip target {path_zip} is the same file as the input\")\n",
    "    if verbose:\n",
    "        print(f\"Zipping {path} to {path_zip}\")\n",
    "    # The context manager closes the archive even if writing fails.\n",
    "    with zipfile.ZipFile(path_zip, \"w\", zipfile.ZIP_DEFLATED) as archive:\n",
    "        archive.write(path, arcname=path.name)\n",
    "    if rm_file:\n",
    "        path.unlink()\n",
    "\n",
    "\n",
    "def write_predictions(behaviors_df, scores_path, output_path, max_warnings=10):\n",
    "    \"\"\"Rank each impression's in-view articles and write the submission file.\n",
    "\n",
    "    ``scores_path`` is consumed sequentially: one ``user_id,article_id,score``\n",
    "    line is read for every article in every ``article_ids_inview`` list, in row\n",
    "    order. Each output line is ``<impression_id> [r1,r2,...]`` where ``r_k`` is\n",
    "    the rank of the k-th in-view article (1 = highest score).\n",
    "\n",
    "    A line whose article id differs from the expected one contributes a score\n",
    "    of 0.0; only the first ``max_warnings`` such lines are printed.\n",
    "\n",
    "    Args:\n",
    "        behaviors_df: polars DataFrame with ``impression_id`` and\n",
    "            ``article_ids_inview`` columns.\n",
    "        scores_path: Path of the score file.\n",
    "        output_path: Path of the text file to write.\n",
    "        max_warnings: Maximum number of mismatch messages to print.\n",
    "\n",
    "    Returns:\n",
    "        int: Number of score lines whose article id did not match.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the score file ends early or contains a line with fewer\n",
    "            than three comma-separated fields.\n",
    "    \"\"\"\n",
    "    rows = behaviors_df.select(\"impression_id\", \"article_ids_inview\").iter_rows()\n",
    "    n_mismatched = 0\n",
    "    line_no = 0\n",
    "\n",
    "    with open(scores_path, \"r\") as scores_file, open(output_path, \"w\") as out_file:\n",
    "        for impression_id, article_ids_inview in tqdm(rows, total=len(behaviors_df)):\n",
    "            scores = []\n",
    "\n",
    "            for article_id in article_ids_inview:\n",
    "                line = scores_file.readline()\n",
    "                line_no += 1\n",
    "                if not line:\n",
    "                    # readline() returns \"\" only at end of file.\n",
    "                    raise ValueError(\n",
    "                        f\"{scores_path} ended at line {line_no} \"\n",
    "                        f\"while reading impression {impression_id}\"\n",
    "                    )\n",
    "\n",
    "                fields = line.strip().split(\",\")\n",
    "                if len(fields) < 3:\n",
    "                    raise ValueError(\n",
    "                        f\"{scores_path}:{line_no}: expected 3 fields, got {line!r}\"\n",
    "                    )\n",
    "                p_article_id, p_score = fields[1], fields[2]\n",
    "\n",
    "                if str(article_id) == p_article_id:\n",
    "                    scores.append(float(p_score))\n",
    "                else:\n",
    "                    # Best-effort fallback: score the article 0.0, but cap the\n",
    "                    # messages so a misaligned file cannot flood the output.\n",
    "                    n_mismatched += 1\n",
    "                    if n_mismatched <= max_warnings:\n",
    "                        print(\n",
    "                            f\"Different 0.0 ({scores_path}:{line_no}: \"\n",
    "                            f\"expected article {article_id}, got {p_article_id})\"\n",
    "                        )\n",
    "                    scores.append(0.0)\n",
    "\n",
    "            ranks = \",\".join(str(rank) for rank in transform_list(scores))\n",
    "            out_file.write(f\"{impression_id} [{ranks}]\\n\")\n",
    "\n",
    "        # Leftover lines suggest the score file and the behaviors are misaligned.\n",
    "        if scores_file.readline():\n",
    "            print(f\"Warning: {scores_path} has unread lines after line {line_no}\")\n",
    "\n",
    "    return n_mismatched"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d5c1bcc-e4b0-4217-93ec-4ca3e24dc6ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The previous full run processed 13,536,710 impressions in roughly 18 minutes.\n",
    "n_mismatched = write_predictions(test_behavior_df, \"test_set.txt\", \"predictions.txt\")\n",
    "if n_mismatched:\n",
    "    print(f\"{n_mismatched} score lines did not match the expected article id\")\n",
    "\n",
    "zip_submission_file(path=Path(\"predictions.txt\"), rm_file=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "blue",
   "language": "python",
   "name": "blue"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}