{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "872b02d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "\n",
    "from tqdm import tqdm\n",
    "\n",
    "# 1. Read the wine list from the json file.\n",
    "# The path is relative to the notebook's starting directory, so this cell must run\n",
    "# before the %cd cell below changes the working directory.\n",
    "# The encoding is explicit because the Windows default is the locale code page,\n",
    "# while JSON text is UTF-8.\n",
    "with open('./tutorial/winenara.json', 'r', encoding='utf-8') as f:\n",
    "    data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d8a201c7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\n"
     ]
    }
   ],
   "source": [
    "# Switch into vivino-api so that the relative './output' path used below resolves there.\n",
    "# Run this once per kernel: a second run would look for vivino-api inside itself.\n",
    "%cd vivino-api"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c4d9347d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Collect the names of the result files already present in ./output.\n",
    "# Only real '*.json' files are considered and only the trailing suffix is removed,\n",
    "# so stray files or a '.json' in the middle of a name cannot distort the names.\n",
    "file_list = os.listdir('./output')\n",
    "json_list = [file[:-len('.json')] for file in file_list if file.endswith('.json')]\n",
    "\n",
    "# A set gives constant-time lookups; json_list stays a list for any later use.\n",
    "scraped_names = set(json_list)\n",
    "\n",
    "# 3. Keep the wines that have no result file yet.\n",
    "# The name compared is the third-from-last '/'-separated piece of 'vivino_link'\n",
    "# (presumably '.../<name>/w/<id>' - TODO confirm the URL shape against the data).\n",
    "data_list = [\n",
    "    wine_data\n",
    "    for wine_data in data\n",
    "    if wine_data['vivino_link'].split('/')[-3] not in scraped_names\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "23166a02",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "247"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(data_list)"
   ]
  },
  { "cell_type": "code", "execution_count": 6, "id": "93bc471e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/247 [00:00)\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'le-chateau-de-chamirey-mercurey' }\n", "Results 
were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Waited for 15 seconds on the page 9\n", "Waited for 30 seconds on the page 9\n", "Waited for 45 seconds on the page 9\n", "Waited for 60 seconds on the page 9\n", "Waited for 75 seconds on the page 9\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Waited for 15 seconds on the page 17\n", "Waited for 30 seconds on the page 17\n", "Waited for 45 seconds on the page 17\n", "Waited for 60 seconds on the page 17\n", "Waited for 75 seconds on the page 17\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Waited for 15 seconds on the page 23\n", "Waited for 30 seconds on the page 23\n", "Waited for 45 seconds on the page 23\n", "Waited for 60 seconds on the page 23\n", "Waited for 75 seconds on the page 23\n", "Waited for 90 seconds on the page 23\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at 
Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'es-imperial-rioja-gran-reserva' }\n", "Waited for 15 seconds on the page 1\n", "Waited for 30 seconds on the page 1\n", "Waited for 45 seconds on the page 1\n", "Waited for 60 seconds on the page 1\n", "Waited for 75 seconds on the page 1\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Waited for 15 seconds on the page 5\n", "Waited for 30 seconds on the page 5\n", "Waited for 45 seconds on the page 5\n", "Waited for 60 seconds on the page 5\n", "Waited for 75 seconds on the page 5\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Waited for 15 seconds on the page 11\n", "Waited for 30 seconds on the page 11\n", "Waited for 45 seconds on the page 11\n", "Waited for 60 seconds on the page 11\n", "Waited for 75 seconds on the page 11\n", "Waited for 90 seconds on the page 11\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were 
collected from the page: 15\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'langetwins-winery-and-vineyards-centennial-zinfandel' }{\n", " _: [],\n", " name: 'casillero-del-diablo-pinot-noir-reserva-central-valley'\n", "}\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Waited for 15 seconds on the page 6\n", "Waited for 30 seconds on the page 6\n", "Waited for 45 seconds on the page 6\n", "Waited for 60 seconds on the page 6\n", "Waited for 75 seconds on the page 6\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Waited for 15 seconds on the page 14\n", "Waited for 30 seconds on the page 14\n", "Waited for 45 seconds on the page 14\n", "Waited for 60 seconds on the page 14\n", "Waited for 75 seconds on the page 14\n", "Results were 
collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Waited for 15 seconds on the page 20\n", "Waited for 30 seconds on the page 20\n", "Waited for 45 seconds on the page 20\n", "Waited for 60 seconds on the page 20\n", "Waited for 75 seconds on the page 20\n", "Waited for 90 seconds on the page 20\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Waited for 15 seconds on the page 24\n", "Waited for 30 seconds on the page 24\n", "Waited for 45 seconds on the page 24\n", "Waited for 60 seconds on the page 24\n", "Waited for 75 seconds on the page 24\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Waited for 15 seconds on the page 30\n", "Waited for 30 seconds on the page 30\n", "Waited for 45 seconds on the page 30\n", "Waited for 60 seconds on the page 30\n", "Waited for 75 seconds on the page 30\n", "Results were collected from the page: 30\n", "Results were collected from the page: 31\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Results were collected from the page: 36\n", "Results were collected from the page: 37\n", "Waited for 15 seconds on the page 38\n", "Waited for 30 seconds on the page 38\n", "Waited for 45 seconds on the page 38\n", "Waited for 60 seconds on the page 38\n", "Waited for 75 seconds on the page 38\n", "Waited for 90 seconds on 
the page 38\n", "Results were collected from the page: 38\n", "Results were collected from the page: 39\n", "Results were collected from the page: 40\n", "Results were collected from the page: 41\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Waited for 15 seconds on the page 7\n", "Waited for 30 seconds on the page 7\n", "Waited for 45 seconds on the page 7\n", "Waited for 60 seconds on the page 7\n", "Waited for 75 seconds on the page 7\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Waited for 15 seconds on the page 13\n", "Waited for 30 seconds on the page 13\n", "Waited for 45 seconds on the page 13\n", "Waited for 60 seconds on the page 13\n", "Waited for 75 seconds on the page 13\n", "Results were collected from the page: 13\n", "Results were 
collected from the page: 14\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{\n", " _: [],\n", " name: 'de-markus-molitor-brauneberger-mandelgraben-pinot-noir'\n", "}\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Waited for 15 seconds on the page 7\n", "Waited for 30 seconds on the page 7\n", "Waited for 45 seconds on the page 7\n", "Waited for 60 seconds on the page 7\n", "Waited for 75 seconds on the page 7\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Waited for 15 seconds on the page 13\n", "Waited for 30 seconds on the page 13\n", "Waited for 45 seconds on the page 13\n", "Waited for 60 seconds on the page 13\n", "Waited for 75 seconds on the page 13\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", 
"Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Waited for 15 seconds on the page 19\n", "Waited for 30 seconds on the page 19\n", "Waited for 45 seconds on the page 19\n", "Waited for 60 seconds on the page 19\n", "Waited for 75 seconds on the page 19\n", "Waited for 90 seconds on the page 19\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Waited for 15 seconds on the page 23\n", "Waited for 30 seconds on the page 23\n", "Waited for 45 seconds on the page 23\n", "Waited for 60 seconds on the page 23\n", "Waited for 75 seconds on the page 23\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Waited for 15 seconds on the page 29\n", "Waited for 30 seconds on the page 29\n", "Waited for 45 seconds on the page 29\n", "Waited for 60 seconds on the page 29\n", "Waited for 75 seconds on the page 29\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Results were collected from the page: 31\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Results were collected from the page: 36\n", "Waited for 15 seconds on the page 37\n", "Waited for 30 seconds on the page 37\n", "Waited for 45 seconds on the page 37\n", "Waited for 60 seconds on the page 37\n", "Waited for 75 seconds on the page 37\n", "Waited for 90 seconds on the page 37\n", "Results were collected from the page: 37\n", "Results were collected from the page: 38\n", "Results were 
collected from the page: 39\n", "Results were collected from the page: 40\n", "Results were collected from the page: 41\n", "Results were collected from the page: 42\n", "Waited for 15 seconds on the page 43\n", "Waited for 30 seconds on the page 43\n", "Waited for 45 seconds on the page 43\n", "Waited for 60 seconds on the page 43\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'carmel-road-winery-monterey-pinot-noir' }\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Waited for 15 seconds on the page 7\n", "Waited for 30 seconds on the page 7\n", "Waited for 45 seconds on the page 7\n", "Waited for 60 seconds on the page 7\n", "Waited for 75 seconds on the page 7\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Waited for 15 seconds on the page 12\n", "Waited for 30 seconds on the page 12\n", "Waited for 45 seconds on the page 
12\n", "Waited for 60 seconds on the page 12\n", "Waited for 75 seconds on the page 12\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property '1' of null\n", " at __puppeteer_evaluation_script__:23:90\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{\n", " _: [],\n", " name: 'louis-bouillot-perle-de-vigne-grande-reserve-cremant-de-bourgogne-brut-cremant-de-bourgogne'\n", "}\n", "Waited for 15 seconds on the page 1\n", "Waited for 30 seconds on the page 1\n", "Waited for 45 seconds on the page 1\n", "Waited for 60 seconds on the page 1\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Waited for 15 seconds on the page 10\n", "Waited for 30 seconds on the page 10\n", "Waited for 45 seconds on the page 10\n", "Waited for 60 seconds on the page 
10\n", "Waited for 75 seconds on the page 10\n", "Waited for 90 seconds on the page 10\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'domaine-digioia-royer-bourgogne-pinot-noir' }\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Waited for 15 seconds on the page 7\n", "Waited for 30 seconds on the page 7\n", "Waited for 45 seconds on the page 7\n", "Waited for 60 seconds on the page 7\n", "Waited for 75 seconds on the page 7\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Waited for 15 seconds on the page 14\n", 
"Waited for 30 seconds on the page 14\n", "Waited for 45 seconds on the page 14\n", "Waited for 60 seconds on the page 14\n", "Waited for 75 seconds on the page 14\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Waited for 15 seconds on the page 21\n", "Waited for 30 seconds on the page 21\n", "Waited for 45 seconds on the page 21\n", "Waited for 60 seconds on the page 21\n", "Waited for 75 seconds on the page 21\n", "Waited for 90 seconds on the page 21\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Waited for 15 seconds on the page 25\n", "Waited for 30 seconds on the page 25\n", "Waited for 45 seconds on the page 25\n", "Waited for 60 seconds on the page 25\n", "Waited for 75 seconds on the page 25\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Results were collected from the page: 31\n", "Waited for 15 seconds on the page 32\n", "Waited for 30 seconds on the page 32\n", "Waited for 45 seconds on the page 32\n", "Waited for 60 seconds on the page 32\n", "Waited for 75 seconds on the page 32\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Results were collected from the page: 36\n", "Results were collected from the page: 37\n", "Results were collected from the page: 38\n", "Waited 
for 15 seconds on the page 39\n", "Waited for 30 seconds on the page 39\n", "Waited for 45 seconds on the page 39\n", "Waited for 60 seconds on the page 39\n", "Waited for 75 seconds on the page 39\n", "Waited for 90 seconds on the page 39\n", "Results were collected from the page: 39\n", "Results were collected from the page: 40\n", "Results were collected from the page: 41\n", "Results were collected from the page: 42\n", "Results were collected from the page: 43\n", "Results were collected from the page: 44\n", "Results were collected from the page: 45\n", "Results were collected from the page: 46\n", "Waited for 15 seconds on the page 47\n", "Waited for 30 seconds on the page 47\n", "Waited for 45 seconds on the page 47\n", "Waited for 60 seconds on the page 47\n", "Waited for 75 seconds on the page 47\n", "Waited for 90 seconds on the page 47\n", "Waited for 105 seconds on the page 47\n", "Waited for 120 seconds on the page 47\n", "Results were collected from the page: 47\n", "Results were collected from the page: 48\n", "Results were collected from the page: 49\n", "Results were collected from the page: 50\n", "Finish!\n", "{ _: [], name: 'es-bodegas-palacio-glorioso-rioja-gran-reserva' }\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Waited for 15 seconds on the page 7\n", "Waited for 30 seconds on the page 7\n", "Waited for 45 seconds on the page 7\n", "Waited for 60 seconds on the page 7\n", "Waited for 75 seconds on the page 7\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Waited for 15 seconds on the page 12\n", "Waited for 30 seconds on the page 
12\n", "Waited for 45 seconds on the page 12\n", "Waited for 60 seconds on the page 12\n", "Waited for 75 seconds on the page 12\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Waited for 15 seconds on the page 19\n", "Waited for 30 seconds on the page 19\n", "Waited for 45 seconds on the page 19\n", "Waited for 60 seconds on the page 19\n", "Waited for 75 seconds on the page 19\n", "Waited for 90 seconds on the page 19\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Waited for 15 seconds on the page 22\n", "Waited for 30 seconds on the page 22\n", "Waited for 45 seconds on the page 22\n", "Waited for 60 seconds on the page 22\n", "Waited for 75 seconds on the page 22\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run 
(file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'errazuriz-villa-don-maximiano-aconcagua-valley' }\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Waited for 15 seconds on the page 8\n", "Waited for 30 seconds on the page 8\n", "Waited for 45 seconds on the page 8\n", "Waited for 60 seconds on the page 8\n", "Waited for 75 seconds on the page 8\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Waited for 15 seconds on the page 13\n", "Waited for 30 seconds on the page 13\n", "Waited for 45 seconds on the page 13\n", "Waited for 60 seconds on the page 13\n", "Waited for 75 seconds on the page 13\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Waited for 15 seconds on the page 21\n", "Waited for 30 seconds on the page 21\n", "Waited for 45 seconds on the page 21\n", "Waited for 60 seconds on the page 21\n", "Waited for 75 seconds on the page 21\n", "Waited for 90 seconds on the page 21\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Waited for 15 seconds on the page 25\n", "Waited for 30 seconds on the page 
25\n", "Waited for 45 seconds on the page 25\n", "Waited for 60 seconds on the page 25\n", "Waited for 75 seconds on the page 25\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Results were collected from the page: 31\n", "Waited for 15 seconds on the page 32\n", "Waited for 30 seconds on the page 32\n", "Waited for 45 seconds on the page 32\n", "Waited for 60 seconds on the page 32\n", "Waited for 75 seconds on the page 32\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Results were collected from the page: 36\n", "Results were collected from the page: 37\n", "Waited for 15 seconds on the page 38\n", "Waited for 30 seconds on the page 38\n", "Waited for 45 seconds on the page 38\n", "Waited for 60 seconds on the page 38\n", "Waited for 75 seconds on the page 38\n", "Waited for 90 seconds on the page 38\n", "Results were collected from the page: 38\n", "Results were collected from the page: 39\n", "Results were collected from the page: 40\n", "Results were collected from the page: 41\n", "Results were collected from the page: 42\n", "Results were collected from the page: 43\n", "Waited for 15 seconds on the page 44\n", "Waited for 30 seconds on the page 44\n", "Waited for 45 seconds on the page 44\n", "Waited for 60 seconds on the page 44\n", "Results were collected from the page: 44\n", "Results were collected from the page: 45\n", "Results were collected from the page: 46\n", "Results were collected from the page: 47\n", "Results were collected from the page: 48\n", "Results were collected from the page: 49\n", "Results were collected from the page: 50\n", "Finish!\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ " 0%| | 1/247 [42:31<174:19:24, 2551.07s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{\n", " _: [],\n", " name: 'antinori-tuscany-tenuta-guado-al-tasso-il-bruciato-bolgheri'\n", "}\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Waited for 15 seconds on the page 7\n", "Waited for 30 seconds on the page 7\n", "Waited for 45 seconds on the page 7\n", "Waited for 60 seconds on the page 7\n", "Waited for 75 seconds on the page 7\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Waited for 15 seconds on the page 13\n", "Waited for 30 seconds on the page 13\n", "Waited for 45 seconds on the page 13\n", "Waited for 60 seconds on the page 13\n", "Waited for 75 seconds on the page 13\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Waited for 15 seconds on the page 19\n", "Waited for 30 seconds on the page 19\n", "Waited for 45 seconds on the page 19\n", "Waited for 60 seconds on the page 19\n", "Waited for 75 seconds on the page 19\n", "Waited for 90 seconds on the page 19\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Results were collected from the page: 
24\n", "Waited for 15 seconds on the page 25\n", "Waited for 30 seconds on the page 25\n", "Waited for 45 seconds on the page 25\n", "Waited for 60 seconds on the page 25\n", "Waited for 75 seconds on the page 25\n", "Waited for 90 seconds on the page 25\n", "Waited for 105 seconds on the page 25\n", "Waited for 120 seconds on the page 25\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Waited for 15 seconds on the page 31\n", "Waited for 30 seconds on the page 31\n", "Waited for 45 seconds on the page 31\n", "Waited for 60 seconds on the page 31\n", "Waited for 75 seconds on the page 31\n", "Results were collected from the page: 31\n", "Waited for 15 seconds on the page 32\n", "Waited for 30 seconds on the page 32\n", "Waited for 45 seconds on the page 32\n", "Waited for 60 seconds on the page 32\n", "Waited for 75 seconds on the page 32\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Results were collected from the page: 36\n", "Results were collected from the page: 37\n", "Waited for 15 seconds on the page 38\n", "Waited for 30 seconds on the page 38\n", "Waited for 45 seconds on the page 38\n", "Waited for 60 seconds on the page 38\n", "Waited for 75 seconds on the page 38\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at 
processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'cune-cvne-reserva-rioja' }\n", "Waited for 15 seconds on the page 1\n", "Waited for 30 seconds on the page 1\n", "Waited for 45 seconds on the page 1\n", "Waited for 60 seconds on the page 1\n", "Waited for 75 seconds on the page 1\n", "Waited for 90 seconds on the page 1\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Waited for 15 seconds on the page 3\n", "Waited for 30 seconds on the page 3\n", "Waited for 45 seconds on the page 3\n", "Waited for 60 seconds on the page 3\n", "Waited for 75 seconds on the page 3\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Waited for 15 seconds on the page 11\n", "Waited for 30 seconds on the page 11\n", "Waited for 45 seconds on the page 11\n", "Waited for 60 seconds on the page 11\n", "Waited for 75 seconds on the page 11\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Waited for 15 seconds on the page 17\n", "Waited for 30 seconds on the page 17\n", "Waited for 45 seconds on the page 17\n", "Waited for 60 seconds on the page 17\n", "Waited for 75 seconds on the page 17\n", 
"Waited for 90 seconds on the page 17\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Waited for 15 seconds on the page 23\n", "Waited for 30 seconds on the page 23\n", "Waited for 45 seconds on the page 23\n", "Waited for 60 seconds on the page 23\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Results were collected from the page: 31\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Waited for 15 seconds on the page 36\n", "Waited for 30 seconds on the page 36\n", "Waited for 45 seconds on the page 36\n", "Waited for 60 seconds on the page 36\n", "Waited for 75 seconds on the page 36\n", "Waited for 90 seconds on the page 36\n", "Results were collected from the page: 36\n", "Results were collected from the page: 37\n", "Results were collected from the page: 38\n", "Results were collected from the page: 39\n", "Results were collected from the page: 40\n", "Results were collected from the page: 41\n", "Results were collected from the page: 42\n", "Waited for 15 seconds on the page 43\n", "Waited for 30 seconds on the page 43\n", "Waited for 45 seconds on the page 43\n", "Waited for 60 seconds on the page 43\n", "Waited for 75 seconds on the page 43\n", "Results were collected from the page: 43\n", "Waited for 15 seconds on the page 44\n", "Waited for 30 seconds on the page 44\n", 
"Waited for 45 seconds on the page 44\n", "Waited for 60 seconds on the page 44\n", "Waited for 75 seconds on the page 44\n", "Results were collected from the page: 44\n", "Results were collected from the page: 45\n", "Results were collected from the page: 46\n", "Results were collected from the page: 47\n", "Results were collected from the page: 48\n", "Results were collected from the page: 49\n", "Waited for 15 seconds on the page 50\n", "Waited for 30 seconds on the page 50\n", "Waited for 45 seconds on the page 50\n", "Waited for 60 seconds on the page 50\n", "Waited for 75 seconds on the page 50\n", "Results were collected from the page: 50\n", "Finish!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 4%|▎ | 9/247 [55:36<19:32:05, 295.49s/it] " ] }, { "name": "stdout", "output_type": "stream", "text": [ "{ _: [], name: 'es-imperial-rioja-reserva' }\n", "Waited for 15 seconds on the page 1\n", "Waited for 30 seconds on the page 1\n", "Waited for 45 seconds on the page 1\n", "Waited for 60 seconds on the page 1\n", "Waited for 75 seconds on the page 1\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Waited for 15 seconds on the page 4\n", "Waited for 30 seconds on the page 4\n", "Waited for 45 seconds on the page 4\n", "Waited for 60 seconds on the page 4\n", "Waited for 75 seconds on the page 4\n", "Waited for 90 seconds on the page 4\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Waited for 15 seconds on the page 11\n", "Waited for 30 seconds on the page 11\n", "Waited for 45 seconds on the page 11\n", "Waited for 60 seconds on the page 11\n", "Waited for 75 seconds on the page 11\n", 
"Waited for 90 seconds on the page 11\n", "Waited for 105 seconds on the page 11\n", "Waited for 120 seconds on the page 11\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Waited for 15 seconds on the page 16\n", "Waited for 30 seconds on the page 16\n", "Waited for 45 seconds on the page 16\n", "Waited for 60 seconds on the page 16\n", "Waited for 75 seconds on the page 16\n", "Results were collected from the page: 16\n", "Waited for 15 seconds on the page 17\n", "Waited for 30 seconds on the page 17\n", "Waited for 45 seconds on the page 17\n", "Waited for 60 seconds on the page 17\n", "Waited for 75 seconds on the page 17\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Waited for 15 seconds on the page 24\n", "Waited for 30 seconds on the page 24\n", "Waited for 45 seconds on the page 24\n", "Waited for 60 seconds on the page 24\n", "Waited for 75 seconds on the page 24\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Waited for 15 seconds on the page 31\n", "Waited for 30 seconds on the page 31\n", "Waited for 45 seconds on the page 31\n", "Waited for 60 seconds on the page 31\n", "Waited for 75 seconds on the page 31\n", "Results were collected from the page: 31\n", "Results were collected from the page: 32\n", "Results were 
collected from the page: 33\n", "Results were collected from the page: 34\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 5%|▌ | 13/247 [1:01:30<13:52:40, 213.50s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{ _: [], name: 'palmer-vintage-champagne' }\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Waited for 15 seconds on the page 3\n", "Waited for 30 seconds on the page 3\n", "Waited for 45 seconds on the page 3\n", "Waited for 60 seconds on the page 3\n", "Waited for 75 seconds on the page 3\n", "Waited for 90 seconds on the page 3\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Waited for 15 seconds on the page 10\n", "Waited for 30 seconds on the page 10\n", "Waited for 45 seconds on the page 10\n", "Waited for 60 seconds on the page 10\n", "Waited for 75 seconds on the page 10\n", "Results were collected from the page: 10\n", "Results were 
collected from the page: 11\n", "Waited for 15 seconds on the page 12\n", "Waited for 30 seconds on the page 12\n", "Waited for 45 seconds on the page 12\n", "Waited for 60 seconds on the page 12\n", "Waited for 75 seconds on the page 12\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Waited for 15 seconds on the page 18\n", "Waited for 30 seconds on the page 18\n", "Waited for 45 seconds on the page 18\n", "Waited for 60 seconds on the page 18\n", "Waited for 75 seconds on the page 18\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Waited for 15 seconds on the page 25\n", "Waited for 30 seconds on the page 25\n", "Waited for 45 seconds on the page 25\n", "Waited for 60 seconds on the page 25\n", "Waited for 75 seconds on the page 25\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property 'textContent' of null\n", " at __puppeteer_evaluation_script__:25:53\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async 
ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "{ _: [], name: 'paul-dangin-fils-fortyseven-brut-champagne' }\n", "Waited for 15 seconds on the page 1\n", "Waited for 30 seconds on the page 1\n", "Waited for 45 seconds on the page 1\n", "Waited for 60 seconds on the page 1\n", "Results were collected from the page: 1\n", "Results were collected from the page: 2\n", "Results were collected from the page: 3\n", "Results were collected from the page: 4\n", "Results were collected from the page: 5\n", "Results were collected from the page: 6\n", "Results were collected from the page: 7\n", "Results were collected from the page: 8\n", "Results were collected from the page: 9\n", "Results were collected from the page: 10\n", "Results were collected from the page: 11\n", "Results were collected from the page: 12\n", "Results were collected from the page: 13\n", "Results were collected from the page: 14\n", "Results were collected from the page: 15\n", "Results were collected from the page: 16\n", "Results were collected from the page: 17\n", "Results were collected from the page: 18\n", "Results were collected from the page: 19\n", "Results were collected from the page: 20\n", "Results were collected from the page: 21\n", "Waited for 15 seconds on the page 22\n", "Waited for 30 seconds on the page 22\n", "Waited for 45 seconds on the page 22\n", "Waited for 60 seconds on the page 22\n", "Waited for 75 seconds on the page 22\n", "Results were collected from the page: 22\n", "Results were collected from the page: 23\n", "Results were collected from the page: 24\n", "Results were collected from the page: 25\n", "Results were collected from the page: 26\n", "Results were collected from the page: 27\n", "Waited for 15 seconds on the page 28\n", "Waited for 
30 seconds on the page 28\n", "Waited for 45 seconds on the page 28\n", "Waited for 60 seconds on the page 28\n", "Waited for 75 seconds on the page 28\n", "Results were collected from the page: 28\n", "Results were collected from the page: 29\n", "Results were collected from the page: 30\n", "Results were collected from the page: 31\n", "Results were collected from the page: 32\n", "Results were collected from the page: 33\n", "Results were collected from the page: 34\n", "Results were collected from the page: 35\n", "Waited for 15 seconds on the page 36\n", "Waited for 30 seconds on the page 36\n", "Waited for 45 seconds on the page 36\n", "Waited for 60 seconds on the page 36\n", "Waited for 75 seconds on the page 36\n", "Results were collected from the page: 36\n", "Results were collected from the page: 37\n", "Results were collected from the page: 38\n", "Results were collected from the page: 39\n", "Results were collected from the page: 40\n", "Results were collected from the page: 41\n", "Waited for 15 seconds on the page 42\n", "Waited for 30 seconds on the page 42\n", "Waited for 45 seconds on the page 42\n", "Waited for 60 seconds on the page 42\n", "Waited for 75 seconds on the page 42\n", "Exception: Error: Evaluation failed: TypeError: Cannot read property '1' of null\n", " at __puppeteer_evaluation_script__:23:90\n", " at Array.map ()\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n" ] } ], 
"source": [
 "import concurrent.futures\n",
 "import subprocess\n",
 "\n",
 "from tqdm import tqdm\n",
 "\n",
 "\n",
 "def fetch_wine_data(wine_data):\n",
 "    \"\"\"Run the vivino.js scraper for one wine and return its exit code.\n",
 "\n",
 "    The wine slug is the third-from-last '/'-separated part of\n",
 "    wine_data['vivino_link']. node is started from an argument list, so\n",
 "    the slug is handed over verbatim and never parsed by a shell.\n",
 "    \"\"\"\n",
 "    wine_name = wine_data['vivino_link'].split('/')[-3]\n",
 "    result = subprocess.run(\n",
 "        ['node', 'vivino.js', f'--name={wine_name}'],\n",
 "        stdout=subprocess.PIPE,\n",
 "        stderr=subprocess.STDOUT,  # keep node's stdout/stderr in one ordered log\n",
 "        encoding='utf-8',\n",
 "        errors='replace',  # never fail the scrape on an undecodable byte\n",
 "    )\n",
 "    # Print the log once per wine so the parallel workers do not interleave lines.\n",
 "    print(result.stdout, end='')\n",
 "    if result.returncode != 0:\n",
 "        print(f'node vivino.js exited with code {result.returncode} for {wine_name}')\n",
 "    return result.returncode\n",
 "\n",
 "\n",
 "# Use a ThreadPoolExecutor to run fetch_wine_data in parallel\n",
 "with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:\n",
 "    list(tqdm(executor.map(fetch_wine_data, data_list), total=len(data_list)))"
] },
{ "cell_type": "code", "execution_count": 4, "id": "ac66b46d", "metadata": {}, "outputs": [
 { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/960 [00:00)\n", " at __puppeteer_evaluation_script__:19:63\n", " at ExecutionContext._evaluateInternal (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:217:19)\n", " at runMicrotasks ()\n", " at processTicksAndRejections (node:internal/process/task_queues:96:5)\n", " at async ExecutionContext.evaluate (c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\\node_modules\\puppeteer\\lib\\cjs\\puppeteer\\common\\ExecutionContext.js:106:16)\n", " at async run (file:///c:/Users/chois/Desktop/Audrey/data/vivino-api/vivino.js:201:23)\n", "Finish!\n", "^C\n" ] },
 { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 2/960 [01:30<11:14:10, 42.22s/it]" ] } ],
"source": [
 "import subprocess\n",
 "\n",
 "for wine_data in tqdm(data):\n",
 "    wine_name = wine_data['vivino_link'].split('/')[-3]\n",
 "    # Argument list instead of a `!` shell line: the slug reaches node\n",
 "    # verbatim and is never parsed by a shell.\n",
 "    with subprocess.Popen(\n",
 "        ['node', 'vivino.js', f'--name={wine_name}'],\n",
 "        stdout=subprocess.PIPE,\n",
 "        stderr=subprocess.STDOUT,\n",
 "        encoding='utf-8',\n",
 "        errors='replace',\n",
 "    ) as scraper:\n",
 "        # Relay the scraper log line by line so progress stays visible.\n",
 "        for log_line in scraper.stdout:\n",
 "            print(log_line, end='')"
] },
{ "cell_type": "code", "execution_count": 12, "id": "710c6e28", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c:\\Users\\chois\\Desktop\\Audrey\\data\\vivino-api\n" ] } ], "source": [ "%cd vivino-api" ] },
{ "cell_type": "code", "execution_count": 26, "id": "506e231b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'schlumberger-spring-edition-rose-brut-klassik'" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ],
"source": [ "wine_name" ] },
{ "cell_type": "code", "execution_count": 28, "id": "99e67287", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{ _: [], name: 'schlumberger-spring-edition-rose-brut-klassik' }\n", "Results were collected from the page: 1\n", "Finish!\n" ] } ], "source": [ "# npm install puppeteer\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "921aa4fc", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 1, "id": "21dbc27f", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] },
{ "cell_type": "code", "execution_count": null, "id": "1bc2bd04", "metadata": {}, "outputs": [], "source": [
 "# Read the XWines CSV file into a DataFrame\n",
 "df = pd.read_csv('./XWines_Full_100K_wines.csv')\n",
 "\n",
 "# Show the first 5 rows as the cell result (rich table display instead of print text)\n",
 "df.head()"
] },
{ "cell_type": "code", "execution_count": null, "id": "fc42a893", "metadata": {}, "outputs": [], "source": [ "df" ] } ],
"metadata": { "kernelspec": { "display_name": "nemo", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }