{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "730ba509",
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "\n",
    "# Display the value of every top-level expression in a cell, not only the last one.\n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9acd4b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "# Parent of the current working directory; it is put on sys.path so the\n",
    "# project-local `utilities` package can be imported in the next cell.\n",
    "# NOTE(review): assumes the kernel's working directory sits one level below\n",
    "# the project root - confirm.\n",
    "proj_dir = Path.cwd().parent\n",
    "\n",
    "# Guarded so that re-running this cell does not append duplicate entries.\n",
    "if str(proj_dir) not in sys.path:\n",
    "    sys.path.append(str(proj_dir))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "62452860",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project-local helpers; this import relies on the sys.path entry added above.\n",
    "from utilities.pushshift_data import scrape_submissions_by_day, submissions_to_dataframe, get_post_count_for_day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a956a623",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "17df3f2812084d3591e914ffcfd948b0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-04-12 16:23:59,392 - INFO - Fetching data between timestamps 2013-02-28 20:00:00 and 2013-03-01 20:00:00\n",
      "2023-04-12 16:24:03,524 - INFO - Fetching data between timestamps 2013-02-28 20:00:00 and 2013-03-01 14:37:16\n",
      "2023-04-12 16:24:08,443 - INFO - Fetching data between timestamps 2013-02-28 20:00:00 and 2013-03-01 05:02:52\n",
      "2023-04-12 16:24:13,409 - INFO - Fetching data between timestamps 2013-02-28 20:00:00 and 2013-03-01 00:43:35\n",
      "2023-04-12 16:24:17,548 - INFO - Fetching data between timestamps 2013-02-28 20:00:00 and 2013-02-28 20:28:35\n",
      "2023-04-12 16:24:21,490 - INFO - Fetching data between timestamps 2013-02-28 20:00:00 and 2013-02-28 20:00:48\n",
      "2023-04-12 16:24:23,658 - INFO - Finished scraping 4106 submissions in 28.86 seconds\n"
     ]
    }
   ],
   "source": [
    "# Scrape parameters: the subreddit name and the day as a YYYY-MM-DD string.\n",
    "subreddit_to_scrape = \"askreddit\"\n",
    "day_to_scrape = \"2013-03-01\"\n",
    "\n",
    "# Fetch that day's submissions; the helper logs its progress to stderr.\n",
    "submissions = scrape_submissions_by_day(subreddit_to_scrape, day_to_scrape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b1cc845b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | permalink | \n", "selftext | \n", "url | \n", "created_utc | \n", "author | \n", "num_comments | \n", "score | \n", "title | \n", "id | \n", "downs | \n", "ups | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "/r/AskReddit/comments/19hbm0/in_the_way_that_p... | \n", "Basically, do other parts of the world have th... | \n", "http://www.reddit.com/r/AskReddit/comments/19h... | \n", "2013-03-01 19:58:55 | \n", "sjr63 | \n", "1 | \n", "1 | \n", "In the way that popular English and American m... | \n", "19hbm0 | \n", "0 | \n", "1 | \n", "
1 | \n", "/r/AskReddit/comments/19hblp/could_i_buy_an_an... | \n", "\n", " | http://www.reddit.com/r/AskReddit/comments/19h... | \n", "2013-03-01 19:58:50 | \n", "WeirdPlane | \n", "13 | \n", "1 | \n", "Could I buy an Android phone without a plan an... | \n", "19hblp | \n", "0 | \n", "1 | \n", "
2 | \n", "/r/AskReddit/comments/19hblj/how_do_i_reddit/ | \n", "Yeah.\n", "\n", "How do I reddit? I don't use or read re... | \n", "http://www.reddit.com/r/AskReddit/comments/19h... | \n", "2013-03-01 19:58:47 | \n", "xxnovaroxgg | \n", "14 | \n", "0 | \n", "How do I reddit | \n", "19hblj | \n", "0 | \n", "0 | \n", "
3 | \n", "/r/AskReddit/comments/19hbjx/xpost_rsurvival_h... | \n", "My brothers, dad and I have always been huge L... | \n", "http://www.reddit.com/r/AskReddit/comments/19h... | \n", "2013-03-01 19:58:07 | \n", "tuffstough | \n", "0 | \n", "1 | \n", "(x-post r/survival) Have any redditors seen Le... | \n", "19hbjx | \n", "0 | \n", "1 | \n", "
4 | \n", "/r/AskReddit/comments/19hbjk/female_redditors_... | \n", "I'm curious, guys tend to get asked the usual ... | \n", "http://www.reddit.com/r/AskReddit/comments/19h... | \n", "2013-03-01 19:57:58 | \n", "redditredditx3 | \n", "13 | \n", "2 | \n", "Female Redditors, which part of the male physi... | \n", "19hbjk | \n", "0 | \n", "2 | \n", "