wira.indra committed on
Commit
924ded6
1 Parent(s): 8cad016

add twitter feature

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -2
  2. twitter_scraper.py +5 -19
requirements.txt CHANGED
@@ -2,5 +2,4 @@ torch
2
  transformers
3
  snscrape
4
  pandas
5
- matplotlib
6
- numpy
 
2
  transformers
3
  snscrape
4
  pandas
5
+ matplotlib
 
twitter_scraper.py CHANGED
@@ -1,28 +1,14 @@
1
  import snscrape.modules.twitter as sntwitter
2
  import pandas as pd
3
  import re
4
- import tqdm
5
- import sys
6
 
7
 
8
- def scrape_tweets(query, max_tweets=10, output_path="./scraper/output/" ):
9
  tweets_list = []
10
-
11
- tweets_list = []
12
- if sys.version_info.minor>=8:
13
- for i,tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(query).get_items())):
14
- if max_tweets != -1 and i >= int(max_tweets):
15
- break
16
- tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, tweet.likeCount, tweet.retweetCount, tweet.replyCount, tweet.quoteCount, tweet.url, tweet.lang])
17
-
18
- df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Likes', 'Retweets', 'Replies', 'Quotes', 'URL', 'Language'])
19
- df = df[df["Language"] == "in"]
20
- else:
21
- for i,tweet in tqdm(enumerate(sntwitter.TwitterSearchScraper(query).get_items())):
22
- if max_tweets != -1 and i >= int(max_tweets):
23
- break
24
- tweets_list.append([tweet.date, tweet.id, tweet.content])
25
- df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text'])
26
 
27
  df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Likes', 'Retweets', 'Replies', 'Quotes', 'URL', 'Language'])
28
  df = df[df["Language"] == "in"]
 
1
  import snscrape.modules.twitter as sntwitter
2
  import pandas as pd
3
  import re
 
 
4
 
5
 
6
+ def scrape_tweets(query, max_tweets=10):
7
  tweets_list = []
8
+ for i,tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
9
+ if max_tweets != -1 and i >= int(2):
10
+ break
11
+ tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, tweet.likeCount, tweet.retweetCount, tweet.replyCount, tweet.quoteCount, tweet.url, tweet.lang])
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Likes', 'Retweets', 'Replies', 'Quotes', 'URL', 'Language'])
14
  df = df[df["Language"] == "in"]