# Streamlit app: YouTube Video Comments Extractor
# (web-page scrape residue removed; original header was not Python source)
import io
import time

import pandas as pd
import streamlit as st
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# --- Streamlit UI ---
st.title("YouTube Video Comments Extractor")

# Credentials and target video are supplied interactively by the user;
# the API key field is masked.
api_key = st.text_input("Enter your YouTube API Key", type="password")
video_id = st.text_input("Enter the Video ID")
def _top_level_row(video_id, item):
    """Map one commentThreads item to a flat CSV row for its top-level comment."""
    comment = item['snippet']['topLevelComment']['snippet']
    return {
        'VideoID': video_id,
        'Channel': comment.get('authorChannelUrl', ''),
        'CommentedDateTime': comment['publishedAt'],
        'NumOfCommentlikes': comment['likeCount'],
        'NumOfCommentDislikes': 0,  # dislike counts are not exposed by the API
        'NumOfReplies': item['snippet']['totalReplyCount'],
        'Comment': comment['textDisplay'],
        # authorChannelId can be absent for some authors — guard the lookup
        # (the original raised KeyError here while authorChannelUrl was guarded).
        'CommentedUserID': comment.get('authorChannelId', {}).get('value', ''),
        'RepliedUserID': '',
        'Reply': '',
        'RepliesLikes': 0,
        'RepliesDislike': 0,
        'ToWhomTheyReplied': '',
        'Comment|repliedTime': comment['updatedAt'],
        'VideoUploadedtimeanddate': ''
    }


def _reply_row(video_id, reply):
    """Map one reply resource to a flat CSV row (reply columns populated)."""
    snippet = reply['snippet']
    return {
        'VideoID': video_id,
        'Channel': snippet.get('authorChannelUrl', ''),
        'CommentedDateTime': snippet['publishedAt'],
        'NumOfCommentlikes': snippet['likeCount'],
        'NumOfCommentDislikes': 0,  # dislike counts are not exposed by the API
        'NumOfReplies': 0,
        'Comment': '',
        'CommentedUserID': '',
        'RepliedUserID': snippet.get('authorChannelId', {}).get('value', ''),
        'Reply': snippet['textDisplay'],
        'RepliesLikes': snippet['likeCount'],
        'RepliesDislike': 0,
        'ToWhomTheyReplied': snippet['parentId'],
        'Comment|repliedTime': snippet['updatedAt'],
        'VideoUploadedtimeanddate': ''
    }


def get_video_comments(api_key, video_id):
    """Fetch every top-level comment and reply for a YouTube video.

    Pages through the commentThreads endpoint (100 threads per request),
    flattening each thread and its replies into dict rows matching the
    CSV export schema, while advancing a Streamlit progress bar.

    Parameters
    ----------
    api_key : str
        YouTube Data API v3 key.
    video_id : str
        ID of the video whose comments are fetched.

    Returns
    -------
    list[dict]
        One row per comment/reply. On an HttpError the error is shown in
        the UI and the rows collected so far are returned.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None
    progress_bar = st.progress(0)

    while True:
        # Keep the try body minimal: only the API call can raise HttpError.
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
        except HttpError as e:
            st.error(f"An error occurred: {e}")
            break

        for item in response['items']:
            comments.append(_top_level_row(video_id, item))
            # 'replies' is only present when the thread actually has replies.
            for reply in item.get('replies', {}).get('comments', []):
                comments.append(_reply_row(video_id, reply))

        # Update progress BEFORE the last-page check so the bar also moves
        # on the final page (the original updated it after `break`).
        # The total comment count is unknown up front, so this is a rough
        # heuristic capped at 100.
        progress_bar.progress(min(100, len(comments) // 10))

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments
# --- Action button: run the extraction and present/download the results ---
if st.button("Extract Comments"):
    if api_key and video_id:
        st.text("Extracting comments... This might take a while.")
        comments_data = get_video_comments(api_key, video_id)
        progress_bar = st.progress(100)
        time.sleep(1)  # brief pause so the completed bar is visible
        progress_bar.empty()
        if comments_data:
            df_comments = pd.DataFrame(comments_data)
            csv_file = f"{video_id}.csv"
            df_comments.to_csv(csv_file, index=False)
            st.success(f"Comments extracted and saved to {csv_file}")
            # Show dataframe details
            st.write("First 5 rows of the extracted comments:")
            st.dataframe(df_comments.head())
            st.write("DataFrame Info:")
            # DataFrame.info writes to a file-like `buf`; the original passed
            # a Streamlit DeltaGenerator (from st.text("")), which has no
            # .write() and crashes. Capture into a StringIO instead.
            buffer = io.StringIO()
            df_comments.info(buf=buffer)
            st.text(buffer.getvalue())
            st.write("DataFrame Description:")
            st.dataframe(df_comments.describe())
            st.download_button(
                label="Download CSV",
                data=df_comments.to_csv(index=False),
                file_name=csv_file,
                mime='text/csv',
            )
        else:
            st.warning("No comments found or an error occurred.")
    else:
        st.warning("Please enter both API key and Video ID.")