# Streamlit app: YouTube Video Comments Extractor
# (web-page scrape residue removed; original header was not Python source)
import io
import time

import pandas as pd
import streamlit as st
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# --- Streamlit UI ---
st.title("YouTube Video Comments Extractor")

# Credentials and target video are supplied interactively by the user;
# the API key field is masked.
api_key = st.text_input("Enter your YouTube API Key", type="password")
video_id = st.text_input("Enter the Video ID")
def _top_level_row(video_id, item):
    """Map one commentThreads item to a flat CSV row for its top-level comment."""
    comment = item['snippet']['topLevelComment']['snippet']
    return {
        'VideoID': video_id,
        'Channel': comment.get('authorChannelUrl', ''),
        'CommentedDateTime': comment['publishedAt'],
        'NumOfCommentlikes': comment['likeCount'],
        'NumOfCommentDislikes': 0,  # dislike counts are not exposed by the API
        'NumOfReplies': item['snippet']['totalReplyCount'],
        'Comment': comment['textDisplay'],
        # authorChannelId can be absent for some authors — guard the lookup
        # (the original raised KeyError here while authorChannelUrl was guarded).
        'CommentedUserID': comment.get('authorChannelId', {}).get('value', ''),
        'RepliedUserID': '',
        'Reply': '',
        'RepliesLikes': 0,
        'RepliesDislike': 0,
        'ToWhomTheyReplied': '',
        'Comment|repliedTime': comment['updatedAt'],
        'VideoUploadedtimeanddate': ''
    }


def _reply_row(video_id, reply):
    """Map one reply resource to a flat CSV row (reply columns populated)."""
    snippet = reply['snippet']
    return {
        'VideoID': video_id,
        'Channel': snippet.get('authorChannelUrl', ''),
        'CommentedDateTime': snippet['publishedAt'],
        'NumOfCommentlikes': snippet['likeCount'],
        'NumOfCommentDislikes': 0,  # dislike counts are not exposed by the API
        'NumOfReplies': 0,
        'Comment': '',
        'CommentedUserID': '',
        'RepliedUserID': snippet.get('authorChannelId', {}).get('value', ''),
        'Reply': snippet['textDisplay'],
        'RepliesLikes': snippet['likeCount'],
        'RepliesDislike': 0,
        'ToWhomTheyReplied': snippet['parentId'],
        'Comment|repliedTime': snippet['updatedAt'],
        'VideoUploadedtimeanddate': ''
    }


def get_video_comments(api_key, video_id):
    """Fetch every top-level comment and reply for a YouTube video.

    Pages through the commentThreads endpoint (100 threads per request),
    flattening each thread and its replies into dict rows matching the
    CSV export schema, while advancing a Streamlit progress bar.

    Parameters
    ----------
    api_key : str
        YouTube Data API v3 key.
    video_id : str
        ID of the video whose comments are fetched.

    Returns
    -------
    list[dict]
        One row per comment/reply. On an HttpError the error is shown in
        the UI and the rows collected so far are returned.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None
    progress_bar = st.progress(0)

    while True:
        # Keep the try body minimal: only the API call can raise HttpError.
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
        except HttpError as e:
            st.error(f"An error occurred: {e}")
            break

        for item in response['items']:
            comments.append(_top_level_row(video_id, item))
            # 'replies' is only present when the thread actually has replies.
            for reply in item.get('replies', {}).get('comments', []):
                comments.append(_reply_row(video_id, reply))

        # Update progress BEFORE the last-page check so the bar also moves
        # on the final page (the original updated it after `break`).
        # The total comment count is unknown up front, so this is a rough
        # heuristic capped at 100.
        progress_bar.progress(min(100, len(comments) // 10))

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments
# --- Action button: run the extraction and present/download the results ---
if st.button("Extract Comments"):
    if api_key and video_id:
        st.text("Extracting comments... This might take a while.")
        comments_data = get_video_comments(api_key, video_id)
        progress_bar = st.progress(100)
        time.sleep(1)  # brief pause so the completed bar is visible
        progress_bar.empty()
        if comments_data:
            df_comments = pd.DataFrame(comments_data)
            csv_file = f"{video_id}.csv"
            df_comments.to_csv(csv_file, index=False)
            st.success(f"Comments extracted and saved to {csv_file}")
            # Show dataframe details
            st.write("First 5 rows of the extracted comments:")
            st.dataframe(df_comments.head())
            st.write("DataFrame Info:")
            # DataFrame.info writes to a file-like `buf`; the original passed
            # a Streamlit DeltaGenerator (from st.text("")), which has no
            # .write() and crashes. Capture into a StringIO instead.
            buffer = io.StringIO()
            df_comments.info(buf=buffer)
            st.text(buffer.getvalue())
            st.write("DataFrame Description:")
            st.dataframe(df_comments.describe())
            st.download_button(
                label="Download CSV",
                data=df_comments.to_csv(index=False),
                file_name=csv_file,
                mime='text/csv',
            )
        else:
            st.warning("No comments found or an error occurred.")
    else:
        st.warning("Please enter both API key and Video ID.")