# youtube-to-Blog / app.py
# GundeRichardson's picture
# Create app.py
# 999b5dc verified
# raw
# history blame
# 11.4 kB
import streamlit as st
from dotenv import load_dotenv
import google.generativeai as genai
import os
from youtube_transcript_api import YouTubeTranscriptApi
import time
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from google.api_core.exceptions import ResourceExhausted
# Load environment variables from a .env file (must run before the key is read below)
load_dotenv()
# Configure the Google Generative AI client with the API key from environment variables.
# os.getenv returns None when GOOGLE_API_KEY is unset; no check is made here.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@retry(
    retry=retry_if_exception_type(ResourceExhausted),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5),
    reraise=True,
)
def generate_content_with_retry(model, prompt):
    """Call ``model.generate_content(prompt)``, retrying on quota errors.

    Every attempt is preceded by a fixed 2-second pause. When the call raises
    ``ResourceExhausted`` a warning is shown in the Streamlit UI and the error
    is re-raised so the retry decorator can schedule another attempt (at most
    5 attempts, exponential wait bounded to 4-60 seconds). After the last
    attempt the original ``ResourceExhausted`` propagates to the caller.

    Args:
        model: Object exposing ``generate_content(prompt)``.
        prompt: Prompt text passed through to the model unchanged.

    Returns:
        Whatever ``model.generate_content`` returns.
    """
    # Fixed pause before every API call, including retries
    time.sleep(2)
    try:
        response = model.generate_content(prompt)
    except ResourceExhausted as quota_error:
        st.warning(f"API quota exceeded. Retrying in a moment... ({quota_error})")
        raise quota_error
    return response
# Base prompt template for the final blog post.
# It is filled with str.format, so it must keep exactly these placeholders:
# {tone}, {keywords}, {word_count} and {input_text}.
base_prompt_template = '''
You are an AI assistant specializing in transforming long-form content, such as YouTube video transcripts or user-provided text, into a single, cohesive, and engaging blog post. Your task is to create a comprehensive blog post that captures the essence of the entire input while enriching it with additional information, insights, and a conversational touch.
Guidelines for the Blog Post:
1. Structure:
- Title: Create an engaging title for the blog post.
- Meta Description: Write a compelling 150-160 character meta description for SEO.
- Introduction: Briefly introduce the topic and hook the reader.
- Main Body: Divide into relevant sections with subheadings. Ensure smooth transitions between sections.
- Conclusion: Summarize key points and provide a call-to-action.
2. Content Enhancement:
- Synthesize information from all parts of the input to create a coherent narrative.
- Provide additional explanations, examples, or related information to enrich the content.
- Include interesting anecdotes or expert opinions to add depth and credibility.
3. Engagement:
- Use a {tone} tone consistently throughout the post.
- Include relevant descriptions of potential visuals or infographics.
- Structure the post for easy readability using subheadings, bullet points, and short paragraphs.
4. SEO Optimization:
- Naturally incorporate these keywords: {keywords}
- Use variations and related terms to avoid keyword stuffing.
- Implement proper heading structure (H1 for title, H2 for main sections, H3 for subsections).
5. Length and Style:
- Aim for a total of approximately {word_count} words for the entire blog post.
- Use varied sentence structures and paragraph lengths for better flow.
- Incorporate rhetorical devices like analogies, metaphors, or storytelling elements where appropriate.
6. Cohesion:
- Ensure that all parts of the blog post connect logically and flow smoothly.
- Use transitional phrases to link different sections and ideas.
- Maintain consistent themes and arguments throughout the post.
7. Formatting:
- Use appropriate HTML tags for headings (h1, h2, h3), lists (ul, ol), and emphasis (strong, em).
- Suggest places to break up text with [IMAGE PLACEHOLDER] or [VIDEO EMBED PLACEHOLDER] tags.
- Include a table of contents for longer articles.
8. Additional Elements:
- Create a "Key Takeaways" or "TL;DR" section for quick reference.
- Suggest pull quotes or highlight boxes for important information.
- If applicable, include a section addressing common questions or misconceptions about the topic.
Important: Create only ONE cohesive blog post that covers all the main points from the entire input. Ensure that the final output is a single, well-structured article, not multiple separate posts.
Please create a single, detailed, and engaging blog post based on the following input:
{input_text}
Remember to maintain a {tone} tone throughout the post and aim for a total of {word_count} words for the entire article.
'''
# Writing tones offered in the "Select tone" dropdown, in display order
TONE_OPTIONS = [
    "Professional",
    "Casual",
    "Humorous",
    "Inspirational",
    "Educational",
    "Conversational",
    "Formal",
    "Enthusiastic",
    "Empathetic",
    "Authoritative",
]
# Article length choices: dropdown label -> target word count sent to the model
# (each value is the midpoint of the range named in its label)
LENGTH_OPTIONS = {
    "Medium (1000-1500 words)": 1250,
    "Long (1500-2500 words)": 2000,
    "Extra Long (2500-3500 words)": 3000,
    "Comprehensive (3500-5000 words)": 4250,
}
# Patterns tried in order by extract_video_id; the first one that matches wins.
_VIDEO_ID_PATTERNS = (
    # youtube.com/watch with the "v" parameter anywhere in the query string,
    # e.g. watch?v=ID or watch?feature=share&v=ID
    re.compile(r'youtube\.com\/watch\?(?:[^#\s]*?&)?v=([^?&"\'>#\/\s]+)'),
    # youtu.be/ID and youtube.com/{embed,v,shorts,live}/ID style links
    re.compile(
        r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/'
        r'(?:watch\?v=)?(?:embed\/)?(?:v\/)?(?:shorts\/)?(?:live\/)?'
        r'(?:feature=player_embedded&v=)?([^?&"\'>#\/\s]+)'
    ),
)


# Function to extract video ID from various YouTube URL formats
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL, or None if none is found.

    Handles ``watch?v=ID`` (with ``v`` at any position in the query string),
    ``youtu.be/ID`` and the ``embed/``, ``v/``, ``shorts/`` and ``live/`` path
    forms. The ID ends at the first query separator, fragment marker, slash,
    quote or whitespace character, so surrounding noise is not returned as
    part of it.

    Args:
        url: URL text entered by the user. Empty or ``None`` yields ``None``.

    Returns:
        The captured ID string, or ``None`` when no pattern matches.
    """
    if not url:
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None
import threading
# Cache for storing processed data: video ID -> joined transcript text
cache = {}
# Lock taken when `cache` is updated
cache_lock = threading.Lock()
# Download helper: only the network call is retried, never URL validation
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,  # surface the real error instead of tenacity's RetryError
)
def _fetch_transcript_text(video_id, max_retries, delay):
    """Download the transcript for ``video_id`` and join it into one string.

    Makes up to ``max_retries`` attempts, sleeping ``delay * attempt_number``
    seconds between them. The tenacity decorator repeats this whole sequence
    up to 3 times and finally re-raises the last underlying exception.
    """
    for attempt in range(max_retries):
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        except Exception:
            if attempt == max_retries - 1:
                raise
            time.sleep(delay * (attempt + 1))  # Linear backoff: delay, 2*delay, ...
        else:
            return " ".join(entry["text"] for entry in transcript)
    # Only reached when max_retries <= 0 (the loop body never ran)
    raise Exception("Failed to retrieve transcript after multiple attempts")


# Optimized transcript fetching with caching
@st.cache_data
def get_transcript(youtube_video_url, max_retries=3, delay=2):
    """Return the transcript text for a YouTube video URL.

    Args:
        youtube_video_url: URL from which extract_video_id can derive an ID.
        max_retries: Download attempts per retry round.
        delay: Base number of seconds slept between attempts.

    Returns:
        The transcript entries' text joined with single spaces.

    Raises:
        ValueError: Immediately, without any retry or waiting, when no video
            ID can be extracted from the URL.
        Exception: The last download error once all retries are exhausted.
    """
    video_id = extract_video_id(youtube_video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")
    # Check cache first (under the same lock that protects writes)
    with cache_lock:
        cached_text = cache.get(video_id)
    if cached_text is not None:
        return cached_text
    text = _fetch_transcript_text(video_id, max_retries, delay)
    # Cache the result
    with cache_lock:
        cache[video_id] = text
    return text
# Function to chunk long text
def chunk_text(text, chunk_size=4000, overlap=500):
    """Split ``text`` into overlapping chunks of at most ``chunk_size`` characters.

    Text no longer than ``chunk_size`` is returned unchanged as a single-item
    list. Otherwise each chunk is cut after the last '.' inside its window when
    there is one, or at the window edge when there is not. The next chunk
    starts ``overlap`` characters before the previous cut so context carries
    over. Chunks are stripped of surrounding whitespace and empty chunks are
    dropped.

    The start position always moves forward: when stepping back by ``overlap``
    would not advance past the current start (a very short sentence, or
    ``overlap >= chunk_size``), the next chunk starts at the cut with no
    overlap. Chunking stops as soon as a chunk reaches the end of the text, so
    no trailing chunk that merely repeats the tail of the previous one is
    produced.

    Args:
        text: The text to split.
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        A list of chunk strings in document order.

    Raises:
        ValueError: If ``chunk_size`` is not positive and ``text`` is too long
            to be returned whole.
    """
    # Only chunk if text is longer than chunk_size
    if len(text) <= chunk_size:
        return [text]
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    text_length = len(text)
    chunks = []
    start = 0
    while start < text_length:
        end = start + chunk_size
        # Find the nearest sentence end inside the window, if any
        if end < text_length:
            sentence_end = text.rfind('.', start, end) + 1
            if sentence_end > start:
                end = sentence_end
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end >= text_length:
            break  # the whole text is covered
        next_start = end - overlap
        if next_start <= start:
            # Stepping back would stall or move backwards; skip the overlap
            next_start = end
        start = next_start
    return chunks
# Function to generate blog post using Gemini AI model with retries
def generate_blog_post(input_text, tone, keywords, length):
    """Produce one blog post from ``input_text`` in two stages.

    Stage one splits the input with chunk_text and asks the model for a
    summary of every chunk. Stage two fills base_prompt_template with the
    joined summaries and asks the model for the final article.

    Args:
        input_text: Source material (transcript or user text).
        tone: Tone name inserted into the prompt.
        keywords: Iterable of keyword strings, joined with ", ".
        length: A key of LENGTH_OPTIONS selecting the target word count.

    Returns:
        The generated post text, or None when any model call fails (the error
        is reported through ``st.error``).
    """
    target_words = LENGTH_OPTIONS[length]
    pieces = chunk_text(input_text)
    model = genai.GenerativeModel("gemini-1.5-flash")

    # Stage 1: summarise each chunk; stop at the first failure
    summaries = []
    for number, piece in enumerate(pieces, start=1):
        summary_prompt = (
            "\n"
            "Analyze the following part of content and extract key points, main ideas, and important details:\n"
            f"{piece}\n"
            "Provide a concise summary of this part, highlighting the most important information.\n"
        )
        try:
            summary = generate_content_with_retry(model, summary_prompt)
            summaries.append(summary.text)
        except Exception as exc:
            st.error(f"Error processing chunk {number}: {str(exc)}")
            return None

    # Stage 2: write the article from the combined summaries
    final_prompt = base_prompt_template.format(
        input_text='\n'.join(summaries),
        word_count=target_words,
        keywords=', '.join(keywords),
        tone=tone,
    )
    try:
        return generate_content_with_retry(model, final_prompt).text
    except Exception as exc:
        st.error(f"Error generating final blog post: {str(exc)}")
        return None
# Streamlit UI with progress tracking
def main():
    """Render the Streamlit page and drive blog-post generation.

    Collects the source text (a YouTube transcript fetched with
    get_transcript, or text typed by the user), reads tone, keywords and
    length from the sidebar, calls generate_blog_post when the button is
    pressed, and shows the post stored in session state together with a
    download button.
    """
    # NOTE(review): this file's indentation was lost before review; the nesting
    # below is reconstructed from statement order. In particular, confirm
    # whether the "Generate Blog Post" button belongs inside the sidebar block.
    st.set_page_config(page_title="BlogBrain Genius AI", layout="wide")
    # Initialize session state
    if 'blog_post' not in st.session_state:
        st.session_state.blog_post = None
    if 'processing' not in st.session_state:
        st.session_state.processing = False
    st.title("✍️ BlogBrain Genius AI: Video to Blog Alchemist")
    # Input method selection with proper state management
    input_method = st.radio("Choose input method:", ("YouTube Video", "Custom Text"))
    input_text = ""
    if input_method == "YouTube Video":
        youtube_url = st.text_input("Enter YouTube URL:")
        # Fetch only when a URL was entered and no generation is flagged as running
        if youtube_url and not st.session_state.processing:
            try:
                with st.spinner("Fetching transcript..."):
                    input_text = get_transcript(youtube_url)
            except Exception as e:
                # input_text stays "" so the generate button below does nothing
                st.error(f"Error: {str(e)}")
    else:
        input_text = st.text_area("Enter your content:", height=200)
    # Sidebar options
    with st.sidebar:
        st.markdown("<h1 style='text-align: center; color: #4A90E2;'>🧠 BlogBrain Genius AI</h1>", unsafe_allow_html=True)
        st.markdown("<p style='text-align: center;'>Transform Content into Engaging Blog Posts</p>", unsafe_allow_html=True)
        st.markdown("---")
        tone = st.selectbox("Select tone:", TONE_OPTIONS)
        keywords = st.text_input("Enter keywords (comma-separated):")
        length = st.selectbox("Select length:", list(LENGTH_OPTIONS.keys()))
    # Generate only when the button was clicked and there is input text
    if st.button("Generate Blog Post") and input_text:
        st.session_state.processing = True
        try:
            with st.spinner("Generating a single, comprehensive blog post..."):
                blog_post = generate_blog_post(
                    input_text,
                    tone,
                    # Keywords are split on commas as typed (not stripped)
                    keywords.split(",") if keywords else [],
                    length
                )
            if blog_post:
                # Stored in session state so the post is still shown on later reruns
                st.session_state.blog_post = blog_post
                st.success("Blog post generated successfully!")
            else:
                st.error("Failed to generate the blog post. Please try again later.")
        except Exception as e:
            st.error(f"An unexpected error occurred: {str(e)}")
        finally:
            # Always clear the flag, whether generation succeeded or failed
            st.session_state.processing = False
    # Display results
    if st.session_state.blog_post:
        st.markdown(st.session_state.blog_post)
        st.download_button(
            "Download Blog Post",
            st.session_state.blog_post,
            "blog_post.md",
            "text/markdown"
        )
# Script entry point: build the main page first (main() issues set_page_config)
if __name__ == "__main__":
    main()
# Sidebar with creator information
st.sidebar.markdown("---")
st.sidebar.title("About the Creator")
# The link icons below were mis-encoded (UTF-8 bytes decoded with a legacy
# code page) and showed up as garbage; they are restored to the intended emoji.
# NOTE(review): the mailto target reads "[email protected]", which looks like an
# obfuscated placeholder rather than a real address - restore the real e-mail.
st.sidebar.info("""
Designed by Richardson Gunde 🎨
This advanced application uses AI to generate a single, comprehensive blog post based on long-form content from YouTube videos or user input.
🔗 [LinkedIn](https://www.linkedin.com/in/richardson-gunde)
📧 [Email](mailto:[email protected])
""")
# Page footer describing the app
st.markdown("""
---
:green[This advanced app leverages the power of Google's Gemini AI to generate a single, detailed, SEO-optimized long-form blog post from YouTube videos or custom text.
It handles extensive content while ensuring a cohesive output.]
""")