Spaces:

GundeRichardson
/

youtube-to-Blog

Sleeping

App Files Files Community

youtube-to-Blog / app.py

GundeRichardson

Create app.py

999b5dc verified 7 days ago

raw

history blame

11.4 kB

	import streamlit as st
	from dotenv import load_dotenv
	import google.generativeai as genai
	import os
	from youtube_transcript_api import YouTubeTranscriptApi
	import time
	import re
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
	from google.api_core.exceptions import ResourceExhausted
	# Load environment variables from a .env file
	# (runs at import time, before the API client below is configured)
	load_dotenv()

	# Configure the Google Generative AI client with the API key from environment variables.
	# os.getenv returns None when GOOGLE_API_KEY is unset; nothing here checks for that.
	# NOTE(review): presumably a missing key only surfaces on the first API call - confirm.
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


	@retry(
	    stop=stop_after_attempt(5),
	    wait=wait_exponential(multiplier=1, min=4, max=60),
	    retry=retry_if_exception_type(ResourceExhausted),
	    reraise=True,
	)
	def generate_content_with_retry(model, prompt):
	    """Call ``model.generate_content(prompt)``, retrying on quota errors.

	    The tenacity decorator retries only on ``ResourceExhausted``, for at
	    most 5 attempts, waiting an exponentially growing interval bounded to
	    4-60 seconds between attempts. ``reraise=True`` means the last
	    ``ResourceExhausted`` is raised as-is once the attempts run out.

	    Args:
	        model: Object exposing ``generate_content(prompt)``.
	        prompt: Prompt passed through to the model unchanged.

	    Returns:
	        Whatever ``model.generate_content`` returns.

	    Raises:
	        ResourceExhausted: After the final failed attempt.
	        Exception: Any other error from the model call, without retrying.
	    """
	    time.sleep(2)  # Fixed pause before every call to space out API requests
	    try:
	        return model.generate_content(prompt)
	    except ResourceExhausted as e:
	        st.warning(f"API quota exceeded. Retrying in a moment... ({e})")
	        # Bare raise keeps the original traceback for tenacity and callers.
	        raise

	# Define the base prompt template.
	# The template is filled with str.format() in generate_blog_post, which supplies
	# four placeholders: {tone}, {keywords}, {word_count} and {input_text}.
	# Because of str.format(), any literal brace added to this text must be doubled.
	base_prompt_template = '''
	You are an AI assistant specializing in transforming long-form content, such as YouTube video transcripts or user-provided text, into a single, cohesive, and engaging blog post. Your task is to create a comprehensive blog post that captures the essence of the entire input while enriching it with additional information, insights, and a conversational touch.

	Guidelines for the Blog Post:

	1. Structure:
	- Title: Create an engaging title for the blog post.
	- Meta Description: Write a compelling 150-160 character meta description for SEO.
	- Introduction: Briefly introduce the topic and hook the reader.
	- Main Body: Divide into relevant sections with subheadings. Ensure smooth transitions between sections.
	- Conclusion: Summarize key points and provide a call-to-action.

	2. Content Enhancement:
	- Synthesize information from all parts of the input to create a coherent narrative.
	- Provide additional explanations, examples, or related information to enrich the content.
	- Include interesting anecdotes or expert opinions to add depth and credibility.

	3. Engagement:
	- Use a {tone} tone consistently throughout the post.
	- Include relevant descriptions of potential visuals or infographics.
	- Structure the post for easy readability using subheadings, bullet points, and short paragraphs.

	4. SEO Optimization:
	- Naturally incorporate these keywords: {keywords}
	- Use variations and related terms to avoid keyword stuffing.
	- Implement proper heading structure (H1 for title, H2 for main sections, H3 for subsections).

	5. Length and Style:
	- Aim for a total of approximately {word_count} words for the entire blog post.
	- Use varied sentence structures and paragraph lengths for better flow.
	- Incorporate rhetorical devices like analogies, metaphors, or storytelling elements where appropriate.

	6. Cohesion:
	- Ensure that all parts of the blog post connect logically and flow smoothly.
	- Use transitional phrases to link different sections and ideas.
	- Maintain consistent themes and arguments throughout the post.

	7. Formatting:
	- Use appropriate HTML tags for headings (h1, h2, h3), lists (ul, ol), and emphasis (strong, em).
	- Suggest places to break up text with [IMAGE PLACEHOLDER] or [VIDEO EMBED PLACEHOLDER] tags.
	- Include a table of contents for longer articles.

	8. Additional Elements:
	- Create a "Key Takeaways" or "TL;DR" section for quick reference.
	- Suggest pull quotes or highlight boxes for important information.
	- If applicable, include a section addressing common questions or misconceptions about the topic.

	Important: Create only ONE cohesive blog post that covers all the main points from the entire input. Ensure that the final output is a single, well-structured article, not multiple separate posts.

	Please create a single, detailed, and engaging blog post based on the following input:

	{input_text}

	Remember to maintain a {tone} tone throughout the post and aim for a total of {word_count} words for the entire article.
	'''

	# Tones offered in the "Select tone:" dropdown; the chosen value is
	# substituted into the prompt's {tone} placeholder.
	TONE_OPTIONS = [
	    "Professional",
	    "Casual",
	    "Humorous",
	    "Inspirational",
	    "Educational",
	    "Conversational",
	    "Formal",
	    "Enthusiastic",
	    "Empathetic",
	    "Authoritative",
	]

	# Maps each "Select length:" label to the target word count used in the prompt.
	# Insertion order is the order the labels appear in the dropdown.
	LENGTH_OPTIONS = {
	    "Medium (1000-1500 words)": 1250,
	    "Long (1500-2500 words)": 2000,
	    "Extra Long (2500-3500 words)": 3000,
	    "Comprehensive (3500-5000 words)": 4250,
	}

	# Function to extract video ID from various YouTube URL formats
	def extract_video_id(url):
	    """Return the video ID embedded in a YouTube URL, or None if none is found.

	    Recognised forms (scheme and a ``www.``/``m.``/``music.`` prefix are
	    optional):

	    * ``youtu.be/<id>``
	    * ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the query,
	      e.g. ``watch?feature=player_embedded&v=<id>``)
	    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

	    Args:
	        url: URL text as typed by the user.

	    Returns:
	        The video ID string, or None when the URL is empty, is not a
	        YouTube URL, or carries no well-formed ID.
	    """
	    # Function-scope import keeps this block self-contained (stdlib only).
	    from urllib.parse import parse_qs, urlparse

	    if not url or not isinstance(url, str):
	        return None

	    candidate = url.strip()
	    # urlparse only fills in the host when a scheme is present.
	    if not re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", candidate):
	        candidate = "https://" + candidate

	    try:
	        parsed = urlparse(candidate)
	        host = (parsed.hostname or "").lower()
	    except ValueError:
	        # urlparse rejects some malformed inputs (e.g. broken IPv6 hosts).
	        return None

	    for prefix in ("www.", "m.", "music."):
	        if host.startswith(prefix):
	            host = host[len(prefix):]
	            break

	    segments = [part for part in parsed.path.split("/") if part]
	    video_id = None
	    if host == "youtu.be":
	        if segments:
	            video_id = segments[0]
	    elif host in ("youtube.com", "youtube-nocookie.com"):
	        if len(segments) >= 2 and segments[0] in ("embed", "v", "shorts", "live"):
	            video_id = segments[1]
	        else:
	            # Watch-style URLs carry the ID in the "v" query parameter.
	            video_id = parse_qs(parsed.query).get("v", [None])[0]

	    # Only accept the characters YouTube uses in IDs, so stray text is never
	    # passed on to the transcript API.
	    if video_id and re.fullmatch(r"[A-Za-z0-9_-]+", video_id):
	        return video_id
	    return None

	import threading
	# Cache for storing processed data:
	# maps a YouTube video ID to the joined transcript text stored by get_transcript.
	# It is a plain module-level dict with no size limit or eviction.
	cache = {}
	# Held while get_transcript updates `cache`.
	cache_lock = threading.Lock()

	# Transcript fetching with caching (Streamlit cache plus the per-video `cache` dict)
	@st.cache_data
	def get_transcript(youtube_video_url, max_retries=3, delay=2):
	    """Fetch a YouTube video's transcript and return it as one string.

	    The loop below is the only retry layer. Wrapping it in an unfiltered
	    tenacity ``@retry`` as well multiplied the attempts, retried invalid
	    URLs with multi-second waits, and replaced the real error with a
	    ``RetryError``.

	    Args:
	        youtube_video_url: URL of the video, in any form accepted by
	            ``extract_video_id``.
	        max_retries: Maximum number of fetch attempts.
	        delay: Base pause in seconds; the wait after attempt ``k``
	            (1-based) is ``delay * k``.

	    Returns:
	        The transcript entries' ``"text"`` fields joined with single spaces.

	    Raises:
	        ValueError: If no video ID can be extracted from the URL. Raised
	            immediately, never retried.
	        Exception: The error from the last failed fetch attempt.
	        RuntimeError: If ``max_retries`` is less than 1, so no attempt ran.
	    """
	    video_id = extract_video_id(youtube_video_url)
	    if not video_id:
	        raise ValueError("Invalid YouTube URL")

	    # Read under the same lock that guards writes.
	    with cache_lock:
	        cached_text = cache.get(video_id)
	    if cached_text is not None:
	        return cached_text

	    for attempt in range(max_retries):
	        try:
	            transcript = YouTubeTranscriptApi.get_transcript(video_id)
	        except Exception:
	            if attempt == max_retries - 1:
	                raise
	            time.sleep(delay * (attempt + 1))  # Linearly growing backoff
	            continue

	        text = " ".join(entry["text"] for entry in transcript)

	        # Cache the result
	        with cache_lock:
	            cache[video_id] = text

	        return text

	    raise RuntimeError("Failed to retrieve transcript after multiple attempts")

	# Function to chunk long text
	def chunk_text(text, chunk_size=4000, overlap=500):
	    """Split ``text`` into overlapping chunks of at most ``chunk_size`` characters.

	    Text no longer than ``chunk_size`` is returned unchanged as a single
	    chunk. Otherwise each chunk is cut after the last ``'.'`` in its
	    window when that leaves the chunk longer than ``overlap`` characters,
	    and at ``chunk_size`` characters when it does not. Consecutive chunks
	    share ``overlap`` characters, and chunks are whitespace-stripped.

	    Args:
	        text: The text to split.
	        chunk_size: Maximum chunk length in characters.
	        overlap: Number of characters repeated at the start of the next
	            chunk.

	    Returns:
	        List of chunk strings in document order.

	    Raises:
	        ValueError: If the text needs splitting and ``chunk_size`` is not
	            positive, or ``overlap`` is negative or not smaller than
	            ``chunk_size``. Such settings could never make progress.
	    """
	    text_len = len(text)
	    # Only chunk if text is longer than chunk_size
	    if text_len <= chunk_size:
	        return [text]

	    if chunk_size <= 0:
	        raise ValueError("chunk_size must be positive")
	    if overlap < 0 or overlap >= chunk_size:
	        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

	    chunks = []
	    start = 0
	    while start < text_len:
	        end = min(start + chunk_size, text_len)

	        if end < text_len:
	            # Search for a sentence end only beyond the overlap region.
	            # A cut at or before start + overlap would make the next start
	            # (end - overlap) stand still or move backwards and never finish.
	            sentence_end = text.rfind('.', start + overlap, end) + 1
	            if sentence_end > 0:
	                end = sentence_end

	        chunk = text[start:end].strip()
	        if chunk:
	            chunks.append(chunk)

	        if end >= text_len:
	            # The whole text is covered; stepping back by `overlap` again
	            # would only re-emit a tail already inside this chunk.
	            break
	        start = end - overlap

	    return chunks

	# Function to generate blog post using Gemini AI model with retries
	def generate_blog_post(input_text, tone, keywords, length):
	    """Summarise the input chunk by chunk, then write one blog post from the summaries.

	    Each chunk from ``chunk_text`` is sent to the model with a summarising
	    prompt. The summaries are joined with newlines and placed into
	    ``base_prompt_template`` for the final generation call. Failures are
	    reported through ``st.error`` rather than raised.

	    Args:
	        input_text: Transcript or user-supplied text to turn into a post.
	        tone: Tone name substituted into the prompt.
	        keywords: Iterable of keyword strings, or None. Entries are
	            stripped and blank ones dropped before use.
	        length: A key of ``LENGTH_OPTIONS`` selecting the target word count.

	    Returns:
	        The generated blog post text, or None if any model call failed.

	    Raises:
	        KeyError: If ``length`` is not a key of ``LENGTH_OPTIONS``.
	    """
	    word_count = LENGTH_OPTIONS[length]
	    # Input such as "seo, tips," splits into [" tips", ""]-style entries.
	    # Clean them so the prompt gets "seo, tips" instead of stray spaces
	    # and dangling commas.
	    cleaned_keywords = [kw.strip() for kw in (keywords or []) if kw and kw.strip()]
	    model = genai.GenerativeModel("gemini-1.5-flash")

	    summaries = []
	    for position, chunk in enumerate(chunk_text(input_text), start=1):
	        chunk_prompt = f"""
	Analyze the following part of content and extract key points, main ideas, and important details:

	{chunk}

	Provide a concise summary of this part, highlighting the most important information.
	"""
	        try:
	            response = generate_content_with_retry(model, chunk_prompt)
	            summaries.append(response.text)
	        except Exception as e:
	            # One failed chunk aborts the run so the post is never built
	            # from partial input.
	            st.error(f"Error processing chunk {position}: {str(e)}")
	            return None

	    final_prompt = base_prompt_template.format(
	        tone=tone,
	        keywords=', '.join(cleaned_keywords),
	        word_count=word_count,
	        input_text='\n'.join(summaries),
	    )

	    try:
	        final_response = generate_content_with_retry(model, final_prompt)
	        return final_response.text
	    except Exception as e:
	        st.error(f"Error generating final blog post: {str(e)}")
	        return None


	# Streamlit UI with progress tracking
	def main():
	    """Render the page: collect input and options, generate, and show the post.

	    The source text is either a YouTube transcript fetched with
	    ``get_transcript`` or text typed into a text area. Tone, keywords and
	    length come from the sidebar. The generated post is kept in
	    ``st.session_state.blog_post`` so it stays visible across reruns, and
	    is offered as a download.
	    """
	    st.set_page_config(page_title="BlogBrain Genius AI", layout="wide")

	    # Initialize session state
	    if 'blog_post' not in st.session_state:
	        st.session_state.blog_post = None
	    if 'processing' not in st.session_state:
	        st.session_state.processing = False

	    st.title("✍️ BlogBrain Genius AI: Video to Blog Alchemist")

	    # Input method selection with proper state management
	    input_method = st.radio("Choose input method:", ("YouTube Video", "Custom Text"))

	    input_text = ""
	    if input_method == "YouTube Video":
	        youtube_url = st.text_input("Enter YouTube URL:")
	        if youtube_url and not st.session_state.processing:
	            try:
	                with st.spinner("Fetching transcript..."):
	                    input_text = get_transcript(youtube_url)
	            except Exception as e:
	                st.error(f"Error: {str(e)}")
	    else:
	        input_text = st.text_area("Enter your content:", height=200)

	    # Sidebar options
	    with st.sidebar:
	        st.markdown("<h1 style='text-align: center; color: #4A90E2;'>🧠 BlogBrain Genius AI</h1>", unsafe_allow_html=True)
	        st.markdown("<p style='text-align: center;'>Transform Content into Engaging Blog Posts</p>", unsafe_allow_html=True)
	        st.markdown("---")
	        tone = st.selectbox("Select tone:", TONE_OPTIONS)
	        keywords = st.text_input("Enter keywords (comma-separated):")
	        length = st.selectbox("Select length:", list(LENGTH_OPTIONS.keys()))

	    # The button is created unconditionally so it is rendered on every run.
	    generate_clicked = st.button("Generate Blog Post")
	    has_input = bool(input_text and input_text.strip())

	    if generate_clicked and not has_input:
	        # Without this, a click with nothing to process did nothing at all.
	        st.warning("Please provide a YouTube URL with an available transcript or enter some text first.")
	    elif generate_clicked:
	        st.session_state.processing = True
	        try:
	            with st.spinner("Generating a single, comprehensive blog post..."):
	                blog_post = generate_blog_post(
	                    input_text,
	                    tone,
	                    keywords.split(",") if keywords else [],
	                    length
	                )
	            if blog_post:
	                st.session_state.blog_post = blog_post
	                st.success("Blog post generated successfully!")
	            else:
	                st.error("Failed to generate the blog post. Please try again later.")
	        except Exception as e:
	            st.error(f"An unexpected error occurred: {str(e)}")
	        finally:
	            # Always clear the flag so a failed run cannot block later
	            # transcript fetches.
	            st.session_state.processing = False

	    # Display results
	    if st.session_state.blog_post:
	        st.markdown(st.session_state.blog_post)
	        st.download_button(
	            "Download Blog Post",
	            st.session_state.blog_post,
	            "blog_post.md",
	            "text/markdown"
	        )

	# Build the main page when this file is run as the entry script.
	if __name__ == "__main__":
	    main()

	# Creator details, appended to the sidebar after the main page has rendered.
	_sidebar = st.sidebar
	_sidebar.markdown("---")
	_sidebar.title("About the Creator")
	_sidebar.info("""
	Designed by Richardson Gunde 🎨

	This advanced application uses AI to generate a single, comprehensive blog post based on long-form content from YouTube videos or user input.

	🔗 [LinkedIn](https://www.linkedin.com/in/richardson-gunde)
	📧 [Email](mailto:[email protected])
	""")

	# Footer note at the bottom of the main page.
	st.markdown("""
	---
	:green[This advanced app leverages the power of Google's Gemini AI to generate a single, detailed, SEO-optimized long-form blog post from YouTube videos or custom text.
	It handles extensive content while ensuring a cohesive output.]
	""")