GundeRichardson committed on
Commit
999b5dc
•
1 Parent(s): b16b0f8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +294 -0
app.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ import google.generativeai as genai
4
+ import os
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
+ import time
7
+ import re
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
10
+ from google.api_core.exceptions import ResourceExhausted
11
# Pull settings from a local .env file into the process environment (if present).
load_dotenv()

# Hand the Gemini client its API key; the key is read from GOOGLE_API_KEY and
# is None when that variable is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
16
+
17
+
18
# Retry policy for quota errors: up to 5 attempts, exponential wait clamped to
# 4-60 seconds, and the original exception re-raised once attempts run out.
_quota_retry = retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(ResourceExhausted),
    reraise=True,
)


@_quota_retry
def generate_content_with_retry(model, prompt):
    """Call ``model.generate_content(prompt)``, retrying on quota exhaustion.

    Sleeps two seconds before every attempt to space out API calls. When the
    call raises ``ResourceExhausted`` a Streamlit warning is shown and the
    exception is re-raised so the retry policy can schedule another attempt.
    Any other exception propagates immediately.
    """
    time.sleep(2)  # fixed pause between API calls
    try:
        response = model.generate_content(prompt)
    except ResourceExhausted as quota_error:
        st.warning(f"API quota exceeded. Retrying in a moment... ({quota_error})")
        raise
    return response
31
+
32
# Base prompt template for the final blog-post generation call.
#
# Filled in with ``str.format``; the placeholders are {tone}, {keywords},
# {word_count} and {input_text}. The formatting section asks for Markdown
# (not HTML) because the result is rendered with ``st.markdown`` and offered
# for download as a ``.md`` file.
base_prompt_template = '''
You are an AI assistant specializing in transforming long-form content, such as YouTube video transcripts or user-provided text, into a single, cohesive, and engaging blog post. Your task is to create a comprehensive blog post that captures the essence of the entire input while enriching it with additional information, insights, and a conversational touch.

Guidelines for the Blog Post:

1. Structure:
- Title: Create an engaging title for the blog post.
- Meta Description: Write a compelling 150-160 character meta description for SEO.
- Introduction: Briefly introduce the topic and hook the reader.
- Main Body: Divide into relevant sections with subheadings. Ensure smooth transitions between sections.
- Conclusion: Summarize key points and provide a call-to-action.

2. Content Enhancement:
- Synthesize information from all parts of the input to create a coherent narrative.
- Provide additional explanations, examples, or related information to enrich the content.
- Include interesting anecdotes or expert opinions to add depth and credibility.

3. Engagement:
- Use a {tone} tone consistently throughout the post.
- Include relevant descriptions of potential visuals or infographics.
- Structure the post for easy readability using subheadings, bullet points, and short paragraphs.

4. SEO Optimization:
- Naturally incorporate these keywords: {keywords}
- Use variations and related terms to avoid keyword stuffing.
- Implement proper heading structure (H1 for title, H2 for main sections, H3 for subsections).

5. Length and Style:
- Aim for a total of approximately {word_count} words for the entire blog post.
- Use varied sentence structures and paragraph lengths for better flow.
- Incorporate rhetorical devices like analogies, metaphors, or storytelling elements where appropriate.

6. Cohesion:
- Ensure that all parts of the blog post connect logically and flow smoothly.
- Use transitional phrases to link different sections and ideas.
- Maintain consistent themes and arguments throughout the post.

7. Formatting:
- Use Markdown syntax for headings (#, ##, ###), lists (-, 1.), and emphasis (**bold**, *italic*); do not emit raw HTML tags.
- Suggest places to break up text with [IMAGE PLACEHOLDER] or [VIDEO EMBED PLACEHOLDER] tags.
- Include a table of contents for longer articles.

8. Additional Elements:
- Create a "Key Takeaways" or "TL;DR" section for quick reference.
- Suggest pull quotes or highlight boxes for important information.
- If applicable, include a section addressing common questions or misconceptions about the topic.

Important: Create only ONE cohesive blog post that covers all the main points from the entire input. Ensure that the final output is a single, well-structured article, not multiple separate posts.

Please create a single, detailed, and engaging blog post based on the following input:

{input_text}

Remember to maintain a {tone} tone throughout the post and aim for a total of {word_count} words for the entire article.
'''

# Tone choices offered in the sidebar select box.
TONE_OPTIONS = [
    "Professional", "Casual", "Humorous", "Inspirational", "Educational",
    "Conversational", "Formal", "Enthusiastic", "Empathetic", "Authoritative",
]

# Article length choices: display label -> target word count passed to the prompt.
LENGTH_OPTIONS = {
    "Medium (1000-1500 words)": 1250,
    "Long (1500-2500 words)": 2000,
    "Extra Long (2500-3500 words)": 3000,
    "Comprehensive (3500-5000 words)": 4250,
}
102
+
103
# Patterns tried in order by extract_video_id; each captures the video ID in
# group 1. The ID is restricted to URL-safe characters so trailing slashes,
# whitespace, query strings and fragments never leak into the result.
_VIDEO_ID_PATTERNS = (
    # Watch pages: the "v" parameter may appear anywhere in the query string.
    re.compile(r'youtube\.com/watch/?\?(?:[^#\s]*?&)?v=([A-Za-z0-9_-]+)'),
    # Path-based forms: /embed/ID, /v/ID, /shorts/ID, /live/ID.
    re.compile(r'youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/([A-Za-z0-9_-]+)'),
    # Short links: youtu.be/ID.
    re.compile(r'youtu\.be/([A-Za-z0-9_-]+)'),
)


# Function to extract video ID from various YouTube URL formats
def extract_video_id(url):
    """Return the YouTube video ID found in ``url``, or ``None``.

    Recognises watch URLs (with ``v`` in any query position), embed, ``/v/``,
    shorts and live URLs, and ``youtu.be`` short links, with or without a
    scheme or subdomain. Anything else, including an empty value and other
    YouTube pages such as playlists, yields ``None``.
    """
    if not url:
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None
113
+
114
import threading

# In-process transcript cache keyed by video ID, so different URL spellings of
# the same video share one entry. Guarded by cache_lock for reads and writes.
cache = {}
cache_lock = threading.Lock()


# Transcript fetching with caching and a single, bounded retry loop
@st.cache_data
def get_transcript(youtube_video_url, max_retries=3, delay=2):
    """Fetch a video's transcript and return it as one space-joined string.

    Args:
        youtube_video_url: URL of the YouTube video.
        max_retries: Maximum number of fetch attempts.
        delay: Base delay in seconds; the wait before retry ``k`` is
            ``delay * k`` (linearly increasing).

    Returns:
        The transcript text.

    Raises:
        ValueError: If no video ID can be extracted from the URL. This is
            raised immediately and never retried.
        Exception: The last error raised by the transcript API once all
            attempts are used up, re-raised unchanged so callers can show the
            real cause.
    """
    video_id = extract_video_id(youtube_video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    # Check the in-process cache first.
    with cache_lock:
        cached_text = cache.get(video_id)
    if cached_text is not None:
        return cached_text

    for attempt in range(max_retries):
        try:
            # NOTE(review): newer youtube-transcript-api releases appear to
            # replace this static call with an instance-based API; confirm
            # against the pinned version.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        except Exception:
            if attempt == max_retries - 1:
                raise
            time.sleep(delay * (attempt + 1))  # linearly increasing backoff
            continue

        text = " ".join(entry["text"] for entry in transcript)
        with cache_lock:
            cache[video_id] = text
        return text

    # Only reachable when max_retries < 1, i.e. no attempt was made.
    raise Exception("Failed to retrieve transcript after multiple attempts")
147
+
148
# Function to chunk long text
def chunk_text(text, chunk_size=4000, overlap=500):
    """Split ``text`` into overlapping chunks of at most ``chunk_size`` characters.

    Text no longer than ``chunk_size`` is returned unchanged as a single-item
    list. Otherwise each chunk is cut at the last ``.`` inside its window when
    one exists beyond the overlap region, and at the hard ``chunk_size`` limit
    when none does. Consecutive chunks share ``overlap`` characters. Chunks are
    whitespace-stripped and empty ones are dropped.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters.
        overlap: Characters repeated at the start of each following chunk.

    Returns:
        A list of chunk strings.

    Raises:
        ValueError: If chunking is required and ``chunk_size`` is not positive
            or ``overlap`` is not in ``[0, chunk_size)``; such values cannot
            make forward progress.
    """
    # Only chunk if text is longer than chunk_size
    if len(text) <= chunk_size:
        return [text]

    if chunk_size <= 0 or not 0 <= overlap < chunk_size:
        raise ValueError("chunk_size must be positive and overlap must be in [0, chunk_size)")

    text_length = len(text)
    chunks = []
    start = 0
    while start < text_length:
        end = min(start + chunk_size, text_length)

        if end < text_length:
            # Prefer ending on a sentence boundary, but only one that lies past
            # the overlap region: a boundary at or before start + overlap would
            # make the next start move backwards and the loop never finish.
            boundary = text.rfind('.', start + overlap, end) + 1
            if boundary > start + overlap:
                end = boundary

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)

        # Stop once the end of the text is covered; stepping back by the
        # overlap here would only re-emit a tail already in the last chunk.
        if end >= text_length:
            break
        start = end - overlap

    return chunks
171
+
172
# Function to generate blog post using Gemini AI model with retries
def generate_blog_post(input_text, tone, keywords, length):
    """Produce one blog post from ``input_text`` in two stages.

    Stage one summarises every chunk of the input with its own model call.
    Stage two feeds the joined summaries into ``base_prompt_template`` and asks
    the model for the final article.

    Args:
        input_text: Source text (transcript or user-supplied content).
        tone: Tone name inserted into the prompt.
        keywords: Iterable of keyword strings, joined with ", " for the prompt.
        length: A key of ``LENGTH_OPTIONS`` selecting the target word count.

    Returns:
        The generated post text, or ``None`` when any model call fails (the
        failure is reported through ``st.error``).
    """
    target_words = LENGTH_OPTIONS[length]
    pieces = chunk_text(input_text)
    model = genai.GenerativeModel("gemini-1.5-flash")

    # Stage one: summarise each chunk separately.
    summaries = []
    for position, piece in enumerate(pieces, start=1):
        chunk_prompt = (
            "\n"
            "Analyze the following part of content and extract key points, main ideas, and important details:\n"
            "\n"
            f"{piece}\n"
            "\n"
            "Provide a concise summary of this part, highlighting the most important information.\n"
        )
        try:
            summary = generate_content_with_retry(model, chunk_prompt).text
        except Exception as e:
            st.error(f"Error processing chunk {position}: {e}")
            return None
        summaries.append(summary)

    # Stage two: build the final prompt from the collected summaries.
    final_prompt = base_prompt_template.format(
        tone=tone,
        keywords=', '.join(keywords),
        word_count=target_words,
        input_text='\n'.join(summaries),
    )

    try:
        return generate_content_with_retry(model, final_prompt).text
    except Exception as e:
        st.error(f"Error generating final blog post: {e}")
        return None
207
+
208
+
209
# Streamlit UI with progress tracking
def main():
    """Render the app: collect input and options, generate, and show the post.

    The input is either a YouTube URL (its transcript is fetched) or free text.
    Tone, keywords and length come from the sidebar. The generated post is kept
    in ``st.session_state.blog_post`` so it survives reruns, and is offered for
    download as Markdown.
    """
    st.set_page_config(page_title="BlogBrain Genius AI", layout="wide")

    # Initialize session state
    if 'blog_post' not in st.session_state:
        st.session_state.blog_post = None
    if 'processing' not in st.session_state:
        st.session_state.processing = False

    st.title("✍️ BlogBrain Genius AI: Video to Blog Alchemist")

    # Input method selection with proper state management
    input_method = st.radio("Choose input method:", ("YouTube Video", "Custom Text"))

    input_text = ""
    if input_method == "YouTube Video":
        youtube_url = st.text_input("Enter YouTube URL:")
        if youtube_url and not st.session_state.processing:
            try:
                with st.spinner("Fetching transcript..."):
                    input_text = get_transcript(youtube_url)
            except Exception as e:
                st.error(f"Error: {str(e)}")
    else:
        input_text = st.text_area("Enter your content:", height=200)

    # Sidebar options
    with st.sidebar:
        st.markdown("<h1 style='text-align: center; color: #4A90E2;'>🧠 BlogBrain Genius AI</h1>", unsafe_allow_html=True)
        st.markdown("<p style='text-align: center;'>Transform Content into Engaging Blog Posts</p>", unsafe_allow_html=True)
        st.markdown("---")
        tone = st.selectbox("Select tone:", TONE_OPTIONS)
        keywords = st.text_input("Enter keywords (comma-separated):")
        length = st.selectbox("Select length:", list(LENGTH_OPTIONS.keys()))

    if st.button("Generate Blog Post") and input_text:
        # Trim each keyword and drop empty entries so input such as "a, b,"
        # reaches the prompt as ["a", "b"] rather than ["a", " b", ""].
        keyword_list = [kw.strip() for kw in keywords.split(",") if kw.strip()]

        st.session_state.processing = True
        try:
            with st.spinner("Generating a single, comprehensive blog post..."):
                blog_post = generate_blog_post(
                    input_text,
                    tone,
                    keyword_list,
                    length,
                )
            if blog_post:
                st.session_state.blog_post = blog_post
                st.success("Blog post generated successfully!")
            else:
                st.error("Failed to generate the blog post. Please try again later.")
        except Exception as e:
            st.error(f"An unexpected error occurred: {str(e)}")
        finally:
            st.session_state.processing = False

    # Display results
    if st.session_state.blog_post:
        st.markdown(st.session_state.blog_post)
        st.download_button(
            "Download Blog Post",
            st.session_state.blog_post,
            "blog_post.md",
            "text/markdown",
        )
274
+
275
if __name__ == "__main__":
    main()

# Sidebar with creator information (rendered after main() has drawn the page).
# NOTE(review): the mailto target below looks like a redaction placeholder
# rather than a real address; confirm and replace it.
st.sidebar.markdown("---")
st.sidebar.title("About the Creator")
st.sidebar.info("""
Designed by Richardson Gunde 🎨

This advanced application uses AI to generate a single, comprehensive blog post based on long-form content from YouTube videos or user input.

🔗 [LinkedIn](https://www.linkedin.com/in/richardson-gunde)
📧 [Email](mailto:[email protected])
""")

# Page footer describing the app.
st.markdown("""
---
:green[This advanced app leverages the power of Google's Gemini AI to generate a single, detailed, SEO-optimized long-form blog post from YouTube videos or custom text.
It handles extensive content while ensuring a cohesive output.]
""")