Spaces:
Sleeping
Sleeping
GundeRichardson
committed on
Commit
•
999b5dc
1
Parent(s):
b16b0f8
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import google.generativeai as genai
|
4 |
+
import os
|
5 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
6 |
+
import time
|
7 |
+
import re
|
8 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
9 |
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
10 |
+
from google.api_core.exceptions import ResourceExhausted
|
11 |
+
# Read settings from a .env file into the process environment.
load_dotenv()

# Give the google.generativeai client the API key stored in GOOGLE_API_KEY.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
16 |
+
|
17 |
+
|
18 |
+
@retry(
    retry=retry_if_exception_type(ResourceExhausted),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5),
    reraise=True,
)
def generate_content_with_retry(model, prompt):
    """Call ``model.generate_content(prompt)``, retrying on quota errors.

    Every attempt first sleeps for two seconds to space out API calls.
    When the call raises ``ResourceExhausted`` a Streamlit warning is shown
    and the exception is re-raised so the ``@retry`` decorator can schedule
    another attempt (at most 5 attempts, exponential wait between 4 and 60
    seconds). Any other exception propagates immediately.

    Args:
        model: Object exposing ``generate_content(prompt)``.
        prompt: Prompt passed through to the model unchanged.

    Returns:
        Whatever ``model.generate_content(prompt)`` returns.
    """
    # Fixed pause before each attempt to throttle request rate.
    time.sleep(2)
    try:
        response = model.generate_content(prompt)
    except ResourceExhausted as quota_error:
        st.warning(f"API quota exceeded. Retrying in a moment... ({quota_error})")
        raise
    return response
|
31 |
+
|
32 |
+
# Prompt skeleton for the final blog-post request. It is filled in with
# str.format() and expects exactly four fields: {tone}, {keywords},
# {word_count} and {input_text}.
base_prompt_template = """
You are an AI assistant specializing in transforming long-form content, such as YouTube video transcripts or user-provided text, into a single, cohesive, and engaging blog post. Your task is to create a comprehensive blog post that captures the essence of the entire input while enriching it with additional information, insights, and a conversational touch.

Guidelines for the Blog Post:

1. Structure:
- Title: Create an engaging title for the blog post.
- Meta Description: Write a compelling 150-160 character meta description for SEO.
- Introduction: Briefly introduce the topic and hook the reader.
- Main Body: Divide into relevant sections with subheadings. Ensure smooth transitions between sections.
- Conclusion: Summarize key points and provide a call-to-action.

2. Content Enhancement:
- Synthesize information from all parts of the input to create a coherent narrative.
- Provide additional explanations, examples, or related information to enrich the content.
- Include interesting anecdotes or expert opinions to add depth and credibility.

3. Engagement:
- Use a {tone} tone consistently throughout the post.
- Include relevant descriptions of potential visuals or infographics.
- Structure the post for easy readability using subheadings, bullet points, and short paragraphs.

4. SEO Optimization:
- Naturally incorporate these keywords: {keywords}
- Use variations and related terms to avoid keyword stuffing.
- Implement proper heading structure (H1 for title, H2 for main sections, H3 for subsections).

5. Length and Style:
- Aim for a total of approximately {word_count} words for the entire blog post.
- Use varied sentence structures and paragraph lengths for better flow.
- Incorporate rhetorical devices like analogies, metaphors, or storytelling elements where appropriate.

6. Cohesion:
- Ensure that all parts of the blog post connect logically and flow smoothly.
- Use transitional phrases to link different sections and ideas.
- Maintain consistent themes and arguments throughout the post.

7. Formatting:
- Use appropriate HTML tags for headings (h1, h2, h3), lists (ul, ol), and emphasis (strong, em).
- Suggest places to break up text with [IMAGE PLACEHOLDER] or [VIDEO EMBED PLACEHOLDER] tags.
- Include a table of contents for longer articles.

8. Additional Elements:
- Create a "Key Takeaways" or "TL;DR" section for quick reference.
- Suggest pull quotes or highlight boxes for important information.
- If applicable, include a section addressing common questions or misconceptions about the topic.

Important: Create only ONE cohesive blog post that covers all the main points from the entire input. Ensure that the final output is a single, well-structured article, not multiple separate posts.

Please create a single, detailed, and engaging blog post based on the following input:

{input_text}

Remember to maintain a {tone} tone throughout the post and aim for a total of {word_count} words for the entire article.
"""
|
88 |
+
|
89 |
+
# Writing tones offered in the UI, in display order.
TONE_OPTIONS = [
    "Professional",
    "Casual",
    "Humorous",
    "Inspirational",
    "Educational",
    "Conversational",
    "Formal",
    "Enthusiastic",
    "Empathetic",
    "Authoritative",
]

# Maps each length label shown in the UI to the target word count
# that is substituted into the prompt.
LENGTH_OPTIONS = dict(
    [
        ("Medium (1000-1500 words)", 1250),
        ("Long (1500-2500 words)", 2000),
        ("Extra Long (2500-3500 words)", 3000),
        ("Comprehensive (3500-5000 words)", 4250),
    ]
)
|
102 |
+
|
103 |
+
# Function to extract video ID from various YouTube URL formats
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL, or ``None``.

    Handles ``watch?v=``, ``youtu.be/``, ``embed/``, ``v/``, ``shorts/`` and
    ``live/`` style links. A ``v=`` query parameter is honoured wherever it
    appears in a ``/watch`` query string, so links such as
    ``watch?feature=share&v=<id>`` resolve to ``<id>`` rather than to the
    path segment ``watch``.

    Args:
        url: URL text, typically pasted by a user; surrounding whitespace
            is ignored.

    Returns:
        The captured ID string, or ``None`` when ``url`` is empty or no
        pattern matches.
    """
    if not url:
        return None
    # Pasted URLs often carry a trailing newline or space.
    url = url.strip()

    patterns = [
        # An explicit ``v`` parameter in a /watch query string. The lazy
        # optional group prefers a leading ``v=`` and otherwise skips over
        # preceding ``key=value&`` pairs.
        r'youtube\.com\/watch\?(?:[^#\s]*?&)??v=([^?&"\'>#\s]+)',
        # Path-based forms (youtu.be/<id>, /embed/<id>, /shorts/<id>, ...).
        r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/(?:watch\?v=)?(?:embed\/)?(?:v\/)?(?:shorts\/)?(?:live\/)?(?:feature=player_embedded&v=)?([^?&"\'>]+)',
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
|
113 |
+
|
114 |
+
import threading
|
115 |
+
# Module-level store for already-processed data, plus the lock that
# serialises writes to it.
cache = dict()
cache_lock = threading.Lock()
|
118 |
+
|
119 |
+
# Transcript fetching with caching and a single, bounded retry loop
@st.cache_data
def get_transcript(youtube_video_url, max_retries=3, delay=2):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Retrying is handled only by the loop below. An additional outer retry
    decorator would also retry the non-retryable "Invalid YouTube URL"
    error, multiply the number of fetch attempts, and hide the real
    exception from callers behind a retry wrapper error.

    Args:
        youtube_video_url: URL of the video.
        max_retries: Maximum number of fetch attempts.
        delay: Base wait in seconds; the wait after attempt ``k`` (1-based)
            is ``delay * k``.

    Returns:
        The transcript text.

    Raises:
        ValueError: If no video ID can be extracted from the URL.
        Exception: The error raised by the last failed fetch attempt, or a
            generic ``Exception`` when ``max_retries`` allows no attempt.
    """
    video_id = extract_video_id(youtube_video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    # Check the in-process cache first (read under the same lock as writes).
    with cache_lock:
        if video_id in cache:
            return cache[video_id]

    last_error = None
    for attempt in range(max_retries):
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        except Exception as exc:
            last_error = exc
            # Linear backoff; no point sleeping after the final attempt.
            if attempt < max_retries - 1:
                time.sleep(delay * (attempt + 1))
            continue

        text = " ".join(entry["text"] for entry in transcript)

        # Cache the result
        with cache_lock:
            cache[video_id] = text

        return text

    if last_error is not None:
        raise last_error
    raise Exception("Failed to retrieve transcript after multiple attempts")
|
147 |
+
|
148 |
+
# Function to chunk long text
def chunk_text(text, chunk_size=4000, overlap=500):
    """Split ``text`` into overlapping chunks, cutting at sentence ends.

    Text no longer than ``chunk_size`` is returned as a single chunk.
    Otherwise each chunk spans at most ``chunk_size`` characters and, when
    possible, ends just after the last ``'.'`` in its window. Consecutive
    chunks share ``overlap`` characters. Chunks are stripped of surrounding
    whitespace and empty chunks are dropped.

    The sentence-end search ignores the first ``overlap`` characters of the
    window. A cut there would place the next start at or before the current
    one, so the loop would never advance.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters repeated between consecutive chunks.

    Returns:
        List of chunk strings in document order.

    Raises:
        ValueError: If the text needs splitting and ``chunk_size`` is not
            positive or ``overlap`` is not in ``[0, chunk_size)``.
    """
    # Only chunk if text is longer than chunk_size
    if len(text) <= chunk_size:
        return [text]

    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    chunks = []
    text_len = len(text)
    start = 0
    while start < text_len:
        end = start + chunk_size

        if end >= text_len:
            # Final window: take the remainder and stop, so no redundant
            # tail that is already contained in this chunk gets emitted.
            tail = text[start:].strip()
            if tail:
                chunks.append(tail)
            break

        # Find the nearest sentence end that still lets the window advance.
        sentence_end = text.rfind('.', start + overlap, end) + 1
        if sentence_end > 0:
            end = sentence_end

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        # end > start + overlap here, so start strictly increases.
        start = end - overlap

    return chunks
|
171 |
+
|
172 |
+
# Function to generate blog post using Gemini AI model with retries
def generate_blog_post(input_text, tone, keywords, length):
    """Turn ``input_text`` into a single blog post in two stages.

    The input is first split with ``chunk_text`` and each chunk is
    summarised by the model. The summaries are then joined and substituted
    into ``base_prompt_template`` for one final generation request.

    Args:
        input_text: Source material (transcript or user text).
        tone: Tone name inserted into the prompt.
        keywords: Iterable of keyword strings. Surrounding whitespace is
            stripped and blank entries are dropped before they are joined
            into the prompt.
        length: Key of ``LENGTH_OPTIONS`` selecting the target word count.

    Returns:
        The generated blog post text, or ``None`` if any model call failed
        (the failure is reported through ``st.error``).

    Raises:
        KeyError: If ``length`` is not a key of ``LENGTH_OPTIONS``.
    """
    word_count = LENGTH_OPTIONS[length]
    chunks = chunk_text(input_text)
    model = genai.GenerativeModel("gemini-1.5-flash")

    # Input such as "seo, ai," would otherwise reach the prompt with stray
    # spaces and an empty trailing keyword.
    cleaned_keywords = [kw.strip() for kw in keywords if kw.strip()]

    all_content = []
    for i, chunk in enumerate(chunks):
        chunk_prompt = (
            "\n"
            "Analyze the following part of content and extract key points, main ideas, and important details:\n"
            "\n"
            f"{chunk}\n"
            "\n"
            "Provide a concise summary of this part, highlighting the most important information.\n"
        )
        try:
            response = generate_content_with_retry(model, chunk_prompt)
            all_content.append(response.text)
        except Exception as e:
            # Abort on the first failed chunk: a post built from partial
            # summaries would silently miss content.
            st.error(f"Error processing chunk {i+1}: {str(e)}")
            return None

    final_prompt = base_prompt_template.format(
        tone=tone,
        keywords=', '.join(cleaned_keywords),
        word_count=word_count,
        input_text='\n'.join(all_content)
    )

    try:
        final_response = generate_content_with_retry(model, final_prompt)
        return final_response.text
    except Exception as e:
        st.error(f"Error generating final blog post: {str(e)}")
        return None
|
207 |
+
|
208 |
+
|
209 |
+
# Streamlit UI with progress tracking
def main():
    """Render the Streamlit page and drive blog-post generation.

    Collects either a YouTube URL (whose transcript is fetched) or free
    text, reads tone, keywords and length from the sidebar, and on button
    click calls ``generate_blog_post``. The latest successful result is kept
    in ``st.session_state.blog_post`` so it stays visible and downloadable
    across reruns.
    """
    st.set_page_config(page_title="BlogBrain Genius AI", layout="wide")

    # Initialize session state
    if 'blog_post' not in st.session_state:
        st.session_state.blog_post = None
    if 'processing' not in st.session_state:
        st.session_state.processing = False

    st.title("βοΈ BlogBrain Genius AI: Video to Blog Alchemist")

    # Input method selection with proper state management
    input_method = st.radio("Choose input method:", ("YouTube Video", "Custom Text"))

    input_text = ""
    if input_method == "YouTube Video":
        youtube_url = st.text_input("Enter YouTube URL:")
        if youtube_url and not st.session_state.processing:
            try:
                with st.spinner("Fetching transcript..."):
                    input_text = get_transcript(youtube_url)
            except Exception as e:
                st.error(f"Error: {str(e)}")
    else:
        input_text = st.text_area("Enter your content:", height=200)

    # Sidebar options
    with st.sidebar:
        st.markdown("<h1 style='text-align: center; color: #4A90E2;'>π§ BlogBrain Genius AI</h1>", unsafe_allow_html=True)
        st.markdown("<p style='text-align: center;'>Transform Content into Engaging Blog Posts</p>", unsafe_allow_html=True)
        st.markdown("---")
        tone = st.selectbox("Select tone:", TONE_OPTIONS)
        keywords = st.text_input("Enter keywords (comma-separated):")
        length = st.selectbox("Select length:", list(LENGTH_OPTIONS.keys()))

    generate_clicked = st.button("Generate Blog Post")
    if generate_clicked and not input_text:
        # Without this the click would be a silent no-op.
        st.warning("Nothing to generate from yet. Provide a YouTube URL with an available transcript or enter some text.")
    elif generate_clicked:
        st.session_state.processing = True
        try:
            with st.spinner("Generating a single, comprehensive blog post..."):
                blog_post = generate_blog_post(
                    input_text,
                    tone,
                    keywords.split(",") if keywords else [],
                    length
                )
            if blog_post:
                st.session_state.blog_post = blog_post
                st.success("Blog post generated successfully!")
            else:
                st.error("Failed to generate the blog post. Please try again later.")
        except Exception as e:
            st.error(f"An unexpected error occurred: {str(e)}")
        finally:
            # Always clear the flag, even when generation raised.
            st.session_state.processing = False

    # Display results
    if st.session_state.blog_post:
        st.markdown(st.session_state.blog_post)
        st.download_button(
            "Download Blog Post",
            st.session_state.blog_post,
            "blog_post.md",
            "text/markdown"
        )
|
274 |
+
|
275 |
+
if __name__ == "__main__":
    main()

# Creator credits shown at the bottom of the sidebar.
_creator_blurb = """
Designed by Richardson Gunde π¨

This advanced application uses AI to generate a single, comprehensive blog post based on long-form content from YouTube videos or user input.

π [LinkedIn](https://www.linkedin.com/in/richardson-gunde)
π§ [Email](mailto:[email protected])
"""
st.sidebar.markdown("---")
st.sidebar.title("About the Creator")
st.sidebar.info(_creator_blurb)

# Footer note rendered in the main page area.
_app_footer = """
---
:green[This advanced app leverages the power of Google's Gemini AI to generate a single, detailed, SEO-optimized long-form blog post from YouTube videos or custom text.
It handles extensive content while ensuring a cohesive output.]
"""
st.markdown(_app_footer)
|