import os
import subprocess
import sys
from io import BytesIO

import PyPDF2
import requests
from pptx import Presentation

# Hugging Face access token. Indexing os.environ (rather than .get) makes a
# missing MY_HF_TOKEN fail immediately with KeyError when the module loads.
hf_token = os.environ['MY_HF_TOKEN']

# Hosted inference endpoint for the Llama 2 70B chat model.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"

# Bearer-token header sent with every inference request.
headers = {"Authorization": "Bearer " + hf_token}

def query(payload, timeout=None):
    """POST *payload* as JSON to the inference endpoint and return the decoded reply.

    Args:
        payload: JSON-serialisable request body (the callers in this file send
            a dict with "inputs" and "parameters" keys).
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            The default ``None`` waits indefinitely, which matches the
            behaviour before this parameter existed.

    Returns:
        The response body decoded from JSON. No status check is performed, so
        an error payload from the service is returned as-is.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

# Add the application directory to the module search path.
sys.path.append("/home/user/app")

def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Build the LLM prompt that asks for a markdown slide deck.

    Args:
        input_type: "Link", "Text" or "PDF". Any other value prints an error
            message and produces a prompt with no source content.
        input_value: The link or text to summarise, or (for "PDF") the path of
            a PDF file whose extracted page text is embedded in the prompt.
        input_pages: Requested number of slides; interpolated into the prompt
            with ``%s``.

    Returns:
        A single string: the instruction header, the source content, and the
        formatting rules with a slide example.
    """
    header = """
Assume you are a designer creating a PPT using markdown syntax, and write a PPT of %s pages.
+++ Summarize the content or link below in markdown language, adhering to the rules in ===, and refer to the slide examples in ~~~.
+++
""" % input_pages

    if input_type in ("Link", "Text"):
        # Links and raw text are embedded verbatim; the model is asked to
        # summarise whichever it receives.
        summary_value = input_value + "\n"
    elif input_type == "PDF":
        with open(input_value, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Extraction must happen while the file is still open.
            # `or ""` keeps the join safe if a page yields no text object.
            summary_value = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            ) + "\n"
    else:
        # Best effort: report the problem but still return a prompt.
        print("ERROR: Invalid input")
        summary_value = ""

    rule_value = """
===
- Always use '---' as a slide divider.
- Write factually only about the content or link provided.
- Design and arrange the slides diversely with appropriate shapes, images(![Image](Image link), https://unsplash.com/ko/images/stock/non-copyrighted for actual use), tables(|-|), quotes(>), emphasis(bold, ``), emojis(https://kr.piliapp.com/twitter-symbols/), icons (https://kr.piliapp.com/symbol/#popular).
- Use emojis only once in every two pages, and use various other designs.
- When using images and tables, specify the size considering the page size so that all the text content appears.
- Make Slide 1 the title, for a total of %s pages.
- Write the content of the PPT richly in markdown.
- Don't explain slide by slide, just write the code.
- Don't write using the content of the example, just refer to the format.
~~~
<!-- Slide 0. Slide Topic -->
# Slide Title
![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
- This is 🤗**TEXT2PPT service PA!** using llama2.
- Converts `link`,`text`, `PDF` input or upload into PPT.
""" % input_pages

    return header + summary_value + rule_value

def text2ppt(input_prompt, input_theme):
    """Ask the model for slide markdown and convert it to a PPTX with pandoc.

    Args:
        input_prompt: Prompt text sent to the model after a fixed persona
            prefix.
        input_theme: 'default' to use pandoc's built-in layout; any other
            value is the base name of a ``.pptx`` file under
            ``/home/user/app/template/`` passed to pandoc as
            ``--reference-doc``.

    Side effects:
        Writes ``text2ppt_input.md`` in the current working directory and
        runs pandoc to produce ``/home/user/app/text2ppt_output.pptx``.
        A non-zero pandoc exit status is reported with ``print``; no
        exception is raised for it.
    """
    output = query({
        "inputs": "You are a kind helpful PPT designer. " + input_prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    })
    reply = output[0]['generated_text']

    # A reply that opens with a slide divider has its first four characters
    # dropped (the divider plus the character that follows it).
    md_text = reply[4:] if reply[:3] == "---" else reply

    # NOTE(review): the markdown is written relative to the working directory
    # while pandoc reads /home/user/app/text2ppt_input.md; these only match
    # when the process runs from /home/user/app. Confirm before changing.
    # UTF-8 is explicit because pandoc expects UTF-8 input and the model
    # output may contain emoji.
    with open("text2ppt_input.md", 'w', encoding="utf-8") as md_file:
        md_file.writelines(line.strip() + "\n" for line in md_text.split('\n'))

    command = [
        "/home/user/app/pandoc-2.14.2/bin/pandoc",
        "/home/user/app/text2ppt_input.md",
        "-t", "pptx",
    ]
    if input_theme != 'default':
        command.append("--reference-doc=/home/user/app/template/" + input_theme + ".pptx")
    command += ["-o", "/home/user/app/text2ppt_output.pptx"]

    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Surface conversion failures instead of discarding them silently.
        print("ERROR: pandoc failed:", result.stderr.decode(errors="replace"))

def ppt2script(input_file, input_type):
    """Generate a presentation script for a PDF or PowerPoint file.

    The text of every page/slide is extracted, each one prefixed with a
    ``[PAGE_NUM n]`` marker (1-based), appended to a fixed instruction
    header, and sent to the model.

    Args:
        input_file: Path of (or, for PowerPoint, anything accepted by
            ``Presentation``) the document to read.
        input_type: "PDF" to read with PyPDF2; any other value is treated as
            a PowerPoint file and read with python-pptx.

    Returns:
        The ``generated_text`` of the first element of the model's reply.
    """
    if input_type == "PDF":
        with open(input_file, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Extraction must happen while the file is still open.
            text = "".join(
                f"[PAGE_NUM {page_num}]{page.extract_text() or ''}"
                for page_num, page in enumerate(pdf_reader.pages, start=1)
            )
    else:
        prs = Presentation(input_file)
        parts = []
        for page_num, slide in enumerate(prs.slides, start=1):
            parts.append(f"[PAGE_NUM {page_num}]")
            for shape in slide.shapes:
                # Only shapes with a text frame contribute text.
                if not shape.has_text_frame:
                    continue
                for paragraph in shape.text_frame.paragraphs:
                    # Runs are concatenated without any separator.
                    parts.extend(run.text for run in paragraph.runs)
        text = "".join(parts)

    header = """
You are an assistant helping with PPT presentations.
~~~Follow the rules below and write a presentation script for the PPT content below.
~~~
- When [PAGE_NUM 1], where 1 is the page number, write a presentation script for each page number.
- Write only in text without using markdown language.
- Add additional explanations or examples to the PPT content.
---
"""

    input_prompt = header + text

    output = query({
        "inputs": "You are a kind helpful PPT Assistant." + input_prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    })
    return output[0]['generated_text']