File size: 5,620 Bytes
863ee52 5da8554 863ee52 5da8554 3bb1385 5da8554 1274b0f 5da8554 863ee52 5c41c3c 1173304 863ee52 5c41c3c 863ee52 57eefd3 1274b0f 249d61a b3fcf69 1b9fc54 863ee52 1b9fc54 863ee52 57eefd3 863ee52 1274b0f 249d61a b3fcf69 1b9fc54 863ee52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import os
import PyPDF2
from pptx import Presentation
import subprocess
from io import BytesIO
import sys
import requests
# Hugging Face Inference API configuration.
# The access token is read from the MY_HF_TOKEN environment variable; if the
# variable is not set, the lookup raises KeyError when this module is loaded.
hf_token = os.environ['MY_HF_TOKEN']
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "meta-llama/Llama-2-70b-chat-hf"
)
headers = {"Authorization": f"Bearer {hf_token}"}
def query(payload, timeout=300):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body, sent with ``json=payload``.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        Whatever ``response.json()`` decodes from the response body.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # Without a timeout a stalled endpoint would block the caller forever.
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()
# Add the application directory to Python's module search path.
# NOTE(review): presumably needed so code under /home/user/app can be imported
# when the process starts from another directory - confirm against the launcher.
sys.path.append("/home/user/app")
# Opening instructions of the text2ppt prompt; %s receives the page count.
_TEXT2PPT_HEADER_TEMPLATE = """
Assume you are a designer creating a PPT using markdown syntax, and write a PPT of %s pages.
+++ Summarize the content or link below in markdown language, adhering to the rules in ===, and refer to the slide examples in ~~~.
+++
"""

# Formatting rules (after ===) and slide example (after ~~~) appended to the
# prompt; %s receives the page count.
_TEXT2PPT_RULES_TEMPLATE = """
===
- Always use '---' as a slide divider.
- Write factually only about the content or link provided.
- Design and arrange the slides diversely with appropriate shapes, images(![Image](Image link), https://unsplash.com/ko/images/stock/non-copyrighted for actual use), tables(|-|), quotes(>), emphasis(bold, ``), emojis(https://kr.piliapp.com/twitter-symbols/), icons (https://kr.piliapp.com/symbol/#popular).
- Use emojis only once in every two pages, and use various other designs.
- When using images and tables, specify the size considering the page size so that all the text content appears.
- Make Slide 1 the title, for a total of %s pages.
- Write the content of the PPT richly in markdown.
- Don't explain slide by slide, just write the code.
- Don't write using the content of the example, just refer to the format.
~~~
<!-- Slide 0. Slide Topic -->
# Slide Title
![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
- This is 🤗**TEXT2PPT service PA!** using llama2.
- Converts `link`,`text`, `PDF` input or upload into PPT.
"""


# Function to generate text2ppt input prompt
def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Build the prompt asking the model for a markdown slide deck.

    The result is the header, then the source material followed by a
    newline, then the formatting rules and slide example.

    Args:
        input_type: ``"Link"`` or ``"Text"`` to embed *input_value* verbatim,
            or ``"PDF"`` to embed the text extracted from the PDF file at
            path *input_value*. Any other value prints an error and embeds
            no source material.
        input_value: The link, the text, or the PDF file path.
        input_pages: Requested page count, interpolated into the prompt.

    Returns:
        The assembled prompt string.
    """
    # A one-element tuple keeps %-formatting correct even if input_pages is
    # itself a tuple.
    header = _TEXT2PPT_HEADER_TEMPLATE % (input_pages,)
    rule_value = _TEXT2PPT_RULES_TEMPLATE % (input_pages,)

    if input_type in ("Link", "Text"):
        summary_value = input_value + "\n"
    elif input_type == "PDF":
        with open(input_value, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Pages must be read while the file is still open; "or ''" guards
            # against a page for which no text could be extracted.
            text = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        summary_value = text + "\n"
    else:
        # Best effort: report the problem but still return a prompt.
        print("ERROR: Invalid input")
        summary_value = ""

    return header + summary_value + rule_value
# Function to execute text2ppt
def text2ppt(input_prompt, input_theme):
    """Generate slide markdown with the model and convert it to PPTX.

    The model reply is written line by line (each line stripped) to
    ``/home/user/app/text2ppt_input.md`` and converted by the bundled pandoc
    binary into ``/home/user/app/text2ppt_output.pptx``.

    Args:
        input_prompt: Prompt text appended to the designer preamble.
        input_theme: ``'default'`` for pandoc's built-in look; any other
            value selects ``/home/user/app/template/<input_theme>.pptx`` as
            the pandoc reference document.

    Returns:
        None. A pandoc failure is reported with ``print`` and does not raise.
    """
    pandoc_bin = "/home/user/app/pandoc-2.14.2/bin/pandoc"
    md_path = "/home/user/app/text2ppt_input.md"
    pptx_path = "/home/user/app/text2ppt_output.pptx"

    output = query({
        "inputs": "You are a kind helpful PPT designer. " + input_prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    })
    reply = output[0]['generated_text']

    # Drop a leading slide divider. Every line is stripped before writing, so
    # the check must ignore leading whitespace too, otherwise the divider
    # would still end up as the first line of the file.
    stripped_reply = reply.lstrip()
    if stripped_reply.startswith("---"):
        md_text = stripped_reply[3:].lstrip()
    else:
        md_text = reply

    # Write to the same absolute path pandoc reads, as UTF-8 because the
    # generated markdown may contain emoji.
    with open(md_path, 'w', encoding='utf-8') as md_file:
        md_file.writelines(
            line.strip() + "\n" for line in md_text.split('\n')
        )

    command = [pandoc_bin, md_path, "-t", "pptx"]
    if input_theme != 'default':
        command.append(
            "--reference-doc=/home/user/app/template/" + input_theme + ".pptx"
        )
    command += ["-o", pptx_path]

    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Surface the failure instead of discarding it silently.
        print("ERROR: pandoc failed: " + result.stderr.decode(errors="replace"))
# Instructions placed in front of the extracted slide text.
_PPT2SCRIPT_HEADER = """
You are an assistant helping with PPT presentations.
~~~Follow the rules below and write a presentation script for the PPT content below.
~~~
- When [PAGE_NUM 1], where 1 is the page number, write a presentation script for each page number.
- Write only in text without using markdown language.
- Add additional explanations or examples to the PPT content.
---
"""


def _pdf_text_with_page_markers(pdf_path):
    """Return the PDF's text with a ``[PAGE_NUM n]`` marker before each page."""
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction has to happen while the file is still open.
        return "".join(
            "[PAGE_NUM " + str(page_num) + "]" + (page.extract_text() or "")
            for page_num, page in enumerate(pdf_reader.pages, start=1)
        )


def _pptx_text_with_page_markers(pptx_path):
    """Return the deck's run text with a ``[PAGE_NUM n]`` marker before each slide."""
    parts = []
    for page_num, slide in enumerate(Presentation(pptx_path).slides, start=1):
        parts.append("[PAGE_NUM " + str(page_num) + "]")
        for shape in slide.shapes:
            # Shapes without a text frame carry no text to collect.
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                parts.extend(run.text for run in paragraph.runs)
    return "".join(parts)


def ppt2script(input_file, input_type):
    """Ask the model for a presentation script covering *input_file*.

    Args:
        input_file: Path of the file to read.
        input_type: ``"PDF"`` to read *input_file* with PyPDF2; any other
            value opens it with ``pptx.Presentation``.

    Returns:
        The ``generated_text`` of the first item in the model response.
    """
    if input_type == "PDF":
        text = _pdf_text_with_page_markers(input_file)
    else:
        text = _pptx_text_with_page_markers(input_file)

    input_prompt = _PPT2SCRIPT_HEADER + text
    output = query({
        "inputs": "You are a kind helpful PPT Assistant." + input_prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    })
    return output[0]['generated_text']
|