Presentation-Assistant-LLaMA2

Running

App Files Files Community

Presentation-Assistant-LLaMA2 / presentation_assistant /presentation_assistant.py

Hyeonseo

revised: remove chatgpt token key

57eefd3 over 1 year ago

raw

history blame contribute delete

5.62 kB

	import os
	import PyPDF2
	from pptx import Presentation
	import subprocess
	from io import BytesIO
	import sys
	import requests

	# Inference endpoint for the Llama-2 70B chat model.
	API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"

	# Access token taken from the MY_HF_TOKEN environment variable; indexing
	# os.environ raises KeyError at import time when the variable is not set.
	hf_token = os.environ['MY_HF_TOKEN']

	# Bearer-token header passed along with requests to API_URL.
	headers = {"Authorization": f"Bearer {hf_token}"}

	def query(payload):
	    """Send ``payload`` to the inference endpoint and return the decoded reply.

	    The payload is posted as the JSON request body to ``API_URL`` together
	    with the module-level ``headers``. The response body is parsed as JSON
	    and returned as-is; the HTTP status code is not inspected.
	    """
	    return requests.post(API_URL, headers=headers, json=payload).json()

	# Append /home/user/app to the module search path (the same directory the
	# pandoc binary, templates and PPT files referenced below live under).
	sys.path.append("/home/user/app")

	# Function to generate text2ppt input prompt
	def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
	    """Build the LLM prompt that asks for a markdown slide deck.

	    Args:
	        input_type: "Link", "Text" or "PDF". Any other value prints an error
	            and leaves the content section of the prompt empty.
	        input_value: The link or text to summarize or, for "PDF", the path
	            of a PDF file whose extracted page text is summarized.
	        input_pages: Requested number of slides; interpolated into the
	            prompt with ``%s``.

	    Returns:
	        The prompt string: the header, then the content to summarize, then
	        the rules and the slide example.
	    """
	    header = """
	Assume you are a designer creating a PPT using markdown syntax, and write a PPT of %s pages.
	+++ Summarize the content or link below in markdown language, adhering to the rules in ===, and refer to the slide examples in ~~~.
	+++
	""" % input_pages

	    if input_type in ("Link", "Text"):
	        # Links and raw text are passed to the model verbatim.
	        summary_value = input_value + "\n"
	    elif input_type == "PDF":
	        with open(input_value, 'rb') as pdf_file:
	            pdf_reader = PyPDF2.PdfReader(pdf_file)
	            # Join once instead of growing a string page by page.
	            summary_value = "".join(
	                page.extract_text() for page in pdf_reader.pages
	            ) + "\n"
	    else:
	        # Best effort: report the problem and still return a prompt.
	        print("ERROR: Invalid input")
	        summary_value = ""

	    # The table hint is spelled with escaped backslashes ("\\|") so the
	    # prompt text still contains a literal backslash before each pipe
	    # without relying on an invalid escape sequence.
	    rule_value = """
	===
	- Always use '---' as a slide divider.
	- Write factually only about the content or link provided.
	- Design and arrange the slides diversely with appropriate shapes, images(![Image](Image link), https://unsplash.com/ko/images/stock/non-copyrighted for actual use), tables(\\|-\\|), quotes(>), emphasis(bold, ``), emojis(https://kr.piliapp.com/twitter-symbols/), icons (https://kr.piliapp.com/symbol/#popular).
	- Use emojis only once in every two pages, and use various other designs.
	- When using images and tables, specify the size considering the page size so that all the text content appears.
	- Make Slide 1 the title, for a total of %s pages.
	- Write the content of the PPT richly in markdown.
	- Don't explain slide by slide, just write the code.
	- Don't write using the content of the example, just refer to the format.
	~~~
	<!-- Slide 0. Slide Topic -->
	# Slide Title
	![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
	- This is 🤗TEXT2PPT service PA! using llama2.
	- Converts `link`,`text`, `PDF` input or upload into PPT.
	""" % input_pages

	    return header + summary_value + rule_value


	# Function to execute text2ppt
	def text2ppt(input_prompt, input_theme):
	    """Generate slide markdown with the LLM and convert it to PPTX via pandoc.

	    Args:
	        input_prompt: Prompt text; sent to the model after a fixed
	            "You are a kind helpful PPT designer. " prefix.
	        input_theme: 'default' to run pandoc without a reference document,
	            otherwise the base name of a .pptx file under
	            /home/user/app/template/ passed as ``--reference-doc``.

	    Side effects:
	        Writes ``text2ppt_input.md`` and runs pandoc, which is asked to
	        write /home/user/app/text2ppt_output.pptx. A non-zero pandoc exit
	        status is reported with ``print`` rather than raised.
	    """
	    output = query({
	        "inputs": "You are a kind helpful PPT designer. " + input_prompt,
	        "parameters": {
	            "return_full_text": False,
	            "max_new_tokens": 1000,
	        },
	    })
	    reply = output[0]['generated_text']

	    # Drop a leading slide divider. Only the three dashes and the newline
	    # right after them are removed, so no content character is lost when
	    # the model does not put a newline after the divider.
	    md_text = reply
	    if md_text.startswith("---"):
	        md_text = md_text[3:]
	        if md_text.startswith("\n"):
	            md_text = md_text[1:]

	    # NOTE(review): this path is relative to the working directory while
	    # pandoc below reads /home/user/app/text2ppt_input.md; the two agree
	    # only when the process runs from /home/user/app -- confirm.
	    # UTF-8 is explicit because the markdown may contain emojis and pandoc
	    # reads its input as UTF-8.
	    with open("text2ppt_input.md", 'w', encoding="utf-8") as md_file:
	        md_file.writelines(line.strip() + "\n" for line in md_text.split('\n'))

	    command = [
	        "/home/user/app/pandoc-2.14.2/bin/pandoc",
	        "/home/user/app/text2ppt_input.md",
	        "-t", "pptx",
	    ]
	    if input_theme != 'default':
	        command.append("--reference-doc=/home/user/app/template/" + input_theme + ".pptx")
	    command += ["-o", "/home/user/app/text2ppt_output.pptx"]

	    result = subprocess.run(command, capture_output=True)
	    if result.returncode != 0:
	        # Surface conversion failures instead of discarding them silently.
	        print("ERROR: pandoc failed:", result.stderr.decode("utf-8", errors="replace"))


	def ppt2script(input_file, input_type):
	    """Ask the LLM for a presentation script covering a PDF or PPTX deck.

	    Args:
	        input_file: Path of the file to read.
	        input_type: "PDF" to read the file with PyPDF2; any other value
	            opens it with python-pptx's ``Presentation``.

	    Returns:
	        The ``generated_text`` of the first item in the model response.
	    """
	    parts = []

	    if input_type == "PDF":
	        with open(input_file, 'rb') as pdf_file:
	            reader = PyPDF2.PdfReader(pdf_file)
	            # Each page contributes a 1-based marker followed by its text.
	            for number, page in enumerate(reader.pages, start=1):
	                parts.append("[PAGE_NUM " + str(number) + "]")
	                parts.append(page.extract_text())
	    else:
	        deck = Presentation(input_file)
	        for number, slide in enumerate(deck.slides, start=1):
	            parts.append("[PAGE_NUM " + str(number) + "]")
	            for shape in slide.shapes:
	                # Only shapes with a text frame carry run text.
	                if shape.has_text_frame:
	                    for paragraph in shape.text_frame.paragraphs:
	                        parts.extend(run.text for run in paragraph.runs)

	    text = "".join(parts)

	    instructions = """
	You are an assistant helping with PPT presentations.
	~~~Follow the rules below and write a presentation script for the PPT content below.
	~~~
	- When [PAGE_NUM 1], where 1 is the page number, write a presentation script for each page number.
	- Write only in text without using markdown language.
	- Add additional explanations or examples to the PPT content.
	---
	"""

	    request = {
	        "inputs": "You are a kind helpful PPT Assistant." + instructions + text,
	        "parameters": {
	            "return_full_text": False,
	            "max_new_tokens": 1000,
	        },
	    }
	    return query(request)[0]['generated_text']