Presentation-Assistant-LLaMA2

Running

File size: 5,620 Bytes

import os
import PyPDF2
from pptx import Presentation
import subprocess
from io import BytesIO
import sys
import requests

# Hugging Face access token, read from the environment. A missing
# MY_HF_TOKEN variable raises KeyError as soon as this module is loaded.
hf_token = os.environ["MY_HF_TOKEN"]

# Endpoint that query() posts to.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"

# Bearer-token authorization header sent with every request made by query().
headers = {"Authorization": f"Bearer {hf_token}"}

def query(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serialisable request body.

    Returns:
        The response body as parsed by ``response.json()``.
    """
    return requests.post(API_URL, headers=headers, json=payload).json()
	
# Add the app directory to the module search path.
sys.path.append("/home/user/app") 

# Function to generate text2ppt input prompt
def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Build the LLM prompt that asks for a markdown slide deck.

    The prompt is the concatenation of a header, the source material, and a
    block of formatting rules followed by an example slide.

    Args:
        input_type: One of ``"Link"``, ``"Text"`` or ``"PDF"``.
        input_value: The link or text itself, or, for ``"PDF"``, the path of
            a PDF file whose extracted text becomes the source material.
        input_pages: Requested number of slides; interpolated into the
            prompt with ``%s``.

    Returns:
        The full prompt string. For any other ``input_type`` an error message
        is printed and the prompt is returned without source material.
    """
    header = """
    Assume you are a designer creating a PPT using markdown syntax, and write a PPT of %s pages.
    +++ Summarize the content or link below in markdown language, adhering to the rules in ===, and refer to the slide examples in ~~~.
    +++
    """ % input_pages

    if input_type in ("Link", "Text"):
        # Links and raw text are passed through to the model unchanged.
        summary_value = input_value + "\n"
    elif input_type == "PDF":
        with open(input_value, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # The reader pulls page data from the underlying stream when a
            # page is accessed, so the text has to be extracted while the
            # file is still open.
            text = "".join(page.extract_text() for page in pdf_reader.pages)
        summary_value = text + "\n"
    else:
        print("ERROR: Invalid input")
        summary_value = ""

    rule_value = """
    ===
    - Always use '---' as a slide divider.
    - Write factually only about the content or link provided.
    - Design and arrange the slides diversely with appropriate shapes, images(![Image](Image link), https://unsplash.com/ko/images/stock/non-copyrighted for actual use), tables(|-|), quotes(>), emphasis(bold, ``), emojis(https://kr.piliapp.com/twitter-symbols/), icons (https://kr.piliapp.com/symbol/#popular).
    - Use emojis only once in every two pages, and use various other designs.
    - When using images and tables, specify the size considering the page size so that all the text content appears.
    - Make Slide 1 the title, for a total of %s pages.
    - Write the content of the PPT richly in markdown.
    - Don't explain slide by slide, just write the code.
    - Don't write using the content of the example, just refer to the format.
    ~~~
    <!-- Slide 0. Slide Topic -->
    # Slide Title
    ![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
    - This is 🤗**TEXT2PPT service PA!** using llama2.
    - Converts `link`,`text`, `PDF` input or upload into PPT. 
    """ % input_pages

    return header + summary_value + rule_value


# Function to execute text2ppt
def text2ppt(input_prompt, input_theme):
    """Generate slide markdown with the LLM and convert it to PPTX via pandoc.

    Args:
        input_prompt: Prompt text sent to the model after a short system-style
            prefix.
        input_theme: ``'default'`` to run pandoc without a template; any other
            value is used as the base name of a ``.pptx`` file under
            ``/home/user/app/template/`` and passed as ``--reference-doc``.

    Side effects:
        Writes ``/home/user/app/text2ppt_input.md`` and runs pandoc to produce
        ``/home/user/app/text2ppt_output.pptx``. Pandoc's output and exit
        status are captured but not inspected.
    """
    app_dir = "/home/user/app"
    # Write the markdown to the exact path pandoc is told to read, rather
    # than to a path relative to the current working directory.
    md_path = app_dir + "/text2ppt_input.md"
    pptx_path = app_dir + "/text2ppt_output.pptx"
    pandoc_bin = app_dir + "/pandoc-2.14.2/bin/pandoc"

    output = query({"inputs": "You are a kind helpful PPT designer. "+input_prompt,
                    "parameters": {
                    "return_full_text": False,
                    "max_new_tokens": 1000}})

    reply = output[0]['generated_text']

    # When the reply opens with a slide divider, drop it together with the
    # character that follows it.
    md_text = reply[4:] if reply[:3] == "---" else reply

    # Pandoc expects UTF-8 input, and the model output may contain emoji, so
    # the encoding is fixed instead of being left to the locale default.
    with open(md_path, 'w', encoding="utf-8") as md_file:
        md_file.writelines(line.strip() + "\n" for line in md_text.split('\n'))

    command = [pandoc_bin, md_path, "-t", "pptx"]
    if input_theme != 'default':
        command.append("--reference-doc=/home/user/app/template/"+input_theme+".pptx")
    command += ["-o", pptx_path]
    subprocess.run(command, capture_output=True)
    

def ppt2script(input_file, input_type):
    """Ask the LLM for a presentation script covering each page of a deck.

    Args:
        input_file: Path of the file to read.
        input_type: ``"PDF"`` to read the file with PyPDF2; any other value
            opens it with python-pptx's ``Presentation``.

    Returns:
        The ``generated_text`` of the first item in the model's response.
    """
    pieces = []

    if input_type == "PDF":
        with open(input_file, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Tag each page with a 1-based marker so the model can address it.
            for number, page in enumerate(pdf_reader.pages, start=1):
                pieces.append("[PAGE_NUM " + str(number) + "]")
                pieces.append(page.extract_text())
    else:
        deck = Presentation(input_file)
        for number, slide in enumerate(deck.slides, start=1):
            pieces.append("[PAGE_NUM " + str(number) + "]")
            for shape in slide.shapes:
                # Only shapes with a text frame contribute text.
                if shape.has_text_frame:
                    for paragraph in shape.text_frame.paragraphs:
                        pieces.extend(run.text for run in paragraph.runs)

    text = "".join(pieces)

    header = """
    You are an assistant helping with PPT presentations. 
    ~~~Follow the rules below and write a presentation script for the PPT content below.
    ~~~
    - When [PAGE_NUM 1], where 1 is the page number, write a presentation script for each page number.
    - Write only in text without using markdown language.
    - Add additional explanations or examples to the PPT content.
    ---
    """

    request = {
        "inputs": "You are a kind helpful PPT Assistant." + header + text,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    }
    output = query(request)
    return output[0]['generated_text']