File size: 5,620 Bytes
863ee52
 
 
 
 
 
5da8554
863ee52
5da8554
 
3bb1385
5da8554
 
 
1274b0f
5da8554
 
863ee52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c41c3c
1173304
863ee52
 
 
 
 
 
 
 
 
 
 
5c41c3c
863ee52
 
 
 
 
 
 
57eefd3
1274b0f
 
249d61a
b3fcf69
 
1b9fc54
 
863ee52
 
 
 
 
1b9fc54
863ee52
 
 
 
 
 
 
 
 
 
57eefd3
863ee52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1274b0f
 
 
249d61a
b3fcf69
1b9fc54
863ee52
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import PyPDF2
from pptx import Presentation
import subprocess
from io import BytesIO
import sys
import requests

# Hugging Face access token, read from the environment at import time.
# A missing MY_HF_TOKEN variable raises KeyError here.
hf_token = os.environ['MY_HF_TOKEN']

# Hosted inference endpoint for the Llama-2-70b chat model, plus the bearer
# authorization header sent with every request to it.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
headers = {"Authorization": f"Bearer {hf_token}"}

def query(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    The module-level ``headers`` (bearer token) are sent with the request.
    No status check is performed; whatever JSON body the endpoint returns is
    handed back to the caller as-is.
    """
    return requests.post(API_URL, headers=headers, json=payload).json()
	
# Add the application directory to the module search path.
# NOTE(review): presumably so modules under /home/user/app can be imported
# regardless of the working directory — confirm this is still needed.
sys.path.append("/home/user/app") 

# Function to generate text2ppt input prompt
def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Build the LLM prompt that asks for a markdown slide deck.

    The prompt is the concatenation of three parts: a header stating the
    requested page count, the material to summarize, and a block of
    formatting rules followed by an example slide.

    Args:
        input_type: One of ``"Link"``, ``"Text"`` or ``"PDF"``.
        input_value: The link or text itself, or, for ``"PDF"``, the path
            of the PDF file whose text is extracted.
        input_pages: Requested number of slides; interpolated into the
            prompt with ``%s``.

    Returns:
        The complete prompt string. For an unrecognized ``input_type`` an
        error line is printed and the prompt contains no source material.
    """
    header = """
    Assume you are a designer creating a PPT using markdown syntax, and write a PPT of %s pages.
    +++ Summarize the content or link below in markdown language, adhering to the rules in ===, and refer to the slide examples in ~~~.
    +++
    """ % input_pages

    summary_value = ""

    if input_type in ("Link", "Text"):
        # Links and raw text are passed to the model verbatim.
        summary_value = input_value + "\n"
    elif input_type == "PDF":
        # The reader pulls page content from the file lazily, so the text
        # must be extracted while the file is still open.
        with open(input_value, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = "".join(page.extract_text() for page in pdf_reader.pages)
        summary_value = text + "\n"
    else:
        print("ERROR: Invalid input")

    rule_value = """
    ===
    - Always use '---' as a slide divider.
    - Write factually only about the content or link provided.
    - Design and arrange the slides diversely with appropriate shapes, images(![Image](Image link), https://unsplash.com/ko/images/stock/non-copyrighted for actual use), tables(|-|), quotes(>), emphasis(bold, ``), emojis(https://kr.piliapp.com/twitter-symbols/), icons (https://kr.piliapp.com/symbol/#popular).
    - Use emojis only once in every two pages, and use various other designs.
    - When using images and tables, specify the size considering the page size so that all the text content appears.
    - Make Slide 1 the title, for a total of %s pages.
    - Write the content of the PPT richly in markdown.
    - Don't explain slide by slide, just write the code.
    - Don't write using the content of the example, just refer to the format.
    ~~~
    <!-- Slide 0. Slide Topic -->
    # Slide Title
    ![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
    - This is 🤗**TEXT2PPT service PA!** using llama2.
    - Converts `link`,`text`, `PDF` input or upload into PPT. 
    """ % input_pages

    return header + summary_value + rule_value


# Function to execute text2ppt
def text2ppt(input_prompt, input_theme):
    """Ask the LLM for slide markdown and convert it to a PPTX with pandoc.

    The generated markdown is written to ``text2ppt_input.md`` in the
    current working directory and pandoc is then run on
    ``/home/user/app/text2ppt_input.md``, producing
    ``/home/user/app/text2ppt_output.pptx``.
    NOTE(review): this assumes the working directory is /home/user/app —
    confirm against the deployment.

    Args:
        input_prompt: Prompt text, e.g. from
            ``generate_text2ppt_input_prompt``.
        input_theme: ``'default'`` for pandoc's built-in styling, otherwise
            the base name of a template under ``/home/user/app/template/``
            that is passed to pandoc as ``--reference-doc``.

    Returns:
        None. A failing pandoc run is reported on stdout, not raised.
    """
    output = query({
        "inputs": "You are a kind helpful PPT designer. " + input_prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    })
    reply = output[0]['generated_text']

    # When the reply opens with a slide divider, drop it together with the
    # character that follows it so the deck does not start with a divider.
    md_text = reply[4:] if reply[:3] == "---" else reply

    # Pandoc expects UTF-8 input, so the encoding is fixed here instead of
    # depending on the locale; `with` closes the file even if a write fails.
    with open("text2ppt_input.md", 'w', encoding="utf-8") as md_file:
        md_file.writelines(line.strip() + "\n" for line in md_text.split('\n'))

    command = [
        "/home/user/app/pandoc-2.14.2/bin/pandoc",
        "/home/user/app/text2ppt_input.md",
        "-t", "pptx",
    ]
    if input_theme != 'default':
        command.append("--reference-doc=/home/user/app/template/" + input_theme + ".pptx")
    command += ["-o", "/home/user/app/text2ppt_output.pptx"]

    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Report the failure instead of dropping it; callers previously saw
        # no exception on a failed conversion, so none is raised here.
        print("ERROR: pandoc failed: " + result.stderr.decode("utf-8", errors="replace"))
    

def ppt2script(input_file, input_type):
    """Generate a presentation script for a PDF or PowerPoint deck.

    The text of every page or slide is collected, each prefixed with a
    ``[PAGE_NUM n]`` marker (numbered from 1), appended to a fixed
    instruction header and sent to the model through ``query``.

    Args:
        input_file: Path of the deck to read.
        input_type: ``"PDF"`` to read the file with PyPDF2; any other value
            makes the file load as a PowerPoint presentation.

    Returns:
        The ``generated_text`` field of the first element of the model reply.
    """
    if input_type == "PDF":
        with open(input_file, 'rb') as source:
            reader = PyPDF2.PdfReader(source)
            # One marker per page, followed directly by that page's text.
            text = "".join(
                "[PAGE_NUM " + str(number) + "]" + page.extract_text()
                for number, page in enumerate(reader.pages, start=1)
            )
    else:
        deck = Presentation(input_file)
        pieces = []
        for number, slide in enumerate(deck.slides, start=1):
            pieces.append("[PAGE_NUM " + str(number) + "]")
            for shape in slide.shapes:
                # Only shapes that carry a text frame contribute text.
                if shape.has_text_frame:
                    for paragraph in shape.text_frame.paragraphs:
                        pieces.extend(run.text for run in paragraph.runs)
        text = "".join(pieces)

    instructions = """
    You are an assistant helping with PPT presentations. 
    ~~~Follow the rules below and write a presentation script for the PPT content below.
    ~~~
    - When [PAGE_NUM 1], where 1 is the page number, write a presentation script for each page number.
    - Write only in text without using markdown language.
    - Add additional explanations or examples to the PPT content.
    ---
    """

    payload = {
        "inputs": "You are a kind helpful PPT Assistant." + instructions + text,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 1000,
        },
    }
    return query(payload)[0]['generated_text']