VikasQblocks committed on
Commit c85864b • 1 Parent(s): f8d2db0

Add LLM Comparison gradio application that uses monster API in backend

Files changed (4)
  1. MonsterAPIClient.py +144 -0
  2. README.md +4 -4
  3. gradio_app.py +54 -0
  4. requirements.txt +4 -0
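At a high level, gradio_app.py collects the selected models plus a prompt, and MonsterAPIClient.MClient submits each prompt to the Monster API and polls for the result. A minimal sketch of that flow, assuming MONSTER_API_TOKEN is set in the environment (model names and the prompt below are illustrative only; with the default MOCK_Runner=True, jobs that are still running return placeholder text):

# Minimal sketch of the flow implemented by the files below.
from MonsterAPIClient import MClient

client = MClient()
prompt = "Explain beam search in one sentence."
for model in ('falcon-7b-instruct', 'llama2-7b-chat'):
    # Submit the prompt; the API returns a process_id for the queued job.
    response = client.get_response(model, {"prompt": prompt})
    # Poll /v1/status/{process_id} until the job completes, then print the result.
    result = client.wait_and_get_result(response['process_id'])
    print(f"--- {model} ---\n{result}")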
MonsterAPIClient.py ADDED
@@ -0,0 +1,144 @@
+# MonsterAPIClient.py
+
+"""
+Monster API Python client to connect to LLM models on monsterapi.
+
+Base URL: https://api.monsterapi.ai/v1/generate/{model}
+
+Available models:
+-----------------
+1. falcon-7b-instruct
+2. falcon-40b-instruct
+3. mpt-30B-instruct
+4. mpt-7b-instruct
+5. openllama-13b-base
+6. llama2-7b-chat
+
+"""
+import os
+import time
+import logging
+import requests
+from requests_toolbelt.multipart.encoder import MultipartEncoder
+
+from typing import Optional, Literal, Union, List, Dict
+from pydantic import BaseModel, Field
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class InputModel1(BaseModel):
+    """
+    Supports the following models: falcon-40b-instruct, falcon-7b-instruct, openllama-13b-base, llama2-7b-chat
+
+    prompt              string   Textual instruction for the model to produce an output. Required
+    top_k               integer  Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40)
+    top_p               float    Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Optional (default: 0.9, range 0-1)
+    temp                float    The temperature influences the randomness of the next token predictions. Optional (default: 0.98)
+    max_length          integer  The maximum length of the generated text. Optional (default: 256)
+    repetition_penalty  float    Penalty used to discourage the repetition of tokens in the output. Optional (default: 1.2)
+    beam_size           integer  Beam size for beam search; a larger beam size gives better quality output but slower generation. Optional (default: 1)
+    """
+    prompt: str
+    top_k: int = 40
+    top_p: float = Field(0.9, ge=0., le=1.)
+    temp: float = Field(0.98, ge=0., le=1.)
+    max_length: int = 256
+    repetition_penalty: float = 1.2
+    beam_size: int = 1
+
+
+class InputModel2(BaseModel):
+    """
+    Supports the following models: mpt-30B-instruct, mpt-7b-instruct
+
+    prompt      string   Textual instruction for the model to produce an output. Required
+    top_k       integer  Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40)
+    top_p       float    Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Optional (default: 0.9, range 0-1)
+    temp        float    Temperature controls the randomness of the model's output; the higher the temperature, the more random the output. Optional (default: 0.98)
+    max_length  integer  Maximum length of the generated output. Optional (default: 256)
+    """
+    prompt: str
+    top_k: int = 40
+    top_p: float = Field(0.9, ge=0., le=1.)
+    temp: float = Field(0.98, ge=0., le=1.)
+    max_length: int = 256
+
+MODELS_TO_DATAMODEL = {
+    'falcon-7b-instruct': InputModel1,
+    'falcon-40b-instruct': InputModel1,
+    'mpt-30B-instruct': InputModel2,
+    'mpt-7b-instruct': InputModel2,
+    'openllama-13b-base': InputModel1,
+    'llama2-7b-chat': InputModel1
+}
+
+
+class MClient():
+    def __init__(self):
+        # Fixed multipart boundary and bearer-token auth for MonsterAPI requests.
+        self.boundary = '---011000010111000001101001'
+        self.auth_token = os.environ.get('MONSTER_API_TOKEN')
+        self.headers = {
+            "accept": "application/json",
+            "content-type": f"multipart/form-data; boundary={self.boundary}",
+            'Authorization': 'Bearer ' + self.auth_token}
+        self.base_url = 'https://api.monsterapi.ai/v1'
+        self.models_to_data_model = MODELS_TO_DATAMODEL
+        # MOCK_Runner=True makes wait_and_get_result() return a placeholder instead of polling.
+        self.mock = os.environ.get('MOCK_Runner', "True").lower() == "true"
+
+    def get_response(self,
+                     model: Literal['falcon-7b-instruct', 'falcon-40b-instruct', 'mpt-30B-instruct',
+                                    'mpt-7b-instruct', 'openllama-13b-base', 'llama2-7b-chat'],
+                     data: dict):
+        if model not in self.models_to_data_model:
+            raise ValueError(f"Invalid model: {model}!")
+
+        # Validate the payload against the model's input schema.
+        dataModel = self.models_to_data_model[model](**data)
+        url = f"{self.base_url}/generate/{model}"
+        data = dataModel.dict()
+        # The endpoint expects multipart form data, so convert all values into strings.
+        for key, value in data.items():
+            data[key] = str(value)
+        multipart_data = MultipartEncoder(fields=data, boundary=self.boundary)
+        response = requests.post(url, headers=self.headers, data=multipart_data)
+        response.raise_for_status()
+        return response.json()
+
+    def get_status(self, process_id):
+        # GET /v1/status/{process_id}
+        url = f"{self.base_url}/status/{process_id}"
+        response = requests.get(url, headers=self.headers)
+        response.raise_for_status()
+        return response.json()
+
+    def wait_and_get_result(self, process_id):
+        while True:
+            status = self.get_status(process_id)
+            if status['status'].lower() == 'completed':
+                return status['result']
+            elif status['status'].lower() == 'failed':
+                raise RuntimeError(f"Process {process_id} failed!")
+            else:
+                if self.mock:
+                    # In mock mode, skip polling and return placeholder text.
+                    return 100*"Mock Output!"
+                logger.info(f"Process {process_id} is still running, status is {status['status']}. Waiting for 1 second...")
+                time.sleep(1)
+
+
+if __name__ == '__main__':
+    client = MClient()
+    response = client.get_response('falcon-7b-instruct', {"prompt": 'How to make a sandwich'})
+    output = client.wait_and_get_result(response['process_id'])
+    print(output)
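Because get_response validates its data dict through the pydantic input models above, any of those fields can be overridden per request. A minimal sketch with illustrative values only (allowed ranges follow the Field() constraints above):

# Sketch: overriding InputModel1 sampling defaults for a single request.
from MonsterAPIClient import MClient

client = MClient()
payload = {
    "prompt": "Write a haiku about GPUs.",  # required
    "temp": 0.7,        # lower temperature -> less random output
    "top_p": 0.95,      # nucleus sampling threshold (0-1)
    "max_length": 128,  # cap the generated length
}
response = client.get_response('falcon-7b-instruct', payload)
print(client.wait_and_get_result(response['process_id']))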
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
-title: Monster LLMs
+title: MonsterAPI LLM Comparison
 emoji: 👀
-colorFrom: indigo
-colorTo: red
+colorFrom: white
+colorTo: black
 sdk: gradio
 sdk_version: 3.38.0
-app_file: app.py
+app_file: gradio_app.py
 pinned: false
 license: apache-2.0
 ---
gradio_app.py ADDED
@@ -0,0 +1,54 @@
+import gradio as gr
+import requests
+from tqdm import tqdm
+from MonsterAPIClient import MClient
+from MonsterAPIClient import MODELS_TO_DATAMODEL
+
+client = MClient()
+
+
+# Available models list
+EXCLUSION_LIST = ['mpt-30B-instruct']
+available_models = list(set(list(MODELS_TO_DATAMODEL.keys())) - set(EXCLUSION_LIST))
+
+def generate_model_output(model, input_text):
+    try:
+        response = client.get_response(model, {"prompt": input_text})
+        output = client.wait_and_get_result(response['process_id'])
+        return output
+    except Exception as e:
+        return f"Error occurred: {str(e)}"
+
+# Gradio interface function
+def generate_output(selected_models, input_text, available_models=available_models):
+    outputs = {}
+    for model in tqdm(selected_models):
+        outputs[model] = generate_model_output(model, input_text)
+    # Return one entry per available model, in a fixed order, so results line up
+    # with the output textboxes defined below.
+    ret_outputs = []
+    for model in available_models:
+        if model not in outputs:
+            ret_outputs.append("Model not selected!")
+        else:
+            ret_outputs.append(outputs[model].replace("\n", "<br>"))
+
+    return ret_outputs
+
+output_components = [gr.outputs.Textbox(label=model) for model in available_models]
+
+checkboxes = gr.inputs.CheckboxGroup(available_models, label="Select models to generate outputs:")
+textbox = gr.inputs.Textbox()
+
+# Gradio Interface
+demo = gr.Interface(
+    fn=generate_output,
+    inputs=[checkboxes, textbox],
+    outputs=output_components,
+    live=False,
+    capture_session=True,
+    title="Monster API LLM Output Comparison",
+    description="Generate outputs from selected models using Monster API.",
+    css="body {background-color: black}"
+)
+
+# Launch the Gradio app
+demo.launch()
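Note that MClient defaults MOCK_Runner to "True", so a deployment that should return real completions needs that variable flipped. A small sketch of the environment the app expects at startup (both variable names come from MonsterAPIClient.py above):

# Sketch: environment expected before launching gradio_app.py.
import os

os.environ["MOCK_Runner"] = "False"  # return real completions instead of mock output
assert os.environ.get("MONSTER_API_TOKEN"), "Set MONSTER_API_TOKEN to a valid Monster API key"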
requirements.txt ADDED
@@ -0,0 +1,4 @@
+requests
+requests-toolbelt
+pydantic
+gradio