VikasQblocks committed on
Commit c85864b • 1 Parent(s): f8d2db0

Add LLM Comparison gradio application that uses monster API in backend

Files changed (4)
  1. MonsterAPIClient.py +144 -0
  2. README.md +4 -4
  3. gradio_app.py +54 -0
  4. requirements.txt +4 -0
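At a high level, gradio_app.py collects the selected models plus a prompt, and MonsterAPIClient.MClient submits each prompt to the Monster API and polls for the result. A minimal sketch of that flow, assuming MONSTER_API_TOKEN is set in the environment (model names and the prompt below are illustrative only; with the default MOCK_Runner=True, jobs that are still running return placeholder text):

# Minimal sketch of the flow implemented by the files below.
from MonsterAPIClient import MClient

client = MClient()
prompt = "Explain beam search in one sentence."
for model in ('falcon-7b-instruct', 'llama2-7b-chat'):
    # Submit the prompt; the API returns a process_id for the queued job.
    response = client.get_response(model, {"prompt": prompt})
    # Poll /v1/status/{process_id} until the job completes, then print the result.
    result = client.wait_and_get_result(response['process_id'])
    print(f"--- {model} ---\n{result}")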
MonsterAPIClient.py ADDED
@@ -0,0 +1,144 @@
+# MonsterAPIClient.py
+
+"""
+Monster API Python client to connect to LLM models on monsterapi.
+
+Base URL: https://api.monsterapi.ai/v1/generate/{model}
+
+Available models:
+-----------------
+1. falcon-7b-instruct
+2. falcon-40b-instruct
+3. mpt-30B-instruct
+4. mpt-7b-instruct
+5. openllama-13b-base
+6. llama2-7b-chat
+
+"""
+import os
+import time
+import logging
+import requests
+from requests_toolbelt.multipart.encoder import MultipartEncoder
+
+from typing import Optional, Literal, Union, List, Dict
+from pydantic import BaseModel, Field
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class InputModel1(BaseModel):
+    """
+    Supports the following models: falcon-40b-instruct, falcon-7b-instruct, openllama-13b-base, llama2-7b-chat
+
+    prompt              string   Textual instruction for the model to produce an output. Required
+    top_k               integer  Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40)
+    top_p               float    Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Optional (default: 0.9, range 0-1)
+    temp                float    The temperature influences the randomness of the next token predictions. Optional (default: 0.98)
+    max_length          integer  The maximum length of the generated text. Optional (default: 256)
+    repetition_penalty  float    Penalty used to discourage the repetition of tokens in the output. Optional (default: 1.2)
+    beam_size           integer  Beam size for beam search; a larger beam size gives better quality output but slower generation. Optional (default: 1)
+    """
+    prompt: str
+    top_k: int = 40
+    top_p: float = Field(0.9, ge=0., le=1.)
+    temp: float = Field(0.98, ge=0., le=1.)
+    max_length: int = 256
+    repetition_penalty: float = 1.2
+    beam_size: int = 1
+
+
+class InputModel2(BaseModel):
+    """
+    Supports the following models: mpt-30B-instruct, mpt-7b-instruct
+
+    prompt      string   Textual instruction for the model to produce an output. Required
+    top_k       integer  Top-k sampling helps improve quality by removing the tail and making it less likely to go off topic. Optional (default: 40)
+    top_p       float    Top-p sampling helps generate more diverse and creative text by considering a broader range of tokens. Optional (default: 0.9, range 0-1)
+    temp        float    Temperature controls the randomness of the model's output; the higher the temperature, the more random the output. Optional (default: 0.98)
+    max_length  integer  Maximum length of the generated output. Optional (default: 256)
+    """
+    prompt: str
+    top_k: int = 40
+    top_p: float = Field(0.9, ge=0., le=1.)
+    temp: float = Field(0.98, ge=0., le=1.)
+    max_length: int = 256
+
+MODELS_TO_DATAMODEL = {
+    'falcon-7b-instruct': InputModel1,
+    'falcon-40b-instruct': InputModel1,
+    'mpt-30B-instruct': InputModel2,
+    'mpt-7b-instruct': InputModel2,
+    'openllama-13b-base': InputModel1,
+    'llama2-7b-chat': InputModel1
+}
+
+
+class MClient():
+    def __init__(self):
+        # Fixed multipart boundary and bearer-token auth for MonsterAPI requests.
+        self.boundary = '---011000010111000001101001'
+        self.auth_token = os.environ.get('MONSTER_API_TOKEN')
+        self.headers = {
+            "accept": "application/json",
+            "content-type": f"multipart/form-data; boundary={self.boundary}",
+            'Authorization': 'Bearer ' + self.auth_token}
+        self.base_url = 'https://api.monsterapi.ai/v1'
+        self.models_to_data_model = MODELS_TO_DATAMODEL
+        # MOCK_Runner=True makes wait_and_get_result() return a placeholder instead of polling.
+        self.mock = os.environ.get('MOCK_Runner', "True").lower() == "true"
+
+    def get_response(self,
+                     model: Literal['falcon-7b-instruct', 'falcon-40b-instruct', 'mpt-30B-instruct',
+                                    'mpt-7b-instruct', 'openllama-13b-base', 'llama2-7b-chat'],
+                     data: dict):
+        if model not in self.models_to_data_model:
+            raise ValueError(f"Invalid model: {model}!")
+
+        # Validate the payload against the model's input schema.
+        dataModel = self.models_to_data_model[model](**data)
+        url = f"{self.base_url}/generate/{model}"
+        data = dataModel.dict()
+        # The endpoint expects multipart form data, so convert all values into strings.
+        for key, value in data.items():
+            data[key] = str(value)
+        multipart_data = MultipartEncoder(fields=data, boundary=self.boundary)
+        response = requests.post(url, headers=self.headers, data=multipart_data)
+        response.raise_for_status()
+        return response.json()
+
+    def get_status(self, process_id):
+        # GET /v1/status/{process_id}
+        url = f"{self.base_url}/status/{process_id}"
+        response = requests.get(url, headers=self.headers)
+        response.raise_for_status()
+        return response.json()
+
+    def wait_and_get_result(self, process_id):
+        while True:
+            status = self.get_status(process_id)
+            if status['status'].lower() == 'completed':
+                return status['result']
+            elif status['status'].lower() == 'failed':
+                raise RuntimeError(f"Process {process_id} failed!")
+            else:
+                if self.mock:
+                    # In mock mode, skip polling and return placeholder text.
+                    return 100*"Mock Output!"
+                logger.info(f"Process {process_id} is still running, status is {status['status']}. Waiting for 1 second...")
+                time.sleep(1)
+
+
+if __name__ == '__main__':
+    client = MClient()
+    response = client.get_response('falcon-7b-instruct', {"prompt": 'How to make a sandwich'})
+    output = client.wait_and_get_result(response['process_id'])
+    print(output)
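Because get_response validates its data dict through the pydantic input models above, any of those fields can be overridden per request. A minimal sketch with illustrative values only (allowed ranges follow the Field() constraints above):

# Sketch: overriding InputModel1 sampling defaults for a single request.
from MonsterAPIClient import MClient

client = MClient()
payload = {
    "prompt": "Write a haiku about GPUs.",  # required
    "temp": 0.7,        # lower temperature -> less random output
    "top_p": 0.95,      # nucleus sampling threshold (0-1)
    "max_length": 128,  # cap the generated length
}
response = client.get_response('falcon-7b-instruct', payload)
print(client.wait_and_get_result(response['process_id']))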
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
-title: Monster LLMs
+title: MonsterAPI LLM Comparison
 emoji: 👀
-colorFrom: indigo
-colorTo: red
+colorFrom: white
+colorTo: black
 sdk: gradio
 sdk_version: 3.38.0
-app_file: app.py
+app_file: gradio_app.py
 pinned: false
 license: apache-2.0
 ---
gradio_app.py ADDED
@@ -0,0 +1,54 @@
+import gradio as gr
+import requests
+from tqdm import tqdm
+from MonsterAPIClient import MClient
+from MonsterAPIClient import MODELS_TO_DATAMODEL
+
+client = MClient()
+
+
+# Available models list
+EXCLUSION_LIST = ['mpt-30B-instruct']
+available_models = list(set(list(MODELS_TO_DATAMODEL.keys())) - set(EXCLUSION_LIST))
+
+def generate_model_output(model, input_text):
+    try:
+        response = client.get_response(model, {"prompt": input_text})
+        output = client.wait_and_get_result(response['process_id'])
+        return output
+    except Exception as e:
+        return f"Error occurred: {str(e)}"
+
+# Gradio interface function
+def generate_output(selected_models, input_text, available_models=available_models):
+    outputs = {}
+    for model in tqdm(selected_models):
+        outputs[model] = generate_model_output(model, input_text)
+    # Return one entry per available model, in a fixed order, so results line up
+    # with the output textboxes defined below.
+    ret_outputs = []
+    for model in available_models:
+        if model not in outputs:
+            ret_outputs.append("Model not selected!")
+        else:
+            ret_outputs.append(outputs[model].replace("\n", "<br>"))
+
+    return ret_outputs
+
+output_components = [gr.outputs.Textbox(label=model) for model in available_models]
+
+checkboxes = gr.inputs.CheckboxGroup(available_models, label="Select models to generate outputs:")
+textbox = gr.inputs.Textbox()
+
+# Gradio Interface
+demo = gr.Interface(
+    fn=generate_output,
+    inputs=[checkboxes, textbox],
+    outputs=output_components,
+    live=False,
+    capture_session=True,
+    title="Monster API LLM Output Comparison",
+    description="Generate outputs from selected models using Monster API.",
+    css="body {background-color: black}"
+)
+
+# Launch the Gradio app
+demo.launch()
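Note that MClient defaults MOCK_Runner to "True", so a deployment that should return real completions needs that variable flipped. A small sketch of the environment the app expects at startup (both variable names come from MonsterAPIClient.py above):

# Sketch: environment expected before launching gradio_app.py.
import os

os.environ["MOCK_Runner"] = "False"  # return real completions instead of mock output
assert os.environ.get("MONSTER_API_TOKEN"), "Set MONSTER_API_TOKEN to a valid Monster API key"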
requirements.txt ADDED
@@ -0,0 +1,4 @@
+requests
+requests-toolbelt
+pydantic
+gradio