# I didn't write this code
import os

from litellm import completion, acompletion
from litellm.utils import CustomStreamWrapper, ModelResponse


class LLM:
    '''
    Creates the primary class instance for interacting with various LLM model APIs.
    Primary APIs supported are OpenAI, Anthropic, and Cohere.
    '''
    # non-exhaustive list of supported models
    # these models are known to work
    valid_models = {
        'openai': [
            "gpt-4-turbo-preview",
            "gpt-4-0125-preview",
            "gpt-4-1106-preview",
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-1106",
            "gpt-3.5-turbo-0125",
        ],
        'anthropic': [
            'claude-3-haiku-20240307',
            'claude-3-sonnet-20240229',
            'claude-3-opus-20240229',
        ],
        'cohere': [
            'command-r',
            'command-r-plus',
        ],
    }

def __init__(self,
model_name: str='gpt-3.5-turbo-0125',
api_key: str=None,
api_version: str=None,
api_base: str=None
):
self.model_name = model_name
if not api_key:
try:
self._api_key = os.environ['OPENAI_API_KEY']
except KeyError:
raise ValueError('Default api_key expects OPENAI_API_KEY environment variable. Check that you have this variable or pass in another api_key.')
else:
self._api_key = api_key
self.api_version = api_version
self.api_base = api_base

    def chat_completion(self,
                        system_message: str,
                        user_message: str='',
                        temperature: float=0,
                        max_tokens: int=500,
                        stream: bool=False,
                        raw_response: bool=False,
                        **kwargs
                        ) -> str | CustomStreamWrapper | ModelResponse:
'''
Generative text completion method.
Args:
-----
system_message: str
The system message to be sent to the model.
user_message: str
The user message to be sent to the model.
        temperature: float
            The sampling temperature passed to the model (halved for Claude models, whose range is 0 to 1).
        max_tokens: int
            The maximum number of tokens to generate.
stream: bool
Whether to stream the response.
raw_response: bool
If True, returns the raw model response.
'''
        # remap roles for Claude models, which handle the system prompt differently
        initial_role = 'user' if self.model_name.startswith('claude') else 'system'
        secondary_role = 'assistant' if self.model_name.startswith('claude') else 'user'
        # Claude models accept temperature in [0, 1] rather than OpenAI's [0, 2], so scale it down
        if self.model_name.startswith('claude'):
            temperature = temperature / 2
messages = [
{'role': initial_role, 'content': system_message},
{'role': secondary_role, 'content': user_message}
]
response = completion(model=self.model_name,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
stream=stream,
api_key=self._api_key,
api_base=self.api_base,
api_version=self.api_version,
**kwargs)
if raw_response or stream:
return response
return response.choices[0].message.content

    async def achat_completion(self,
                               system_message: str,
                               user_message: str='',
                               temperature: float=0,
                               max_tokens: int=500,
                               stream: bool=False,
                               raw_response: bool=False,
                               **kwargs
                               ) -> str | CustomStreamWrapper | ModelResponse:
'''
Asynchronous generative text completion method.
Args:
-----
system_message: str
The system message to be sent to the model.
user_message: str
The user message to be sent to the model.
        temperature: float
            The sampling temperature passed to the model (halved for Claude models, whose range is 0 to 1).
        max_tokens: int
            The maximum number of tokens to generate.
stream: bool
Whether to stream the response.
raw_response: bool
If True, returns the raw model response.
'''
        # remap the system role for Claude models, as in chat_completion
        initial_role = 'user' if self.model_name.startswith('claude') else 'system'
        # Claude models accept temperature in [0, 1] rather than OpenAI's [0, 2], so scale it down
        if self.model_name.startswith('claude'):
            temperature = temperature / 2
messages = [
{'role': initial_role, 'content': system_message},
{'role': 'user', 'content': user_message}
]
response = await acompletion(model=self.model_name,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
stream=stream,
api_key=self._api_key,
api_base=self.api_base,
api_version=self.api_version,
**kwargs)
if raw_response or stream:
return response
return response.choices[0].message.content
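

# ------------------------------------------------------------------------------
# Hedged usage sketch (illustrative only, not part of the original module).
# It assumes OPENAI_API_KEY is set in the environment and uses the default
# gpt-3.5-turbo-0125 model; the prompts and parameter values below are made up.
if __name__ == '__main__':
    import asyncio

    llm = LLM()  # falls back to the OPENAI_API_KEY environment variable

    # synchronous, non-streaming call: returns the message content as a string
    answer = llm.chat_completion(
        system_message='You are a concise assistant.',
        user_message='In one sentence, what is retrieval-augmented generation?',
        temperature=0.5,
        max_tokens=100,
    )
    print(answer)

    # asynchronous variant; with stream=True or raw_response=True the raw
    # litellm response / stream wrapper is returned instead of a plain string
    async def main() -> None:
        async_answer = await llm.achat_completion(
            system_message='You are a concise assistant.',
            user_message='Name one model listed in LLM.valid_models.',
        )
        print(async_answer)

    asyncio.run(main())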