mpt-7b-8k-chat / exceptions.py
irenedea's picture
LLM-foundry update March 26, 2024 23:50:31
fdb2891 verified
raw
history blame
7.17 kB
"""Custom exceptions for the LLMFoundry."""
from collections.abc import Mapping
from typing import Any, Dict, List
class MissingHuggingFaceURLSplitError(ValueError):
"""Error thrown when there's no split used in HF dataset config."""
def __init__(self) -> None:
message = 'When using a HuggingFace dataset from a URL, you must set the ' + '`split` key in the dataset config.'
super().__init__(message)
class NotEnoughDatasetSamplesError(ValueError):
"""Error thrown when there is not enough data to train a model."""
def __init__(self, dataset_name: str, split: str, dataloader_batch_size: int, world_size: int, full_dataset_size: int, minimum_dataset_size: int) -> None:
self.dataset_name = dataset_name
self.split = split
self.dataloader_batch_size = dataloader_batch_size
self.world_size = world_size
self.full_dataset_size = full_dataset_size
self.minimum_dataset_size = minimum_dataset_size
message = f'Your dataset (name={dataset_name}, split={split}) ' + f'has {full_dataset_size} samples, but your minimum batch size ' + f'is {minimum_dataset_size} because you are running on {world_size} gpus and ' + f'your per device batch size is {dataloader_batch_size}. Please increase the number ' + f'of samples in your dataset to at least {minimum_dataset_size}.'
super().__init__(message)
class UnknownExampleTypeError(KeyError):
"""Error thrown when an unknown example type is used in a task."""
def __init__(self, example: Mapping) -> None:
message = f'Unknown example type example={example!r}'
super().__init__(message)
class TooManyKeysInExampleError(ValueError):
"""Error thrown when a data sample has too many keys."""
def __init__(self, desired_keys: set[str], keys: set[str]) -> None:
message = f'Data sample has {len(keys)} keys in `allowed_keys`: {desired_keys} Please specify exactly one. Provided keys: {keys}'
super().__init__(message)
class NotEnoughChatDataError(ValueError):
"""Error thrown when there is not enough chat data to train a model."""
def __init__(self) -> None:
message = 'Chat example must have at least two messages'
super().__init__(message)
class ConsecutiveRepeatedChatRolesError(ValueError):
"""Error thrown when there are consecutive repeated chat roles."""
def __init__(self, repeated_role: str) -> None:
self.repeated_role = repeated_role
message = f'Conversation roles must alternate but found {repeated_role} repeated consecutively.'
super().__init__(message)
class InvalidLastChatMessageRoleError(ValueError):
"""Error thrown when the last message role in a chat example is invalid."""
def __init__(self, last_role: str, expected_roles: set[str]) -> None:
message = f'Invalid last message role: {last_role}. Expected one of: {expected_roles}'
super().__init__(message)
class IncorrectMessageKeyQuantityError(ValueError):
"""Error thrown when a message has an incorrect number of keys."""
def __init__(self, keys: List[str]) -> None:
self.keys = keys
message = f'Expected 2 keys in message, but found {len(keys)}'
super().__init__(message)
class InvalidRoleError(ValueError):
"""Error thrown when a role is invalid."""
def __init__(self, role: str, valid_roles: set[str]) -> None:
self.role = role
self.valid_roles = valid_roles
message = f'Expected role to be one of {valid_roles} but found: {role}'
super().__init__(message)
class InvalidContentTypeError(TypeError):
"""Error thrown when the content type is invalid."""
def __init__(self, content_type: type) -> None:
self.content_type = content_type
message = f'Expected content to be a string, but found {content_type}'
super().__init__(message)
class InvalidPromptTypeError(TypeError):
"""Error thrown when the prompt type is invalid."""
def __init__(self, prompt_type: type) -> None:
self.prompt_type = prompt_type
message = f'Expected prompt to be a string, but found {prompt_type}'
super().__init__(message)
class InvalidResponseTypeError(TypeError):
"""Error thrown when the response type is invalid."""
def __init__(self, response_type: type) -> None:
self.response_type = response_type
message = f'Expected response to be a string, but found {response_type}'
super().__init__(message)
class InvalidPromptResponseKeysError(ValueError):
"""Error thrown when missing expected prompt and response keys."""
def __init__(self, mapping: Dict[str, str], example: Dict[str, Any]):
self.example = example
message = f'Expected mapping={mapping!r} to have keys "prompt" and "response".'
super().__init__(message)
class InvalidFileExtensionError(FileNotFoundError):
"""Error thrown when a file extension is not a safe extension."""
def __init__(self, dataset_name: str, valid_extensions: List[str]) -> None:
self.dataset_name = dataset_name
self.valid_extensions = valid_extensions
message = f'safe_load is set to True. No data files with safe extensions {valid_extensions} ' + f'found for dataset at local path {dataset_name}.'
super().__init__(message)
class UnableToProcessPromptResponseError(ValueError):
"""Error thrown when a prompt and response cannot be processed."""
def __init__(self, input: Dict) -> None:
message = f'Unable to extract prompt/response from {input}'
super().__init__(message)
class ClusterDoesNotExistError(ValueError):
"""Error thrown when the cluster does not exist."""
def __init__(self, cluster_id: str) -> None:
self.cluster_id = cluster_id
message = f'Cluster with id {cluster_id} does not exist. Check cluster id and try again!'
super().__init__(message)
class FailedToCreateSQLConnectionError(RuntimeError):
"""Error thrown when client can't sql connect to Databricks."""
def __init__(self) -> None:
message = 'Failed to create sql connection to db workspace. To use sql connect, you need to provide http_path and cluster_id!'
super().__init__(message)
class FailedToConnectToDatabricksError(RuntimeError):
"""Error thrown when the client fails to connect to Databricks."""
def __init__(self) -> None:
message = 'Failed to create databricks connection. Check hostname and access token!'
super().__init__(message)
class InputFolderMissingDataError(ValueError):
"""Error thrown when the input folder is missing data."""
def __init__(self, input_folder: str) -> None:
self.input_folder = input_folder
message = f'No text files were found at {input_folder}.'
super().__init__(message)
class OutputFolderNotEmptyError(FileExistsError):
"""Error thrown when the output folder is not empty."""
def __init__(self, output_folder: str) -> None:
self.output_folder = output_folder
message = f'{output_folder} is not empty. Please remove or empty it and retry.'
super().__init__(message)