import json
from typing import Any, Dict

import tiktoken


class TokenLimitExceededError(Exception):
    """Raised when an input payload exceeds the configured token limit.

    The measured token count and the limit it was compared against are kept
    on the instance as ``token_count`` and ``token_limit`` so callers can
    inspect them without parsing the message.
    """

    def __init__(self, message: str, token_count: int, token_limit: int):
        """Store the message and the two counts.

        Args:
            message: Human-readable description of the failure.
            token_count: Number of tokens measured for the payload.
            token_limit: Maximum number of tokens that was allowed.
        """
        super().__init__(message)
        self.token_count = token_count
        self.token_limit = token_limit


class Validator:
    """Pre-flight check that a payload fits within a token budget."""

    def __init__(self, token_limit: int = 100000, encoding_name: str = "cl100k_base"):
        """Initialize the validator with a token limit and a tokenizer encoding.

        Args:
            token_limit: The maximum number of tokens allowed.
            encoding_name: Name of the tiktoken encoding used for counting.
        """
        self.token_limit = token_limit
        self.encoding = tiktoken.get_encoding(encoding_name=encoding_name)

    def precheck_tokensize(self, input_payload: Dict[str, Any]) -> None:
        """Check that the payload's token count does not exceed the limit.

        The payload is serialized with ``json.dumps`` and the resulting string
        is tokenized with the configured encoding. A count equal to the limit
        is accepted; only a strictly larger count is rejected.

        Args:
            input_payload: The payload to be checked.

        Raises:
            TokenLimitExceededError: If the payload's token count is greater
                than ``self.token_limit``.
            TypeError: Propagated from ``json.dumps`` when the payload holds
                values that are not JSON-serializable.
        """
        # NOTE(review): json.dumps runs with its defaults, so non-ASCII text is
        # counted in its \uXXXX-escaped form -- confirm this matches how the
        # payload is actually serialized for the downstream consumer.
        payload_string: str = json.dumps(input_payload)

        # By default tiktoken raises ValueError when the text contains a
        # special-token literal such as "<|endoftext|>". A size pre-check must
        # not crash on such content, so disable that guard and have the text
        # tokenized as ordinary characters.
        tokens = self.encoding.encode(payload_string, disallowed_special=())
        token_count: int = len(tokens)

        if token_count > self.token_limit:
            raise TokenLimitExceededError(
                f"Input payload token count ({token_count}) exceeds the limit of {self.token_limit} tokens.",
                token_count,
                self.token_limit,
            )