import tiktoken
import json
from typing import Dict, Any
class TokenLimitExceededError(Exception):
    """Raised when an input payload's token count exceeds the configured limit.

    Attributes:
        token_count: Number of tokens measured for the offending payload.
        token_limit: Maximum number of tokens that was allowed.
    """

    def __init__(self, message: str, token_count: int, token_limit: int):
        """Create the error.

        Args:
            message: Human-readable description; becomes ``str(exc)``.
            token_count: Number of tokens measured for the payload.
            token_limit: The limit that was exceeded.
        """
        # Only the message is forwarded so that str(exc) and exc.args stay
        # exactly (message,), as callers may already rely on.
        super().__init__(message)
        self.token_count = token_count
        self.token_limit = token_limit

    def __reduce__(self):
        """Support copy/pickle despite the multi-argument constructor.

        The default ``BaseException.__reduce__`` rebuilds the exception as
        ``cls(*self.args)``; since ``args`` holds only the message, that call
        would fail with ``TypeError`` for this class. Supplying all three
        constructor arguments keeps ``copy.copy``, ``copy.deepcopy`` and
        ``pickle`` working.
        """
        message = self.args[0] if self.args else ""
        return (
            type(self),
            (message, self.token_count, self.token_limit),
            # Instance state, so any extra attributes survive the round trip.
            dict(vars(self)),
        )
class Validator:
    """Pre-flight check that a payload fits within a token budget."""

    def __init__(self, token_limit: int = 100000, encoding_name: str = "cl100k_base"):
        """
        Initializes the Validator with a token limit and encoding.

        Args:
            token_limit (int): The maximum number of tokens allowed.
            encoding_name (str): The name of the tokenizer encoding to use.
        """
        self.token_limit = token_limit
        self.encoding = tiktoken.get_encoding(encoding_name=encoding_name)

    def precheck_tokensize(self, input_payload: Dict[str, Any]) -> None:
        """
        Checks if the input payload's token count exceeds the configured limit.

        The payload is serialized with ``json.dumps`` and the resulting string
        is tokenized with the configured encoding; the number of tokens is
        compared against ``self.token_limit``.

        Args:
            input_payload (Dict[str, Any]): The payload to be checked.

        Raises:
            TokenLimitExceededError: If the payload's token count is too high.
            TypeError: If the payload is not JSON-serializable (raised by
                ``json.dumps``).
        """
        # NOTE(review): json.dumps defaults to ensure_ascii=True, so non-ASCII
        # characters are counted in their escaped form - confirm this matches
        # how the payload is serialized downstream.
        payload_string: str = json.dumps(input_payload)

        # disallowed_special=() makes the tokenizer treat special-token
        # literals (e.g. "<|endoftext|>") appearing in the payload as ordinary
        # text. With the default ("all") encode() raises ValueError on such
        # input, which would crash this check instead of producing a count.
        tokens = self.encoding.encode(payload_string, disallowed_special=())
        token_count: int = len(tokens)

        if token_count > self.token_limit:
            raise TokenLimitExceededError(
                f"Input payload token count ({token_count}) exceeds the limit of {self.token_limit} tokens.",
                token_count,
                self.token_limit,
            )