diff --git a/ai-hub/app/api/routes/sessions.py b/ai-hub/app/api/routes/sessions.py index 4708132..42cb378 100644 --- a/ai-hub/app/api/routes/sessions.py +++ b/ai-hub/app/api/routes/sessions.py @@ -76,14 +76,19 @@ @router.get("/{session_id}/tokens", response_model=schemas.SessionTokenUsageResponse, summary="Get Session Token Usage") def get_session_token_usage(session_id: int, db: Session = Depends(get_db)): try: - messages = services.rag_service.get_message_history(db=db, session_id=session_id) - if messages is None: + session = db.query(models.Session).filter(models.Session.id == session_id).first() + if not session: raise HTTPException(status_code=404, detail=f"Session with ID {session_id} not found.") + messages = services.rag_service.get_message_history(db=db, session_id=session_id) combined_text = " ".join([m.content for m in messages]) - validator = Validator() + + # Resolve dynamic token limit from model info + from app.core.providers.factory import get_model_limit + token_limit = get_model_limit(session.provider_name) + + validator = Validator(token_limit=token_limit) token_count = len(validator.encoding.encode(combined_text)) - token_limit = validator.token_limit percentage = round((token_count / token_limit) * 100, 2) if token_limit > 0 else 0.0 return schemas.SessionTokenUsageResponse( diff --git a/ai-hub/app/core/providers/factory.py b/ai-hub/app/core/providers/factory.py index 78e6179..4d1b53c 100644 --- a/ai-hub/app/core/providers/factory.py +++ b/ai-hub/app/core/providers/factory.py @@ -168,4 +168,38 @@ if "/" not in full_model and base_type not in ["google_gemini"]: full_model = f"{base_type}/{model_name}" - return GeneralSTTProvider(model_name=full_model, api_key=actual_key, **kwargs) \ No newline at end of file + return GeneralSTTProvider(model_name=full_model, api_key=actual_key, **kwargs) + +def get_model_limit(provider_name: str, model_name: str = None) -> int: + """ + Gets the token limit (context window) for a given provider/model using 
LiteLLM. + Used for UI progress bars and validation. + """ + # 1. Resolve Model Name + modelName = model_name + if not modelName: + modelName = settings.LLM_PROVIDERS.get(provider_name, {}).get("model") + if not modelName: + if provider_name == "gemini": modelName = settings.GEMINI_MODEL_NAME + elif provider_name == "deepseek": modelName = settings.DEEPSEEK_MODEL_NAME + elif "gemini" in provider_name.lower(): modelName = settings.GEMINI_MODEL_NAME + elif "deepseek" in provider_name.lower(): modelName = settings.DEEPSEEK_MODEL_NAME + else: + return 100000 # Safety default + + # 2. Resolve Base Type + litellm_providers = [p.value for p in litellm.LlmProviders] + base_type = resolve_provider_info(provider_name, "llm", _llm_providers, litellm_providers) + + full_model = f'{base_type}/{modelName}' if '/' not in modelName else modelName + + try: + info = litellm.get_model_info(full_model) + if info: + # Prefer max_input_tokens as it represents the context window + # If not present, max_tokens likely represents the full window or the output window + return info.get("max_input_tokens") or info.get("max_tokens") or 100000 + except Exception: + pass + + return 100000 \ No newline at end of file diff --git a/ui/client-app/src/hooks/useCodeAssistant.js b/ui/client-app/src/hooks/useCodeAssistant.js index e4e8375..56a781d 100644 --- a/ui/client-app/src/hooks/useCodeAssistant.js +++ b/ui/client-app/src/hooks/useCodeAssistant.js @@ -15,7 +15,7 @@ const [isConfigured, setIsConfigured] = useState(true); const [missingConfigs, setMissingConfigs] = useState([]); const [sessionId, setSessionId] = useState(null); - const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 100000, percentage: 0 }); + const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 0, percentage: 0 }); const [userConfigData, setUserConfigData] = useState(null); const [localActiveLLM, setLocalActiveLLM] = useState(''); diff --git a/ui/client-app/src/hooks/useVoiceChat.js 
b/ui/client-app/src/hooks/useVoiceChat.js index 3b10cc8..b003ac6 100644 --- a/ui/client-app/src/hooks/useVoiceChat.js +++ b/ui/client-app/src/hooks/useVoiceChat.js @@ -40,7 +40,7 @@ const [sessionId, setSessionId] = useState(null); const [isAutoMode, setIsAutoMode] = useState(false); const [isAutoListening, setIsAutoListening] = useState(false); - const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 100000, percentage: 0 }); + const [tokenUsage, setTokenUsage] = useState({ token_count: 0, token_limit: 0, percentage: 0 }); const [isConfigured, setIsConfigured] = useState(true); const [missingConfigs, setMissingConfigs] = useState([]); const [userConfigData, setUserConfigData] = useState(null); diff --git a/ui/client-app/src/pages/CodingAssistantPage.js b/ui/client-app/src/pages/CodingAssistantPage.js index 5e0b3f0..f114309 100644 --- a/ui/client-app/src/pages/CodingAssistantPage.js +++ b/ui/client-app/src/pages/CodingAssistantPage.js @@ -107,8 +107,21 @@