Newer
Older
cortex-hub / ai-hub / app / core / services / document.py
from typing import Any, Dict, List, Optional

from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

from app.core.vector_store.embedder.mock import MockEmbedder
from app.core.vector_store.faiss_store import FaissVectorStore
from app.db import models

class DocumentService:
    """
    Service class for managing the document lifecycle: adding, listing,
    and deleting documents, and recording vector metadata for added ones.
    """

    def __init__(self, vector_store: FaissVectorStore):
        """
        Stores the vector store and keeps a reference to its embedder.

        Args:
            vector_store: Store that receives document text in `add_document`.
                Its `embedder` attribute is used to label the embedding model
                recorded in the vector metadata.
        """
        self.vector_store = vector_store
        self.embedder = self.vector_store.embedder

    def add_document(self, db: Session, doc_data: Dict[str, Any]) -> int:
        """
        Adds a new document to the database and its text to the vector store.

        The document row and its vector metadata row are written in a single
        transaction, so a failure while indexing does not leave a committed
        document without metadata.

        Args:
            db: Active database session.
            doc_data: Keyword arguments passed to `models.Document`.

        Returns:
            The primary key of the newly created document.

        Raises:
            Exception: Any error raised while persisting or indexing is
                re-raised unchanged after the session has been rolled back.
        """
        try:
            document_db = models.Document(**doc_data)
            db.add(document_db)
            # Flush instead of commit: the INSERT is emitted so the primary key
            # is assigned, but the row stays inside the open transaction until
            # the vector metadata has been added as well.
            db.flush()
            # Read the id before the final commit so returning it does not
            # depend on reloading the instance afterwards.
            document_id = document_db.id

            embedding_model_name = (
                "mock_embedder"
                if isinstance(self.embedder, MockEmbedder)
                else "GenAIEmbedder"
            )

            faiss_index = self.vector_store.add_document(document_db.text)
            vector_metadata = models.VectorMetadata(
                document_id=document_id,
                faiss_index=faiss_index,
                embedding_model=embedding_model_name,
            )
            db.add(vector_metadata)
            db.commit()
            return document_id
        except Exception:
            # Roll back on any failure (database or vector store) so the
            # session is not left holding a half-finished transaction.
            # NOTE(review): an entry already added to the vector store is not
            # removed here; no removal call is visible on the store.
            db.rollback()
            raise

    def get_all_documents(self, db: Session) -> List[models.Document]:
        """
        Retrieves all documents from the database, newest first.

        Args:
            db: Active database session.

        Returns:
            All `models.Document` rows ordered by `created_at` descending.
        """
        return (
            db.query(models.Document)
            .order_by(models.Document.created_at.desc())
            .all()
        )

    def delete_document(self, db: Session, document_id: int) -> Optional[int]:
        """
        Deletes a document row from the database.

        Args:
            db: Active database session.
            document_id: Primary key of the document to delete.

        Returns:
            The deleted document's id, or None if no such document exists.

        Raises:
            SQLAlchemyError: Re-raised after the session has been rolled back.
        """
        try:
            doc_to_delete = (
                db.query(models.Document)
                .filter(models.Document.id == document_id)
                .first()
            )
            if not doc_to_delete:
                return None

            # NOTE(review): the matching VectorMetadata row is not deleted
            # explicitly here. Presumably this relies on a cascade configured
            # on the model - TODO confirm, otherwise metadata rows are orphaned.
            db.delete(doc_to_delete)
            db.commit()
            return document_id
        except SQLAlchemyError:
            db.rollback()
            raise