import os
import uuid

from flask import Blueprint, request, jsonify, current_app
from flask_login import login_required, current_user
from werkzeug.utils import secure_filename

from models import db, Document
from services.document_parser import parse_document
from services import rag_service

documents_bp = Blueprint("documents", __name__, url_prefix="/api/documents")


def _extension(filename: str) -> str:
    """Return the lower-cased text after the last "." in *filename*.

    Returns an empty string when *filename* contains no dot.
    """
    _, dot, ext = filename.rpartition(".")
    return ext.lower() if dot else ""


def _allowed(filename: str) -> bool:
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    allowed = current_app.config["ALLOWED_EXTENSIONS"]
    return "." in filename and _extension(filename) in allowed


@documents_bp.route("/", methods=["GET"])
@login_required
def list_documents():
    """Return the current user's documents as JSON, newest first."""
    docs = (
        Document.query
        .filter_by(user_id=current_user.id)
        .order_by(Document.created_at.desc())
        .all()
    )
    return jsonify([d.to_dict() for d in docs])


@documents_bp.route("/upload", methods=["POST"])
@login_required
def upload():
    """Store an uploaded file, record it as a Document and try to index it.

    Expects a multipart form field named "file". Responds with 400 and an
    "error" message when the field is missing, the filename is empty, or the
    extension is not allowed. Otherwise responds with 201 and the document's
    ``to_dict()`` payload. Indexing is best-effort: if it fails the document
    is still returned, with ``indexed`` left False.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400
    file = request.files["file"]
    if not file.filename:
        return jsonify({"error": "No filename"}), 400
    if not _allowed(file.filename):
        return jsonify({"error": "File type not allowed"}), 400

    # Take the extension from the raw name that _allowed() just validated.
    # secure_filename() drops non-ASCII characters and leading dots, so a name
    # such as "файл.pdf" comes back as "pdf" with no "." left to split on.
    ext = _extension(file.filename)
    original_name = secure_filename(file.filename)
    if not original_name.lower().endswith(f".{ext}"):
        # Sanitising destroyed the stem; fall back to a generic display name.
        original_name = f"upload.{ext}"

    # Store under a random name so uploads can never collide or overwrite.
    stored_name = f"{uuid.uuid4().hex}.{ext}"
    file_path = os.path.join(current_app.config["UPLOAD_FOLDER"], stored_name)
    file.save(file_path)

    doc = Document(
        user_id=current_user.id,
        filename=stored_name,
        original_name=original_name,
        file_type=ext,
        indexed=False,
    )
    db.session.add(doc)
    db.session.commit()

    # Read the id now so the error path below can log it without touching a
    # session that may be in a failed state.
    doc_id = doc.id

    # Indexing runs synchronously inside the request (not in a background job).
    try:
        text = parse_document(file_path, ext)
        rag_service.index_source(
            text=text,
            user_id=current_user.id,
            source_id=doc_id,
            source_type="doc",
            chunk_size=current_app.config["RAG_CHUNK_SIZE"],
            chunk_overlap=current_app.config["RAG_CHUNK_OVERLAP"],
        )
        doc.indexed = True
        db.session.commit()
    except Exception as e:
        # Deliberately broad: the upload itself succeeded, so an indexing
        # failure must not fail the request. Roll back so that a failed commit
        # does not leave the session unusable for doc.to_dict() below.
        db.session.rollback()
        current_app.logger.exception("Indexing failed for doc %s: %s", doc_id, e)

    return jsonify(doc.to_dict()), 201
# NOTE(review): the original route rules were "/" and "//content", with no URL
# variable, although both views take ``doc_id``. The rules below restore the
# placeholder. The ``int`` converter assumes Document.id is an integer key;
# confirm against the model.
@documents_bp.route("/<int:doc_id>", methods=["DELETE"])
@login_required
def delete_document(doc_id: int):
    """Delete one of the current user's documents.

    Removes the document from the vector store, deletes the stored file if it
    is present, then deletes the database row. Responds with 404 when no
    document with ``doc_id`` belongs to the current user, otherwise with
    ``{"success": true}``.
    """
    doc = Document.query.filter_by(id=doc_id, user_id=current_user.id).first_or_404()

    # Remove from vector store
    rag_service.delete_source(current_user.id, doc.id, "doc")

    # Remove file. Attempt the removal directly instead of checking for
    # existence first, so a file that vanishes in between is not an error.
    file_path = os.path.join(current_app.config["UPLOAD_FOLDER"], doc.filename)
    try:
        os.remove(file_path)
    except FileNotFoundError:
        pass  # Already gone; nothing to clean up.

    db.session.delete(doc)
    db.session.commit()
    return jsonify({"success": True})


@documents_bp.route("/<int:doc_id>/content", methods=["GET"])
@login_required
def get_content(doc_id: int):
    """Return the parsed text of one of the current user's documents.

    Responds with 404 when no document with ``doc_id`` belongs to the current
    user, and with 500 plus an "error" message when parsing the stored file
    fails. On success the JSON body has "id", "name" and "content".
    """
    doc = Document.query.filter_by(id=doc_id, user_id=current_user.id).first_or_404()
    file_path = os.path.join(current_app.config["UPLOAD_FOLDER"], doc.filename)
    try:
        text = parse_document(file_path, doc.file_type)
    except Exception as e:
        # Keep a server-side record with traceback; the response is unchanged.
        current_app.logger.exception("Parsing failed for doc %s", doc_id)
        return jsonify({"error": str(e)}), 500
    return jsonify({"id": doc.id, "name": doc.original_name, "content": text})