new file: .dockerignore

new file:   .env.example
	new file:   Dockerfile
	new file:   app.py
	new file:   blueprints/__init__.py
	new file:   blueprints/auth.py
	new file:   blueprints/chat.py
	new file:   blueprints/context.py
	new file:   blueprints/documents.py
	new file:   blueprints/main.py
	new file:   config.py
	new file:   docker-compose.yml
	new file:   models/__init__.py
	new file:   models/chat_session.py
	new file:   models/document.py
	new file:   models/user.py
	new file:   requirements.txt
	new file:   services/__init__.py
	new file:   services/document_parser.py
	new file:   services/llm_service.py
	new file:   services/rag_service.py
	new file:   services/url_scraper.py
	new file:   static/css/style.css
	new file:   static/js/chat.js
	new file:   static/js/inline_chat.js
	new file:   static/js/main.js
	new file:   templates/base.html
	new file:   templates/document_view.html
	new file:   templates/index.html
	new file:   templates/login.html
	new file:   templates/register.html
This commit is contained in:
SimolZimol
2026-05-22 16:03:50 +02:00
commit 939cc13689
31 changed files with 2025 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
import os
import io
def parse_document(file_path: str, file_type: str) -> str:
    """Extract plain text from a document file.

    Args:
        file_path: Path of the file to read.
        file_type: Extension naming the format, in any letter case and with
            or without leading dots (e.g. ``"pdf"``, ``".DOCX"``).

    Returns:
        The text content of the document.

    Raises:
        ValueError: If ``file_type`` is not one of txt, md, pdf or docx.
    """
    ext = file_type.lower().lstrip(".")
    if ext in ("txt", "md"):
        # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8" but drops a
        # leading byte-order mark, so it does not end up in the returned text
        # as a stray U+FEFF character. Undecodable bytes are replaced with
        # U+FFFD instead of raising.
        with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
            return f.read()
    if ext == "pdf":
        return _parse_pdf(file_path)
    if ext == "docx":
        return _parse_docx(file_path)
    raise ValueError(f"Unsupported file type: {ext}")
def _parse_pdf(file_path: str) -> str:
    """Return the text pdfminer extracts from the PDF at ``file_path``.

    A falsy extraction result is normalized to an empty string.
    """
    # Imported lazily so pdfminer is only needed when a PDF is parsed.
    from pdfminer.high_level import extract_text

    extracted = extract_text(file_path)
    if not extracted:
        return ""
    return extracted
def _parse_docx(file_path: str) -> str:
    """Return the text of the .docx file at ``file_path``.

    Paragraphs whose text is empty or whitespace-only are skipped; the
    remaining paragraph texts are joined with newline characters.
    """
    # Imported lazily so python-docx is only needed when a .docx is parsed.
    from docx import Document

    # NOTE(review): only ``paragraphs`` is read here; text held in other
    # structures (e.g. tables) is presumably not included -- confirm.
    kept = []
    for para in Document(file_path).paragraphs:
        content = para.text
        if content.strip():
            kept.append(content)
    return "\n".join(kept)