new file: .dockerignore
new file: .env.example new file: Dockerfile new file: app.py new file: blueprints/__init__.py new file: blueprints/auth.py new file: blueprints/chat.py new file: blueprints/context.py new file: blueprints/documents.py new file: blueprints/main.py new file: config.py new file: docker-compose.yml new file: models/__init__.py new file: models/chat_session.py new file: models/document.py new file: models/user.py new file: requirements.txt new file: services/__init__.py new file: services/document_parser.py new file: services/llm_service.py new file: services/rag_service.py new file: services/url_scraper.py new file: static/css/style.css new file: static/js/chat.js new file: static/js/inline_chat.js new file: static/js/main.js new file: templates/base.html new file: templates/document_view.html new file: templates/index.html new file: templates/login.html new file: templates/register.html
This commit is contained in:
32
services/document_parser.py
Normal file
32
services/document_parser.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import os
|
||||
import io
|
||||
|
||||
|
||||
def parse_document(file_path: str, file_type: str) -> str:
    """Extract plain text from a document file.

    Args:
        file_path: Path of the file to read.
        file_type: File extension selecting the parser; matched
            case-insensitively, with or without leading dots
            (``"pdf"``, ``".PDF"``).

    Returns:
        The extracted text. For ``txt``/``md`` this is the decoded file
        content; for ``pdf``/``docx`` it is whatever the matching helper
        returns.

    Raises:
        ValueError: If the normalized extension is not one of
            ``txt``, ``md``, ``pdf`` or ``docx``.
    """
    ext = file_type.lower().lstrip(".")

    if ext in ("txt", "md"):
        # "utf-8-sig" decodes exactly like "utf-8" but drops a leading
        # byte-order mark, so files saved with a BOM do not leak a
        # stray "\ufeff" into the extracted text. Undecodable bytes
        # are still replaced rather than raising.
        with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
            return f.read()

    if ext == "pdf":
        return _parse_pdf(file_path)

    if ext == "docx":
        return _parse_docx(file_path)

    raise ValueError(f"Unsupported file type: {ext}")
|
||||
|
||||
|
||||
def _parse_pdf(file_path: str) -> str:
    """Return the text pdfminer extracts from the PDF at *file_path*.

    A falsy extraction result (``None`` or an empty string) is
    normalized to ``""``.
    """
    # Imported lazily so pdfminer is only loaded when a PDF is parsed.
    from pdfminer.high_level import extract_text

    return extract_text(file_path) or ""
|
||||
|
||||
|
||||
def _parse_docx(file_path: str) -> str:
    """Return the paragraph text of the .docx file at *file_path*.

    Paragraphs whose text is empty or whitespace-only are skipped; the
    remaining paragraph texts are joined with a single newline each.
    """
    # Imported lazily so python-docx is only loaded when a .docx is parsed.
    from docx import Document

    # NOTE(review): only ``paragraphs`` is read here; text held elsewhere
    # in the document (e.g. tables) is presumably not included - confirm.
    kept = []
    for para in Document(file_path).paragraphs:
        content = para.text
        if content.strip():
            kept.append(content)
    return "\n".join(kept)
|
||||
Reference in New Issue
Block a user