diff --git a/frontend/__init__.py b/frontend/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/frontend/html/upload.html b/frontend/html/upload.html
index 234c977..a49c9e3 100644
--- a/frontend/html/upload.html
+++ b/frontend/html/upload.html
@@ -143,55 +143,7 @@

Upload a File

✅ Upload Complete!
-
-      function goToChat() {
-        window.location.href = "chat.html";
-      }
-
diff --git a/frontend/main.py b/frontend/main.py
index 0523709..3ad5235 100644
--- a/frontend/main.py
+++ b/frontend/main.py
@@ -5,7 +5,10 @@
 import sys
 from datetime import datetime
 
-import webview
+try:
+    import webview
+except ImportError:
+    webview = None  # headless/CI environments without pywebview
 
 # === Add project root to sys.path ===
 here = os.path.dirname(os.path.abspath(__file__))  # frontend/
@@ -157,8 +160,75 @@ def save_file(self, filename, file_data_base64):
         except Exception as e:
             return f"error: {str(e)}"
 
+    def save_file_chunk(self, filename, chunk_data, chunk_index, is_last):
+        """
+        Persist a single chunk of an uploaded file to disk and trigger
+        post-processing once the final chunk is received.
+
+        This method is invoked repeatedly during a chunked file upload
+        initiated from the frontend. Each call writes base64-decoded
+        binary data to a target file, either creating it on the first
+        chunk or appending to it for subsequent chunks.
+
+        Parameters
+        ----------
+        filename : str
+            Name of the file being uploaded (directory parts are stripped).
+        chunk_data : str
+            Base64-encoded string representing the current file chunk.
+        chunk_index : int
+            Zero-based index of the current chunk. Used to determine
+            whether to open the file in write or append mode.
+        is_last : bool
+            Indicates whether this is the final chunk of the upload.
+            When True, a post-upload processing pipeline is executed.
+
+        Side Effects
+        ------------
+        - Creates the upload directory if it does not exist.
+        - Writes binary data to disk.
+        - Executes a subprocess for hallucination reduction when the
+          final chunk is received.
+
+        Returns
+        -------
+        str
+            "success" if the chunk is saved successfully and any required
+            post-processing completes, otherwise an error message
+            prefixed with "error:".
+        """
+        try:
+            folder = os.path.join(here, "../data/raw")
+            os.makedirs(folder, exist_ok=True)
+            filepath = os.path.join(folder, os.path.basename(filename))
+
+            mode = "ab" if chunk_index > 0 else "wb"
+
+            with open(filepath, mode) as f:
+                f.write(base64.b64decode(chunk_data))
+
+            if is_last:
+                project_root = os.path.abspath(os.path.join(here, ".."))
+                env = os.environ.copy()
+                env["CUDA_VISIBLE_DEVICES"] = ""
+                env["TOKENIZERS_PARALLELISM"] = "false"
+                env.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:true")
+                subprocess.run(
+                    [sys.executable, "-m", "hallucination_reduction.main"],
+                    cwd=project_root,
+                    check=True,
+                    env=env,
+                )
+
+            return "success"
+        except Exception as e:
+            return f"error: {str(e)}"
+
 def try_backends():
+    if webview is None:
+        print("pywebview is not available in this environment.")
+        return False
     # Suppress tokenizers parallelism warning
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
diff --git a/frontend/script/upload.js b/frontend/script/upload.js
new file mode 100644
index 0000000..55de2ee
--- /dev/null
+++ b/frontend/script/upload.js
@@ -0,0 +1,103 @@
+//Define Maximum File Size as 50MB
+const MAX_FILE_SIZE = 50 * 1024 * 1024;
+
+//Define Upload Chunk Size as 1MB
+const CHUNK_SIZE = 1024 * 1024;
+
+function arrayBufferToBase64(buffer) {
+  let binary = "";
+  const bytes = new Uint8Array(buffer);
+  const len = bytes.byteLength;
+
+  for (let i = 0; i < len; i++) {
+    binary += String.fromCharCode(bytes[i]);
+  }
+
+  return btoa(binary);
+}
+
+function resetUploadUI() {
+  const progressContainer = document.querySelector(".progress-container");
+  const progressBar = document.getElementById("progressBar");
+  const progressText = document.getElementById("progressText");
+  const doneMessage = document.getElementById("doneMessage");
+
+  if (progressContainer) {
+    progressContainer.style.display = "none";
+  }
+  if (progressBar) {
+    progressBar.style.width = "0%";
+  }
+  if (progressText) {
+    progressText.textContent = "0%";
+  }
+  if (doneMessage) {
+    doneMessage.style.display = "none";
+  }
+}
+
+async function uploadFile() {
+  const fileInput = document.getElementById("fileInput");
+
+  if (!fileInput || !fileInput.files || fileInput.files.length === 0) {
+    alert("Please select a file first.");
+    return;
+  }
+
+  const file = fileInput.files[0];
+
+  if (file.size > MAX_FILE_SIZE) {
+    alert("File is too large. Maximum allowed size is 50 MB.");
+    return;
+  }
+
+  const progressContainer = document.querySelector(".progress-container");
+  const progressBar = document.getElementById("progressBar");
+  const progressText = document.getElementById("progressText");
+  const doneMessage = document.getElementById("doneMessage");
+
+  progressContainer.style.display = "block";
+  progressBar.style.width = "0%";
+  progressText.textContent = "0%";
+  doneMessage.style.display = "none";
+
+  let offset = 0;
+  let chunkIndex = 0;
+
+  if (!window.pywebview || !window.pywebview.api || typeof window.pywebview.api.save_file_chunk !== "function") {
+    alert("Upload service is not available. Please try again.");
+    resetUploadUI();
+    return;
+  }
+
+  while (offset < file.size) {
+    const chunk = file.slice(offset, offset + CHUNK_SIZE);
+    const arrayBuffer = await chunk.arrayBuffer();
+
+    const base64Chunk = arrayBufferToBase64(arrayBuffer);
+    const result = await window.pywebview.api.save_file_chunk(
+      file.name,
+      base64Chunk,
+      chunkIndex,
+      offset + CHUNK_SIZE >= file.size
+    );
+    if (result !== "success") {
+      alert("Upload failed: " + result);
+      resetUploadUI();
+      return;
+    }
+
+    offset = Math.min(offset + CHUNK_SIZE, file.size);
+    chunkIndex++;
+
+    const progress = Math.round(Math.min((offset / file.size) * 100, 100));
+    progressBar.style.width = progress + "%";
+    progressText.textContent = progress + "%";
+  }
+  doneMessage.style.display = "block";
+  setTimeout(() => { window.location.href = "chat.html"; }, 1000);
+}
+
+function goToChat() {
+  window.location.href = "chat.html";
+}
diff --git a/tests/unit/test_frontend_main.py b/tests/unit/test_frontend_main.py
new file mode 100644
index 0000000..b551293
--- /dev/null
+++ b/tests/unit/test_frontend_main.py
@@ -0,0 +1,76 @@
+import base64
+import os
+
+from frontend.main import Api
+
+
+class TestSaveFileChunk:
+    def test_creates_file_on_first_chunk(self, tmp_path, monkeypatch):
+        monkeypatch.setattr("frontend.main.here", str(tmp_path))
+        monkeypatch.setattr("frontend.main.subprocess.run", lambda *a, **k: None)
+
+        api = Api.__new__(Api)
+
+        data = base64.b64encode(b"hello").decode()
+        result = api.save_file_chunk(
+            filename="test.txt",
+            chunk_data=data,
+            chunk_index=0,
+            is_last=False,
+        )
+
+        assert result == "success"
+
+        file_path = os.path.join(tmp_path, "../data/raw/test.txt")
+        with open(file_path, "rb") as f:
+            assert f.read() == b"hello"
+
+    def test_appends_on_subsequent_chunks(self, tmp_path, monkeypatch):
+        monkeypatch.setattr("frontend.main.here", str(tmp_path))
+        monkeypatch.setattr("frontend.main.subprocess.run", lambda *a, **k: None)
+
+        api = Api.__new__(Api)
+
+        chunk1 = base64.b64encode(b"hello ").decode()
+        chunk2 = base64.b64encode(b"world").decode()
+
+        api.save_file_chunk("test.txt", chunk1, 0, False)
+        api.save_file_chunk("test.txt", chunk2, 1, False)
+
+        file_path = os.path.join(tmp_path, "../data/raw/test.txt")
+        with open(file_path, "rb") as f:
+            assert f.read() == b"hello world"
+
+    def test_runs_subprocess_on_last_chunk(self, tmp_path, monkeypatch):
+        monkeypatch.setattr("frontend.main.here", str(tmp_path))
+        monkeypatch.setattr("frontend.main.subprocess.run", lambda *a, **k: None)
+
+        api = Api.__new__(Api)
+
+        called = {"ran": False}
+
+        def fake_run(*args, **kwargs):
+            called["ran"] = True
+
+        monkeypatch.setattr("frontend.main.subprocess.run", fake_run)
+
+        data = base64.b64encode(b"data").decode()
+        result = api.save_file_chunk("final.txt", data, 0, True)
+
+        assert result == "success"
+        assert called["ran"] is True
+
+    def test_returns_error_on_exception(self, monkeypatch):
+        monkeypatch.setattr("frontend.main.subprocess.run", lambda *a, **k: None)
+
+        api = Api.__new__(Api)
+
+        def fake_open(*args, **kwargs):
+            raise IOError("disk failure")
+
+        monkeypatch.setattr("builtins.open", fake_open)
+
+        data = base64.b64encode(b"x").decode()
+        result = api.save_file_chunk("fail.txt", data, 0, False)
+
+        assert result.startswith("error:")