diff --git a/README.md b/README.md index c149d47e..aaa0bb7a 100644 --- a/README.md +++ b/README.md @@ -2,23 +2,6 @@ This is a minimal implementation of the RAG model for question answering. -## The Course - -This is an educational project where all of the codes where explained (step by step) via a set of `Arabic` youtube videos. Please check the list: - -| # | Title | Link | Codes | -|---|------------------------------------------|------------------------------------------------------------------------------------------------------|----------------------------------------------------| -| 1 | About the Course ماذا ولمـــاذا | [Video](https://www.youtube.com/watch?v=Vv6e2Rb1Q6w&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj) | NA | -| 2 | What will we build ماذا سنبنى في المشروع | [Video](https://www.youtube.com/watch?v=_l5S5CdxE-Q&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj&index=2) | NA | -| 3 | Setup your tools الأدوات الأساسية | [Video](https://www.youtube.com/watch?v=VSFbkFRAT4w&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj&index=3) | NA | -| 4 | Project Architecture | [Video](https://www.youtube.com/watch?v=Ei_nBwBbFUQ&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj&index=4) | [branch](https://github.com/bakrianoo/mini-rag/tree/tut-001) | -| 5 | Welcome to FastAPI | [Video](https://www.youtube.com/watch?v=cpOuCdzN_Mo&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj&index=5) | [branch](https://github.com/bakrianoo/mini-rag/tree/tut-002) | -| 6 | Nested Routes + Env Values | [Video](https://www.youtube.com/watch?v=CrR2Bz2Y7Hw&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj&index=6) | [branch](https://github.com/bakrianoo/mini-rag/tree/tut-003) | -| 7 | Uploading a File | [Video](https://www.youtube.com/watch?v=5alMKCbFqWs&list=PLvLvlVqNQGHCUR2p0b8a0QpVjDUg50wQj&index=7) | [branch](https://github.com/bakrianoo/mini-rag/tree/tut-004) | - - - - ## Requirements - Python 3.8 or later @@ -55,14 +38,4 @@ $ pip install -r requirements.txt $ cp .env.example .env ``` -Set your environment variables in the `.env` 
file. Like `OPENAI_API_KEY` value. - -## Run the FastAPI server - -```bash -$ uvicorn main:app --reload --host 0.0.0.0 --port 5000 -``` - -## POSTMAN Collection - -Download the POSTMAN collection from [/assets/mini-rag-app.postman_collection.json](/assets/mini-rag-app.postman_collection.json) +Set your environment variables in the `.env` file. Like `OPENAI_API_KEY` value. \ No newline at end of file diff --git a/__pycache__/main.cpython-310.pyc b/__pycache__/main.cpython-310.pyc new file mode 100644 index 00000000..7e0ad65c Binary files /dev/null and b/__pycache__/main.cpython-310.pyc differ diff --git a/src/.env b/src/.env new file mode 100644 index 00000000..ad9f0aa8 --- /dev/null +++ b/src/.env @@ -0,0 +1,7 @@ +APP_NAME="mini-rag" +APP_VERSION="0.1.0" +OPENAI_API_KEY="********" + +FILE_ALLOWED_TYPES=["text/plain", "application/pdf"] +FILE_MAX_SIZE=10 +FILE_DEFAULT_CHUNK_SIZE=512000 # 512 KB diff --git a/src/.env.example b/src/.env.example index 60ec87a5..a939daa6 100644 --- a/src/.env.example +++ b/src/.env.example @@ -1,7 +1,7 @@ -APP_NAME="mini-RAG" -APP_VERSION="0.1" -OPENAI_API_KEY="" - = -FILE_ALLOWED_TYPES= -FILE_MAX_SIZE=10 -FILE_DEFAULT_CHUNK_SIZE=512000 # 512KB +APP_NAME="mini-rag" +APP_VERSION="0.1.0" +OPENAI_API_KEY="" + +FILE_ALLOWED_TYPES= +FILE_MAX_SIZE=10 +FILE_DEFAULT_CHUNK_SIZE=512000 # 512 KB diff --git a/src/.gitignore b/src/.gitignore index 68bc17f9..64d49ae3 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,6 +1,6 @@ # Byte-compiled / optimized / DLL files __pycache__/ -*.py[cod] +*.py[codz] *$py.class # C extensions @@ -27,8 +27,8 @@ share/python-wheels/ MANIFEST # PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec @@ -46,7 +46,7 @@ htmlcov/ nosetests.xml coverage.xml *.cover -*.py,cover +*.py.cover .hypothesis/ .pytest_cache/ cover/ @@ -92,22 +92,37 @@ ipython_config.py # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. -#Pipfile.lock +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock +# poetry.lock +# poetry.toml # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ @@ -116,11 +131,25 @@ __pypackages__/ celerybeat-schedule celerybeat.pid +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + # SageMath parsed files *.sage.py # Environments .env +.envrc .venv env/ venv/ @@ -153,8 +182,35 @@ dmypy.json cython_debug/ # PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml \ No newline at end of file diff --git a/src/__pycache__/__init__.cpython-310.pyc b/src/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 00000000..823105c6 Binary files /dev/null and b/src/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/__pycache__/base.cpython-310.pyc b/src/__pycache__/base.cpython-310.pyc new file mode 100644 index 00000000..946a4c8b Binary files /dev/null and b/src/__pycache__/base.cpython-310.pyc differ diff --git a/src/assets/.gitignore b/src/assets/.gitignore index 027271b9..45afc370 100644 --- a/src/assets/.gitignore +++ b/src/assets/.gitignore @@ -1 +1 @@ -files +files \ No newline at end of file diff --git a/src/assets/mini-rag-app.postman_collection.json b/src/assets/mini-rag-app.postman_collection.json index 2d58f447..b006f6bd 100644 --- a/src/assets/mini-rag-app.postman_collection.json +++ b/src/assets/mini-rag-app.postman_collection.json @@ -1,57 +1,50 @@ { - "info": { - "_postman_id": "1f67dedb-1b93-4639-9f96-7fe8681693f4", - "name": "mini-rag-app", - "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", - "_exporter_id": "854486", - "_collection_link": "https://www.postman.com/gold-water-645258/workspace/mini-rag-app/collection/854486-1f67dedb-1b93-4639-9f96-7fe8681693f4?action=share&source=collection_link&creator=854486" - }, - "item": [ - { - "name": "welcome-request", - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{api}}/welcome", - "host": [ - "{{api}}" - ], - "path": [ - "welcome" - ] - } - }, - "response": [] - } - ], - "event": [ - { - "listen": "prerequest", - "script": { - "type": "text/javascript", - "packages": {}, - "exec": [ - "" - ] - } - }, - { - "listen": "test", - 
"script": { - "type": "text/javascript", - "packages": {}, - "exec": [ - "" - ] - } - } - ], - "variable": [ - { - "key": "api", - "value": "http://127.0.0.1:8000", - "type": "string" - } - ] + "info": { + "_postman_id": "febe8027-4bc0-4f8e-ba7c-73b5eb97fa50", + "name": "mini-rag-app", + "description": "### Welcome to Postman! This is your first collection. \n\nCollections are your starting point for building and testing APIs. You can use this one to:\n\n• Group related requests\n• Test your API in real-world scenarios\n• Document and share your requests\n\nUpdate the name and overview whenever you’re ready to make it yours.\n\n[Learn more about Postman Collections.](https://learning.postman.com/docs/collections/collections-overview/)", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "_exporter_id": "51802644", + "_collection_link": "https://go.postman.co/collection/51802644-febe8027-4bc0-4f8e-ba7c-73b5eb97fa50?source=collection_link" + }, + "item": [ + { + "name": "welcome-request", + "request": { + "method": "GET", + "header": [] + }, + "response": [] + } + ], + "event": [ + { + "listen": "prerequest", + "script": { + "type": "text/javascript", + "packages": {}, + "requests": {}, + "exec": [ + "" + ] + } + }, + { + "listen": "test", + "script": { + "type": "text/javascript", + "packages": {}, + "requests": {}, + "exec": [ + "" + ] + } + } + ], + "variable": [ + { + "key": "api", + "value": "" + } + ] } \ No newline at end of file diff --git a/src/controllers/BaseController.py b/src/controllers/BaseController.py index 78554674..e7901716 100644 --- a/src/controllers/BaseController.py +++ b/src/controllers/BaseController.py @@ -3,17 +3,13 @@ import random import string + class BaseController: - def __init__(self): - self.app_settings = get_settings() - - self.base_dir = os.path.dirname( os.path.dirname(__file__) ) - self.files_dir = os.path.join( - self.base_dir, - "assets/files" - ) - - def generate_random_string(self, length: 
int=12): + self.base_dir = os.path.dirname(os.path.dirname(__file__)) + self.files_dir = os.path.join(self.base_dir, "assets/files") + + def generate_random_string(self, length: int = 12): return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length)) + \ No newline at end of file diff --git a/src/controllers/DataController.py b/src/controllers/DataController.py index d4bd5aab..802e0286 100644 --- a/src/controllers/DataController.py +++ b/src/controllers/DataController.py @@ -9,49 +9,32 @@ class DataController(BaseController): def __init__(self): super().__init__() - self.size_scale = 1048576 # convert MB to bytes - - def validate_uploaded_file(self, file: UploadFile): + self.size_scale = 1048576 + def validate_file(self, file: UploadFile): if file.content_type not in self.app_settings.FILE_ALLOWED_TYPES: return False, ResponseSignal.FILE_TYPE_NOT_SUPPORTED.value - if file.size > self.app_settings.FILE_MAX_SIZE * self.size_scale: - return False, ResponseSignal.FILE_SIZE_EXCEEDED.value - + return False, ResponseSignal.FILE_SIZE_EXCEEDED.value return True, ResponseSignal.FILE_VALIDATED_SUCCESS.value - def generate_unique_filepath(self, orig_file_name: str, project_id: str): + def generate_unique_filename(self, orig_file_name: str, project_id: str): random_key = self.generate_random_string() project_path = ProjectController().get_project_path(project_id=project_id) - - cleaned_file_name = self.get_clean_file_name( - orig_file_name=orig_file_name - ) - - new_file_path = os.path.join( - project_path, - random_key + "_" + cleaned_file_name - ) - + clean_file_name = self.get_clean_filename(orig_file_name=orig_file_name) + new_file_path = os.path.join(project_path, random_key + "_" + clean_file_name) + while os.path.exists(new_file_path): random_key = self.generate_random_string() - new_file_path = os.path.join( - project_path, - random_key + "_" + cleaned_file_name - ) - - return new_file_path, random_key + "_" + cleaned_file_name - - def 
get_clean_file_name(self, orig_file_name: str): - - # remove any special characters, except underscore and . - cleaned_file_name = re.sub(r'[^\w.]', '', orig_file_name.strip()) - - # replace spaces with underscore - cleaned_file_name = cleaned_file_name.replace(" ", "_") + new_file_path = os.path.join(project_path, random_key + "_" + clean_file_name) + + return new_file_path - return cleaned_file_name + def get_clean_filename(self, orig_file_name: str): + clean_file_name = re.sub(r'[^\w.]', '', orig_file_name.strip()) + clean_file_name = clean_file_name.replace(" ", "_") + return clean_file_name + \ No newline at end of file diff --git a/src/controllers/ProjectController.py b/src/controllers/ProjectController.py index 08862cd0..4dbcd755 100644 --- a/src/controllers/ProjectController.py +++ b/src/controllers/ProjectController.py @@ -3,20 +3,15 @@ from models import ResponseSignal import os + class ProjectController(BaseController): - + def __init__(self): super().__init__() def get_project_path(self, project_id: str): - project_dir = os.path.join( - self.files_dir, - project_id - ) + project_dir = os.path.join(self.files_dir, project_id) if not os.path.exists(project_dir): os.makedirs(project_dir) - return project_dir - - diff --git a/src/controllers/__init__.py b/src/controllers/__init__.py index 9007dbd8..27959616 100644 --- a/src/controllers/__init__.py +++ b/src/controllers/__init__.py @@ -1,2 +1,2 @@ from .DataController import DataController -from .ProjectController import ProjectController +from .ProjectController import ProjectController \ No newline at end of file diff --git a/src/main.py b/src/main.py index 79cee57d..20e065d6 100644 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,8 @@ from fastapi import FastAPI -from routes import base, data +from routes.base import base_router +from routes.data import data_router app = FastAPI() -app.include_router(base.base_router) -app.include_router(data.data_router) + +app.include_router(base_router) 
+app.include_router(data_router) diff --git a/src/models/__init__.py b/src/models/__init__.py index b30f5a49..8053833f 100644 --- a/src/models/__init__.py +++ b/src/models/__init__.py @@ -1 +1 @@ -from .enums.ResponseEnums import ResponseSignal +from .enums.ResponseEnums import ResponseSignal \ No newline at end of file diff --git a/src/models/enums/ResponseEnums.py b/src/models/enums/ResponseEnums.py index 684376c0..bb28c5e8 100644 --- a/src/models/enums/ResponseEnums.py +++ b/src/models/enums/ResponseEnums.py @@ -1,9 +1,9 @@ from enum import Enum class ResponseSignal(Enum): - - FILE_VALIDATED_SUCCESS = "file_validate_successfully" FILE_TYPE_NOT_SUPPORTED = "file_type_not_supported" - FILE_SIZE_EXCEEDED = "file_size_exceeded" - FILE_UPLOAD_SUCCESS = "file_upload_success" - FILE_UPLOAD_FAILED = "file_upload_failed" + FILE_SIZE_EXCEEDED = "file_size_exceeded" + FILE_UPLOADED_SUCCESS = "file_upload_successfully" + FILE_UPLOADED_FAILED = "file_upload_failed" + FILE_VALIDATED_SUCCESS = "file_validated_successfully" + \ No newline at end of file diff --git a/src/routes/base.py b/src/routes/base.py index 773797ef..1581c726 100644 --- a/src/routes/base.py +++ b/src/routes/base.py @@ -1,5 +1,6 @@ from fastapi import FastAPI, APIRouter, Depends import os + from helpers.config import get_settings, Settings base_router = APIRouter( @@ -7,13 +8,12 @@ tags=["api_v1"], ) + @base_router.get("/") async def welcome(app_settings: Settings = Depends(get_settings)): + # app_settings = get_settings() app_name = app_settings.APP_NAME app_version = app_settings.APP_VERSION - return { - "app_name": app_name, - "app_version": app_version, - } + return {"app_name": app_name, "app_version": app_version} diff --git a/src/routes/data.py b/src/routes/data.py index 97ee5319..78d61d12 100644 --- a/src/routes/data.py +++ b/src/routes/data.py @@ -1,61 +1,57 @@ from fastapi import FastAPI, APIRouter, Depends, UploadFile, status from fastapi.responses import JSONResponse import os +import aiofiles 
from helpers.config import get_settings, Settings from controllers import DataController, ProjectController -import aiofiles from models import ResponseSignal import logging -logger = logging.getLogger('uvicorn.error') +logger = logging.getLogger("uvicorn.error") data_router = APIRouter( prefix="/api/v1/data", - tags=["api_v1", "data"], + tags=["data_api_v1"], ) -@data_router.post("/upload/{project_id}") -async def upload_data(project_id: str, file: UploadFile, - app_settings: Settings = Depends(get_settings)): - - - # validate the file properties - data_controller = DataController() - is_valid, result_signal = data_controller.validate_uploaded_file(file=file) +@data_router.post("/upload/{project_id}") +async def upload_data( + project_id: str, + file: UploadFile, + app_settings: Settings = Depends(get_settings) +): + data_controller = DataController() + is_valid, result_signal = data_controller.validate_file(file=file) if not is_valid: return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, content={ - "signal": result_signal + "signal": result_signal, } ) project_dir_path = ProjectController().get_project_path(project_id=project_id) - file_path, file_id = data_controller.generate_unique_filepath( - orig_file_name=file.filename, - project_id=project_id - ) - + file_path = data_controller.generate_unique_filename(orig_file_name=file.filename, project_id=project_id) + try: async with aiofiles.open(file_path, "wb") as f: while chunk := await file.read(app_settings.FILE_DEFAULT_CHUNK_SIZE): await f.write(chunk) except Exception as e: - logger.error(f"Error while uploading file: {e}") + logger.error(f"File uploaded failed: {e}") return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, content={ - "signal": ResponseSignal.FILE_UPLOAD_FAILED.value + "signal": ResponseSignal.FILE_UPLOADED_FAILED.value } ) return JSONResponse( content={ - "signal": ResponseSignal.FILE_UPLOAD_SUCCESS.value, - "file_id": file_id + "signal": 
ResponseSignal.FILE_UPLOADED_SUCCESS.value } )