From 9612b67721a1c186521ac56d837150c825a2471b Mon Sep 17 00:00:00 2001 From: ITQ Date: Sun, 2 Mar 2025 17:04:44 +0300 Subject: [PATCH] this will left production in ruins --- .gitlab-ci.yml | 18 ++ compose.yaml | 11 + services/backend/Dockerfile.staticfiles | 2 + services/backend/config/settings.py | 12 +- services/backend/integrations/__init__.py | 0 .../backend/integrations/checker/__init__.py | 0 .../integrations/checker/healthcheck.py | 22 ++ services/checker/.gitignore | 170 +++++++++++ services/checker/Dockerfile | 42 +++ services/checker/Dockerfile.checker | 19 ++ services/checker/README.md | 87 ++++++ services/checker/checker_requirements.txt | 7 + services/checker/main.py | 289 ++++++++++++++++++ services/checker/pyproject.toml | 13 + services/checker/scripts/check | 8 + 15 files changed, 692 insertions(+), 8 deletions(-) create mode 100644 services/backend/integrations/__init__.py create mode 100644 services/backend/integrations/checker/__init__.py create mode 100644 services/backend/integrations/checker/healthcheck.py create mode 100644 services/checker/.gitignore create mode 100644 services/checker/Dockerfile create mode 100644 services/checker/Dockerfile.checker create mode 100644 services/checker/README.md create mode 100644 services/checker/checker_requirements.txt create mode 100644 services/checker/main.py create mode 100644 services/checker/pyproject.toml create mode 100755 services/checker/scripts/check diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a734261..fde8bb7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -54,6 +54,24 @@ build_backend-staticfiles: DOCKERFILE_PATH: "Dockerfile.staticfiles" IMAGE_NAME: "$CI_REGISTRY_IMAGE/backend-staticfiles" +build_checker: + <<: *build-template + rules: + - if: '$CI_COMMIT_REF_NAME == "master"' + variables: + CONTEXT: "${CI_PROJECT_DIR}/services/checker" + DOCKERFILE_PATH: "Dockerfile" + IMAGE_NAME: "$CI_REGISTRY_IMAGE/checker" + +build_custom-python: + <<: *build-template + rules: + - if: 
'$CI_COMMIT_REF_NAME == "master"' + variables: + CONTEXT: "${CI_PROJECT_DIR}/services/checker" + DOCKERFILE_PATH: "Dockerfile.checker" + IMAGE_NAME: "$CI_REGISTRY_IMAGE/custom-python" + build_docs: <<: *build-template rules: diff --git a/compose.yaml b/compose.yaml index 7a28eac..7eaa8c7 100644 --- a/compose.yaml +++ b/compose.yaml @@ -370,6 +370,17 @@ services: restart: unless-stopped shm_size: 4mb + checker: + image: gitlab.prodcontest.ru:5050/team-15/project/checker:latest + build: + context: ./services/checker + dockerfile: Dockerfile + restart: unless-stopped + volumes: + - type: bind + source: /var/run/docker.sock + target: /prometheus + proxy: image: docker.io/nginx:1.27-alpine3.21 configs: diff --git a/services/backend/Dockerfile.staticfiles b/services/backend/Dockerfile.staticfiles index 5150bf5..55556cb 100644 --- a/services/backend/Dockerfile.staticfiles +++ b/services/backend/Dockerfile.staticfiles @@ -24,4 +24,6 @@ FROM docker.io/nginx:latest COPY --from=builder /app/static /usr/share/nginx/html +COPY ../checker/checher_requirements.txt . 
class CheckerHealthCheck(BaseHealthCheckBackend):
    """Health-check plugin that probes the checker service over HTTP.

    The base URL is read from ``settings.CHECKER_API_ENDPOINT`` and the
    service's ``/health`` route is requested with a one second timeout.
    """

    # Read by the health_check framework; presumably marks this dependency as
    # non-critical -- confirm against the django-health-check documentation.
    critical_service = False

    def check_status(self) -> None:
        """Record an error when the checker service cannot be reached.

        An error is added when the endpoint is not configured, when the
        request fails at the transport level (connection error, timeout,
        malformed URL) or when the service answers with a 5xx status.
        """
        # The setting defaults to None, so a missing or empty value is
        # reported as a health error instead of crashing the check.
        base_url = getattr(settings, "CHECKER_API_ENDPOINT", None)
        if not base_url:
            self.add_error("Checker service endpoint is not configured")
            return

        try:
            # The checker application exposes ``/health``; it has no ``/ping``.
            response = httpx.get(
                f"{str(base_url).rstrip('/')}/health", timeout=1
            )
        except (httpx.HTTPError, httpx.InvalidURL):
            # InvalidURL is not a subclass of HTTPError, so it is listed
            # explicitly to cover a malformed endpoint value.
            self.add_error("Checker service is unaccessible")
            return

        if response.status_code >= status.INTERNAL_SERVER_ERROR:
            self.add_error("Checker service is unaccessible")

    def identifier(self) -> str:
        """Return the class name, used as this check's display name."""
        return self.__class__.__name__
+Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+.idea/ + +# PyPI configuration file +.pypirc + +# Ruff files +.ruff_cache diff --git a/services/checker/Dockerfile b/services/checker/Dockerfile new file mode 100644 index 0000000..9f0295f --- /dev/null +++ b/services/checker/Dockerfile @@ -0,0 +1,42 @@ +# Stage 1: Install dependencies +FROM docker.io/python:3.11-alpine3.20 AS builder + +COPY --from=ghcr.io/astral-sh/uv:0.4.30 /uv /uvx /bin/ + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONOPTIMIZE=2 \ + UV_COMPILE_BYTECODE=1 \ + UV_PROJECT_ENVIRONMENT=/opt/venv + +COPY pyproject.toml . + +RUN uv sync --no-dev --no-install-project --no-cache + + +# Stage 2: Start the application +FROM docker.io/python:3.11-alpine3.20 + +WORKDIR /app + +COPY --from=builder /opt/venv /opt/venv + +COPY . . + +RUN adduser -D -g '' app && chown -R app:app ./ + +USER app + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONOPTIMIZE=2 \ + PATH="/opt/venv/bin:$PATH" + +EXPOSE 8080 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --start-interval=2s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://127.0.0.1:8000/ping || exit 1 + +CMD uvicorn main:app --host 0.0.0.0 --port 8000 diff --git a/services/checker/Dockerfile.checker b/services/checker/Dockerfile.checker new file mode 100644 index 0000000..f528db0 --- /dev/null +++ b/services/checker/Dockerfile.checker @@ -0,0 +1,19 @@ +FROM docker.io/python:3.11-slim + +ENV PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY checker_requirements.txt . 
+ +RUN pip install --no-cache-dir -r checker_requirements.txt + +RUN useradd -m appuser && chown -R appuser /app +USER appuser + +WORKDIR /app + +CMD ["python"] diff --git a/services/checker/README.md b/services/checker/README.md new file mode 100644 index 0000000..927c0e7 --- /dev/null +++ b/services/checker/README.md @@ -0,0 +1,87 @@ +# DataRush Checker + +## Prerequisites + +Ensure you have the following installed on your system: + +- [Python](https://www.python.org/) (>=3.10,<3.12) +- [uv](https://docs.astral.sh/uv/) +- [Docker](https://www.docker.com/) (for containerized setup) + +## Basic setup + +### Installation + +#### Clone the project + +```bash +git clone git@gitlab.prodcontest.ru:team-15/project.git +``` + +#### Go to the project directory + +```bash +cd project/services/checker +``` + +#### Install dependencies + +##### For dev environment + +```bash +uv sync --all-extras +``` + +##### For prod environment + +```bash +uv sync --no-dev +``` + +#### Running + +##### Apply migrations + +```bash +uv run python manage.py migrate +``` + +##### Start celery worker + +```bash +celery -A config worker -l INFO +``` + +##### Start server + +In dev mode: + +```bash +uv run python manage.py runserver +``` + +In prod mode: + +```bash +uv run gunicorn config.wsgi +``` + +## Containerized setup + +### Clone the project + +```bash +git clone git@gitlab.prodcontest.ru:team-15/project.git +``` + +### Go to the project directory + +```bash +cd project/services/checker +``` + +### Build docker image + +```bash +docker build -t datarush-checker . 
+``` diff --git a/services/checker/checker_requirements.txt b/services/checker/checker_requirements.txt new file mode 100644 index 0000000..7cc32d3 --- /dev/null +++ b/services/checker/checker_requirements.txt @@ -0,0 +1,7 @@ +pandas==2.2.3 +numpy==2.2.3 +matplotlib==3.10.1 +scipy==1.15.2 +scikit-learn==1.6.1 +seaborn==0.13.2 +statsmodels==0.14.4 diff --git a/services/checker/main.py b/services/checker/main.py new file mode 100644 index 0000000..4b01fac --- /dev/null +++ b/services/checker/main.py @@ -0,0 +1,289 @@ +from fastapi import FastAPI, HTTPException, status +from pydantic import BaseModel, Field, HttpUrl, constr +import aiohttp +import asyncio +import docker +import hashlib +import os +import base64 +import tempfile +import logging +from urllib.parse import urlparse +import re + +app = FastAPI() +docker_client = docker.from_env() +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +DOCKER_IMAGE = "gitlab.python:3-slim" +CONTAINER_TIMEOUT = 60 +MAX_FILE_SIZE = 4 * 1024 * 1024 +ALLOWED_FILENAME_CHARS = r"[^a-zA-Z0-9_\-.]" + + +class FileDetails(BaseModel): + url: HttpUrl = Field( + ..., description="URL to download the file from (supports HTTP/HTTPS)" + ) + bind_path: str = Field( + ..., + description="Container path to bind the file (absolute)", + ) + + +class ExecutionRequest(BaseModel): + code: str = Field(..., description="Base64 encoded Python code to execute") + answer_file_path: str = Field( + "stdout", description="Base64 encoded path to result file or 'stdout'" + ) + expected_hash: str | None = Field( + None, description="Optional SHA-256 hash of expected output" + ) + files: list[FileDetails] = Field( + [], description="List of files to mount in container" + ) + + +class ExecutionResponse(BaseModel): + success: bool = Field(..., description="Execution success status") + hash_match: bool | None = Field( + None, description="Output hash matches expected (if provided)" + ) + output: str = Field(..., description="Captured 
async def download_file(
    session: aiohttp.ClientSession, url: str, dest_path: str
) -> None:
    """Download ``url`` into ``dest_path``, enforcing ``MAX_FILE_SIZE``.

    The body is read in 8 KiB chunks and held in memory; nothing is written
    to disk unless the whole body was received within the size limit.

    Args:
        session: Open aiohttp client session used for the request.
        url: Address to fetch.
        dest_path: Path of the file to create with the downloaded bytes.

    Raises:
        HTTPException: 400 when the response status is not 200, when the
            request fails with an aiohttp client error, or when the 30 second
            total timeout expires; 413 when the body exceeds ``MAX_FILE_SIZE``.
    """
    try:
        async with session.get(
            url, timeout=aiohttp.ClientTimeout(total=30)
        ) as resp:
            if resp.status != 200:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Failed to download {url} - Status {resp.status}",
                )

            # bytearray grows in place; ``bytes +=`` would re-copy the whole
            # buffer for every chunk received.
            buffer = bytearray()
            async for chunk in resp.content.iter_chunked(8192):
                buffer.extend(chunk)
                if len(buffer) > MAX_FILE_SIZE:
                    raise HTTPException(
                        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                        detail="File size exceeds 4MB limit",
                    )

            with open(dest_path, "wb") as f:
                f.write(buffer)
            logger.info(f"Downloaded {url} to {dest_path}")

    except asyncio.TimeoutError as e:
        # The total timeout surfaces as asyncio.TimeoutError, which is not an
        # aiohttp.ClientError; without this branch it would escape as an
        # unhandled error instead of a 400 response.
        logger.error(f"Download timed out for {url}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Download failed: request timed out",
        ) from e
    except aiohttp.ClientError as e:
        logger.error(f"Download error for {url}: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Download failed: {str(e)}",
        ) from e
volumes = {tmp_dir: {"bind": "/execution", "mode": "rw"}} + for host_path, container_path in bound_files.items(): + volumes[host_path] = {"bind": container_path, "mode": "ro"} + + container = docker_client.containers.run( + image=DOCKER_IMAGE, + command=command, + volumes=volumes, + working_dir="/execution", + stdout=True, + stderr=True, + detach=True, + mem_limit="100m", + network_mode="none", + cpu_period=100000, + cpu_quota=50000, + user="root", + security_opt=["no-new-privileges"], + ) + + exit_code = container.wait(timeout=timeout)["StatusCode"] + stdout = container.logs(stdout=True, stderr=False).decode().strip() + stderr = container.logs(stdout=False, stderr=True).decode().strip() + + return {"stdout": stdout, "stderr": stderr, "status": exit_code} + + except docker.errors.DockerException as e: + logger.error(f"Docker error: {str(e)}") + return { + "stdout": "", + "stderr": f"Container error: {str(e)}", + "status": -1, + } + finally: + if container: + try: + container.remove(force=True) + except docker.errors.DockerException: + pass + + +@app.post("/execute", response_model=ExecutionResponse) +async def execute_code(request: ExecutionRequest) -> ExecutionResponse: + try: + code = decode_base64(request.code, "code") + answer_path = ( + decode_base64(request.answer_file_path, "answer_file_path") + if request.answer_file_path != "stdout" + else "stdout" + ) + except HTTPException as e: + return ExecutionResponse( + success=False, + output="", + result_hash="", + error=e.detail, + hash_match=None, + ) + + if answer_path != "stdout": + if os.path.isabs(answer_path) or not validate_file_path(answer_path): + return ExecutionResponse( + success=False, + output="", + result_hash="", + error="Invalid answer file path", + hash_match=None, + ) + + with tempfile.TemporaryDirectory() as tmp_dir: + bound_files = {} + if request.files: + async with aiohttp.ClientSession() as session: + download_tasks = [] + for file in request.files: + filename = 
def validate_file_path(path: str) -> bool:
    """Return True when ``path`` is a plain file name that is safe to use.

    A valid name is non-empty, is not ``.`` or ``..``, does not start with
    ``-``, has no directory component, and consists only of alphanumeric
    characters, ``_``, ``-`` and ``.``.

    Args:
        path: Candidate file name, relative to the execution directory.

    Returns:
        True if the name passes every check, False otherwise.
    """
    # ``all()`` over an empty string is True, so emptiness is rejected
    # explicitly; "." and ".." name directories rather than files.
    if not path or path in {".", ".."}:
        return False

    # A leading dash would be parsed as a command-line option by a program
    # that receives the name as an argument.
    if path.startswith("-"):
        return False

    return (
        not os.path.isabs(path)
        and os.path.basename(path) == path
        and all(c.isalnum() or c in {"_", "-", "."} for c in path)
    )