From b706546435ec7d0abc9802983fff3c9c3997d970 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov <41117609+azhavoro@users.noreply.github.com> Date: Tue, 16 Jun 2020 16:35:50 +0300 Subject: [PATCH] Added clamav integration (#1712) * added clamav integration * updated license headers and changelog --- CHANGELOG.md | 1 + Dockerfile | 13 +++++++++++++ cvat/apps/auto_annotation/model_manager.py | 13 +++++++++++++ cvat/apps/engine/task.py | 5 ++++- cvat/apps/engine/utils.py | 16 ++++++++++++++++ cvat/apps/engine/views.py | 5 ++++- docker-compose.yml | 1 + supervisord.conf | 5 +++++ 8 files changed, 57 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b82b1389..edc74e02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Shortcut to switch split mode () - Built-in search for labels when create an object or change a label () - Better validation of labels and attributes in raw viewer () +- ClamAV antivirus integration () ### Changed - Removed information about e-mail from the basic user information () diff --git a/Dockerfile b/Dockerfile index 9c00bf8f..e6eb2911 100644 --- a/Dockerfile +++ b/Dockerfile @@ -129,6 +129,19 @@ RUN if [ "$WITH_DEXTR" = "yes" ]; then \ 7z e ${DEXTR_MODEL_DIR}/dextr.zip -o${DEXTR_MODEL_DIR} && rm ${DEXTR_MODEL_DIR}/dextr.zip; \ fi +ARG CLAM_AV +ENV CLAM_AV=${CLAM_AV} +RUN if [ "$CLAM_AV" = "yes" ]; then \ + apt-get update && \ + apt-get --no-install-recommends install -yq \ + clamav \ + libclamunrar9 && \ + sed -i 's/ReceiveTimeout 30/ReceiveTimeout 300/g' /etc/clamav/freshclam.conf && \ + freshclam && \ + chown -R ${USER}:${USER} /var/lib/clamav && \ + rm -rf /var/lib/apt/lists/*; \ + fi + COPY ssh ${HOME}/.ssh COPY utils ${HOME}/utils COPY cvat/ ${HOME}/cvat diff --git a/cvat/apps/auto_annotation/model_manager.py b/cvat/apps/auto_annotation/model_manager.py index 37f6cc05..7bac221a 100644 --- 
a/cvat/apps/auto_annotation/model_manager.py +++ b/cvat/apps/auto_annotation/model_manager.py @@ -19,6 +19,7 @@ from cvat.apps.authentication.auth import has_admin_role from cvat.apps.engine.serializers import LabeledDataSerializer from cvat.apps.dataset_manager.task import put_task_data, patch_task_data from cvat.apps.engine.frame_provider import FrameProvider +from cvat.apps.engine.utils import av_scan_paths from .models import AnnotationModel, FrameworkChoice from .model_loader import load_labelmap @@ -139,6 +140,7 @@ def create_or_update(dl_model_id, name, model_file, weights_file, labelmap_file, tmp_file.write(chunk) os.close(fd) return filename + is_create_request = dl_model_id is None if is_create_request: dl_model_id = create_empty(owner=owner) @@ -155,6 +157,17 @@ def create_or_update(dl_model_id, name, model_file, weights_file, labelmap_file, labelmap_file = save_file_as_tmp(labelmap_file) interpretation_file = save_file_as_tmp(interpretation_file) + files_to_scan = [] + if model_file: + files_to_scan.append(model_file) + if weights_file: + files_to_scan.append(weights_file) + if labelmap_file: + files_to_scan.append(labelmap_file) + if interpretation_file: + files_to_scan.append(interpretation_file) + av_scan_paths(*files_to_scan) + if owner: restricted = not has_admin_role(owner) else: diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index baf9f81e..998bc4af 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -1,5 +1,5 @@ -# Copyright (C) 2018 Intel Corporation +# Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT @@ -15,6 +15,7 @@ from urllib import request as urlrequest from cvat.apps.engine.media_extractors import get_mime, MEDIA_TYPES, Mpeg4ChunkWriter, ZipChunkWriter, Mpeg4CompressedChunkWriter, ZipCompressedChunkWriter from cvat.apps.engine.models import DataChoice +from cvat.apps.engine.utils import av_scan_paths import django_rq from django.conf import settings @@ -223,6 +224,8 @@ def 
_create_thread(tid, data):
     if data['server_files']:
         _copy_data_from_share(data['server_files'], upload_dir)
 
+    av_scan_paths(upload_dir)
+
     job = rq.get_current_job()
     job.meta['status'] = 'Media files are being extracted...'
     job.save_meta()
diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py
index e3b4954d..dd4a083d 100644
--- a/cvat/apps/engine/utils.py
+++ b/cvat/apps/engine/utils.py
@@ -1,8 +1,16 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
 import ast
 from collections import namedtuple
 import importlib
 import sys
 import traceback
+import subprocess
+import os
+
+from django.core.exceptions import ValidationError
 
 Import = namedtuple("Import", ["module", "name", "alias"])
 
@@ -58,3 +66,26 @@ def execute_python_code(source_code, global_vars=None, local_vars=None):
         _, _, tb = sys.exc_info()
         line_number = traceback.extract_tb(tb)[-1][1]
         raise InterpreterError("{} at line {}: {}".format(error_class, line_number, details))
+
+
+def av_scan_paths(*paths):
+    """Scan files or directories with ClamAV when the integration is enabled.
+
+    The scan runs only if the ``CLAM_AV`` environment variable equals ``yes``
+    and at least one path is given; otherwise the function does nothing.
+
+    Raises:
+        ValidationError: ``clamscan`` returned a non-zero exit code.
+    """
+    if os.environ.get('CLAM_AV') != 'yes' or not paths:
+        # With no targets clamscan would scan the current working directory.
+        return
+
+    # -i/-o: report infected files only; --recursive: descend into directories.
+    command = ['clamscan', '--no-summary', '-i', '-o', '--recursive']
+    command.extend(paths)
+    res = subprocess.run(command, capture_output=True)
+    if res.returncode:
+        # stdout lists infected files; fall back to stderr when it is empty.
+        report = res.stdout or res.stderr
+        raise ValidationError(report.decode(errors='replace').strip())
diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py
index 735d77c4..120125ea 100644
--- a/cvat/apps/engine/views.py
+++ b/cvat/apps/engine/views.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2019 Intel Corporation
+# Copyright (C) 2018-2020 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -45,6 +45,7 @@ from cvat.apps.engine.serializers import (
     LogEventSerializer, PluginSerializer, ProjectSerializer,
     RqStatusSerializer, TaskSerializer, UserSerializer)
 from cvat.settings.base import CSS_3RDPARTY, JS_3RDPARTY
+from cvat.apps.engine.utils import av_scan_paths
 
 from .
import models, task from .log import clogger, slogger @@ -821,6 +822,8 @@ def _import_annotations(request, rq_id, rq_func, pk, format_name): with open(filename, 'wb+') as f: for chunk in anno_file.chunks(): f.write(chunk) + + av_scan_paths(filename) rq_job = queue.enqueue_call( func=rq_func, args=(pk, filename, format_name), diff --git a/docker-compose.yml b/docker-compose.yml index d615ef5e..0b8aebfb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -50,6 +50,7 @@ services: DJANGO_CONFIGURATION: "production" TZ: "Etc/UTC" OPENVINO_TOOLKIT: "no" + CLAM_AV: "no" environment: DJANGO_MODWSGI_EXTRA_ARGS: "" ALLOWED_HOSTS: '*' diff --git a/supervisord.conf b/supervisord.conf index dde373cf..a991b2b9 100644 --- a/supervisord.conf +++ b/supervisord.conf @@ -47,6 +47,11 @@ command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -i environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock" numprocs=1 +[program:clamav_update] +command=bash -c "if [ \"${CLAM_AV}\" = 'yes' ]; then /usr/bin/freshclam -d \ + -l %(ENV_HOME)s/logs/freshclam.log --foreground=true; fi" +numprocs=1 + [program:runserver] ; Here need to run a couple of commands to initialize DB and copy static files. ; We cannot initialize DB on build because the DB should be online. Also some