diff --git a/CHANGELOG.md b/CHANGELOG.md index b4efddcb..51d31f72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Security - Updated ELK to 6.8.23 which uses log4j 2.17.1 () +- Added validation for URLs which used as remote data source () ## \[1.7.0] - 2021-11-15 diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 89079929..33464493 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -6,6 +6,7 @@ import itertools import os import sys +from rest_framework.serializers import ValidationError import rq import re import shutil @@ -14,6 +15,8 @@ from traceback import print_exception from urllib import parse as urlparse from urllib import request as urlrequest import requests +import ipaddress +import dns.resolver import django_rq from django.conf import settings @@ -203,6 +206,45 @@ def _validate_manifest(manifests, root_dir): raise Exception('Invalid manifest was uploaded') return None +def _validate_url(url): + def _validate_ip_address(ip_address): + if not ip_address.is_global: + raise ValidationError('Non public IP address \'{}\' is provided!'.format(ip_address)) + + ALLOWED_SCHEMES = ['http', 'https'] + + parsed_url = urlparse.urlparse(url) + + if parsed_url.scheme not in ALLOWED_SCHEMES: + raise ValueError('Unsupported URL sheme: {}. Only http and https are supported'.format(parsed_url.scheme)) + + try: + ip_address = ipaddress.ip_address(parsed_url.hostname) + _validate_ip_address(ip_address) + except ValueError as _: + ip_v4_records = None + ip_v6_records = None + try: + ip_v4_records = dns.resolver.query(parsed_url.hostname, 'A') + for record in ip_v4_records: + _validate_ip_address(ipaddress.ip_address(record.to_text())) + except ValidationError: + raise + except Exception as e: + slogger.glob.info('Cannot get A record for domain \'{}\': {}'.format(parsed_url.hostname, e)) + + try: + ip_v6_records = dns.resolver.query(parsed_url.hostname, 'AAAA') + for record in ip_v6_records: + _validate_ip_address(ipaddress.ip_address(record.to_text())) + except ValidationError: + raise + except Exception as e: + slogger.glob.info('Cannot get AAAA record for domain \'{}\': {}'.format(parsed_url.hostname, e)) + + if not ip_v4_records and not ip_v6_records: + raise ValidationError('Cannot resolve IP address for domain \'{}\''.format(parsed_url.hostname)) + def _download_data(urls, upload_dir): job = rq.get_current_job() local_files = {} @@ -210,6 +252,7 @@ def _download_data(urls, upload_dir): name = os.path.basename(urlrequest.url2pathname(urlparse.urlparse(url).path)) if name in local_files: raise Exception("filename collision: {}".format(name)) + _validate_url(url) slogger.glob.info("Downloading: {}".format(url)) job.meta['status'] = '{} is being downloaded..'.format(url) job.save_meta() diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index d01f0f2c..e2a98b2e 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -52,3 +52,4 @@ datumaro==0.2.0 --no-binary=datumaro urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability natsort==8.0.0 mistune>=2.0.1 # not directly required, pinned by Snyk to avoid a vulnerability +dnspython==2.2.0