From 03e9f513e706a7abb0317e1e78287311f3260332 Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Sun, 17 May 2020 03:51:27 -0500 Subject: [PATCH 1/7] Move code and setup testing, audit and lint --- .bandit.test.yml | 397 ++++++++++++++++++ .bandit.yml | 397 ++++++++++++++++++ .flake8 | 5 + .travis.yml | 21 + cleanup.py | 97 +---- cleanup/__init__.py | 0 cleanup/cleanup.py | 96 +++++ .../cleanup_all_covid_calls.py | 0 .../cleanup_keep_calm_with_covid.py | 0 utils.py => cleanup/utils.py | 4 +- requirements.txt | 3 + tests/test_utils.py | 13 + 12 files changed, 935 insertions(+), 98 deletions(-) create mode 100644 .bandit.test.yml create mode 100644 .bandit.yml create mode 100644 .flake8 create mode 100644 .travis.yml create mode 100644 cleanup/__init__.py create mode 100644 cleanup/cleanup.py rename cleanup_all_covid_calls.py => cleanup/cleanup_all_covid_calls.py (100%) rename cleanup_keep_calm_with_covid.py => cleanup/cleanup_keep_calm_with_covid.py (100%) rename utils.py => cleanup/utils.py (92%) create mode 100644 tests/test_utils.py diff --git a/.bandit.test.yml b/.bandit.test.yml new file mode 100644 index 0000000..8ae4abd --- /dev/null +++ b/.bandit.test.yml @@ -0,0 +1,397 @@ + +### Bandit config file generated from: +# '/Users/ted/anaconda3/bin/bandit-config-generator -o .bandit.yml' + +### This config may optionally select a subset of tests to run or skip by +### filling out the 'tests' and 'skips' lists given below. If no tests are +### specified for inclusion then it is assumed all tests are desired. The skips +### set will remove specific tests from the include set. This can be controlled +### using the -t/-s CLI options. Note that the same test ID should not appear +### in both 'tests' and 'skips', this would be nonsensical and is detected by +### Bandit at runtime. + +# Available tests: +# B101 : assert_used +# B102 : exec_used +# B103 : set_bad_file_permissions +# B104 : hardcoded_bind_all_interfaces +# B105 : hardcoded_password_string +# B106 : hardcoded_password_funcarg +# B107 : hardcoded_password_default +# B108 : hardcoded_tmp_directory +# B110 : try_except_pass +# B112 : try_except_continue +# B201 : flask_debug_true +# B301 : pickle +# B302 : marshal +# B303 : md5 +# B304 : ciphers +# B305 : cipher_modes +# B306 : mktemp_q +# B307 : eval +# B308 : mark_safe +# B309 : httpsconnection +# B310 : urllib_urlopen +# B311 : random +# B312 : telnetlib +# B313 : xml_bad_cElementTree +# B314 : xml_bad_ElementTree +# B315 : xml_bad_expatreader +# B316 : xml_bad_expatbuilder +# B317 : xml_bad_sax +# B318 : xml_bad_minidom +# B319 : xml_bad_pulldom +# B320 : xml_bad_etree +# B321 : ftplib +# B322 : input +# B323 : unverified_context +# B324 : hashlib_new_insecure_functions +# B325 : tempnam +# B401 : import_telnetlib +# B402 : import_ftplib +# B403 : import_pickle +# B404 : import_subprocess +# B405 : import_xml_etree +# B406 : import_xml_sax +# B407 : import_xml_expat +# B408 : import_xml_minidom +# B409 : import_xml_pulldom +# B410 : import_lxml +# B411 : import_xmlrpclib +# B412 : import_httpoxy +# B413 : import_pycrypto +# B501 : request_with_no_cert_validation +# B502 : ssl_with_bad_version +# B503 : ssl_with_bad_defaults +# B504 : ssl_with_no_version +# B505 : weak_cryptographic_key +# B506 : yaml_load +# B507 : ssh_no_host_key_verification +# B601 : paramiko_calls +# B602 : subprocess_popen_with_shell_equals_true +# B603 : subprocess_without_shell_equals_true +# B604 : any_other_function_with_shell_equals_true +# B605 : start_process_with_a_shell +# B606 : start_process_with_no_shell +# B607 : start_process_with_partial_path +# B608 : hardcoded_sql_expressions +# B609 : linux_commands_wildcard_injection +# B610 : django_extra_used +# B611 : django_rawsql_used +# B701 : jinja2_autoescape_false +# B702 : use_of_mako_templates +# B703 : django_mark_safe + +# (optional) list included test IDs here, eg '[B101, B406]': +tests: + +# (optional) list skipped test IDs here, eg '[B101, B406]': +skips: [B101] + +### (optional) plugin settings - some test plugins require configuration data +### that may be given here, per-plugin. All bandit test plugins have a built in +### set of sensible defaults and these will be used if no configuration is +### provided. It is not necessary to provide settings for every (or any) plugin +### if the defaults are acceptable. + +any_other_function_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +hardcoded_tmp_directory: + tmp_dirs: + - /tmp + - /var/tmp + - /dev/shm +linux_commands_wildcard_injection: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +ssl_with_bad_defaults: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +ssl_with_bad_version: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +start_process_with_a_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_no_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_partial_path: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_popen_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_without_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +try_except_continue: + check_typed_exception: false +try_except_pass: + check_typed_exception: false +weak_cryptographic_key: + weak_key_size_dsa_high: 1024 + weak_key_size_dsa_medium: 2048 + weak_key_size_ec_high: 160 + weak_key_size_ec_medium: 224 + weak_key_size_rsa_high: 1024 + weak_key_size_rsa_medium: 2048 + diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 0000000..ea9119c --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,397 @@ + +### Bandit config file generated from: +# '/Users/ted/anaconda3/bin/bandit-config-generator -o .bandit.yml' + +### This config may optionally select a subset of tests to run or skip by +### filling out the 'tests' and 'skips' lists given below. If no tests are +### specified for inclusion then it is assumed all tests are desired. The skips +### set will remove specific tests from the include set. This can be controlled +### using the -t/-s CLI options. Note that the same test ID should not appear +### in both 'tests' and 'skips', this would be nonsensical and is detected by +### Bandit at runtime. + +# Available tests: +# B101 : assert_used +# B102 : exec_used +# B103 : set_bad_file_permissions +# B104 : hardcoded_bind_all_interfaces +# B105 : hardcoded_password_string +# B106 : hardcoded_password_funcarg +# B107 : hardcoded_password_default +# B108 : hardcoded_tmp_directory +# B110 : try_except_pass +# B112 : try_except_continue +# B201 : flask_debug_true +# B301 : pickle +# B302 : marshal +# B303 : md5 +# B304 : ciphers +# B305 : cipher_modes +# B306 : mktemp_q +# B307 : eval +# B308 : mark_safe +# B309 : httpsconnection +# B310 : urllib_urlopen +# B311 : random +# B312 : telnetlib +# B313 : xml_bad_cElementTree +# B314 : xml_bad_ElementTree +# B315 : xml_bad_expatreader +# B316 : xml_bad_expatbuilder +# B317 : xml_bad_sax +# B318 : xml_bad_minidom +# B319 : xml_bad_pulldom +# B320 : xml_bad_etree +# B321 : ftplib +# B322 : input +# B323 : unverified_context +# B324 : hashlib_new_insecure_functions +# B325 : tempnam +# B401 : import_telnetlib +# B402 : import_ftplib +# B403 : import_pickle +# B404 : import_subprocess +# B405 : import_xml_etree +# B406 : import_xml_sax +# B407 : import_xml_expat +# B408 : import_xml_minidom +# B409 : import_xml_pulldom +# B410 : import_lxml +# B411 : import_xmlrpclib +# B412 : import_httpoxy +# B413 : import_pycrypto +# B501 : request_with_no_cert_validation +# B502 : ssl_with_bad_version +# B503 : ssl_with_bad_defaults +# B504 : ssl_with_no_version +# B505 : weak_cryptographic_key +# B506 : yaml_load +# B507 : ssh_no_host_key_verification +# B601 : paramiko_calls +# B602 : subprocess_popen_with_shell_equals_true +# B603 : subprocess_without_shell_equals_true +# B604 : any_other_function_with_shell_equals_true +# B605 : start_process_with_a_shell +# B606 : start_process_with_no_shell +# B607 : start_process_with_partial_path +# B608 : hardcoded_sql_expressions +# B609 : linux_commands_wildcard_injection +# B610 : django_extra_used +# B611 : django_rawsql_used +# B701 : jinja2_autoescape_false +# B702 : use_of_mako_templates +# B703 : django_mark_safe + +# (optional) list included test IDs here, eg '[B101, B406]': +tests: + +# (optional) list skipped test IDs here, eg '[B101, B406]': +skips: + +### (optional) plugin settings - some test plugins require configuration data +### that may be given here, per-plugin. All bandit test plugins have a built in +### set of sensible defaults and these will be used if no configuration is +### provided. It is not necessary to provide settings for every (or any) plugin +### if the defaults are acceptable. + +any_other_function_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +hardcoded_tmp_directory: + tmp_dirs: + - /tmp + - /var/tmp + - /dev/shm +linux_commands_wildcard_injection: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +ssl_with_bad_defaults: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +ssl_with_bad_version: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +start_process_with_a_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_no_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_partial_path: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_popen_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_without_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +try_except_continue: + check_typed_exception: false +try_except_pass: + check_typed_exception: false +weak_cryptographic_key: + weak_key_size_dsa_high: 1024 + weak_key_size_dsa_medium: 2048 + weak_key_size_ec_high: 160 + weak_key_size_ec_medium: 224 + weak_key_size_rsa_high: 1024 + weak_key_size_rsa_medium: 2048 + diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..ec085c4 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +exclude=.git,data,__pycache__,.pytest_cache +ignore= + # line too long + E501 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..93a60ab --- /dev/null +++ b/.travis.yml @@ -0,0 +1,21 @@ +language: python +python: + - "3.6" # current default Python on Travis CI + - "3.7" + - "3.8" +virtualenv: + system_site_packages: true +# command to install dependencies +install: + - pip install -r requirements.txt +# command to run tests +jobs: + include: + - stage: unit tests + script: python -m pytest -v + - stage: production code audit + script: bandit -c .bandit.yml -r ./cleanup && bandit -c .bandit.yml cleanup.py + - stage: test code audit + script: bandit -c .bandit.test.yml -r ./tests + - stage: lint + script: flake8 diff --git a/cleanup.py b/cleanup.py index 14f5969..d847e67 100644 --- a/cleanup.py +++ b/cleanup.py @@ -1,99 +1,4 @@ -import logging - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - handlers=[logging.StreamHandler()], -) -import os -import sys - -import click -import pandas as pd - -from cleanup_all_covid_calls import cleanup as cleanup_all_covid_calls -from cleanup_keep_calm_with_covid import ( - CONVERTERS, - cleanup as cleanup_keep_calm_with_covid, -) -from utils import write_output_file - - -@click.group() -@click.option("--debug/--no-debug", default=False) -@click.pass_context -def cleanup(ctx, debug): - ctx.ensure_object(dict) - ctx.obj["DEBUG"] = debug - - -@cleanup.command() -@click.pass_context -@click.option( - "--vialink-input", - "vl_infile", - required=True, - help="Path to the VIA LINK input csv file", -) -@click.option( - "--232-input", - "two32_infile", - required=True, - help="Path to the 232 HELP input csv file", -) -@click.option( - "--output", - default="data/all_covid_calls_cleaned.xlsx", - help="Path to the output spreadsheet (cleaned .xlsx file)", -) -def all_covid_calls(ctx, vl_infile, two32_infile, output): - if ctx.obj["DEBUG"]: - logging.getLogger().setLevel(logging.DEBUG) - logging.debug("Running in debug mode") - logging.debug(f"Reading VIALINK file from '{vl_infile}'") - logging.debug(f"Reading 232-HELP file from '{two32_infile}'") - dfs = {} - dfvl = pd.read_csv(vl_infile, encoding="ISO-8859-1") - dfvl = remove_first_rows(dfvl) - dfs["VIALINK"] = dfvl - df232 = pd.read_csv(two32_infile, encoding="ISO-8859-1") - df232 = remove_first_rows(df232) - dfs["TWO32"] = df232 - logging.info("Cleaning data for All COVID Calls Dashboard") - df = cleanup_all_covid_calls(dfs) - logging.info(f"Writing data for All COVID Calls Dashboard to '{output}'") - write_output_file(df, output) - - -@cleanup.command() -@click.pass_context -@click.option( - "--input", "infile", required=True, help="Path to the input csv file", -) -@click.option( - "--output", - default="data/keep_calm_with_covid_cleaned.xlsx", - help="Path to the output spreadsheet (cleaned .xlsx file)", -) -def keep_calm_with_covid(ctx, infile, output): - if ctx.obj["DEBUG"]: - logging.getLogger().setLevel(logging.DEBUG) - logging.debug("Running in debug mode") - logging.debug(f"Reading input file '{infile}'") - df = pd.read_csv(infile, encoding="ISO-8859-1", converters=CONVERTERS) - df = remove_first_rows(df) - logging.info("Cleaning data for Keep Calm with COVID Dashboard") - cleanup_keep_calm_with_covid(df) - logging.info(f"Writing data for Keep Calm with COVID Dashboard to '{output}'") - write_output_file(df, output) - - -def remove_first_rows(df): - columns = df.iloc[1].values.tolist() - df = df.iloc[2:] - df.columns = columns - return df +from cleanup.cleanup import cleanup if __name__ == "__main__": diff --git a/cleanup/__init__.py b/cleanup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cleanup/cleanup.py b/cleanup/cleanup.py new file mode 100644 index 0000000..b86a47a --- /dev/null +++ b/cleanup/cleanup.py @@ -0,0 +1,96 @@ +import logging + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + handlers=[logging.StreamHandler()], +) +import os +import sys + +import click +import pandas as pd + +from cleanup_all_covid_calls import cleanup as cleanup_all_covid_calls +from cleanup_keep_calm_with_covid import ( + CONVERTERS, + cleanup as cleanup_keep_calm_with_covid, +) +from utils import write_output_file + + +@click.group() +@click.option("--debug/--no-debug", default=False) +@click.pass_context +def cleanup(ctx, debug): + ctx.ensure_object(dict) + ctx.obj["DEBUG"] = debug + + +@cleanup.command() +@click.pass_context +@click.option( + "--vialink-input", + "vl_infile", + required=True, + help="Path to the VIA LINK input csv file", +) +@click.option( + "--232-input", + "two32_infile", + required=True, + help="Path to the 232 HELP input csv file", +) +@click.option( + "--output", + default="data/all_covid_calls_cleaned.xlsx", + help="Path to the output spreadsheet (cleaned .xlsx file)", +) +def all_covid_calls(ctx, vl_infile, two32_infile, output): + if ctx.obj["DEBUG"]: + logging.getLogger().setLevel(logging.DEBUG) + logging.debug("Running in debug mode") + logging.debug(f"Reading VIALINK file from '{vl_infile}'") + logging.debug(f"Reading 232-HELP file from '{two32_infile}'") + dfs = {} + dfvl = pd.read_csv(vl_infile, encoding="ISO-8859-1") + dfvl = remove_first_rows(dfvl) + dfs["VIALINK"] = dfvl + df232 = pd.read_csv(two32_infile, encoding="ISO-8859-1") + df232 = remove_first_rows(df232) + dfs["TWO32"] = df232 + logging.info("Cleaning data for All COVID Calls Dashboard") + df = cleanup_all_covid_calls(dfs) + logging.info(f"Writing data for All COVID Calls Dashboard to '{output}'") + write_output_file(df, output) + + +@cleanup.command() +@click.pass_context +@click.option( + "--input", "infile", required=True, help="Path to the input csv file", +) +@click.option( + "--output", + default="data/keep_calm_with_covid_cleaned.xlsx", + help="Path to the output spreadsheet (cleaned .xlsx file)", +) +def keep_calm_with_covid(ctx, infile, output): + if ctx.obj["DEBUG"]: + logging.getLogger().setLevel(logging.DEBUG) + logging.debug("Running in debug mode") + logging.debug(f"Reading input file '{infile}'") + df = pd.read_csv(infile, encoding="ISO-8859-1", converters=CONVERTERS) + df = remove_first_rows(df) + logging.info("Cleaning data for Keep Calm with COVID Dashboard") + cleanup_keep_calm_with_covid(df) + logging.info(f"Writing data for Keep Calm with COVID Dashboard to '{output}'") + write_output_file(df, output) + + +def remove_first_rows(df): + columns = df.iloc[1].values.tolist() + df = df.iloc[2:] + df.columns = columns + return df diff --git a/cleanup_all_covid_calls.py b/cleanup/cleanup_all_covid_calls.py similarity index 100% rename from cleanup_all_covid_calls.py rename to cleanup/cleanup_all_covid_calls.py diff --git a/cleanup_keep_calm_with_covid.py b/cleanup/cleanup_keep_calm_with_covid.py similarity index 100% rename from cleanup_keep_calm_with_covid.py rename to cleanup/cleanup_keep_calm_with_covid.py diff --git a/utils.py b/cleanup/utils.py similarity index 92% rename from utils.py rename to cleanup/utils.py index dec2a10..4440879 100644 --- a/utils.py +++ b/cleanup/utils.py @@ -19,7 +19,7 @@ def get_lat(zipcode): return None else: lat = search.by_zipcode(int(zipcode)).lat - return lat if lat else None + return lat if lat else None # why is the if statement here? def get_lng(zipcode): @@ -27,7 +27,7 @@ def get_lng(zipcode): return None else: lng = search.by_zipcode(int(zipcode)).lng - return lng if lng else None + return lng if lng else None # why is the if statement here? def explode_needs(df, need_column): diff --git a/requirements.txt b/requirements.txt index 3f07058..bba031d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,6 @@ openpyxl==3.0.3 pandas==1.0.3 uszipcode==0.2.4 xlrd==1.2.0 +pytest==5.0.1 +flake8==3.8.1 +bandit==1.6.2 diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..3ef5c21 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,13 @@ +from cleanup.utils import get_lat, get_lng + + +def test_get_lat(): + assert isinstance(get_lat("70471"), float) # valid zip + assert get_lat("00000") is None # invalid zip + assert get_lat(None) is None # null zip + + +def test_get_lng(): + assert isinstance(get_lng("70471"), float) # valid zip + assert get_lng("00000") is None # invalid zip + assert get_lng(None) is None # null zip From caf11f24416fa737c392511a8afcc0523ad7e480 Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Sun, 17 May 2020 03:54:52 -0500 Subject: [PATCH 2/7] Trigger Travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 93a60ab..b81a8a5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,4 +18,4 @@ jobs: - stage: test code audit script: bandit -c .bandit.test.yml -r ./tests - stage: lint - script: flake8 + script: flake8 From 36a769ac43b00259f682124f503c32d43159fc3e Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Sun, 17 May 2020 03:57:27 -0500 Subject: [PATCH 3/7] Update Travis config --- .travis.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index b81a8a5..1b45496 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,19 +3,14 @@ python: - "3.6" # current default Python on Travis CI - "3.7" - "3.8" -virtualenv: - system_site_packages: true -# command to install dependencies install: - pip install -r requirements.txt -# command to run tests +script: python -m pytest -v jobs: include: - - stage: unit tests - script: python -m pytest -v - stage: production code audit script: bandit -c .bandit.yml -r ./cleanup && bandit -c .bandit.yml cleanup.py - stage: test code audit script: bandit -c .bandit.test.yml -r ./tests - stage: lint - script: flake8 + script: flake8 From d2736e62c5b33ea719ccea7d122057778fa89181 Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Mon, 18 May 2020 02:06:51 -0500 Subject: [PATCH 4/7] Implement fixtures, mocks, stubs and test coverage --- .travis.yml | 2 +- requirements.txt | 2 ++ tests/fixtures.py | 6 ++++++ tests/test_utils.py | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 tests/fixtures.py diff --git a/.travis.yml b/.travis.yml index 1b45496..2a4c271 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ python: - "3.8" install: - pip install -r requirements.txt -script: python -m pytest -v +script: python -m pytest -v --cov jobs: include: - stage: production code audit diff --git a/requirements.txt b/requirements.txt index bba031d..abe52d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,7 @@ pandas==1.0.3 uszipcode==0.2.4 xlrd==1.2.0 pytest==5.0.1 +pytest-cov==2.8.1 +pytest-mock==3.1.0 flake8==3.8.1 bandit==1.6.2 diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 0000000..ac5e849 --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,6 @@ +import pytest +import pandas as pd + +@pytest.fixture +def df(): + return pd.DataFrame() diff --git a/tests/test_utils.py b/tests/test_utils.py index 3ef5c21..f055cd2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,35 @@ -from cleanup.utils import get_lat, get_lng +import os +import pandas as pd +from io import BytesIO + +from fixtures import df + +from cleanup.utils import write_output_file, get_lat, get_lng + + +def test_write_output_file(df, mocker): + # mocks & stubs + mocker.patch.object(pd.DataFrame, 'to_excel') + mocker.patch('os.makedirs') + path_exists_stub = mocker.stub('os.path.exists') + # it calls to_excel on the file + filename = 'nondata/somefile.xlsx' + write_output_file(df, filename) + pd.DataFrame.to_excel.assert_called_once_with(filename) + # it doesn't call os.makedirs when not prefixed with 'data' + filename = 'nondata/somefile.xlsx' + write_output_file(df, filename) + os.makedirs.assert_not_called() + # it doesn't call os.makedirs when prefixed with 'data' & exists + filename = 'data/somefile.xlsx' + path_exists_stub.return_value = True + write_output_file(df, filename) + os.makedirs.assert_not_called() + # it calls os.makedirs when prefixed with 'data', but doesn't exist + filename = 'data/somefile.xlsx' + path_exists_stub.return_value = False + write_output_file(df, filename) + os.makedirs.assert_not_called() def test_get_lat(): From 2b2181377f494da21a2d91119f8fa0a43d45e957 Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Mon, 18 May 2020 02:17:41 -0500 Subject: [PATCH 5/7] Fix mock return values --- tests/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index f055cd2..06bc823 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,7 +11,7 @@ def test_write_output_file(df, mocker): # mocks & stubs mocker.patch.object(pd.DataFrame, 'to_excel') mocker.patch('os.makedirs') - path_exists_stub = mocker.stub('os.path.exists') + path_exists_mock = mocker.patch('os.path.exists') # it calls to_excel on the file filename = 'nondata/somefile.xlsx' write_output_file(df, filename) @@ -22,14 +22,14 @@ def test_write_output_file(df, mocker): os.makedirs.assert_not_called() # it doesn't call os.makedirs when prefixed with 'data' & exists filename = 'data/somefile.xlsx' - path_exists_stub.return_value = True + path_exists_mock.return_value = True write_output_file(df, filename) os.makedirs.assert_not_called() # it calls os.makedirs when prefixed with 'data', but doesn't exist filename = 'data/somefile.xlsx' - path_exists_stub.return_value = False + path_exists_mock.return_value = False write_output_file(df, filename) - os.makedirs.assert_not_called() + os.makedirs.assert_called_once_with('data') def test_get_lat(): From 03a29c82cc59831080cd913cb1f0c095a9d74922 Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Mon, 18 May 2020 02:21:03 -0500 Subject: [PATCH 6/7] Update Travis config to limit coverage to code modules --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2a4c271..33aac35 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,9 +5,11 @@ python: - "3.8" install: - pip install -r requirements.txt -script: python -m pytest -v --cov +script: python -m pytest -v jobs: include: + - stage: coverage + script: python -m pytest -v --cov=cleanup - stage: production code audit script: bandit -c .bandit.yml -r ./cleanup && bandit -c .bandit.yml cleanup.py - stage: test code audit From 5cf0964d74ac76644c8dbe34fedbb62a3a4130fd Mon Sep 17 00:00:00 2001 From: "Ryan B. Harvey" Date: Mon, 18 May 2020 02:35:29 -0500 Subject: [PATCH 7/7] Move fixtures to where pytest wants them to prevent imports --- tests/{fixtures.py => conftest.py} | 3 +- tests/test_utils.py | 47 ++++++++++++++---------------- 2 files changed, 24 insertions(+), 26 deletions(-) rename tests/{fixtures.py => conftest.py} (69%) diff --git a/tests/fixtures.py b/tests/conftest.py similarity index 69% rename from tests/fixtures.py rename to tests/conftest.py index ac5e849..dc543fe 100644 --- a/tests/fixtures.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ import pytest import pandas as pd + @pytest.fixture def df(): - return pd.DataFrame() + return pd.DataFrame() diff --git a/tests/test_utils.py b/tests/test_utils.py index 06bc823..60f1b81 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,35 +1,32 @@ import os import pandas as pd -from io import BytesIO - -from fixtures import df from cleanup.utils import write_output_file, get_lat, get_lng def test_write_output_file(df, mocker): - # mocks & stubs - mocker.patch.object(pd.DataFrame, 'to_excel') - mocker.patch('os.makedirs') - path_exists_mock = mocker.patch('os.path.exists') - # it calls to_excel on the file - filename = 'nondata/somefile.xlsx' - write_output_file(df, filename) - pd.DataFrame.to_excel.assert_called_once_with(filename) - # it doesn't call os.makedirs when not prefixed with 'data' - filename = 'nondata/somefile.xlsx' - write_output_file(df, filename) - os.makedirs.assert_not_called() - # it doesn't call os.makedirs when prefixed with 'data' & exists - filename = 'data/somefile.xlsx' - path_exists_mock.return_value = True - write_output_file(df, filename) - os.makedirs.assert_not_called() - # it calls os.makedirs when prefixed with 'data', but doesn't exist - filename = 'data/somefile.xlsx' - path_exists_mock.return_value = False - write_output_file(df, filename) - os.makedirs.assert_called_once_with('data') + # mocks & stubs + mocker.patch.object(pd.DataFrame, 'to_excel') + mocker.patch('os.makedirs') + path_exists_mock = mocker.patch('os.path.exists') + # it calls to_excel on the file + filename = 'nondata/somefile.xlsx' + write_output_file(df, filename) + pd.DataFrame.to_excel.assert_called_once_with(filename) + # it doesn't call os.makedirs when not prefixed with 'data' + filename = 'nondata/somefile.xlsx' + write_output_file(df, filename) + os.makedirs.assert_not_called() + # it doesn't call os.makedirs when prefixed with 'data' & exists + filename = 'data/somefile.xlsx' + path_exists_mock.return_value = True + write_output_file(df, filename) + os.makedirs.assert_not_called() + # it calls os.makedirs when prefixed with 'data', but doesn't exist + filename = 'data/somefile.xlsx' + path_exists_mock.return_value = False + write_output_file(df, filename) + os.makedirs.assert_called_once_with('data') def test_get_lat():