diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 4a2248e..0000000 --- a/.flake8 +++ /dev/null @@ -1,30 +0,0 @@ -[flake8] -ignore = - # Refers to the max-line length. Let's suppress the error and simply - # let black take care on how it wants to format the lines. - E501, - - # Refers to "line break before binary operator". - # Similar to above, let black take care of the formatting. - W503, - - # Refers to "Unnecessary dict call - rewrite as a literal". - C408 - -per-file-ignores = - # Ignore: "imported but unused" errors in __init__ files, as those imports are there - # to expose submodule functions so they can be imported directly from that module - zyte_common_items/__init__.py:F401, - - # Ignore: * imports in these files - zyte_common_items/__init__.py:F403, - zyte_common_items/zyte_data_api.py:F403, - - # Ignore: may be undefined, or defined from star imports - zyte_common_items/zyte_data_api.py:F405, - tests/test_page_inputs.py:F405, - - # ”module level import not at the top of file“ caused by - # pytest.importorskip - tests/test_ae_pipeline.py:E402, - tests/test_pipelines.py:E402, diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 54d60d4..352c36e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,36 +1,21 @@ repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.13.2 + hooks: + - id: ruff-check + args: [ --fix ] + - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace - - hooks: - - id: black - exclude: test_mypy\.py$ # https://github.com/davidfritzsche/pytest-mypy-testing/issues/29 - language_version: python3 - repo: https://github.com/psf/black - rev: 24.10.0 - - hooks: - - id: isort - language_version: python3 - repo: https://github.com/timothycrosley/isort - rev: 5.13.2 - - hooks: - - id: flake8 - language_version: python3 - additional_dependencies: - - flake8-bugbear - - flake8-comprehensions - - flake8-debugger - - flake8-string-format - repo: https://github.com/pycqa/flake8 - rev: 7.1.1 - repo: https://github.com/adamchainz/blacken-docs - rev: 1.19.0 + rev: 1.19.1 hooks: - id: blacken-docs additional_dependencies: - - black==24.10.0 + - black==25.1.0 - repo: local hooks: - id: no-colon-comments diff --git a/docs/conf.py b/docs/conf.py index c76a447..169c54d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,7 @@ def get_copyright(attribution, *, first_year): def get_version_and_release(): try: - import zyte_common_items # noqa: F401 + import zyte_common_items # noqa: F401, PLC0415 except ImportError: return "", "" version_bytes = pkgutil.get_data("zyte_common_items", "VERSION") or b"" @@ -23,7 +23,7 @@ def get_version_and_release(): project = "zyte-common-items" -copyright = get_copyright("Zyte Group Ltd", first_year=2022) +project_copyright = get_copyright("Zyte Group Ltd", first_year=2022) version, release = get_version_and_release() extensions = [ diff --git a/pre-commit-scripts/no_colon_comments.py b/pre-commit-scripts/no_colon_comments.py index 8f04697..1a79c46 100644 --- a/pre-commit-scripts/no_colon_comments.py +++ b/pre-commit-scripts/no_colon_comments.py @@ -8,7 +8,7 @@ def check_file_for_colon_comment(file_path): - with open(file_path, "r", encoding="utf-8") as f: + with Path(file_path).open(encoding="utf-8") as f: for i, line in enumerate(f, 1): if "test_file" in file_path: raise ValueError(f"{line=}") @@ -23,9 +23,10 @@ def check_file_for_colon_comment(file_path): def main(): failed = False for file in sys.argv[1:]: - if Path(file).suffix in PYTHON_FILE_EXTENSIONS: - if check_file_for_colon_comment(file): - failed = True + if Path(file).suffix in PYTHON_FILE_EXTENSIONS and check_file_for_colon_comment( + file + ): + failed = True if failed: print( "\nERROR: Sphinx-style comments detected. Please use docstrings for documentation." diff --git a/pyproject.toml b/pyproject.toml index b9c4d11..8fe9c06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,13 +50,6 @@ zyte_common_items = ["py.typed","VERSION"] include = ["zyte_common_items*"] namespaces = false -[tool.black] -exclude = 'test_mypy\.py' # https://github.com/davidfritzsche/pytest-mypy-testing/issues/29 - -[tool.isort] -profile = "black" -multi_line_output = 3 - [tool.mypy] check_untyped_defs = true ignore_missing_imports = true @@ -82,3 +75,157 @@ regex = true [[tool.bumpversion.files]] filename = "zyte_common_items/VERSION" + +[tool.ruff.format] +exclude = [ + "tests/test_mypy.py", # https://github.com/davidfritzsche/pytest-mypy-testing/issues/29 +] + +[tool.ruff.lint] +extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # flake8-commas + "COM", + # pydocstyle + "D", + # flake8-future-annotations + "FA", + # flynt + "FLY", + # refurb + "FURB", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # flake8-logging + "LOG", + # Perflint + "PERF", + # pygrep-hooks + "PGH", + # flake8-pie + "PIE", + # pylint + "PL", + # flake8-pytest-style + "PT", + # flake8-use-pathlib + "PTH", + # flake8-pyi + "PYI", + # flake8-quotes + "Q", + # flake8-return + "RET", + # flake8-raise + "RSE", + # Ruff-specific rules + "RUF", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # flake8-slots + "SLOT", + # flake8-debugger + "T10", + # flake8-type-checking + "TC", + # flake8-tidy-imports + "TID", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +ignore = [ + # Trailing comma missing + "COM812", + # Missing docstring in public module + "D100", + # Missing docstring in public class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # Missing docstring in magic method + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + # One-line docstring should fit on one line with quotes + "D200", + # No blank lines allowed after function docstring + "D202", + # 1 blank line required between summary line and description + "D205", + # Multi-line docstring closing quotes should be on a separate line + "D209", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First line should not be the function's "signature" + "D402", + # Too many return statements + "PLR0911", + # Too many branches + "PLR0912", + # Too many arguments in function definition + "PLR0913", + # Too many statements + "PLR0915", + # Magic value used in comparison + "PLR2004", + # String contains ambiguous {}. + "RUF001", + # Docstring contains ambiguous {}. + "RUF002", + # Comment contains ambiguous {}. + "RUF003", + # Mutable class attributes should be annotated with `typing.ClassVar` + "RUF012", + # Use of `assert` detected + "S101", + # Prefer absolute imports over relative imports from parent modules + "TID252", + # Add `from __future__ import annotations` to simplify + # (The fix can break andi.) + "FA100", +] + +[tool.ruff.lint.flake8-type-checking] +runtime-evaluated-decorators = ["attrs.define"] + +[tool.ruff.lint.isort] +split-on-trailing-comma = false + +[tool.ruff.lint.per-file-ignores] +"zyte_common_items/__init__.py" = ["F401"] +"zyte_common_items/components/__init__.py" = ["F401"] +"zyte_common_items/items/__init__.py" = ["F401"] +"zyte_common_items/pages/__init__.py" = ["F401"] +# Skip PEP 604 suggestions for files with attr classes +"zyte_common_items/components/*.py" = ["UP007", "UP045"] +"zyte_common_items/items/*.py" = ["UP007", "UP045"] +# ”module level import not at the top of file“ caused by pytest.importorskip +"tests/test_ae_pipeline.py" = ["E402"] +"tests/test_pipelines.py" = ["E402"] +"tests/*" = ["S", "B018"] + +[tool.ruff.lint.pydocstyle] +convention = "pep257" diff --git a/tests/__init__.py b/tests/__init__.py index 0a18110..5832a0d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,37 +1,16 @@ -import contextlib -import json -import os import random -def load_fixture(name): - path = os.path.join(os.path.dirname(__file__), f"fixtures/{name}") - with open(path, "r") as f: - return json.loads(f.read()) - - -@contextlib.contextmanager -def temp_seed(seed): - state = random.getstate() - random.seed(seed) - try: - yield - finally: - random.setstate(state) - - def crazy_monkey_nullify(data, drop_prob=0.5): """Make some attributes None or [] recursively""" def nullify(value): if drop_prob <= random.random(): return [] if isinstance(value, list) else None - else: - return crazy_monkey_nullify(value, drop_prob) + return crazy_monkey_nullify(value, drop_prob) if isinstance(data, list): return [crazy_monkey_nullify(value, drop_prob) for value in data] - elif isinstance(data, dict): + if isinstance(data, dict): return {k: nullify(v) for k, v in data.items()} - else: - return data + return data diff --git a/tests/test_adapter.py b/tests/test_adapter.py index f842df1..5f2458f 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -29,17 +29,13 @@ def test_asdict_all_fields(): adapter = ItemAdapter(product) actual_dict = adapter.asdict() expected_dict = { - "additionalProperties": [dict(name="foo", value="bar")], - "aggregateRating": dict( - bestRating=5.0, - ratingValue=2.5, - reviewCount=123, - ), + "additionalProperties": [{"name": "foo", "value": "bar"}], + "aggregateRating": {"bestRating": 5.0, "ratingValue": 2.5, "reviewCount": 123}, "availability": "InStock", - "brand": dict(name="Ka-pow"), + "brand": {"name": "Ka-pow"}, "breadcrumbs": [ - dict(name="Level 1", url="http://example.com/level1"), - dict(name="Level 2", url="http://example.com/level1/level2"), + {"name": "Level 1", "url": "http://example.com/level1"}, + {"name": "Level 2", "url": "http://example.com/level1/level2"}, ], "canonicalUrl": "https://example.com/product22", "color": "white", @@ -52,15 +48,12 @@ def test_asdict_all_fields(): "
Super Cooling Plus™
" ), "features": ["Easily store fragile products.", "Bluetooth connectivity."], - "gtin": [dict(type="foo", value="bar")], + "gtin": [{"type": "foo", "value": "bar"}], "images": [ - dict(url="http://example.com/image1.png"), + {"url": "http://example.com/image1.png"}, ], - "mainImage": dict(url="http://example.com/image1.png"), - "metadata": dict( - dateDownloaded="2022-12-31T13:01:54Z", - probability=1.0, - ), + "mainImage": {"url": "http://example.com/image1.png"}, + "metadata": {"dateDownloaded": "2022-12-31T13:01:54Z", "probability": 1.0}, "mpn": "HSC0424PP", "name": "White two-door refrigerator", "price": "9999.99", @@ -112,11 +105,11 @@ def __iter__(self): @pytest.mark.parametrize( "value", - ( + [ [], - tuple(), + (), EmptyCollection(), - ), + ], ) def test_asdict_empty_collection(value): @attrs.define @@ -131,12 +124,12 @@ class _Item(Item): def test_asdict_unknown_fields(): - input_dict = dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + input_dict = { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } product = Product.from_dict(input_dict) with configured_adapter(): adapter = ItemAdapter(product) @@ -254,12 +247,12 @@ def test_known_field_remove_missing_twice(): def test_unknown_field_get(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -283,11 +276,11 @@ def test_unknown_field_get_missing(): def test_unknown_field_set(): product = Product.from_dict( - dict( - additionalProperties=[{"name": "a", "value": "b"}], - aggregateRating={"bestRating": 5.0}, - url="https://example.com/", - ) + { + "additionalProperties": [{"name": "a", "value": "b"}], + "aggregateRating": {"bestRating": 5.0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -309,12 +302,12 @@ def test_unknown_field_set(): def test_unknown_field_update(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -336,12 +329,12 @@ def test_unknown_field_update(): def test_unknown_field_remove(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -387,7 +380,7 @@ class _Item(Item): children: Collection[Item] class TestAdapter(ItemAdapter): - ADAPTER_CLASSES = [ZyteItemKeepEmptyAdapter] + list(ItemAdapter.ADAPTER_CLASSES) + ADAPTER_CLASSES = [ZyteItemKeepEmptyAdapter, *ItemAdapter.ADAPTER_CLASSES] item = _Item([]) adapter = TestAdapter(item) diff --git a/tests/test_ae_pipeline.py b/tests/test_ae_pipeline.py index 88fd0f0..21dee43 100644 --- a/tests/test_ae_pipeline.py +++ b/tests/test_ae_pipeline.py @@ -1,6 +1,6 @@ -import pytest # isort: skip +import pytest -scrapy = pytest.importorskip("scrapy") # noqa +scrapy = pytest.importorskip("scrapy") from zyte_common_items import Article, ArticleList, JobPosting, Product, ProductList from zyte_common_items.ae import ( @@ -38,8 +38,8 @@ @pytest.mark.parametrize( - ["item", "expected"], - ( + ("item", "expected"), + [ ( Article(**_ARTICLE_MIN_KWARGS), AEArticle( @@ -222,7 +222,7 @@ ), ), ), - ), + ], ) def test_main(item, expected): pipeline = AEPipeline() diff --git a/tests/test_components.py b/tests/test_components.py index 808291f..433d5b3 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -88,9 +88,9 @@ def test_metadata_fields(): superset = set(attrs.fields_dict(Metadata)) for cls in get_all_subclasses(BaseMetadata): subset = set(attrs.fields_dict(cls)) - assert subset.issubset( - superset - ), f"Metadata is missing some fields from {cls.__name__}: {subset - superset}" + assert subset.issubset(superset), ( + f"Metadata is missing some fields from {cls.__name__}: {subset - superset}" + ) def test_metadata_subclasses(): diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 49567d6..1006928 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize( - "cls,fields", + ("cls", "fields"), [ (Image, ["url"]), (Breadcrumb, ["url"]), @@ -44,7 +44,7 @@ def test_webpoet_URL_classes(cls, fields): # Ensure that both types of URL classes are covered for url_obj in [response_url_obj, request_url_obj]: - data = {field: url_obj for field in fields} + data = dict.fromkeys(fields, url_obj) obj = cls(**data) for field in fields: @@ -61,7 +61,9 @@ def test_webpoet_URL_classes(cls, fields): # Setting other values that are not strings or URL classes would # raise a ValueError - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match="is neither a string nor an instance of" + ): setattr(obj, field, 123) @@ -90,12 +92,12 @@ def test_webpoet_URL_mainImage(cls): # Setting other values that are not strings or URL classes would # raise a ValueError - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): obj.mainImage.url = False data = {"mainImage": {"url": 123}, "url": 123} - with pytest.raises(ValueError): - obj = cls.from_dict(data) + with pytest.raises(ValueError, match="is neither a string nor an instance of"): + cls.from_dict(data) @pytest.mark.parametrize("cls", [ProductVariant, Product]) @@ -127,7 +129,7 @@ def test_webpoet_URL_images(cls): # Setting other values that are not strings or URL classes would # raise a ValueError - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): obj.images[1].url = False data = { @@ -137,5 +139,5 @@ def test_webpoet_URL_images(cls): ], "url": 789, } - with pytest.raises(ValueError): - obj = cls.from_dict(data) + with pytest.raises(ValueError, match="is neither a string nor an instance of"): + cls.from_dict(data) diff --git a/tests/test_docs.py b/tests/test_docs.py index 6b9a76f..3880ffd 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,5 +1,6 @@ import re from importlib import import_module +from pathlib import Path import pytest @@ -12,7 +13,7 @@ @pytest.mark.parametrize( ("submodule_name", "only_in_docs", "only_in_submodule"), - ( + [ ("components", set(), set()), ("items", {"Item", "base.ProbabilityMixin"}, {"RequestListCaster"}), ( @@ -20,11 +21,10 @@ {"pages.base._BasePage"}, {"DescriptionMixin", "HasMetadata", "PriceMixin"}, ), - ), + ], ) def test_component_reference_entries(submodule_name, only_in_docs, only_in_submodule): - with open(f"docs/reference/{submodule_name}.rst") as f: - docs_page = f.read() + docs_page = Path(f"docs/reference/{submodule_name}.rst").read_text(encoding="utf-8") docs_class_names = set() for match in AUTOCLASS_PATTERN.finditer(docs_page): class_name = match[1] diff --git a/tests/test_examples.py b/tests/test_examples.py index 051eb85..fc5199e 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -16,14 +16,14 @@ @pytest.mark.parametrize( "example", - ( + [ _BREADCRUMBS_EXAMPLE_1, _BREADCRUMBS_EXAMPLE_2, _DESCRIPTION_HTML_EXAMPLE, _GTIN_EXAMPLE_1, _GTIN_EXAMPLE_2, _GTIN_EXAMPLE_3, - ), + ], ) def test(example: PageObjectMethodExample): response = HttpResponse(url="http://example.com", body=example.html.encode()) diff --git a/tests/test_items.py b/tests/test_items.py index 5a6bb5b..cb5a12d 100644 --- a/tests/test_items.py +++ b/tests/test_items.py @@ -805,7 +805,7 @@ def test_metadata(): obj_name[:-4] for obj_name in zyte_common_items.__dict__ if ( - not (obj_name.startswith("Base") or obj_name.startswith("Auto")) + not obj_name.startswith(("Base", "Auto")) and obj_name.endswith("Page") and obj_name != "Page" ) @@ -912,8 +912,8 @@ def test_social_media_post_missing_fields(): @pytest.mark.parametrize( - "cls,has_proba", - ( + ("cls", "has_proba"), + [ (Article, True), (ArticleFromList, True), (ArticleList, False), @@ -928,7 +928,7 @@ def test_social_media_post_missing_fields(): (RealEstate, True), (Serp, False), (SocialMediaPost, True), - ), + ], ) def test_get_probability_request(cls, has_proba): data = {"url": "https://example.com"} diff --git a/tests/test_items_base.py b/tests/test_items_base.py index daa1fea..c272653 100644 --- a/tests/test_items_base.py +++ b/tests/test_items_base.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import Optional, Union import attrs import pytest @@ -9,8 +9,6 @@ class NotConsideredAnItem: """It has to inherit from Item to be considered one.""" - pass - @attrs.define class SubItem(Item): @@ -70,7 +68,7 @@ class A(Item): pattern = r"Expected a dict with fields from tests\.\S+?\.A, got 'a'\." with pytest.raises(ValueError, match=pattern): - A.from_dict("a") # type: ignore + A.from_dict("a") # type: ignore[arg-type] def test_from_dict_non_dict_field(): @@ -90,7 +88,7 @@ class A(Item): def test_from_dict_from_list_non_list_field(): @attrs.define class A(Item): - a: List[str] + a: list[str] pattern = r"Expected a to be a list, got 'b'\." with pytest.raises(ValueError, match=pattern): @@ -104,7 +102,7 @@ class B(Item): @attrs.define class A(Item): - a: List[B] + a: list[B] pattern = r"Expected a\[0\] to be a dict with fields from tests\.\S+?\.B, got 'b'\." with pytest.raises(ValueError, match=pattern): @@ -113,12 +111,12 @@ class A(Item): def test_item_unknown_input(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating=dict(worstRating=0), - url="https://example.com/?product=product22", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/?product=product22", + } ) assert product._unknown_fields_dict["a"] == "b" assert product.aggregateRating._unknown_fields_dict["worstRating"] == 0 diff --git a/tests/test_mypy.py b/tests/test_mypy.py index f3b5d1d..534a37d 100644 --- a/tests/test_mypy.py +++ b/tests/test_mypy.py @@ -11,10 +11,11 @@ # E: