diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 4a2248e..0000000 --- a/.flake8 +++ /dev/null @@ -1,30 +0,0 @@ -[flake8] -ignore = - # Refers to the max-line length. Let's suppress the error and simply - # let black take care on how it wants to format the lines. - E501, - - # Refers to "line break before binary operator". - # Similar to above, let black take care of the formatting. - W503, - - # Refers to "Unnecessary dict call - rewrite as a literal". - C408 - -per-file-ignores = - # Ignore: "imported but unused" errors in __init__ files, as those imports are there - # to expose submodule functions so they can be imported directly from that module - zyte_common_items/__init__.py:F401, - - # Ignore: * imports in these files - zyte_common_items/__init__.py:F403, - zyte_common_items/zyte_data_api.py:F403, - - # Ignore: may be undefined, or defined from star imports - zyte_common_items/zyte_data_api.py:F405, - tests/test_page_inputs.py:F405, - - # ”module level import not at the top of file“ caused by - # pytest.importorskip - tests/test_ae_pipeline.py:E402, - tests/test_pipelines.py:E402, diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 54d60d4..352c36e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,36 +1,21 @@ repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.13.2 + hooks: + - id: ruff-check + args: [ --fix ] + - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace - - hooks: - - id: black - exclude: test_mypy\.py$ # https://github.com/davidfritzsche/pytest-mypy-testing/issues/29 - language_version: python3 - repo: https://github.com/psf/black - rev: 24.10.0 - - hooks: - - id: isort - language_version: python3 - repo: https://github.com/timothycrosley/isort - rev: 5.13.2 - - hooks: - - id: flake8 - language_version: python3 - additional_dependencies: - - flake8-bugbear - - flake8-comprehensions - - flake8-debugger - - flake8-string-format - repo: https://github.com/pycqa/flake8 - rev: 7.1.1 - repo: https://github.com/adamchainz/blacken-docs - rev: 1.19.0 + rev: 1.19.1 hooks: - id: blacken-docs additional_dependencies: - - black==24.10.0 + - black==25.1.0 - repo: local hooks: - id: no-colon-comments diff --git a/docs/conf.py b/docs/conf.py index c76a447..169c54d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,7 @@ def get_copyright(attribution, *, first_year): def get_version_and_release(): try: - import zyte_common_items # noqa: F401 + import zyte_common_items # noqa: F401, PLC0415 except ImportError: return "", "" version_bytes = pkgutil.get_data("zyte_common_items", "VERSION") or b"" @@ -23,7 +23,7 @@ def get_version_and_release(): project = "zyte-common-items" -copyright = get_copyright("Zyte Group Ltd", first_year=2022) +project_copyright = get_copyright("Zyte Group Ltd", first_year=2022) version, release = get_version_and_release() extensions = [ diff --git a/pre-commit-scripts/no_colon_comments.py b/pre-commit-scripts/no_colon_comments.py index 8f04697..1a79c46 100644 --- a/pre-commit-scripts/no_colon_comments.py +++ b/pre-commit-scripts/no_colon_comments.py @@ -8,7 +8,7 @@ def check_file_for_colon_comment(file_path): - with open(file_path, "r", encoding="utf-8") as f: + with Path(file_path).open(encoding="utf-8") as f: for i, line in enumerate(f, 1): if "test_file" in file_path: raise ValueError(f"{line=}") @@ -23,9 +23,10 @@ def check_file_for_colon_comment(file_path): def main(): failed = False for file in sys.argv[1:]: - if Path(file).suffix in PYTHON_FILE_EXTENSIONS: - if check_file_for_colon_comment(file): - failed = True + if Path(file).suffix in PYTHON_FILE_EXTENSIONS and check_file_for_colon_comment( + file + ): + failed = True if failed: print( "\nERROR: Sphinx-style comments detected. Please use docstrings for documentation." diff --git a/pyproject.toml b/pyproject.toml index b9c4d11..8fe9c06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,13 +50,6 @@ zyte_common_items = ["py.typed","VERSION"] include = ["zyte_common_items*"] namespaces = false -[tool.black] -exclude = 'test_mypy\.py' # https://github.com/davidfritzsche/pytest-mypy-testing/issues/29 - -[tool.isort] -profile = "black" -multi_line_output = 3 - [tool.mypy] check_untyped_defs = true ignore_missing_imports = true @@ -82,3 +75,157 @@ regex = true [[tool.bumpversion.files]] filename = "zyte_common_items/VERSION" + +[tool.ruff.format] +exclude = [ + "tests/test_mypy.py", # https://github.com/davidfritzsche/pytest-mypy-testing/issues/29 +] + +[tool.ruff.lint] +extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # flake8-commas + "COM", + # pydocstyle + "D", + # flake8-future-annotations + "FA", + # flynt + "FLY", + # refurb + "FURB", + # isort + "I", + # flake8-implicit-str-concat + "ISC", + # flake8-logging + "LOG", + # Perflint + "PERF", + # pygrep-hooks + "PGH", + # flake8-pie + "PIE", + # pylint + "PL", + # flake8-pytest-style + "PT", + # flake8-use-pathlib + "PTH", + # flake8-pyi + "PYI", + # flake8-quotes + "Q", + # flake8-return + "RET", + # flake8-raise + "RSE", + # Ruff-specific rules + "RUF", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # flake8-slots + "SLOT", + # flake8-debugger + "T10", + # flake8-type-checking + "TC", + # flake8-tidy-imports + "TID", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +ignore = [ + # Trailing comma missing + "COM812", + # Missing docstring in public module + "D100", + # Missing docstring in public class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # Missing docstring in magic method + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + # One-line docstring should fit on one line with quotes + "D200", + # No blank lines allowed after function docstring + "D202", + # 1 blank line required between summary line and description + "D205", + # Multi-line docstring closing quotes should be on a separate line + "D209", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First line should not be the function's "signature" + "D402", + # Too many return statements + "PLR0911", + # Too many branches + "PLR0912", + # Too many arguments in function definition + "PLR0913", + # Too many statements + "PLR0915", + # Magic value used in comparison + "PLR2004", + # String contains ambiguous {}. + "RUF001", + # Docstring contains ambiguous {}. + "RUF002", + # Comment contains ambiguous {}. + "RUF003", + # Mutable class attributes should be annotated with `typing.ClassVar` + "RUF012", + # Use of `assert` detected + "S101", + # Prefer absolute imports over relative imports from parent modules + "TID252", + # Add `from __future__ import annotations` to simplify + # (The fix can break andi.) + "FA100", +] + +[tool.ruff.lint.flake8-type-checking] +runtime-evaluated-decorators = ["attrs.define"] + +[tool.ruff.lint.isort] +split-on-trailing-comma = false + +[tool.ruff.lint.per-file-ignores] +"zyte_common_items/__init__.py" = ["F401"] +"zyte_common_items/components/__init__.py" = ["F401"] +"zyte_common_items/items/__init__.py" = ["F401"] +"zyte_common_items/pages/__init__.py" = ["F401"] +# Skip PEP 604 suggestions for files with attr classes +"zyte_common_items/components/*.py" = ["UP007", "UP045"] +"zyte_common_items/items/*.py" = ["UP007", "UP045"] +# ”module level import not at the top of file“ caused by pytest.importorskip +"tests/test_ae_pipeline.py" = ["E402"] +"tests/test_pipelines.py" = ["E402"] +"tests/*" = ["S", "B018"] + +[tool.ruff.lint.pydocstyle] +convention = "pep257" diff --git a/tests/__init__.py b/tests/__init__.py index 0a18110..5832a0d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,37 +1,16 @@ -import contextlib -import json -import os import random -def load_fixture(name): - path = os.path.join(os.path.dirname(__file__), f"fixtures/{name}") - with open(path, "r") as f: - return json.loads(f.read()) - - -@contextlib.contextmanager -def temp_seed(seed): - state = random.getstate() - random.seed(seed) - try: - yield - finally: - random.setstate(state) - - def crazy_monkey_nullify(data, drop_prob=0.5): """Make some attributes None or [] recursively""" def nullify(value): if drop_prob <= random.random(): return [] if isinstance(value, list) else None - else: - return crazy_monkey_nullify(value, drop_prob) + return crazy_monkey_nullify(value, drop_prob) if isinstance(data, list): return [crazy_monkey_nullify(value, drop_prob) for value in data] - elif isinstance(data, dict): + if isinstance(data, dict): return {k: nullify(v) for k, v in data.items()} - else: - return data + return data diff --git a/tests/test_adapter.py b/tests/test_adapter.py index f842df1..5f2458f 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -29,17 +29,13 @@ def test_asdict_all_fields(): adapter = ItemAdapter(product) actual_dict = adapter.asdict() expected_dict = { - "additionalProperties": [dict(name="foo", value="bar")], - "aggregateRating": dict( - bestRating=5.0, - ratingValue=2.5, - reviewCount=123, - ), + "additionalProperties": [{"name": "foo", "value": "bar"}], + "aggregateRating": {"bestRating": 5.0, "ratingValue": 2.5, "reviewCount": 123}, "availability": "InStock", - "brand": dict(name="Ka-pow"), + "brand": {"name": "Ka-pow"}, "breadcrumbs": [ - dict(name="Level 1", url="http://example.com/level1"), - dict(name="Level 2", url="http://example.com/level1/level2"), + {"name": "Level 1", "url": "http://example.com/level1"}, + {"name": "Level 2", "url": "http://example.com/level1/level2"}, ], "canonicalUrl": "https://example.com/product22", "color": "white", @@ -52,15 +48,12 @@ def test_asdict_all_fields(): "

Super Cooling Plus™

" ), "features": ["Easily store fragile products.", "Bluetooth connectivity."], - "gtin": [dict(type="foo", value="bar")], + "gtin": [{"type": "foo", "value": "bar"}], "images": [ - dict(url="http://example.com/image1.png"), + {"url": "http://example.com/image1.png"}, ], - "mainImage": dict(url="http://example.com/image1.png"), - "metadata": dict( - dateDownloaded="2022-12-31T13:01:54Z", - probability=1.0, - ), + "mainImage": {"url": "http://example.com/image1.png"}, + "metadata": {"dateDownloaded": "2022-12-31T13:01:54Z", "probability": 1.0}, "mpn": "HSC0424PP", "name": "White two-door refrigerator", "price": "9999.99", @@ -112,11 +105,11 @@ def __iter__(self): @pytest.mark.parametrize( "value", - ( + [ [], - tuple(), + (), EmptyCollection(), - ), + ], ) def test_asdict_empty_collection(value): @attrs.define @@ -131,12 +124,12 @@ class _Item(Item): def test_asdict_unknown_fields(): - input_dict = dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + input_dict = { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } product = Product.from_dict(input_dict) with configured_adapter(): adapter = ItemAdapter(product) @@ -254,12 +247,12 @@ def test_known_field_remove_missing_twice(): def test_unknown_field_get(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -283,11 +276,11 @@ def test_unknown_field_get_missing(): def test_unknown_field_set(): product = Product.from_dict( - dict( - additionalProperties=[{"name": "a", "value": "b"}], - aggregateRating={"bestRating": 5.0}, - url="https://example.com/", - ) + { + "additionalProperties": [{"name": "a", "value": "b"}], + "aggregateRating": {"bestRating": 5.0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -309,12 +302,12 @@ def test_unknown_field_set(): def test_unknown_field_update(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -336,12 +329,12 @@ def test_unknown_field_update(): def test_unknown_field_remove(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating={"worstRating": 0}, - url="https://example.com/", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/", + } ) with configured_adapter(): @@ -387,7 +380,7 @@ class _Item(Item): children: Collection[Item] class TestAdapter(ItemAdapter): - ADAPTER_CLASSES = [ZyteItemKeepEmptyAdapter] + list(ItemAdapter.ADAPTER_CLASSES) + ADAPTER_CLASSES = [ZyteItemKeepEmptyAdapter, *ItemAdapter.ADAPTER_CLASSES] item = _Item([]) adapter = TestAdapter(item) diff --git a/tests/test_ae_pipeline.py b/tests/test_ae_pipeline.py index 88fd0f0..21dee43 100644 --- a/tests/test_ae_pipeline.py +++ b/tests/test_ae_pipeline.py @@ -1,6 +1,6 @@ -import pytest # isort: skip +import pytest -scrapy = pytest.importorskip("scrapy") # noqa +scrapy = pytest.importorskip("scrapy") from zyte_common_items import Article, ArticleList, JobPosting, Product, ProductList from zyte_common_items.ae import ( @@ -38,8 +38,8 @@ @pytest.mark.parametrize( - ["item", "expected"], - ( + ("item", "expected"), + [ ( Article(**_ARTICLE_MIN_KWARGS), AEArticle( @@ -222,7 +222,7 @@ ), ), ), - ), + ], ) def test_main(item, expected): pipeline = AEPipeline() diff --git a/tests/test_components.py b/tests/test_components.py index 808291f..433d5b3 100644 --- a/tests/test_components.py +++ b/tests/test_components.py @@ -88,9 +88,9 @@ def test_metadata_fields(): superset = set(attrs.fields_dict(Metadata)) for cls in get_all_subclasses(BaseMetadata): subset = set(attrs.fields_dict(cls)) - assert subset.issubset( - superset - ), f"Metadata is missing some fields from {cls.__name__}: {subset - superset}" + assert subset.issubset(superset), ( + f"Metadata is missing some fields from {cls.__name__}: {subset - superset}" + ) def test_metadata_subclasses(): diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 49567d6..1006928 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize( - "cls,fields", + ("cls", "fields"), [ (Image, ["url"]), (Breadcrumb, ["url"]), @@ -44,7 +44,7 @@ def test_webpoet_URL_classes(cls, fields): # Ensure that both types of URL classes are covered for url_obj in [response_url_obj, request_url_obj]: - data = {field: url_obj for field in fields} + data = dict.fromkeys(fields, url_obj) obj = cls(**data) for field in fields: @@ -61,7 +61,9 @@ def test_webpoet_URL_classes(cls, fields): # Setting other values that are not strings or URL classes would # raise a ValueError - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match="is neither a string nor an instance of" + ): setattr(obj, field, 123) @@ -90,12 +92,12 @@ def test_webpoet_URL_mainImage(cls): # Setting other values that are not strings or URL classes would # raise a ValueError - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): obj.mainImage.url = False data = {"mainImage": {"url": 123}, "url": 123} - with pytest.raises(ValueError): - obj = cls.from_dict(data) + with pytest.raises(ValueError, match="is neither a string nor an instance of"): + cls.from_dict(data) @pytest.mark.parametrize("cls", [ProductVariant, Product]) @@ -127,7 +129,7 @@ def test_webpoet_URL_images(cls): # Setting other values that are not strings or URL classes would # raise a ValueError - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): obj.images[1].url = False data = { @@ -137,5 +139,5 @@ def test_webpoet_URL_images(cls): ], "url": 789, } - with pytest.raises(ValueError): - obj = cls.from_dict(data) + with pytest.raises(ValueError, match="is neither a string nor an instance of"): + cls.from_dict(data) diff --git a/tests/test_docs.py b/tests/test_docs.py index 6b9a76f..3880ffd 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,5 +1,6 @@ import re from importlib import import_module +from pathlib import Path import pytest @@ -12,7 +13,7 @@ @pytest.mark.parametrize( ("submodule_name", "only_in_docs", "only_in_submodule"), - ( + [ ("components", set(), set()), ("items", {"Item", "base.ProbabilityMixin"}, {"RequestListCaster"}), ( @@ -20,11 +21,10 @@ {"pages.base._BasePage"}, {"DescriptionMixin", "HasMetadata", "PriceMixin"}, ), - ), + ], ) def test_component_reference_entries(submodule_name, only_in_docs, only_in_submodule): - with open(f"docs/reference/{submodule_name}.rst") as f: - docs_page = f.read() + docs_page = Path(f"docs/reference/{submodule_name}.rst").read_text(encoding="utf-8") docs_class_names = set() for match in AUTOCLASS_PATTERN.finditer(docs_page): class_name = match[1] diff --git a/tests/test_examples.py b/tests/test_examples.py index 051eb85..fc5199e 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -16,14 +16,14 @@ @pytest.mark.parametrize( "example", - ( + [ _BREADCRUMBS_EXAMPLE_1, _BREADCRUMBS_EXAMPLE_2, _DESCRIPTION_HTML_EXAMPLE, _GTIN_EXAMPLE_1, _GTIN_EXAMPLE_2, _GTIN_EXAMPLE_3, - ), + ], ) def test(example: PageObjectMethodExample): response = HttpResponse(url="http://example.com", body=example.html.encode()) diff --git a/tests/test_items.py b/tests/test_items.py index 5a6bb5b..cb5a12d 100644 --- a/tests/test_items.py +++ b/tests/test_items.py @@ -805,7 +805,7 @@ def test_metadata(): obj_name[:-4] for obj_name in zyte_common_items.__dict__ if ( - not (obj_name.startswith("Base") or obj_name.startswith("Auto")) + not obj_name.startswith(("Base", "Auto")) and obj_name.endswith("Page") and obj_name != "Page" ) @@ -912,8 +912,8 @@ def test_social_media_post_missing_fields(): @pytest.mark.parametrize( - "cls,has_proba", - ( + ("cls", "has_proba"), + [ (Article, True), (ArticleFromList, True), (ArticleList, False), @@ -928,7 +928,7 @@ def test_social_media_post_missing_fields(): (RealEstate, True), (Serp, False), (SocialMediaPost, True), - ), + ], ) def test_get_probability_request(cls, has_proba): data = {"url": "https://example.com"} diff --git a/tests/test_items_base.py b/tests/test_items_base.py index daa1fea..c272653 100644 --- a/tests/test_items_base.py +++ b/tests/test_items_base.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import Optional, Union import attrs import pytest @@ -9,8 +9,6 @@ class NotConsideredAnItem: """It has to inherit from Item to be considered one.""" - pass - @attrs.define class SubItem(Item): @@ -70,7 +68,7 @@ class A(Item): pattern = r"Expected a dict with fields from tests\.\S+?\.A, got 'a'\." with pytest.raises(ValueError, match=pattern): - A.from_dict("a") # type: ignore + A.from_dict("a") # type: ignore[arg-type] def test_from_dict_non_dict_field(): @@ -90,7 +88,7 @@ class A(Item): def test_from_dict_from_list_non_list_field(): @attrs.define class A(Item): - a: List[str] + a: list[str] pattern = r"Expected a to be a list, got 'b'\." with pytest.raises(ValueError, match=pattern): @@ -104,7 +102,7 @@ class B(Item): @attrs.define class A(Item): - a: List[B] + a: list[B] pattern = r"Expected a\[0\] to be a dict with fields from tests\.\S+?\.B, got 'b'\." with pytest.raises(ValueError, match=pattern): @@ -113,12 +111,12 @@ class A(Item): def test_item_unknown_input(): product = Product.from_dict( - dict( - a="b", - additionalProperties=[{"name": "a", "value": "b", "max": 10}], - aggregateRating=dict(worstRating=0), - url="https://example.com/?product=product22", - ) + { + "a": "b", + "additionalProperties": [{"name": "a", "value": "b", "max": 10}], + "aggregateRating": {"worstRating": 0}, + "url": "https://example.com/?product=product22", + } ) assert product._unknown_fields_dict["a"] == "b" assert product.aggregateRating._unknown_fields_dict["worstRating"] == 0 diff --git a/tests/test_mypy.py b/tests/test_mypy.py index f3b5d1d..534a37d 100644 --- a/tests/test_mypy.py +++ b/tests/test_mypy.py @@ -11,10 +11,11 @@ # E: - assert a mypy error message # R: - assert a mypy note message Revealed type is ''. -NOTE: +Note: Pytest's parametrization doesn't work here due to the dynamic nature of variables in each line. For-loops also doesn't work well since the type checker needs to narrow down on the type within the test function's scope. + """ import pytest @@ -186,7 +187,7 @@ def test_assignment_product_variant(): @pytest.mark.mypy_testing def test_instantiation_image(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): Image( url=123 # E: Argument "url" to "Image" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl" [arg-type] ) @@ -194,7 +195,7 @@ def test_instantiation_image(): @pytest.mark.mypy_testing def test_instantiation_breadcrumb(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): Breadcrumb( url=123 # E: Argument "url" to "Breadcrumb" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl | None" [arg-type] ) @@ -202,7 +203,7 @@ def test_instantiation_breadcrumb(): @pytest.mark.mypy_testing def test_instantiation_link(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): Link( url=123 # E: Argument "url" to "Link" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl | None" [arg-type] ) @@ -210,11 +211,11 @@ def test_instantiation_link(): @pytest.mark.mypy_testing def test_instantiation_product_list(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): ProductList( url=123 # E: Argument "url" to "ProductList" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl" [arg-type] ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): ProductList( url="https://www.example.com", canonicalUrl=123, # E: Argument "canonicalUrl" to "ProductList" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl | None" [arg-type] @@ -223,7 +224,7 @@ def test_instantiation_product_list(): @pytest.mark.mypy_testing def test_instantiation_product_from_list(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): ProductFromList( url=123 # E: Argument "url" to "ProductFromList" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl | None" [arg-type] ) @@ -231,11 +232,11 @@ def test_instantiation_product_from_list(): @pytest.mark.mypy_testing def test_instantiation_product_variant(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): ProductVariant( url=123 # E: Argument "url" to "ProductVariant" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl | None" [arg-type] ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="is neither a string nor an instance of"): ProductVariant( url="https://www.example.com", canonicalUrl=123, # E: Argument "canonicalUrl" to "ProductVariant" has incompatible type "int"; expected "str | RequestUrl | ResponseUrl | None" [arg-type] diff --git a/tests/test_pages.py b/tests/test_pages.py index dad16ad..d9d3e27 100644 --- a/tests/test_pages.py +++ b/tests/test_pages.py @@ -32,10 +32,10 @@ @pytest.mark.parametrize( "page_class", - ( + [ BaseProductPage, BaseProductListPage, - ), + ], ) def test_base_pages_default(page_class): page = page_class(request_url=RequestUrl("https://example.com")) @@ -46,10 +46,10 @@ def test_base_pages_default(page_class): @pytest.mark.parametrize( "page_class", - ( + [ ProductPage, ProductListPage, - ), + ], ) def test_pages_default(page_class): url = ResponseUrl("https://example.com") @@ -159,7 +159,7 @@ def test_page_pairs(): obj_name for obj_name in zyte_common_items.__dict__ if ( - not (obj_name.startswith("Base") or obj_name.startswith("Auto")) + not obj_name.startswith(("Base", "Auto")) and obj_name.endswith("Page") and obj_name != "Page" ) @@ -194,7 +194,7 @@ def test_matching_items(): obj_name for obj_name in zyte_common_items.__dict__ if ( - not (obj_name.startswith("Base") or obj_name.startswith("Auto")) + not obj_name.startswith(("Base", "Auto")) and obj_name.endswith("Page") and obj_name != "Page" ) @@ -256,9 +256,7 @@ def allow_field(field_name): for prefix in ["_", "from_", "get_"]: if field_name.startswith(prefix): return False - if field_name == "cast": - return False - return True + return field_name != "cast" actual_fields = {field for field in dir(obj.metadata) if allow_field(field)} error_message = ( @@ -268,9 +266,9 @@ def allow_field(field_name): assert actual_fields == expected_fields, error_message if "dateDownloaded" in actual_fields: - assert isinstance( - obj.metadata.dateDownloaded, str - ), f"{cls} does not get dateDownloaded set by default" + assert isinstance(obj.metadata.dateDownloaded, str), ( + f"{cls} does not get dateDownloaded set by default" + ) assert obj.metadata.dateDownloaded.endswith("Z") actual = obj.metadata.get_date_downloaded_parsed() end = utcnow().replace(microsecond=0) @@ -297,7 +295,7 @@ def test_metadata(): obj_name for obj_name in zyte_common_items.__dict__ if ( - not (obj_name.startswith("Base") or obj_name.startswith("Auto")) + not obj_name.startswith(("Base", "Auto")) and obj_name.endswith("Page") and obj_name != "Page" ) @@ -451,7 +449,7 @@ class CustomProductPage(Page, Returns[Product]): """ page = CustomProductPage(response=HttpResponse(url=url, body=html)) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="doesn't have a metadata class configured"): page.metadata @@ -507,6 +505,6 @@ def test_auto_page_item_fields(): auto_page_fields = set(get_fields_dict(auto_page)) item_cls = get_item_cls(auto_page) # type: ignore[call-overload] item_fields = set(attrs.fields_dict(item_cls)) - assert ( - auto_page_fields == item_fields - ), f"{auto_page} does not map all {item_cls} fields" + assert auto_page_fields == item_fields, ( + f"{auto_page} does not map all {item_cls} fields" + ) diff --git a/tests/test_pages_auto.py b/tests/test_pages_auto.py index f81026d..c505f8d 100644 --- a/tests/test_pages_auto.py +++ b/tests/test_pages_auto.py @@ -1,5 +1,5 @@ from copy import copy -from typing import Any, Dict, Type +from typing import Any import attrs import pytest @@ -90,7 +90,7 @@ async def assert_expected_item(page, item): @pytest.mark.parametrize(*PARAMS) @pytest.mark.asyncio async def test_unmodified( - item_cls: Type, item_kwargs: Dict[str, Any], cls: Type, param: str + item_cls: type, item_kwargs: dict[str, Any], cls: type, param: str ) -> None: item = item_cls(**item_kwargs) kwargs = { @@ -104,7 +104,7 @@ async def test_unmodified( @pytest.mark.parametrize(*PARAMS) @pytest.mark.asyncio async def test_modified( - item_cls: Type, item_kwargs: Dict[str, Any], cls: Type, param: str + item_cls: type, item_kwargs: dict[str, Any], cls: type, param: str ) -> None: modified_url = "https://custom.example" @@ -127,7 +127,7 @@ def url(self): @pytest.mark.parametrize(*PARAMS) @pytest.mark.asyncio async def test_extended( - item_cls: Type, item_kwargs: Dict[str, Any], cls: Type, param: str + item_cls: type, item_kwargs: dict[str, Any], cls: type, param: str ) -> None: @attrs.define class ExtendedItem(item_cls): diff --git a/tests/test_pages_description.py b/tests/test_pages_description.py index f8fb3cb..6166546 100644 --- a/tests/test_pages_description.py +++ b/tests/test_pages_description.py @@ -206,11 +206,11 @@ def descriptionHtml(self): @pytest.mark.parametrize( "page_class", - ( + [ BusinessPlacePage, JobPostingPage, RealEstatePage, - ), + ], ) def test_description_simple(page_class: type): class CustomPage(page_class): @@ -232,5 +232,5 @@ def description(self): url = "https://example.com" body = b"""
""" page = CustomPage(response=HttpResponse(url=url, body=body)) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="expects an HtmlElement node, got"): page.description diff --git a/tests/test_pages_price.py b/tests/test_pages_price.py index e367c37..37b5f33 100644 --- a/tests/test_pages_price.py +++ b/tests/test_pages_price.py @@ -166,7 +166,6 @@ class CustomProductPage(ProductPage): @field def price(self): self.call_count += 1 - return None html = b""" diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 9323f12..0f62666 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -1,6 +1,6 @@ -import pytest # isort: skip +import pytest -scrapy = pytest.importorskip("scrapy") # noqa +scrapy = pytest.importorskip("scrapy") import sys import warnings @@ -21,7 +21,12 @@ @pytest.mark.parametrize( - "thresholds_settings, default_threshold, expected_thresholds, expected_default_thresholds", + ( + "thresholds_settings", + "default_threshold", + "expected_thresholds", + "expected_default_thresholds", + ), [ ({}, 0.09, {}, 0.09), ( @@ -88,7 +93,7 @@ def test_init_thresholds( @pytest.mark.parametrize( - "item, thresholds_settings, default_threshold, expected_threshold", + ("item", "thresholds_settings", "default_threshold", "expected_threshold"), [ ( Article(url="http://example.com"), @@ -120,7 +125,7 @@ def test_get_threshold_for_item( @pytest.mark.parametrize( - "items, item_proba, threshold, expected_stats_calls", + ("items", "item_proba", "threshold", "expected_stats_calls"), [ ( [ @@ -263,7 +268,7 @@ def test_process_item(items, item_proba, threshold, expected_stats_calls): try: returned_item = pipeline.process_item(item, mock_crawler.spider) except scrapy.exceptions.DropItem as e: - assert ( + assert ( # noqa: PT017 f"This item is dropped since the probability ({item_proba}) " f"is below the threshold ({threshold}):" ) in str(e) @@ -285,7 +290,7 @@ def test_process_item(items, item_proba, threshold, expected_stats_calls): @pytest.mark.parametrize( - "item, expected_name", + ("item", "expected_name"), [ ( Article(url="http://example.com"), @@ -319,13 +324,13 @@ def test_warning(): sys.modules.pop("zyte_common_items", None) with warnings.catch_warnings(record=True) as record: - from zyte_common_items.pipelines import AEPipeline + from zyte_common_items.pipelines import AEPipeline # noqa: PLC0415 ae_pipeline = AEPipeline() warn_msg = str(record[0].message) assert len(record) == 1 assert warning_msg in warn_msg - from zyte_common_items.ae import downgrade + from zyte_common_items.ae import downgrade # noqa: PLC0415 assert ae_pipeline._downgrade == downgrade @@ -337,7 +342,9 @@ def test_no_warning(): sys.modules.pop("zyte_common_items", None) with warnings.catch_warnings(record=True) as record: - from zyte_common_items.pipelines import DropLowProbabilityItemPipeline + from zyte_common_items.pipelines import ( # noqa: PLC0415 + DropLowProbabilityItemPipeline, + ) mock_crawler = MagicMock(spec=["spider", "stats"]) DropLowProbabilityItemPipeline(mock_crawler) diff --git a/tests/test_processors.py b/tests/test_processors.py index e1a9a31..0485e75 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1,5 +1,3 @@ -from typing import Type - import pytest from lxml.html import fromstring from parsel import Selector, SelectorList @@ -49,7 +47,7 @@ @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ (None, None), ([], []), @@ -137,7 +135,7 @@ def breadcrumbs(self): @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ (None, None), ("", None), @@ -172,14 +170,14 @@ def brand(self): response = HttpResponse( url="http://www.example.com/", - body="foo".encode(), + body=b"foo", ) page = MyProductPage(response=response) assert page.brand == Brand(name="foo") @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ ("", None), ("NaN", None), @@ -202,7 +200,7 @@ def test_format_price(input_value, expected_value): @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ (None, None), ([], None), @@ -246,7 +244,7 @@ def gtin(self): @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ (None, None), ([], []), @@ -341,7 +339,7 @@ def aggregateRating(self): @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ (None, None), ([], []), @@ -393,14 +391,14 @@ def images(self): response = HttpResponse( url="http://www.example.com/", - body="".encode(), + body=b"", ) page = MyProductPage(response=response) assert page.images == [Image(url="https://www.url.com/img.jpg")] @pytest.mark.parametrize( - "input_value,expected_value", + ("input_value", "expected_value"), [ (100, "100.00"), (None, None), @@ -423,13 +421,13 @@ def price(self): @pytest.mark.parametrize( - "input_value,BasePage,expected_value", + ("input_value", "BasePage", "expected_value"), [ (None, ProductPage, None), (Metadata(), ProductPage, ProductMetadata()), ], ) -def test_metadata(input_value, BasePage: Type, expected_value): +def test_metadata(input_value, BasePage: type, expected_value): class CustomPage(BasePage): @field(out=[metadata_processor]) def metadata(self): diff --git a/tests/test_request_templates.py b/tests/test_request_templates.py index c7e3e3e..f9c5832 100644 --- a/tests/test_request_templates.py +++ b/tests/test_request_templates.py @@ -126,7 +126,6 @@ def url(self): class UrlBasedSearchRequestTemplatePage(BaseSearchRequestTemplatePage): - @field def url(self): return f"{self.request_url}?search={{{{ query|urlencode }}}}" @@ -139,7 +138,7 @@ def edit_request_url(expression, page): return expression if not isinstance(expression, dict): raise ValueError( - f"The edit_request_url processor expected a dict, got " f"{expression!r}" + f"The edit_request_url processor expected a dict, got {expression!r}" ) if "url" in expression: url = expression["url"] @@ -161,16 +160,14 @@ def edit_request_url(expression, page): params = copy(expression["add_query_params"]) for k in list(params): v = params.pop(k) - k = k.format(query=url_safe_query_placeholder) + k = k.format(query=url_safe_query_placeholder) # noqa: PLW2901 v = v.format(query=url_safe_query_placeholder) params[k] = v url = add_or_replace_parameters(url, params) - url = url.replace(url_safe_query_placeholder, "{{ query|urlencode }}") - return url + return url.replace(url_safe_query_placeholder, "{{ query|urlencode }}") class DSLSearchRequestTemplatePage(BaseSearchRequestTemplatePage): - class Processors: url = [edit_request_url] @@ -181,7 +178,7 @@ def url(self): @pytest.mark.parametrize( ("page_cls", "inputs", "query", "url"), - ( + [ ( VerbatimSearchRequestTemplatePage, {}, @@ -218,7 +215,7 @@ def url(self): "foo bar", "https://example.com/?search=foo%20bar", ), - ), + ], ) @pytest.mark.asyncio async def test_url(page_cls, inputs, query, url): diff --git a/tests/test_util.py b/tests/test_util.py index d000345..c458f1f 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -11,11 +11,11 @@ class _TestItem: def test_split_in_unknown_and_known_fields(): - input = dict(k1=1, k2=2, extra=3) - unknown, known = split_in_unknown_and_known_fields(input, _TestItem) + input_ = {"k1": 1, "k2": 2, "extra": 3} + unknown, known = split_in_unknown_and_known_fields(input_, _TestItem) item = _TestItem(**known) - assert attrs.asdict(item) == dict(k1=1, k2=2) - assert unknown == dict(extra=3) + assert attrs.asdict(item) == {"k1": 1, "k2": 2} + assert unknown == {"extra": 3} unknown, known = split_in_unknown_and_known_fields(known, _TestItem) assert unknown == {} @@ -24,5 +24,5 @@ def test_split_in_unknown_and_known_fields(): ret = split_in_unknown_and_known_fields(empty_input, _TestItem) assert ret == ({}, {}) - with pytest.raises(ValueError): - split_in_unknown_and_known_fields(input, str) + with pytest.raises(ValueError, match="The cls is not attrs class"): + split_in_unknown_and_known_fields(input_, str) diff --git a/tests/typing.py b/tests/typing.py index 075bfd8..884acd3 100644 --- a/tests/typing.py +++ b/tests/typing.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple, Union, get_args # noqa +from typing import List, Optional, Tuple, Union, get_args # noqa: F401,UP035 import attrs @@ -14,6 +14,7 @@ def optional_type(cls): types = [t for t in get_args(cls) if not issubclass(t, type(None))] if len(types) == 1: return types[0] + return None def get_generic_type(cls): @@ -28,11 +29,11 @@ def get_generic_type(cls): args = get_args(cls) if args and len(args) == 1: return args[0] + return None def assert_type_compliance(item, cls=None, attrib=None): - """ - Assert recursively that the values of the attributes of an attrs + """Assert recursively that the values of the attributes of an attrs item are as defined """ item_cls = cls or type(item) diff --git a/tox.ini b/tox.ini index 72fa5fd..99be118 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ deps = pytest-cov # the expected output uses Python 3.10 syntax pytest-mypy-testing==0.1.3; python_version >= '3.10' - mypy==1.9.0; python_version >= '3.10' + mypy==1.15.0; python_version >= '3.10' [testenv] deps = @@ -73,7 +73,7 @@ commands = [testenv:mypy] deps = - mypy==1.11.2 + mypy==1.18.2 commands = mypy zyte_common_items tests diff --git a/zyte_common_items/__init__.py b/zyte_common_items/__init__.py index c61b45a..e0c74f6 100644 --- a/zyte_common_items/__init__.py +++ b/zyte_common_items/__init__.py @@ -1,4 +1,5 @@ -# flake8: noqa +import contextlib + from .adapter import ZyteItemAdapter, ZyteItemKeepEmptyAdapter from .base import Item, is_data_container from .components import ( @@ -127,7 +128,6 @@ SocialMediaPostPage, ) -try: +# If Scrapy is not installed +with contextlib.suppress(ImportError): from ._addon import Addon -except ImportError: # Scrapy is not installed. - pass diff --git a/zyte_common_items/_addon.py b/zyte_common_items/_addon.py index f8dc1c9..1a27fae 100644 --- a/zyte_common_items/_addon.py +++ b/zyte_common_items/_addon.py @@ -30,7 +30,7 @@ def update_settings(self, settings: BaseSettings) -> None: for cls in ItemAdapter.ADAPTER_CLASSES ): ItemAdapter.ADAPTER_CLASSES = deque( - (ZyteItemAdapter,) + tuple(ItemAdapter.ADAPTER_CLASSES) + (ZyteItemAdapter, *ItemAdapter.ADAPTER_CLASSES) ) settings.set("LOG_FORMATTER", ZyteLogFormatter, priority="addon") diff --git a/zyte_common_items/_dateutils.py b/zyte_common_items/_dateutils.py index a4de1b5..fe892ce 100644 --- a/zyte_common_items/_dateutils.py +++ b/zyte_common_items/_dateutils.py @@ -18,8 +18,7 @@ def format_datetime(dt) -> str: def parse_iso_datetime(date_str) -> datetime.datetime: - """ - Parse ISO-formatted UTC date (with a timezone specified as Z) + """Parse ISO-formatted UTC date (with a timezone specified as Z) to a TZ-aware datetime object. """ if sys.version_info < (3, 11): diff --git a/zyte_common_items/_examples.py b/zyte_common_items/_examples.py index 019a0a2..6822ec0 100644 --- a/zyte_common_items/_examples.py +++ b/zyte_common_items/_examples.py @@ -140,15 +140,7 @@ def __str__(self): " cleaned_html = cleaned_node_to_html(cleaned_node)\n" " return cleaned_html" ), - expected=( - "
\n" - "\n" - "

Paragraph 1

\n" - "\n" - "

Paragraph 2

\n" - "\n" - "
" - ), + expected=("
\n\n

Paragraph 1

\n\n

Paragraph 2

\n\n
"), ) _GTIN_EXAMPLE_1 = PageObjectMethodExample( diff --git a/zyte_common_items/adapter.py b/zyte_common_items/adapter.py index 79e14bd..4e6a823 100644 --- a/zyte_common_items/adapter.py +++ b/zyte_common_items/adapter.py @@ -1,7 +1,8 @@ """This module offers better integration with the itemadapter package.""" +from collections.abc import Collection, Iterator, KeysView from types import MappingProxyType -from typing import Any, Collection, Iterator, KeysView +from typing import Any from itemadapter.adapter import AttrsAdapter @@ -54,8 +55,8 @@ def is_item(cls, item: Any) -> bool: def get_field_meta(self, field_name: str) -> MappingProxyType: if field_name in self._fields_dict: - return self._fields_dict[field_name].metadata # type: ignore - elif field_name in self.item._unknown_fields_dict: + return self._fields_dict[field_name].metadata + if field_name in self.item._unknown_fields_dict: return MappingProxyType({}) raise KeyError(field_name) @@ -65,7 +66,7 @@ def field_names(self) -> KeysView: def __getitem__(self, field_name: str) -> Any: if field_name in self._fields_dict: return getattr(self.item, field_name) - elif field_name in self.item._unknown_fields_dict: + if field_name in self.item._unknown_fields_dict: return self.item._unknown_fields_dict[field_name] raise KeyError(field_name) diff --git a/zyte_common_items/ae.py b/zyte_common_items/ae.py index 9866613..c8516f4 100644 --- a/zyte_common_items/ae.py +++ b/zyte_common_items/ae.py @@ -1,5 +1,5 @@ from collections import deque -from typing import List, Optional +from typing import Optional from warnings import warn import attrs @@ -43,10 +43,9 @@ def _set_if_truthy(data, field, value): def _to_url_list(data, old_k, new_k): if _is_truthy_else_remove(data, old_k): - entries = [] - for entry in data.pop(old_k): - if _is_truthy(entry, "url"): - entries.append(entry["url"]) + entries = [ + entry["url"] for entry in data.pop(old_k) if _is_truthy(entry, "url") + ] if entries: data[new_k] = entries @@ -79,17 +78,17 @@ class AEArticle(Item): dateModified: Optional[str] = None dateModifiedRaw: Optional[str] = None author: Optional[str] = None - authorsList: List[str] = attrs.Factory(list) + authorsList: list[str] = attrs.Factory(list) inLanguage: Optional[str] = None - breadcrumbs: List[AEBreadcrumb] = attrs.Factory(list) + breadcrumbs: list[AEBreadcrumb] = attrs.Factory(list) mainImage: Optional[str] = None - images: List[str] = attrs.Factory(list) + images: list[str] = attrs.Factory(list) description: Optional[str] = None articleBody: Optional[str] = None articleBodyHtml: Optional[str] = None articleBodyRaw: Optional[str] = None - videoUrls: List[str] = attrs.Factory(list) - audioUrls: List[str] = attrs.Factory(list) + videoUrls: list[str] = attrs.Factory(list) + audioUrls: list[str] = attrs.Factory(list) probability: float canonicalUrl: Optional[str] = None url: str @@ -114,10 +113,10 @@ class AEArticleFromList(Item): datePublished: Optional[str] = None datePublishedRaw: Optional[str] = None author: Optional[str] = None - authorsList: List[str] = attrs.Factory(list) + authorsList: list[str] = attrs.Factory(list) inLanguage: Optional[str] = None mainImage: Optional[str] = None - images: List[str] = attrs.Factory(list) + images: list[str] = attrs.Factory(list) articleBody: Optional[str] = None url: Optional[str] = None probability: float @@ -141,7 +140,7 @@ def _convert_list_item_metadata(item): @attrs.define(kw_only=True) class AEArticleList(Item): url: str - articles: List[AEArticleFromList] = attrs.Factory(list) + articles: list[AEArticleFromList] = attrs.Factory(list) paginationNext: Optional[AEPaginationLink] = None paginationPrevious: Optional[AEPaginationLink] = None @@ -255,9 +254,8 @@ def _is_truthy_else_remove(data, field): return False if data[field]: return True - else: - del data[field] - return False + del data[field] + return False def _is_not_none(data, field): @@ -299,22 +297,22 @@ def _convert_breadcrumbs(data): @attrs.define(kw_only=True) class AEProduct(Item): name: Optional[str] = None - offers: List[AEOffer] = attrs.Factory(list) + offers: list[AEOffer] = attrs.Factory(list) sku: Optional[str] = None mpn: Optional[str] = None - gtin: List[AEGTIN] = attrs.Factory(list) + gtin: list[AEGTIN] = attrs.Factory(list) brand: Optional[str] = None - breadcrumbs: List[AEBreadcrumb] = attrs.Factory(list) + breadcrumbs: list[AEBreadcrumb] = attrs.Factory(list) mainImage: Optional[str] = None - images: List[str] = attrs.Factory(list) + images: list[str] = attrs.Factory(list) description: Optional[str] = None descriptionHtml: Optional[str] = None aggregateRating: Optional[AERating] = None color: Optional[str] = None size: Optional[str] = None style: Optional[str] = None - additionalProperty: List[AEAdditionalProperty] = attrs.Factory(list) - hasVariants: List["AEProduct"] = attrs.Factory(list) + additionalProperty: list[AEAdditionalProperty] = attrs.Factory(list) + hasVariants: list["AEProduct"] = attrs.Factory(list) probability: float canonicalUrl: Optional[str] = None url: str @@ -351,11 +349,11 @@ def convert(data): @attrs.define(kw_only=True) class AEProductFromList(Item): name: Optional[str] = None - offers: List[AEOffer] = attrs.Factory(list) + offers: list[AEOffer] = attrs.Factory(list) sku: Optional[str] = None brand: Optional[str] = None mainImage: Optional[str] = None - images: List[str] = attrs.Factory(list) + images: list[str] = attrs.Factory(list) description: Optional[str] = None descriptionHtml: Optional[str] = None aggregateRating: Optional[AERating] = None @@ -367,8 +365,8 @@ class AEProductFromList(Item): @attrs.define(kw_only=True) class AEProductList(Item): url: str - products: List[AEProductFromList] = attrs.Factory(list) - breadcrumbs: List[AEBreadcrumb] = attrs.Factory(list) + products: list[AEProductFromList] = attrs.Factory(list) + breadcrumbs: list[AEBreadcrumb] = attrs.Factory(list) paginationNext: Optional[AEPaginationLink] = None paginationPrevious: Optional[AEPaginationLink] = None diff --git a/zyte_common_items/base.py b/zyte_common_items/base.py index 444d75b..633173e 100644 --- a/zyte_common_items/base.py +++ b/zyte_common_items/base.py @@ -1,7 +1,7 @@ """The ``Item`` class should be used as the parent class for data containers.""" from collections import ChainMap -from typing import Dict, List, Optional, Union, get_args, get_origin, get_type_hints +from typing import Optional, Union, get_args, get_origin, get_type_hints import attrs @@ -69,37 +69,34 @@ def __attrs_post_init__(self): self._unknown_fields_dict = {} # type: ignore[misc] @classmethod - def from_dict(cls, item: Optional[Dict]): + def from_dict(cls, item: Optional[dict]): """Read an item from a dictionary.""" return cls._from_dict(item) @classmethod - def _from_dict(cls, item: Optional[Dict], *, trail: _Trail = None): + def _from_dict(cls, item: Optional[dict], *, trail: _Trail = None): """Read an item from a dictionary.""" if item is None: return None if not isinstance(item, dict): path = _get_import_path(cls) - if not trail: - prefix = "Expected" - else: - prefix = f"Expected {trail} to be" + prefix = f"Expected {trail} to be" if trail else "Expected" raise ValueError(f"{prefix} a dict with fields from {path}, got {item!r}.") item = cls._apply_field_types_to_sub_fields(item, trail=trail) unknown_fields, known_fields = split_in_unknown_and_known_fields(item, cls) - obj = cls(**known_fields) # type: ignore + obj = cls(**known_fields) obj._unknown_fields_dict = unknown_fields # type: ignore[misc] return obj @classmethod - def from_list(cls, items: Optional[List[Dict]], *, trail: _Trail = None) -> List: + def from_list(cls, items: Optional[list[dict]], *, trail: _Trail = None) -> list: """Read items from a list.""" return cls._from_list(items) @classmethod - def _from_list(cls, items: Optional[List[Dict]], *, trail: _Trail = None) -> List: + def _from_list(cls, items: Optional[list[dict]], *, trail: _Trail = None) -> list: """Read items from a list.""" result = [] for index, item in enumerate(items or []): @@ -108,7 +105,7 @@ def _from_list(cls, items: Optional[List[Dict]], *, trail: _Trail = None) -> Lis return result @classmethod - def _apply_field_types_to_sub_fields(cls, item: Dict, trail: _Trail = None): + def _apply_field_types_to_sub_fields(cls, item: dict, trail: _Trail = None): """This applies the correct data container class for some of the fields that need them. @@ -143,7 +140,7 @@ def _apply_field_types_to_sub_fields(cls, item: Dict, trail: _Trail = None): is_optional = len(field_classes) == 2 and isinstance( None, field_classes[1] ) - type_annotation = field_classes[0] + type_annotation = field_classes[0] # noqa: PLW2901 origin = get_origin(type_annotation) if origin is list: @@ -157,7 +154,7 @@ def _apply_field_types_to_sub_fields(cls, item: Dict, trail: _Trail = None): raise ValueError( f"Expected {field_trail} to be a list, got {value!r}." ) - type_annotation = get_args(type_annotation)[0] + type_annotation = get_args(type_annotation)[0] # noqa: PLW2901 if is_data_container(type_annotation): from_list[field] = type_annotation elif is_data_container(type_annotation): @@ -165,25 +162,25 @@ def _apply_field_types_to_sub_fields(cls, item: Dict, trail: _Trail = None): if from_dict or from_list: item = dict(**item) - for key, cls in (from_dict or {}).items(): + for key, cls_ in (from_dict or {}).items(): key_trail = _extend_trail(trail, key) value = item.get(key) if value is not None and not isinstance(value, dict): - path = _get_import_path(cls) + path = _get_import_path(cls_) raise ValueError( f"Expected {key_trail} to be a dict with fields " f"from {path}, got {value!r}." ) - item[key] = cls._from_dict(value, trail=key_trail) - for key, cls in (from_list or {}).items(): + item[key] = cls_._from_dict(value, trail=key_trail) + for key, cls_ in (from_list or {}).items(): key_trail = _extend_trail(trail, key) value = item.get(key) if value is not None and not isinstance(value, list): - path = _get_import_path(cls) + path = _get_import_path(cls_) raise ValueError( f"Expected {key_trail} to be a list of dicts " f"with fields from {path}, got {value!r}." ) - item[key] = cls._from_list(value, trail=key_trail) + item[key] = cls_._from_list(value, trail=key_trail) return item diff --git a/zyte_common_items/components/__init__.py b/zyte_common_items/components/__init__.py index a6b3f16..b62db00 100644 --- a/zyte_common_items/components/__init__.py +++ b/zyte_common_items/components/__init__.py @@ -1,4 +1,3 @@ -# flake8: noqa from ._compat import request_list_processor from .additional_property import AdditionalProperty from .address import Address diff --git a/zyte_common_items/components/_compat.py b/zyte_common_items/components/_compat.py index 6aed17e..7d6fb5f 100644 --- a/zyte_common_items/components/_compat.py +++ b/zyte_common_items/components/_compat.py @@ -1,10 +1,9 @@ import warnings -from typing import List from .request import ProbabilityRequest, Request -def request_list_processor(request_list: List[Request]) -> List[ProbabilityRequest]: +def request_list_processor(request_list: list[Request]) -> list[ProbabilityRequest]: """Deprecated. Please use :func:`zyte_common_items.processors.probability_request_list_processor` instead.""" warnings.warn( @@ -13,6 +12,8 @@ def request_list_processor(request_list: List[Request]) -> List[ProbabilityReque category=DeprecationWarning, stacklevel=2, ) - from zyte_common_items.processors import probability_request_list_processor + from zyte_common_items.processors import ( # noqa: PLC0415 + probability_request_list_processor, + ) return probability_request_list_processor(request_list) diff --git a/zyte_common_items/components/metadata.py b/zyte_common_items/components/metadata.py index d62b068..6f630ed 100644 --- a/zyte_common_items/components/metadata.py +++ b/zyte_common_items/components/metadata.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Dict, List, Optional, Type, TypeVar +from typing import Optional, TypeVar import attrs @@ -14,7 +14,7 @@ class BaseMetadata(Item): """Base metadata class""" - def cast(self, cls: Type[MetadataT]) -> MetadataT: + def cast(self, cls: type[MetadataT]) -> MetadataT: """Convert *value*, a metadata instance, into a different metadata *cls*.""" return convert_to_class(self, cls) @@ -47,7 +47,7 @@ class ListMetadata(BaseMetadata): """Date and time when the product data was downloaded, in UTC timezone and the following format: ``YYYY-MM-DDThh:mm:ssZ``.""" - validationMessages: Optional[Dict[str, List[str]]] = None + validationMessages: Optional[dict[str, list[str]]] = None """Contains paths to fields with the description of issues found with their values.""" diff --git a/zyte_common_items/components/request.py b/zyte_common_items/components/request.py index 05fb030..fc2bf85 100644 --- a/zyte_common_items/components/request.py +++ b/zyte_common_items/components/request.py @@ -1,5 +1,5 @@ import base64 -from typing import List, Optional, Type, TypeVar +from typing import Optional, TypeVar import attrs @@ -36,7 +36,7 @@ class Request(Item): body: Optional[str] = None """HTTP request body, Base64-encoded.""" - headers: Optional[List[Header]] = None + headers: Optional[list[Header]] = None """HTTP headers.""" name: Optional[str] = None @@ -48,17 +48,15 @@ class Request(Item): def body_bytes(self) -> Optional[bytes]: """Request.body as bytes""" # todo: allow to set body bytes in __init__, to avoid encoding/decoding. - if self._body_bytes is None: - if self.body is not None: - self._body_bytes = base64.b64decode(self.body) + if self._body_bytes is None and self.body is not None: + self._body_bytes = base64.b64decode(self.body) return self._body_bytes def to_scrapy(self, callback, **kwargs): - """ - Convert a request to scrapy.Request. + """Convert a request to scrapy.Request. All kwargs are passed to scrapy.Request as-is. """ - import scrapy + import scrapy # noqa: PLC0415 header_list = [(header.name, header.value) for header in self.headers or []] @@ -71,7 +69,7 @@ def to_scrapy(self, callback, **kwargs): **kwargs, ) - def cast(self, cls: Type[RequestT]) -> RequestT: + def cast(self, cls: type[RequestT]) -> RequestT: """Convert *value*, an instance of :class:`~.Request` or a subclass, into *cls*, a different class that is also either :class:`~.Request` or a subclass.""" diff --git a/zyte_common_items/components/social_media_post.py b/zyte_common_items/components/social_media_post.py index a9ac033..50d178f 100644 --- a/zyte_common_items/components/social_media_post.py +++ b/zyte_common_items/components/social_media_post.py @@ -1,6 +1,4 @@ -""" -Components specific to :class:`~SocialMediaPost` item. -""" +"""Components specific to :class:`~SocialMediaPost` item.""" from typing import Optional diff --git a/zyte_common_items/converters.py b/zyte_common_items/converters.py index 96870cb..be95085 100644 --- a/zyte_common_items/converters.py +++ b/zyte_common_items/converters.py @@ -42,15 +42,16 @@ def __call__(self, value): def to_probability_request_list(request_list): - """attrs converter to turn lists of :class:`~scrapy.Request` instances into + """Attrs converter to turn lists of :class:`~scrapy.Request` instances into lists of :class:`~.ProbabilityRequest` instances.""" - from zyte_common_items.components import ProbabilityRequest + # circular import + from zyte_common_items.components import ProbabilityRequest # noqa: PLC0415 return [request.cast(ProbabilityRequest) for request in request_list] def to_probability_request_list_optional(request_list): - """attrs converter to turn lists of :class:`~scrapy.Request` instances into + """Attrs converter to turn lists of :class:`~scrapy.Request` instances into lists of :class:`~.ProbabilityRequest` instances. If None is passed, None is returned.""" if request_list is None: diff --git a/zyte_common_items/fields.py b/zyte_common_items/fields.py index 2043d2d..8872d87 100644 --- a/zyte_common_items/fields.py +++ b/zyte_common_items/fields.py @@ -1,4 +1,4 @@ -from typing import Callable, List, Optional +from typing import Callable, Optional from web_poet import ItemPage, field from web_poet.fields import get_fields_dict @@ -9,7 +9,7 @@ def auto_field( *, cached: bool = False, meta: Optional[dict] = None, - out: Optional[List[Callable]] = None, + out: Optional[list[Callable]] = None, ): """Decorator that works like :func:`web_poet.fields.field` but sets ``auto_field`` to ``True`` by default in *meta*. diff --git a/zyte_common_items/items/__init__.py b/zyte_common_items/items/__init__.py index 86d4d92..43d7e24 100644 --- a/zyte_common_items/items/__init__.py +++ b/zyte_common_items/items/__init__.py @@ -1,4 +1,3 @@ -# flake8: noqa from ._compat import RequestListCaster from .article import Article, ArticleMetadata from .article_list import ArticleFromList, ArticleList, ArticleListMetadata diff --git a/zyte_common_items/items/article.py b/zyte_common_items/items/article.py index 39094d2..42e0928 100644 --- a/zyte_common_items/items/article.py +++ b/zyte_common_items/items/article.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -63,10 +63,10 @@ class Article(Item): """Same date as :attr:`~zyte_common_items.Article.dateModified`, but before parsing/normalization, i.e. as it appears on the website.""" - authors: Optional[List[Author]] = None + authors: Optional[list[Author]] = None """All authors of the article.""" - breadcrumbs: Optional[List[Breadcrumb]] = None + breadcrumbs: Optional[list[Breadcrumb]] = None """Webpage `breadcrumb trail`_. .. _Breadcrumb trail: https://en.wikipedia.org/wiki/Breadcrumb_navigation @@ -82,7 +82,7 @@ class Article(Item): mainImage: Optional[Image] = None """Main image.""" - images: Optional[List[Image]] = None + images: Optional[list[Image]] = None """All images.""" description: Optional[str] = None @@ -110,10 +110,10 @@ class Article(Item): Format: HTML string normalized in a consistent way. """ - videos: Optional[List[Video]] = None + videos: Optional[list[Video]] = None """All videos.""" - audios: Optional[List[Audio]] = None + audios: Optional[list[Audio]] = None """All audios.""" canonicalUrl: Optional[str] = attrs.field( diff --git a/zyte_common_items/items/article_list.py b/zyte_common_items/items/article_list.py index 910a608..51414dd 100644 --- a/zyte_common_items/items/article_list.py +++ b/zyte_common_items/items/article_list.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import List, Optional +from typing import Optional import attrs @@ -52,13 +52,13 @@ class ArticleList(Item): See also ``url``. """ - articles: Optional[List[ArticleFromList]] = None + articles: Optional[list[ArticleFromList]] = None """List of article details found on the page. The order of the articles reflects their position on the page. """ - breadcrumbs: Optional[List[Breadcrumb]] = None + breadcrumbs: Optional[list[Breadcrumb]] = None """Webpage `breadcrumb trail`_. .. _Breadcrumb trail: https://en.wikipedia.org/wiki/Breadcrumb_navigation @@ -91,7 +91,7 @@ class ArticleFromList(Item): - no normalization of Unicode characters. """ - authors: Optional[List[Author]] = None + authors: Optional[list[Author]] = None """All authors of the article.""" datePublished: Optional[str] = None @@ -123,7 +123,7 @@ class ArticleFromList(Item): mainImage: Optional[Image] = None """Main image.""" - images: Optional[List[Image]] = None + images: Optional[list[Image]] = None """All images.""" metadata: Optional[ProbabilityMetadata] = attrs.field( diff --git a/zyte_common_items/items/article_navigation.py b/zyte_common_items/items/article_navigation.py index ae6f35a..4519b34 100644 --- a/zyte_common_items/items/article_navigation.py +++ b/zyte_common_items/items/article_navigation.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -35,14 +35,14 @@ class ArticleNavigation(Item): string) """ - subCategories: Optional[List[ProbabilityRequest]] = attrs.field( + subCategories: Optional[list[ProbabilityRequest]] = attrs.field( default=None, converter=to_probability_request_list_optional, kw_only=True, # type: ignore[misc] ) """List of sub-category links ordered by their position in the page.""" - items: Optional[List[ProbabilityRequest]] = attrs.field( + items: Optional[list[ProbabilityRequest]] = attrs.field( default=None, converter=to_probability_request_list_optional, kw_only=True, # type: ignore[misc] diff --git a/zyte_common_items/items/business_place.py b/zyte_common_items/items/business_place.py index 54731af..0a3304f 100644 --- a/zyte_common_items/items/business_place.py +++ b/zyte_common_items/items/business_place.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -49,11 +49,11 @@ class BusinessPlace(Item): name: Optional[str] = None """The name of the place.""" - actions: Optional[List[NamedLink]] = None + actions: Optional[list[NamedLink]] = None """List of actions that can be performed directly from the URLs on the place page, including URLs.""" - additionalProperties: Optional[List[AdditionalProperty]] = None + additionalProperties: Optional[list[AdditionalProperty]] = None """List of name-value pais of any unmapped additional properties specific to the place.""" @@ -64,7 +64,7 @@ class BusinessPlace(Item): """The details of the reservation action, e.g. table reservation in case of restaurants or room reservation in case of hotels.""" - categories: Optional[List[str]] = None + categories: Optional[list[str]] = None """List of categories the place belongs to.""" description: Optional[str] = None @@ -73,7 +73,7 @@ class BusinessPlace(Item): Stripped of white spaces. """ - features: Optional[List[str]] = None + features: Optional[list[str]] = None """List of frequently mentioned features of this place.""" map: Optional[str] = attrs.field( @@ -81,10 +81,10 @@ class BusinessPlace(Item): ) """URL to a map of the place.""" - images: Optional[List[Image]] = None + images: Optional[list[Image]] = None """A list of URL values of all images of the place.""" - amenityFeatures: Optional[List[Amenity]] = None + amenityFeatures: Optional[list[Amenity]] = None """List of amenities of the place.""" aggregateRating: Optional[AggregateRating] = None @@ -97,11 +97,11 @@ class BusinessPlace(Item): """If the place is located inside another place, these are the details of the parent place.""" - openingHours: Optional[List[OpeningHoursItem]] = None + openingHours: Optional[list[OpeningHoursItem]] = None """Ordered specification of opening hours, including data for opening and closing time for each day of the week.""" - reviewSites: Optional[List[NamedLink]] = None + reviewSites: Optional[list[NamedLink]] = None """List of partner review sites.""" telephone: Optional[str] = None @@ -126,7 +126,7 @@ class BusinessPlace(Item): ) """The URL pointing to the official website of the place.""" - tags: Optional[List[str]] = None + tags: Optional[list[str]] = None """List of the tags associated with the place.""" metadata: Optional[BusinessPlaceMetadata] = attrs.field( diff --git a/zyte_common_items/items/custom_attributes.py b/zyte_common_items/items/custom_attributes.py index e387b33..14794b7 100644 --- a/zyte_common_items/items/custom_attributes.py +++ b/zyte_common_items/items/custom_attributes.py @@ -1,15 +1,13 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional import attrs from zyte_common_items.base import Item -class CustomAttributesValues(Dict[str, Any]): +class CustomAttributesValues(dict[str, Any]): """Container for custom attribute values.""" - pass - @attrs.define class CustomAttributesMetadata(Item): @@ -37,7 +35,7 @@ class CustomAttributesMetadata(Item): limits, either set via ``maxInputTokens`` or due to the model limitation returned in ``maxInputTokens``, when using the "generate" method.""" - excludedPIIAttributes: Optional[List[str]] = None + excludedPIIAttributes: Optional[list[str]] = None """A list of all attributes dropped from the output due to a risk of PII (Personally Identifiable Information) extraction.""" diff --git a/zyte_common_items/items/forum_thread.py b/zyte_common_items/items/forum_thread.py index 87853e6..61e3724 100644 --- a/zyte_common_items/items/forum_thread.py +++ b/zyte_common_items/items/forum_thread.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -27,7 +27,7 @@ class ForumThread(Item): threadId: Optional[str] = None """Thread ID.""" - posts: Optional[List[SocialMediaPost]] = None + posts: Optional[list[SocialMediaPost]] = None """List of posts available on the page, including the first or top post.""" metadata: Optional[ForumThreadMetadata] = attrs.field( diff --git a/zyte_common_items/items/job_posting.py b/zyte_common_items/items/job_posting.py index 385db28..bc331f8 100644 --- a/zyte_common_items/items/job_posting.py +++ b/zyte_common_items/items/job_posting.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -102,7 +102,7 @@ class JobPosting(Item): baseSalary: Optional[BaseSalary] = None """The base salary of the job or of an employee in the proposed role.""" - requirements: Optional[List[str]] = None + requirements: Optional[list[str]] = None """Candidate requirements for the job.""" hiringOrganization: Optional[HiringOrganization] = None diff --git a/zyte_common_items/items/job_posting_navigation.py b/zyte_common_items/items/job_posting_navigation.py index b1c0de1..26a5adb 100644 --- a/zyte_common_items/items/job_posting_navigation.py +++ b/zyte_common_items/items/job_posting_navigation.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -24,7 +24,7 @@ class JobPostingNavigation(Item): url: str = attrs.field(converter=url_to_str) """Main URL from which the data is extracted.""" - items: Optional[List[ProbabilityRequest]] = attrs.field( + items: Optional[list[ProbabilityRequest]] = attrs.field( default=None, converter=to_probability_request_list_optional, kw_only=True, # type: ignore[misc] diff --git a/zyte_common_items/items/product.py b/zyte_common_items/items/product.py index de071f3..62241d8 100644 --- a/zyte_common_items/items/product.py +++ b/zyte_common_items/items/product.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import List, Optional +from typing import Optional import attrs @@ -41,7 +41,7 @@ class Product(Item): :attr:`url` is the only required attribute. """ - additionalProperties: Optional[List[AdditionalProperty]] = attrs.field( + additionalProperties: Optional[list[AdditionalProperty]] = attrs.field( default=None, metadata={ "json_schema_extra": { @@ -97,7 +97,7 @@ class Product(Item): brand: Optional[Brand] = None """Brand or manufacturer of the product.""" - breadcrumbs: Optional[List[Breadcrumb]] = attrs.field( + breadcrumbs: Optional[list[Breadcrumb]] = attrs.field( default=None, metadata={ "json_schema_extra": { @@ -311,7 +311,7 @@ class Product(Item): .. _HTML normalization specification: https://docs.zyte.com/automatic-extraction/article.html#format-of-articlebodyhtml-field """ - features: Optional[List[str]] = attrs.field( + features: Optional[list[str]] = attrs.field( default=None, metadata={ "json_schema_extra": { @@ -345,7 +345,7 @@ class Product(Item): See also ``additionalProperties``. """ - gtin: Optional[List[Gtin]] = attrs.field( + gtin: Optional[list[Gtin]] = attrs.field( default=None, metadata={ "json_schema_extra": { @@ -423,7 +423,7 @@ class Product(Item): .. _GTIN: https://en.wikipedia.org/wiki/Global_Trade_Item_Number """ - images: Optional[List[Image]] = None + images: Optional[list[Image]] = None """All product images. The main image (see ``mainImage``) should be first in the list. @@ -573,7 +573,7 @@ class Product(Item): See also ``canonicalUrl``. """ - variants: Optional[List[ProductVariant]] = attrs.field( + variants: Optional[list[ProductVariant]] = attrs.field( default=None, metadata={ "json_schema_extra": { @@ -648,7 +648,7 @@ class ProductVariant(Item): :class:`ProductVariantSelectorExtractor`. """ - additionalProperties: Optional[List[AdditionalProperty]] = None + additionalProperties: Optional[list[AdditionalProperty]] = None """List of name-value pais of data about a specific, otherwise unmapped feature. @@ -698,7 +698,7 @@ class ProductVariant(Item): See also ``currency``. """ - gtin: Optional[List[Gtin]] = None + gtin: Optional[list[Gtin]] = None """List of standardized GTIN_ product identifiers associated with the product, which are unique for the product across different sellers. @@ -707,7 +707,7 @@ class ProductVariant(Item): .. _GTIN: https://en.wikipedia.org/wiki/Global_Trade_Item_Number """ - images: Optional[List[Image]] = None + images: Optional[list[Image]] = None """All product images. The main image (see ``mainImage``) should be first in the list. diff --git a/zyte_common_items/items/product_list.py b/zyte_common_items/items/product_list.py index 3163e9b..2c47135 100644 --- a/zyte_common_items/items/product_list.py +++ b/zyte_common_items/items/product_list.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import List, Optional +from typing import Optional import attrs @@ -33,7 +33,7 @@ class ProductList(Item): :attr:`url` is the only required attribute. """ - breadcrumbs: Optional[List[Breadcrumb]] = None + breadcrumbs: Optional[list[Breadcrumb]] = None """Webpage `breadcrumb trail`_. .. _Breadcrumb trail: https://en.wikipedia.org/wiki/Breadcrumb_navigation @@ -71,7 +71,7 @@ class ProductList(Item): paginationNext: Optional[Link] = None """Link to the next page.""" - products: Optional[List[ProductFromList]] = None + products: Optional[list[ProductFromList]] = None """List of products. It only includes product information found in the product listing page diff --git a/zyte_common_items/items/product_navigation.py b/zyte_common_items/items/product_navigation.py index 0922b4b..1ac6687 100644 --- a/zyte_common_items/items/product_navigation.py +++ b/zyte_common_items/items/product_navigation.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -33,14 +33,14 @@ class ProductNavigation(Item): string) """ - subCategories: Optional[List[ProbabilityRequest]] = attrs.field( + subCategories: Optional[list[ProbabilityRequest]] = attrs.field( default=None, converter=to_probability_request_list_optional, kw_only=True, # type: ignore[misc] ) """List of sub-category links ordered by their position in the page.""" - items: Optional[List[ProbabilityRequest]] = attrs.field( + items: Optional[list[ProbabilityRequest]] = attrs.field( default=None, converter=to_probability_request_list_optional, kw_only=True, # type: ignore[misc] diff --git a/zyte_common_items/items/real_estate.py b/zyte_common_items/items/real_estate.py index f57e75c..f0a1eee 100644 --- a/zyte_common_items/items/real_estate.py +++ b/zyte_common_items/items/real_estate.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -30,7 +30,7 @@ class RealEstate(Item): url: str = attrs.field(converter=url_to_str) """The url of the final response, after any redirects.""" - breadcrumbs: Optional[List[Breadcrumb]] = None + breadcrumbs: Optional[list[Breadcrumb]] = None """Webpage `breadcrumb trail`_. .. _Breadcrumb trail: https://en.wikipedia.org/wiki/Breadcrumb_navigation @@ -71,7 +71,7 @@ class RealEstate(Item): mainImage: Optional[Image] = None """The details of the main image of the real estate.""" - images: Optional[List[Image]] = None + images: Optional[list[Image]] = None """A list of URL values of all images of the real estate.""" address: Optional[Address] = None @@ -113,7 +113,7 @@ class RealEstate(Item): currency: Optional[str] = None """The currency of the price, in 3-letter ISO 4217 format.""" - additionalProperties: Optional[List[AdditionalProperty]] = None + additionalProperties: Optional[list[AdditionalProperty]] = None """A name-value pair field holding information pertaining to specific features. Usually in a form of a specification table or freeform specification list.""" diff --git a/zyte_common_items/items/search_request_template.py b/zyte_common_items/items/search_request_template.py index 4424efe..4f826ac 100644 --- a/zyte_common_items/items/search_request_template.py +++ b/zyte_common_items/items/search_request_template.py @@ -1,7 +1,7 @@ from __future__ import annotations from base64 import b64encode -from typing import Any, List, Optional +from typing import Any, Optional from urllib.parse import quote_plus from warnings import warn @@ -14,7 +14,7 @@ from zyte_common_items.components import DetailsMetadata, Header, Request from zyte_common_items.converters import to_metadata_optional -_TEMPLATE_ENVIRONMENT = jinja2.Environment(undefined=StrictUndefined) +_TEMPLATE_ENVIRONMENT = jinja2.Environment(undefined=StrictUndefined) # noqa: S701 _TEMPLATE_ENVIRONMENT.filters["quote_plus"] = quote_plus _UNSET = object() @@ -61,7 +61,7 @@ class SearchRequestTemplate(Item): Defining a non-UTF-8 body is not supported. """ - headers: Optional[List[Header]] = None + headers: Optional[list[Header]] = None """List of :class:`Header`, for :class:`Request.headers `, where every :attr:`~Header.name` and :attr:`~Header.value` is a :doc:`Jinja template `. @@ -88,16 +88,15 @@ def request( raise TypeError( "request() missing 1 required keyword-only argument: 'query'" ) - else: - query = keyword - warn( - ( - "The 'keyword' parameter of request() is deprecated, " - "use 'query' instead." - ), - DeprecationWarning, - stacklevel=2, - ) + query = keyword + warn( + ( + "The 'keyword' parameter of request() is deprecated, " + "use 'query' instead." + ), + DeprecationWarning, + stacklevel=2, + ) elif keyword is not _UNSET: if keyword == query: warn( diff --git a/zyte_common_items/items/serp.py b/zyte_common_items/items/serp.py index d9fc276..545d871 100644 --- a/zyte_common_items/items/serp.py +++ b/zyte_common_items/items/serp.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -64,7 +64,7 @@ class Serp(Item): """Data from a `search engine results page `_.""" - organicResults: Optional[List[SerpOrganicResult]] = None + organicResults: Optional[list[SerpOrganicResult]] = None """List of search results excluding paid results.""" url: str = attrs.field(converter=url_to_str) diff --git a/zyte_common_items/items/social_media_post.py b/zyte_common_items/items/social_media_post.py index af08025..ade2c24 100644 --- a/zyte_common_items/items/social_media_post.py +++ b/zyte_common_items/items/social_media_post.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs @@ -39,10 +39,10 @@ class SocialMediaPost(Item): Format: Timezone: UTC. ISO 8601 format: "YYYY-MM-DDThh:mm:ssZ" """ - hashtags: Optional[List[str]] = None + hashtags: Optional[list[str]] = None """The list of hashtags contained in the post.""" - mediaUrls: Optional[List[Url]] = None + mediaUrls: Optional[list[Url]] = None """The list of URLs of media files (images, videos, etc.) linked from the post.""" diff --git a/zyte_common_items/pages/__init__.py b/zyte_common_items/pages/__init__.py index 667d1ff..0e10389 100644 --- a/zyte_common_items/pages/__init__.py +++ b/zyte_common_items/pages/__init__.py @@ -1,4 +1,3 @@ -# flake8: noqa from .article import ArticlePage, AutoArticlePage, BaseArticlePage from .article_list import ArticleListPage, AutoArticleListPage, BaseArticleListPage from .article_navigation import ( diff --git a/zyte_common_items/pages/article.py b/zyte_common_items/pages/article.py index 09acf68..29956e6 100644 --- a/zyte_common_items/pages/article.py +++ b/zyte_common_items/pages/article.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -53,11 +53,11 @@ def dateModifiedRaw(self) -> Optional[str]: return self.article.dateModifiedRaw @auto_field - def authors(self) -> Optional[List[Author]]: + def authors(self) -> Optional[list[Author]]: return self.article.authors @auto_field - def breadcrumbs(self) -> Optional[List[Breadcrumb]]: + def breadcrumbs(self) -> Optional[list[Breadcrumb]]: return self.article.breadcrumbs @auto_field @@ -69,7 +69,7 @@ def mainImage(self) -> Optional[Image]: return self.article.mainImage @auto_field - def images(self) -> Optional[List[Image]]: + def images(self) -> Optional[list[Image]]: return self.article.images @auto_field @@ -85,11 +85,11 @@ def articleBodyHtml(self) -> Optional[str]: return self.article.articleBodyHtml @auto_field - def videos(self) -> Optional[List[Video]]: + def videos(self) -> Optional[list[Video]]: return self.article.videos @auto_field - def audios(self) -> Optional[List[Audio]]: + def audios(self) -> Optional[list[Audio]]: return self.article.audios @auto_field diff --git a/zyte_common_items/pages/article_list.py b/zyte_common_items/pages/article_list.py index 40b15c5..809daf9 100644 --- a/zyte_common_items/pages/article_list.py +++ b/zyte_common_items/pages/article_list.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -33,11 +33,11 @@ class AutoArticleListPage(BaseArticleListPage): article_list: ArticleList @auto_field - def articles(self) -> Optional[List[ArticleFromList]]: + def articles(self) -> Optional[list[ArticleFromList]]: return self.article_list.articles @auto_field - def breadcrumbs(self) -> Optional[List[Breadcrumb]]: + def breadcrumbs(self) -> Optional[list[Breadcrumb]]: return self.article_list.breadcrumbs @auto_field diff --git a/zyte_common_items/pages/article_navigation.py b/zyte_common_items/pages/article_navigation.py index 7a84d39..19610c9 100644 --- a/zyte_common_items/pages/article_navigation.py +++ b/zyte_common_items/pages/article_navigation.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -32,7 +32,7 @@ def categoryName(self) -> Optional[str]: return self.article_navigation.categoryName @auto_field - def items(self) -> Optional[List[ProbabilityRequest]]: + def items(self) -> Optional[list[ProbabilityRequest]]: return self.article_navigation.items @auto_field @@ -48,7 +48,7 @@ def pageNumber(self) -> Optional[int]: return self.article_navigation.pageNumber @auto_field - def subCategories(self) -> Optional[List[ProbabilityRequest]]: + def subCategories(self) -> Optional[list[ProbabilityRequest]]: return self.article_navigation.subCategories @auto_field diff --git a/zyte_common_items/pages/base.py b/zyte_common_items/pages/base.py index 617a6d3..96bd42b 100644 --- a/zyte_common_items/pages/base.py +++ b/zyte_common_items/pages/base.py @@ -14,13 +14,13 @@ class Processors: @field def metadata(self) -> MetadataT: if self.metadata_cls is None: - raise ValueError(f"{type(self)} doesn'have a metadata class configured.") + raise ValueError(f"{type(self)} doesn't have a metadata class configured.") value = self.metadata_cls() attributes = dir(value) if "dateDownloaded" in attributes: - value.dateDownloaded = utcnow_formatted() # type: ignore + value.dateDownloaded = utcnow_formatted() # type: ignore[attr-defined] if "probability" in attributes: - value.probability = 1.0 # type: ignore + value.probability = 1.0 # type: ignore[attr-defined] return value def no_item_found(self) -> ItemT: @@ -30,14 +30,14 @@ def no_item_found(self) -> ItemT: Use it in your .validate_input implementation. """ if self.metadata_cls is None: - raise ValueError(f"{type(self)} doesn'have a metadata class configured.") + raise ValueError(f"{type(self)} doesn't have a metadata class configured.") metadata = self.metadata_cls() metadata_attributes = dir(metadata) if "dateDownloaded" in metadata_attributes: - metadata.dateDownloaded = utcnow_formatted() # type: ignore + metadata.dateDownloaded = utcnow_formatted() # type: ignore[attr-defined] if "probability" in metadata_attributes: - metadata.probability = 0.0 # type: ignore - return self.item_cls( # type: ignore + metadata.probability = 0.0 # type: ignore[attr-defined] + return self.item_cls( url=self.url, # type: ignore[attr-defined] metadata=metadata, ) diff --git a/zyte_common_items/pages/business_place.py b/zyte_common_items/pages/business_place.py index 4ad2bc9..6f25ee0 100644 --- a/zyte_common_items/pages/business_place.py +++ b/zyte_common_items/pages/business_place.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -53,11 +53,11 @@ class AutoBusinessPlacePage(BaseBusinessPlacePage): business_place: BusinessPlace @auto_field - def actions(self) -> Optional[List[NamedLink]]: + def actions(self) -> Optional[list[NamedLink]]: return self.business_place.actions @auto_field - def additionalProperties(self) -> Optional[List[AdditionalProperty]]: + def additionalProperties(self) -> Optional[list[AdditionalProperty]]: return self.business_place.additionalProperties @auto_field @@ -69,11 +69,11 @@ def aggregateRating(self) -> Optional[AggregateRating]: return self.business_place.aggregateRating @auto_field - def amenityFeatures(self) -> Optional[List[Amenity]]: + def amenityFeatures(self) -> Optional[list[Amenity]]: return self.business_place.amenityFeatures @auto_field - def categories(self) -> Optional[List[str]]: + def categories(self) -> Optional[list[str]]: return self.business_place.categories @auto_field @@ -85,11 +85,11 @@ def description(self) -> Optional[str]: return self.business_place.description @auto_field - def features(self) -> Optional[List[str]]: + def features(self) -> Optional[list[str]]: return self.business_place.features @auto_field - def images(self) -> Optional[List[Image]]: + def images(self) -> Optional[list[Image]]: return self.business_place.images @auto_field @@ -109,7 +109,7 @@ def name(self) -> Optional[str]: return self.business_place.name @auto_field - def openingHours(self) -> Optional[List[OpeningHoursItem]]: + def openingHours(self) -> Optional[list[OpeningHoursItem]]: return self.business_place.openingHours @auto_field @@ -125,7 +125,7 @@ def reservationAction(self) -> Optional[NamedLink]: return self.business_place.reservationAction @auto_field - def reviewSites(self) -> Optional[List[NamedLink]]: + def reviewSites(self) -> Optional[list[NamedLink]]: return self.business_place.reviewSites @auto_field @@ -133,7 +133,7 @@ def starRating(self) -> Optional[StarRating]: return self.business_place.starRating @auto_field - def tags(self) -> Optional[List[str]]: + def tags(self) -> Optional[list[str]]: return self.business_place.tags @auto_field diff --git a/zyte_common_items/pages/forum_thread.py b/zyte_common_items/pages/forum_thread.py index 66ca14f..0f41c56 100644 --- a/zyte_common_items/pages/forum_thread.py +++ b/zyte_common_items/pages/forum_thread.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -38,7 +38,7 @@ def topic(self) -> Optional[Topic]: return self.forum_thread.topic @auto_field - def posts(self) -> Optional[List[SocialMediaPost]]: + def posts(self) -> Optional[list[SocialMediaPost]]: return self.forum_thread.posts @auto_field diff --git a/zyte_common_items/pages/job_posting.py b/zyte_common_items/pages/job_posting.py index a48a95b..a120e2e 100644 --- a/zyte_common_items/pages/job_posting.py +++ b/zyte_common_items/pages/job_posting.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -100,7 +100,7 @@ def baseSalary(self) -> Optional[BaseSalary]: return self.job_posting.baseSalary @auto_field - def requirements(self) -> Optional[List[str]]: + def requirements(self) -> Optional[list[str]]: return self.job_posting.requirements @auto_field diff --git a/zyte_common_items/pages/job_posting_navigation.py b/zyte_common_items/pages/job_posting_navigation.py index 5c5cbfc..d2131e8 100644 --- a/zyte_common_items/pages/job_posting_navigation.py +++ b/zyte_common_items/pages/job_posting_navigation.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -33,7 +33,7 @@ class AutoJobPostingNavigationPage(BaseJobPostingNavigationPage): job_posting_navigation: JobPostingNavigation @auto_field - def items(self) -> Optional[List[ProbabilityRequest]]: + def items(self) -> Optional[list[ProbabilityRequest]]: return self.job_posting_navigation.items @auto_field diff --git a/zyte_common_items/pages/mixins.py b/zyte_common_items/pages/mixins.py index be92f36..a33c545 100644 --- a/zyte_common_items/pages/mixins.py +++ b/zyte_common_items/pages/mixins.py @@ -1,5 +1,5 @@ import html -from typing import Any, Generic, Optional, Type, Union +from typing import Any, Generic, Optional, Union import html_text from clear_html import cleaned_node_to_text @@ -16,12 +16,12 @@ class HasMetadata(Generic[MetadataT]): class.""" @property - def metadata_cls(self) -> Optional[Type[MetadataT]]: + def metadata_cls(self) -> Optional[type[MetadataT]]: """Metadata class.""" return _get_metadata_class(type(self)) -def _get_metadata_class(cls: type) -> Optional[Type[MetadataT]]: +def _get_metadata_class(cls: type) -> Optional[type[MetadataT]]: return get_generic_param(cls, HasMetadata) diff --git a/zyte_common_items/pages/product.py b/zyte_common_items/pages/product.py index 45c1500..9de8c11 100644 --- a/zyte_common_items/pages/product.py +++ b/zyte_common_items/pages/product.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -72,7 +72,7 @@ class AutoProductPage(BaseProductPage): product: Product @auto_field - def additionalProperties(self) -> Optional[List[AdditionalProperty]]: + def additionalProperties(self) -> Optional[list[AdditionalProperty]]: return self.product.additionalProperties @auto_field @@ -88,7 +88,7 @@ def brand(self) -> Optional[Brand]: return self.product.brand @auto_field - def breadcrumbs(self) -> Optional[List[Breadcrumb]]: + def breadcrumbs(self) -> Optional[list[Breadcrumb]]: return self.product.breadcrumbs @auto_field @@ -116,15 +116,15 @@ def descriptionHtml(self) -> Optional[str]: return self.product.descriptionHtml @auto_field - def features(self) -> Optional[List[str]]: + def features(self) -> Optional[list[str]]: return self.product.features @auto_field - def gtin(self) -> Optional[List[Gtin]]: + def gtin(self) -> Optional[list[Gtin]]: return self.product.gtin @auto_field - def images(self) -> Optional[List[Image]]: + def images(self) -> Optional[list[Image]]: return self.product.images @auto_field @@ -172,5 +172,5 @@ def url(self) -> str: return self.product.url @auto_field - def variants(self) -> Optional[List[ProductVariant]]: + def variants(self) -> Optional[list[ProductVariant]]: return self.product.variants diff --git a/zyte_common_items/pages/product_list.py b/zyte_common_items/pages/product_list.py index e115b4c..7149e63 100644 --- a/zyte_common_items/pages/product_list.py +++ b/zyte_common_items/pages/product_list.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -33,7 +33,7 @@ class AutoProductListPage(BaseProductListPage): product_list: ProductList @auto_field - def breadcrumbs(self) -> Optional[List[Breadcrumb]]: + def breadcrumbs(self) -> Optional[list[Breadcrumb]]: return self.product_list.breadcrumbs @auto_field @@ -57,7 +57,7 @@ def paginationNext(self) -> Optional[Link]: return self.product_list.paginationNext @auto_field - def products(self) -> Optional[List[ProductFromList]]: + def products(self) -> Optional[list[ProductFromList]]: return self.product_list.products @auto_field diff --git a/zyte_common_items/pages/product_navigation.py b/zyte_common_items/pages/product_navigation.py index 7ed00d9..84dc41d 100644 --- a/zyte_common_items/pages/product_navigation.py +++ b/zyte_common_items/pages/product_navigation.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -37,7 +37,7 @@ def categoryName(self) -> Optional[str]: return self.product_navigation.categoryName @auto_field - def items(self) -> Optional[List[ProbabilityRequest]]: + def items(self) -> Optional[list[ProbabilityRequest]]: return self.product_navigation.items @auto_field @@ -53,7 +53,7 @@ def pageNumber(self) -> Optional[int]: return self.product_navigation.pageNumber @auto_field - def subCategories(self) -> Optional[List[ProbabilityRequest]]: + def subCategories(self) -> Optional[list[ProbabilityRequest]]: return self.product_navigation.subCategories @auto_field diff --git a/zyte_common_items/pages/real_estate.py b/zyte_common_items/pages/real_estate.py index 069dc92..dfe345f 100644 --- a/zyte_common_items/pages/real_estate.py +++ b/zyte_common_items/pages/real_estate.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -43,7 +43,7 @@ class AutoRealEstatePage(BaseRealEstatePage): real_estate: RealEstate @auto_field - def additionalProperties(self) -> Optional[List[AdditionalProperty]]: + def additionalProperties(self) -> Optional[list[AdditionalProperty]]: return self.real_estate.additionalProperties @auto_field @@ -55,7 +55,7 @@ def area(self) -> Optional[RealEstateArea]: return self.real_estate.area @auto_field - def breadcrumbs(self) -> Optional[List[Breadcrumb]]: + def breadcrumbs(self) -> Optional[list[Breadcrumb]]: return self.real_estate.breadcrumbs @auto_field @@ -79,7 +79,7 @@ def description(self) -> Optional[str]: return self.real_estate.description @auto_field - def images(self) -> Optional[List[Image]]: + def images(self) -> Optional[list[Image]]: return self.real_estate.images @auto_field diff --git a/zyte_common_items/pages/serp.py b/zyte_common_items/pages/serp.py index de79dcc..3211764 100644 --- a/zyte_common_items/pages/serp.py +++ b/zyte_common_items/pages/serp.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -23,7 +23,7 @@ class AutoSerpPage(BaseSerpPage): serp: Serp @auto_field - def organicResults(self) -> Optional[List[SerpOrganicResult]]: + def organicResults(self) -> Optional[list[SerpOrganicResult]]: return self.serp.organicResults @auto_field diff --git a/zyte_common_items/pages/social_media_post.py b/zyte_common_items/pages/social_media_post.py index a4f7823..a79364a 100644 --- a/zyte_common_items/pages/social_media_post.py +++ b/zyte_common_items/pages/social_media_post.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional import attrs from web_poet import Returns @@ -48,11 +48,11 @@ def datePublished(self) -> Optional[str]: return self.social_media_post.datePublished @auto_field - def hashtags(self) -> Optional[List[str]]: + def hashtags(self) -> Optional[list[str]]: return self.social_media_post.hashtags @auto_field - def mediaUrls(self) -> Optional[List[Url]]: + def mediaUrls(self) -> Optional[list[Url]]: return self.social_media_post.mediaUrls @auto_field diff --git a/zyte_common_items/pipelines.py b/zyte_common_items/pipelines.py index cee8aaa..b1958d2 100644 --- a/zyte_common_items/pipelines.py +++ b/zyte_common_items/pipelines.py @@ -1,9 +1,10 @@ -# Raise ImportError if Scrapy is missing. -import scrapy # isort: skip # noqa: F401 - import logging from copy import deepcopy +# Raise ImportError if Scrapy is missing. +import scrapy # noqa: F401 +from scrapy.utils.misc import load_object + from .base import ProbabilityMixin from .log_formatters import InfoDropItem @@ -44,7 +45,7 @@ def parse_product(self, response: DummyResponse, product: Product): """ def __init__(self): - from . import ae + from . import ae # noqa: PLC0415 self._downgrade = ae.downgrade @@ -104,8 +105,6 @@ def from_crawler(cls, crawler): return cls(crawler) def init_thresholds(self, spider): - from scrapy.utils.misc import load_object - thresholds_settings = deepcopy( spider.settings.get("ITEM_PROBABILITY_THRESHOLDS", {}) ) diff --git a/zyte_common_items/processors.py b/zyte_common_items/processors.py index cfebe2d..038e1cd 100644 --- a/zyte_common_items/processors.py +++ b/zyte_common_items/processors.py @@ -1,7 +1,7 @@ from collections.abc import Iterable, Mapping from functools import wraps from numbers import Real -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, Optional, Union from clear_html import clean_node, cleaned_node_to_html, cleaned_node_to_text from lxml.html import HtmlElement @@ -69,8 +69,7 @@ def wrapper(value: Any, page: Any) -> Any: value = _handle_selectorlist(value) if not isinstance(value, (Selector, HtmlElement)): return value - result = f(value, page) - return result + return f(value, page) return wrapper @@ -98,7 +97,7 @@ def _from_zp_breadcrumb(value: zp_Breadcrumb) -> Breadcrumb: if not isinstance(value, Iterable) or isinstance(value, str): return value - results: List[Any] = [] + results: list[Any] = [] for item in value: if isinstance(item, zp_Breadcrumb): results.append(_from_zp_breadcrumb(item)) @@ -127,8 +126,7 @@ def brand_processor(value: Any, page: Any) -> Any: if isinstance(value, (Selector, SelectorList, HtmlElement)): if brand_name := extract_brand_name(value, search_depth=2): return Brand(name=brand_name) - else: - return None + return None return value @@ -151,12 +149,11 @@ def price_processor(value: Any, page: Any) -> Any: if isinstance(value, Real): return f"{value:.2f}" - elif isinstance(value, (Selector, HtmlElement)): + if isinstance(value, (Selector, HtmlElement)): price = extract_price(value) page._parsed_price = price return _format_price(price) - else: - return value + return value def simple_price_processor(value: Any, page: Any) -> Any: @@ -175,11 +172,10 @@ def simple_price_processor(value: Any, page: Any) -> Any: if isinstance(value, Real): return f"{value:.2f}" - elif isinstance(value, (Selector, HtmlElement)): + if isinstance(value, (Selector, HtmlElement)): price = extract_price(value) return _format_price(price) - else: - return value + return value @only_handle_nodes @@ -258,11 +254,11 @@ def gtin_processor( def _from_zp_gtin(zp_value: zp_Gtin) -> Gtin: return Gtin(type=zp_value.type, value=zp_value.value) - results = [] + results: list[Gtin] = [] if isinstance(value, SelectorList): - for sel in value: - if result := extract_gtin(sel): - results.append(_from_zp_gtin(result)) + results.extend( + _from_zp_gtin(result) for sel in value if (result := extract_gtin(sel)) + ) elif isinstance(value, (Selector, HtmlElement, str)): if result := extract_gtin(value): results.append(_from_zp_gtin(result)) @@ -335,7 +331,7 @@ def aggregateRating(self): if result.reviewCount or result.bestRating or result.ratingValue: return result return None - elif isinstance(value, dict): + if isinstance(value, dict): result = AggregateRating() review_count = _handle_selectorlist(value.get("reviewCount")) @@ -377,7 +373,7 @@ def images_processor(value: Any, page: Any) -> Any: return [Image(url=value)] if isinstance(value, Iterable): - results: List[Any] = [] + results: list[Any] = [] for item in value: if isinstance(item, Image): results.append(item) @@ -393,8 +389,8 @@ def images_processor(value: Any, page: Any) -> Any: def probability_request_list_processor( - request_list: List[Request], -) -> List[ProbabilityRequest]: + request_list: list[Request], +) -> list[ProbabilityRequest]: """Convert all objects in *request_list*, which are instances of :class:`Request` or a subclass, into instances of :class:`ProbabilityRequest`.""" diff --git a/zyte_common_items/util.py b/zyte_common_items/util.py index a9cbe07..1bdd3a7 100644 --- a/zyte_common_items/util.py +++ b/zyte_common_items/util.py @@ -1,5 +1,5 @@ import warnings -from typing import Any, Callable, Dict, Optional, Tuple, Type, TypeVar +from typing import Any, Callable, Optional, TypeVar from warnings import warn from weakref import WeakKeyDictionary @@ -14,10 +14,9 @@ def split_in_unknown_and_known_fields( - data: Optional[dict], item_cls: Type -) -> Tuple[Dict, Dict]: - """ - Return a pair of dicts. The first one contains those elements not belonging to the + data: Optional[dict], item_cls: type +) -> tuple[dict, dict]: + """Return a pair of dicts. The first one contains those elements not belonging to the attr class ``item_cls``. The second one contains the rest. That is, those attributes not belonging to ``item_cls`` class """ @@ -30,7 +29,7 @@ def split_in_unknown_and_known_fields( return unknown, known -def split_dict(dict: Dict, key_pred: Callable[[Any], Any]) -> Tuple[Dict, Dict]: +def split_dict(dict: dict, key_pred: Callable[[Any], Any]) -> tuple[dict, dict]: # noqa: A002 """Splits the dictionary in two. The first dict contains the records for which the key predicate is False @@ -40,7 +39,7 @@ def split_dict(dict: Dict, key_pred: Callable[[Any], Any]) -> Tuple[Dict, Dict]: ({}, {}) >>> split_dict(dict(a=1, b=2, c=3), lambda k: k != 'a') ({'a': 1}, {'b': 2, 'c': 3}) - """ # noqa + """ yes, no = {}, {} for k, v in dict.items(): if key_pred(k): @@ -53,7 +52,7 @@ def split_dict(dict: Dict, key_pred: Callable[[Any], Any]) -> Tuple[Dict, Dict]: NewClassT = TypeVar("NewClassT", bound=attrs.AttrsInstance) -def convert_to_class(value: Any, new_cls: Type[NewClassT]) -> NewClassT: +def convert_to_class(value: Any, new_cls: type[NewClassT]) -> NewClassT: """Convert *value* into *type* keeping all shared attributes, and triggering a run-time warning if any attribute is removed.""" if type(value) is new_cls: @@ -78,12 +77,13 @@ def convert_to_class(value: Any, new_cls: Type[NewClassT]) -> NewClassT: f"{removed_nonempty_attributes}." ), RuntimeWarning, + stacklevel=2, ) return new_value def metadata_processor(metadata, page): - from zyte_common_items.processors import metadata_processor + from zyte_common_items.processors import metadata_processor # noqa: PLC0415 warnings.warn( "zyte_common_items.util.metadata_processor is moved to"