From 0af43e0e09f2ffd5639e9a598827c7b2010f5b73 Mon Sep 17 00:00:00 2001 From: mhh Date: Wed, 31 Jan 2024 13:53:20 +0100 Subject: [PATCH 1/8] Reformat with black/isort/ruff --- aleph_message/models/__init__.py | 2 +- aleph_message/models/execution/abstract.py | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/aleph_message/models/__init__.py b/aleph_message/models/__init__.py index 4f15e42..e84adbe 100644 --- a/aleph_message/models/__init__.py +++ b/aleph_message/models/__init__.py @@ -11,9 +11,9 @@ from .abstract import BaseContent from .base import Chain, HashType, MessageType +from .execution.base import MachineType, Payment, PaymentType # noqa from .execution.instance import InstanceContent from .execution.program import ProgramContent -from .execution.base import PaymentType, MachineType, Payment # noqa from .item_hash import ItemHash, ItemType diff --git a/aleph_message/models/execution/abstract.py b/aleph_message/models/execution/abstract.py index 833ba7c..c6271b5 100644 --- a/aleph_message/models/execution/abstract.py +++ b/aleph_message/models/execution/abstract.py @@ -5,14 +5,10 @@ from pydantic import Field -from .environment import ( - FunctionEnvironment, - HostRequirements, - MachineResources, -) +from ..abstract import BaseContent, HashableModel from .base import Payment +from .environment import FunctionEnvironment, HostRequirements, MachineResources from .volume import MachineVolume -from ..abstract import BaseContent, HashableModel class BaseExecutableContent(HashableModel, BaseContent, ABC): From da269b077c5ac7cec42f5d7d5fea86a9ca83d584 Mon Sep 17 00:00:00 2001 From: mhh Date: Wed, 31 Jan 2024 15:39:52 +0100 Subject: [PATCH 2/8] Refactor message content serialization and validation: Introduce a default method to dump message content --- aleph_message/models/__init__.py | 53 ++++++++++++++++++++---------- aleph_message/models/abstract.py | 5 +++ aleph_message/tests/test_models.py | 4 +-- aleph_message/utils.py | 41 ++++++++++++++++++++++- 4 files changed, 82 insertions(+), 21 deletions(-) diff --git a/aleph_message/models/__init__.py b/aleph_message/models/__init__.py index e84adbe..03a377f 100644 --- a/aleph_message/models/__init__.py +++ b/aleph_message/models/__init__.py @@ -9,6 +9,7 @@ from pydantic import BaseModel, Extra, Field, validator from typing_extensions import TypeAlias +from ..utils import dump_content from .abstract import BaseContent from .base import Chain, HashType, MessageType from .execution.base import MachineType, Payment, PaymentType # noqa @@ -126,7 +127,7 @@ class ForgetContent(BaseContent): """Content of a FORGET message""" hashes: List[ItemHash] - aggregates: List[ItemHash] = Field(default_factory=list) + aggregates: Optional[List[ItemHash]] = None reason: Optional[str] = None def __hash__(self): @@ -200,6 +201,36 @@ def check_item_content(cls, v: Optional[str], values) -> Optional[str]: ) return v + @validator("content") + def check_content(cls, v, values): + item_type = values["item_type"] + if item_type == ItemType.inline: + try: + item_content = json.loads(values["item_content"]) + except JSONDecodeError: + raise ValueError( + "Field 'item_content' does not appear to be valid JSON" + ) + json_dump = json.loads(v.json()) + for key, value in json_dump.items(): + if value != item_content[key]: + if isinstance(value, list): + for item in value: + if item not in item_content[key]: + raise ValueError( + f"Field 'content.{key}' does not match 'item_content.{key}': {item} != {item_content[key]}" + ) + if isinstance(value, dict): + for item in value.items(): + if item not in item_content[key].items(): + raise ValueError( + f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {item_content[key]}" + ) + raise ValueError( + f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {item_content[key]} or type mismatch ({type(value)} != {type(item_content[key])})" + ) + return v + @validator("item_hash") def check_item_hash(cls, v: ItemHash, values) -> ItemHash: item_type = values["item_type"] @@ -277,20 +308,6 @@ class ProgramMessage(BaseMessage): type: Literal[MessageType.program] content: ProgramContent - @validator("content") - def check_content(cls, v, values): - item_type = values["item_type"] - if item_type == ItemType.inline: - item_content = json.loads(values["item_content"]) - if v.dict(exclude_none=True) != item_content: - # Print differences - vdict = v.dict(exclude_none=True) - for key, value in item_content.items(): - if vdict[key] != value: - print(f"{key}: {vdict[key]} != {value}") - raise ValueError("Content and item_content differ") - return v - class InstanceMessage(BaseMessage): type: Literal[MessageType.instance] @@ -337,12 +354,12 @@ def parse_message(message_dict: Dict) -> AlephMessage: def add_item_content_and_hash(message_dict: Dict, inplace: bool = False): + # TODO: I really don't like this function. There is no validation of the + # message_dict, if it is indeed a real message, and can lead to unexpected results. if not inplace: message_dict = copy(message_dict) - message_dict["item_content"] = json.dumps( - message_dict["content"], separators=(",", ":") - ) + message_dict["item_content"] = dump_content(message_dict["content"]) message_dict["item_hash"] = sha256( message_dict["item_content"].encode() ).hexdigest() diff --git a/aleph_message/models/abstract.py b/aleph_message/models/abstract.py index f272dbd..b238050 100644 --- a/aleph_message/models/abstract.py +++ b/aleph_message/models/abstract.py @@ -1,5 +1,7 @@ from pydantic import BaseModel, Extra +from aleph_message.utils import dump_content + def hashable(obj): """Convert `obj` into a hashable object.""" @@ -26,3 +28,6 @@ class BaseContent(BaseModel): class Config: extra = Extra.forbid + + def json(self, *args, **kwargs): + return dump_content(self) diff --git a/aleph_message/tests/test_models.py b/aleph_message/tests/test_models.py index 5f07bcd..c84dae2 100644 --- a/aleph_message/tests/test_models.py +++ b/aleph_message/tests/test_models.py @@ -271,12 +271,12 @@ def test_create_new_message(): "chain": "ETH", "sender": "0x101d8D16372dBf5f1614adaE95Ee5CCE61998Fc9", "type": "POST", - "time": "1625652287.017", + "time": 1625652287.017, "item_type": "inline", "content": { "address": "0x101d8D16372dBf5f1614adaE95Ee5CCE61998Fc9", "type": "test-message", - "time": "1625652287.017", + "time": 1625652287.017, "content": { "hello": "world", }, diff --git a/aleph_message/utils.py b/aleph_message/utils.py index 358c365..d242a79 100644 --- a/aleph_message/utils.py +++ b/aleph_message/utils.py @@ -1,7 +1,12 @@ from __future__ import annotations +import json import math -from typing import NewType +from datetime import date, datetime, time +from typing import Any, Dict, NewType, Union + +from pydantic import BaseModel +from pydantic.json import pydantic_encoder Megabytes = NewType("Megabytes", int) Mebibytes = NewType("Mebibytes", int) @@ -15,3 +20,37 @@ def gigabyte_to_mebibyte(n: Gigabytes) -> Mebibytes: mebibyte = 2**20 gigabyte = 10**9 return Mebibytes(math.ceil(n * gigabyte / mebibyte)) + + +def extended_json_encoder(obj: Any) -> Any: + """ + Extended JSON encoder for dumping objects that contain pydantic models and datetime objects. + """ + if isinstance(obj, datetime): + return obj.timestamp() + elif isinstance(obj, date): + return obj.toordinal() + elif isinstance(obj, time): + return obj.hour * 3600 + obj.minute * 60 + obj.second + obj.microsecond / 1e6 + else: + return pydantic_encoder(obj) + + +def dump_content(obj: Union[Dict, BaseModel]) -> str: + """Dump message content as JSON string.""" + if isinstance(obj, dict): + # without None values + obj = obj.copy() + for key in list(obj.keys()): + if obj[key] is None: + del obj[key] + return json.dumps(obj, separators=(",", ":"), default=extended_json_encoder) + + if isinstance(obj, BaseModel): + return json.dumps( + obj.dict(exclude_none=True), + separators=(",", ":"), + default=extended_json_encoder, + ) + + raise TypeError(f"Invalid type: `{type(obj)}`") From 3febd8e60ff94be45436f5f48bededa60ec52670 Mon Sep 17 00:00:00 2001 From: mhh Date: Tue, 13 Feb 2024 17:23:58 +0100 Subject: [PATCH 3/8] Fix formatting with isort --- aleph_message/models/execution/program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aleph_message/models/execution/program.py b/aleph_message/models/execution/program.py index 344d4d6..8afb6d9 100644 --- a/aleph_message/models/execution/program.py +++ b/aleph_message/models/execution/program.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Literal, Optional, List +from typing import List, Literal, Optional from pydantic import Field From 213b554f8667d113699256f3468d010bc074a3b7 Mon Sep 17 00:00:00 2001 From: mhh Date: Tue, 13 Feb 2024 18:15:41 +0100 Subject: [PATCH 4/8] Improve `dump_content` readability --- aleph_message/utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/aleph_message/utils.py b/aleph_message/utils.py index d242a79..d7e702d 100644 --- a/aleph_message/utils.py +++ b/aleph_message/utils.py @@ -40,17 +40,13 @@ def dump_content(obj: Union[Dict, BaseModel]) -> str: """Dump message content as JSON string.""" if isinstance(obj, dict): # without None values - obj = obj.copy() - for key in list(obj.keys()): - if obj[key] is None: - del obj[key] + obj = {k: v for k, v in obj.items() if v is not None} return json.dumps(obj, separators=(",", ":"), default=extended_json_encoder) - - if isinstance(obj, BaseModel): + elif isinstance(obj, BaseModel): return json.dumps( obj.dict(exclude_none=True), separators=(",", ":"), default=extended_json_encoder, ) - - raise TypeError(f"Invalid type: `{type(obj)}`") + else: + raise TypeError(f"Invalid type: `{type(obj)}`") From 3cc5faf7e788356064f44e1cab7a10b801e20787 Mon Sep 17 00:00:00 2001 From: mhh Date: Tue, 13 Feb 2024 18:35:42 +0100 Subject: [PATCH 5/8] Improve test coverage for utils.py --- aleph_message/tests/test_utils.py | 39 ++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/aleph_message/tests/test_utils.py b/aleph_message/tests/test_utils.py index f9214be..01ae993 100644 --- a/aleph_message/tests/test_utils.py +++ b/aleph_message/tests/test_utils.py @@ -1,6 +1,43 @@ -from aleph_message.utils import Gigabytes, gigabyte_to_mebibyte +from datetime import datetime, date, time + +import pytest +from pydantic import BaseModel + +from aleph_message.utils import Gigabytes, gigabyte_to_mebibyte, extended_json_encoder, dump_content def test_gigabyte_to_mebibyte(): assert gigabyte_to_mebibyte(Gigabytes(1)) == 954 assert gigabyte_to_mebibyte(Gigabytes(100)) == 95368 + + +def test_extended_json_encoder(): + now = datetime.now() + today = date.today() + now_time = time(hour=1, minute=2, second=3, microsecond=4) + assert extended_json_encoder(now) == now.timestamp() + assert extended_json_encoder(today) == today.toordinal() + assert extended_json_encoder(now_time) == 3723.000004 + + +def test_dump_content(): + class TestModel(BaseModel): + address: str + time: float + + assert dump_content({"address": "0x1", "time": 1.0}) == '{"address":"0x1","time":1.0}' + assert dump_content(TestModel(address="0x1", time=1.0)) == '{"address":"0x1","time":1.0}' + + +@pytest.mark.parametrize( + "content", + [ + 1, + "test", + None, + True, + ], +) +def test_dump_content_invalid(content): + with pytest.raises(TypeError): + dump_content(content) From 8a45ee85499a8dbdbf6561f552299335271bbeb6 Mon Sep 17 00:00:00 2001 From: mhh Date: Tue, 13 Feb 2024 21:04:39 +0100 Subject: [PATCH 6/8] Fix formatting --- aleph_message/tests/test_utils.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/aleph_message/tests/test_utils.py b/aleph_message/tests/test_utils.py index 01ae993..349db45 100644 --- a/aleph_message/tests/test_utils.py +++ b/aleph_message/tests/test_utils.py @@ -1,9 +1,14 @@ -from datetime import datetime, date, time +from datetime import date, datetime, time import pytest from pydantic import BaseModel -from aleph_message.utils import Gigabytes, gigabyte_to_mebibyte, extended_json_encoder, dump_content +from aleph_message.utils import ( + Gigabytes, + dump_content, + extended_json_encoder, + gigabyte_to_mebibyte, +) def test_gigabyte_to_mebibyte(): @@ -25,8 +30,13 @@ class TestModel(BaseModel): address: str time: float - assert dump_content({"address": "0x1", "time": 1.0}) == '{"address":"0x1","time":1.0}' - assert dump_content(TestModel(address="0x1", time=1.0)) == '{"address":"0x1","time":1.0}' + assert ( + dump_content({"address": "0x1", "time": 1.0}) == '{"address":"0x1","time":1.0}' + ) + assert ( + dump_content(TestModel(address="0x1", time=1.0)) + == '{"address":"0x1","time":1.0}' + ) @pytest.mark.parametrize( From 9bd0cc6a2fcc5540a910f00bc67090cf58b65452 Mon Sep 17 00:00:00 2001 From: mhh Date: Tue, 13 Feb 2024 21:14:57 +0100 Subject: [PATCH 7/8] Reformat content validator and add docstrings --- aleph_message/models/__init__.py | 58 ++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/aleph_message/models/__init__.py b/aleph_message/models/__init__.py index 03a377f..33330d7 100644 --- a/aleph_message/models/__init__.py +++ b/aleph_message/models/__init__.py @@ -202,37 +202,43 @@ def check_item_content(cls, v: Optional[str], values) -> Optional[str]: return v @validator("content") - def check_content(cls, v, values): + def check_content(self, v, values): + """Check that the content matches the serialized item_content""" item_type = values["item_type"] - if item_type == ItemType.inline: - try: - item_content = json.loads(values["item_content"]) - except JSONDecodeError: - raise ValueError( - "Field 'item_content' does not appear to be valid JSON" - ) - json_dump = json.loads(v.json()) - for key, value in json_dump.items(): - if value != item_content[key]: - if isinstance(value, list): - for item in value: - if item not in item_content[key]: - raise ValueError( - f"Field 'content.{key}' does not match 'item_content.{key}': {item} != {item_content[key]}" - ) - if isinstance(value, dict): - for item in value.items(): - if item not in item_content[key].items(): - raise ValueError( - f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {item_content[key]}" - ) + if item_type != ItemType.inline: + return v + + try: + item_content = json.loads(values["item_content"]) + except JSONDecodeError: + raise ValueError("Field 'item_content' does not appear to be valid JSON") + json_dump = json.loads(v.json()) + for key, value in json_dump.items(): + if value != item_content[key]: + self._raise_value_error(item_content, key, value) + + @staticmethod + def _raise_value_error(item_content, key, value): + """Raise a ValueError with a message that explains the content/item_content mismatch""" + if isinstance(value, list): + for item in value: + if item not in item_content[key]: raise ValueError( - f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {item_content[key]} or type mismatch ({type(value)} != {type(item_content[key])})" + f"Field 'content.{key}' does not match 'item_content.{key}': {item} != {item_content[key]}" ) - return v + if isinstance(value, dict): + for item in value.items(): + if item not in item_content[key].items(): + raise ValueError( + f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {item_content[key]}" + ) + raise ValueError( + f"Field 'content.{key}' does not match 'item_content.{key}': {value} != {item_content[key]} or type mismatch ({type(value)} != {type(item_content[key])})" + ) @validator("item_hash") def check_item_hash(cls, v: ItemHash, values) -> ItemHash: + """Check that the 'item_hash' matches the 'item_content's SHA256 hash""" item_type = values["item_type"] if item_type == ItemType.inline: item_content: str = values["item_content"] @@ -256,6 +262,7 @@ def check_item_hash(cls, v: ItemHash, values) -> ItemHash: @validator("confirmed") def check_confirmed(cls, v, values): + """Check that 'confirmed' is not True without 'confirmations'""" confirmations = values["confirmations"] if v is True and not bool(confirmations): raise ValueError("Message cannot be 'confirmed' without 'confirmations'") @@ -263,6 +270,7 @@ def check_confirmed(cls, v, values): @validator("time") def convert_float_to_datetime(cls, v, values): + """Converts a Unix timestamp to a datetime object""" if isinstance(v, float): v = datetime.datetime.fromtimestamp(v) assert isinstance(v, datetime.datetime) From 910bb7ad2bf7598f59323879c58dfcf3b7c1de25 Mon Sep 17 00:00:00 2001 From: mhh Date: Tue, 13 Feb 2024 21:20:04 +0100 Subject: [PATCH 8/8] Fix tests after refactoring --- aleph_message/models/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aleph_message/models/__init__.py b/aleph_message/models/__init__.py index 33330d7..1b905d6 100644 --- a/aleph_message/models/__init__.py +++ b/aleph_message/models/__init__.py @@ -202,7 +202,7 @@ def check_item_content(cls, v: Optional[str], values) -> Optional[str]: return v @validator("content") - def check_content(self, v, values): + def check_content(cls, v, values): """Check that the content matches the serialized item_content""" item_type = values["item_type"] if item_type != ItemType.inline: @@ -215,7 +215,8 @@ def check_content(self, v, values): json_dump = json.loads(v.json()) for key, value in json_dump.items(): if value != item_content[key]: - self._raise_value_error(item_content, key, value) + cls._raise_value_error(item_content, key, value) + return v @staticmethod def _raise_value_error(item_content, key, value):