diff --git a/openvalidators/event.py b/openvalidators/event.py index a7af8a0..8452698 100644 --- a/openvalidators/event.py +++ b/openvalidators/event.py @@ -26,6 +26,8 @@ class EventSchema: completions: List[str] # List of completions received for a given prompt completion_times: List[float] # List of completion times for a given prompt + completion_return_messages: List[str] # List of completion return messages for a given prompt + completion_return_codes: List[str] # List of completion return codes for a given prompt name: str # Prompt type, e.g. 'followup', 'answer' block: float # Current block at given step gating_loss: float # Gating model loss for given step @@ -95,6 +97,8 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> 'EventSchema': return EventSchema( completions=event_dict['completions'], completion_times=event_dict['completion_times'], + completion_return_messages=event_dict['completion_return_messages'], + completion_return_codes=event_dict['completion_return_codes'], name=event_dict['name'], block=event_dict['block'], gating_loss=event_dict['gating_loss'], diff --git a/openvalidators/forward.py b/openvalidators/forward.py index 8b71847..0e603c8 100644 --- a/openvalidators/forward.py +++ b/openvalidators/forward.py @@ -112,6 +112,8 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude # Get completion times completion_times: List[float] = [comp.elapsed_time for comp in responses] + completion_return_messages: List[str] = [str(comp.return_message) for comp in responses] + completion_return_codes: List[str] = [str(comp.return_code) for comp in responses] # Compute forward pass rewards, assumes followup_uids and answer_uids are mutually exclusive. # shape: [ metagraph.n ] @@ -133,6 +135,8 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude "uids": uids.tolist(), "completions": completions, "completion_times": completion_times, + "completion_return_messages": completion_return_messages, + "completion_return_codes": completion_return_codes, "rewards": rewards.tolist(), "gating_loss": gating_loss.item(), "best": best, @@ -142,9 +146,9 @@ async def run_step(self, prompt: str, k: int, timeout: float, name: str, exclude if not self.config.neuron.dont_save_events: logger.log("EVENTS", "events", **event) - # Log the event to wandb. - wandb_event = EventSchema.from_dict(event, self.config.neuron.disable_log_rewards) + # Log the event to wandb. if not self.config.wandb.off: + wandb_event = EventSchema.from_dict(event, self.config.neuron.disable_log_rewards) self.wandb.log(asdict(wandb_event)) # Return the event. diff --git a/openvalidators/mock.py b/openvalidators/mock.py index e960f9f..62ab198 100644 --- a/openvalidators/mock.py +++ b/openvalidators/mock.py @@ -57,6 +57,8 @@ def reward( class MockDendriteResponse: completion = "" + return_message = "Success" + return_code = "1" elapsed_time = 0 is_success = True firewall_prompt = FirewallPrompt() diff --git a/openvalidators/neuron.py b/openvalidators/neuron.py index f21e670..fa0e9d2 100644 --- a/openvalidators/neuron.py +++ b/openvalidators/neuron.py @@ -167,8 +167,9 @@ def __init__(self): if self.config.neuron.mock_reward_models: self.reward_functions = [] self.reward_weights = [] + self.blacklist = MockRewardModel(RewardModelType.blacklist.value) self.masking_functions = [ - MockRewardModel(RewardModelType.blacklist.value), + self.blacklist, MockRewardModel(RewardModelType.nsfw.value), ] bt.logging.debug(str(self.reward_functions)) diff --git a/openvalidators/reward/reward.py b/openvalidators/reward/reward.py index 11ce39c..fad8081 100644 --- a/openvalidators/reward/reward.py +++ b/openvalidators/reward/reward.py @@ -125,5 +125,8 @@ def __init__(self, mock_name: str = 'MockReward'): def apply( self, prompt: str, completion: List[str], name: str ) -> torch.FloatTensor: mock_reward = torch.tensor( [0 for _ in completion], dtype=torch.float32 ) return mock_reward, mock_reward + + def reset(self): + pass \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index fd1b616..0d1a39a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ bittensor>=5.2.1,<6.0.0 transformers==4.28.0 -wandb==0.15.3 +wandb==0.15.10 datasets==2.14.0 plotly==5.14.1 networkx==3.1 diff --git a/tests/test_event.py b/tests/test_event.py index 44ce9c2..13902fa 100644 --- a/tests/test_event.py +++ b/tests/test_event.py @@ -29,6 +29,8 @@ def test_event_from_dict_all_forward_columns_match(self): event_dict = { 'completions': ['test'], 'completion_times': [0.123], + 'completion_return_messages': ['Success'], + 'completion_return_codes': ['1'], 'name': 'test-name', 'block': 1.0, 'gating_loss': 1.0, @@ -85,7 +87,9 @@ def test_event_from_dict_forward_no_reward_logging(self): # Assert: create a dictionary with all non-related reward columns event_dict = { 'completions': ['test'], - 'completion_times': [0.123], + 'completion_times': [0.123], + 'completion_return_messages': ['Success'], + 'completion_return_codes': ['1'], 'name': 'test-name', 'block': 1.0, 'gating_loss': 1.0, @@ -134,7 +138,9 @@ def test_event_from_dict_forward_reward_logging_mismatch(self): # Assert: create a dictionary with all non-related reward columns event_dict = { 'completions': ['test'], - 'completion_times': [0.123], + 'completion_times': [0.123], + 'completion_return_messages': ['Success'], + 'completion_return_codes': ['1'], 'name': 'test-name', 'block': 1.0, 'gating_loss': 1.0,