diff --git a/README.md b/README.md
index 547bc67..1e91933 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,17 @@ response = agent.react(
 )
 ```
 
+We integrate with [EvaEngine by Chromia](https://evaengine.ai/virtuals) for response evaluation, enabling you to optimize your Character Card based on quantitative metrics (truth, accuracy, creativity, engagement) and scoring.
+```python
+response = agent.react(
+    session_id="567", # string identifier that you decide
+    tweet_id="xxxx",
+    platform="twitter",
+)
+# eval_react reads the original tweet and the agent's reply out of the react response
+eval_result = agent.eval_react(response)
+```
+
 Once you are happy, `deploy_twitter` will push your agent configurations to production and run your agent on Twitter/X autonomously.
 ```python
 # deploy agent! (NOTE: supported for Twitter/X only now)
diff --git a/examples/example-eval.py b/examples/example-eval.py
new file mode 100644
index 0000000..75ee363
--- /dev/null
+++ b/examples/example-eval.py
@@ -0,0 +1,55 @@
+import os
+
+from virtuals_sdk import game
+
+VIRTUALS_API_KEY = os.environ.get("VIRTUALS_API_KEY")
+
+agent = game.Agent(
+    api_key=VIRTUALS_API_KEY,
+    goal="reply tweet",
+    description="always reply to tweet and make sure read the tweet requirement carefully, please make sure your tweet are very interesting and clickbait and very provocative and very relevant to the tweet, Viral-worthy content, Perfect timing and context, Exceptional creativity/originality,Maximum engagement potential, Industry-leading example of effective tweeting",
+    world_info="You must always reply user's tweet"
+)
+
+# applicable only for platform twitter
+agent.list_available_default_twitter_functions()
+agent.use_default_twitter_functions(["wait", "reply_tweet"])
+
+# # running reaction module only for platform twitter
+# result = agent.react(
+#     session_id="session-twitter",
+#     platform="twitter",
+#     tweet_id="1869281466628349975",
+# )
+
+# print("original_tweet:", result[0]["EVENT-REQUEST"]["event"])
+# print("responded_tweet:", result[-1]["TWEET-CONTENT"]["content"])
+
+# # Checkout your eval dashboard here: https://evaengine.ai/virtuals (import your api key to view)
+# eval_result = agent.eval_react(result)
+# print(eval_result)
+
+# Run multiple tests to get average eval score
+eval_results = []
+for _ in range(2):
+    result = agent.react(
+        session_id="session-twitter",
+        platform="twitter",
+        tweet_id="1869281466628349975",
+    )
+    eval_result = agent.eval_react(result)
+    eval_results.append(eval_result)
+
+# Calculate averages from eval_results
+final_scores = [result['final_score'] for result in eval_results]
+truth_scores = [result['truth']['score'] for result in eval_results]
+accuracy_scores = [result['accuracy']['score'] for result in eval_results]
+creativity_scores = [result['creativity']['score'] for result in eval_results]
+engagement_scores = [result['engagement']['score'] for result in eval_results]
+
+print(f"Average scores across {len(eval_results)} evaluations:")
+print(f"Final Score: {sum(final_scores) / len(final_scores):.2f}")
+print(f"Truth Score: {sum(truth_scores) / len(truth_scores):.2f}")
+print(f"Accuracy Score: {sum(accuracy_scores) / len(accuracy_scores):.2f}")
+print(f"Creativity Score: {sum(creativity_scores) / len(creativity_scores):.2f}")
+print(f"Engagement Score: {sum(engagement_scores) / len(engagement_scores):.2f}")
diff --git a/src/virtuals_sdk/game.py b/src/virtuals_sdk/game.py
index 5a565ae..a754eac 100644
--- a/src/virtuals_sdk/game.py
+++ b/src/virtuals_sdk/game.py
@@ -256,6 +256,23 @@ def react(self, session_id: str, platform: str, tweet_id: str = None, event: str
             functions=self.enabled_functions,
             custom_functions=self.custom_functions
         )
+
+    def eval_react(self, response: List[Dict[str, Any]]):
+        """
+        Evaluate the agent reply with EvaEngine
+        Checkout your eval dashboard here: https://evaengine.ai/virtuals (import your api key to view)
+
+        `response` is the list returned by `react`: the original tweet is read
+        from its first entry and the agent's reply from its last entry.
+        Raises ValueError when either of them cannot be extracted.
+        """
+        try:
+            original_tweet = response[0]["EVENT-REQUEST"]["event"].split("New tweet: ")[1]
+            replied_tweet = response[-1]["TWEET-CONTENT"]["content"]
+        except (KeyError, IndexError, TypeError) as e:
+            # chain the cause so the missing key/index stays visible in the traceback
+            raise ValueError("Invalid response format - missing tweet content. Please ensure the agent's goal includes replying to tweets.") from e
+        return self.game_sdk.eval_react(original_tweet, replied_tweet)
 
     def deploy_twitter(self):
         """
diff --git a/src/virtuals_sdk/sdk.py b/src/virtuals_sdk/sdk.py
index 64aa21f..d92ccbe 100644
--- a/src/virtuals_sdk/sdk.py
+++ b/src/virtuals_sdk/sdk.py
@@ -4,6 +4,7 @@
 class GameSDK:
     api_url: str = "https://game-api.virtuals.io/api"
     api_key: str
+    eval_api_url: str = "https://api.evaengine.ai/api"
 
     def __init__(self, api_key: str):
         self.api_key = api_key
@@ -89,6 +90,29 @@ def react(self, session_id: str, platform: str, goal: str,
             raise Exception(response.json())
 
         return response.json()["data"]
+
+    def eval_react(self, input_tweet: str, output_tweet: str):
+        """
+        Evaluate the agent reply
+        Checkout your eval dashboard here: https://evaengine.ai/virtuals (import your api key to view)
+
+        Posts both tweets to `{eval_api_url}/eval/evaluate-tweet` and returns the decoded JSON body.
+        NOTE: `api_key` is sent to that host in the `x-api-key` header.
+        Raises Exception carrying the JSON error body on any non-200 status.
+        """
+        response = requests.post(
+            f"{self.eval_api_url}/eval/evaluate-tweet",
+            headers={"x-api-key": self.api_key},
+            json={
+                "input_tweet": input_tweet,
+                "output_tweet": output_tweet
+            }
+        )
+
+        if (response.status_code != 200):
+            raise Exception(response.json())
+
+        return response.json()
 
     def deploy(self, goal: str, description: str, world_info: str, functions: list, custom_functions: list, main_heartbeat: int, reaction_heartbeat: int):
         """