Skip to content

Added EvaEngine #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,18 @@ response = agent.react(
)
```

We integrate with [EvaEngine by Chromia](https://evaengine.ai/virtuals) for response evaluation, enabling you to optimize your Character Card based on quantitative metrics (truth, accuracy, creativity, engagement) and scoring.
```python
response = agent.react(
    session_id="567",  # string identifier that you decide
    tweet_id="xxxx",
    platform="twitter",
)
# eval_react takes the whole react() response: it reads the original tweet from
# the first item and the agent's reply from the last item before scoring them.
eval_result = agent.eval_react(response)
```

Once you are happy, `deploy_twitter` will push your agent configurations to production and run your agent on Twitter/X autonomously.
```python
# deploy agent! (NOTE: supported for Twitter/X only now)
Expand Down
55 changes: 55 additions & 0 deletions examples/example-eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os

from virtuals_sdk import game

# The API key is read from the environment; it is None when the variable is unset.
VIRTUALS_API_KEY = os.getenv("VIRTUALS_API_KEY")

# Agent whose only job is to reply to the tweet it is reacting to.
agent = game.Agent(
    api_key=VIRTUALS_API_KEY,
    goal="reply tweet",
    description="always reply to tweet and make sure read the tweet requirement carefully, please make sure your tweet are very interesting and clickbait and very provocative and very relevant to the tweet, Viral-worthy content, Perfect timing and context, Exceptional creativity/originality,Maximum engagement potential, Industry-leading example of effective tweeting",
    world_info="You must always reply user's tweet",
)

# Twitter-only: query the default functions, then enable just the two needed
# for a reply flow.
agent.list_available_default_twitter_functions()
agent.use_default_twitter_functions(["wait", "reply_tweet"])

# # running reaction module only for platform twitter
# result = agent.react(
# session_id="session-twitter",
# platform="twitter",
# tweet_id="1869281466628349975",
# )

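# original_tweet = result[0]["EVENT-REQUEST"]["event"].split("New tweet: ")[1]
# replied_tweet = result[-1]["TWEET-CONTENT"]["content"]
# print("original_tweet:", original_tweet)
# print("responded_tweet:", replied_tweet)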

# # Check out your eval dashboard here: https://evaengine.ai/virtuals (import your API key to view)
# eval_result = agent.eval_react(result)
# print(eval_result)

# Run the same reaction several times so the reported scores are averages
# rather than a single sample.
eval_results = []
for _ in range(2):
    reaction = agent.react(
        session_id="session-twitter",
        platform="twitter",
        tweet_id="1869281466628349975",
    )
    eval_results.append(agent.eval_react(reaction))


def _average(scores):
    """Return the arithmetic mean of a non-empty list of scores."""
    return sum(scores) / len(scores)


# Each evaluation result carries an overall 'final_score' plus a nested
# 'score' entry for every individual metric.
final_average = _average([entry['final_score'] for entry in eval_results])
metric_averages = {
    metric: _average([entry[metric]['score'] for entry in eval_results])
    for metric in ('truth', 'accuracy', 'creativity', 'engagement')
}

print(f"Average scores across {len(eval_results)} evaluations:")
print(f"Final Score: {final_average:.2f}")
print(f"Truth Score: {metric_averages['truth']:.2f}")
print(f"Accuracy Score: {metric_averages['accuracy']:.2f}")
print(f"Creativity Score: {metric_averages['creativity']:.2f}")
print(f"Engagement Score: {metric_averages['engagement']:.2f}")
12 changes: 12 additions & 0 deletions src/virtuals_sdk/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,18 @@ def react(self, session_id: str, platform: str, tweet_id: str = None, event: str
functions=self.enabled_functions,
custom_functions=self.custom_functions
)

def eval_react(self, response: List[Dict[str, Any]]) -> Any:
    """
    Evaluate the agent reply with EvaEngine.

    Checkout your eval dashboard here: https://evaengine.ai/virtuals (import your api key to view)

    Args:
        response: Reaction output to evaluate. The first item must hold the
            triggering event text under ``["EVENT-REQUEST"]["event"]``
            (containing the marker ``"New tweet: "``), and the last item must
            hold the reply under ``["TWEET-CONTENT"]["content"]``.

    Returns:
        Whatever ``self.game_sdk.eval_react`` returns for the extracted
        (original tweet, replied tweet) pair.

    Raises:
        ValueError: If the two tweets cannot be extracted from ``response``.
            The underlying lookup error is chained as ``__cause__``.
    """
    try:
        event_text = response[0]["EVENT-REQUEST"]["event"]
        # maxsplit=1 keeps the whole tweet even when the tweet text itself
        # contains the "New tweet: " marker.
        original_tweet = event_text.split("New tweet: ", 1)[1]
        replied_tweet = response[-1]["TWEET-CONTENT"]["content"]
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover a None response or items/fields of the
        # wrong type, so every malformed input surfaces as the same ValueError.
        raise ValueError("Invalid response format - missing tweet content. Please ensure the agent's goal includes replying to tweets.") from e
    return self.game_sdk.eval_react(original_tweet, replied_tweet)

def deploy_twitter(self):
"""
Expand Down
20 changes: 20 additions & 0 deletions src/virtuals_sdk/sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
class GameSDK:
api_url: str = "https://game-api.virtuals.io/api"
api_key: str
eval_api_url: str = "https://api.evaengine.ai/api"

def __init__(self, api_key: str):
self.api_key = api_key
Expand Down Expand Up @@ -89,6 +90,25 @@ def react(self, session_id: str, platform: str, goal: str,
raise Exception(response.json())

return response.json()["data"]

def eval_react(self, input_tweet: str, output_tweet: str):
    """
    Evaluate the agent reply

    Checkout your eval dashboard here: https://evaengine.ai/virtuals (import your api key to view)

    Posts the tweet pair to ``{eval_api_url}/eval/evaluate-tweet``,
    authenticating with ``self.api_key`` in the ``x-api-key`` header.

    Args:
        input_tweet: Text of the tweet the agent reacted to.
        output_tweet: Text of the agent's reply.

    Returns:
        The decoded JSON body of the evaluation response.

    Raises:
        Exception: If the service answers with a non-200 status. The
            exception carries the decoded JSON error body, or the raw
            response text when the body is not valid JSON.
    """
    response = requests.post(
        f"{self.eval_api_url}/eval/evaluate-tweet",
        headers={"x-api-key": self.api_key},
        json={
            "input_tweet": input_tweet,
            "output_tweet": output_tweet,
        },
    )

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON (e.g. an HTML error page
        # from a proxy); fall back to the raw text so the real HTTP failure is
        # not masked by a JSON decode error.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise Exception(detail)

    return response.json()

def deploy(self, goal: str, description: str, world_info: str, functions: list, custom_functions: list, main_heartbeat: int, reaction_heartbeat: int):
"""
Expand Down