diff --git a/challenge/onsite_competition/README.md b/challenge/onsite_competition/README.md index 76b91ee..f6e2fc4 100644 --- a/challenge/onsite_competition/README.md +++ b/challenge/onsite_competition/README.md @@ -5,6 +5,11 @@ In this phase, participants’ models will be deployed on **a real robot** to ev --- +## Robot +The robot uses a wheeled chassis. The chassis and camera control code can be found in the provided SDK. The camera’s default resolution is 640×480, and the depth and color images are already aligned. + +The robot is an AgileX [RANGER MINI 3.0](https://www.agilex.ai/solutions/1) with an RGB camera and a LiDAR sensor. + ## ⚙️ Installation First, install the `InternNav` package: diff --git a/challenge/onsite_competition/captures/rs_depth_mm.png b/challenge/onsite_competition/captures/rs_depth_mm.png new file mode 100644 index 0000000..7c46c35 Binary files /dev/null and b/challenge/onsite_competition/captures/rs_depth_mm.png differ diff --git a/challenge/onsite_competition/captures/rs_depth_vis.png b/challenge/onsite_competition/captures/rs_depth_vis.png new file mode 100644 index 0000000..85a629b Binary files /dev/null and b/challenge/onsite_competition/captures/rs_depth_vis.png differ diff --git a/challenge/onsite_competition/sdk/main.py b/challenge/onsite_competition/sdk/main.py index 2e9468e..705329c 100644 --- a/challenge/onsite_competition/sdk/main.py +++ b/challenge/onsite_competition/sdk/main.py @@ -3,6 +3,7 @@ import sys from real_world_env import RealWorldEnv +from stream import app, start_env from internnav.agent.utils.client import AgentClient from internnav.configs.evaluator.default_config import get_config @@ -21,6 +22,7 @@ def parse_args(): type=str, help='current instruction to follow', ) + parser.add_argument("--tag", type=str, help="tag for the run, saved by the tag name which is team-task-trail") return parser.parse_args() @@ -32,6 +34,7 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'): return getattr(config_module, attr_name) +# 
TODO add logging for each step, saved by the tag name which is team-task-trail def main(): args = parse_args() print("--- Loading config from:", args.config, "---") @@ -43,16 +46,34 @@ def main(): agent = AgentClient(cfg.agent) # initialize real world env - env = RealWorldEnv(args.instruction) + env = RealWorldEnv() - while True: - # print("get observation...") - # obs contains {rgb, depth, instruction} - obs = env.get_observation() + # start stream + start_env(env) + app.run(host="0.0.0.0", port=8080, threaded=True) - # print("agent step...") - # action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent) - action = agent.step(obs)[0]['action'][0] # only take the first env's action integer + try: + while True: + # print("get observation...") + # obs contains {rgb, depth, instruction} + obs = env.get_observation() + obs["instruction"] = args.instruction - # print("env step...") - env.step(action) + # print("agent step...") + # action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent) + try: + action = agent.step(obs)[0]['action'][0] + print(f"agent step success, action is {action}") + except Exception as e: + print(f"agent step error {e}") + continue + + # print("env step...") + try: + env.step(action) + print("env step success") + except Exception as e: + print(f"env step error {e}") + continue + finally: + env.close() diff --git a/challenge/onsite_competition/sdk/real_world_env.py b/challenge/onsite_competition/sdk/real_world_env.py index 8d13c99..e004bd6 100644 --- a/challenge/onsite_competition/sdk/real_world_env.py +++ b/challenge/onsite_competition/sdk/real_world_env.py @@ -1,3 +1,6 @@ +import threading +import time + from cam import AlignedRealSense from control import DiscreteRobotController @@ -5,23 +8,49 @@ class RealWorldEnv(Env): - def __init__(self): + def __init__(self, fps: int = 30): self.node = DiscreteRobotController() self.cam = 
AlignedRealSense() + self.latest_obs = None + self.lock = threading.Lock() + self.stop_flag = threading.Event() + self.fps = fps + + # 启动相机 + self.cam.start() + # 启动采集线程 + self.thread = threading.Thread(target=self._capture_loop, daemon=True) + self.thread.start() + + def _capture_loop(self): + """keep capturing frames""" + interval = 1.0 / self.fps + while not self.stop_flag.is_set(): + t0 = time.time() + try: + obs = self.cam.get_observation(timeout_ms=1000) + with self.lock: + self.latest_obs = obs + except Exception as e: + print("Camera capture failed:", e) + time.sleep(0.05) + dt = time.time() - t0 + if dt < interval: + time.sleep(interval - dt) def get_observation(self): - frame = self.cam.get_observation() - return frame - - def step(self, action): - - ''' - action (int): Discrete action to apply: - - 0: no movement (stand still) - - 1: move forward - - 2: rotate left - - 3: rotate right - ''' + """return most recent frame""" + with self.lock: + return self.latest_obs + + def step(self, action: int): + """ + action: + 0: stand still + 1: move forward + 2: turn left + 3: turn right + """ if action == 0: self.node.stand_still() elif action == 1: @@ -30,3 +59,8 @@ def step(self, action): self.node.turn_left() elif action == 3: self.node.turn_right() + + def close(self): + self.stop_flag.set() + self.thread.join(timeout=1.0) + self.cam.stop() diff --git a/challenge/onsite_competition/sdk/stream.py b/challenge/onsite_competition/sdk/stream.py new file mode 100644 index 0000000..b045cf1 --- /dev/null +++ b/challenge/onsite_competition/sdk/stream.py @@ -0,0 +1,38 @@ +# stream_server.py +import time + +import cv2 +from flask import Flask, Response + +app = Flask(__name__) + +# 由主程序注入 +_env = None + + +def set_env(env): + """set env from main to stream server""" + global _env + _env = env + + +def _mjpeg_generator(jpeg_quality: int = 80): + boundary = b"--frame" + while True: + if _env is None: + time.sleep(0.1) + continue + obs = _env.get_observation() + if obs is 
None: + time.sleep(0.01) + continue + frame_bgr = obs["rgb"] + ok, jpg = cv2.imencode(".jpg", frame_bgr, [cv2.IMWRITE_JPEG_QUALITY, jpeg_quality]) + if not ok: + continue + yield (boundary + b"\r\n" b"Content-Type: image/jpeg\r\n\r\n" + jpg.tobytes() + b"\r\n") + + +@app.route("/stream") +def stream(): + return Response(_mjpeg_generator(), mimetype="multipart/x-mixed-replace; boundary=frame")