From 8b6e33af35a6edf52a3f4f4b9a0898b5bbf4a0fa Mon Sep 17 00:00:00 2001
From: ashdude1401 <ashdudecool1401@gmail.com>
Date: Wed, 26 Feb 2025 12:46:05 +0530
Subject: [PATCH 1/9] feat: add setup script and enhance custom agent and
 prompts for iframe interactions

---
 setup.ps1                   | 18 ++++++++++++++++
 src/agent/custom_agent.py   |  4 ++--
 src/agent/custom_prompts.py | 41 +++++++++++++++++++++++++++++++++++--
 3 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 setup.ps1

diff --git a/setup.ps1 b/setup.ps1
new file mode 100644
index 00000000..6a2d69fd
--- /dev/null
+++ b/setup.ps1
@@ -0,0 +1,18 @@
+# Step 1: Reset any existing virtual environment (safe on a fresh checkout)
+if (Get-Command deactivate -ErrorAction SilentlyContinue) { deactivate }
+if (Test-Path .venv) { Remove-Item -Recurse -Force .venv }
+
+# Step 2: Set Up Python Environment
+uv venv --python 3.11
+
+# Activate the virtual environment
+.\.venv\Scripts\Activate.ps1
+
+# Step 3: Install Dependencies
+uv pip install -r requirements.txt
+playwright install
+
+Write-Output "Setup complete. Virtual environment activated."
+
+# Step 4: Run web ui in local (blocks until the server exits)
+python webui.py --ip 127.0.0.1 --port 7788
diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py
index bfeb33ca..6202edad 100644
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -57,11 +57,11 @@ def __init__(
             use_vision_for_planner: bool = False,
             save_conversation_path: Optional[str] = None,
             save_conversation_path_encoding: Optional[str] = 'utf-8',
-            max_failures: int = 3,
+            max_failures: int = 5,
             retry_delay: int = 10,
             system_prompt_class: Type[SystemPrompt] = SystemPrompt,
             agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt,
-            max_input_tokens: int = 128000,
+            max_input_tokens: int = 1280000,
             validate_output: bool = False,
             message_context: Optional[str] = None,
             generate_gif: bool | str = True,
diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py
index ab8c9a1e..6b0e9d7b 100644
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -43,6 +43,12 @@ def important_rules(self) -> str:
        {"go_to_url": {"url": "https://example.com"}},
        {"extract_page_content": {}}
      ]
+     - Iframe interaction: [
+           {"switch_frame": {"frame_name": "GlobalNav"}},
+           {"click_element": {"index": 1}},
+           {"switch_frame": {"frame_name": "frameContent"}},
+           {"click_element": {"index": 2}}
+         ]
 
 
 3. ELEMENT INTERACTION:
@@ -82,8 +88,39 @@ def important_rules(self) -> str:
    - Only provide the action sequence until you think the page will change.
    - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
    - only use multiple actions if it makes sense. 
-
-9. Extraction:
+9. IFrames:
+   - Identify iframes using their names or unique identifiers
+   - Switch to iframes before interacting with nested elements
+   - Use frame locators for element interaction within iframes
+   - Example action sequence for iframe interaction:
+     [
+       {"switch_frame": {"frame_name": "GlobalNav"}},
+       {"click_element": {"index": 1}},
+       {"switch_frame": {"frame_name": "frameContent"}},
+       {"click_element": {"index": 2}}
+     ]
+   - Always return to the main frame after iframe operations
+   - Handle nested iframes by chaining switch_frame actions
+10. Action Sequencing for Iframes:
+   - Always start iframe interactions with switch_frame
+   - Perform all element interactions within the iframe context
+   - Use back_to_main_frame after completing iframe operations
+   - For nested iframes, chain switch_frame actions
+   - Example nested iframe sequence:
+     [
+       {"switch_frame": {"frame_name": "outerFrame"}},
+       {"switch_frame": {"frame_name": "innerFrame"}},
+       {"click_element": {"index": 1}},
+       {"back_to_main_frame": {}}
+     ]
+
+11. Visual Context for Iframes:
+   - Bounding boxes for iframe elements will have frame name labels
+   - Example: [GlobalNav] <button>Patients</button>
+   - Use frame labels to identify element context
+   - Elements without frame labels are in the main page
+
+12. Extraction:
     - If your task is to find information or do research - call extract_content on the specific pages to get and store the information.
 
 """

From 9c427307a3fdd25efcf332025e687ab45b1f4f3d Mon Sep 17 00:00:00 2001
From: ashdude1401 <ashdudecool1401@gmail.com>
Date: Wed, 26 Feb 2025 12:51:41 +0530
Subject: [PATCH 2/9] feat: add setup script and enhance custom agent and
 prompts for iframe interactions

---
 setup.ps1                   | 18 ++++++++++++++++
 src/agent/custom_agent.py   |  4 ++--
 src/agent/custom_prompts.py | 41 +++++++++++++++++++++++++++++++++++--
 3 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 setup.ps1

diff --git a/setup.ps1 b/setup.ps1
new file mode 100644
index 00000000..6a2d69fd
--- /dev/null
+++ b/setup.ps1
@@ -0,0 +1,18 @@
+# Step 1: Reset any existing virtual environment (safe on a fresh checkout)
+if (Get-Command deactivate -ErrorAction SilentlyContinue) { deactivate }
+if (Test-Path .venv) { Remove-Item -Recurse -Force .venv }
+
+# Step 2: Set Up Python Environment
+uv venv --python 3.11
+
+# Activate the virtual environment
+.\.venv\Scripts\Activate.ps1
+
+# Step 3: Install Dependencies
+uv pip install -r requirements.txt
+playwright install
+
+Write-Output "Setup complete. Virtual environment activated."
+
+# Step 4: Run web ui in local (blocks until the server exits)
+python webui.py --ip 127.0.0.1 --port 7788
diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py
index bfeb33ca..6202edad 100644
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -57,11 +57,11 @@ def __init__(
             use_vision_for_planner: bool = False,
             save_conversation_path: Optional[str] = None,
             save_conversation_path_encoding: Optional[str] = 'utf-8',
-            max_failures: int = 3,
+            max_failures: int = 5,
             retry_delay: int = 10,
             system_prompt_class: Type[SystemPrompt] = SystemPrompt,
             agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt,
-            max_input_tokens: int = 128000,
+            max_input_tokens: int = 1280000,
             validate_output: bool = False,
             message_context: Optional[str] = None,
             generate_gif: bool | str = True,
diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py
index ab8c9a1e..6b0e9d7b 100644
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -43,6 +43,12 @@ def important_rules(self) -> str:
        {"go_to_url": {"url": "https://example.com"}},
        {"extract_page_content": {}}
      ]
+     - Iframe interaction: [
+           {"switch_frame": {"frame_name": "GlobalNav"}},
+           {"click_element": {"index": 1}},
+           {"switch_frame": {"frame_name": "frameContent"}},
+           {"click_element": {"index": 2}}
+         ]
 
 
 3. ELEMENT INTERACTION:
@@ -82,8 +88,39 @@ def important_rules(self) -> str:
    - Only provide the action sequence until you think the page will change.
    - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
    - only use multiple actions if it makes sense. 
-
-9. Extraction:
+9. IFrames:
+   - Identify iframes using their names or unique identifiers
+   - Switch to iframes before interacting with nested elements
+   - Use frame locators for element interaction within iframes
+   - Example action sequence for iframe interaction:
+     [
+       {"switch_frame": {"frame_name": "GlobalNav"}},
+       {"click_element": {"index": 1}},
+       {"switch_frame": {"frame_name": "frameContent"}},
+       {"click_element": {"index": 2}}
+     ]
+   - Always return to the main frame after iframe operations
+   - Handle nested iframes by chaining switch_frame actions
+10. Action Sequencing for Iframes:
+   - Always start iframe interactions with switch_frame
+   - Perform all element interactions within the iframe context
+   - Use back_to_main_frame after completing iframe operations
+   - For nested iframes, chain switch_frame actions
+   - Example nested iframe sequence:
+     [
+       {"switch_frame": {"frame_name": "outerFrame"}},
+       {"switch_frame": {"frame_name": "innerFrame"}},
+       {"click_element": {"index": 1}},
+       {"back_to_main_frame": {}}
+     ]
+
+11. Visual Context for Iframes:
+   - Bounding boxes for iframe elements will have frame name labels
+   - Example: [GlobalNav] <button>Patients</button>
+   - Use frame labels to identify element context
+   - Elements without frame labels are in the main page
+
+12. Extraction:
     - If your task is to find information or do research - call extract_content on the specific pages to get and store the information.
 
 """

From 2954de3a061748b4d5bd868768833575445a1e59 Mon Sep 17 00:00:00 2001
From: ashdude1401 <ashdudecool1401@gmail.com>
Date: Wed, 26 Feb 2025 13:37:01 +0530
Subject: [PATCH 3/9] feat: add step-by-step instructions for accessing
 Athenahealth Document Search

---
 prompts/prompt.text | 51 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 prompts/prompt.text

diff --git a/prompts/prompt.text b/prompts/prompt.text
new file mode 100644
index 00000000..41a816a7
--- /dev/null
+++ b/prompts/prompt.text
@@ -0,0 +1,51 @@
+Step-by-Step Instructions
+
+1. Navigate to Athenahealth Preview Environment
+   - Open a web browser and go to: https://preview.athenahealth.com/
+
+2. Log In
+   - Enter the credentials:
+     - Username: $SENSITIVE_ATHENA_USERNAME
+     - Password: $SENSITIVE_ATHENA_PASSWORD
+   - Click the Login button.
+
+3. Select the Default Department
+   - If prompted, choose the default department from the list (e.g., "7 Hills Department").
+
+4. Access the "Patients" Menu
+   - Locate the header at the top of the dashboard.
+   - Click on the "Patients" menu to open the dropdown.
+
+5. Access Document Search
+   - Option 1 (Primary Attempt)
+     - In the dropdown, look for "Documents > Document Search" and click it.
+     - If the primary attempt fails (error 404 or element not found):
+       - Refresh the page.
+       - Retry clicking "Document Search" (up to 3 times with 2-second intervals).
+
+6. Handle Iframes (Fallback Approach)
+   - Use the following sequence if Document Search is nested in iframes:
+     - Switch to the main iframe context:
+       - Locate and switch to iframe[name="frMain"].
+     - Switch to the sub-iframe:
+       - Locate and switch to iframe[id="searchFrame"] or iframe[name="frMain"] > iframe (if nested).
+     - Fill the DOCUMENTID and click Search:
+       - Enter the value "116873" in the DOCUMENTID field.
+       - Click the "Search" button.
+       - Retry up to 3 times:
+         - Wait 2 seconds between each retry if elements are missing.
+
+7. Observe and Report
+   - After clicking "Document Search" or executing the iframe fallback:
+     - The Clinical Inbox should display Jammy J. Willer's lab/imaging results, including CBC tests and MRI results with normal/abnormal statuses.
+
+Notes on Tier Group in Clinical Inbox
+- The Tier Group level ID for the search results is 107 tasks associated with the 16.11 Testing department.
+
+Common Issues and Solutions
+- Element Not Found: Ensure the iframe is fully loaded (wait for 5–10 seconds).
+- Button Not Clickable: Refresh the page and try again.
+- Network Errors: Verify your internet connection and retry the login process.
+
+Result
+- After following the steps, the Clinical Inbox displays lab/imaging results for Jammy J. Willer with task updates. The task is completed successfully.

From a46f98c7c17bc96fe6f4cb83dc481754e32fffb3 Mon Sep 17 00:00:00 2001
From: ashdude1401 <ashdudecool1401@gmail.com>
Date: Wed, 26 Feb 2025 13:47:16 +0530
Subject: [PATCH 4/9] feat: update prompt instructions for Clinical Inbox and
 refine common issues section

---
 prompts/prompt.text | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/prompts/prompt.text b/prompts/prompt.text
index 41a816a7..f55fc85d 100644
--- a/prompts/prompt.text
+++ b/prompts/prompt.text
@@ -37,15 +37,11 @@ Step-by-Step Instructions
 
 7. Observe and Report
    - After clicking "Document Search" or executing the iframe fallback:
-     - The Clinical Inbox should display Jammy J. Willer's lab/imaging results, including CBC tests and MRI results with normal/abnormal statuses.
-
-Notes on Tier Group in Clinical Inbox
-- The Tier Group level ID for the search results is 107 tasks associated with the 16.11 Testing department.
-
+     
 Common Issues and Solutions
 - Element Not Found: Ensure the iframe is fully loaded (wait for 5–10 seconds).
-- Button Not Clickable: Refresh the page and try again.
+- Button Not Clickable: Try force-clicking the button again.
 - Network Errors: Verify your internet connection and retry the login process.
 
 Result
-- After following the steps, the Clinical Inbox displays lab/imaging results for Jammy J. Willer with task updates. The task is completed successfully.
+- After following the steps, patient lab reports will be displayed. The task is completed successfully.

From b571736c8fd00b34e02712654135d60cea302901 Mon Sep 17 00:00:00 2001
From: ashdude1401 <ashdudecool1401@gmail.com>
Date: Sat, 1 Mar 2025 14:51:07 +0530
Subject: [PATCH 5/9] feat: update Dockerfile and configuration for reduced
 resolution; modify custom agent to disable vision usage

---
 Dockerfile                           |   6 +-
 src/agent/custom_agent.py            |   6 +-
 src/utils/default_config_settings.py |   2 +-
 supervisord.conf                     | 121 ++++++++++++++++++++++++---
 webui.py                             |  25 ++++--
 5 files changed, 136 insertions(+), 24 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7b6d39fe..44d47511 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -71,11 +71,11 @@ ENV BROWSER_USE_LOGGING_LEVEL=info
 ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
-ENV RESOLUTION=1920x1080x24
+ENV RESOLUTION=960x540x24
 ENV VNC_PASSWORD=vncpassword
 ENV CHROME_PERSISTENT_SESSION=true
-ENV RESOLUTION_WIDTH=1920
-ENV RESOLUTION_HEIGHT=1080
+ENV RESOLUTION_WIDTH=960
+ENV RESOLUTION_HEIGHT=540
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py
index 6202edad..27379cc0 100644
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -53,7 +53,7 @@ def __init__(
             browser: Browser | None = None,
             browser_context: BrowserContext | None = None,
             controller: Controller = Controller(),
-            use_vision: bool = True,
+            use_vision: bool = False,
             use_vision_for_planner: bool = False,
             save_conversation_path: Optional[str] = None,
             save_conversation_path_encoding: Optional[str] = 'utf-8',
@@ -281,8 +281,8 @@ async def _run_planner(self) -> Optional[str]:
             planner_messages[-1] = HumanMessage(content=new_msg)
 
         # Get planner output
-        response = await self.planner_llm.ainvoke(planner_messages)
-        plan = response.content
+        response = await self.planner_llm.ainvoke(planner_messages)
+        plan = response.content
         last_state_message = planner_messages[-1]
         # remove image from last state message
         if isinstance(last_state_message.content, list):
diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py
index e6fa88f9..5d7c6662 100644
--- a/src/utils/default_config_settings.py
+++ b/src/utils/default_config_settings.py
@@ -10,7 +10,7 @@ def default_config():
         "agent_type": "custom",
         "max_steps": 100,
         "max_actions_per_step": 10,
-        "use_vision": True,
+        "use_vision": False,
         "tool_calling_method": "auto",
         "llm_provider": "openai",
         "llm_model_name": "gpt-4o",
diff --git a/supervisord.conf b/supervisord.conf
index 3410b912..a59a94e5 100644
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -1,3 +1,100 @@
+# [supervisord]
+# user=root
+# nodaemon=true
+# logfile=/dev/stdout
+# logfile_maxbytes=0
+# loglevel=debug
+
+# [program:xvfb]
+# command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
+# autorestart=true
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+# priority=100
+# startsecs=3
+# stopsignal=TERM
+# stopwaitsecs=10
+
+# [program:vnc_setup]
+# command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
+# autorestart=false
+# startsecs=0
+# priority=150
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+
+# [program:x11vnc]
+# command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -bg -rfbport 5901 -o /var/log/x11vnc.log"
+# autorestart=true
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+# priority=200
+# startretries=10
+# startsecs=10
+# stopsignal=TERM
+# stopwaitsecs=10
+# depends_on=vnc_setup,xvfb
+
+# [program:x11vnc_log]
+# command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
+# autorestart=true
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+# priority=250
+# stopsignal=TERM
+# stopwaitsecs=5
+# depends_on=x11vnc
+
+# [program:novnc]
+# command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc --http-header='Content-Security-Policy: frame-ancestors http://localhost:7788/'"
+# autorestart=true
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+# priority=300
+# startretries=5
+# startsecs=3
+# depends_on=x11vnc
+
+# [program:persistent_browser]
+# environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
+# command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
+# autorestart=true
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+# priority=350
+# startretries=5
+# startsecs=10
+# stopsignal=TERM
+# stopwaitsecs=15
+# depends_on=novnc
+
+# [program:webui]
+# command=python webui.py --ip 0.0.0.0 --port 7788
+# directory=/app
+# autorestart=true
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
+# priority=400
+# startretries=3
+# startsecs=3
+# stopsignal=TERM
+# stopwaitsecs=10
+# depends_on=persistent_browser
+
 [supervisord]
 user=root
 nodaemon=true
@@ -17,18 +114,18 @@ startsecs=3
 stopsignal=TERM
 stopwaitsecs=10
 
-[program:vnc_setup]
-command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
-autorestart=false
-startsecs=0
-priority=150
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-stderr_logfile=/dev/stderr
-stderr_logfile_maxbytes=0
+# [program:vnc_setup]
+# command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
+# autorestart=false
+# startsecs=0
+# priority=150
+# stdout_logfile=/dev/stdout
+# stdout_logfile_maxbytes=0
+# stderr_logfile=/dev/stderr
+# stderr_logfile_maxbytes=0
 
 [program:x11vnc]
-command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
+command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -nopw -forever -shared -bg -rfbport 5901 -o /var/log/x11vnc.log"
 autorestart=true
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
@@ -39,7 +136,7 @@ startretries=10
 startsecs=10
 stopsignal=TERM
 stopwaitsecs=10
-depends_on=vnc_setup,xvfb
+depends_on=xvfb
 
 [program:x11vnc_log]
 command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
@@ -93,4 +190,4 @@ startretries=3
 startsecs=3
 stopsignal=TERM
 stopwaitsecs=10
-depends_on=persistent_browser
+depends_on=persistent_browser
\ No newline at end of file
diff --git a/webui.py b/webui.py
index e770d99d..e1f4ae1c 100644
--- a/webui.py
+++ b/webui.py
@@ -897,11 +897,26 @@ def update_llm_num_ctx_visibility(llm_provider):
                     run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
                     stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
                     
+                # with gr.Row():
+                #     browser_view = gr.HTML(
+                #         value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
+                #         label="Live Browser View",
+                # )
+
                 with gr.Row():
-                    browser_view = gr.HTML(
-                        value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
-                        label="Live Browser View",
-                )
+                    gr.HTML(
+                        """
+                        <iframe 
+                            src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
+                            width="100%" 
+                            height="600px" 
+                            frameborder="0"
+                            allow="clipboard-read; clipboard-write"
+                            style="margin-top: 20px;"
+                            allowfullscreen>
+                        </iframe>
+                        """
+                    )
             
             with gr.TabItem("🧐 Deep Research", id=5):
                 research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.")
@@ -961,7 +976,7 @@ def update_llm_num_ctx_visibility(llm_provider):
                             enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method
                         ],
                     outputs=[
-                        browser_view,           # Browser view
+                        # browser_view,           # Browser view
                         final_result_output,    # Final result
                         errors_output,          # Errors
                         model_actions_output,   # Model actions

From 50a033ecc4a41f09a5ac2407df411b8bfa33c9bf Mon Sep 17 00:00:00 2001
From: prathamxcaliber <pratham.sharma@xcaliber.health>
Date: Sun, 2 Mar 2025 11:47:41 +0530
Subject: [PATCH 6/9] UI fixes for layout

---
 webui.py | 1129 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 801 insertions(+), 328 deletions(-)

diff --git a/webui.py b/webui.py
index e1f4ae1c..2ad1c397 100644
--- a/webui.py
+++ b/webui.py
@@ -68,6 +68,12 @@ def resolve_sensitive_env_variables(text):
         
     return result
 
+def open_modal():
+    return gr.update(visible=True)
+
+def close_modal():
+    return gr.update(visible=False)
+
 async def stop_agent():
     """Request the agent to stop and update UI with enhanced feedback"""
     global _global_agent_state, _global_browser_context, _global_browser, _global_agent
@@ -703,379 +709,846 @@ def create_ui(config, theme_name="Ocean"):
     """
 
     with gr.Blocks(
-            title="Browser Use WebUI", theme=theme_map[theme_name], css=css
+            title="EHR Operator", theme=theme_map[theme_name], css="body { display: flex; justify-content: center; } #main-container { max-width: 1200px; width: 100%; }"
     ) as demo:
         with gr.Row():
             gr.Markdown(
                 """
-                # 🌐 Browser Use WebUI
-                ### Control your browser with AI assistance
+                <h1 style="font-size: 2.5em; font-weight: 800; text-align: center;">🌐 EHR Operator</h1>
+                <h3 style="font-size: 1.5em; font-weight: 600; text-align: center;">Control your browser with AI assistance</h3>
                 """,
                 elem_classes=["header-text"],
             )
 
-        with gr.Tabs() as tabs:
-            with gr.TabItem("⚙️ Agent Settings", id=1):
-                with gr.Group():
-                    agent_type = gr.Radio(
-                        ["org", "custom"],
-                        label="Agent Type",
-                        value=config['agent_type'],
-                        info="Select the type of agent to use",
-                    )
-                    with gr.Column():
-                        max_steps = gr.Slider(
-                            minimum=1,
-                            maximum=200,
-                            value=config['max_steps'],
-                            step=1,
-                            label="Max Run Steps",
-                            info="Maximum number of steps the agent will take",
-                        )
-                        max_actions_per_step = gr.Slider(
-                            minimum=1,
-                            maximum=20,
-                            value=config['max_actions_per_step'],
-                            step=1,
-                            label="Max Actions per Step",
-                            info="Maximum number of actions the agent will take per step",
-                        )
-                    with gr.Column():
-                        use_vision = gr.Checkbox(
-                            label="Use Vision",
-                            value=config['use_vision'],
-                            info="Enable visual processing capabilities",
-                        )
-                        tool_calling_method = gr.Dropdown(
-                            label="Tool Calling Method",
-                            value=config['tool_calling_method'],
-                            interactive=True,
-                            allow_custom_value=True,  # Allow users to input custom model names
-                            choices=["auto", "json_schema", "function_calling"],
-                            info="Tool Calls Funtion Name",
-                            visible=False
+        with gr.Blocks(elem_id="main-container"):  
+            with gr.Row(equal_height=True):  
+                with gr.Column(scale=2, min_width=480, elem_id="left-column"):  
+                    with gr.Group():
+                        task = gr.Textbox(
+                            label="Task Description",
+                            lines=10,  
+                            placeholder="Enter your task here...",
+                            value=config['task'],
+                            info="Describe what you want the agent to do",
                         )
 
-            with gr.TabItem("🔧 LLM Configuration", id=2):
-                with gr.Group():
-                    llm_provider = gr.Dropdown(
-                        choices=[provider for provider,model in utils.model_names.items()],
-                        label="LLM Provider",
-                        value=config['llm_provider'],
-                        info="Select your preferred language model provider"
-                    )
-                    llm_model_name = gr.Dropdown(
-                        label="Model Name",
-                        choices=utils.model_names['openai'],
-                        value=config['llm_model_name'],
-                        interactive=True,
-                        allow_custom_value=True,  # Allow users to input custom model names
-                        info="Select a model from the dropdown or type a custom model name"
-                    )
-                    llm_num_ctx = gr.Slider(
-                        minimum=2**8,
-                        maximum=2**16,
-                        value=config['llm_num_ctx'],
-                        step=1,
-                        label="Max Context Length",
-                        info="Controls max context length model needs to handle (less = faster)",
-                        visible=config['llm_provider'] == "ollama"
-                    )
-                    llm_temperature = gr.Slider(
-                        minimum=0.0,
-                        maximum=2.0,
-                        value=config['llm_temperature'],
-                        step=0.1,
-                        label="Temperature",
-                        info="Controls randomness in model outputs"
-                    )
-                    with gr.Row():
-                        llm_base_url = gr.Textbox(
-                            label="Base URL",
-                            value=config['llm_base_url'],
-                            info="API endpoint URL (if required)"
+                        add_infos = gr.Textbox(
+                            label="Additional Information",
+                            lines=7,
+                            placeholder="Add any helpful context or instructions...",
+                            info="Optional hints to help the LLM complete the task",
                         )
-                        llm_api_key = gr.Textbox(
-                            label="API Key",
-                            type="password",
-                            value=config['llm_api_key'],
-                            info="Your API key (leave blank to use .env)"
-                        )
-
-            # Change event to update context length slider
-            def update_llm_num_ctx_visibility(llm_provider):
-                return gr.update(visible=llm_provider == "ollama")
-
-            # Bind the change event of llm_provider to update the visibility of context length slider
-            llm_provider.change(
-                fn=update_llm_num_ctx_visibility,
-                inputs=llm_provider,
-                outputs=llm_num_ctx
-            )
 
-            with gr.TabItem("🌐 Browser Settings", id=3):
-                with gr.Group():
                     with gr.Row():
-                        use_own_browser = gr.Checkbox(
-                            label="Use Own Browser",
-                            value=config['use_own_browser'],
-                            info="Use your existing browser instance",
-                        )
-                        keep_browser_open = gr.Checkbox(
-                            label="Keep Browser Open",
-                            value=config['keep_browser_open'],
-                            info="Keep Browser Open between Tasks",
-                        )
-                        headless = gr.Checkbox(
-                            label="Headless Mode",
-                            value=config['headless'],
-                            info="Run browser without GUI",
-                        )
-                        disable_security = gr.Checkbox(
-                            label="Disable Security",
-                            value=config['disable_security'],
-                            info="Disable browser security features",
-                        )
-                        enable_recording = gr.Checkbox(
-                            label="Enable Recording",
-                            value=config['enable_recording'],
-                            info="Enable saving browser recordings",
-                        )
+                        run_button = gr.Button("▶️ Run Agent", variant="primary", scale=1)
+                        stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
 
-                    with gr.Row():
-                        window_w = gr.Number(
-                            label="Window Width",
-                            value=config['window_w'],
-                            info="Browser window width",
-                        )
-                        window_h = gr.Number(
-                            label="Window Height",
-                            value=config['window_h'],
-                            info="Browser window height",
-                        )
-
-                    save_recording_path = gr.Textbox(
-                        label="Recording Path",
-                        placeholder="e.g. ./tmp/record_videos",
-                        value=config['save_recording_path'],
-                        info="Path to save browser recordings",
-                        interactive=True,  # Allow editing only if recording is enabled
-                    )
-
-                    save_trace_path = gr.Textbox(
-                        label="Trace Path",
-                        placeholder="e.g. ./tmp/traces",
-                        value=config['save_trace_path'],
-                        info="Path to save Agent traces",
-                        interactive=True,
-                    )
-
-                    save_agent_history_path = gr.Textbox(
-                        label="Agent History Save Path",
-                        placeholder="e.g., ./tmp/agent_history",
-                        value=config['save_agent_history_path'],
-                        info="Specify the directory where agent history should be saved.",
-                        interactive=True,
-                    )
-
-            with gr.TabItem("🤖 Run Agent", id=4):
-                task = gr.Textbox(
-                    label="Task Description",
-                    lines=4,
-                    placeholder="Enter your task here...",
-                    value=config['task'],
-                    info="Describe what you want the agent to do",
-                )
-                add_infos = gr.Textbox(
-                    label="Additional Information",
-                    lines=3,
-                    placeholder="Add any helpful context or instructions...",
-                    info="Optional hints to help the LLM complete the task",
-                )
-
-                with gr.Row():
-                    run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
-                    stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
-                    
-                # with gr.Row():
-                #     browser_view = gr.HTML(
-                #         value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
-                #         label="Live Browser View",
-                # )
-
-                with gr.Row():
+                with gr.Column(scale=3, min_width=720, elem_id="right-column"):  
                     gr.HTML(
-                        """
+                    """
+                    <div style="height: 580px; width: 100%; margin: 0 !important; padding: 0 !important; display: flex; align-items: center; justify-content: center;">
                         <iframe 
                             src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
                             width="100%" 
-                            height="600px" 
+                            height="100%" 
                             frameborder="0"
+                            style="margin: 0 !important; padding: 0 !important; border: none !important; display: block !important; box-sizing: border-box;"
                             allow="clipboard-read; clipboard-write"
-                            style="margin-top: 20px;"
                             allowfullscreen>
                         </iframe>
-                        """
-                    )
-            
-            with gr.TabItem("🧐 Deep Research", id=5):
-                research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.")
-                with gr.Row():
-                    max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 确保是整数
-                    max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 确保是整数
-                with gr.Row():
-                    research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2)
-                    stop_research_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
-                markdown_output_display = gr.Markdown(label="Research Report")
-                markdown_download = gr.File(label="Download Research Report")
+                    </div>
+                    """
+                )
 
 
-            with gr.TabItem("📊 Results", id=6):
-                with gr.Group():
+        gr.HTML(
+            """
+            <style>
+                #settings-button-container {
+                    display: flex;
+                    justify-content: flex-end;  /* Moves button to the extreme right */
+                }
+                #settings-button button {
+                    background: transparent !important; /* Removes background */
+                    border: none !important; /* Removes border */
+                    box-shadow: none !important; /* Removes shadow */
+                    padding: 0px !important;
+                    font-size: 24px !important; /* Increases icon size */
+                    cursor: pointer;
+                }
+            </style>
+            """
+        )
 
-                    recording_display = gr.Video(label="Latest Recording")
+        with gr.Row(elem_id="settings-button-container"):
+            open_modal_button = gr.Button("⚙️", variant="secondary", elem_id="settings-button")
 
-                    gr.Markdown("### Results")
-                    with gr.Row():
-                        with gr.Column():
-                            final_result_output = gr.Textbox(
-                                label="Final Result", lines=3, show_label=True
+
+        # Modal Container (Initially Hidden)
+        with gr.Group(visible=False) as modal:
+            with gr.Blocks(css=".tab-container { min-width: 800px; }"):
+               with gr.Tabs() as tabs: 
+                    
+                    with gr.TabItem("🌐 Browser Settings", id=1):
+                        with gr.Group():
+                            with gr.Row():
+                                use_own_browser = gr.Checkbox(
+                                    label="Use Own Browser",
+                                    value=config['use_own_browser'],
+                                    info="Use your existing browser instance",
+                                )
+                                keep_browser_open = gr.Checkbox(
+                                    label="Keep Browser Open",
+                                    value=config['keep_browser_open'],
+                                    info="Keep Browser Open between Tasks",
+                                )
+                                headless = gr.Checkbox(
+                                    label="Headless Mode",
+                                    value=config['headless'],
+                                    info="Run browser without GUI",
+                                )
+                                disable_security = gr.Checkbox(
+                                    label="Disable Security",
+                                    value=config['disable_security'],
+                                    info="Disable browser security features",
+                                )
+                                enable_recording = gr.Checkbox(
+                                    label="Enable Recording",
+                                    value=config['enable_recording'],
+                                    info="Enable saving browser recordings",
+                                )
+
+                            with gr.Row():
+                                window_w = gr.Number(
+                                    label="Window Width",
+                                    value=config['window_w'],
+                                    info="Browser window width",
+                                )
+                                window_h = gr.Number(
+                                    label="Window Height",
+                                    value=config['window_h'],
+                                    info="Browser window height",
+                                )
+
+                            save_recording_path = gr.Textbox(
+                                label="Recording Path",
+                                placeholder="e.g. ./tmp/record_videos",
+                                value=config['save_recording_path'],
+                                info="Path to save browser recordings",
+                                interactive=True,  # NOTE(review): always editable here; not tied to enable_recording in this call
+                            )
+
+                            save_trace_path = gr.Textbox(
+                                label="Trace Path",
+                                placeholder="e.g. ./tmp/traces",
+                                value=config['save_trace_path'],
+                                info="Path to save Agent traces",
+                                interactive=True,
                             )
-                        with gr.Column():
-                            errors_output = gr.Textbox(
-                                label="Errors", lines=3, show_label=True
+
+                            save_agent_history_path = gr.Textbox(
+                                label="Agent History Save Path",
+                                placeholder="e.g., ./tmp/agent_history",
+                                value=config['save_agent_history_path'],
+                                info="Specify the directory where agent history should be saved.",
+                                interactive=True,
                             )
-                    with gr.Row():
-                        with gr.Column():
-                            model_actions_output = gr.Textbox(
-                                label="Model Actions", lines=3, show_label=True
+
+                    with gr.TabItem("⚙️ Agent Settings", id=2):
+                        with gr.Group():
+                            agent_type = gr.Radio(
+                                ["org", "custom"],
+                                label="Agent Type",
+                                value=config['agent_type'],
+                                info="Select the type of agent to use",
+                            )
+                            with gr.Column():
+                                max_steps = gr.Slider(
+                                    minimum=1,
+                                    maximum=200,
+                                    value=config['max_steps'],
+                                    step=1,
+                                    label="Max Run Steps",
+                                    info="Maximum number of steps the agent will take",
+                                )
+                                max_actions_per_step = gr.Slider(
+                                    minimum=1,
+                                    maximum=20,
+                                    value=config['max_actions_per_step'],
+                                    step=1,
+                                    label="Max Actions per Step",
+                                    info="Maximum number of actions the agent will take per step",
+                                )
+                            with gr.Column():
+                                use_vision = gr.Checkbox(
+                                    label="Use Vision",
+                                    value=config['use_vision'],
+                                    info="Enable visual processing capabilities",
+                                )
+                                tool_calling_method = gr.Dropdown(
+                                    label="Tool Calling Method",
+                                    value=config['tool_calling_method'],
+                                    interactive=True,
+                                    allow_custom_value=True,  # Allow a value that is not in the choices list
+                                    choices=["auto", "json_schema", "function_calling"],
+                                    info="Tool Calls Funtion Name",
+                                    visible=False
+                                )
+
+                    with gr.TabItem("🔧 LLM Configuration", id=3):
+                        with gr.Group():
+                            llm_provider = gr.Dropdown(
+                                choices=[provider for provider,model in utils.model_names.items()],
+                                label="LLM Provider",
+                                value=config['llm_provider'],
+                                info="Select your preferred language model provider"
+                            )
+                            llm_model_name = gr.Dropdown(
+                                label="Model Name",
+                                choices=utils.model_names['openai'],
+                                value=config['llm_model_name'],
+                                interactive=True,
+                                allow_custom_value=True,  # Allow users to input custom model names
+                                info="Select a model from the dropdown or type a custom model name"
                             )
-                        with gr.Column():
-                            model_thoughts_output = gr.Textbox(
-                                label="Model Thoughts", lines=3, show_label=True
+                            llm_num_ctx = gr.Slider(
+                                minimum=2**8,
+                                maximum=2**16,
+                                value=config['llm_num_ctx'],
+                                step=1,
+                                label="Max Context Length",
+                                info="Controls max context length model needs to handle (less = faster)",
+                                visible=config['llm_provider'] == "ollama"
                             )
+                            llm_temperature = gr.Slider(
+                                minimum=0.0,
+                                maximum=2.0,
+                                value=config['llm_temperature'],
+                                step=0.1,
+                                label="Temperature",
+                                info="Controls randomness in model outputs"
+                            )
+                            with gr.Row():
+                                llm_base_url = gr.Textbox(
+                                    label="Base URL",
+                                    value=config['llm_base_url'],
+                                    info="API endpoint URL (if required)"
+                                )
+                                llm_api_key = gr.Textbox(
+                                    label="API Key",
+                                    type="password",
+                                    value=config['llm_api_key'],
+                                    info="Your API key (leave blank to use .env)"
+                                )
+
+                    # Change event to update context length slider
+                    def update_llm_num_ctx_visibility(llm_provider):
+                        return gr.update(visible=llm_provider == "ollama")
+
+                    # Bind the change event of llm_provider to update the visibility of context length slider
+                    llm_provider.change(
+                        fn=update_llm_num_ctx_visibility,
+                        inputs=llm_provider,
+                        outputs=llm_num_ctx
+                    )
 
-                    trace_file = gr.File(label="Trace File")
+                   
+
+                    # with gr.TabItem("🤖 Run Agent", id=4):
+                    #     task = gr.Textbox(
+                    #         label="Task Description",
+                    #         lines=4,
+                    #         placeholder="Enter your task here...",
+                    #         value=config['task'],
+                    #         info="Describe what you want the agent to do",
+                    #     )
+                    #     add_infos = gr.Textbox(
+                    #         label="Additional Information",
+                    #         lines=3,
+                    #         placeholder="Add any helpful context or instructions...",
+                    #         info="Optional hints to help the LLM complete the task",
+                    #     )
+
+                    #     with gr.Row():
+                    #         run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
+                    #         stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+                            
+                    #     # with gr.Row():
+                    #     #     browser_view = gr.HTML(
+                    #     #         value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
+                    #     #         label="Live Browser View",
+                    #     # )
+
+                    #     with gr.Row():
+                    #         gr.HTML(
+                    #             """
+                    #             <iframe 
+                    #                 src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
+                    #                 width="100%" 
+                    #                 height="600px" 
+                    #                 frameborder="0"
+                    #                 allow="clipboard-read; clipboard-write"
+                    #                 style="margin-top: 20px;"
+                    #                 allowfullscreen>
+                    #             </iframe>
+                    #             """
+                    #         )
+                    
+                    with gr.TabItem("🧐 Deep Research", id=5):
+                        research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.")
+                        with gr.Row():
+                            max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 ensures an integer value
+                            max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 ensures an integer value
+                        with gr.Row():
+                            research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2)
+                            stop_research_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+                        markdown_output_display = gr.Markdown(label="Research Report")
+                        markdown_download = gr.File(label="Download Research Report")
+
+
+                    with gr.TabItem("📊 Results", id=6):
+                        with gr.Group():
+
+                            recording_display = gr.Video(label="Latest Recording")
+
+                            gr.Markdown("### Results")
+                            with gr.Row():
+                                with gr.Column():
+                                    final_result_output = gr.Textbox(
+                                        label="Final Result", lines=3, show_label=True
+                                    )
+                                with gr.Column():
+                                    errors_output = gr.Textbox(
+                                        label="Errors", lines=3, show_label=True
+                                    )
+                            with gr.Row():
+                                with gr.Column():
+                                    model_actions_output = gr.Textbox(
+                                        label="Model Actions", lines=3, show_label=True
+                                    )
+                                with gr.Column():
+                                    model_thoughts_output = gr.Textbox(
+                                        label="Model Thoughts", lines=3, show_label=True
+                                    )
+
+                            trace_file = gr.File(label="Trace File")
+
+                            agent_history_file = gr.File(label="Agent History")
+
+                        # # Bind the stop button click event after errors_output is defined
+                        # stop_button.click(
+                        #     fn=stop_agent,
+                        #     inputs=[],
+                        #     outputs=[errors_output, stop_button, run_button],
+                        # )
+
+                        # # Run button click handler
+                        # run_button.click(
+                        #     fn=run_with_stream,
+                        #         inputs=[
+                        #             agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+                        #             use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
+                        #             save_recording_path, save_agent_history_path, save_trace_path,  # Include the new path
+                        #             enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method
+                        #         ],
+                        #     outputs=[
+                        #         # browser_view,           # Browser view
+                        #         final_result_output,    # Final result
+                        #         errors_output,          # Errors
+                        #         model_actions_output,   # Model actions
+                        #         model_thoughts_output,  # Model thoughts
+                        #         recording_display,      # Latest recording
+                        #         trace_file,             # Trace file
+                        #         agent_history_file,     # Agent history file
+                        #         stop_button,            # Stop button
+                        #         run_button              # Run button
+                        #     ],
+                        # )
+                        
+                        # Run Deep Research
+                        research_button.click(
+                                fn=run_deep_search,
+                                inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless],
+                                outputs=[markdown_output_display, markdown_download, stop_research_button, research_button]
+                        )
+                        # Bind the Deep Research stop button click event
+                        stop_research_button.click(
+                            fn=stop_research_agent,
+                            inputs=[],
+                            outputs=[stop_research_button, research_button],
+                        )
 
-                    agent_history_file = gr.File(label="Agent History")
+                    with gr.TabItem("🎥 Recordings", id=7):
+                        def list_recordings(save_recording_path):
+                            if not os.path.exists(save_recording_path):
+                                return []
 
-                # Bind the stop button click event after errors_output is defined
-                stop_button.click(
-                    fn=stop_agent,
-                    inputs=[],
-                    outputs=[errors_output, stop_button, run_button],
-                )
+                            # Get all video files
+                            recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
 
-                # Run button click handler
-                run_button.click(
-                    fn=run_with_stream,
-                        inputs=[
-                            agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
-                            use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
-                            save_recording_path, save_agent_history_path, save_trace_path,  # Include the new path
-                            enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method
-                        ],
-                    outputs=[
-                        # browser_view,           # Browser view
-                        final_result_output,    # Final result
-                        errors_output,          # Errors
-                        model_actions_output,   # Model actions
-                        model_thoughts_output,  # Model thoughts
-                        recording_display,      # Latest recording
-                        trace_file,             # Trace file
-                        agent_history_file,     # Agent history file
-                        stop_button,            # Stop button
-                        run_button              # Run button
-                    ],
-                )
-                
-                # Run Deep Research
-                research_button.click(
-                        fn=run_deep_search,
-                        inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless],
-                        outputs=[markdown_output_display, markdown_download, stop_research_button, research_button]
-                )
-                # Bind the stop button click event after errors_output is defined
-                stop_research_button.click(
-                    fn=stop_research_agent,
-                    inputs=[],
-                    outputs=[stop_research_button, research_button],
-                )
+                            # Sort recordings by os.path.getctime, oldest first (creation time on Windows, metadata-change time on Unix)
+                            recordings.sort(key=os.path.getctime)
 
-            with gr.TabItem("🎥 Recordings", id=7):
-                def list_recordings(save_recording_path):
-                    if not os.path.exists(save_recording_path):
-                        return []
+                            # Add numbering to the recordings
+                            numbered_recordings = []
+                            for idx, recording in enumerate(recordings, start=1):
+                                filename = os.path.basename(recording)
+                                numbered_recordings.append((recording, f"{idx}. {filename}"))
 
-                    # Get all video files
-                    recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+                            return numbered_recordings
 
-                    # Sort recordings by creation time (oldest first)
-                    recordings.sort(key=os.path.getctime)
+                        recordings_gallery = gr.Gallery(
+                            label="Recordings",
+                            value=list_recordings(config['save_recording_path']),
+                            columns=3,
+                            height="auto",
+                            object_fit="contain"
+                        )
 
-                    # Add numbering to the recordings
-                    numbered_recordings = []
-                    for idx, recording in enumerate(recordings, start=1):
-                        filename = os.path.basename(recording)
-                        numbered_recordings.append((recording, f"{idx}. {filename}"))
+                        refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
+                        refresh_button.click(
+                            fn=list_recordings,
+                            inputs=save_recording_path,
+                            outputs=recordings_gallery
+                        )
+                    
+                    with gr.TabItem("📁 Configuration", id=8):
+                        with gr.Group():
+                            config_file_input = gr.File(
+                                label="Load Config File",
+                                file_types=[".pkl"],
+                                interactive=True
+                            )
 
-                    return numbered_recordings
+                            load_config_button = gr.Button("Load Existing Config From File", variant="primary")
+                            save_config_button = gr.Button("Save Current Config", variant="primary")
 
-                recordings_gallery = gr.Gallery(
-                    label="Recordings",
-                    value=list_recordings(config['save_recording_path']),
-                    columns=3,
-                    height="auto",
-                    object_fit="contain"
-                )
+                            config_status = gr.Textbox(
+                                label="Status",
+                                lines=2,
+                                interactive=False
+                            )
 
-                refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
-                refresh_button.click(
-                    fn=list_recordings,
-                    inputs=save_recording_path,
-                    outputs=recordings_gallery
-                )
-            
-            with gr.TabItem("📁 Configuration", id=8):
-                with gr.Group():
-                    config_file_input = gr.File(
-                        label="Load Config File",
-                        file_types=[".pkl"],
-                        interactive=True
-                    )
+                        load_config_button.click(
+                            fn=update_ui_from_config,
+                            inputs=[config_file_input],
+                            outputs=[
+                                agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
+                                llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+                                use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
+                                window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
+                                config_status
+                            ]
+                        )
 
-                    load_config_button = gr.Button("Load Existing Config From File", variant="primary")
-                    save_config_button = gr.Button("Save Current Config", variant="primary")
+                        save_config_button.click(
+                            fn=save_current_config,
+                            inputs=[
+                                agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
+                                llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+                                use_own_browser, keep_browser_open, headless, disable_security,
+                                enable_recording, window_w, window_h, save_recording_path, save_trace_path,
+                                save_agent_history_path, 
+                            ],  
+                            outputs=[config_status]
+                        )
 
-                    config_status = gr.Textbox(
-                        label="Status",
-                        lines=2,
-                        interactive=False
-                    )
 
-                load_config_button.click(
-                    fn=update_ui_from_config,
-                    inputs=[config_file_input],
-                    outputs=[
-                        agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
-                        llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
-                        use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
-                        window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
-                        task, config_status
-                    ]
-                )
+            # Close Button
+            close_modal_button = gr.Button("❌ Close", variant="stop")
 
-                save_config_button.click(
-                    fn=save_current_config,
-                    inputs=[
-                        agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
-                        llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
-                        use_own_browser, keep_browser_open, headless, disable_security,
-                        enable_recording, window_w, window_h, save_recording_path, save_trace_path,
-                        save_agent_history_path, task,
-                    ],  
-                    outputs=[config_status]
-                )
+        # Bind Events
+        open_modal_button.click(fn=lambda: gr.update(visible=True), inputs=[], outputs=modal)
+
+        close_modal_button.click(fn=close_modal, inputs=[], outputs=modal)
+
+       
+       
+
+
+        # Run button click handler
+        run_button.click(
+            fn=run_with_stream,
+            inputs=[
+                agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+                use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
+                save_recording_path, save_agent_history_path, save_trace_path,  # Include the new path
+                enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method
+            ],
+            outputs=[
+                final_result_output, errors_output, model_actions_output, model_thoughts_output,
+                recording_display, trace_file, agent_history_file, stop_button, run_button
+            ],
+        )
+
+        stop_button.click(
+            fn=stop_agent,
+            inputs=[],
+            outputs=[errors_output, stop_button, run_button],
+        )
+
+        # with gr.Tabs() as tabs:
+        #     with gr.TabItem("⚙️ Agent Settings", id=1):
+        #         with gr.Group():
+        #             agent_type = gr.Radio(
+        #                 ["org", "custom"],
+        #                 label="Agent Type",
+        #                 value=config['agent_type'],
+        #                 info="Select the type of agent to use",
+        #             )
+        #             with gr.Column():
+        #                 max_steps = gr.Slider(
+        #                     minimum=1,
+        #                     maximum=200,
+        #                     value=config['max_steps'],
+        #                     step=1,
+        #                     label="Max Run Steps",
+        #                     info="Maximum number of steps the agent will take",
+        #                 )
+        #                 max_actions_per_step = gr.Slider(
+        #                     minimum=1,
+        #                     maximum=20,
+        #                     value=config['max_actions_per_step'],
+        #                     step=1,
+        #                     label="Max Actions per Step",
+        #                     info="Maximum number of actions the agent will take per step",
+        #                 )
+        #             with gr.Column():
+        #                 use_vision = gr.Checkbox(
+        #                     label="Use Vision",
+        #                     value=config['use_vision'],
+        #                     info="Enable visual processing capabilities",
+        #                 )
+        #                 tool_calling_method = gr.Dropdown(
+        #                     label="Tool Calling Method",
+        #                     value=config['tool_calling_method'],
+        #                     interactive=True,
+        #                     allow_custom_value=True,  # Allow users to input custom model names
+        #                     choices=["auto", "json_schema", "function_calling"],
+        #                     info="Tool Calls Funtion Name",
+        #                     visible=False
+        #                 )
+
+        #     with gr.TabItem("🔧 LLM Configuration", id=2):
+        #         with gr.Group():
+        #             llm_provider = gr.Dropdown(
+        #                 choices=[provider for provider,model in utils.model_names.items()],
+        #                 label="LLM Provider",
+        #                 value=config['llm_provider'],
+        #                 info="Select your preferred language model provider"
+        #             )
+        #             llm_model_name = gr.Dropdown(
+        #                 label="Model Name",
+        #                 choices=utils.model_names['openai'],
+        #                 value=config['llm_model_name'],
+        #                 interactive=True,
+        #                 allow_custom_value=True,  # Allow users to input custom model names
+        #                 info="Select a model from the dropdown or type a custom model name"
+        #             )
+        #             llm_num_ctx = gr.Slider(
+        #                 minimum=2**8,
+        #                 maximum=2**16,
+        #                 value=config['llm_num_ctx'],
+        #                 step=1,
+        #                 label="Max Context Length",
+        #                 info="Controls max context length model needs to handle (less = faster)",
+        #                 visible=config['llm_provider'] == "ollama"
+        #             )
+        #             llm_temperature = gr.Slider(
+        #                 minimum=0.0,
+        #                 maximum=2.0,
+        #                 value=config['llm_temperature'],
+        #                 step=0.1,
+        #                 label="Temperature",
+        #                 info="Controls randomness in model outputs"
+        #             )
+        #             with gr.Row():
+        #                 llm_base_url = gr.Textbox(
+        #                     label="Base URL",
+        #                     value=config['llm_base_url'],
+        #                     info="API endpoint URL (if required)"
+        #                 )
+        #                 llm_api_key = gr.Textbox(
+        #                     label="API Key",
+        #                     type="password",
+        #                     value=config['llm_api_key'],
+        #                     info="Your API key (leave blank to use .env)"
+        #                 )
+
+        #     # Change event to update context length slider
+        #     def update_llm_num_ctx_visibility(llm_provider):
+        #         return gr.update(visible=llm_provider == "ollama")
+
+        #     # Bind the change event of llm_provider to update the visibility of context length slider
+        #     llm_provider.change(
+        #         fn=update_llm_num_ctx_visibility,
+        #         inputs=llm_provider,
+        #         outputs=llm_num_ctx
+        #     )
+
+        #     with gr.TabItem("🌐 Browser Settings", id=3):
+        #         with gr.Group():
+        #             with gr.Row():
+        #                 use_own_browser = gr.Checkbox(
+        #                     label="Use Own Browser",
+        #                     value=config['use_own_browser'],
+        #                     info="Use your existing browser instance",
+        #                 )
+        #                 keep_browser_open = gr.Checkbox(
+        #                     label="Keep Browser Open",
+        #                     value=config['keep_browser_open'],
+        #                     info="Keep Browser Open between Tasks",
+        #                 )
+        #                 headless = gr.Checkbox(
+        #                     label="Headless Mode",
+        #                     value=config['headless'],
+        #                     info="Run browser without GUI",
+        #                 )
+        #                 disable_security = gr.Checkbox(
+        #                     label="Disable Security",
+        #                     value=config['disable_security'],
+        #                     info="Disable browser security features",
+        #                 )
+        #                 enable_recording = gr.Checkbox(
+        #                     label="Enable Recording",
+        #                     value=config['enable_recording'],
+        #                     info="Enable saving browser recordings",
+        #                 )
+
+        #             with gr.Row():
+        #                 window_w = gr.Number(
+        #                     label="Window Width",
+        #                     value=config['window_w'],
+        #                     info="Browser window width",
+        #                 )
+        #                 window_h = gr.Number(
+        #                     label="Window Height",
+        #                     value=config['window_h'],
+        #                     info="Browser window height",
+        #                 )
+
+        #             save_recording_path = gr.Textbox(
+        #                 label="Recording Path",
+        #                 placeholder="e.g. ./tmp/record_videos",
+        #                 value=config['save_recording_path'],
+        #                 info="Path to save browser recordings",
+        #                 interactive=True,  # Allow editing only if recording is enabled
+        #             )
+
+        #             save_trace_path = gr.Textbox(
+        #                 label="Trace Path",
+        #                 placeholder="e.g. ./tmp/traces",
+        #                 value=config['save_trace_path'],
+        #                 info="Path to save Agent traces",
+        #                 interactive=True,
+        #             )
+
+        #             save_agent_history_path = gr.Textbox(
+        #                 label="Agent History Save Path",
+        #                 placeholder="e.g., ./tmp/agent_history",
+        #                 value=config['save_agent_history_path'],
+        #                 info="Specify the directory where agent history should be saved.",
+        #                 interactive=True,
+        #             )
+
+        #     with gr.TabItem("🤖 Run Agent", id=4):
+        #         task = gr.Textbox(
+        #             label="Task Description",
+        #             lines=4,
+        #             placeholder="Enter your task here...",
+        #             value=config['task'],
+        #             info="Describe what you want the agent to do",
+        #         )
+        #         add_infos = gr.Textbox(
+        #             label="Additional Information",
+        #             lines=3,
+        #             placeholder="Add any helpful context or instructions...",
+        #             info="Optional hints to help the LLM complete the task",
+        #         )
+
+        #         with gr.Row():
+        #             run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
+        #             stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+                    
+        #         # with gr.Row():
+        #         #     browser_view = gr.HTML(
+        #         #         value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
+        #         #         label="Live Browser View",
+        #         # )
+
+        #         with gr.Row():
+        #             gr.HTML(
+        #                 """
+        #                 <iframe 
+        #                     src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
+        #                     width="100%" 
+        #                     height="600px" 
+        #                     frameborder="0"
+        #                     allow="clipboard-read; clipboard-write"
+        #                     style="margin-top: 20px;"
+        #                     allowfullscreen>
+        #                 </iframe>
+        #                 """
+        #             )
+            
+        #     with gr.TabItem("🧐 Deep Research", id=5):
+        #         research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.")
+        #         with gr.Row():
+        #             max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 ensures the value is an integer
+        #             max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 ensures the value is an integer
+        #         with gr.Row():
+        #             research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2)
+        #             stop_research_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+        #         markdown_output_display = gr.Markdown(label="Research Report")
+        #         markdown_download = gr.File(label="Download Research Report")
+
+
+        #     with gr.TabItem("📊 Results", id=6):
+        #         with gr.Group():
+
+        #             recording_display = gr.Video(label="Latest Recording")
+
+        #             gr.Markdown("### Results")
+        #             with gr.Row():
+        #                 with gr.Column():
+        #                     final_result_output = gr.Textbox(
+        #                         label="Final Result", lines=3, show_label=True
+        #                     )
+        #                 with gr.Column():
+        #                     errors_output = gr.Textbox(
+        #                         label="Errors", lines=3, show_label=True
+        #                     )
+        #             with gr.Row():
+        #                 with gr.Column():
+        #                     model_actions_output = gr.Textbox(
+        #                         label="Model Actions", lines=3, show_label=True
+        #                     )
+        #                 with gr.Column():
+        #                     model_thoughts_output = gr.Textbox(
+        #                         label="Model Thoughts", lines=3, show_label=True
+        #                     )
+
+        #             trace_file = gr.File(label="Trace File")
+
+        #             agent_history_file = gr.File(label="Agent History")
+
+        #         # Bind the stop button click event after errors_output is defined
+        #         stop_button.click(
+        #             fn=stop_agent,
+        #             inputs=[],
+        #             outputs=[errors_output, stop_button, run_button],
+        #         )
+
+        #         # Run button click handler
+        #         run_button.click(
+        #             fn=run_with_stream,
+        #                 inputs=[
+        #                     agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+        #                     use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
+        #                     save_recording_path, save_agent_history_path, save_trace_path,  # Include the new path
+        #                     enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method
+        #                 ],
+        #             outputs=[
+        #                 # browser_view,           # Browser view
+        #                 final_result_output,    # Final result
+        #                 errors_output,          # Errors
+        #                 model_actions_output,   # Model actions
+        #                 model_thoughts_output,  # Model thoughts
+        #                 recording_display,      # Latest recording
+        #                 trace_file,             # Trace file
+        #                 agent_history_file,     # Agent history file
+        #                 stop_button,            # Stop button
+        #                 run_button              # Run button
+        #             ],
+        #         )
+                
+        #         # Run Deep Research
+        #         research_button.click(
+        #                 fn=run_deep_search,
+        #                 inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless],
+        #                 outputs=[markdown_output_display, markdown_download, stop_research_button, research_button]
+        #         )
+        #         # Bind the stop button click event after errors_output is defined
+        #         stop_research_button.click(
+        #             fn=stop_research_agent,
+        #             inputs=[],
+        #             outputs=[stop_research_button, research_button],
+        #         )
+
+        #     with gr.TabItem("🎥 Recordings", id=7):
+        #         def list_recordings(save_recording_path):
+        #             if not os.path.exists(save_recording_path):
+        #                 return []
+
+        #             # Get all video files
+        #             recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+
+        #             # Sort recordings by creation time (oldest first)
+        #             recordings.sort(key=os.path.getctime)
+
+        #             # Add numbering to the recordings
+        #             numbered_recordings = []
+        #             for idx, recording in enumerate(recordings, start=1):
+        #                 filename = os.path.basename(recording)
+        #                 numbered_recordings.append((recording, f"{idx}. {filename}"))
+
+        #             return numbered_recordings
+
+        #         recordings_gallery = gr.Gallery(
+        #             label="Recordings",
+        #             value=list_recordings(config['save_recording_path']),
+        #             columns=3,
+        #             height="auto",
+        #             object_fit="contain"
+        #         )
+
+        #         refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
+        #         refresh_button.click(
+        #             fn=list_recordings,
+        #             inputs=save_recording_path,
+        #             outputs=recordings_gallery
+        #         )
+            
+        #     with gr.TabItem("📁 Configuration", id=8):
+        #         with gr.Group():
+        #             config_file_input = gr.File(
+        #                 label="Load Config File",
+        #                 file_types=[".pkl"],
+        #                 interactive=True
+        #             )
+
+        #             load_config_button = gr.Button("Load Existing Config From File", variant="primary")
+        #             save_config_button = gr.Button("Save Current Config", variant="primary")
+
+        #             config_status = gr.Textbox(
+        #                 label="Status",
+        #                 lines=2,
+        #                 interactive=False
+        #             )
+
+        #         load_config_button.click(
+        #             fn=update_ui_from_config,
+        #             inputs=[config_file_input],
+        #             outputs=[
+        #                 agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
+        #                 llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+        #                 use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
+        #                 window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
+        #                 task, config_status
+        #             ]
+        #         )
+
+        #         save_config_button.click(
+        #             fn=save_current_config,
+        #             inputs=[
+        #                 agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
+        #                 llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
+        #                 use_own_browser, keep_browser_open, headless, disable_security,
+        #                 enable_recording, window_w, window_h, save_recording_path, save_trace_path,
+        #                 save_agent_history_path, task,
+        #             ],  
+        #             outputs=[config_status]
+        #         )
 
 
         # Attach the callback to the LLM provider dropdown

From 21404f011a0372ff650ea5c1172e405bd9d3eadb Mon Sep 17 00:00:00 2001
From: prathamxcaliber <pratham.sharma@xcaliber.health>
Date: Sun, 2 Mar 2025 22:21:58 +0530
Subject: [PATCH 7/9] feat: add custom_theme for the app

---
 custom_theme.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 webui.py        | 62 ++++++++++++++++++++++++-----------------
 2 files changed, 110 insertions(+), 25 deletions(-)
 create mode 100644 custom_theme.py

diff --git a/custom_theme.py b/custom_theme.py
new file mode 100644
index 00000000..80c25394
--- /dev/null
+++ b/custom_theme.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from gradio.themes.base import Base
+from gradio.themes.utils import colors, fonts, sizes
+
+
+class custom_theme(Base):  # Gradio theme: Base subclass defaulting to blue/sky/gray hues with gradient buttons.
+    def __init__(  # Each argument is forwarded unchanged to Base.__init__ below.
+        self,
+        *,  # Everything after this marker must be passed by keyword.
+        primary_hue: colors.Color | str = colors.blue,
+        secondary_hue: colors.Color | str = colors.sky,
+        neutral_hue: colors.Color | str = colors.gray,
+        spacing_size: sizes.Size | str = sizes.spacing_md,
+        radius_size: sizes.Size | str = sizes.radius_lg,
+        text_size: sizes.Size | str = sizes.text_md,
+        font: fonts.Font | str | Iterable[fonts.Font | str] = (  # Montserrat first; remaining entries presumably act as fallbacks.
+            fonts.GoogleFont("Montserrat"),
+            "ui-sans-serif",
+            "system-ui",
+            "sans-serif",
+        ),
+        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Inter"),  # NOTE(review): Inter is a proportional typeface; confirm it is intended for the mono stack.
+            "ui-monospace",
+            "Consolas",
+            "monospace",
+        ),
+    ):
+        super().__init__(  # Let Base set up the palette, sizes and fonts before the overrides below.
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            spacing_size=spacing_size,
+            radius_size=radius_size,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        self.name = "custom_theme"  # Assigned after Base.__init__ has run; same string as the class name.
+        super().set(  # Per-variable overrides; per Gradio's theming docs a value starting with "*" refers to another theme variable.
+            button_border_width="0px",
+            checkbox_label_border_width="1px",
+            button_transform_hover="scale(1.02)",
+            button_transition="all 0.1s ease-in-out",
+            slider_color="*primary_400",
+            button_primary_background_fill="linear-gradient(120deg, *secondary_500 0%, *primary_300 60%, *primary_400 100%)",
+            button_primary_background_fill_hover="linear-gradient(120deg, *secondary_400 0%, *primary_300 60%, *primary_300 100%)",
+            button_primary_text_color="*button_secondary_text_color",
+            button_secondary_background_fill="linear-gradient(120deg, *neutral_300 0%, *neutral_100 60%, *neutral_200 100%)",
+            button_secondary_background_fill_hover="linear-gradient(120deg, *neutral_200 0%, *neutral_100 60%, *neutral_100 100%)",
+            checkbox_label_background_fill_selected="linear-gradient(120deg, *primary_400 0%, *primary_300 60%, *primary_400 100%)",
+            checkbox_label_border_color_selected="*primary_400",
+            checkbox_background_color_selected="*primary_400",
+            checkbox_label_text_color_selected="*button_secondary_text_color",
+            slider_color_dark="*primary_500",  # Keys ending in "_dark" are, per Gradio's theming docs, the dark-mode values.
+            button_primary_background_fill_dark="linear-gradient(120deg, *secondary_600 0%, *primary_500 60%, *primary_600 100%)",
+            button_primary_background_fill_hover_dark="linear-gradient(120deg, *secondary_500 0%, *primary_500 60%, *primary_500 100%)",
+            button_primary_text_color_dark="*button_secondary_text_color",
+            button_secondary_background_fill_dark="linear-gradient(120deg, *neutral_700 0%, *neutral_600 60%, *neutral_700 100%)",
+            button_secondary_background_fill_hover_dark="linear-gradient(120deg, *neutral_600 0%, *neutral_600 60%, *neutral_700 100%)",
+            checkbox_label_background_fill_selected_dark="linear-gradient(120deg, *primary_600 0%, *primary_500 60%, *primary_600 100%)",
+            checkbox_label_border_color_selected_dark="*primary_600",
+            checkbox_background_color_selected_dark="*primary_600",
+            checkbox_label_text_color_selected_dark="*button_secondary_text_color",
+            block_shadow="*shadow_drop_lg",
+            button_secondary_shadow_hover="*shadow_drop_lg",
+            button_primary_shadow_hover="0 1px 3px 0 *primary_200, 0 1px 2px -1px *primary_200",
+            button_secondary_shadow_dark="none",  # Shadows are switched off for both button kinds in the dark variant.
+            button_primary_shadow_dark="none",
+        )
diff --git a/webui.py b/webui.py
index 2ad1c397..51385473 100644
--- a/webui.py
+++ b/webui.py
@@ -32,6 +32,7 @@
 from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
 from src.controller.custom_controller import CustomController
 from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
+from custom_theme import custom_theme
 from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, save_current_config, update_ui_from_config
 from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot
 
@@ -74,6 +75,9 @@ def open_modal():
 def close_modal():
     return gr.update(visible=False)
 
+def show_iframe():  # Counterpart of close_modal: returns a Gradio update with visible=True instead of visible=False.
+    return gr.update(visible=True)
+
 async def stop_agent():
     """Request the agent to stop and update UI with enhanced feedback"""
     global _global_agent_state, _global_browser_context, _global_browser, _global_agent
@@ -650,7 +654,8 @@ async def run_with_stream(
     "Origin": Origin(),
     "Citrus": Citrus(),
     "Ocean": Ocean(),
-    "Base": Base()
+    "Base": Base(),
+    "custom_theme": custom_theme()
 }
 
 async def close_global_browser():
@@ -690,7 +695,7 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p
     return markdown_content, file_path, gr.update(value="Stop", interactive=True),  gr.update(interactive=True) 
     
 
-def create_ui(config, theme_name="Ocean"):
+def create_ui(config, theme_name="custom_theme"):
     css = """
     .gradio-container {
         max-width: 1200px !important;
@@ -711,22 +716,25 @@ def create_ui(config, theme_name="Ocean"):
     with gr.Blocks(
             title="EHR Operator", theme=theme_map[theme_name], css="body { display: flex; justify-content: center; } #main-container { max-width: 1200px; width: 100%; }"
     ) as demo:
+        
         with gr.Row():
             gr.Markdown(
                 """
                 <h1 style="font-size: 2.5em; font-weight: 800; text-align: center;">🌐 EHR Operator</h1>
-                <h3 style="font-size: 1.5em; font-weight: 600; text-align: center;">Control your browser with AI assistance</h3>
+                <h3 style="font-size: 1.5em; font-weight: 600; text-align: center;">Control your EHR via prompts</h3>
                 """,
                 elem_classes=["header-text"],
             )
 
-        with gr.Blocks(elem_id="main-container"):  
-            with gr.Row(equal_height=True):  
-                with gr.Column(scale=2, min_width=480, elem_id="left-column"):  
+                
+        with gr.Blocks(elem_id="main-container"):
+            # Main Row (Contains left-column and right-column iframe)
+            with gr.Row(equal_height=True):
+                with gr.Column(scale=2, min_width=480, elem_id="left-column"):
                     with gr.Group():
                         task = gr.Textbox(
                             label="Task Description",
-                            lines=10,  
+                            lines=10,
                             placeholder="Enter your task here...",
                             value=config['task'],
                             info="Describe what you want the agent to do",
@@ -740,25 +748,29 @@ def create_ui(config, theme_name="Ocean"):
                         )
 
                     with gr.Row():
-                        run_button = gr.Button("▶️ Run Agent", variant="primary", scale=1)
-                        stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+                        run_button = gr.Button("Run Agent", variant="primary", scale=1)
+                        stop_button = gr.Button("Stop", variant="stop", scale=1)
 
-                with gr.Column(scale=3, min_width=720, elem_id="right-column"):  
+                # Initially hidden iframe column inside the SAME Row
+                with gr.Column(scale=3, min_width=720, elem_id="right-column", visible=False) as iframe_row:
                     gr.HTML(
-                    """
-                    <div style="height: 580px; width: 100%; margin: 0 !important; padding: 0 !important; display: flex; align-items: center; justify-content: center;">
-                        <iframe 
-                            src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
-                            width="100%" 
-                            height="100%" 
-                            frameborder="0"
-                            style="margin: 0 !important; padding: 0 !important; border: none !important; display: block !important; box-sizing: border-box;"
-                            allow="clipboard-read; clipboard-write"
-                            allowfullscreen>
-                        </iframe>
-                    </div>
-                    """
-                )
+                        """
+                        <div style="height: 580px; width: 100%; margin: 0 !important; padding: 0 !important; display: flex; align-items: center; justify-content: center;">
+                            <iframe 
+                                src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
+                                width="100%" 
+                                height="100%" 
+                                frameborder="0"
+                                style="margin: 0 !important; padding: 0 !important; border: none !important; display: block !important; box-sizing: border-box;"
+                                allow="clipboard-read; clipboard-write"
+                                allowfullscreen>
+                            </iframe>
+                        </div>
+                        """
+                    )
+
+            # Button click will now reveal the iframe in the SAME row
+            run_button.click(show_iframe, outputs=iframe_row)
 
 
         gr.HTML(
@@ -1574,7 +1586,7 @@ def main():
     parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
     parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
     parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
-    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+    parser.add_argument("--theme", type=str, default="custom_theme", choices=theme_map.keys(), help="Theme to use for the UI")
     parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
     args = parser.parse_args()
 

From cae287f09ee6c5b35f4ff093b79026eeb1f009fe Mon Sep 17 00:00:00 2001
From: prathamxcaliber <pratham.sharma@xcaliber.health>
Date: Sun, 2 Mar 2025 22:57:05 +0530
Subject: [PATCH 8/9] feat: add XC logo to favicon

---
 logo.png | Bin 0 -> 2559 bytes
 webui.py |   8 ++++++--
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 logo.png

diff --git a/logo.png b/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..99ebb6fcdda35efa01aeb1606a8dd8004e725f4f
GIT binary patch
literal 2559
zcmV<b2>|wqP)<h;3K|Lk000e1NJLTq001fg001fo1^@s6#ly*400009a7bBm000XU
z000XU0RWnu7ytkO0drDELIAGL9O(c600d`2O+f$vv5yP<VFdsH36x1hK~#7Fm04Yg
zT~`%e>)dlQxzom)Oe&-kX>2-U#imxRNfERlzQiA}A_&P#{R1C_wiJA=g3pq-zKDoL
zv?!9Qtp$ZlD~JP1lNNuHjCRDSW9>{rlFm$K?%jKJ@AbF$nLBOhA$RUQ=j@-g*ZS64
z-&#RP*JJ;`_HYdG5hA!90w6^KAS8(C4>bfKrtdT`q{js5IbiyvoAfWFKarpF92!OR
zxp@qaLm8L<MH$a?_&Y6_$J@S9hGDvok<L<pM}PE{Tj%hR<*N^+ZGV^-hAdCWtExc!
z8)fkTz6g(rDww>JIe83GC4?Tv1Pax?u1|bOB&-jn^G#tij2~&zugAeok~Tro!Mu6a
zOuonbO5Ub>OiB|WWt?hRHm`-0-?oOHG7Dw%FwcN-A*s@QQc434x9R3kWg0o5Y9KvF
znWPh{g=+Jwu>6eF>}D0>gB6o!p^{1Uu;6RfP|!mOCL&wvP{W#0{J1Gi!zv`QP$(Ol
z)+%kQImzZ3b~8jU0(Q-r)+1hv=TP<BEl9eC?v=;4hDk6j8765PUMfu0FrgSt5XD<s
zAe4<od-Bc%h)F;>fLN)RJ*q|6u{suPwu6^7<JvNzijetqb_g<$&8E_^Xya8V^aHRH
zx=^+E7Yn9!*ii|#!3^38OX%P&YPXQ#*c0Kv!w)-x2!^7q3csBYW!Cb!tW|*%3YOTp
zkk_;jXO7rH?I^Zk)VQdi7bp;H*Af<du@*$*tQn2L`8#?DQjMGNWuF2+EY^)!EJX>U
zb|TRv)-ld8mVCk%quN7R1N@xVK&mXT%@R4W<nLG|j5>Ny<r5yx#CmB!b|!bCdQegQ
z-8VO|yOD_-BY;=eB3#-a-d7!w$BE`^T%ntMYS6&py^TQ%FTD|A=`z9kroafzict?C
zLEE!qR#wX|JY&cP7B{mtKDejB`}X#rzFt@!!Rf^jsJW31Uf{!@L7MWw9W5Nat%ZG)
z2s>T#uZ-ZCmxj1{EsDUg#Br49<I$qEAL{J|C+uJZzj`&m9g_|0n<$%o@<5veLHOx~
zEii0ZmdU^GyrqE?M|Q!!!OjBOb?kjDf6n}U2tS|SRG~QQ<seUtCskZig%wnjLjgPC
z`Y^&%i^H)^KK7m-3+!;oFi4*V-rIw39i6zLkoC2Go!9|Azl-xS_b@9ppf8-cgjfTb
zW8}is2rv9EjBW6R!+p4I(xC!JZtcN?ckP0Aa$QL4{^snkq}|{di%7#^hGyd_%u=b5
z2lY4$zE3BuE?kLY8-4Ab2^b_l*_)v8;9c#G!mKAb<}Qrj*_Vg>x%iqKI%D(4f7qh&
zT+hz!F*M838ejBp;CzpgW2uUPgg;A|eC)$Xu(p_W-)@8xclVR2n;p%~0{(SKYnw60
z<Q(sK^Xy<#HzBNE2_=mdzF}rJZY&s0H{@wiQy6PF8sC&Kw7LOwdSM)|M-pDA_lyhq
z+rJIz7iTw97>eo?WE!HCYeeV&8=V#ky+mbOnyDhVT#d8q%;FudmO-59+W~NXIneX}
z8NrRN)ir`=UfvQip0yLG!=f-yp^zRLWlSb5O0qF~e~ju(+nKfIY>SO(gxQGla|@#s
z^yAwKI(K1c0`LLZkJV{4#~T*&+QoY8R8$gB8lD}gA$g&uR(4c`0ylldbT$(HfAE*B
zw-xl#8j+=0FKbL0ft+cXGHPyXwOI_}{V}jcvBbfwb|Jia>b!}43SrLgm#z@~GC9dx
z3A($dn`?Zv(nS>_zk|n5i=l<LX3|a0S*fW3>jYYr%!5OrS~?B~ZNtUb+`<qqu7n*0
zJ^1ck6>^}WekHGyGbXA?bQg*?rx9+5enMwRfq7=h``JO$;yIOm`x6s*^kB={`N^NR
zU}Fcx-ha3!VN5qRQm)m5o0|Mn6eeS{6-#N;7zx%GR_K+53^f&3rRmw%KPox=iz#+a
zO#rUGLGa}KI9><45WaMDS7G~IhpkEx`3OZ6r-E2Wjuv;&C}VMtj9%VqIt#@l$7foc
zzNHaC`UofQ+r?IRX=wy!&X0$z`|oVw(a#KU>ZZo(E>^;1AMmrs`tZ%q52|!otj87<
zM-nK4uqVy{SY@Ev1Of$wnW-KfpY3;|`b^60$8K+7{$Gi%o!f$YrW=^Lx#<Wyd!Ps3
zJ244MD-l+%Mx0F0Ik>-pNo8YS{?r6M^}9_GD3p2(vO%pn4!O9i18=C*?ZRThCE&>q
z^~X}rY9hcFUz3!$kr>da=dQ#0*3N+-3wLkgr?b;N7cyU;PEeXX*hu+8)MfjUt&}}c
zsUVG0N}MYt^T~S?E+?C@&7YhfQWBPod95avb?VvcOg7%h_0`V}z*|a6?Xu|=tLjWz
zRZ2M<S>1KeKR(;y%>Lfk;>?1UM_zy#d02Wq!uNlDozA^-<20ME%te2n4y>68r5L2b
z;PP$A7^Nx?i$>Av_31r;$KT)YEWC6%!kK>z;ZOg}DYD_iIoT#@gk8Eq_(am|%z*|z
z{((NsBy;b7SJxuv|2={ilLP+o#SyJ<a1(<qgUarWl%V<ye&gv?>d>qpvRcC-7iLhA
zhvX1`S}W*0#Ca@6kC|M-rtd+&p+R3Nh}SPi?cS*tg)nnH6OGlEo<R2g=Ub|BxK@cq
z9QfJFwcx;J?Sl~w?H>^kY$r7lD#sA~X05HQ@j*)S%WHQ&Txn>O^}MmsLiF|vT3Rbj
z0xSop(wQ|SG$~-TciwChZI#Ykhtd@S;-zsXz<U$K4rHAYteRqoB|K48k%HCl+~A_B
z#Y1{>?+xhTeB%gpWT{JzB2+PRA=uOjQ>_75_$RnVtc&cp)l=qj!q`nQEn?Mp6~VNL
zAZtUXo$h;(@mwt2H_AjZE+7_$7L6&@Z5Ds#(Ey+lge+eZSlvw+J568JR^=R21nQW#
zSiqc6IfnRx{cdBHIe@oj6dQFjiwCOKq0$PLI$Rk$S(B{BVud{?KU8=7w1|@fqB@lw
z#$;%X6#-SvRwX2*mZ`#*h7xl=O>xNH9BUzq`LwjaI*3cZs)`bVO*G9*sFK5FB{gZ&
zxpHFglBClH?>GXv7rPS#*D4lxlz0eL2rPKC(x)g2yuH4yjtjKd0QSDq;iT#$%ojVY
za1$7HGc1^M%~P>VX$XVWKCCG%EzIoN88N2jP{?%?iWL^)u=y_2n1`NNJ_Nnq<LT8Q
z|NkWBfyjkcc0_(2JXFbjUNscB#5(){RC7EvmbGjRC@`})Z1*vQb2MyEoj!eF@lC7q
V5668N82kVL002ovPDHLkV1kc<)m{Jq

literal 0
HcmV?d00001

diff --git a/webui.py b/webui.py
index 51385473..f95737e4 100644
--- a/webui.py
+++ b/webui.py
@@ -712,10 +712,14 @@ def create_ui(config, theme_name="custom_theme"):
         border-radius: 10px;
     }
     """
-
+    custom_favicon = """
+        <link rel="icon" type="image/png" href="logo.png">
+        """
+    
     with gr.Blocks(
             title="EHR Operator", theme=theme_map[theme_name], css="body { display: flex; justify-content: center; } #main-container { max-width: 1200px; width: 100%; }"
     ) as demo:
+        gr.HTML(custom_favicon)
         
         with gr.Row():
             gr.Markdown(
@@ -1593,7 +1597,7 @@ def main():
     config_dict = default_config()
 
     demo = create_ui(config_dict, theme_name=args.theme)
-    demo.launch(server_name=args.ip, server_port=args.port)
+    demo.launch(server_name=args.ip, server_port=args.port,favicon_path="logo.png")
 
 if __name__ == '__main__':
     main()

From e76e5a0f31dc78f37b633f16b08caf7f0c674683 Mon Sep 17 00:00:00 2001
From: ashdude1401 <ashdudecool1401@gmail.com>
Date: Mon, 3 Mar 2025 23:06:32 +0530
Subject: [PATCH 9/9] feat: Enhance UI with iframe display and task saving
 functionality

This commit introduces several UI enhancements and a new feature to save task details:

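-   Adds functionality to display an iframe in the UI upon clicking the "Run Agent" button.
-   Implements a "Save" button (revealed together with the iframe) that sends a POST request to the backend with the title and task description.
-   Introduces a `send_post_request` function to handle the API call to save task information; it requires the `BASE_URL` environment variable to be set.
-   Updates the UI layout, including adjustments to column scaling and the addition of an input field for the operation title.
-   Points the embedded VNC iframe at port 6080 instead of 6081, matching the port published by docker-setup.ps1.
-   Launches Gradio with `share=True` and `show_api=False`, and hides the Gradio footer and the settings button.
-   Adds a docker-setup.ps1 file to build and run the agent.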
---
 docker-setup.ps1 |   2 +
 webui.py         | 103 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 86 insertions(+), 19 deletions(-)
 create mode 100644 docker-setup.ps1

diff --git a/docker-setup.ps1 b/docker-setup.ps1
new file mode 100644
index 00000000..82e9ceae
--- /dev/null
+++ b/docker-setup.ps1
@@ -0,0 +1,2 @@
+docker build -t agent .
+docker run -p 7788:7788 -p 6080:6080 -p 5901:5901 agent
\ No newline at end of file
diff --git a/webui.py b/webui.py
index f95737e4..2adaf628 100644
--- a/webui.py
+++ b/webui.py
@@ -1,5 +1,8 @@
 import pdb
 import logging
+import requests
+
+
 
 from dotenv import load_dotenv
 
@@ -75,8 +78,8 @@ def open_modal():
 def close_modal():
     return gr.update(visible=False)
 
-def show_iframe():
-    return gr.update(visible=True)
+def show_iframe_and_save():
+    return gr.update(visible=True), gr.update(visible=True)
 
 async def stop_agent():
     """Request the agent to stop and update UI with enhanced feedback"""
@@ -693,7 +696,48 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p
                                                         )
     
     return markdown_content, file_path, gr.update(value="Stop", interactive=True),  gr.update(interactive=True) 
+import requests
+
+
+def send_post_request(title, task, timeout=10):
+    """Save an operation by POSTing it to ``{BASE_URL}/agent/operations``.
+
+    Args:
+        title: Operation title, sent as the ``title`` field of the JSON body.
+        task: Task description, sent as the ``prompt`` field of the JSON body.
+        timeout: Seconds to wait for the backend before giving up.
+
+    Returns:
+        A success message, or a string describing the failure. Failures are
+        reported in the returned string rather than raised.
+    """
+    try:
+        print(f"🔍 title: {title}, task: {task}")
+        base_url = os.getenv("BASE_URL")
+        if not base_url:
+            raise ValueError("BASE_URL environment variable is not set")
+        url = f"{base_url}/agent/operations"
+        payload = {"title": title, "prompt": task}
+        headers = {"Content-Type": "application/json"}
+        print(f"📦 Payload: {payload}")
+        # Without a timeout, requests waits forever on an unresponsive backend.
+        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
+        print(f"🛑 Response Status Code: {response.status_code}")
+        print(f"📩 Response Content: {response.text}")
+        response.raise_for_status()  # Raises an HTTPError for bad responses (4xx and 5xx)
+        return "Title and Task saved successfully!"
+    except requests.exceptions.RequestException as e:
+        print(f" Request Error: {e}")
+        return f"Error: {e}"
     
+    except ValueError as ve:
+        print(f" Value Error: {ve}")
+        return str(ve)
+    except Exception as ex:
+        print(f"⚠️ Unexpected Error: {ex}")
+        return f"Unexpected Error: {ex}"
+
+
 
 def create_ui(config, theme_name="custom_theme"):
     css = """
@@ -711,42 +755,58 @@ def create_ui(config, theme_name="custom_theme"):
         padding: 15px;
         border-radius: 10px;
     }
+    footer {
+        display: none !important;  /* Hide the footer */
+    }
+    #built-with-gradio, #settings {
+        display: none !important;
+    }
     """
     custom_favicon = """
         <link rel="icon" type="image/png" href="logo.png">
         """
     
+
+
+
     with gr.Blocks(
-            title="EHR Operator", theme=theme_map[theme_name], css="body { display: flex; justify-content: center; } #main-container { max-width: 1200px; width: 100%; }"
+            title="EHR Operator", theme=theme_map[theme_name], css="body { display: flex; justify-content: center; } #main-container { max-width: 1200px; width: 100%; } footer { display: none !important; }"
     ) as demo:
         gr.HTML(custom_favicon)
         
-        with gr.Row():
-            gr.Markdown(
-                """
-                <h1 style="font-size: 2.5em; font-weight: 800; text-align: center;">🌐 EHR Operator</h1>
-                <h3 style="font-size: 1.5em; font-weight: 600; text-align: center;">Control your EHR via prompts</h3>
-                """,
-                elem_classes=["header-text"],
-            )
+        # with gr.Row():
+        #     gr.Markdown(
+        #         """
+        #         <h1 style="font-size: 2.5em; font-weight: 800; text-align: center;">🌐 EHR Operator</h1>
+        #         <h3 style="font-size: 1.5em; font-weight: 600; text-align: center;">Control your EHR via prompts</h3>
+        #         """,
+        #         elem_classes=["header-text"],
+        #     )
 
                 
         with gr.Blocks(elem_id="main-container"):
             # Main Row (Contains left-column and right-column iframe)
             with gr.Row(equal_height=True):
-                with gr.Column(scale=2, min_width=480, elem_id="left-column"):
+                with gr.Column(scale=1, min_width=480, elem_id="left-column"):
                     with gr.Group():
+                        title = gr.Textbox(
+                            label="Operation Title",
+                            lines=1,
+                            placeholder="Enter your title here...",
+                            value="Sample Title",
+                        )
+
                         task = gr.Textbox(
                             label="Task Description",
                             lines=10,
                             placeholder="Enter your task here...",
-                            value=config['task'],
+                            value="Your Task Here",
                             info="Describe what you want the agent to do",
                         )
 
                         add_infos = gr.Textbox(
                             label="Additional Information",
-                            lines=7,
+                            lines=4,
                             placeholder="Add any helpful context or instructions...",
                             info="Optional hints to help the LLM complete the task",
                         )
@@ -754,14 +814,14 @@ def create_ui(config, theme_name="custom_theme"):
                     with gr.Row():
                         run_button = gr.Button("Run Agent", variant="primary", scale=1)
                         stop_button = gr.Button("Stop", variant="stop", scale=1)
-
+                        save_button = gr.Button("Save", variant="secondary", scale=1, visible=False)  # Initially Hidden
                 # Initially hidden iframe column inside the SAME Row
                 with gr.Column(scale=3, min_width=720, elem_id="right-column", visible=False) as iframe_row:
                     gr.HTML(
                         """
                         <div style="height: 580px; width: 100%; margin: 0 !important; padding: 0 !important; display: flex; align-items: center; justify-content: center;">
                             <iframe 
-                                src="http://localhost:6081/vnc.html?autoconnect=true&resize=scale" 
+                                src="http://localhost:6080/vnc.html?autoconnect=true&resize=scale" 
                                 width="100%" 
                                 height="100%" 
                                 frameborder="0"
@@ -774,7 +834,11 @@ def create_ui(config, theme_name="custom_theme"):
                     )
 
             # Button click will now reveal the iframe in the SAME row
-            run_button.click(show_iframe, outputs=iframe_row)
+            run_button.click(show_iframe_and_save, outputs=[iframe_row, save_button])
+            
+            # Save button click triggers POST request
+            save_button.click(send_post_request, inputs=[title, task], outputs=None)
+
 
 
         gr.HTML(
@@ -797,7 +861,7 @@ def create_ui(config, theme_name="custom_theme"):
         )
 
         with gr.Row(elem_id="settings-button-container"):
-            open_modal_button = gr.Button("⚙️", variant="secondary", elem_id="settings-button")
+            open_modal_button = gr.Button("⚙️", variant="secondary", elem_id="settings-button",visible =False)
 
 
         # Modal Container (Initially Hidden)
@@ -1597,7 +1661,8 @@ def main():
     config_dict = default_config()
 
     demo = create_ui(config_dict, theme_name=args.theme)
-    demo.launch(server_name=args.ip, server_port=args.port,favicon_path="logo.png")
+    demo.queue(False)
+    demo.launch(server_name=args.ip, server_port=args.port,favicon_path="logo.png",show_api=False,share=True)
 
 if __name__ == '__main__':
     main()