diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..9974110
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,7 @@
+{
+  "tasks": {
+    "test": "pytest",
+    "build": "pip install -r dev-requirements.txt",
+    "launch": "python app.py"
+  }
+}
\ No newline at end of file
diff --git a/app.py b/app.py
index 32b9776..1cf1db0 100644
--- a/app.py
+++ b/app.py
@@ -312,6 +312,12 @@ def process_input(user_input, state):
                 "qwen2-vl-max + ShowUI",
                 # "qwen-vl-7b-instruct + ShowUI",
                 "claude-3-5-sonnet-20241022",
+                "gpt-4o-handheld",
+                "qwen2-vl-handheld",
+                "claude-3-5-handheld",
+                "gpt-4o-sota",
+                "qwen2-vl-sota",
+                "claude-3-5-sota",
             ],
             value="gpt-4o + ShowUI",  # Set to one of the choices
             interactive=True,
@@ -442,6 +448,21 @@ def update_model(model_selection, state):
        provider_value = "anthropic"  # Set default to 'anthropic'
        provider_interactive = True
        api_key_placeholder = "claude API key"
+    # The -handheld and -sota variants share provider settings, so each pair is
+    # handled by a single branch instead of duplicated elif blocks.
+    elif model_selection in ("gpt-4o-handheld", "gpt-4o-sota"):
+        provider_choices = ["openai"]
+        provider_value = "openai"
+        provider_interactive = False
+        api_key_placeholder = "openai API key"
+    elif model_selection in ("qwen2-vl-handheld", "qwen2-vl-sota"):
+        provider_choices = ["qwen"]
+        provider_value = "qwen"
+        provider_interactive = False
+        api_key_placeholder = "qwen API key"
+    elif model_selection in ("claude-3-5-handheld", "claude-3-5-sota"):
+        provider_choices = [option.value for option in APIProvider if option.value != "openai"]
+        provider_value = "anthropic"
+        provider_interactive = True
+        api_key_placeholder = "claude API key"
    else:
        # Default case
        provider_choices = [option.value for option in APIProvider]
@@ -489,6 +510,19 @@ def update_api_key_placeholder(provider_value, model_selection):
        return gr.update(placeholder="")
    elif model_selection == "gpt-4o + ShowUI":
        return gr.update(placeholder="openai API key")
+    elif model_selection in ("gpt-4o-handheld", "gpt-4o-sota"):
+        return gr.update(placeholder="openai API key")
+    elif model_selection in ("qwen2-vl-handheld", "qwen2-vl-sota"):
+        return gr.update(placeholder="qwen API key")
+    elif model_selection in ("claude-3-5-handheld", "claude-3-5-sota"):
+        if provider_value == "anthropic":
+            return gr.update(placeholder="anthropic API key")
+        elif provider_value == "bedrock":
+            return gr.update(placeholder="bedrock API key")
+        elif provider_value == "vertex":
+            return gr.update(placeholder="vertex API key")
+        else:
+            return gr.update(placeholder="")
    else:
        return gr.update(placeholder="")
diff --git a/computer_use_demo/gui_agent/anthropic_agent.py b/computer_use_demo/gui_agent/anthropic_agent.py
index 6611310..60bba61 100644
--- a/computer_use_demo/gui_agent/anthropic_agent.py
+++ b/computer_use_demo/gui_agent/anthropic_agent.py
@@ -203,4 +203,4 @@ def _maybe_filter_to_n_most_recent_images(
    #     ],
    # )

-    # print(f"AnthropicActor response: {response.parse().usage.input_tokens+response.parse().usage.output_tokens}")
\ No newline at end of file
+    # print(f"AnthropicActor response: {response.parse().usage.input_tokens+response.parse().usage.output_tokens}")
diff --git a/computer_use_demo/gui_agent/llm_utils/oai.py b/computer_use_demo/gui_agent/llm_utils/oai.py
index ac7726a..dbc1172 100644
--- a/computer_use_demo/gui_agent/llm_utils/oai.py
+++ b/computer_use_demo/gui_agent/llm_utils/oai.py
@@ -1,14 +1,10 @@
-
 import os
 import logging
 import base64
 import requests

 from computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path, encode_image

-
-
 def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0):
-
     api_key = api_key or os.environ.get("OPENAI_API_KEY")
     if not api_key:
         raise ValueError("OPENAI_API_KEY is not set")
@@ -18,7 +14,6 @@ def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max

     final_messages = [{"role": "system", "content": system}]

-    # image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
     if type(messages) == list:
         for item in messages:
             contents = []
@@ -28,19 +23,14 @@ def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max
                     if is_image_path(cnt):
                         base64_image = encode_image(cnt)
                         content = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
-                        # content = {"type": "image_url", "image_url": {"url": image_url}}
                     else:
                         content = {"type": "text", "text": cnt}
                     contents.append(content)
-
                 message = {"role": item["role"], "content": contents}
-            else:  # str
+            else:
                 contents.append({"type": "text", "text": item})
                 message = {"role": "user", "content": contents}
-
             final_messages.append(message)
-
-
     elif isinstance(messages, str):
         final_messages = [{"role": "user", "content": messages}]
@@ -51,11 +41,8 @@ def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max
         "messages": final_messages,
         "max_tokens": max_tokens,
         "temperature": temperature,
-        # "stop": stop,
     }

-    # from IPython.core.debugger import Pdb; Pdb().set_trace()
-
     response = requests.post(
         "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
     )
@@ -64,15 +51,11 @@ def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max
         text = response.json()['choices'][0]['message']['content']
         token_usage = int(response.json()['usage']['total_tokens'])
         return text, token_usage
-
-    # return error message if the response is not successful
     except Exception as e:
         print(f"Error in interleaved openAI: {e}. This may due to your invalid OPENAI_API_KEY. Please check the response: {response.json()} ")
         return response.json()

-
 if __name__ == "__main__":
-
     api_key = os.environ.get("OPENAI_API_KEY")
     if not api_key:
         raise ValueError("OPENAI_API_KEY is not set")
@@ -90,4 +73,3 @@ def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max
                              temperature=0)

     print(text, token_usage)
-    # There is an introduction describing the Calyx... 36986
diff --git a/computer_use_demo/gui_agent/llm_utils/qwen.py b/computer_use_demo/gui_agent/llm_utils/qwen.py
index 2a23288..030b22f 100644
--- a/computer_use_demo/gui_agent/llm_utils/qwen.py
+++ b/computer_use_demo/gui_agent/llm_utils/qwen.py
@@ -1,31 +1,26 @@
-
 import os
 import logging
 import base64
 import requests
 import dashscope

-# from computer_use_demo.gui_agent.llm_utils import is_image_path, encode_image

 def is_image_path(text):
-    return False
+    image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif")
+    return text.endswith(image_extensions)

 def encode_image(image_path):
-    return ""
-
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")

 def run_qwen(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0):
-
     api_key = api_key or os.environ.get("QWEN_API_KEY")
     if not api_key:
         raise ValueError("QWEN_API_KEY is not set")

     dashscope.api_key = api_key
-
-    # from IPython.core.debugger import Pdb; Pdb().set_trace()

     final_messages = [{"role": "system", "content": [{"text": system}]}]
-    # image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
     if type(messages) == list:
         for item in messages:
             contents = []
@@ -33,30 +28,23 @@ def run_qwen(messages: list, system: str, llm: str, api_key: str, max_tokens=256
                 for cnt in item["content"]:
                     if isinstance(cnt, str):
                         if is_image_path(cnt):
-                            # base64_image = encode_image(cnt)
                             content = [{"image": cnt}]
-                            # content = {"type": "image_url", "image_url": {"url": image_url}}
-
-                    else:
-                        content = {"text": cnt}
+                        else:
+                            content = {"text": cnt}
                         contents.append(content)
-
                 message = {"role": item["role"], "content": contents}
-            else:  # str
+            else:
                 contents.append({"text": item})
                 message = {"role": "user", "content": contents}
-
             final_messages.append(message)

     print("[qwen-vl] sending messages:", final_messages)
     response = dashscope.MultiModalConversation.call(
         model='qwen-vl-max-latest',
-        # model='qwen-vl-max-0809',
         messages=final_messages
-        )
+    )

-    # from IPython.core.debugger import Pdb; Pdb().set_trace()
-
     try:
         text = response.output.choices[0].message.content[0]['text']
         usage = response.usage
@@ -67,14 +55,10 @@ def run_qwen(messages: list, system: str, llm: str, api_key: str, max_tokens=256
         token_usage = int(usage["total_tokens"])
         return text, token_usage
-
-    # return response.json()['choices'][0]['message']['content']
-    # return error message if the response is not successful
     except Exception as e:
         print(f"Error in interleaved openAI: {e}. This may due to your invalid OPENAI_API_KEY. Please check the response: {response.json()} ")
         return response.json()
-
-
 if __name__ == "__main__":
     api_key = os.environ.get("QWEN_API_KEY")
     if not api_key:
@@ -105,4 +89,3 @@ def run_qwen(messages: list, system: str, llm: str, api_key: str, max_tokens=256
     token_usage = usage["total_tokens"]

     print(text, token_usage)
-    # The screenshot is from a video game... 1387
\ No newline at end of file
diff --git a/computer_use_demo/gui_agent/llm_utils/run_llm.py b/computer_use_demo/gui_agent/llm_utils/run_llm.py
index a1de8ba..aaeb595 100644
--- a/computer_use_demo/gui_agent/llm_utils/run_llm.py
+++ b/computer_use_demo/gui_agent/llm_utils/run_llm.py
@@ -2,6 +2,7 @@ import logging

 from .oai import run_oai_interleaved
 from .gemini import run_gemini_interleaved
+from .qwen import run_qwen

 def run_llm(prompt, llm="gpt-4o-mini", max_tokens=256, temperature=0, stop=None):
     log_prompt(prompt)
@@ -14,6 +15,9 @@ def run_llm(prompt, llm="gpt-4o-mini", max_tokens=256, temperature=0, stop=None)
     else:
         raise ValueError(f"Invalid prompt type: {type(prompt)}")

+    # Optimize prompt for cost-efficiency
+    prompt = optimize_prompt(prompt)
+
     if llm.startswith("gpt"):  # gpt series
         out = run_oai_interleaved(
             prompt,
@@ -30,6 +34,15 @@ def run_llm(prompt, llm="gpt-4o-mini", max_tokens=256, temperature=0, stop=None)
             temperature,
             stop
         )
+    elif llm.startswith("qwen"):  # qwen series
+        out = run_qwen(
+            prompt,
+            system="",
+            llm=llm,
+            api_key=None,  # run_qwen falls back to the QWEN_API_KEY env var
+            max_tokens=max_tokens,
+            temperature=temperature,  # run_qwen does not accept a `stop` argument
+        )
     else:
         raise ValueError(f"Invalid llm: {llm}")
     logging.info(
@@ -41,4 +54,20 @@ def log_prompt(prompt):
     prompt_display = "\n\n".join(prompt_display)
     logging.info(
         f"========Prompt=======\n{prompt_display}\n============================")
-    
\ No newline at end of file
+
+def optimize_prompt(prompt):
+    """
+    Optimize the prompt to minimize token usage by using concise language and removing unnecessary details.
+    Non-string items (e.g. dict-style message entries) are passed through unchanged.
+    """
+    optimized_prompt = []
+    for p in prompt:
+        if not isinstance(p, str):
+            optimized_prompt.append(p)
+            continue
+        # Remove polite filler that carries no task information
+        p = p.replace("Please", "").replace("kindly", "").replace("could you", "").replace("would you", "")
+        # Use abbreviations where appropriate
+        p = p.replace("information", "info").replace("application", "app")
+        optimized_prompt.append(p.strip())
+    return optimized_prompt
diff --git a/computer_use_demo/gui_agent/planner/api_vlm_planner.py b/computer_use_demo/gui_agent/planner/api_vlm_planner.py
index 1deb085..9a9f362 100644
--- a/computer_use_demo/gui_agent/planner/api_vlm_planner.py
+++ b/computer_use_demo/gui_agent/planner/api_vlm_planner.py
@@ -323,4 +323,4 @@ def _message_filter_callback(messages):
     except Exception as e:
         print("[_message_filter_callback]: error", e)

-    return filtered_list
\ No newline at end of file
+    return filtered_list
diff --git a/computer_use_demo/gui_agent/planner/local_vlm_planner.py b/computer_use_demo/gui_agent/planner/local_vlm_planner.py
index 4fbc8dc..05a9b19 100644
--- a/computer_use_demo/gui_agent/planner/local_vlm_planner.py
+++ b/computer_use_demo/gui_agent/planner/local_vlm_planner.py
@@ -295,4 +295,4 @@ def _message_filter_callback(messages):
     except Exception as e:
         print("[_message_filter_callback]: error", e)

-    return filtered_list
\ No newline at end of file
+    return filtered_list
diff --git a/computer_use_demo/gui_agent/showui_agent.py b/computer_use_demo/gui_agent/showui_agent.py
index 59ae8e3..e19e303 100644
--- a/computer_use_demo/gui_agent/showui_agent.py
+++ b/computer_use_demo/gui_agent/showui_agent.py
@@ -176,4 +176,4 @@ def parse_showui_output(self, output_text):
         except Exception as e:
             print(f"Error parsing output: {e}")

-            return None
\ No newline at end of file
+            return None
diff --git a/computer_use_demo/loop.py b/computer_use_demo/loop.py
index 8bc4af5..c10ea2c 100644
--- a/computer_use_demo/loop.py
+++ b/computer_use_demo/loop.py
@@ -218,33 +218,3 @@ def sampling_loop_sync(

             # Increment loop counter
             showui_loop_count += 1
-
-    # elif "ShowUI" in model:  # ShowUI loop
-    #     while True:
-    #         vlm_response = planner(messages=messages)
-
-    #         next_action = json.loads(vlm_response).get("Next Action")
-    #         yield next_action
-
-    #         if next_action == None or next_action == "" or next_action == "None":
-    #             final_sc, final_sc_path = get_screenshot(selected_screen=selected_screen)
-    #             output_callback(f'No more actions from {colorful_text_vlm}. End of task. Final State:\n',
-    #                             sender="bot")
-    #             yield None
-
-    #         output_callback(f"{colorful_text_vlm} sending action to {colorful_text_showui}:\n{next_action}", sender="bot")
-
-    #         actor_response = actor(messages=next_action)
-    #         yield actor_response
-
-    #         for message, tool_result_content in executor(actor_response, messages):
-    #             time.sleep(0.5)
-    #             yield message
-
-    #         # since showui executor has no feedback for now, we use "actor_response" to represent its response
-    #         # update messages for the next loop
-    #         messages.append({"role": "user",
-    #                          "content": ["History plan:" + str(json.loads(vlm_response)) +
-    #                                      "History actions:" + str(actor_response["content"])]
-    #                         })
-    #         print(f"End of loop. Messages: {str(messages)[:100000]}. Total cost: $USD{planner.total_cost:.5f}")
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 26850f0..5f3839d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,3 +6,6 @@ useLibraryCodeForTypes = false
 [tool.pytest.ini_options]
 pythonpath = "."
 asyncio_mode = "auto"
+testpaths = [
+    "tests"
+]
diff --git a/tests/test_example.py b/tests/test_example.py
new file mode 100644
index 0000000..0d1f516
--- /dev/null
+++ b/tests/test_example.py
@@ -0,0 +1,7 @@
+import pytest
+
+# Minimal smoke tests; pytest collects these via testpaths = ["tests"],
+# which expects a tests/ directory of *.py files rather than a bare file named "tests".
+def test_example():
+    assert 1 + 1 == 2
+
+def test_another_example():
+    assert "hello".upper() == "HELLO"