36 changes: 36 additions & 0 deletions autogen/oai/client.py
@@ -275,6 +275,8 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion:
         iostream = IOStream.get_default()
 
         completions: Completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions  # type: ignore [attr-defined]
+        params = self._map_params(params.copy())
+
         # If streaming is enabled and has messages, then iterate over the chunks of the response.
         if params.get("stream", False) and "messages" in params:
             response_contents = [""] * params.get("n", 1)
@@ -415,6 +417,40 @@ def cost(self, response: Union[ChatCompletion, Completion]) -> float:
             return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000  # type: ignore [no-any-return]
         return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000  # type: ignore [operator]
 
+    def _map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Map deprecated parameters and apply o1 beta limitations."""
+
+        # max_tokens is deprecated and replaced by max_completion_tokens as of 2024.09.12
+        if "max_tokens" in params:
+            params["max_completion_tokens"] = params.pop("max_tokens")
+            logger.warning("OpenAI API: 'max_tokens' parameter is deprecated, converting to 'max_completion_tokens'.")
+
+        if params["model"].startswith("o1"):
+            # Beta limitation - remove streaming, convert system messages to user, remove other parameters which have fixed values
+            # https://platform.openai.com/docs/guides/reasoning/beta-limitations
+            if "stream" in params:
+                if params["stream"]:
+                    logger.warning("OpenAI API o1 beta limitation: streaming is not supported.")
+                params.pop("stream")
+
+            warned = False
+            for message in params["messages"]:
+                if message["role"] == "system":
+                    message["role"] = "user"
+                    if not warned:
+                        logger.warning("OpenAI API o1 beta limitation: changing system messages to user messages.")
+                        warned = True
+
+            fixed_params = ["temperature", "top_p", "n", "presence_penalty", "frequency_penalty"]
+            for param_name in fixed_params:
+                if param_name in params:
+                    logger.warning(
+                        f"OpenAI API o1 beta limitation: {param_name} parameter has a fixed value, removing."
+                    )
+                    params.pop(param_name)
+
+        return params
+
     @staticmethod
     def get_usage(response: Union[ChatCompletion, Completion]) -> Dict:
         return {
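For illustration, a minimal before/after sketch of the remapping that the new `_map_params` call performs. `map_o1_params` is a hypothetical standalone stand-in written only for this note (the real logic lives in the `_map_params` method added above), and the sample request values are made up.

```python
# Hypothetical, self-contained stand-in for the remapping shown in the diff above.
def map_o1_params(params: dict) -> dict:
    params = params.copy()
    if "max_tokens" in params:
        # deprecated in favour of max_completion_tokens
        params["max_completion_tokens"] = params.pop("max_tokens")
    if params["model"].startswith("o1"):
        params.pop("stream", None)  # o1 beta: streaming unsupported
        for message in params["messages"]:
            if message["role"] == "system":
                message["role"] = "user"  # o1 beta: no system role
        for name in ("temperature", "top_p", "n", "presence_penalty", "frequency_penalty"):
            params.pop(name, None)  # o1 beta: fixed values, so drop them
    return params

request = {
    "model": "o1-preview",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
    ],
    "max_tokens": 256,
    "temperature": 0.7,
    "stream": True,
}
print(map_o1_params(request))
# -> {'model': 'o1-preview', 'messages': [{'role': 'user', ...}, ...], 'max_completion_tokens': 256}
```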
6 changes: 6 additions & 0 deletions autogen/oai/openai_utils.py
@@ -31,6 +31,12 @@
 DEFAULT_AZURE_API_VERSION = "2024-02-01"
 OAI_PRICE1K = {
     # https://openai.com/api/pricing/
+    # o1
+    "o1-preview": (0.015, 0.06),
+    "o1-preview-2024-09-12": (0.015, 0.06),
+    # o1-mini
+    "o1-mini": (0.003, 0.012),
+    "o1-mini-2024-09-12": (0.003, 0.012),
     # gpt-4o
     "gpt-4o": (0.005, 0.015),
     "gpt-4o-2024-05-13": (0.005, 0.015),
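The new price entries follow the existing `(input, output)` per-1K-token convention, which the tuple branch of the client's `cost()` method consumes. A rough, illustrative check (token counts are made up):

```python
# (prompt_price_per_1K, completion_price_per_1K), as in the entries above.
O1_PRICE1K = {
    "o1-preview": (0.015, 0.06),
    "o1-mini": (0.003, 0.012),
}

def estimate_cost(model: str, n_input_tokens: int, n_output_tokens: int) -> float:
    price_in, price_out = O1_PRICE1K[model]
    return (price_in * n_input_tokens + price_out * n_output_tokens) / 1000

print(estimate_cost("o1-preview", 10_000, 2_000))  # ~0.27 USD
print(estimate_cost("o1-mini", 10_000, 2_000))     # ~0.054 USD
```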
32 changes: 10 additions & 22 deletions autogen/token_count_utils.py
@@ -45,6 +45,10 @@ def get_max_token_limit(model: str = "gpt-3.5-turbo-0613") -> int:
         "gpt-4o-2024-08-06": 128000,
         "gpt-4o-mini": 128000,
         "gpt-4o-mini-2024-07-18": 128000,
+        "o1-preview-2024-09-12": 128000,
+        "o1-preview": 128000,
+        "o1-mini-2024-09-12": 128000,
+        "o1-mini": 128000,
     }
     return max_token_limit[model]

@@ -106,33 +110,17 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
     except KeyError:
         logger.warning(f"Model {model} not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
-    if model in {
-        "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-16k-0613",
-        "gpt-4-0314",
-        "gpt-4-32k-0314",
-        "gpt-4-0613",
-        "gpt-4-32k-0613",
-    }:
-        tokens_per_message = 3
-        tokens_per_name = 1
-    elif model == "gpt-3.5-turbo-0301":
+    if "gpt-3" in model or "gpt-4" in model or model.startswith("o1"):
         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-        tokens_per_name = -1  # if there's a name, the role is omitted
-    elif "gpt-3.5-turbo" in model:
-        logger.info("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-        return _num_token_from_messages(messages, model="gpt-3.5-turbo-0613")
-    elif "gpt-4" in model:
-        logger.info("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-        return _num_token_from_messages(messages, model="gpt-4-0613")
+        tokens_per_name = 1  # OpenAI guidance is 1 extra token if 'name' field is used
     elif "gemini" in model:
-        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
+        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
         return _num_token_from_messages(messages, model="gpt-4-0613")
     elif "claude" in model:
-        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
+        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
         return _num_token_from_messages(messages, model="gpt-4-0613")
     elif "mistral-" in model or "mixtral-" in model:
-        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
+        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
         return _num_token_from_messages(messages, model="gpt-4-0613")
     else:
         raise NotImplementedError(
@@ -158,7 +146,7 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
             num_tokens += len(encoding.encode(value))
             if key == "name":
                 num_tokens += tokens_per_name
-    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+    num_tokens += 2  # every reply is primed with <im_start>assistant
     return num_tokens


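With the o1 context-window entries and the consolidated counting branch in place, the existing helpers in `autogen.token_count_utils` should cover the new models as well. A small usage sketch; the printed values are indicative, not exact:

```python
from autogen.token_count_utils import count_token, get_max_token_limit, token_left

messages = [{"role": "user", "content": "Summarize the design in one paragraph."}]

print(get_max_token_limit("o1-preview"))           # 128000, from the table above
print(count_token(messages, model="o1-preview"))   # o1 now takes the gpt-3/gpt-4 counting path
print(token_left(messages, model="o1-preview"))    # context window minus prompt tokens
```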
2 changes: 1 addition & 1 deletion setup.py
@@ -25,7 +25,7 @@
 current_os = platform.system()
 
 install_requires = [
-    "openai>=1.3",
+    "openai>=1.51",
     "diskcache",
     "termcolor",
     "flaml",
2 changes: 2 additions & 0 deletions test/agentchat/contrib/test_gpt_assistant.py
@@ -34,6 +34,8 @@
     filter_dict={
         "api_type": ["openai"],
         "model": [
+            "o1-preview",
+            "o1-mini",
             "gpt-4o-mini",
             "gpt-4o",
             "gpt-4-turbo",
2 changes: 2 additions & 0 deletions test/agentchat/test_conversable_agent.py
@@ -37,6 +37,8 @@
     {"model": "gpt-4-32k"},
     {"model": "gpt-4o"},
     {"model": "gpt-4o-mini"},
+    {"model": "o1-preview"},
+    {"model": "o1-mini"},
 ]


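A hedged example of how the new model entries could be exercised outside the test suite, assuming an `OAI_CONFIG_LIST` file (or environment variable) that lists o1 models; the agent name and filter values are placeholders:

```python
import autogen

# Keep only o1 entries from a config list like the ones used by the tests above.
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": ["o1-preview", "o1-mini"]},
)

assistant = autogen.AssistantAgent(
    "assistant",
    llm_config={"config_list": config_list},
)
# On the client side, _map_params applies the o1 beta limitations
# (e.g. system messages are converted to user messages).
```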
6 changes: 3 additions & 3 deletions test/test_token_count.py
@@ -72,9 +72,9 @@ def test_count_token():
             "content": "hello asdfjj qeweee",
         },
     ]
-    assert count_token(messages) == 34
-    assert percentile_used(messages) == 34 / 4096
-    assert token_left(messages) == 4096 - 34
+    assert count_token(messages) == 35
+    assert percentile_used(messages) == 35 / 4096
+    assert token_left(messages) == 4096 - 35
 
     text = "I'm sorry, but I'm not able to"
     assert count_token(text) == 10
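The 34 → 35 shift follows directly from the counting change: per-message overhead rises from 3 to 4 tokens while the reply priming drops from 3 to 2, so a fixture with two messages and no `name` fields gains exactly one token. A back-of-the-envelope check, where the 25 encoded message tokens are inferred from the old total rather than recomputed:

```python
encoded_tokens = 25                        # same in both schemes (same messages, same encoding)
old_total = 2 * 3 + encoded_tokens + 3     # tokens_per_message=3, priming=3
new_total = 2 * 4 + encoded_tokens + 2     # tokens_per_message=4, priming=2
assert (old_total, new_total) == (34, 35)
```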