diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 87916319d082..2379f4322126 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -275,6 +275,8 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion:
         iostream = IOStream.get_default()
 
         completions: Completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions  # type: ignore [attr-defined]
+        params = self._map_params(params.copy())
+
         # If streaming is enabled and has messages, then iterate over the chunks of the response.
         if params.get("stream", False) and "messages" in params:
             response_contents = [""] * params.get("n", 1)
@@ -415,6 +417,40 @@ def cost(self, response: Union[ChatCompletion, Completion]) -> float:
             return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000  # type: ignore [no-any-return]
         return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000  # type: ignore [operator]
 
+    def _map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Map deprecated parameters and apply o1 beta limitations."""
+
+        # max_tokens is deprecated and replaced by max_completion_tokens as of 2024.09.12
+        if "max_tokens" in params:
+            params["max_completion_tokens"] = params.pop("max_tokens")
+            logger.warning("OpenAI API: 'max_tokens' parameter is deprecated, converting to 'max_completion_tokens'.")
+
+        if params["model"].startswith("o1"):
+            # Beta limitation - remove streaming, convert system messages to user, remove other parameters which have fixed values
+            # https://platform.openai.com/docs/guides/reasoning/beta-limitations
+            if "stream" in params:
+                if params["stream"]:
+                    logger.warning("OpenAI API o1 beta limitation: streaming is not supported.")
+                params.pop("stream")
+
+            warned = False
+            for message in params["messages"]:
+                if message["role"] == "system":
+                    message["role"] = "user"
+                    if not warned:
+                        logger.warning("OpenAI API o1 beta limitation: changing system messages to user messages.")
+                        warned = True
+
+            fixed_params = ["temperature", "top_p", "n", "presence_penalty", "frequency_penalty"]
+            for param_name in fixed_params:
+                if param_name in params:
+                    logger.warning(
+                        f"OpenAI API o1 beta limitation: {param_name} parameter has a fixed value, removing."
+                    )
+                    params.pop(param_name)
+
+        return params
+
     @staticmethod
     def get_usage(response: Union[ChatCompletion, Completion]) -> Dict:
         return {
diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py
index f4175f804edd..b378480aed51 100644
--- a/autogen/oai/openai_utils.py
+++ b/autogen/oai/openai_utils.py
@@ -31,6 +31,12 @@
 DEFAULT_AZURE_API_VERSION = "2024-02-01"
 OAI_PRICE1K = {
     # https://openai.com/api/pricing/
+    # o1
+    "o1-preview": (0.015, 0.06),
+    "o1-preview-2024-09-12": (0.015, 0.06),
+    # o1-mini
+    "o1-mini": (0.003, 0.012),
+    "o1-mini-2024-09-12": (0.003, 0.012),
     # gpt-4o
     "gpt-4o": (0.005, 0.015),
     "gpt-4o-2024-05-13": (0.005, 0.015),
diff --git a/autogen/token_count_utils.py b/autogen/token_count_utils.py
index f586f0ae7c5e..be8df5b035c4 100644
--- a/autogen/token_count_utils.py
+++ b/autogen/token_count_utils.py
@@ -45,6 +45,10 @@ def get_max_token_limit(model: str = "gpt-3.5-turbo-0613") -> int:
         "gpt-4o-2024-08-06": 128000,
         "gpt-4o-mini": 128000,
         "gpt-4o-mini-2024-07-18": 128000,
+        "o1-preview-2024-09-12": 128000,
+        "o1-preview": 128000,
+        "o1-mini-2024-09-12": 128000,
+        "o1-mini": 128000,
     }
     return max_token_limit[model]
 
@@ -106,33 +110,17 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
     except KeyError:
         logger.warning(f"Model {model} not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
-    if model in {
-        "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-16k-0613",
-        "gpt-4-0314",
-        "gpt-4-32k-0314",
-        "gpt-4-0613",
-        "gpt-4-32k-0613",
-    }:
-        tokens_per_message = 3
-        tokens_per_name = 1
-    elif model == "gpt-3.5-turbo-0301":
+    if "gpt-3" in model or "gpt-4" in model or model.startswith("o1"):
         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-        tokens_per_name = -1  # if there's a name, the role is omitted
-    elif "gpt-3.5-turbo" in model:
-        logger.info("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-        return _num_token_from_messages(messages, model="gpt-3.5-turbo-0613")
-    elif "gpt-4" in model:
-        logger.info("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-        return _num_token_from_messages(messages, model="gpt-4-0613")
+        tokens_per_name = 1  # OpenAI guidance is 1 extra token if 'name' field is used
     elif "gemini" in model:
-        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
+        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
         return _num_token_from_messages(messages, model="gpt-4-0613")
     elif "claude" in model:
-        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
+        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
         return _num_token_from_messages(messages, model="gpt-4-0613")
     elif "mistral-" in model or "mixtral-" in model:
-        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
+        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
         return _num_token_from_messages(messages, model="gpt-4-0613")
     else:
         raise NotImplementedError(
@@ -158,7 +146,7 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
             num_tokens += len(encoding.encode(value))
             if key == "name":
                 num_tokens += tokens_per_name
-    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+    num_tokens += 2  # every reply is primed with assistant
     return num_tokens
 
 
diff --git a/setup.py b/setup.py
index f8a1753d2888..6da0cd26926c 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@
 current_os = platform.system()
 
 install_requires = [
-    "openai>=1.3",
+    "openai>=1.51",
     "diskcache",
     "termcolor",
     "flaml",
diff --git a/test/agentchat/contrib/test_gpt_assistant.py b/test/agentchat/contrib/test_gpt_assistant.py
index 7ff0e7285cef..2ac3941363f2 100755
--- a/test/agentchat/contrib/test_gpt_assistant.py
+++ b/test/agentchat/contrib/test_gpt_assistant.py
@@ -34,6 +34,8 @@
     filter_dict={
         "api_type": ["openai"],
         "model": [
+            "o1-preview",
+            "o1-mini",
             "gpt-4o-mini",
             "gpt-4o",
             "gpt-4-turbo",
diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py
index da1ad92f5d4c..448220dcc07b 100755
--- a/test/agentchat/test_conversable_agent.py
+++ b/test/agentchat/test_conversable_agent.py
@@ -37,6 +37,8 @@
     {"model": "gpt-4-32k"},
     {"model": "gpt-4o"},
     {"model": "gpt-4o-mini"},
+    {"model": "o1-preview"},
+    {"model": "o1-mini"},
 ]
 
 
diff --git a/test/test_token_count.py b/test/test_token_count.py
index ee096c16cbd8..de3d283e56cf 100755
--- a/test/test_token_count.py
+++ b/test/test_token_count.py
@@ -72,9 +72,9 @@ def test_count_token():
             "content": "hello asdfjj qeweee",
         },
     ]
-    assert count_token(messages) == 34
-    assert percentile_used(messages) == 34 / 4096
-    assert token_left(messages) == 4096 - 34
+    assert count_token(messages) == 35
+    assert percentile_used(messages) == 35 / 4096
+    assert token_left(messages) == 4096 - 35
 
     text = "I'm sorry, but I'm not able to"
     assert count_token(text) == 10
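Note on the test_token_count.py change above: the expected total moves from 34 to 35 because the two-message conversation in that test now gets 4 overhead tokens per message instead of 3 (+2), while the reply priming drops from 3 tokens to 2 (-1), a net change of +1.

For reference, a minimal sketch (not part of the patch) of what the new OpenAIClient._map_params does to an o1-targeted request. It constructs the wrapper by hand and calls the private method directly, purely for illustration; real requests go through create() as wired above, and the direct construction and placeholder API key are assumptions, not the documented entry point.

    from openai import OpenAI
    from autogen.oai.client import OpenAIClient

    # Illustration only: wrap a raw openai client by hand (placeholder key);
    # _map_params itself makes no network calls.
    client = OpenAIClient(OpenAI(api_key="sk-placeholder"))

    params = {
        "model": "o1-preview",
        "messages": [
            {"role": "system", "content": "You are terse."},
            {"role": "user", "content": "Hello"},
        ],
        "max_tokens": 256,   # deprecated -> converted to max_completion_tokens
        "temperature": 0.2,  # fixed value for o1, removed with a warning
        "stream": True,      # unsupported for o1, removed with a warning
    }

    mapped = client._map_params(params.copy())
    # mapped == {
    #     "model": "o1-preview",
    #     "messages": [
    #         {"role": "user", "content": "You are terse."},
    #         {"role": "user", "content": "Hello"},
    #     ],
    #     "max_completion_tokens": 256,
    # }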