From 97a53c5f70846f132c3a442b04804cd98ae722b1 Mon Sep 17 00:00:00 2001
From: Mark Sze
Date: Fri, 13 Sep 2024 19:54:41 +0000
Subject: [PATCH 1/4] OpenAI o1 model support

---
 autogen/oai/client.py                        | 12 +++++++
 autogen/oai/openai_utils.py                  |  6 ++++
 autogen/token_count_utils.py                 | 38 +++++++-------------
 test/agentchat/contrib/test_gpt_assistant.py |  2 ++
 test/agentchat/test_conversable_agent.py     |  2 ++
 test/agentchat/test_function_call.py         |  2 +-
 6 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index dcf18edda657..09c51a14f285 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -268,6 +268,8 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion:
         iostream = IOStream.get_default()

         completions: Completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions  # type: ignore [attr-defined]
+        params = self.map_params(params.copy())
+
         # If streaming is enabled and has messages, then iterate over the chunks of the response.
         if params.get("stream", False) and "messages" in params:
             response_contents = [""] * params.get("n", 1)
@@ -408,6 +410,16 @@ def cost(self, response: Union[ChatCompletion, Completion]) -> float:
             return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000  # type: ignore [no-any-return]
         return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000  # type: ignore [operator]

+    def map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Maps parameters that are deprecated"""
+
+        # max_tokens is deprecated and replaced by max_completion_tokens as of 2024.09.12
+        if "max_tokens" in params:
+            params["max_completion_tokens"] = params.pop("max_tokens")
+            logger.warning("OpenAI: 'max_tokens' parameter is deprecated, converting to 'max_completion_tokens'.")
+
+        return params
+
     @staticmethod
     def get_usage(response: Union[ChatCompletion, Completion]) -> Dict:
         return {
diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py
index fc6c36b6e9e9..cc6116fec29b 100644
--- a/autogen/oai/openai_utils.py
+++ b/autogen/oai/openai_utils.py
@@ -31,6 +31,12 @@
 DEFAULT_AZURE_API_VERSION = "2024-02-01"
 OAI_PRICE1K = {
     # https://openai.com/api/pricing/
+    # o1
+    "o1-preview": (0.015, 0.06),
+    "o1-preview-2024-09-12": (0.015, 0.06),
+    # o1-mini
+    "o1-mini": (0.003, 0.012),
+    "o1-mini-2024-09-12": (0.003, 0.012),
     # gpt-4o
     "gpt-4o": (0.005, 0.015),
     "gpt-4o-2024-05-13": (0.005, 0.015),
diff --git a/autogen/token_count_utils.py b/autogen/token_count_utils.py
index f586f0ae7c5e..9dd8c8f1dd71 100644
--- a/autogen/token_count_utils.py
+++ b/autogen/token_count_utils.py
@@ -45,6 +45,10 @@ def get_max_token_limit(model: str = "gpt-3.5-turbo-0613") -> int:
         "gpt-4o-2024-08-06": 128000,
         "gpt-4o-mini": 128000,
         "gpt-4o-mini-2024-07-18": 128000,
+        "o1-preview-2024-09-12": 128000,
+        "o1-preview": 128000,
+        "o1-mini-2024-09-12": 128000,
+        "o1-mini": 128000,
     }
     return max_token_limit[model]
@@ -106,34 +110,18 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
     except KeyError:
         logger.warning(f"Model {model} not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
-    if model in {
-        "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-16k-0613",
-        "gpt-4-0314",
-        "gpt-4-32k-0314",
-        "gpt-4-0613",
-        "gpt-4-32k-0613",
-    }:
-        tokens_per_message = 3
-        tokens_per_name = 1
-    elif model == "gpt-3.5-turbo-0301":
+    if "gpt-3" in model or "gpt-4" in model or model.startswith("o1"):
         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-        tokens_per_name = -1  # if there's a name, the role is omitted
-    elif "gpt-3.5-turbo" in model:
-        logger.info("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-        return _num_token_from_messages(messages, model="gpt-3.5-turbo-0613")
-    elif "gpt-4" in model:
-        logger.info("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
-        return _num_token_from_messages(messages, model="gpt-4-0613")
+        tokens_per_name = 1  # OpenAI guidance is 1 extra token if 'name' field is used
     elif "gemini" in model:
-        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
-        return _num_token_from_messages(messages, model="gpt-4-0613")
+        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4.")
+        return _num_token_from_messages(messages, model="gpt-4")
     elif "claude" in model:
-        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
-        return _num_token_from_messages(messages, model="gpt-4-0613")
+        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4.")
+        return _num_token_from_messages(messages, model="gpt-4")
     elif "mistral-" in model or "mixtral-" in model:
-        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613.")
-        return _num_token_from_messages(messages, model="gpt-4-0613")
+        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4.")
+        return _num_token_from_messages(messages, model="gpt-4")
     else:
         raise NotImplementedError(
             f"""_num_token_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
         )
@@ -158,7 +146,7 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
                 num_tokens += len(encoding.encode(value))
             if key == "name":
                 num_tokens += tokens_per_name
-    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+    num_tokens += 2  # every reply is primed with assistant
     return num_tokens
diff --git a/test/agentchat/contrib/test_gpt_assistant.py b/test/agentchat/contrib/test_gpt_assistant.py
index 7ff0e7285cef..2ac3941363f2 100755
--- a/test/agentchat/contrib/test_gpt_assistant.py
+++ b/test/agentchat/contrib/test_gpt_assistant.py
@@ -34,6 +34,8 @@
     filter_dict={
         "api_type": ["openai"],
         "model": [
+            "o1-preview",
+            "o1-mini",
             "gpt-4o-mini",
             "gpt-4o",
             "gpt-4-turbo",
diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py
index da1ad92f5d4c..448220dcc07b 100755
--- a/test/agentchat/test_conversable_agent.py
+++ b/test/agentchat/test_conversable_agent.py
@@ -37,6 +37,8 @@
     {"model": "gpt-4-32k"},
     {"model": "gpt-4o"},
     {"model": "gpt-4o-mini"},
+    {"model": "o1-preview"},
+    {"model": "o1-mini"},
 ]
diff --git a/test/agentchat/test_function_call.py b/test/agentchat/test_function_call.py
index c8f73b4f4ad7..7aa85dbd8e9d 100755
--- a/test/agentchat/test_function_call.py
+++ b/test/agentchat/test_function_call.py
@@ -238,7 +238,7 @@ def test_update_function():
     config_list_gpt4 = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "tags": ["gpt-4", "gpt-4-32k", "gpt-4o", "gpt-4o-mini"],
+            "tags": ["gpt-4", "gpt-4-32k", "gpt-4o", "gpt-4o-mini", "o1-preview", "o1-mini"],
         },
         file_location=KEY_LOC,
     )
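The map_params hook above lets existing callers keep passing max_tokens after the 2024-09-12 API change. Below is a minimal standalone sketch of that mapping, with a free function standing in for the client method and an invented request dict purely for illustration:

    import logging
    from typing import Any, Dict

    logger = logging.getLogger(__name__)

    def map_params(params: Dict[str, Any]) -> Dict[str, Any]:
        # Mutates the dict it is given; create() hands it a copy of the request.
        if "max_tokens" in params:
            params["max_completion_tokens"] = params.pop("max_tokens")
            logger.warning("OpenAI: 'max_tokens' parameter is deprecated, converting to 'max_completion_tokens'.")
        return params

    request = {"model": "o1-preview", "max_tokens": 512, "messages": []}
    print(map_params(request.copy()))
    # {'model': 'o1-preview', 'messages': [], 'max_completion_tokens': 512}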
From 2e387d15dae47875cc4415d30cbfb9a816871b3b Mon Sep 17 00:00:00 2001
From: Mark Sze
Date: Sat, 14 Sep 2024 06:25:43 +0000
Subject: [PATCH 2/4] Handling beta limitations

---
 autogen/oai/client.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 09c51a14f285..a3917ab1050b 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -416,7 +416,27 @@ def map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
         # max_tokens is deprecated and replaced by max_completion_tokens as of 2024.09.12
         if "max_tokens" in params:
             params["max_completion_tokens"] = params.pop("max_tokens")
-            logger.warning("OpenAI: 'max_tokens' parameter is deprecated, converting to 'max_completion_tokens'.")
+            logger.warning("OpenAI API: 'max_tokens' parameter is deprecated, converting to 'max_completion_tokens'.")
+
+        if params["model"].startswith("o1"):
+            # Beta limitations: remove streaming, convert system messages to user messages, and remove parameters that have fixed values
+            # https://platform.openai.com/docs/guides/reasoning/beta-limitations
+            if "stream" in params:
+                if params["stream"]:
+                    logger.warning("OpenAI API o1 beta limitation: streaming is not supported.")
+                params.pop("stream")
+
+            for message in params["messages"]:
+                if message["role"] == "system":
+                    message["role"] = "user"
+
+            fixed_params = ["temperature", "top_p", "n", "presence_penalty", "frequency_penalty"]
+            for param_name in fixed_params:
+                if param_name in params:
+                    logger.warning(
+                        f"OpenAI API o1 beta limitation: {param_name} parameter has a fixed value, removing."
+                    )
+                    params.pop(param_name)

         return params
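For an o1-family model, the branch above strips a request down to what the beta endpoint accepts before it is sent. A self-contained approximation of the transformation (the function name and example request are illustrative, not part of the patch):

    from typing import Any, Dict

    FIXED_VALUE_PARAMS = ["temperature", "top_p", "n", "presence_penalty", "frequency_penalty"]

    def apply_o1_beta_limits(params: Dict[str, Any]) -> Dict[str, Any]:
        params.pop("stream", None)  # streaming is not supported in the o1 beta
        for message in params["messages"]:
            if message["role"] == "system":  # system messages are not accepted
                message["role"] = "user"
        for name in FIXED_VALUE_PARAMS:  # these parameters have fixed values
            params.pop(name, None)
        return params

    request = {
        "model": "o1-mini",
        "stream": True,
        "temperature": 0.2,
        "messages": [{"role": "system", "content": "Be concise."}],
    }
    print(apply_o1_beta_limits(request))
    # {'model': 'o1-mini', 'messages': [{'role': 'user', 'content': 'Be concise.'}]}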
From f1edecd18213c5c31b60f75c8063b29b8c9a0ebf Mon Sep 17 00:00:00 2001
From: Mark Sze
Date: Tue, 17 Sep 2024 03:45:38 +0000
Subject: [PATCH 3/4] Removed function testing, added system>user warning,
 updated map_params to _map_params

---
 autogen/oai/client.py                | 8 ++++++--
 test/agentchat/test_function_call.py | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index a3917ab1050b..ffb700ed1717 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -268,7 +268,7 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion:
         iostream = IOStream.get_default()

         completions: Completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions  # type: ignore [attr-defined]
-        params = self.map_params(params.copy())
+        params = self._map_params(params.copy())

         # If streaming is enabled and has messages, then iterate over the chunks of the response.
         if params.get("stream", False) and "messages" in params:
             response_contents = [""] * params.get("n", 1)
@@ -410,7 +410,7 @@ def cost(self, response: Union[ChatCompletion, Completion]) -> float:
             return (tmp_price1K[0] * n_input_tokens + tmp_price1K[1] * n_output_tokens) / 1000  # type: ignore [no-any-return]
         return tmp_price1K * (n_input_tokens + n_output_tokens) / 1000  # type: ignore [operator]

-    def map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+    def _map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
         """Maps parameters that are deprecated"""

         # max_tokens is deprecated and replaced by max_completion_tokens as of 2024.09.12
@@ -427,8 +427,12 @@ def map_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
                 params.pop("stream")

+            warned = False
             for message in params["messages"]:
                 if message["role"] == "system":
                     message["role"] = "user"
+                    if not warned:
+                        logger.warning("OpenAI API o1 beta limitation: changing system messages to user messages.")
+                        warned = True

             fixed_params = ["temperature", "top_p", "n", "presence_penalty", "frequency_penalty"]
             for param_name in fixed_params:
diff --git a/test/agentchat/test_function_call.py b/test/agentchat/test_function_call.py
index 7aa85dbd8e9d..c8f73b4f4ad7 100755
--- a/test/agentchat/test_function_call.py
+++ b/test/agentchat/test_function_call.py
@@ -238,7 +238,7 @@ def test_update_function():
     config_list_gpt4 = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         filter_dict={
-            "tags": ["gpt-4", "gpt-4-32k", "gpt-4o", "gpt-4o-mini", "o1-preview", "o1-mini"],
+            "tags": ["gpt-4", "gpt-4-32k", "gpt-4o", "gpt-4o-mini"],
         },
         file_location=KEY_LOC,
     )
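Initialising warned before the message loop is what makes the new log line fire once per request rather than once per system message. A quick illustration of the warn-once pattern in isolation (standalone snippet, not part of the patch):

    messages = [
        {"role": "system", "content": "first"},
        {"role": "system", "content": "second"},
    ]
    warned = False
    for message in messages:
        if message["role"] == "system":
            message["role"] = "user"
            if not warned:
                print("OpenAI API o1 beta limitation: changing system messages to user messages.")
                warned = True
    # Prints once; both messages now carry role "user".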
From fe83b783cb81da7c189fc68a68725905b63c51e6 Mon Sep 17 00:00:00 2001
From: Mark Sze
Date: Wed, 2 Oct 2024 06:24:18 +0000
Subject: [PATCH 4/4] OpenAI package version, token counting updates (incl. test)

---
 autogen/token_count_utils.py | 12 ++++++------
 setup.py                     |  2 +-
 test/test_token_count.py     |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/autogen/token_count_utils.py b/autogen/token_count_utils.py
index 9dd8c8f1dd71..be8df5b035c4 100644
--- a/autogen/token_count_utils.py
+++ b/autogen/token_count_utils.py
@@ -114,14 +114,14 @@ def _num_token_from_messages(messages: Union[List, Dict], model="gpt-3.5-turbo-0
         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
         tokens_per_name = 1  # OpenAI guidance is 1 extra token if 'name' field is used
     elif "gemini" in model:
-        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4.")
-        return _num_token_from_messages(messages, model="gpt-4")
+        logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
+        return _num_token_from_messages(messages, model="gpt-4-0613")
     elif "claude" in model:
-        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4.")
-        return _num_token_from_messages(messages, model="gpt-4")
+        logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
+        return _num_token_from_messages(messages, model="gpt-4-0613")
     elif "mistral-" in model or "mixtral-" in model:
-        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4.")
-        return _num_token_from_messages(messages, model="gpt-4")
+        logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613 (2023).")
+        return _num_token_from_messages(messages, model="gpt-4-0613")
     else:
         raise NotImplementedError(
             f"""_num_token_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
         )
diff --git a/setup.py b/setup.py
index f8a1753d2888..6da0cd26926c 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@
 current_os = platform.system()

 install_requires = [
-    "openai>=1.3",
+    "openai>=1.51",
     "diskcache",
     "termcolor",
     "flaml",
diff --git a/test/test_token_count.py b/test/test_token_count.py
index ee096c16cbd8..de3d283e56cf 100755
--- a/test/test_token_count.py
+++ b/test/test_token_count.py
@@ -72,9 +72,9 @@ def test_count_token():
             "content": "hello asdfjj qeweee",
         },
     ]
-    assert count_token(messages) == 34
-    assert percentile_used(messages) == 34 / 4096
-    assert token_left(messages) == 4096 - 34
+    assert count_token(messages) == 35
+    assert percentile_used(messages) == 35 / 4096
+    assert token_left(messages) == 4096 - 35

     text = "I'm sorry, but I'm not able to"
     assert count_token(text) == 10
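The bumped expectations in test_count_token follow from the counting rule introduced in PATCH 1/4: 4 overhead tokens per message, 1 extra token when a 'name' field is present, and 2 tokens of reply priming. Assuming the fixture keeps its two messages, overhead moves from 9 (3 per message x 2, plus 3 priming) to 10 (4 per message x 2, plus 2 priming), hence 34 becoming 35. A stripped-down sketch of the rule for plain string contents (the real helper also handles function/tool payloads):

    import tiktoken  # pip install tiktoken

    encoding = tiktoken.get_encoding("cl100k_base")

    def count_tokens(messages) -> int:
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += 1  # one extra token when 'name' is used
        return num_tokens + 2  # every reply is primed with the assistant role

    example = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "name": "example_user", "content": "hello asdfjj qeweee"},
    ]
    print(count_tokens(example))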