Skip to content

Commit 720681b

Browse files
committed
feat: add max_model_len for vllm
1 parent 00fc9bb commit 720681b

File tree

4 files changed

+8
-3
lines changed

4 files changed

+8
-3
lines changed

ADVANCED_USAGE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Below are all the arguments for `bigcodebench.evaluate` for the remote evaluatio
5050
- `--n_samples`: The number of samples, default to `1`
5151
- `--temperature`: The temperature, default to `0.0`
5252
- `--max_new_tokens`: The length of max new tokens, default to `1280`
53+
- `--max_model_len`: The maximum model context length (in tokens) used by vLLM, default to `12800`
5354
- `--greedy`: Whether to use greedy decoding, default to `False`
5455
- `--strip_newlines`: Whether to strip newlines, default to `False`, set to `True` to strip newlines for some model series like StarCoder2
5556
- `--direct_completion`: Whether to use direct completion, default to `False`

bigcodebench/generate.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ def run_codegen(
132132
n_samples: int = 1,
133133
temperature: float = 0.0,
134134
max_new_tokens: int = 1280,
135+
# vllm
136+
max_model_len: int = 12800,
135137
greedy: bool = False,
136138
# openai
137139
reasoning_effort: str = "medium",
@@ -178,6 +180,7 @@ def run_codegen(
178180
lora_path=lora_path,
179181
temperature=temperature,
180182
max_new_tokens=max_new_tokens,
183+
max_model_len=max_model_len,
181184
reasoning_effort=reasoning_effort,
182185
reasoning_budget=reasoning_budget,
183186
reasoning_beta=reasoning_beta,

bigcodebench/provider/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ def make_model(
1010
dataset: str = "bigcodebench",
1111
temperature: float = 0.0,
1212
max_new_tokens: int = 1280,
13+
max_model_len: int = 12800,
1314
# openai only
1415
reasoning_effort: str = "medium",
1516
# anthropic only
@@ -42,6 +43,7 @@ def make_model(
4243
lora_path=lora_path,
4344
temperature=temperature,
4445
max_new_tokens=max_new_tokens,
46+
max_model_len=max_model_len,
4547
revision=revision,
4648
dataset=dataset,
4749
direct_completion=direct_completion,

bigcodebench/provider/vllm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
)
1414

1515
class VllmDecoder(DecoderBase):
16-
def __init__(self, name: str, lora_path: str, dataset: str, tp: int, **kwargs) -> None:
16+
def __init__(self, name: str, lora_path: str, dataset: str, tp: int, max_model_len: int, **kwargs) -> None:
1717
super().__init__(name, **kwargs)
1818

1919
kwargs = {
@@ -41,8 +41,7 @@ def __init__(self, name: str, lora_path: str, dataset: str, tp: int, **kwargs) -
4141
local_lora_path,
4242
)
4343

44-
# max_model_len is set to max_new_tokens * 10
45-
self.llm = LLM(model=name, max_model_len=self.max_new_tokens * 10, enable_lora=True if self.lora_request else False, **kwargs)
44+
self.llm = LLM(model=name, max_model_len=max_model_len, enable_lora=True if self.lora_request else False, **kwargs)
4645
self.llm.set_tokenizer(tokenizer=self.tokenizer)
4746

4847
def is_direct_completion(self) -> bool:

0 commit comments

Comments
 (0)