Commit c96f5be: fix qwen2_5_vl

1 parent c05d281

7 files changed: +72 -78 lines

models/Qwen2_5_VL/README.md
Lines changed: 1 addition & 1 deletion

````diff
@@ -37,7 +37,7 @@ python3 export_onnx.py --model_path /path/to/Qwen2_5-VL-3B-Instruct --seq_length
 This section describes how to compile the onnx model into a bmodel. You can also skip the compilation step and download the precompiled model directly:
 
 ``` shell
-python3 -m dfss [email protected]:/ext_model_information/LLM/LLM-TPU/qwen2-vl-2b_int4_seq2048_1dev.bmodel
+python3 -m dfss [email protected]:/ext_model_information/LLM/LLM-TPU/qwen2.5-vl-3b_w4bf16_seq8192.bmodel
 ```
 
 #### 1. Download docker and start the container
````

models/Qwen2_5_VL/compile/compile.sh
Lines changed: 0 additions & 2 deletions

```diff
@@ -188,8 +188,6 @@ vision_transformer() {
   model_deploy.py \
     --mlir vit.mlir \
     ${half_quantize_args} \
-    --quant_input \
-    --quant_input_list 3 \
     --quant_output \
     --high_precision \
     --chip bm1684x \
```

models/Qwen2_5_VL/compile/export_onnx.py
Lines changed: 10 additions & 3 deletions

```diff
@@ -428,7 +428,8 @@ def test_net_with_mask(mode, messages):
         padding=True,
         return_tensors="pt",
     )
-
+    del image_inputs, video_inputs
+    torch.cuda.empty_cache()
     input_ids = inputs.input_ids
     if mode == "image":
         pixel_values = inputs.pixel_values
@@ -445,7 +446,8 @@ def test_net_with_mask(mode, messages):
 
     # vit
     vit_embeds = vit_launch(pixel_values, grid_thw)
-
+    del pixel_values
+    torch.cuda.empty_cache()
     # embedding
     input_ids_prefill = torch.zeros(1, SEQ_LENGTH).to(torch.int32).to(device)
     input_ids_prefill[:, :input_ids.shape[-1]] = input_ids
@@ -482,6 +484,8 @@ def test_net_with_mask(mode, messages):
         v[:, input_ids.shape[-1]:, :, :] = 0
         k_cache.append(k)
         v_cache.append(v)
+        del k, v
+        torch.cuda.empty_cache()
     inputs_embeds = inputs_embeds[:, input_ids.shape[-1] - 1:input_ids.shape[-1]].view(1, 1, HIDDEN_SIZE)
     lm = LmHead()
 
@@ -494,6 +498,7 @@ def test_net_with_mask(mode, messages):
         token_len += 1
         input_id = torch.tensor([token]).to(device)
         out = embed(input_id).view(1, 1, HIDDEN_SIZE)
+        del input_id
         valid_position_ids += 1
         position_ids = torch.tensor(3*[[[valid_position_ids]]]).to(device)
 
@@ -506,6 +511,8 @@ def test_net_with_mask(mode, messages):
                 k_cache[i].to(dtype), v_cache[i].to(dtype))
             k_cache[i][:, token_len-1:token_len, :, :] = k[:, :, :, :]
             v_cache[i][:, token_len-1:token_len, :, :] = v[:, :, :, :]
+            del k, v
+            torch.cuda.empty_cache()
         token = greedy(lm(out.to(dtype))).view(1)
         out_ids.append(int(token))
         words = tokenizer.decode(out_ids)
@@ -592,7 +599,7 @@ def test_image(path, resized_height, resized_width):
 print("\033[31mWhen the input is an image, pay attention to resized_height and resized_width to avoid image quality loss caused by resizing\033[0m")
 
 
-# test_image(path = "./../python_demo/image1.jpg", resized_height=280, resized_width=420)
+# test_image(path = "./../python_demo/test.jpg", resized_height=280, resized_width=420)
 # test_video(path = "./sample.mp4")
 
 # convert
```
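Every addition in this file follows the same pattern: drop the last reference to a large tensor with `del`, then call `torch.cuda.empty_cache()` so the caching allocator releases the freed blocks before the next stage allocates. A minimal standalone sketch of the pattern; the function and tensor names below are illustrative, not from export_onnx.py:

```python
import torch

def two_stage(x: torch.Tensor) -> torch.Tensor:
    # Stage 1 produces a large intermediate.
    hidden = x @ x.transpose(-1, -2)
    pooled = hidden.mean(dim=-1)
    # Drop the last reference before stage 2 allocates, then ask the
    # caching allocator to hand the freed blocks back to the driver.
    del hidden
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # Stage 2 now allocates into the freed headroom.
    return pooled @ pooled.transpose(-1, -2)

if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    x = torch.randn(4, 512, 512, device=device)
    print(two_stage(x).shape)  # torch.Size([4, 4])
```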

models/Qwen2_5_VL/python_demo/README.md
Lines changed: 10 additions & 0 deletions

````diff
@@ -16,4 +16,14 @@ cd build
 cmake ..
 make
 mv chat*.so ../
+```
+
+## Download the model
+```shell
+python3 -m dfss [email protected]:/ext_model_information/LLM/LLM-TPU/qwen2.5-vl-3b_w4bf16_seq8192.bmodel
+```
+
+## Run
+``` shell
+python3 pipeline.py --model_path ../qwen2.5-vl-3b_w4bf16_seq8192_1dev_20250226_104241.bmodel --config_path config/
 ```
````

models/Qwen2_5_VL/python_demo/chat.cpp
Lines changed: 16 additions & 19 deletions

```diff
@@ -40,8 +40,10 @@ class Qwen2VL {
   Qwen2VL() : sgen(std::random_device()()) {};
 
 private:
-  std::vector<int> make_vit_posid(std::vector<int> &grid_thw);
-  std::vector<uint16_t> make_vit_attn_mask(std::vector<int> &grid_thw);
+  void make_vit_posid(std::vector<int> &grid_thw,
+                      std::vector<int> &position_ids);
+  void make_vit_attn_mask(std::vector<int> &grid_thw,
+                          std::vector<float> &attention_mask);
   std::vector<int> make_posid(const std::vector<int> &grid_thw, int vit_offset,
                               int valid_vit_length, int token_length);
 
@@ -186,7 +188,8 @@ void Qwen2VL::head_launch(const bm_net_info_t *net,
   bm_thread_sync(bm_handle);
 }
 
-std::vector<int> Qwen2VL::make_vit_posid(std::vector<int> &grid_thw) {
+void Qwen2VL::make_vit_posid(std::vector<int> &grid_thw,
+                             std::vector<int> &pos_ids) {
   int t = grid_thw[0];
   int h = grid_thw[1];
   int w = grid_thw[2];
@@ -214,18 +217,16 @@ std::vector<int> Qwen2VL::make_vit_posid(std::vector<int> &grid_thw) {
   }
 
   int valid_vit_pixels = h * w;
-  std::vector<int> pos_ids(MAX_PIXELS * 2, 0);
   for (int i = 0; i < t; ++i) {
     for (int j = 0; j < valid_vit_pixels; ++j) {
       pos_ids[i * valid_vit_pixels + 2 * j] = hpos_ids[j];
       pos_ids[i * valid_vit_pixels + 2 * j + 1] = wpos_ids[j];
     }
   }
-
-  return pos_ids;
 }
 
-std::vector<uint16_t> Qwen2VL::make_vit_attn_mask(std::vector<int> &grid_thw) {
+void Qwen2VL::make_vit_attn_mask(std::vector<int> &grid_thw,
+                                 std::vector<float> &attention_mask) {
   // Extract t, h, w from grid_thw
   int t = grid_thw[0];
   int h = grid_thw[1];
@@ -237,9 +238,6 @@ std::vector<uint16_t> Qwen2VL::make_vit_attn_mask(std::vector<int> &grid_thw) {
     cu_seqlens[i] = h * w * i;
   }
 
-  // Initialize attention_mask with -10000
-  std::vector<uint16_t> attention_mask(MAX_PIXELS * MAX_PIXELS, ATTENTION_MASK);
-
   // Update attention_mask based on cu_seqlens
   for (size_t i = 1; i < cu_seqlens.size(); ++i) {
     int start = cu_seqlens[i - 1];
@@ -253,8 +251,6 @@ std::vector<uint16_t> Qwen2VL::make_vit_attn_mask(std::vector<int> &grid_thw) {
       }
     }
   }
-
-  return attention_mask;
 }
 
 void Qwen2VL::vit_launch(std::vector<float> &pixel_values, int vit_offset,
@@ -264,8 +260,11 @@ void Qwen2VL::vit_launch(std::vector<float> &pixel_values, int vit_offset,
   out_mem = dev_buffer;
   // forward vision transformer
   std::vector<float> pixel_values_pad(MAX_PIXELS * VIT_DIMS, 0);
-  auto position_ids = make_vit_posid(grid_thw);
-  auto attention_mask = make_vit_attn_mask(grid_thw);
+  // Initialize attention_mask with -10000
+  std::vector<float> attention_mask(MAX_PIXELS * MAX_PIXELS, -10000.0f);
+  std::vector<int> position_ids(MAX_PIXELS * 2, 0);
+  make_vit_posid(grid_thw, position_ids);
+  make_vit_attn_mask(grid_thw, attention_mask);
   std::copy(pixel_values.begin(), pixel_values.end(), pixel_values_pad.data());
 
   empty_net(bm_handle, net_vit);
@@ -387,20 +386,18 @@ int Qwen2VL::forward_first(std::vector<int> &tokens,
                            std::vector<int> &grid_thw, int vit_offset,
                            int valid_vit_length) {
   std::vector<int> input_ids(SEQLEN, 0);
-  std::vector<uint16_t> attention_mask(SEQLEN * SEQLEN, 0);
+  std::vector<uint16_t> attention_mask(SEQLEN * SEQLEN, ATTENTION_MASK);
   std::copy(tokens.begin(), tokens.end(), input_ids.data());
 
   token_length = tokens.size(); // text input length
 
   auto position_ids =
       make_posid(grid_thw, vit_offset, valid_vit_length, token_length);
   for (int i = 0; i < token_length; i++) {
-    for (int j = 0; j < SEQLEN; j++) {
+    for (int j = 0; j < token_length; j++) {
       if (j <= i) {
         attention_mask[i * SEQLEN + j] = 0;
-      } else {
-        attention_mask[i * SEQLEN + j] = ATTENTION_MASK;
-      }
+      }
     }
   }
```
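The forward_first change is twofold: the attention mask now starts out fully masked (ATTENTION_MASK) instead of all zeros, so rows past token_length no longer attend to everything, and the inner loop only visits j < token_length since everything beyond that is already masked. A small numpy sketch of the equivalent construction; SEQLEN and the mask constant here are stand-ins, not the repo's values:

```python
import numpy as np

SEQLEN = 6      # stand-in; the real value comes from the compiled bmodel
MASK = 0xF0E2   # stand-in for the repo's ATTENTION_MASK constant

def prefill_mask(token_length: int) -> np.ndarray:
    # Start fully masked, then open only the causal lower triangle of
    # the valid region. Versus the old code (init to 0, if/else over
    # all SEQLEN columns), this writes fewer entries and, crucially,
    # leaves rows >= token_length masked instead of fully open.
    mask = np.full((SEQLEN, SEQLEN), MASK, dtype=np.uint16)
    for i in range(token_length):
        for j in range(i + 1):
            mask[i, j] = 0
    return mask

print(prefill_mask(3))
```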

models/Qwen2_5_VL/python_demo/pipeline.py
Lines changed: 24 additions & 44 deletions

```diff
@@ -45,39 +45,23 @@ def text_message(self):
         return messages
 
     def image_message(self, path):
-        if self.resized_height != None and self.resized_width != None:
-            print("\033[31mWhen the input is an image, make sure resized_height and resized_width match the values used in export_onnx.py\033[0m")
-            messages = [{
-                "role":
-                "user",
-                "content": [
-                    {
-                        "type": "image",
-                        "image": path,
-                        "resized_height": self.resized_height,
-                        "resized_width": self.resized_width,
-                    },
-                    {
-                        "type": "text",
-                        "text": self.input_str
-                    },
-                ],
-            }]
-        else:
-            messages = [{
-                "role":
-                "user",
-                "content": [
-                    {
-                        "type": "image",
-                        "image": path,
-                    },
-                    {
-                        "type": "text",
-                        "text": self.input_str
-                    },
-                ],
-            }]
+        print("\033[31mWhen the input is an image, make sure resized_height and resized_width match the values used in export_onnx.py\033[0m")
+        messages = [{
+            "role":
+            "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": path,
+                    "resized_height": self.resized_height,
+                    "resized_width": self.resized_width,
+                },
+                {
+                    "type": "text",
+                    "text": self.input_str
+                },
+            ],
+        }]
         return messages
 
     def video_message(self, path):
@@ -151,18 +135,14 @@ def chat(self):
                 break
 
             media_path = input("\nImage or Video Path: ")
-            if media_path == "":
-                media_type = "text"
-                messages = self.text_message()
+            if not os.path.exists(media_path):
+                print("Can't find image or video: {}".format(media_path))
+                continue
+            media_type = self.get_media_type(media_path)
+            if media_type == "image":
+                messages = self.image_message(media_path)
             else:
-                if not os.path.exists(media_path):
-                    print("Can't find image or video: {}".format(media_path))
-                    continue
-                media_type = self.get_media_type(media_path)
-                if media_type == "image":
-                    messages = self.image_message(media_path)
-                else:
-                    messages = self.video_message(media_path)
+                messages = self.video_message(media_path)
             inputs = self.process(messages)
             print("\nAnswer:")
```
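With this change chat() always expects a media path: the empty-input fallback to text_message() is gone, missing paths are rejected up front, and the message builder is picked by media type. A sketch of that control flow, assuming get_media_type() classifies by file extension (the repo's actual helper may work differently):

```python
import os

IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp"}  # assumed set

def get_media_type(path):
    # Assumed behavior: classify by extension, anything else is video.
    return "image" if os.path.splitext(path)[1].lower() in IMAGE_EXTS else "video"

def build_messages(media_path):
    # Mirrors the new chat() flow: validate first, then dispatch;
    # there is no text-only branch anymore.
    if not os.path.exists(media_path):
        print("Can't find image or video: {}".format(media_path))
        return None
    if get_media_type(media_path) == "image":
        return {"type": "image", "image": media_path}
    return {"type": "video", "video": media_path}

print(build_messages("missing.jpg"))  # prints the error and returns None
```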

support/include/utils.h
Lines changed: 11 additions & 9 deletions

```diff
@@ -542,7 +542,7 @@ void compare_out_net(
 // Dump to file
 //===------------------------------------------------------------===//
 void dump_tensor_to_file(bm_handle_t &bm_handle, bm_tensor_t &t,
-                         bm_shape_t bm_shape, const std::string &filename,
+                         bm_shape_t bm_shape, cnpy::npz_t &npz_map,
                          bm_data_type_t tensor_type,
                          const std::string &tensor_name) {
   int mem_size = bm_mem_get_device_size(t.device_mem);
@@ -556,7 +556,7 @@ void dump_tensor_to_file(bm_handle_t &bm_handle, bm_tensor_t &t,
     for (size_t i = 0; i < data.size(); i++) {
       data[i] = fp16_ieee_to_fp32_value(buffer[i]);
     }
-    cnpy::npz_save(filename, tensor_name, data.data(), shape, "a");
+    cnpy::npz_add_array(npz_map, tensor_name, data.data(), shape);
   } else if (tensor_type == BM_BFLOAT16) {
     // BF16
     int cnt = mem_size / sizeof(uint16_t);
@@ -566,19 +566,19 @@ void dump_tensor_to_file(bm_handle_t &bm_handle, bm_tensor_t &t,
     for (size_t i = 0; i < data.size(); i++) {
       data[i] = bf16_to_fp32_value(buffer[i]);
     }
-    cnpy::npz_save(filename, tensor_name, data.data(), shape, "a");
+    cnpy::npz_add_array(npz_map, tensor_name, data.data(), shape);
   } else if (tensor_type == BM_INT32) {
     // INT32
     int cnt = mem_size / sizeof(int32_t);
     std::vector<int> data(cnt);
     bm_memcpy_d2s(bm_handle, data.data(), t.device_mem);
-    cnpy::npz_save(filename, tensor_name, data.data(), shape, "a");
+    cnpy::npz_add_array(npz_map, tensor_name, data.data(), shape);
   } else if (tensor_type == BM_FLOAT32) {
     // FLOAT32
     int cnt = mem_size / sizeof(float);
     std::vector<float> data(cnt);
     bm_memcpy_d2s(bm_handle, data.data(), t.device_mem);
-    cnpy::npz_save(filename, tensor_name, data.data(), shape, "a");
+    cnpy::npz_add_array(npz_map, tensor_name, data.data(), shape);
   } else {
     throw std::runtime_error("Not support dtype");
   }
@@ -587,31 +587,33 @@ void dump_tensor_to_file(bm_handle_t &bm_handle, bm_tensor_t &t,
 void dump_net_input_to_file(bm_handle_t &bm_handle, const bm_net_info_t *net,
                             const std::string &filename) {
   std::vector<bm_tensor_t> in_tensors(net->input_num);
-
+  cnpy::npz_t npz_map;
   for (int i = 0; i < net->input_num; i++) {
     bmrt_tensor_with_device(&in_tensors[i], net->stages[0].input_mems[i],
                             net->input_dtypes[i],
                             net->stages[0].input_shapes[i]);
 
     dump_tensor_to_file(bm_handle, in_tensors[i],
-                        net->stages[0].input_shapes[i], filename,
+                        net->stages[0].input_shapes[i], npz_map,
                         net->input_dtypes[i], "input_" + std::to_string(i));
   }
+  cnpy::npz_save_all(filename, npz_map);
 }
 
 void dump_net_output_to_file(bm_handle_t &bm_handle, const bm_net_info_t *net,
                              const std::string &filename) {
   std::vector<bm_tensor_t> out_tensors(net->output_num);
-
+  cnpy::npz_t npz_map;
   for (int i = 0; i < net->output_num; i++) {
     bmrt_tensor_with_device(&out_tensors[i], net->stages[0].output_mems[i],
                             net->output_dtypes[i],
                             net->stages[0].output_shapes[i]);
 
     dump_tensor_to_file(bm_handle, out_tensors[i],
-                        net->stages[0].output_shapes[i], filename,
+                        net->stages[0].output_shapes[i], npz_map,
                         net->output_dtypes[i], "output_" + std::to_string(i));
   }
+  cnpy::npz_save_all(filename, npz_map);
 }
 
 void dump_net_to_file(bm_handle_t &bm_handle, const bm_net_info_t *net,
```
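The dump helpers used to append one array at a time with cnpy::npz_save(filename, ..., "a"), which reopens and rewrites the archive on every call; they now accumulate everything in a cnpy::npz_t map and write once. Note that npz_add_array and npz_save_all appear to be this repo's own extensions to cnpy rather than the upstream API. The same accumulate-then-save pattern, sketched with numpy's native npz support:

```python
import numpy as np

# Collect all tensors in a dict first, then write the archive once;
# appending per tensor (the old "a" mode) rewrites the file each time.
npz_map = {}
for i in range(3):
    npz_map["input_" + str(i)] = np.random.rand(2, 4).astype(np.float32)
np.savez("net_inputs.npz", **npz_map)

loaded = np.load("net_inputs.npz")
print(sorted(loaded.files))  # ['input_0', 'input_1', 'input_2']
```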
