
Commit 2abe945

fix: optimize the handling of CLIP embedding weight (#840)

1 parent: f3140ea

File tree: 2 files changed, +7 -4 lines

clip.hpp

Lines changed: 5 additions & 4 deletions

@@ -553,12 +553,13 @@ class CLIPEmbeddings : public GGMLBlock {
     void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         enum ggml_type token_wtype = GGML_TYPE_F32;
         if (!force_clip_f32) {
-            auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
-            if (tensor_type != tensor_types.end())
+            auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
+            std::set<ggml_type> allow_types = {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
+            if (tensor_type != tensor_types.end() && allow_types.find(tensor_type->second) != allow_types.end()) {
                 token_wtype = tensor_type->second;
+            }
         }
-        enum ggml_type position_wtype = GGML_TYPE_F32;
-
+        enum ggml_type position_wtype = GGML_TYPE_F32;
         params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
         params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
     }
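In short, the hunk keeps the token embedding weight in whatever type the model file stores it in, but only if that type is on a small allow list; any other type (or force_clip_f32) falls back to F32. Below is a minimal standalone sketch of that decision for readers outside the repository; the ggml_type enum, the String2GGMLType alias, and the pick_token_wtype helper are illustrative stand-ins rather than identifiers from the codebase (only the GGML_TYPE_* names mirror real ggml constants).

#include <iostream>
#include <map>
#include <set>
#include <string>

// Stand-in for ggml's ggml_type enum; only the values referenced by the commit.
enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
                 GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0,
                 GGML_TYPE_Q4_1, GGML_TYPE_Q4_0 };

// Stand-in for the String2GGMLType map passed to init_params().
using String2GGMLType = std::map<std::string, ggml_type>;

// Hypothetical helper mirroring the allow-list logic added to
// CLIPEmbeddings::init_params().
ggml_type pick_token_wtype(const String2GGMLType& tensor_types,
                           const std::string& prefix,
                           bool force_clip_f32) {
    ggml_type token_wtype = GGML_TYPE_F32;
    if (!force_clip_f32) {
        auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
        // Only these types are accepted for the token embedding weight.
        static const std::set<ggml_type> allow_types = {
            GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1,
            GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
        if (tensor_type != tensor_types.end() && allow_types.count(tensor_type->second)) {
            token_wtype = tensor_type->second;  // keep the on-disk type
        }
    }
    return token_wtype;  // anything else stays F32
}

int main() {
    String2GGMLType tensor_types = {
        {"te.token_embedding.weight", GGML_TYPE_Q8_0},
        {"te2.token_embedding.weight", GGML_TYPE_BF16}};
    std::cout << pick_token_wtype(tensor_types, "te.", false) << "\n";   // Q8_0 allowed, kept
    std::cout << pick_token_wtype(tensor_types, "te2.", false) << "\n";  // BF16 not allowed -> F32
    std::cout << pick_token_wtype(tensor_types, "te.", true) << "\n";    // force_clip_f32 -> F32
}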

model.cpp

Lines changed: 2 additions & 0 deletions

@@ -2422,6 +2422,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
         // Pass, do not convert. For MMDiT
     } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
         // Pass, do not convert. For Unet
+    } else if (contains(name, "embedding")) {
+        // Pass, do not convert embedding
     } else {
         return true;
     }
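This hunk extends the name filter in ModelLoader::tensor_should_be_converted so that any tensor whose name contains "embedding" is left in its original type instead of being converted during load. A rough standalone sketch of that filter follows; should_convert_by_name and the local contains() helper are illustrative stand-ins, and the sketch deliberately omits the other checks (tensor shape, target type, the remaining name patterns) that the real function performs first.

#include <iostream>
#include <string>

// Stand-in for the contains() helper used throughout model.cpp.
static bool contains(const std::string& s, const std::string& sub) {
    return s.find(sub) != std::string::npos;
}

// Hypothetical, reduced version of the name-based part of
// ModelLoader::tensor_should_be_converted(): returns false to skip conversion.
bool should_convert_by_name(const std::string& name) {
    if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
        return false;  // Pass, do not convert. For Unet
    } else if (contains(name, "embedding")) {
        return false;  // Pass, do not convert embedding (rule added by this commit)
    }
    return true;  // everything else may still be converted
}

int main() {
    // Token embeddings are now skipped...
    std::cout << should_convert_by_name(
        "text_model.embeddings.token_embedding.weight") << "\n";  // 0
    // ...while ordinary weights remain eligible for conversion.
    std::cout << should_convert_by_name(
        "model.diffusion_model.input_blocks.1.1.proj_in.weight") << "\n";  // 1
}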
