@@ -155,43 +155,46 @@ def create_config_id(
155
155
"""
156
156
# Possibly add a suffix to the name to handle custom features/data_files/config_kwargs
157
157
suffix : Optional [str ] = None
158
- config_kwargs_to_add_to_suffix = config_kwargs .copy ()
159
- # name and version are already used to build the cache directory
160
- config_kwargs_to_add_to_suffix .pop ("name" , None )
161
- config_kwargs_to_add_to_suffix .pop ("version" , None )
162
- # data dir handling (when specified it points to the manually downloaded data):
163
- # it was previously ignored before the introduction of config id because we didn't want
164
- # to change the config name. Now it's fine to take it into account for the config id.
165
- # config_kwargs_to_add_to_suffix.pop("data_dir", None)
166
- if "data_dir" in config_kwargs_to_add_to_suffix :
167
- if config_kwargs_to_add_to_suffix ["data_dir" ] is None :
168
- config_kwargs_to_add_to_suffix .pop ("data_dir" , None )
169
- else :
170
- # canonicalize the data dir to avoid two paths to the same location having different
171
- # hashes
172
- data_dir = config_kwargs_to_add_to_suffix ["data_dir" ]
173
- data_dir = os .path .normpath (data_dir )
174
- config_kwargs_to_add_to_suffix ["data_dir" ] = data_dir
175
- if config_kwargs_to_add_to_suffix :
176
- # we don't care about the order of the kwargs
177
- config_kwargs_to_add_to_suffix = {
178
- k : config_kwargs_to_add_to_suffix [k ] for k in sorted (config_kwargs_to_add_to_suffix )
179
- }
180
- if all (isinstance (v , (str , bool , int , float )) for v in config_kwargs_to_add_to_suffix .values ()):
181
- suffix = "," .join (
182
- str (k ) + "=" + urllib .parse .quote_plus (str (v )) for k , v in config_kwargs_to_add_to_suffix .items ()
183
- )
184
- if len (suffix ) > 32 : # hash if too long
158
+ if "dataset_id_suffix" in config_kwargs and config_kwargs ["dataset_id_suffix" ] is not None :
159
+ suffix = config_kwargs ["dataset_id_suffix" ]
160
+ else :
161
+ config_kwargs_to_add_to_suffix = config_kwargs .copy ()
162
+ # name and version are already used to build the cache directory
163
+ config_kwargs_to_add_to_suffix .pop ("name" , None )
164
+ config_kwargs_to_add_to_suffix .pop ("version" , None )
165
+ # data dir handling (when specified it points to the manually downloaded data):
166
+ # it was previously ignored before the introduction of config id because we didn't want
167
+ # to change the config name. Now it's fine to take it into account for the config id.
168
+ # config_kwargs_to_add_to_suffix.pop("data_dir", None)
169
+ if "data_dir" in config_kwargs_to_add_to_suffix :
170
+ if config_kwargs_to_add_to_suffix ["data_dir" ] is None :
171
+ config_kwargs_to_add_to_suffix .pop ("data_dir" , None )
172
+ else :
173
+ # canonicalize the data dir to avoid two paths to the same location having different
174
+ # hashes
175
+ data_dir = config_kwargs_to_add_to_suffix ["data_dir" ]
176
+ data_dir = os .path .normpath (data_dir )
177
+ config_kwargs_to_add_to_suffix ["data_dir" ] = data_dir
178
+ if config_kwargs_to_add_to_suffix :
179
+ # we don't care about the order of the kwargs
180
+ config_kwargs_to_add_to_suffix = {
181
+ k : config_kwargs_to_add_to_suffix [k ] for k in sorted (config_kwargs_to_add_to_suffix )
182
+ }
183
+ if all (isinstance (v , (str , bool , int , float )) for v in config_kwargs_to_add_to_suffix .values ()):
184
+ suffix = "," .join (
185
+ str (k ) + "=" + urllib .parse .quote_plus (str (v )) for k , v in config_kwargs_to_add_to_suffix .items ()
186
+ )
187
+ if len (suffix ) > 32 : # hash if too long
188
+ suffix = Hasher .hash (config_kwargs_to_add_to_suffix )
189
+ else :
185
190
suffix = Hasher .hash (config_kwargs_to_add_to_suffix )
186
- else :
187
- suffix = Hasher .hash (config_kwargs_to_add_to_suffix )
188
-
189
- if custom_features is not None :
190
- m = Hasher ()
191
- if suffix :
192
- m .update (suffix )
193
- m .update (custom_features )
194
- suffix = m .hexdigest ()
191
+
192
+ if custom_features is not None :
193
+ m = Hasher ()
194
+ if suffix :
195
+ m .update (suffix )
196
+ m .update (custom_features )
197
+ suffix = m .hexdigest ()
195
198
196
199
if suffix :
197
200
config_id = self .name + "-" + suffix
0 commit comments