Skip to content

HIVE-29066: PARTITION_NAME_WHITELIST_PATTERN is not honouring session level configuration #5943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions common/src/java/org/apache/hive/common/util/HiveStringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hive.common.util;

import com.google.common.base.Splitter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
Expand All @@ -38,8 +39,6 @@
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import com.google.common.base.Splitter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.EntityArrays;
Expand Down Expand Up @@ -1080,19 +1079,17 @@ public static boolean commentProvided(char[] chars) {
return false;
}

public static String getPartitionValWithInvalidCharacter(List<String> partVals,
Pattern partitionValidationPattern) {
if (partitionValidationPattern == null) {
return null;
public static String getPartitionValWithInvalidCharacter(
List<String> partVals, Pattern partitionValidationPattern) {
String result = null;
if (partitionValidationPattern != null) {
result =
partVals.stream()
.filter(partVal -> !partitionValidationPattern.matcher(partVal).matches())
.findFirst()
.orElse(null);
}

for (String partVal : partVals) {
if (!partitionValidationPattern.matcher(partVal).matches()) {
return partVal;
}
}

return null;
return result;
}

/**
Expand Down
35 changes: 21 additions & 14 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils.setWriteOperation;
import static org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils.setWriteOperationIsSorted;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
Expand All @@ -38,20 +39,17 @@
import java.util.Properties;
import java.util.Set;
import java.util.function.BiFunction;

import com.google.common.collect.Lists;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConfUtil;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConfUtil;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.Utilities.MissingBucketsContext;
Expand Down Expand Up @@ -81,7 +79,11 @@
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.serde2.*;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
Expand All @@ -92,13 +94,11 @@
import org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyShim;
import org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue;
import org.apache.hadoop.hive.shims.ShimLoader;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

import org.apache.hive.common.util.HiveStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -1125,12 +1125,19 @@ public void process(Object row, int tag) throws HiveException {
}
}

String invalidPartitionVal;
if((invalidPartitionVal = HiveStringUtils.getPartitionValWithInvalidCharacter(dpVals, dpCtx.getWhiteListPattern()))!=null) {
throw new HiveFatalException("Partition value '" + invalidPartitionVal +
"' contains a character not matched by whitelist pattern '" +
dpCtx.getWhiteListPattern().toString() + "'. " + "(configure with " +
HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN.varname + ")");
String invalidPartitionVal =
HiveStringUtils.getPartitionValWithInvalidCharacter(
dpVals, dpCtx.getWhiteListPattern());

if (invalidPartitionVal != null) {
String errorMsg =
("Partition value '%s' contains a character not matched by whitelist pattern '%s'. Configure with %s")
.formatted(
invalidPartitionVal,
dpCtx.getWhiteListPattern().toString(),
MetastoreConf.ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getVarname());

throw new HiveFatalException(errorMsg);
}
fpaths = getDynOutPaths(dpVals, lbDirName);
dynamicPartitionSpecs.add(fpaths.dpDirForCounters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public DynamicPartitionCtx(List<String> partColNames, String defaultPartName,
this.spPath = null;
String confVal;
try {
confVal = Hive.get().getMetaConf(ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getHiveName());
confVal = Hive.get().getMetaConf(ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getVarname());
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is enough for fixing dynamic partition. Reasoning:

hivename: hive.metastore.partition.name.whitelist.pattern
varname: metastore.partition.name.whitelist.pattern

When users set metaconf:metastore.partition.name.whitelist.pattern=[^9]*, the varname is used, not the hivename. I believe session-level configuration never worked before either.

NOTE: This also means that in hive-site.xml as well this is now going to be picked instead of hivename.

  <property>
    <name>metastore.partition.name.whitelist.pattern</name>
    <value>[^9]*</value>
  </property>

This shouldn't be an issue because:

  1. in HiveConf this config is deprecated.
  2. If users still use hive.metastore.partition.name.whitelist.pattern, then MoveTask will also fail (add_partitions_req), which is indirectly the expected behaviour
Error: Error while compiling statement: FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.exec.MoveTask. MetaException(message:Partition value '09' contains a character not matched by whitelist pattern '[^9]*'. Configure with metastore.partition.name.whitelist.pattern); Query ID: raghav_20250729215001_82d33e22-04db-4be3-8b01-189504089bf6 (state=08S01,code=40000)
  3. If users use hive.metastore.partition.name.whitelist.pattern for an HMS-side operation like alter, it will also work because MetastoreConf#getVar() checks the hivename as a failsafe. That's why the q files do not need to change.

} catch (HiveException e) {
throw new SemanticException(e);
}
Expand Down Expand Up @@ -126,7 +126,7 @@ public DynamicPartitionCtx(Map<String, String> partSpec, String defaultPartName,
}
String confVal;
try {
confVal = Hive.get().getMetaConf(ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getHiveName());
confVal = Hive.get().getMetaConf(ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getVarname());
} catch (HiveException e) {
throw new SemanticException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
set hive.strict.checks.bucketing=false;

set hive.mapred.mode=nonstrict;
SET hive.metastore.partition.name.whitelist.pattern=[^9]*;
set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifyTableDirectoryIsEmptyHook;

set hive.exec.dynamic.partition=true;
Expand All @@ -17,4 +16,5 @@ load data local inpath '../../data/files/bmj/000000_0' INTO TABLE source_table p
-- If the directory is not empty the hook will throw an error, instead the error should come from the metastore
-- This shows that no dynamic partitions were created and left behind or had directories created

SET metaconf:metastore.partition.name.whitelist.pattern=[^9]*;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for moving this line below is that the metaconf is reset after any meta-operation (e.g. create). My assumption is that a metaconf setting should always appear on the line immediately before the config is consumed by the query. The question can be tracked here.

insert overwrite table dest_table partition (ds, hr) select key, hr, ds, value from source_table where ds='2008-04-08' and value='val_129' order by value asc;
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
set metaconf:metastore.partition.name.whitelist.pattern;

create table t1 (id int) partitioned by (pcol string);
alter table t1 add partition (pCol='2025-06-09');

set metaconf:metastore.partition.name.whitelist.pattern=[^9]*;
alter table t1 add partition (pCol='2025-06-19');
show partitions t1;

Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ POSTHOOK: Input: default@part_whitelist_test
PREHOOK: query: ALTER TABLE part_whitelist_test ADD PARTITION (ds='1,2,3,4')
PREHOOK: type: ALTERTABLE_ADDPARTS
PREHOOK: Output: default@part_whitelist_test
FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. MetaException(message:Partition value '1,2,3,4' contains a character not matched by whitelist pattern '[\\x20-\\x7E&&[^,]]*'. (configure with metastore.partition.name.whitelist.pattern))
FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. MetaException(message:Partition value '1,2,3,4' contains a character not matched by whitelist pattern '[\\x20-\\x7E&&[^,]]*'. Configure with metastore.partition.name.whitelist.pattern)
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ PREHOOK: query: ALTER TABLE part_whitelist_test PARTITION (ds='1') rename to par
PREHOOK: type: ALTERTABLE_RENAMEPART
PREHOOK: Input: default@part_whitelist_test
PREHOOK: Output: default@part_whitelist_test@ds=1
FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. Unable to rename partition. Partition value '1,2,3' contains a character not matched by whitelist pattern '[\\x20-\\x7E&&[^,]]*'. (configure with metastore.partition.name.whitelist.pattern)
FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. Unable to rename partition. Partition value '1,2,3' contains a character not matched by whitelist pattern '[\\x20-\\x7E&&[^,]]*'. Configure with metastore.partition.name.whitelist.pattern
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ Vertex failed, vertexName=Map 1, vertexId=vertex_#ID#, diagnostics=[Task failed,
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: Hive Runtime Error while closing operators
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. (configure with hive.metastore.partition.name.whitelist.pattern)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. Configure with metastore.partition.name.whitelist.pattern
#### A masked pattern was here ####
], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: java.lang.RuntimeException: Hive Runtime Error while closing operators
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: Hive Runtime Error while closing operators
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. (configure with hive.metastore.partition.name.whitelist.pattern)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. Configure with metastore.partition.name.whitelist.pattern
#### A masked pattern was here ####
]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Map 1] killed/failed due to:OWN_TASK_FAILURE]
[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
Expand All @@ -48,12 +48,12 @@ FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.T
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: Hive Runtime Error while closing operators
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. (configure with hive.metastore.partition.name.whitelist.pattern)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. Configure with metastore.partition.name.whitelist.pattern
#### A masked pattern was here ####
], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: java.lang.RuntimeException: Hive Runtime Error while closing operators
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: Hive Runtime Error while closing operators
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. (configure with hive.metastore.partition.name.whitelist.pattern)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: Partition value 'val_129' contains a character not matched by whitelist pattern '[^9]*'. Configure with metastore.partition.name.whitelist.pattern
#### A masked pattern was here ####
]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Map 1] killed/failed due to:OWN_TASK_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE]DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:1
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
metaconf:metastore.partition.name.whitelist.pattern=
PREHOOK: query: create table t1 (id int) partitioned by (pcol string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
POSTHOOK: query: create table t1 (id int) partitioned by (pcol string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: alter table t1 add partition (pCol='2025-06-09')
PREHOOK: type: ALTERTABLE_ADDPARTS
PREHOOK: Output: default@t1
POSTHOOK: query: alter table t1 add partition (pCol='2025-06-09')
POSTHOOK: type: ALTERTABLE_ADDPARTS
POSTHOOK: Output: default@t1
POSTHOOK: Output: default@t1@pcol=2025-06-09
PREHOOK: query: alter table t1 add partition (pCol='2025-06-19')
PREHOOK: type: ALTERTABLE_ADDPARTS
PREHOOK: Output: default@t1
FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. MetaException(message:Partition value '2025-06-19' contains a character not matched by whitelist pattern '[^9]*'. Configure with metastore.partition.name.whitelist.pattern)
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ public Configuration getHiveConf() {
private MetaStoreFilterHook filterHook;
private boolean isServerFilterEnabled = false;

private Pattern partitionValidationPattern;
private final boolean isInTest;

@Override
Expand Down Expand Up @@ -366,11 +365,7 @@ public void init() throws MetaException {
endFunctionListeners = MetaStoreServerUtils.getMetaStoreListeners(
MetaStoreEndFunctionListener.class, conf, MetastoreConf.getVar(conf, ConfVars.END_FUNCTION_LISTENERS));

String partitionValidationRegex =
MetastoreConf.getVar(conf, ConfVars.PARTITION_NAME_WHITELIST_PATTERN);
if (partitionValidationRegex != null && !partitionValidationRegex.isEmpty()) {
partitionValidationPattern = Pattern.compile(partitionValidationRegex);
}


expressionProxy = PartFilterExprUtil.createExpressionProxy(conf);
fileMetadataManager = new FileMetadataManager(this.getMS(), conf);
Expand Down Expand Up @@ -467,13 +462,13 @@ public void setConf(Configuration conf) {

@Override
public Configuration getConf() {
  // Return the per-request configuration held in HMSHandlerContext so that
  // session-level overrides (e.g. metaconf settings such as
  // metastore.partition.name.whitelist.pattern) are honoured. When none is
  // cached yet, lazily create a copy of the handler's base conf and store it
  // in the context for subsequent calls in the same request.
  return HMSHandlerContext.getConfiguration()
      .orElseGet(
          () -> {
            Configuration configuration = new Configuration(this.conf);
            HMSHandlerContext.setConfiguration(configuration);
            return configuration;
          });
}

@Override
Expand Down Expand Up @@ -4044,7 +4039,7 @@ private Partition append_partition_common(RawStore ms, String catName, String db
part.setTableName(tableName);
part.setValues(part_vals);

MetaStoreServerUtils.validatePartitionNameCharacters(part_vals, partitionValidationPattern);
MetaStoreServerUtils.validatePartitionNameCharacters(part_vals, getConf());

tbl = ms.getTable(part.getCatName(), part.getDbName(), part.getTableName(), null);
if (tbl == null) {
Expand Down Expand Up @@ -4457,8 +4452,7 @@ private void cleanupPartitionFolders(final Map<PartValEqWrapperLite, Boolean> ad
private void validatePartition(final Partition part, final String catName,
final String tblName, final String dbName, final Set<PartValEqWrapperLite> partsToAdd)
throws MetaException, TException {
MetaStoreServerUtils.validatePartitionNameCharacters(part.getValues(),
partitionValidationPattern);
MetaStoreServerUtils.validatePartitionNameCharacters(part.getValues(), getConf());
if (part.getDbName() == null || part.getTableName() == null) {
throw new MetaException("The database and table name must be set in the partition.");
}
Expand Down Expand Up @@ -6007,8 +6001,7 @@ private void alter_partition_core(String catName, String db_name, String tbl_nam

firePreEvent(new PreAlterPartitionEvent(db_name, tbl_name, table, part_vals, new_part, this));
if (part_vals != null && !part_vals.isEmpty()) {
MetaStoreServerUtils.validatePartitionNameCharacters(new_part.getValues(),
partitionValidationPattern);
MetaStoreServerUtils.validatePartitionNameCharacters(new_part.getValues(), getConf());
}

oldPart = alterHandler.alterPartition(getMS(), wh, catName, db_name, tbl_name,
Expand Down Expand Up @@ -8645,18 +8638,17 @@ public List<String> set_ugi(String username, List<String> groupNames) throws TEx
}

@Override
public boolean partition_name_has_valid_characters(List<String> part_vals,
boolean throw_exception) throws TException {
public boolean partition_name_has_valid_characters(
List<String> part_vals, boolean throw_exception) throws TException {
startFunction("partition_name_has_valid_characters");
boolean ret;
Exception ex = null;
try {
if (throw_exception) {
MetaStoreServerUtils.validatePartitionNameCharacters(part_vals, partitionValidationPattern);
MetaStoreServerUtils.validatePartitionNameCharacters(part_vals, getConf());
ret = true;
} else {
ret = MetaStoreServerUtils.partitionNameHasValidCharacters(part_vals,
partitionValidationPattern);
ret = MetaStoreServerUtils.partitionNameHasValidCharacters(part_vals, getConf());
}
} catch (Exception e) {
ex = e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ private enum TXN_STATUS {
private volatile int openTrasactionCalls = 0;
private Transaction currentTransaction = null;
private TXN_STATUS transactionStatus = TXN_STATUS.NO_STATE;
private Pattern partitionValidationPattern;
private Counter directSqlErrors;
private boolean areTxnStatsSupported = false;
private PropertyStore propertyStore;
Expand Down Expand Up @@ -384,15 +383,7 @@ public void setConf(Configuration conf) {
transactionStatus = TXN_STATUS.NO_STATE;

initialize();

String partitionValidationRegex =
MetastoreConf.getVar(this.conf, ConfVars.PARTITION_NAME_WHITELIST_PATTERN);
if (partitionValidationRegex != null && !partitionValidationRegex.isEmpty()) {
partitionValidationPattern = Pattern.compile(partitionValidationRegex);
} else {
partitionValidationPattern = null;
}


// Note, if metrics have not been initialized this will return null, which means we aren't
// using metrics. Thus we should always check whether this is non-null before using.
MetricRegistry registry = Metrics.getRegistry();
Expand Down Expand Up @@ -2773,8 +2764,7 @@ protected String describeResult() {

private boolean isValidPartition(
Partition part, List<FieldSchema> partitionKeys, boolean ifNotExists) throws MetaException {
MetaStoreServerUtils.validatePartitionNameCharacters(part.getValues(),
partitionValidationPattern);
MetaStoreServerUtils.validatePartitionNameCharacters(part.getValues(), conf);
boolean doesExist = doesPartitionExist(part.getCatName(),
part.getDbName(), part.getTableName(), partitionKeys, part.getValues());
if (doesExist && !ifNotExists) {
Expand Down
Loading
Loading