diff --git a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
index 1e7e05a..1984ca3 100644
--- a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
+++ b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
@@ -475,6 +475,16 @@ def transform_df_with_idx(self, df, id_col, idx, payloads_column_name, payload_t
         return self.sql_context.createDataFrame(rdd_result, schema)
 
     def transform_ms_partition_keys(self, ms_partition_keys):
+        def extract_row(row):
+            """Build the (name, type, comment) payload tuple for one partition-key row."""
+            def truncate(x):
+                # Cap at 255 (presumably the Glue column-comment limit -- confirm); None passes through.
+                return x[:255] if hasattr(x, "__getitem__") else x
+            return (
+                row["PKEY_NAME"],
+                row["PKEY_TYPE"],
+                truncate(row["PKEY_COMMENT"]),
+            )
         return self.transform_df_with_idx(
             df=ms_partition_keys,
             id_col="TBL_ID",
@@ -487,7 +497,7 @@ def transform_ms_partition_keys(self, ms_partition_keys):
                     StructField(name="comment", dataType=StringType()),
                 ]
             ),
-            payload_func=lambda row: (row["PKEY_NAME"], row["PKEY_TYPE"], row["PKEY_COMMENT"]),
+            payload_func=extract_row,
         )
 
     def transform_ms_partition_key_vals(self, ms_partition_key_vals):
@@ -511,6 +521,16 @@ def transform_ms_bucketing_cols(self, ms_bucketing_cols):
         )
 
     def transform_ms_columns(self, ms_columns):
+        def extract_row(row):
+            """Build the (name, type, comment) payload tuple for one column row."""
+            def truncate(x):
+                # Cap at 255 (presumably the Glue column-comment limit -- confirm); None passes through.
+                return x[:255] if hasattr(x, "__getitem__") else x
+            return (
+                row["COLUMN_NAME"],
+                row["TYPE_NAME"],
+                truncate(row["COMMENT"]),
+            )
         return self.transform_df_with_idx(
             df=ms_columns,
             id_col="CD_ID",
@@ -523,7 +543,7 @@ def transform_ms_columns(self, ms_columns):
                     StructField(name="comment", dataType=StringType()),
                 ]
             ),
-            payload_func=lambda row: (row["COLUMN_NAME"], row["TYPE_NAME"], row["COMMENT"]),
+            payload_func=extract_row,
         )
 
     def transform_ms_skewed_col_names(self, ms_skewed_col_names):