From f3baf576d7da13ff79dbfe52938f22834fb6c0d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20Lepp=C3=A4nen?=
Date: Wed, 24 Oct 2018 13:38:50 +0300
Subject: [PATCH 1/2] As Glue limits comments to 255 characters, we may need to truncate them.

---
  Review note (ignored by git am): extract_row() builds the
  (name, type, comment) payload tuple. truncate() slices the comment to its
  first 255 items when the value supports indexing and returns any other
  value (for example None) unchanged.

 .../src/hive_metastore_migration.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
index e9f76bd7..ed898e4a 100644
--- a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
+++ b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
@@ -414,6 +414,14 @@ def transform_ms_bucketing_cols(self, ms_bucketing_cols):
                                           payload_func=lambda row: row['BUCKET_COL_NAME'])
 
     def transform_ms_columns(self, ms_columns):
+        def extract_row(row):
+            def truncate(x):
+                return x[:255] if hasattr(x,"__getitem__") else x
+            return (
+                row['COLUMN_NAME'],
+                row['TYPE_NAME'],
+                truncate(row['COMMENT'])
+            )
         return self.transform_df_with_idx(df=ms_columns,
                                           id_col='CD_ID',
                                           idx='INTEGER_IDX',
@@ -422,8 +430,7 @@ def transform_ms_columns(self, ms_columns):
                                               StructField(name='name', dataType=StringType()),
                                               StructField(name='type', dataType=StringType()),
                                               StructField(name='comment', dataType=StringType())]),
-                                          payload_func=lambda row: (
-                                              row['COLUMN_NAME'], row['TYPE_NAME'], row['COMMENT']))
+                                          payload_func=extract_row)
 
     def transform_ms_skewed_col_names(self, ms_skewed_col_names):
         return self.transform_df_with_idx(df=ms_skewed_col_names,
From 986c5c500807b82346a199f6bc8afdf1fab2c3c6 Mon Sep 17 00:00:00 2001
From: Ryo Manabe
Date: Fri, 23 May 2025 18:15:38 +0900
Subject: [PATCH 2/2] truncate comments to 255 characters for partition key

---
  Review note (ignored by git am): the second element of the payload tuple
  is row['PKEY_TYPE'], matching the "type" field of the payload schema and
  the lambda this patch removes. Passing row['PKEY_NAME'] twice would store
  the partition key's name in its type field. Only the comment is truncated;
  None and other non-indexable values pass through unchanged.

 .../src/hive_metastore_migration.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
index 6eaa691a..1984ca30 100644
--- a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
+++ b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
@@ -475,6 +475,14 @@ def transform_df_with_idx(self, df, id_col, idx, payloads_column_name, payload_t
         return self.sql_context.createDataFrame(rdd_result, schema)
 
     def transform_ms_partition_keys(self, ms_partition_keys):
+        def extract_row(row):
+            def truncate(x):
+                return x[:255] if hasattr(x,"__getitem__") else x
+            return (
+                row['PKEY_NAME'],
+                row['PKEY_TYPE'],
+                truncate(row['PKEY_COMMENT'])
+            )
         return self.transform_df_with_idx(
             df=ms_partition_keys,
             id_col="TBL_ID",
@@ -487,7 +495,7 @@ def transform_ms_partition_keys(self, ms_partition_keys):
                     StructField(name="comment", dataType=StringType()),
                 ]
             ),
-            payload_func=lambda row: (row["PKEY_NAME"], row["PKEY_TYPE"], row["PKEY_COMMENT"]),
+            payload_func=extract_row,
         )
 
     def transform_ms_partition_key_vals(self, ms_partition_key_vals):