diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java index 14a949b0e0..a13b5d2707 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java @@ -20,23 +20,15 @@ import static org.apache.parquet.column.Encoding.PLAIN_DICTIONARY; import static org.apache.parquet.column.Encoding.RLE_DICTIONARY; -import static org.apache.parquet.format.Util.readColumnMetaData; import com.fasterxml.jackson.annotation.JsonIgnore; -import java.io.ByteArrayInputStream; -import java.io.IOException; import java.util.Set; import org.apache.parquet.column.Encoding; import org.apache.parquet.column.EncodingStats; import org.apache.parquet.column.statistics.BooleanStatistics; import org.apache.parquet.column.statistics.SizeStatistics; import org.apache.parquet.column.statistics.Statistics; -import org.apache.parquet.crypto.AesCipher; -import org.apache.parquet.crypto.InternalColumnDecryptionSetup; import org.apache.parquet.crypto.InternalFileDecryptor; -import org.apache.parquet.crypto.ModuleCipherFactory.ModuleType; -import org.apache.parquet.crypto.ParquetCryptoRuntimeException; -import org.apache.parquet.format.ColumnMetaData; import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.internal.hadoop.metadata.IndexReference; import org.apache.parquet.schema.PrimitiveType; @@ -173,18 +165,18 @@ public static ColumnChunkMetaData get( } /** - * @param path the path of this column in the write schema - * @param type primitive type for this column - * @param codec the compression codec used to compress - * @param encodingStats EncodingStats for the encodings used in this column - * @param encodings a set of encoding used in this column - * @param statistics statistics for the data in this 
column - * @param firstDataPage offset of the first non-dictionary page - * @param dictionaryPageOffset offset of the dictionary page - * @param valueCount number of values - * @param totalSize total compressed size + * @param path the path of this column in the write schema + * @param type primitive type for this column + * @param codec the compression codec used to compress + * @param encodingStats EncodingStats for the encodings used in this column + * @param encodings a set of encoding used in this column + * @param statistics statistics for the data in this column + * @param firstDataPage offset of the first non-dictionary page + * @param dictionaryPageOffset offset of the dictionary page + * @param valueCount number of values + * @param totalSize total compressed size * @param totalUncompressedSize uncompressed data size - * @param sizeStatistics size statistics for the data in this column + * @param sizeStatistics size statistics for the data in this column * @return a column chunk metadata instance */ public static ColumnChunkMetaData get( @@ -207,33 +199,37 @@ && positiveLongFitsInAnInt(dictionaryPageOffset) && positiveLongFitsInAnInt(valueCount) && positiveLongFitsInAnInt(totalSize) && positiveLongFitsInAnInt(totalUncompressedSize)) { - return new IntColumnChunkMetaData( - path, - type, - codec, - encodingStats, - encodings, - statistics, - firstDataPage, - dictionaryPageOffset, - valueCount, - totalSize, - totalUncompressedSize, - sizeStatistics); + return factory() + .intColumnChunkMetaData() + .withPath(path) + .withPrimitiveType(type) + .withCodec(codec) + .withEncodingStats(encodingStats) + .withEncodings(encodings) + .withStatistics(statistics) + .withDictionaryPageOffset(positiveLongToInt(dictionaryPageOffset)) + .withValueCount(positiveLongToInt(valueCount)) + .withTotalSize(positiveLongToInt(totalSize)) + .withTotalUncompressedSize(positiveLongToInt(totalUncompressedSize)) + .withFirstDataPage(positiveLongToInt(firstDataPage)) + 
.withSizeStatistics(sizeStatistics) + .build(); } else { - return new LongColumnChunkMetaData( - path, - type, - codec, - encodingStats, - encodings, - statistics, - firstDataPage, - dictionaryPageOffset, - valueCount, - totalSize, - totalUncompressedSize, - sizeStatistics); + return factory() + .longColumnChunkMetaData() + .withPath(path) + .withPrimitiveType(type) + .withCodec(codec) + .withEncodingStats(encodingStats) + .withEncodings(encodings) + .withStatistics(statistics) + .withFirstDataPageOffset(firstDataPage) + .withDictionaryPageOffset(dictionaryPageOffset) + .withValueCount(valueCount) + .withTotalSize(totalSize) + .withTotalUncompressedSize(totalUncompressedSize) + .withSizeStatistics(sizeStatistics) + .build(); } } @@ -253,16 +249,19 @@ public static ColumnChunkMetaData getWithEncryptedMetadata( int rowGroupOrdinal, int columnOrdinal, String createdBy) { - return new EncryptedColumnChunkMetaData( - parquetMetadataConverter, - path, - type, - encryptedMetadata, - columnKeyMetadata, - fileDecryptor, - rowGroupOrdinal, - columnOrdinal, - createdBy); + + return factory() + .encryptedColumnChunkMetaData() + .withParquetMetadataConverter(parquetMetadataConverter) + .withPath(path) + .withPrimitiveType(type) + .withEncryptedMetadata(encryptedMetadata) + .withColumnKeyMetadata(columnKeyMetadata) + .withFileDecryptor(fileDecryptor) + .withRowGroupOrdinal(rowGroupOrdinal) + .withColumnOrdinal(columnOrdinal) + .withCreatedBy(createdBy) + .build(); } public void setRowGroupOrdinal(int rowGroupOrdinal) { @@ -298,6 +297,19 @@ protected static boolean positiveLongFitsInAnInt(long value) { return (value >= 0) && (value + Integer.MIN_VALUE <= Integer.MAX_VALUE); } + /** + * stores a positive long into an int (assuming it fits) + * + * @param value + * @return + */ + private static int positiveLongToInt(long value) { + if (!ColumnChunkMetaData.positiveLongFitsInAnInt(value)) { + throw new IllegalArgumentException("value should be positive and fit in an int: " + value); 
+ } + return (int) (value + Integer.MIN_VALUE); + } + EncodingStats encodingStats; // we save 3 references by storing together the column properties that have few distinct values @@ -309,6 +321,48 @@ protected static boolean positiveLongFitsInAnInt(long value) { private long bloomFilterOffset = -1; private int bloomFilterLength = -1; + protected ColumnChunkMetaData(Builder builder) { + this.encodingStats = builder.encodingStats; + this.properties = builder.properties; + } + + public abstract static class Builder> { + protected EncodingStats encodingStats; + protected ColumnChunkProperties properties; + + public T withEncodingStats(EncodingStats encodingStats) { + this.encodingStats = encodingStats; + return self(); + } + + public T withProperties(ColumnChunkProperties properties) { + this.properties = properties; + return self(); + } + + protected abstract T self(); + + public abstract ColumnChunkMetaData build(); + } + + public static ColumnChunkMetaDataFactory factory() { + return new ColumnChunkMetaDataFactory(); + } + + public static class ColumnChunkMetaDataFactory { + public IntColumnChunkMetaData.Builder intColumnChunkMetaData() { + return new IntColumnChunkMetaData.Builder(); + } + + public LongColumnChunkMetaData.Builder longColumnChunkMetaData() { + return new LongColumnChunkMetaData.Builder(); + } + + public EncryptedColumnChunkMetaData.Builder encryptedColumnChunkMetaData() { + return new EncryptedColumnChunkMetaData.Builder(); + } + } + protected ColumnChunkMetaData(ColumnChunkProperties columnChunkProperties) { this(null, columnChunkProperties); } @@ -443,8 +497,7 @@ public void setBloomFilterOffset(long bloomFilterOffset) { } /** - * @param bloomFilterLength - * the reference to the Bloom filter + * @param bloomFilterLength the reference to the Bloom filter */ public void setBloomFilterLength(int bloomFilterLength) { this.bloomFilterLength = bloomFilterLength; @@ -505,349 +558,3 @@ public boolean isEncrypted() { return false; } } - -class 
IntColumnChunkMetaData extends ColumnChunkMetaData { - - private final int firstDataPage; - private final int dictionaryPageOffset; - private final int valueCount; - private final int totalSize; - private final int totalUncompressedSize; - private final Statistics statistics; - private final SizeStatistics sizeStatistics; - - /** - * @param path column identifier - * @param type type of the column - * @param codec - * @param encodings - * @param statistics - * @param firstDataPage - * @param dictionaryPageOffset - * @param valueCount - * @param totalSize - * @param totalUncompressedSize - * @param sizeStatistics - */ - IntColumnChunkMetaData( - ColumnPath path, - PrimitiveType type, - CompressionCodecName codec, - EncodingStats encodingStats, - Set encodings, - Statistics statistics, - long firstDataPage, - long dictionaryPageOffset, - long valueCount, - long totalSize, - long totalUncompressedSize, - SizeStatistics sizeStatistics) { - super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings)); - this.firstDataPage = positiveLongToInt(firstDataPage); - this.dictionaryPageOffset = positiveLongToInt(dictionaryPageOffset); - this.valueCount = positiveLongToInt(valueCount); - this.totalSize = positiveLongToInt(totalSize); - this.totalUncompressedSize = positiveLongToInt(totalUncompressedSize); - this.statistics = statistics; - this.sizeStatistics = sizeStatistics; - } - - /** - * stores a positive long into an int (assuming it fits) - * - * @param value - * @return - */ - private int positiveLongToInt(long value) { - if (!ColumnChunkMetaData.positiveLongFitsInAnInt(value)) { - throw new IllegalArgumentException("value should be positive and fit in an int: " + value); - } - return (int) (value + Integer.MIN_VALUE); - } - - /** - * turns the int back into a positive long - * - * @param value - * @return - */ - private long intToPositiveLong(int value) { - return (long) value - Integer.MIN_VALUE; - } - - /** - * @return start of the column data offset - 
*/ - public long getFirstDataPageOffset() { - return intToPositiveLong(firstDataPage); - } - - /** - * @return the location of the dictionary page if any - */ - public long getDictionaryPageOffset() { - return intToPositiveLong(dictionaryPageOffset); - } - - /** - * @return count of values in this block of the column - */ - public long getValueCount() { - return intToPositiveLong(valueCount); - } - - /** - * @return the totalUncompressedSize - */ - public long getTotalUncompressedSize() { - return intToPositiveLong(totalUncompressedSize); - } - - /** - * @return the totalSize - */ - public long getTotalSize() { - return intToPositiveLong(totalSize); - } - - /** - * @return the stats for this column - */ - public Statistics getStatistics() { - return statistics; - } - - /** - * @return the size stats for this column - */ - @Override - public SizeStatistics getSizeStatistics() { - return sizeStatistics; - } -} - -class LongColumnChunkMetaData extends ColumnChunkMetaData { - - private final long firstDataPageOffset; - private final long dictionaryPageOffset; - private final long valueCount; - private final long totalSize; - private final long totalUncompressedSize; - private final Statistics statistics; - private final SizeStatistics sizeStatistics; - - /** - * @param path column identifier - * @param type type of the column - * @param codec - * @param encodings - * @param statistics - * @param firstDataPageOffset - * @param dictionaryPageOffset - * @param valueCount - * @param totalSize - * @param totalUncompressedSize - * @param sizeStatistics - */ - LongColumnChunkMetaData( - ColumnPath path, - PrimitiveType type, - CompressionCodecName codec, - EncodingStats encodingStats, - Set encodings, - Statistics statistics, - long firstDataPageOffset, - long dictionaryPageOffset, - long valueCount, - long totalSize, - long totalUncompressedSize, - SizeStatistics sizeStatistics) { - super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings)); - 
this.firstDataPageOffset = firstDataPageOffset; - this.dictionaryPageOffset = dictionaryPageOffset; - this.valueCount = valueCount; - this.totalSize = totalSize; - this.totalUncompressedSize = totalUncompressedSize; - this.statistics = statistics; - this.sizeStatistics = sizeStatistics; - } - - /** - * @return start of the column data offset - */ - public long getFirstDataPageOffset() { - return firstDataPageOffset; - } - - /** - * @return the location of the dictionary page if any - */ - public long getDictionaryPageOffset() { - return dictionaryPageOffset; - } - - /** - * @return count of values in this block of the column - */ - public long getValueCount() { - return valueCount; - } - - /** - * @return the totalUncompressedSize - */ - public long getTotalUncompressedSize() { - return totalUncompressedSize; - } - - /** - * @return the totalSize - */ - public long getTotalSize() { - return totalSize; - } - - /** - * @return the stats for this column - */ - public Statistics getStatistics() { - return statistics; - } - - /** - * @return the size stats for this column - */ - @Override - public SizeStatistics getSizeStatistics() { - return sizeStatistics; - } -} - -class EncryptedColumnChunkMetaData extends ColumnChunkMetaData { - private final ParquetMetadataConverter parquetMetadataConverter; - private final byte[] encryptedMetadata; - private final byte[] columnKeyMetadata; - private final InternalFileDecryptor fileDecryptor; - - private final int columnOrdinal; - private final PrimitiveType primitiveType; - private final String createdBy; - private ColumnPath path; - - private boolean decrypted; - private ColumnChunkMetaData shadowColumnChunkMetaData; - - EncryptedColumnChunkMetaData( - ParquetMetadataConverter parquetMetadataConverter, - ColumnPath path, - PrimitiveType type, - byte[] encryptedMetadata, - byte[] columnKeyMetadata, - InternalFileDecryptor fileDecryptor, - int rowGroupOrdinal, - int columnOrdinal, - String createdBy) { - super((EncodingStats) 
null, (ColumnChunkProperties) null); - this.parquetMetadataConverter = parquetMetadataConverter; - this.path = path; - this.encryptedMetadata = encryptedMetadata; - this.columnKeyMetadata = columnKeyMetadata; - this.fileDecryptor = fileDecryptor; - this.rowGroupOrdinal = rowGroupOrdinal; - this.columnOrdinal = columnOrdinal; - this.primitiveType = type; - this.createdBy = createdBy; - - this.decrypted = false; - } - - @Override - protected void decryptIfNeeded() { - if (decrypted) return; - - if (null == fileDecryptor) { - throw new ParquetCryptoRuntimeException(path + ". Null File Decryptor"); - } - - // Decrypt the ColumnMetaData - InternalColumnDecryptionSetup columnDecryptionSetup = - fileDecryptor.setColumnCryptoMetadata(path, true, false, columnKeyMetadata, columnOrdinal); - - ColumnMetaData metaData; - ByteArrayInputStream tempInputStream = new ByteArrayInputStream(encryptedMetadata); - byte[] columnMetaDataAAD = AesCipher.createModuleAAD( - fileDecryptor.getFileAAD(), ModuleType.ColumnMetaData, rowGroupOrdinal, columnOrdinal, -1); - try { - metaData = readColumnMetaData( - tempInputStream, columnDecryptionSetup.getMetaDataDecryptor(), columnMetaDataAAD); - } catch (IOException e) { - throw new ParquetCryptoRuntimeException(path + ". 
Failed to decrypt column metadata", e); - } - decrypted = true; - shadowColumnChunkMetaData = - parquetMetadataConverter.buildColumnChunkMetaData(metaData, path, primitiveType, createdBy); - this.encodingStats = shadowColumnChunkMetaData.encodingStats; - this.properties = shadowColumnChunkMetaData.properties; - if (metaData.isSetBloom_filter_offset()) { - setBloomFilterOffset(metaData.getBloom_filter_offset()); - } - if (metaData.isSetBloom_filter_length()) { - setBloomFilterLength(metaData.getBloom_filter_length()); - } - } - - @Override - public ColumnPath getPath() { - return path; - } - - @Override - public long getFirstDataPageOffset() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getFirstDataPageOffset(); - } - - @Override - public long getDictionaryPageOffset() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getDictionaryPageOffset(); - } - - @Override - public long getValueCount() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getValueCount(); - } - - @Override - public long getTotalUncompressedSize() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getTotalUncompressedSize(); - } - - @Override - public long getTotalSize() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getTotalSize(); - } - - @Override - public Statistics getStatistics() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getStatistics(); - } - - @Override - public SizeStatistics getSizeStatistics() { - decryptIfNeeded(); - return shadowColumnChunkMetaData.getSizeStatistics(); - } - - /** - * @return whether or not this column is encrypted - */ - @Override - public boolean isEncrypted() { - return true; - } -} diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/EncryptedColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/EncryptedColumnChunkMetaData.java new file mode 100644 index 0000000000..c7dbcec0a5 --- /dev/null +++ 
// =====================================================================
// NOTE(review): this span of the patch adds three new source files.
// They are reproduced below, separated by "File:" banners.  Generic
// type parameters (Set<Encoding>, Builder<Builder>, ...) were stripped
// when the diff was pasted and have been restored here.
// =====================================================================

// --- File: parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/EncryptedColumnChunkMetaData.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.hadoop.metadata;

import static org.apache.parquet.format.Util.readColumnMetaData;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.crypto.AesCipher;
import org.apache.parquet.crypto.InternalColumnDecryptionSetup;
import org.apache.parquet.crypto.InternalFileDecryptor;
import org.apache.parquet.crypto.ModuleCipherFactory;
import org.apache.parquet.crypto.ParquetCryptoRuntimeException;
import org.apache.parquet.format.ColumnMetaData;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.schema.PrimitiveType;

/**
 * Column chunk metadata whose Thrift {@code ColumnMetaData} is stored encrypted.
 * Every accessor lazily decrypts the metadata on first use and then delegates
 * to the decrypted "shadow" {@link ColumnChunkMetaData} instance.
 */
class EncryptedColumnChunkMetaData extends ColumnChunkMetaData {
  private final ParquetMetadataConverter parquetMetadataConverter;
  private final byte[] encryptedMetadata;
  private final byte[] columnKeyMetadata;
  private final InternalFileDecryptor fileDecryptor;

  private final int columnOrdinal;
  private final PrimitiveType primitiveType;
  private final String createdBy;
  private final ColumnPath path;

  // true once decryptIfNeeded() has populated shadowColumnChunkMetaData
  private boolean decrypted;
  private ColumnChunkMetaData shadowColumnChunkMetaData;

  private EncryptedColumnChunkMetaData(Builder builder) {
    super(builder);
    this.parquetMetadataConverter = builder.parquetMetadataConverter;
    this.path = builder.path;
    this.encryptedMetadata = builder.encryptedMetadata;
    this.columnKeyMetadata = builder.columnKeyMetadata;
    this.fileDecryptor = builder.fileDecryptor;
    this.columnOrdinal = builder.columnOrdinal;
    this.primitiveType = builder.primitiveType;
    this.createdBy = builder.createdBy;
    this.decrypted = false;
    this.rowGroupOrdinal = builder.rowGroupOrdinal;
  }

  /**
   * Decrypts the embedded ColumnMetaData on first call and builds the shadow
   * metadata instance that all getters delegate to.
   *
   * @throws ParquetCryptoRuntimeException if no file decryptor is set or
   *     decryption/deserialization fails
   */
  @Override
  protected void decryptIfNeeded() {
    if (decrypted) return;

    if (null == fileDecryptor) {
      throw new ParquetCryptoRuntimeException(path + ". Null File Decryptor");
    }

    // Decrypt the ColumnMetaData
    InternalColumnDecryptionSetup columnDecryptionSetup =
        fileDecryptor.setColumnCryptoMetadata(path, true, false, columnKeyMetadata, columnOrdinal);

    ColumnMetaData metaData;
    ByteArrayInputStream tempInputStream = new ByteArrayInputStream(encryptedMetadata);
    byte[] columnMetaDataAAD = AesCipher.createModuleAAD(
        fileDecryptor.getFileAAD(),
        ModuleCipherFactory.ModuleType.ColumnMetaData,
        rowGroupOrdinal,
        columnOrdinal,
        -1);
    try {
      metaData = readColumnMetaData(
          tempInputStream, columnDecryptionSetup.getMetaDataDecryptor(), columnMetaDataAAD);
    } catch (IOException e) {
      throw new ParquetCryptoRuntimeException(path + ". Failed to decrypt column metadata", e);
    }
    decrypted = true;
    shadowColumnChunkMetaData =
        parquetMetadataConverter.buildColumnChunkMetaData(metaData, path, primitiveType, createdBy);
    // expose the decrypted properties through this (outer) instance as well
    this.encodingStats = shadowColumnChunkMetaData.encodingStats;
    this.properties = shadowColumnChunkMetaData.properties;
    if (metaData.isSetBloom_filter_offset()) {
      setBloomFilterOffset(metaData.getBloom_filter_offset());
    }
    if (metaData.isSetBloom_filter_length()) {
      setBloomFilterLength(metaData.getBloom_filter_length());
    }
  }

  @Override
  public ColumnPath getPath() {
    return path;
  }

  @Override
  public long getFirstDataPageOffset() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getFirstDataPageOffset();
  }

  @Override
  public long getDictionaryPageOffset() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getDictionaryPageOffset();
  }

  @Override
  public long getValueCount() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getValueCount();
  }

  @Override
  public long getTotalUncompressedSize() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getTotalUncompressedSize();
  }

  @Override
  public long getTotalSize() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getTotalSize();
  }

  @Override
  public Statistics getStatistics() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getStatistics();
  }

  @Override
  public SizeStatistics getSizeStatistics() {
    decryptIfNeeded();
    return shadowColumnChunkMetaData.getSizeStatistics();
  }

  /**
   * @return whether or not this column is encrypted
   */
  @Override
  public boolean isEncrypted() {
    return true;
  }

  /** Builder for {@link EncryptedColumnChunkMetaData}. */
  public static class Builder extends ColumnChunkMetaData.Builder<Builder> {
    private ParquetMetadataConverter parquetMetadataConverter;
    private ColumnPath path;
    private byte[] encryptedMetadata;
    private byte[] columnKeyMetadata;
    private InternalFileDecryptor fileDecryptor;
    private int columnOrdinal;
    private PrimitiveType primitiveType;
    private String createdBy;
    private int rowGroupOrdinal;

    public Builder withParquetMetadataConverter(ParquetMetadataConverter parquetMetadataConverter) {
      this.parquetMetadataConverter = parquetMetadataConverter;
      return this;
    }

    public Builder withPath(ColumnPath path) {
      this.path = path;
      return this;
    }

    public Builder withEncryptedMetadata(byte[] encryptedMetadata) {
      this.encryptedMetadata = encryptedMetadata;
      return this;
    }

    public Builder withColumnKeyMetadata(byte[] columnKeyMetadata) {
      this.columnKeyMetadata = columnKeyMetadata;
      return this;
    }

    public Builder withFileDecryptor(InternalFileDecryptor fileDecryptor) {
      this.fileDecryptor = fileDecryptor;
      return this;
    }

    public Builder withColumnOrdinal(int columnOrdinal) {
      this.columnOrdinal = columnOrdinal;
      return this;
    }

    public Builder withPrimitiveType(PrimitiveType primitiveType) {
      this.primitiveType = primitiveType;
      return this;
    }

    public Builder withCreatedBy(String createdBy) {
      this.createdBy = createdBy;
      return this;
    }

    public Builder withRowGroupOrdinal(int rowGroupOrdinal) {
      this.rowGroupOrdinal = rowGroupOrdinal;
      return this;
    }

    @Override
    protected Builder self() {
      return this;
    }

    @Override
    public ColumnChunkMetaData build() {
      return new EncryptedColumnChunkMetaData(this);
    }
  }
}

// --- File: parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/IntColumnChunkMetaData.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.hadoop.metadata;

import java.util.Set;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.schema.PrimitiveType;

/**
 * Space-optimized column chunk metadata: all long-valued fields are known to
 * fit in an int (shifted by {@code Integer.MIN_VALUE}) and are stored as ints.
 * Builder setters therefore expect values already encoded via
 * {@code ColumnChunkMetaData.positiveLongToInt}.
 */
class IntColumnChunkMetaData extends ColumnChunkMetaData {

  private final int firstDataPage;
  private final int dictionaryPageOffset;
  private final int valueCount;
  private final int totalSize;
  private final int totalUncompressedSize;
  private final Statistics statistics;
  private final SizeStatistics sizeStatistics;

  private IntColumnChunkMetaData(Builder builder) {
    super(builder);
    this.firstDataPage = builder.firstDataPage;
    this.dictionaryPageOffset = builder.dictionaryPageOffset;
    this.valueCount = builder.valueCount;
    this.totalSize = builder.totalSize;
    this.totalUncompressedSize = builder.totalUncompressedSize;
    this.statistics = builder.statistics;
    this.sizeStatistics = builder.sizeStatistics;
  }

  /**
   * turns the int back into a positive long
   *
   * @param value the stored (shifted) int
   * @return the original positive long
   */
  private long intToPositiveLong(int value) {
    return (long) value - Integer.MIN_VALUE;
  }

  /**
   * @return start of the column data offset
   */
  public long getFirstDataPageOffset() {
    return intToPositiveLong(firstDataPage);
  }

  /**
   * @return the location of the dictionary page if any
   */
  public long getDictionaryPageOffset() {
    return intToPositiveLong(dictionaryPageOffset);
  }

  /**
   * @return count of values in this block of the column
   */
  public long getValueCount() {
    return intToPositiveLong(valueCount);
  }

  /**
   * @return the totalUncompressedSize
   */
  public long getTotalUncompressedSize() {
    return intToPositiveLong(totalUncompressedSize);
  }

  /**
   * @return the totalSize
   */
  public long getTotalSize() {
    return intToPositiveLong(totalSize);
  }

  /**
   * @return the stats for this column
   */
  public Statistics getStatistics() {
    return statistics;
  }

  /**
   * @return the size stats for this column
   */
  @Override
  public SizeStatistics getSizeStatistics() {
    return sizeStatistics;
  }

  /** Builder for {@link IntColumnChunkMetaData}. */
  public static class Builder extends ColumnChunkMetaData.Builder<Builder> {
    // NOTE: these int fields hold positiveLongToInt-encoded values
    private int firstDataPage;
    private int dictionaryPageOffset;
    private int valueCount;
    private int totalSize;
    private int totalUncompressedSize;
    private Statistics statistics;
    private SizeStatistics sizeStatistics;
    private ColumnPath path;
    private PrimitiveType type;
    private CompressionCodecName codec;
    private Set<Encoding> encodings;

    public Builder withFirstDataPage(int firstDataPage) {
      this.firstDataPage = firstDataPage;
      return this;
    }

    public Builder withDictionaryPageOffset(int dictionaryPageOffset) {
      this.dictionaryPageOffset = dictionaryPageOffset;
      return this;
    }

    public Builder withValueCount(int valueCount) {
      this.valueCount = valueCount;
      return this;
    }

    public Builder withTotalSize(int totalSize) {
      this.totalSize = totalSize;
      return this;
    }

    public Builder withTotalUncompressedSize(int totalUncompressedSize) {
      this.totalUncompressedSize = totalUncompressedSize;
      return this;
    }

    public Builder withStatistics(Statistics statistics) {
      this.statistics = statistics;
      return this;
    }

    public Builder withSizeStatistics(SizeStatistics sizeStatistics) {
      this.sizeStatistics = sizeStatistics;
      return this;
    }

    public Builder withPath(ColumnPath path) {
      this.path = path;
      return this;
    }

    public Builder withPrimitiveType(PrimitiveType type) {
      this.type = type;
      return this;
    }

    public Builder withCodec(CompressionCodecName codec) {
      this.codec = codec;
      return this;
    }

    public Builder withEncodings(Set<Encoding> encodings) {
      this.encodings = encodings;
      return this;
    }

    @Override
    protected Builder self() {
      return this;
    }

    @Override
    public ColumnChunkMetaData build() {
      // derive the shared ColumnChunkProperties from the column identity fields
      ColumnChunkProperties columnChunkProperties =
          ColumnChunkProperties.get(this.path, this.type, this.codec, this.encodings);
      this.withProperties(columnChunkProperties);
      return new IntColumnChunkMetaData(this);
    }
  }
}

// --- File: parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/LongColumnChunkMetaData.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.hadoop.metadata;

import java.util.Set;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.schema.PrimitiveType;

/**
 * Column chunk metadata whose offsets/sizes do not all fit in an int and are
 * therefore stored as plain longs.
 */
class LongColumnChunkMetaData extends ColumnChunkMetaData {

  private final long firstDataPageOffset;
  private final long dictionaryPageOffset;
  private final long valueCount;
  private final long totalSize;
  private final long totalUncompressedSize;
  private final Statistics statistics;
  private final SizeStatistics sizeStatistics;

  private LongColumnChunkMetaData(Builder builder) {
    super(builder);
    this.firstDataPageOffset = builder.firstDataPageOffset;
    this.dictionaryPageOffset = builder.dictionaryPageOffset;
    this.valueCount = builder.valueCount;
    this.totalSize = builder.totalSize;
    this.totalUncompressedSize = builder.totalUncompressedSize;
    this.statistics = builder.statistics;
    this.sizeStatistics = builder.sizeStatistics;
  }

  /**
   * @return start of the column data offset
   */
  public long getFirstDataPageOffset() {
    return firstDataPageOffset;
  }

  /**
   * @return the location of the dictionary page if any
   */
  public long getDictionaryPageOffset() {
    return dictionaryPageOffset;
  }

  /**
   * @return count of values in this block of the column
   */
  public long getValueCount() {
    return valueCount;
  }

  /**
   * @return the totalUncompressedSize
   */
  public long getTotalUncompressedSize() {
    return totalUncompressedSize;
  }

  /**
   * @return the totalSize
   */
  public long getTotalSize() {
    return totalSize;
  }

  /**
   * @return the stats for this column
   */
  public Statistics getStatistics() {
    return statistics;
  }

  /**
   * @return the size stats for this column
   */
  @Override
  public SizeStatistics getSizeStatistics() {
    return sizeStatistics;
  }

  /** Builder for {@link LongColumnChunkMetaData}. */
  public static class Builder extends ColumnChunkMetaData.Builder<Builder> {
    private long firstDataPageOffset;
    private long dictionaryPageOffset;
    private long valueCount;
    private long totalSize;
    private long totalUncompressedSize;
    private Statistics statistics;
    private SizeStatistics sizeStatistics;
    private ColumnPath path;
    private PrimitiveType type;
    private CompressionCodecName codec;
    private Set<Encoding> encodings;

    public Builder withFirstDataPageOffset(long firstDataPageOffset) {
      this.firstDataPageOffset = firstDataPageOffset;
      return this;
    }

    public Builder withDictionaryPageOffset(long dictionaryPageOffset) {
      this.dictionaryPageOffset = dictionaryPageOffset;
      return this;
    }

    public Builder withValueCount(long valueCount) {
      this.valueCount = valueCount;
      return this;
    }

    public Builder withTotalSize(long totalSize) {
      this.totalSize = totalSize;
      return this;
    }

    public Builder withTotalUncompressedSize(long totalUncompressedSize) {
      this.totalUncompressedSize = totalUncompressedSize;
      return this;
    }

    public Builder withStatistics(Statistics statistics) {
      this.statistics = statistics;
      return this;
    }

    public Builder withSizeStatistics(SizeStatistics sizeStatistics) {
      this.sizeStatistics = sizeStatistics;
      return this;
    }

    public Builder withPath(ColumnPath path) {
      this.path = path;
      return this;
    }

    public Builder withPrimitiveType(PrimitiveType type) {
      this.type = type;
      return this;
    }

    public Builder withCodec(CompressionCodecName codec) {
      this.codec = codec;
      return this;
    }

    public Builder withEncodings(Set<Encoding> encodings) {
      this.encodings = encodings;
      return this;
    }

    @Override
    protected Builder self() {
      return this;
    }

    @Override
    public ColumnChunkMetaData build() {
      // derive the shared ColumnChunkProperties from the column identity fields
      ColumnChunkProperties columnChunkProperties =
          ColumnChunkProperties.get(this.path, this.type, this.codec, this.encodings);
      this.withProperties(columnChunkProperties);
      return new LongColumnChunkMetaData(this);
    }
  }
}