Skip to content

Commit ede2b9a

Browse files
author
Rahul Sharma
committed
change int96 internal format
1 parent 325d335 commit ede2b9a

File tree

1 file changed

+51
-74
lines changed

1 file changed

+51
-74
lines changed

parquet/src/data_type.rs

Lines changed: 51 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,10 @@ use crate::util::bit_util::FromBytes;
3535
/// The type only takes 12 bytes, without extra padding.
3636
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
3737
pub struct Int96 {
38-
value: [u32; 3],
38+
/// First 8 bytes store nanoseconds since midnight
39+
pub nanos: i64,
40+
/// Last 4 bytes store Julian days
41+
pub days: i32,
3942
}
4043

4144
const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588;
@@ -59,30 +62,28 @@ const NANOSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * NANOSECONDS;
5962
impl Int96 {
6063
/// Creates new INT96 type struct with no data set.
6164
pub fn new() -> Self {
62-
Self { value: [0; 3] }
65+
Self { nanos: 0, days: 0 }
6366
}
6467

65-
/// Returns underlying data as slice of [`u32`].
68+
/// Returns the underlying data as a slice of [`u32`], for compatibility with the Parquet format.
6669
#[inline]
6770
pub fn data(&self) -> &[u32] {
68-
&self.value
71+
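// SAFETY: We're reinterpreting the first 12 bytes of our struct as [u32; 3].
72+
// NOTE(review): this is only sound if all of the following hold:
73+
// 1. The memory layout is `nanos` (8 bytes) followed by `days` (4 bytes); the default Rust representation does not guarantee field order, so this presumably needs `#[repr(C)]` - confirm
74+
// 2. The alignment requirements are met (u32 requires 4-byte alignment)
75+
// 3. Every bit pattern is a valid u32; the element order matches `set_data` only on little-endian targets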
76+
unsafe { std::slice::from_raw_parts(self as *const Int96 as *const u32, 3) }
6977
}
7078

71-
/// Sets data for this INT96 type.
79+
/// Sets data for this INT96 type from raw Parquet format.
7280
#[inline]
7381
pub fn set_data(&mut self, elem0: u32, elem1: u32, elem2: u32) {
74-
self.value = [elem0, elem1, elem2];
75-
}
76-
77-
/// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch
78-
#[deprecated(since = "54.0.0", note = "Use `to_millis` instead")]
79-
pub fn to_i64(&self) -> i64 {
80-
self.to_millis()
82+
self.nanos = ((elem1 as i64) << 32) | (elem0 as i64);
83+
self.days = elem2 as i32;
8184
}
8285

8386
/// Converts this INT96 into an i64 representing the number of SECONDS since EPOCH
84-
///
85-
/// Will wrap around on overflow
8687
#[inline]
8788
pub fn to_seconds(&self) -> i64 {
8889
let (day, nanos) = self.data_as_days_and_nanos();
@@ -92,8 +93,6 @@ impl Int96 {
9293
}
9394

9495
/// Converts this INT96 into an i64 representing the number of MILLISECONDS since EPOCH
95-
///
96-
/// Will wrap around on overflow
9796
#[inline]
9897
pub fn to_millis(&self) -> i64 {
9998
let (day, nanos) = self.data_as_days_and_nanos();
@@ -103,8 +102,6 @@ impl Int96 {
103102
}
104103

105104
/// Converts this INT96 into an i64 representing the number of MICROSECONDS since EPOCH
106-
///
107-
/// Will wrap around on overflow
108105
#[inline]
109106
pub fn to_micros(&self) -> i64 {
110107
let (day, nanos) = self.data_as_days_and_nanos();
@@ -114,8 +111,6 @@ impl Int96 {
114111
}
115112

116113
/// Converts this INT96 into an i64 representing the number of NANOSECONDS since EPOCH
117-
///
118-
/// Will wrap around on overflow
119114
#[inline]
120115
pub fn to_nanos(&self) -> i64 {
121116
let (day, nanos) = self.data_as_days_and_nanos();
@@ -124,33 +119,19 @@ impl Int96 {
124119
.wrapping_add(nanos)
125120
}
126121

127-
/// Sets the INT96 data from seconds since epoch
128-
///
129-
/// Will wrap around on overflow
130-
#[inline]
131-
pub fn set_data_from_seconds(&mut self, seconds: i64) {
132-
self.set_data_from_nanos(seconds.wrapping_mul(NANOSECONDS));
133-
}
134-
135-
/// Sets the INT96 data from milliseconds since epoch
136-
///
137-
/// Will wrap around on overflow
122+
/// Sets the INT96 data directly from Julian days and nanoseconds since midnight
138123
#[inline]
139-
pub fn set_data_from_millis(&mut self, millis: i64) {
140-
self.set_data_from_nanos(millis.wrapping_mul(MICROSECONDS));
124+
pub fn set_data_from_days_and_nanos(&mut self, days: i32, nanos: i64) {
125+
self.days = days;
126+
self.nanos = nanos;
141127
}
142128

143-
/// Sets the INT96 data from microseconds since epoch
144-
///
145-
/// Will wrap around on overflow
146129
#[inline]
147-
pub fn set_data_from_micros(&mut self, micros: i64) {
148-
self.set_data_from_nanos(micros.wrapping_mul(MILLISECONDS));
130+
fn data_as_days_and_nanos(&self) -> (i32, i64) {
131+
(self.days, self.nanos)
149132
}
150133

151134
/// Sets the INT96 data from nanoseconds since epoch
152-
///
153-
/// Will wrap around on overflow
154135
#[inline]
155136
pub fn set_data_from_nanos(&mut self, nanos: i64) {
156137
let days = nanos / NANOSECONDS_IN_DAY;
@@ -159,42 +140,30 @@ impl Int96 {
159140
self.set_data_from_days_and_nanos(julian_day, remaining_nanos);
160141
}
161142

162-
/// Sets the INT96 data directly from days and nanoseconds
163-
///
164-
/// This is the most direct way to set the Int96 data structure which internally
165-
/// stores days and nanoseconds. The days should be Julian days since epoch.
166-
143+
/// Sets the INT96 data from seconds since epoch
167144
#[inline]
168-
pub fn set_data_from_days_and_nanos(&mut self, days: i32, nanos: i64) {
169-
let julian_day = (days as i32) as u32;
170-
let nanos_low = (nanos & 0xFFFFFFFF) as u32;
171-
let nanos_high = ((nanos >> 32) & 0xFFFFFFFF) as u32;
172-
self.set_data(nanos_low, nanos_high, julian_day);
145+
pub fn set_data_from_seconds(&mut self, seconds: i64) {
146+
self.set_data_from_nanos(seconds.wrapping_mul(NANOSECONDS))
173147
}
174148

149+
/// Sets the INT96 data from milliseconds since epoch
175150
#[inline]
176-
fn data_as_days_and_nanos(&self) -> (i32, i64) {
177-
let day = self.data()[2] as i32;
178-
let nanos = ((self.data()[1] as i64) << 32) + self.data()[0] as i64;
179-
(day, nanos)
151+
pub fn set_data_from_millis(&mut self, millis: i64) {
152+
self.set_data_from_nanos(millis.wrapping_mul(MICROSECONDS))
180153
}
181-
}
182154

183-
impl PartialOrd for Int96 {
184-
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
185-
Some(self.cmp(other))
155+
/// Sets the INT96 data from microseconds since epoch
156+
#[inline]
157+
pub fn set_data_from_micros(&mut self, micros: i64) {
158+
self.set_data_from_nanos(micros.wrapping_mul(MILLISECONDS))
186159
}
187160
}
188161

189-
impl Ord for Int96 {
190-
fn cmp(&self, other: &Self) -> Ordering {
191-
let (self_days, self_nanos) = self.data_as_days_and_nanos();
192-
let (other_days, other_nanos) = other.data_as_days_and_nanos();
193-
194-
match self_days.cmp(&other_days) {
195-
Ordering::Equal => self_nanos.cmp(&other_nanos),
196-
ord => ord,
197-
}
162+
163+
impl AsBytes for Int96 {
164+
fn as_bytes(&self) -> &[u8] {
165+
// SAFETY: NOTE(review): assumes Int96 is laid out as i64 followed by i32 (12 contiguous bytes); the default Rust representation does not guarantee field order - confirm `#[repr(C)]`
166+
unsafe { std::slice::from_raw_parts(self as *const Int96 as *const u8, 12) }
198167
}
199168
}
200169

@@ -207,6 +176,21 @@ impl From<Vec<u32>> for Int96 {
207176
}
208177
}
209178

179+
impl PartialOrd for Int96 {
180+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
181+
Some(self.cmp(other))
182+
}
183+
}
184+
185+
impl Ord for Int96 {
186+
fn cmp(&self, other: &Self) -> Ordering {
187+
match self.days.cmp(&other.days) {
188+
Ordering::Equal => self.nanos.cmp(&other.nanos),
189+
ord => ord,
190+
}
191+
}
192+
}
193+
210194
impl fmt::Display for Int96 {
211195
#[cold]
212196
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -673,13 +657,6 @@ impl AsBytes for bool {
673657
}
674658
}
675659

676-
impl AsBytes for Int96 {
677-
fn as_bytes(&self) -> &[u8] {
678-
// SAFETY: Int96::data is a &[u32; 3].
679-
unsafe { std::slice::from_raw_parts(self.data() as *const [u32] as *const u8, 12) }
680-
}
681-
}
682-
683660
impl AsBytes for ByteArray {
684661
fn as_bytes(&self) -> &[u8] {
685662
self.data()

0 commit comments

Comments
 (0)