From 2f08d051bf025c1879f1005a7f414c555d2b73d8 Mon Sep 17 00:00:00 2001 From: fabianskier Date: Tue, 17 Jun 2025 13:33:27 -0300 Subject: [PATCH] feat: add support for parsing general-format numbers as decimals --- README.md | 21 +++++++++++++++++++++ lib/creek/book.rb | 5 ++++- lib/creek/sheet.rb | 6 ++++-- lib/creek/styles/converter.rb | 30 +++++++++++++++++++++++++----- spec/test_spec.rb | 19 +++++++++++++++++++ 5 files changed, 73 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 28e43c7..f5d830d 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,27 @@ By default, Creek will map cell names with letter and number(A1, B3 and etc). To creek = Creek::Book.new file.path, with_headers: true ``` +## Handling decimals with General format + +When an Excel cell uses the *General* number format, Creek returns the raw +string from the file. Numeric values such as `0.001` may therefore appear as +`"1E-3"`. To work with the numeric value, cast it manually or enable automatic +parsing with the `parse_general_as_number` option: + +```ruby +require 'bigdecimal' + +creek = Creek::Book.new 'spec/fixtures/sample.xlsx', parse_general_as_number: true +sheet = creek.sheets[0] + +sheet.rows.each do |row| + value = row['M2'] + decimal = BigDecimal(value) + puts decimal.to_f # => 0.001 +end +``` + +Without this option, you may cast `value` manually using `BigDecimal` or `Float`. ## Contributing diff --git a/lib/creek/book.rb b/lib/creek/book.rb index 157884e..547f986 100644 --- a/lib/creek/book.rb +++ b/lib/creek/book.rb @@ -9,7 +9,8 @@ module Creek class Creek::Book attr_reader :files, :shared_strings, - :with_headers + :with_headers, + :parse_general_as_number DATE_1900 = Date.new(1899, 12, 30).freeze DATE_1904 = Date.new(1904, 1, 1).freeze @@ -24,6 +25,7 @@ def initialize(path, options = {}) @files = Zip::File.open(path) @shared_strings = SharedStrings.new(self) @with_headers = options.fetch(:with_headers, false) + @parse_general_as_number = options.fetch(:parse_general_as_number, false) end def sheets @@ -51,6 +53,7 @@ def sheets sheetfile ) sheet.with_headers = with_headers + sheet.parse_general_as_number = parse_general_as_number sheet end end diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb index 69d8497..7e78f6f 100644 --- a/lib/creek/sheet.rb +++ b/lib/creek/sheet.rb @@ -10,7 +10,8 @@ class Creek::Sheet HEADERS_ROW_NUMBER = '1' SPREADSHEETML_URI = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' - attr_accessor :with_headers + attr_accessor :with_headers, + :parse_general_as_number attr_reader :book, :name, :sheetid, @@ -161,7 +162,8 @@ def convert(value, type, style_idx) def converter_options @converter_options ||= { shared_strings: @book.shared_strings.dictionary, - base_date: @book.base_date + base_date: @book.base_date, + parse_general_as_number: @book.parse_general_as_number } end diff --git a/lib/creek/styles/converter.rb b/lib/creek/styles/converter.rb index 08d7047..0d12f31 100644 --- a/lib/creek/styles/converter.rb +++ b/lib/creek/styles/converter.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'set' +require 'bigdecimal' module Creek class Styles @@ -30,6 +31,7 @@ class Converter # - base_date: from what date to begin, see method #base_date DATE_TYPES = %i[date time date_time].to_set + NUMERIC_REGEXP = /\A-?(?:\d+(?:\.\d+)?|\d*\.\d+)(?:[eE][+-]?\d+)?\z/.freeze def self.call(value, type, style, options = {}) return nil if value.nil? || value.empty? @@ -59,9 +61,13 @@ def self.call(value, type, style, options = {}) ## when :string - value + if options[:parse_general_as_number] && numeric_string?(value) + convert_general_number(value) + else + value + end when :unsupported - convert_unknown(value) + convert_unknown(value, options) when :fixnum value.to_i when :float, :percentage @@ -75,13 +81,15 @@ def self.call(value, type, style, options = {}) ## Nothing matched else - convert_unknown(value) + convert_unknown(value, options) end end - def self.convert_unknown(value) - if value.nil? or value.empty? + def self.convert_unknown(value, options = {}) + if value.nil? || value.empty? value + elsif options[:parse_general_as_number] && numeric_string?(value) + convert_general_number(value) elsif value.to_i.to_s == value.to_s value.to_i elsif value.to_f.to_s == value.to_s @@ -129,6 +137,18 @@ def self.round_datetime(datetime_string) ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0) end + + def self.numeric_string?(value) + value.match?(NUMERIC_REGEXP) + end + + def self.convert_general_number(value) + if defined?(BigDecimal) + BigDecimal(value) + else + value.to_f + end + end end end end diff --git a/spec/test_spec.rb b/spec/test_spec.rb index 6ff1fff..fbf8f9e 100644 --- a/spec/test_spec.rb +++ b/spec/test_spec.rb @@ -76,6 +76,25 @@ end end +describe 'Creek parsing a file with large numbers with automatic conversion' do + before(:all) do + @creek = Creek::Book.new 'spec/fixtures/large_numbers.xlsx', parse_general_as_number: true + @expected_simple_rows = [{ 'A' => 783_294_732.0, 'B' => '783294732', 'C' => 783_294_732.0 }] + end + + after(:all) do + @creek.close + end + + it 'casts general numbers to floats' do + rows = [] + @creek.sheets[0].simple_rows.each do |row| + rows << row + end + expect(rows[0]).to eq(@expected_simple_rows[0]) + end +end + describe 'Creek parsing a sample XLSX file' do before(:all) do @creek = Creek::Book.new 'spec/fixtures/sample.xlsx'