Skip to content

Commit bbe8736

Browse files
jeremyf authored and ebenenglish committed
Extracting a helper method to define interface
1 parent 4f6d29f commit bbe8736

File tree

5 files changed

+36
-15
lines changed

5 files changed

+36
-15
lines changed

lib/newspaper_works/text_extraction/alto_reader.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,11 @@ def isxml?(xml)
112112
# @return [String] JSON serialization of flattened word coordinates
113113
def json
114114
words = @doc_stream.words
115-
builder = NewspaperWorks::TextExtraction::WordCoordsBuilder.new(words,
116-
@image_width,
117-
@image_height)
118-
builder.to_json
115+
NewspaperWorks::TextExtraction::WordCoordsBuilder.json_coordinates_for(
116+
words: words,
117+
width:@image_width,
118+
height: @image_height
119+
)
119120
end
120121
end
121122
end

lib/newspaper_works/text_extraction/hocr_reader.rb

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,11 @@ def isxml?(xml)
161161
# @return [String] JSON serialization of flattened word coordinates
162162
def json
163163
words = @doc_stream.words
164-
builder = NewspaperWorks::TextExtraction::WordCoordsBuilder.new(
165-
words,
166-
@doc_stream.width,
167-
@doc_stream.height
164+
NewspaperWorks::TextExtraction::WordCoordsBuilder.json_coordinates_for(
165+
words: words,
166+
width: @doc_stream.width,
167+
height: @doc_stream.height
168168
)
169-
builder.to_json
170169
end
171170
end
172171
end

lib/newspaper_works/text_extraction/page_ocr.rb

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,11 @@ def words
4040
end
4141

4242
def word_json
43-
builder = NewspaperWorks::TextExtraction::WordCoordsBuilder.new(
44-
words,
45-
width,
46-
height
43+
NewspaperWorks::TextExtraction::WordCoordsBuilder.json_coordinates_for(
44+
words: words,
45+
width: width,
46+
height: height
4747
)
48-
builder.to_json
4948
end
5049

5150
def plain

lib/newspaper_works/text_extraction/word_coords_builder.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ module NewspaperWorks
22
# Module for text extraction (OCR or otherwise)
33
module TextExtraction
44
class WordCoordsBuilder
5+
# @param words [Array<Hash>] an array of hash objects that have the keys `:word` and `:coordinates`.
6+
# @param width [Integer] the width of the "canvas" on which the words appear.
7+
# @param height [Integer] the height of the "canvas" on which the words appear.
8+
# @return [String] a JSON encoded string.
9+
def self.json_coordinates_for(words:, width: nil, height: nil)
10+
new(words, width, height).to_json
11+
end
12+
513
def initialize(words, width = nil, height = nil)
614
@words = words
715
@width = width

spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,24 @@
1111
end
1212
let(:image_width) { 1_234 }
1313
let(:image_height) { 5_678 }
14-
let(:wcb) { described_class.new(words, image_width, image_height) }
14+
15+
describe '.json_coordinates_for' do
16+
let(:wcb_to_json) { described_class.json_coordinates_for(words: words, width: image_width, height: image_height) }
17+
it 'has the correct structure' do
18+
expect(wcb_to_json['height']).to eq image_height
19+
expect(wcb_to_json['width']).to eq image_width
20+
expect(wcb_to_json['coords'].length).to eq 3
21+
expect(wcb_to_json['coords']['foo']).not_to be_falsey
22+
end
23+
24+
it 'combines coordinates for the same word' do
25+
expect(wcb_to_json['coords']['foo']).to eq [[1, 2, 3, 4], [13, 14, 15, 16]]
26+
end
27+
end
1528

1629
describe '#to_json' do
1730
let(:wcb_to_json) { JSON.parse(wcb.to_json) }
31+
let(:wcb) { described_class.new(words, image_width, image_height) }
1832

1933
it 'has the correct structure' do
2034
expect(wcb_to_json['height']).to eq image_height

0 commit comments

Comments
 (0)