From 544550a0fc9c1dd516472d9a0ab18ca0e0f2f375 Mon Sep 17 00:00:00 2001 From: Edigleysson Silva Date: Sun, 17 Sep 2023 00:43:50 -0300 Subject: [PATCH 1/5] define storage layout --- layout.md | 6 ++++++ lib/amnesia/storage.rb | 27 +++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 layout.md diff --git a/layout.md b/layout.md new file mode 100644 index 0000000..e424ee1 --- /dev/null +++ b/layout.md @@ -0,0 +1,6 @@ +HEADER +numero de chaves (8-bit) | timestamp (64-bit) -> 13 bytes + +BLOCO +tamanho do bloco (8-bit) | tamanho do registro (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | tamanho da chave +(8-bit) | chave (x bytes) | tamanho do valor (8-bit) | valor (y bytes) diff --git a/lib/amnesia/storage.rb b/lib/amnesia/storage.rb index 5d15224..82badba 100644 --- a/lib/amnesia/storage.rb +++ b/lib/amnesia/storage.rb @@ -54,9 +54,32 @@ def file_exists? private def populate_data(items) - data_block = items.map { |(key, value)| "#{key},#{value}\n" }.join('') + num_keys = items.length + creation_timestamp = Time.now.to_i + fixed_amount_of_byter_per_block = 12 - create_db_file(data_block) + header = [num_keys, creation_timestamp].pack('CQ') + + data_blocks = items.map do |(key, value)| + is_tombstone = value.empty? ? 1 : 0 + key_size = key.bytesize + value_size = value.bytesize + record_size = key_size + value_size + record_size_tombstone_composition = (record_size << 1) | is_tombstone + + block_size = fixed_amount_of_byter_per_block + record_size + + row = [block_size, record_size_tombstone_composition, creation_timestamp, key_size, key, value_size, value] + + row.pack("CCQCa#{key_size}Ca#{value_size}") + end.join + + pp header + pp data_blocks + + # data_block = items.map { |(key, value)| "#{key},#{value}\n" }.join('') + + # create_db_file(data_block) end def record_from_scan(key) From e25a9d5474c60d7c39405a20f16a1ec271233287 Mon Sep 17 00:00:00 2001 From: Edigleysson Silva Date: Sun, 17 Sep 2023 00:57:16 -0300 Subject: [PATCH 2/5] implement layout writing --- layout.md | 5 ++--- lib/amnesia/segment_handler.rb | 17 +++++------------ lib/amnesia/storage.rb | 10 ++++------ 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/layout.md b/layout.md index e424ee1..6429ccf 100644 --- a/layout.md +++ b/layout.md @@ -1,6 +1,5 @@ HEADER -numero de chaves (8-bit) | timestamp (64-bit) -> 13 bytes +numero de chaves (8-bit) | timestamp (64-bit) -> 9 bytes BLOCO -tamanho do bloco (8-bit) | tamanho do registro (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | tamanho da chave -(8-bit) | chave (x bytes) | tamanho do valor (8-bit) | valor (y bytes) +tamanho do bloco (8-bit) | tamanho do registro (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | tamanho da chave (8-bit) | chave (x bytes) | tamanho do valor (8-bit) | valor (y bytes) diff --git a/lib/amnesia/segment_handler.rb b/lib/amnesia/segment_handler.rb index 0f7739b..436be7f 100644 --- a/lib/amnesia/segment_handler.rb +++ b/lib/amnesia/segment_handler.rb @@ -32,18 +32,7 @@ def compact end def flush(items) - # TODO: Use the storage class for that - filename = "./_data/#{Time.now.to_i}.segment" - - File.open(filename, 'w') do |f| - items.each { |(key, value)| f.write("#{key},#{value}\n") } - end - - @segments.unshift(Amnesia::Segment.new(filename)) - - compact if @segments.length == 2 - - :finished_flushing + create_segment("./_data/#{Time.now.to_i}.segment", items) end # TODO: remove this method @@ -83,6 +72,10 @@ def load_segments(filenames) private + def create_segment(filename, items) + @segments.unshift(Amnesia::Segment.new(filename, items: items)) + end + def start_segment filename = "./_data/#{Time.now.to_i}.segment" diff --git a/lib/amnesia/storage.rb b/lib/amnesia/storage.rb index 82badba..f734b45 100644 --- a/lib/amnesia/storage.rb +++ b/lib/amnesia/storage.rb @@ -2,6 +2,8 @@ module Amnesia class Storage attr_reader :filename + FIXED_AMOUNT_OF_BYTE_PER_BLOCK = 12 + def initialize(filename, items: nil) @filename = filename populate_data(items) unless items.nil? || items.empty? @@ -56,7 +58,6 @@ def file_exists? def populate_data(items) num_keys = items.length creation_timestamp = Time.now.to_i - fixed_amount_of_byter_per_block = 12 header = [num_keys, creation_timestamp].pack('CQ') @@ -67,17 +68,14 @@ def populate_data(items) record_size = key_size + value_size record_size_tombstone_composition = (record_size << 1) | is_tombstone - block_size = fixed_amount_of_byter_per_block + record_size + block_size = FIXED_AMOUNT_OF_BYTE_PER_BLOCK + record_size row = [block_size, record_size_tombstone_composition, creation_timestamp, key_size, key, value_size, value] row.pack("CCQCa#{key_size}Ca#{value_size}") end.join - pp header - pp data_blocks - - # data_block = items.map { |(key, value)| "#{key},#{value}\n" }.join('') + File.binwrite(filename, "#{header}#{data_blocks}") # create_db_file(data_block) end From d3bbdee56c74441274afd19c3eb347cf832de1cb Mon Sep 17 00:00:00 2001 From: Edigleysson Silva Date: Sun, 17 Sep 2023 01:53:52 -0300 Subject: [PATCH 3/5] refactor record searching to parse binary file --- lib/amnesia/segment.rb | 17 ++++++++------- lib/amnesia/storage.rb | 48 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/lib/amnesia/segment.rb b/lib/amnesia/segment.rb index 2bb9a66..97caede 100644 --- a/lib/amnesia/segment.rb +++ b/lib/amnesia/segment.rb @@ -48,17 +48,18 @@ def store(hash_input) end def populate_index_structure - lines = File.readlines(@storage.filename) - byte_offset = 0 + puts 'no populating the fking index' + # lines = File.readlines(@storage.filename) + # byte_offset = 0 - lines.each do |line| - record_key, = line.split(',', 2) - record_size = line.bytesize + # lines.each do |line| + # record_key, = line.split(',', 2) + # record_size = line.bytesize - @index_structure.add(record_key, [byte_offset, record_size - 1]) + # @index_structure.add(record_key, [byte_offset, record_size - 1]) - byte_offset += line.bytesize - end + # byte_offset += line.bytesize + # end end end end diff --git a/lib/amnesia/storage.rb b/lib/amnesia/storage.rb index f734b45..f601366 100644 --- a/lib/amnesia/storage.rb +++ b/lib/amnesia/storage.rb @@ -80,15 +80,49 @@ def populate_data(items) # create_db_file(data_block) end - def record_from_scan(key) - lines = File.readlines(filename) + def record_from_scan(searching_key) + handler = File.open(filename, 'rb') - record = lines.filter do |line| - record_key, = line.split(',', 2) - record_key == key - end.last + handler.seek(9, IO::SEEK_CUR) # skipping header + # header = handler.read(9).unpack('CQ') # [num of keys, timestamp] - parse_record(record) + # puts 'header of the file' + # pp header + + result = nil + + until handler.eof? + block_size, record_size_tombstone, _timestamp, key_size = handler.read(11).unpack('CCQC') + + key = handler.read(key_size) + + if searching_key == key + is_tombstone = record_size_tombstone & 1 + + # value_size = block_size - (key_size + 11 + 1) # 11 ja lidos pra pegar a key, 1 a menos também que é a informacao value_size em si + + # handler.seek(1, IO::SEEK_CUR) + + value_size, = handler.read(1).unpack('C') + + value, = handler.read(value_size).unpack('a*') + + result = "#{key},#{value}\n" # por questoes de compatiblidade + + result = "#{key},\n" if is_tombstone == 1 + + break + else + # vai para o proximo bloco + # offset calculado com base no tamanho do bloco subtraidos dos bytes já lidos, 11 + key - numero de bytes da + # key + handler.seek(block_size - (key_size + 11), IO::SEEK_CUR) + end + end + + handler.close + + parse_record(result) end def record_from_index(index_entry) From 0392999758f698ffce9c689efecf4b3264bdd3a7 Mon Sep 17 00:00:00 2001 From: Edigleysson Silva Date: Sun, 17 Sep 2023 02:11:44 -0300 Subject: [PATCH 4/5] implement index in the new layout --- lib/amnesia/segment.rb | 26 +++++++++++++++++--------- lib/amnesia/storage.rb | 10 ++++++---- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/lib/amnesia/segment.rb b/lib/amnesia/segment.rb index 97caede..f1ccdf2 100644 --- a/lib/amnesia/segment.rb +++ b/lib/amnesia/segment.rb @@ -48,18 +48,26 @@ def store(hash_input) end def populate_index_structure - puts 'no populating the fking index' - # lines = File.readlines(@storage.filename) - # byte_offset = 0 + fd = File.open(@storage.filename, 'rb') - # lines.each do |line| - # record_key, = line.split(',', 2) - # record_size = line.bytesize + fd.seek(9, IO::SEEK_CUR) # skipping header - # @index_structure.add(record_key, [byte_offset, record_size - 1]) + until fd.eof? + _block_size, _record_size_tombstone, _timestamp, key_size = fd.read(11).unpack('CCQC') + record_key = fd.read(key_size) - # byte_offset += line.bytesize - # end + value_size, = fd.read(1).unpack('C') + + puts "Adding index entry\nKey -> #{record_key}\nFile offset -> #{fd.pos}\nValue size -> #{value_size}" + + @index_structure.add(record_key, [fd.pos, value_size]) + + fd.seek(value_size, IO::SEEK_CUR) + end + + pp @index_structure + + fd.close end end end diff --git a/lib/amnesia/storage.rb b/lib/amnesia/storage.rb index f601366..070f96a 100644 --- a/lib/amnesia/storage.rb +++ b/lib/amnesia/storage.rb @@ -32,7 +32,7 @@ def delete(key) end def get(key, index_entry: nil) - return record_from_index(index_entry) unless index_entry.nil? + return record_from_index(index_entry, key) unless index_entry.nil? record_from_scan(key) end @@ -125,12 +125,14 @@ def record_from_scan(searching_key) parse_record(result) end - def record_from_index(index_entry) + def record_from_index(index_entry, key) offset, size = index_entry - record = File.read(filename, size, offset) + # puts "Reading from index -> offset: #{offset} / size -> #{size}" - parse_record(record) + value = File.binread(filename, size, offset) + + parse_record("#{key},#{value}\n") end end end From 8c5f184f1354aa11d1bca3e4da024ee7da72eb18 Mon Sep 17 00:00:00 2001 From: Edigleysson Silva Date: Sun, 17 Sep 2023 11:04:29 -0300 Subject: [PATCH 5/5] fix block size field to 16-bit length --- layout.md | 2 +- lib/amnesia/segment.rb | 2 +- lib/amnesia/storage.rb | 19 ++++++++----------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/layout.md b/layout.md index 6429ccf..73124e2 100644 --- a/layout.md +++ b/layout.md @@ -2,4 +2,4 @@ HEADER numero de chaves (8-bit) | timestamp (64-bit) -> 9 bytes BLOCO -tamanho do bloco (8-bit) | tamanho do registro (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | tamanho da chave (8-bit) | chave (x bytes) | tamanho do valor (8-bit) | valor (y bytes) +tamanho do bloco (16-bit) | tamanho do registro (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | tamanho da chave (8-bit) | chave (x bytes) | tamanho do valor (8-bit) | valor (y bytes) diff --git a/lib/amnesia/segment.rb b/lib/amnesia/segment.rb index f1ccdf2..9fdd4f6 100644 --- a/lib/amnesia/segment.rb +++ b/lib/amnesia/segment.rb @@ -53,7 +53,7 @@ def populate_index_structure fd.seek(9, IO::SEEK_CUR) # skipping header until fd.eof? - _block_size, _record_size_tombstone, _timestamp, key_size = fd.read(11).unpack('CCQC') + _block_size, _record_size_tombstone, _timestamp, key_size = fd.read(12).unpack('SCQC') record_key = fd.read(key_size) value_size, = fd.read(1).unpack('C') diff --git a/lib/amnesia/storage.rb b/lib/amnesia/storage.rb index 070f96a..c423df2 100644 --- a/lib/amnesia/storage.rb +++ b/lib/amnesia/storage.rb @@ -2,7 +2,7 @@ module Amnesia class Storage attr_reader :filename - FIXED_AMOUNT_OF_BYTE_PER_BLOCK = 12 + FIXED_AMOUNT_OF_BYTE_PER_BLOCK = 13 def initialize(filename, items: nil) @filename = filename @@ -72,7 +72,7 @@ def populate_data(items) row = [block_size, record_size_tombstone_composition, creation_timestamp, key_size, key, value_size, value] - row.pack("CCQCa#{key_size}Ca#{value_size}") + row.pack("SCQCa#{key_size}Ca#{value_size}") end.join File.binwrite(filename, "#{header}#{data_blocks}") @@ -84,18 +84,17 @@ def record_from_scan(searching_key) handler = File.open(filename, 'rb') handler.seek(9, IO::SEEK_CUR) # skipping header - # header = handler.read(9).unpack('CQ') # [num of keys, timestamp] - - # puts 'header of the file' - # pp header result = nil until handler.eof? - block_size, record_size_tombstone, _timestamp, key_size = handler.read(11).unpack('CCQC') + block_seek = 12 + block_size, record_size_tombstone, _timestamp, key_size = handler.read(block_seek).unpack('SCQC') key = handler.read(key_size) + puts "Key Size -> #{key_size} // Key -> #{key}\n\n" + if searching_key == key is_tombstone = record_size_tombstone & 1 @@ -114,9 +113,9 @@ def record_from_scan(searching_key) break else # vai para o proximo bloco - # offset calculado com base no tamanho do bloco subtraidos dos bytes já lidos, 11 + key - numero de bytes da + # offset calculado com base no tamanho do bloco subtraidos dos bytes já lidos, 12 + key - numero de bytes da # key - handler.seek(block_size - (key_size + 11), IO::SEEK_CUR) + handler.seek(block_size - (key_size + block_seek), IO::SEEK_CUR) end end @@ -128,8 +127,6 @@ def record_from_scan(searching_key) def record_from_index(index_entry, key) offset, size = index_entry - # puts "Reading from index -> offset: #{offset} / size -> #{size}" - value = File.binread(filename, size, offset) parse_record("#{key},#{value}\n")