diff --git a/layout.md b/layout.md
new file mode 100644
index 0000000..73124e2
--- /dev/null
+++ b/layout.md
@@ -0,0 +1,5 @@
+HEADER
+numero de chaves (8-bit) | timestamp (64-bit) -> 9 bytes
+
+BLOCO
+tamanho do bloco (16-bit) | tamanho do registro (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | tamanho da chave (8-bit) | chave (x bytes) | tamanho do valor (8-bit) | valor (y bytes)
diff --git a/lib/amnesia/segment.rb b/lib/amnesia/segment.rb
index 2bb9a66..9fdd4f6 100644
--- a/lib/amnesia/segment.rb
+++ b/lib/amnesia/segment.rb
@@ -48,17 +48,22 @@ def store(hash_input)
     end
 
     def populate_index_structure
-      lines = File.readlines(@storage.filename)
-      byte_offset = 0
+      # Block form guarantees the descriptor is closed even if a read or unpack raises.
+      File.open(@storage.filename, 'rb') do |fd|
+        fd.seek(9, IO::SEEK_CUR) # skip the 9-byte file header
 
-      lines.each do |line|
-        record_key, = line.split(',', 2)
-        record_size = line.bytesize
+        until fd.eof?
+          # Fixed block prefix (12 bytes): block size, record size + tombstone, timestamp, key size.
+          _block_size, _record_size_tombstone, _timestamp, key_size = fd.read(12).unpack('SCQC')
+          record_key = fd.read(key_size)
+          value_size, = fd.read(1).unpack('C')
 
-        @index_structure.add(record_key, [byte_offset, record_size - 1])
+          # fd.pos now sits on the first byte of the value, so the entry is [value offset, value size].
+          @index_structure.add(record_key, [fd.pos, value_size])
 
-        byte_offset += line.bytesize
+          fd.seek(value_size, IO::SEEK_CUR)
+        end
       end
     end
   end
 end
diff --git a/lib/amnesia/segment_handler.rb b/lib/amnesia/segment_handler.rb
index 0f7739b..436be7f 100644
--- a/lib/amnesia/segment_handler.rb
+++ b/lib/amnesia/segment_handler.rb
@@ -32,18 +32,7 @@ def compact
     end
 
     def flush(items)
-      # TODO: Use the storage class for that
-      filename = "./_data/#{Time.now.to_i}.segment"
-
-      File.open(filename, 'w') do |f|
-        items.each { |(key, value)| f.write("#{key},#{value}\n") }
-      end
-
-      @segments.unshift(Amnesia::Segment.new(filename))
-
-      compact if @segments.length == 2
-
-      :finished_flushing
+      create_segment("./_data/#{Time.now.to_i}.segment", items)
     end
 
     # TODO: remove this method
@@ -83,6 +72,10 @@ def load_segments(filenames)
 
     private
 
+    def create_segment(filename, items)
+      @segments.unshift(Amnesia::Segment.new(filename, items: items))
+    end
+
     def start_segment
       filename = "./_data/#{Time.now.to_i}.segment"
 
diff --git a/lib/amnesia/storage.rb b/lib/amnesia/storage.rb
index 5d15224..c423df2 100644
--- a/lib/amnesia/storage.rb
+++ b/lib/amnesia/storage.rb
@@ -2,6 +2,8 @@ module Amnesia
   class Storage
     attr_reader :filename
 
+    FIXED_AMOUNT_OF_BYTE_PER_BLOCK = 13
+
     def initialize(filename, items: nil)
       @filename = filename
       populate_data(items) unless items.nil? || items.empty?
@@ -30,7 +32,7 @@ def delete(key)
     end
 
     def get(key, index_entry: nil)
-      return record_from_index(index_entry) unless index_entry.nil?
+      return record_from_index(index_entry, key) unless index_entry.nil?
 
       record_from_scan(key)
     end
@@ -54,28 +56,74 @@ def file_exists?
     private
 
     def populate_data(items)
-      data_block = items.map { |(key, value)| "#{key},#{value}\n" }.join('')
+      num_keys = items.length
+      creation_timestamp = Time.now.to_i
+
+      header = [num_keys, creation_timestamp].pack('CQ')
+
+      data_blocks = items.map do |(key, value)|
+        key_size = key.bytesize
+        value_size = value.bytesize
+
+        # Both sizes are stored in a single byte; pack('C') would silently keep only the low
+        # 8 bits while the full payload is still written, so readers would mis-parse the file.
+        raise ArgumentError, "key too large (#{key_size} bytes, max 255)" if key_size > 255
+        raise ArgumentError, "value too large (#{value_size} bytes, max 255)" if value_size > 255
+
+        is_tombstone = value.empty? ? 1 : 0
+        record_size = key_size + value_size
+        record_size_tombstone_composition = (record_size << 1) | is_tombstone
+
+        block_size = FIXED_AMOUNT_OF_BYTE_PER_BLOCK + record_size
+
+        row = [block_size, record_size_tombstone_composition, creation_timestamp, key_size, key, value_size, value]
+
+        row.pack("SCQCa#{key_size}Ca#{value_size}")
+      end.join
 
-      create_db_file(data_block)
+      File.binwrite(filename, "#{header}#{data_blocks}")
     end
 
-    def record_from_scan(key)
-      lines = File.readlines(filename)
+    def record_from_scan(searching_key)
+      result = nil
+
+      # Block form closes the file even when a read or unpack raises.
+      File.open(filename, 'rb') do |handler|
+        handler.seek(9, IO::SEEK_CUR) # skip the 9-byte file header
+
+        until handler.eof?
+          block_seek = 12 # bytes of fixed block prefix consumed by the read below
+          block_size, record_size_tombstone, _timestamp, key_size = handler.read(block_seek).unpack('SCQC')
+
+          key = handler.read(key_size)
+
+          if searching_key == key
+            is_tombstone = record_size_tombstone & 1
+
+            value_size, = handler.read(1).unpack('C')
+            value = handler.read(value_size)
+
+            # Legacy "key,value\n" text shape, kept for compatibility with what parse_record receives.
+            result = is_tombstone == 1 ? "#{key},\n" : "#{key},#{value}\n"
+
+            break
+          end
+
+          # Jump to the next block: block size minus the bytes already consumed
+          # (fixed prefix + key).
+          handler.seek(block_size - (key_size + block_seek), IO::SEEK_CUR)
+        end
+      end
 
-      record = lines.filter do |line|
-        record_key, = line.split(',', 2)
-        record_key == key
-      end.last
-
-      parse_record(record)
+      parse_record(result)
     end
 
-    def record_from_index(index_entry)
+    def record_from_index(index_entry, key)
       offset, size = index_entry
 
-      record = File.read(filename, size, offset)
+      value = File.binread(filename, size, offset)
 
-      parse_record(record)
+      parse_record("#{key},#{value}\n")
     end
   end
 end