Paměť efektivní 2D bit storage v Ruby (100 položek)

0

Otázka

Chci používat velké datové struktury reprezentovat bit (0 nebo 1) nebo typu boolean (true/false) v Ruby.

V níže uvedeném příkladu kódu, já používám 2D pole o velikosti 10^4 * 10^4, pro uložení boolean data.

require 'get_process_mem'

num = 10000
twod =  Array.new(num) { Array.new(num, false)}
for i in 0..num-1 do
  for j in 0..num-1 do
    twod[i][j] = i>j
  end
end

mem = GetProcessMem.new
puts mem.inspect

Výstup ukazuje, že to používá ~778 MB.

#<GetProcessMem:0x00000078 @mb=777.93359375 @gb=0.7597007751464844 @kb=796604.0 @bytes=0.815722496e9>

Jen jsem se snažil změnit datový typ na celé číslo v kódu a využití paměti hlásí stejné hodnoty.

Aktualizováno kód:

require 'get_process_mem'

num = 10000
twod =  Array.new(num) { Array.new(num, 500)}
for i in 0..num-1 do
  for j in 0..num-1 do
    twod[i][j] = 1000+i+j
  end
end

mem = GetProcessMem.new
puts mem.inspect

Výstup:

#<GetProcessMem:0x00000078 @mb=777.6015625 @gb=0.7593765258789062 @kb=796264.0 @bytes=0.815374336e9>

Čekal jsem, že to boolean array by se použít méně paměti než celé číslo pole, což se nezdá být případ! Je tam nějaké jiné optimalizované způsob, jak ukládat bit nebo boolean hodnoty?

bit boolean data-structures optimization
2021-11-22 09:54:40
1

Nejlepší odpověď

0

Můžete vytvořit pole byte FFI:Vyrovnávací paměti, aby si dobrou velikost/výkon https://rubydoc.info/github/ffi/ffi/FFI/Buffer a dělat bitové operace na každý bajt:

#!/usr/bin/env ruby
require 'get_process_mem'
require 'ffi'
BIT_OP = {
  get: 1,
  set: 2,
  clear: 3,
}
class BitStore
  attr_accessor :buf, :size
  def initialize(shape) # ex [10000,10000] [10,20,30], etc
    @shp = shape
    @size = 1;
    shape.each do | sz |
      @size *= sz
    end
    @size = max(next8(@size)/8, 8) + 1 # min size needed
    clear = true
    @buf = FFI::Buffer.new(@size,1,clear)
  end
  def max(x,y)
    x > y ? x : y
  end
  def next8(val)
    val % 8 == 0 ? val+8 : ((val / 8).to_i + 1)*8
  end
  def write_buf_to_file(fname)
    fout = File.open(fname, "wb")
    fout.write(@buf.get_bytes(0,@size))
    fout.close
  end
  def check_position(coord_arr)
    if coord_arr.size != @shp.size
      raise "coord_arr size != shape size"
    end
    coord_arr.each_with_index do | coord, i |
      if coord_arr[i] > @shp[i]-1
        raise "coord index out of bounds "
      end
    end
  end
  def get_position(coord_arr)
    position = coord_arr[0]
    (1..coord_arr.size-1).each do | i |
      position += coord_arr[i] * @shp.reverse[i]
    end
    return position
  end
  def bit_op(coord_arr, op)
    check_position(coord_arr)
    position = get_position(coord_arr)
    offset = (position / 8).to_i
    bit_pos = (position % 8)
    bit_val = 1 << bit_pos
    val = @buf.get_string(offset, 1)
    numeric_value = val == "" ? 0 : val.unpack("C")[0]
    case op
    when BIT_OP[:get]
      numeric_value = numeric_value & bit_val
      return numeric_value > 0 ? "set" : "clear"
    when BIT_OP[:set]
      numeric_value = numeric_value | bit_val
    when BIT_OP[:clear]
      numeric_value = numeric_value & (bit_val ^ 0xff)
    end
    @buf.put_string(offset,[numeric_value].pack("C"))
    return ""
  end
end
def main
  begin
    rows = 10000
    cols = 10000
    b = BitStore.new([rows,cols])
    
    puts "setting [3,0]"
    b.bit_op([3,0],BIT_OP[:set])
    is_set = b.bit_op([3,0],BIT_OP[:get])
    puts is_set

    puts "setting [8000,8000]"
    b.bit_op([8000,8000],BIT_OP[:set])
    is_set = b.bit_op([8000,8000],BIT_OP[:get])
    puts is_set

    puts "clearing [8000,8000]"
    b.bit_op([8000,8000],BIT_OP[:clear])
    is_set = b.bit_op([8000,8000],BIT_OP[:get])
    puts is_set

    mem = GetProcessMem.new
    puts mem.inspect
  rescue NoMemoryError => e
    puts "NoMemoryError"
    p e
    p e.backtrace.inspect
  end
end
main

Využití paměti je 26MB:

user1@debian10 /home/user1/rubynew > ./pack_testb.rb 
setting [3,0]
set
setting [8000,8000]
set
clearing [8000,8000]
clear
#<GetProcessMem:0x000000a0 @mb=26.6953125 @gb=0.02606964111328125 @kb=27336.0 @bytes=0.27992064e8>

Struktura souboru na disku:

b = BitStore.new([7,6])
puts "setting [3,0]"
b.bit_op([3,0],BIT_OP[:set])
is_set = b.bit_op([3,0],BIT_OP[:get])
puts is_set
b.write_buf_to_file("/tmp/the_buf.bin")
system("xxd /tmp/the_buf.bin")

puts "setting [6,5]"
b.bit_op([6,5],BIT_OP[:set])
is_set = b.bit_op([6,5],BIT_OP[:get])
puts is_set
b.write_buf_to_file("/tmp/the_buf.bin")
system("xxd /tmp/the_buf.bin")

setting [3,0]
00000000: 0800 0000 0000 0000 00
setting [6,5]
00000000: 0000 0000 0002 0000 00
2021-11-23 15:53:34

V jiných jazycích

Tato stránka je v jiných jazycích

Русский
..................................................................................................................
Italiano
..................................................................................................................
Polski
..................................................................................................................
Română
..................................................................................................................
한국어
..................................................................................................................
हिन्दी
..................................................................................................................
Français
..................................................................................................................
Türk
..................................................................................................................
Português
..................................................................................................................
ไทย
..................................................................................................................
中文
..................................................................................................................
Español
..................................................................................................................
Slovenský
..................................................................................................................