[前][次][番号順一覧][スレッド一覧]

ruby-changes:4457

From: ko1@a...
Date: Thu, 10 Apr 2008 16:12:58 +0900 (JST)
Subject: [ruby-changes:4457] matz - Ruby:r15948 (trunk): * lib/pstore.rb: replaced by Hongli Lai's faster version.

matz	2008-04-10 16:12:41 +0900 (Thu, 10 Apr 2008)

  New Revision: 15948

  Added files:
    trunk/test/test_pstore.rb
  Modified files:
    trunk/ChangeLog
    trunk/lib/pstore.rb

  Log:
    * lib/pstore.rb: replaced by Hongli Lai's faster version.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15948&r2=15947&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/test_pstore.rb?revision=15948&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/lib/pstore.rb?r1=15948&r2=15947&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15947)
+++ ChangeLog	(revision 15948)
@@ -2,6 +2,8 @@
 
 	* lib/generator.rb: removed obsolete library.  [ruby-core:16233]
 
+	* lib/pstore.rb: replaced by Hongli Lai's faster version.
+
 Thu Apr 10 10:27:24 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* thread_pthread.c (native_sleep): sleep_cond is initialized at
Index: lib/pstore.rb
===================================================================
--- lib/pstore.rb	(revision 15947)
+++ lib/pstore.rb	(revision 15948)
@@ -3,12 +3,14 @@
 # pstore.rb -
 #   originally by matz
 #   documentation by Kev Jackson and James Edward Gray II
+#   improved by Hongli Lai
 #
 # See PStore for documentation.
 
 
 require "fileutils"
 require "digest/md5"
+require "thread"
 
 #
 # PStore implements a file based persistance mechanism based on a Hash.  User
@@ -77,6 +79,20 @@
 #    end
 #  end
 #
+# == Transaction modes
+#
+# By default, file integrity is only ensured as long as the operating system
+# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an
+# I/O error occurs while PStore is writing to its file, then the file will
+# become corrupted.
+#
+# You can prevent this by setting <em>pstore.ultra_safe = true</em>.
+# However, this results in a minor performance loss, and only works on platforms
+# that support atomic file renames. Please consult the documentation for
+# +ultra_safe+ for details.
+#
+# Needless to say, if you're storing valuable data with PStore, then you should
+# backup the PStore files from time to time.
 class PStore
   binmode = defined?(File::BINARY) ? File::BINARY : 0
   RDWR_ACCESS = File::RDWR | File::CREAT | binmode
@@ -86,12 +102,24 @@
   # The error type thrown by all PStore methods.
   class Error < StandardError
   end
+  
+  # Whether PStore should do its best to prevent file corruptions, even when under
+  # unlikely-to-occur error conditions such as out-of-space conditions and other
+  # unusual OS filesystem errors. Setting this flag comes at the price in the form
+  # of a performance loss.
+  #
+  # This flag only has effect on platforms on which file renames are atomic (e.g.
+  # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false.
+  attr_accessor :ultra_safe
 
   # 
   # To construct a PStore object, pass in the _file_ path where you would like 
   # the data to be stored.
+  #
+  # PStore objects are always reentrant. But if _thread_safe_ is set to true,
+  # then it will become thread-safe at the cost of a minor performance hit.
   # 
-  def initialize(file)
+  def initialize(file, thread_safe = false)
     dir = File::dirname(file)
     unless File::directory? dir
       raise PStore::Error, format("directory %s does not exist", dir)
@@ -102,6 +130,12 @@
     @transaction = false
     @filename = file
     @abort = false
+    @ultra_safe = false
+    if @thread_safe
+      @lock = Mutex.new
+    else
+      @lock = DummyMutex.new
+    end
   end
 
   # Raises PStore::Error if the calling code is not in a PStore#transaction.
@@ -142,10 +176,10 @@
   def fetch(name, default=PStore::Error)
     in_transaction
     unless @table.key? name
-      if default==PStore::Error
-	raise PStore::Error, format("undefined root name `%s'", name)
+      if default == PStore::Error
+        raise PStore::Error, format("undefined root name `%s'", name)
       else
-	return default
+        return default
       end
     end
     @table[name]
@@ -281,95 +315,189 @@
   # 
   # Note that PStore does not support nested transactions.
   #
-  def transaction(read_only=false)  # :yields:  pstore
+  def transaction(read_only = false, &block)  # :yields:  pstore
+    value = nil
     raise PStore::Error, "nested transaction" if @transaction
-    begin
+    @lock.synchronize do
       @rdonly = read_only
+      @transaction = true
       @abort = false
-      @transaction = true
-      value = nil
-      new_file = @filename + ".new"
-
-      content = nil
-      unless read_only
-        file = File.open(@filename, RDWR_ACCESS)
-        file.flock(File::LOCK_EX)
-        commit_new(file) if FileTest.exist?(new_file)
-        content = file.read()
+      file = open_and_lock_file(@filename, read_only)
+      if file
+        begin
+          @table, checksum, original_data_size = load_data(file, read_only)
+          
+          catch(:pstore_abort_transaction) do
+            value = yield(self)
+          end
+          
+          if !@abort && !read_only
+            save_data(checksum, original_data_size, file)
+          end
+        ensure
+          file.close if !file.closed?
+        end
       else
+        # This can only occur if read_only == true.
+        @table = {}
+        catch(:pstore_abort_transaction) do
+          value = yield(self)
+        end
+      end
+    end
+  ensure
+    @transaction = false
+    value
+  end
+  
+  private
+  # Constant for relieving Ruby's garbage collector.
+  EMPTY_STRING = ""
+  EMPTY_MARSHAL_DATA = Marshal.dump({})
+  EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)
+  
+  class DummyMutex
+    def synchronize
+      yield
+    end
+  end
+  
+  #
+  # Open the specified filename (either in read-only mode or in
+  # read-write mode) and lock it for reading or writing.
+  #
+  # The opened File object will be returned. If _read_only_ is true,
+  # and the file does not exist, then nil will be returned.
+  #
+  # All exceptions are propagated.
+  #
+  def open_and_lock_file(filename, read_only)
+    if read_only
+      begin
+        file = File.new(filename, RD_ACCESS)
         begin
-          file = File.open(@filename, RD_ACCESS)
           file.flock(File::LOCK_SH)
-          content = (File.open(new_file, RD_ACCESS) {|n| n.read} rescue file.read())
-        rescue Errno::ENOENT
-          content = ""
+          return file
+        rescue
+          file.close
+          raise
         end
+      rescue Errno::ENOENT
+        return nil
       end
-
-      if content != ""
-	@table = load(content)
-        if !read_only
-          size = content.size
-          md5 = Digest::MD5.digest(content)
+    else
+      file = File.new(filename, RDWR_ACCESS)
+      file.flock(File::LOCK_EX)
+      return file
+    end
+  end
+  
+  # Load the given PStore file.
+  # If +read_only+ is true, the unmarshalled Hash will be returned.
+  # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
+  # Hash, an MD5 checksum of the data, and the size of the data.
+  def load_data(file, read_only)
+    if read_only
+      begin
+        table = Marshal.load(file)
+        if !table.is_a?(Hash)
+          raise Error, "PStore file seems to be corrupted."
         end
+      rescue EOFError
+        # This seems to be a newly-created file.
+        table = {}
+      end
+      table
+    else
+      data = file.read
+      if data.empty?
+        # This seems to be a newly-created file.
+        table = {}
+        checksum = EMPTY_MARSHAL_CHECKSUM
+        size = EMPTY_MARSHAL_DATA.size
       else
-	@table = {}
+        table = Marshal.load(data)
+        checksum = Digest::MD5.digest(data)
+        size = data.size
+        if !table.is_a?(Hash)
+          raise Error, "PStore file seems to be corrupted."
+        end
       end
-      content = nil		# unreference huge data
-
-      begin
-	catch(:pstore_abort_transaction) do
-	  value = yield(self)
-	end
-      rescue Exception
-	@abort = true
-	raise
-      ensure
-	if !read_only and !@abort
-          tmp_file = @filename + ".tmp"
-	  content = dump(@table)
-	  if !md5 || size != content.size || md5 != Digest::MD5.digest(content)
-            File.open(tmp_file, WR_ACCESS) {|t| t.write(content)}
-            File.rename(tmp_file, new_file)
-            commit_new(file)
-          end
-          content = nil		# unreference huge data
-	end
-      end
-    ensure
-      @table = nil
-      @transaction = false
-      file.close if file
+      data.replace(EMPTY_STRING)
+      [table, checksum, size]
     end
-    value
   end
-
-  # This method is just a wrapped around Marshal.dump.
-  def dump(table)  # :nodoc:
-    Marshal::dump(table)
+  
+  def on_windows?
+    is_windows = RUBY_PLATFORM =~ /mswin/  ||
+                 RUBY_PLATFORM =~ /mingw/  ||
+                 RUBY_PLATFORM =~ /bbcwin/ ||
+                 RUBY_PLATFORM =~ /wince/
+    self.class.__send__(:define_method, :on_windows?) do
+      is_windows
+    end
+    is_windows
   end
-
-  # This method is just a wrapped around Marshal.load.
-  def load(content)  # :nodoc:
-    Marshal::load(content)
+  
+  # Check whether Marshal.dump supports the 'canonical' option. This option
+  # makes sure that Marshal.dump always dumps data structures in the same order.
+  # This is important because otherwise, the checksums that we generate may differ.
+  def marshal_dump_supports_canonical_option?
+    begin
+      Marshal.dump(nil, -1, true)
+      result = true
+    rescue
+      result = false
+    end
+    self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) do
+      result
+    end
+    result
   end
-
-  # This method is just a wrapped around Marshal.load.
-  def load_file(file)  # :nodoc:
-    Marshal::load(file)
+  
+  def save_data(original_checksum, original_file_size, file)
+    # We only want to save the new data if the size or checksum has changed.
+    # This results in less filesystem calls, which is good for performance.
+    if marshal_dump_supports_canonical_option?
+      new_data = Marshal.dump(@table, -1, true)
+    else
+      new_data = Marshal.dump(@table)
+    end
+    new_checksum = Digest::MD5.digest(new_data)
+    
+    if new_data.size != original_file_size || new_checksum != original_checksum
+      if @ultra_safe && !on_windows?
+        # Windows doesn't support atomic file renames.
+        save_data_with_atomic_file_rename_strategy(new_data, file)
+      else
+        save_data_with_fast_strategy(new_data, file)
+      end
+    end
+    
+    new_data.replace(EMPTY_STRING)
   end
-
-  private
-  # Commits changes to the data store file.
-  def commit_new(f)
-    f.truncate(0)
-    f.rewind
-    new_file = @filename + ".new"
-    File.open(new_file, RD_ACCESS) do |nf|
-      FileUtils.copy_stream(nf, f)
+  
+  def save_data_with_atomic_file_rename_strategy(data, file)
+    temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
+    temp_file = File.new(temp_filename, WR_ACCESS)
+    begin
+      temp_file.flock(File::LOCK_EX)
+      temp_file.write(data)
+      temp_file.flush
+      File.rename(temp_filename, @filename)
+    rescue
+      File.unlink(temp_file) rescue nil
+      raise
+    ensure
+      temp_file.close
     end
-    File.unlink(new_file)
   end
+  
+  def save_data_with_fast_strategy(data, file)
+    file.rewind
+    file.truncate(0)
+    file.write(data)
+  end
 end
 
 # :enddoc:
Index: test/test_pstore.rb
===================================================================
--- test/test_pstore.rb	(revision 0)
+++ test/test_pstore.rb	(revision 15948)
@@ -0,0 +1,74 @@
+require 'test/unit'
+require 'pstore'
+
+class PStoreTest < Test::Unit::TestCase
+  def setup
+    @pstore_file = "pstore.tmp.#{Process.pid}"
+    @pstore = PStore.new(@pstore_file)
+  end
+
+  def teardown
+    File.unlink(@pstore_file) rescue nil
+  end
+
+  def test_opening_new_file_in_readonly_mode_should_result_in_empty_values
+    @pstore.transaction(true) do
+      assert_nil @pstore[:foo]
+      assert_nil @pstore[:bar]
+    end
+  end
+  
+  def test_opening_new_file_in_readwrite_mode_should_result_in_empty_values
+    @pstore.transaction do
+      assert_nil @pstore[:foo]
+      assert_nil @pstore[:bar]
+    end
+  end
+  
+  def test_data_should_be_loaded_correctly_when_in_readonly_mode
+    @pstore.transaction do
+      @pstore[:foo] = "bar"
+    end
+    @pstore.transaction(true) do
+      assert_equal "bar", @pstore[:foo]
+    end
+  end
+  
+  def test_data_should_be_loaded_correctly_when_in_readwrite_mode
+    @pstore.transaction do
+      @pstore[:foo] = "bar"
+    end
+    @pstore.transaction do
+      assert_equal "bar", @pstore[:foo]
+    end
+  end
+  
+  def test_changes_after_commit_are_discarded
+    @pstore.transaction do
+      @pstore[:foo] = "bar"
+      @pstore.commit
+      @pstore[:foo] = "baz"
+    end
+    @pstore.transaction(true) do
+      assert_equal "bar", @pstore[:foo]
+    end
+  end
+  
+  def test_changes_are_not_written_on_abort
+    @pstore.transaction do
+      @pstore[:foo] = "bar"
+      @pstore.abort
+    end
+    @pstore.transaction(true) do
+      assert_nil @pstore[:foo]
+    end
+  end
+  
+  def test_writing_inside_readonly_transaction_raises_error
+    assert_raise(PStore::Error) do
+      @pstore.transaction(true) do
+        @pstore[:foo] = "bar"
+      end
+    end
+  end
+end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]