[前][次][番号順一覧][スレッド一覧]

ruby-changes:25424

From: shirosaki <ko1@a...>
Date: Tue, 6 Nov 2012 00:28:04 +0900 (JST)
Subject: [ruby-changes:25424] shirosaki:r37480 (trunk): Index $LOADED_FEATURES so that require isn't so slow

shirosaki	2012-11-06 00:27:01 +0900 (Tue, 06 Nov 2012)

  New Revision: 37480

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=37480

  Log:
    Index $LOADED_FEATURES so that require isn't so slow
    
    * load.c (rb_feature_p, rb_provide_feature): index $LOADED_FEATURES
      so that require isn't so slow.
    
    * load.c (rb_provide_feature, get_loaded_features_index): ensure
      that $LOADED_FEATURES entries are frozen strings.  The user
      must mutate $LOADED_FEATURES itself rather than its individual
      entries.
    
    * load.c (reset_loaded_features_snapshot): add a new function to reset
      vm->loaded_features_snapshot.
    
    * load.c (get_loaded_features_index_raw): add a new function to get
      the loaded-features index.
    
    * load.c (features_index_add_single): add a new function to add to the
      loaded-features index a single feature.
    
    * load.c (features_index_add): add a new function to add to the
      loaded-features index all the required entries for `feature`.
    
    * vm_core.h (rb_vm_struct): add fields.
    
    * vm.c (rb_vm_mark): mark new fields.
    
    * include/ruby/intern.h (rb_hash_clear): declare function.
    
    * hash.c (rb_hash_clear): make function non-static.
      Patch by Greg Price.
      [ruby-core:47970] [Bug #7158]

  Modified files:
    trunk/ChangeLog
    trunk/hash.c
    trunk/include/ruby/intern.h
    trunk/load.c
    trunk/vm.c
    trunk/vm_core.h

Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h	(revision 37479)
+++ include/ruby/intern.h	(revision 37480)
@@ -459,6 +459,7 @@
 VALUE rb_hash_lookup2(VALUE, VALUE, VALUE);
 VALUE rb_hash_fetch(VALUE, VALUE);
 VALUE rb_hash_aset(VALUE, VALUE, VALUE);
+VALUE rb_hash_clear(VALUE);
 VALUE rb_hash_delete_if(VALUE);
 VALUE rb_hash_delete(VALUE,VALUE);
 typedef VALUE rb_hash_update_func(VALUE newkey, VALUE oldkey, VALUE value);
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 37479)
+++ ChangeLog	(revision 37480)
@@ -1,3 +1,35 @@
+Mon Nov  5 23:24:42 2012  Greg Price  <price@m...>
+
+	* load.c (rb_feature_p, rb_provide_feature): index $LOADED_FEATURES
+	  so that require isn't so slow.
+
+	* load.c (rb_provide_feature, get_loaded_features_index): ensure
+	  that $LOADED_FEATURES entries are frozen strings.  The user
+	  must mutate $LOADED_FEATURES itself rather than its individual
+	  entries.
+
+	* load.c (reset_loaded_features_snapshot): add a new function to reset
+	  vm->loaded_features_snapshot.
+
+	* load.c (get_loaded_features_index_raw): add a new function to get
+	  the loaded-features index.
+
+	* load.c (features_index_add_single): add a new function to add to the
+	  loaded-features index a single feature.
+
+	* load.c (features_index_add): add a new function to add to the
+	  loaded-features index all the required entries for `feature`.
+
+	* vm_core.h (rb_vm_struct): add fields.
+
+	* vm.c (rb_vm_mark): mark new fields.
+
+	* include/ruby/intern.h (rb_hash_clear): declare function.
+
+	* hash.c (rb_hash_clear): make function non-static.
+	  Patch by Greg Price.
+	  [ruby-core:47970] [Bug #7158]
+
 Mon Nov  5 23:23:51 2012  Greg Price  <price@m...>
 
 	* array.c (rb_ary_shared_with_p): new function.
Index: vm_core.h
===================================================================
--- vm_core.h	(revision 37479)
+++ vm_core.h	(revision 37480)
@@ -355,6 +355,8 @@
     VALUE top_self;
     VALUE load_path;
     VALUE loaded_features;
+    VALUE loaded_features_snapshot;
+    VALUE loaded_features_index;
     struct st_table *loading_table;
 
     /* signal */
Index: load.c
===================================================================
--- load.c	(revision 37479)
+++ load.c	(revision 37480)
@@ -18,7 +18,6 @@
 #define IS_DLEXT(e) (strcmp((e), DLEXT) == 0)
 #endif
 
-
 static const char *const loadable_ext[] = {
     ".rb", DLEXT,
 #ifdef DLEXT2
@@ -63,12 +62,110 @@
     return GET_VM()->loaded_features;
 }
 
+static void
+reset_loaded_features_snapshot(void)
+{
+    rb_vm_t *vm = GET_VM();
+    rb_ary_replace(vm->loaded_features_snapshot, vm->loaded_features);
+}
+
+static VALUE
+get_loaded_features_index_raw(void)
+{
+    return GET_VM()->loaded_features_index;
+}
+
 static st_table *
 get_loading_table(void)
 {
     return GET_VM()->loading_table;
 }
 
+static void
+features_index_add_single(VALUE short_feature, VALUE offset)
+{
+    VALUE features_index, this_feature_index;
+    features_index = get_loaded_features_index_raw();
+    if ((this_feature_index = rb_hash_lookup(features_index, short_feature)) == Qnil) {
+	this_feature_index = rb_ary_new();
+	rb_hash_aset(features_index, short_feature, this_feature_index);
+    }
+    rb_ary_push(this_feature_index, offset);
+}
+
+/* Add to the loaded-features index all the required entries for
+   `feature`, located at `offset` in $LOADED_FEATURES.  We add an
+   index entry at each string `short_feature` for which
+     feature == "#{prefix}#{short_feature}#{e}"
+   where `e` is empty or matches %r{^\.[^./]*$}, and `prefix` is empty
+   or ends in '/'.  This maintains the invariant that `rb_feature_p()`
+   relies on for its fast lookup.
+*/
+static void
+features_index_add(VALUE feature, VALUE offset)
+{
+    VALUE short_feature;
+    const char *feature_str, *feature_end, *ext, *p;
+
+    feature_str = StringValuePtr(feature);
+    feature_end = feature_str + RSTRING_LEN(feature);
+
+    for (ext = feature_end; ext > feature_str; ext--)
+      if (*ext == '.' || *ext == '/')
+	break;
+    if (*ext != '.')
+      ext = NULL;
+    /* Now `ext` points to the only string matching %r{^\.[^./]*$} that is
+       at the end of `feature`, or is NULL if there is no such string. */
+
+    p = ext ? ext : feature_end;
+    while (1) {
+	p--;
+	while (p >= feature_str && *p != '/')
+	    p--;
+	if (p < feature_str)
+	    break;
+	/* Now *p == '/'.  We reach this point for every '/' in `feature`. */
+	short_feature = rb_str_substr(feature, p + 1 - feature_str, feature_end - p - 1);
+	features_index_add_single(short_feature, offset);
+	if (ext) {
+	    short_feature = rb_str_substr(feature, p + 1 - feature_str, ext - p - 1);
+	    features_index_add_single(short_feature, offset);
+	}
+    }
+    features_index_add_single(feature, offset);
+    if (ext) {
+	short_feature = rb_str_substr(feature, 0, ext - feature_str);
+	features_index_add_single(short_feature, offset);
+    }
+}
+
+static VALUE
+get_loaded_features_index(void)
+{
+    VALUE features;
+    int i;
+    rb_vm_t *vm = GET_VM();
+
+    if (!rb_ary_shared_with_p(vm->loaded_features_snapshot, vm->loaded_features)) {
+	/* The sharing was broken; something (other than us in rb_provide_feature())
+	   modified loaded_features.  Rebuild the index. */
+	rb_hash_clear(vm->loaded_features_index);
+	features = vm->loaded_features;
+	for (i = 0; i < RARRAY_LEN(features); i++) {
+	    VALUE entry, as_str;
+	    as_str = entry = rb_ary_entry(features, i);
+	    StringValue(as_str);
+	    if (as_str != entry)
+		rb_ary_store(features, i, as_str);
+	    rb_str_freeze(as_str);
+	    features_index_add(as_str, INT2FIX(i));
+	}
+	reset_loaded_features_snapshot();
+    }
+    return vm->loaded_features_index;
+}
+
 /* This searches `load_path` for a value such that
      name == "#{load_path[i]}/#{feature}"
    if `feature` is a suffix of `name`, or otherwise
@@ -143,7 +240,7 @@
 static int
 rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn)
 {
-    VALUE v, features, p, load_path = 0;
+    VALUE features, features_index, feature_val, this_feature_index, v, p, load_path = 0;
     const char *f, *e;
     long i, len, elen, n;
     st_table *loading_tbl;
@@ -162,27 +259,39 @@
 	type = 0;
     }
     features = get_loaded_features();
-    for (i = 0; i < RARRAY_LEN(features); ++i) {
-	/* This loop searches `features` for an entry such that either
-	     "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}"
-	   for some j, or
-	     "#{features[i]}" == "#{feature}#{e}"
-	   Here `e` is an "allowed" extension -- either empty or one
-	   of the extensions accepted by IS_RBEXT, IS_SOEXT, or
-	   IS_DLEXT.  Further, if `ext && rb` then `IS_RBEXT(e)`,
-	   and if `ext && !rb` then `IS_SOEXT(e) || IS_DLEXT(e)`.
+    features_index = get_loaded_features_index();
 
-	   If `expanded`, then only the latter form (without
-	   load_path[j]) is accepted.  Otherwise either form is
-	   accepted, *unless* `ext` is false and an otherwise-matching
-	   entry of the first form is preceded by an entry of the form
-	     "#{features[i2]}" == "#{load_path[j2]}/#{feature}#{e2}"
-	   where `e2` matches /^\.[^./]*$/ but is not an allowed extension.
-	   After a "distractor" entry of this form, only entries of the
-	   form "#{feature}#{e}" are accepted.
-	*/
+    feature_val = rb_str_new(feature, len);
+    this_feature_index = rb_hash_lookup(features_index, feature_val);
+    /* We search `features` for an entry such that either
+         "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}"
+       for some j, or
+         "#{features[i]}" == "#{feature}#{e}"
+       Here `e` is an "allowed" extension -- either empty or one
+       of the extensions accepted by IS_RBEXT, IS_SOEXT, or
+       IS_DLEXT.  Further, if `ext && rb` then `IS_RBEXT(e)`,
+       and if `ext && !rb` then `IS_SOEXT(e) || IS_DLEXT(e)`.
 
-	v = RARRAY_PTR(features)[i];
+       If `expanded`, then only the latter form (without load_path[j])
+       is accepted.  Otherwise either form is accepted, *unless* `ext`
+       is false and an otherwise-matching entry of the first form is
+       preceded by an entry of the form
+         "#{features[i2]}" == "#{load_path[j2]}/#{feature}#{e2}"
+       where `e2` matches %r{^\.[^./]*$} but is not an allowed extension.
+       After a "distractor" entry of this form, only entries of the
+       form "#{feature}#{e}" are accepted.
+
+       In `rb_provide_feature()` and `get_loaded_features_index()` we
+       maintain an invariant that the array `this_feature_index` will
+       point to every entry in `features` which has the form
+         "#{prefix}#{feature}#{e}"
+       where `e` is empty or matches %r{^\.[^./]*$}, and `prefix` is empty
+       or ends in '/'.  This includes both match forms above, as well
+       as any distractors, so we may ignore all other entries in `features`.
+     */
+    for (i = 0; this_feature_index != Qnil && i < RARRAY_LEN(this_feature_index); i++) {
+	long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+	v = RARRAY_PTR(features)[index];
 	f = StringValuePtr(v);
 	if ((n = RSTRING_LEN(v)) < len) continue;
 	if (strncmp(f, feature, len) != 0) {
@@ -205,6 +314,7 @@
 	    return 'r';
 	}
     }
+
     loading_tbl = get_loading_table();
     if (loading_tbl) {
 	f = 0;
@@ -284,11 +394,18 @@
 static void
 rb_provide_feature(VALUE feature)
 {
-    if (OBJ_FROZEN(get_loaded_features())) {
+    VALUE features;
+
+    features = get_loaded_features();
+    if (OBJ_FROZEN(features)) {
 	rb_raise(rb_eRuntimeError,
 		 "$LOADED_FEATURES is frozen; cannot append feature");
     }
-    rb_ary_push(get_loaded_features(), feature);
+    rb_str_freeze(feature);
+
+    rb_ary_push(features, feature);
+    features_index_add(feature, INT2FIX(RARRAY_LEN(features)-1));
+    reset_loaded_features_snapshot();
 }
 
 void
@@ -829,6 +946,8 @@
     rb_define_virtual_variable("$\"", get_loaded_features, 0);
     rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
     vm->loaded_features = rb_ary_new();
+    vm->loaded_features_snapshot = rb_ary_new();
+    vm->loaded_features_index = rb_hash_new();
 
     rb_define_global_function("load", rb_f_load, -1);
     rb_define_global_function("require", rb_f_require, 1);
Index: hash.c
===================================================================
--- hash.c	(revision 37479)
+++ hash.c	(revision 37480)
@@ -1105,7 +1105,7 @@
  *
  */
 
-static VALUE
+VALUE
 rb_hash_clear(VALUE hash)
 {
     rb_hash_modify_check(hash);
Index: vm.c
===================================================================
--- vm.c	(revision 37479)
+++ vm.c	(revision 37480)
@@ -1513,6 +1513,8 @@
 	RUBY_MARK_UNLESS_NULL(vm->mark_object_ary);
 	RUBY_MARK_UNLESS_NULL(vm->load_path);
 	RUBY_MARK_UNLESS_NULL(vm->loaded_features);
+	RUBY_MARK_UNLESS_NULL(vm->loaded_features_snapshot);
+	RUBY_MARK_UNLESS_NULL(vm->loaded_features_index);
 	RUBY_MARK_UNLESS_NULL(vm->top_self);
 	RUBY_MARK_UNLESS_NULL(vm->coverages);
 	rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + ruby_special_error_count);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]