[前][次][番号順一覧][スレッド一覧]

ruby-changes:73589

From: Nobuyoshi <ko1@a...>
Date: Sat, 17 Sep 2022 16:35:17 +0900 (JST)
Subject: [ruby-changes:73589] ca4cbe59ed (master): Move case-folding.rb to tooldir with enc-prefix

https://git.ruby-lang.org/ruby.git/commit/?id=ca4cbe59ed

From ca4cbe59eda77a3855094c843486759868794e85 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Wed, 14 Sep 2022 19:15:45 +0900
Subject: Move case-folding.rb to tooldir with enc-prefix

---
 common.mk                   |   6 +-
 enc/unicode/case-folding.rb | 418 --------------------------------------------
 tool/enc-case-folding.rb    | 418 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 421 deletions(-)
 delete mode 100644 enc/unicode/case-folding.rb
 create mode 100755 tool/enc-case-folding.rb

diff --git a/common.mk b/common.mk
index 5e34aedcbc..ad492c0079 100644
--- a/common.mk
+++ b/common.mk
@@ -1644,19 +1644,19 @@ $(UNICODE_HDR_DIR)/name2ctype.h: https://github.com/ruby/ruby/blob/trunk/common.mk#L1644
 	$(MV) $@.new $@
 
 # the next non-comment line was:
-# $(UNICODE_HDR_DIR)/casefold.h: $(srcdir)/enc/unicode/case-folding.rb \
+# $(UNICODE_HDR_DIR)/casefold.h: $(tooldir)/enc-case-folding.rb \
 # but was changed to make sure CI works on systems that don't have gperf
 unicode-up: $(UNICODE_DATA_HEADERS)
 
 $(UNICODE_HDR_DIR)/$(ALWAYS_UPDATE_UNICODE:yes=casefold.h): \
-		$(srcdir)/enc/unicode/case-folding.rb \
+		$(tooldir)/enc-case-folding.rb \
 		$(UNICODE_SRC_DATA_DIR)/UnicodeData.txt \
 		$(UNICODE_SRC_DATA_DIR)/SpecialCasing.txt \
 		$(UNICODE_SRC_DATA_DIR)/CaseFolding.txt
 
 $(UNICODE_HDR_DIR)/casefold.h:
 	$(MAKEDIRS) $(@D)
-	$(Q) $(BASERUBY) $(srcdir)/enc/unicode/case-folding.rb \
+	$(Q) $(BASERUBY) $(tooldir)/enc-case-folding.rb \
 		--output-file=$@ \
 		--mapping-data-directory=$(UNICODE_SRC_DATA_DIR)
 
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
deleted file mode 100644
index 4a29fdebf7..0000000000
--- a/enc/unicode/case-folding.rb
+++ /dev/null
@@ -1,418 +0,0 @@ https://github.com/ruby/ruby/blob/trunk/common.mk#L0
-#!/usr/bin/ruby
-require 'stringio'
-
-# Usage (for case folding only):
-#   $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
-#   $ ruby case-folding.rb CaseFolding.txt -o casefold.h
-#  or (for case folding and case mapping):
-#   $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
-#   $ wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
-#   $ wget http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt
-#   $ ruby case-folding.rb -m . -o casefold.h
-# using -d or --debug will include UTF-8 characters in comments for debugging
-
-class CaseFolding
-  module Util
-    module_function
-
-    def hex_seq(v)
-      v.map { |i| "0x%04x" % i }.join(", ")
-    end
-
-    def print_table_1(dest, type, mapping_data, data)
-      for k, v in data = data.sort
-        sk = (Array === k and k.length > 1) ? "{#{hex_seq(k)}}" : ("0x%04x" % k)
-        if type=='CaseUnfold_11' and v.length>1
-          # reorder CaseUnfold_11 entries to avoid special treatment for U+03B9/U+03BC/U+A64B
-          item = mapping_data.map("%04X" % k[0])
-          upper = item.upper if item
-          v = v.sort_by { |i| ("%04X"%i) == upper ? 0 : 1 }
-        end
-        ck = @debug ? ' /* ' + Array(k).pack("U*") + ' */' : ''
-        cv = @debug ? ' /* ' + Array(v).map{|c|[c].pack("U*")}.join(", ") + ' */' : ''
-        dest.print("  {#{sk}#{ck}, {#{v.length}#{mapping_data.flags(k, type, v)}, {#{hex_seq(v)}#{cv}}}},\n")
-      end
-      data
-    end
-
-    def print_table(dest, type, mapping_data, data)
-      dest.print("static const #{type}_Type #{type}_Table[] = {\n")
-      i = 0
-      ret = data.inject([]) do |a, (n, d)|
-        dest.print("#define #{n} (*(#{type}_Type (*)[#{d.size}])(#{type}_Table+#{i}))\n")
-        i += d.size
-        a.concat(print_table_1(dest, type, mapping_data, d))
-      end
-      dest.print("};\n\n")
-      ret
-    end
-  end
-
-  include Util
-
-  attr_reader :fold, :fold_locale, :unfold, :unfold_locale, :version
-
-  def load(filename)
-    pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/
-
-    @fold = fold = {}
-    @unfold = unfold = [{}, {}, {}]
-    @debug = false
-    @version = nil
-    turkic = []
-
-    IO.foreach(filename, mode: "rb") do |line|
-      @version ||= line[/-([0-9.]+).txt/, 1]
-      next unless res = pattern.match(line)
-      ch_from = res[1].to_i(16)
-
-      if res[2] == 'T'
-        # Turkic case folding
-        turkic << ch_from
-        next
-      end
-
-      # store folding data
-      ch_to = res[3..6].inject([]) do |a, i|
-        break a unless i
-        a << i.to_i(16)
-      end
-      fold[ch_from] = ch_to
-
-      # store unfolding data
-      i = ch_to.length - 1
-      (unfold[i][ch_to] ||= []) << ch_from
-    end
-
-    # move locale dependent data to (un)fold_locale
-    @fold_locale = fold_locale = {}
-    @unfold_locale = unfold_locale = [{}, {}]
-    for ch_from in turkic
-      key = fold[ch_from]
-      i = key.length - 1
-      unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
-      fold_locale[ch_from] = fold.delete(ch_from)
-    end
-    self
-  end
-
-  def range_check(code)
-    "#{code} <= MAX_CODE_VALUE && #{code} >= MIN_CODE_VALUE"
-  end
-
-  def lookup_hash(key, type, data)
-    hash = "onigenc_unicode_#{key}_hash"
-    lookup = "onigenc_unicode_#{key}_lookup"
-    arity = Array(data[0][0]).size
-    gperf = %W"gperf -7 -k#{[*1..(arity*3)].join(',')} -F,-1 -c -j1 -i1 -t -T -E -C -H #{hash} -N #{lookup} -n"
-    argname = arity > 1 ? "codes" : "code"
-    argdecl = "const OnigCodePoint #{arity > 1 ? "*": ""}#{argname}"
-    n = 7
-    m = (1 << n) - 1
-    min, max = data.map {|c, *|c}.flatten.minmax
-    src = IO.popen(gperf, "r+") {|f|
-      f << "short\n%%\n"
-      data.each_with_index {|(k, _), i|
-        k = Array(k)
-        ks = k.map {|j| [(j >> n*2) & m, (j >> n) & m, (j) & m]}.flatten.map {|c| "\\x%.2x" % c}.join("")
-        f.printf "\"%s\", ::::/*%s*/ %d\n", ks, k.map {|c| "0x%.4x" % c}.join(","), i
-      }
-      f << "%%\n"
-      f.close_write
-      f.read
-    }
-    src.sub!(/^(#{hash})\s*\(.*?\).*?\n\{\n(.*)^\}/m) {
-      name = $1
-      body = $2
-      body.gsub!(/\(unsigned char\)str\[(\d+)\]/, "bits_#{arity > 1 ? 'at' : 'of'}(#{argname}, \\1)")
-      "#{name}(#{argdecl})\n{\n#{body}}"
-    }
-    src.sub!(/const short *\*\n^(#{lookup})\s*\(.*?\).*?\n\{\n(.*)^\}/m) {
-      name = $1
-      body = $2
-      body.sub!(/\benum\s+\{(\n[ \t]+)/, "\\&MIN_CODE_VALUE = 0x#{min.to_s(16)},\\1""MAX_CODE_VALUE = 0x#{max.to_s(16)},\\1")
-      body.gsub!(/(#{hash})\s*\(.*?\)/, "\\1(#{argname})")
-      body.gsub!(/\{"",-1}/, "-1")
-      body.gsub!(/\{"(?:[^"]|\\")+", *::::(.*)\}/, '\1')
-      body.sub!(/(\s+if\s)\(len\b.*\)/) do
-        "#$1(" <<
-          (arity > 1 ? (0...arity).map {|i| range_check("#{argname}[#{i}]")}.join(" &&\n      ") : range_check(argname)) <<
-          ")"
-      end
-      v = nil
-      body.sub!(/(if\s*\(.*MAX_HASH_VALUE.*\)\n([ \t]*))\{(.*?)\n\2\}/m) {
-        pre = $1
-        indent = $2
-        s = $3
-        s.sub!(/const char *\* *(\w+)( *= *wordlist\[\w+\]).\w+/, 'short \1 = wordlist[key]')
-        v = $1
-        s.sub!(/\bif *\(.*\)/, "if (#{v} >= 0 && code#{arity}_equal(#{argname}, #{key}_Table[#{v}].from))")
-        "#{pre}{#{s}\n#{indent}}"
-      }
-      body.sub!(/\b(return\s+&)([^;]+);/, '\1'"#{key}_Table[#{v}].to;")
-      "static const #{type} *\n#{name}(#{argdecl})\n{\n#{body}}"
-    }
-    src
-  end
-
-  def display(dest, mapping_data)
-    # print the header
-    dest.print("/* DO NOT EDIT THIS FILE. */\n")
-    dest.print("/* Generated by enc/unicode/case-folding.rb */\n\n")
-
-    versions = version.scan(/\d+/)
-    dest.print("#if defined ONIG_UNICODE_VERSION_STRING && !( \\\n")
-    %w[MAJOR MINOR TEENY].zip(versions) do |n, v|
-      dest.print("      ONIG_UNICODE_VERSION_#{n} == #{v} && \\\n")
-    end
-    dest.print("      1)\n")
-    dest.print("# error ONIG_UNICODE_VERSION_STRING mismatch\n")
-    dest.print("#endif\n")
-    dest.print("#define ONIG_UNICODE_VERSION_STRING #{version.dump}\n")
-    %w[MAJOR MINOR TEENY].zip(versions) do |n, v|
-      dest.print("#define ONIG_UNICODE_VERSION_#{n} #{v}\n")
-    end
-    dest.print("\n")
-
-    # print folding data
-
-    # CaseFold + CaseFold_Locale
-    name = "CaseFold_11"
-    data = print_table(dest, name, mapping_data, "CaseFold"=>fold, "CaseFold_Locale"=>fold_locale)
-    dest.print lookup_hash(name, "CodePointList3", data)
-
-    # print unfolding data
-
-    # CaseUnfold_11 + CaseUnfold_11_Locale
-    name = "CaseUnfold_11"
-    data = print_table(dest, name, mapping_data, name=>unfold[0], "#{name}_Locale"=>unfold_locale[0])
-    dest.print lookup_hash(name, "CodePointList3", data)
-
-    # CaseUnfold_12 + CaseUnfold_12_Locale
-    name = "CaseUnfold_12"
-    data = print_table(dest, name, mapping_data, name=>unfold[1], "#{name}_Locale"=>unfold_locale[1])
-    dest.print lookup_hash(name, "CodePointList2", data)
-
-    # CaseUnfold_13
-    name = "CaseUnfold_13"
-    data = print_table(dest, name, mapping_data, name=>unfold[2])
-    dest.print lookup_hash(name, "CodePointList2", data)
-
-    # TitleCase
-    dest.print mapping_data.specials_output
-  end
-
-  def debug!
-    @debug = true
-  end
-
-  def self.load(*args)
-    new.load(*args)
-  end
-end
-
-class MapItem
-  attr_accessor :upper, :lower, :title, :code
-
-  def initialize(code, upper, lower, title)
-    @code = code
-    @upper = upper unless upper == ''
-    @lower = lower unless lower == ''
-    @title = title unless title == ''
-  end
-end
-
-class CaseMapping
-  attr_reader :filename, :version
-
-  def initialize(mapping_directory)
-    @mappings = {}
-    @specials = []
-    @specials_length = 0
-    @version = nil
-    IO.foreach(File.join(mapping_directory, 'UnicodeData.txt'), mode: "rb") do |line|
-      next if line =~ /^</
-      code, _, _, _, _, _, _, _, _, _, _, _, upper, lower, title = line.chomp.split ';'
-      unless upper and lower and title and (upper+lower+title)==''
-        @mappings[code] = MapItem.new(code,  (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]