ruby-changes:2059
From: ko1@a...
Date: 28 Sep 2007 22:42:51 +0900
Subject: [ruby-changes:2059] ko1 - Ruby:r13550 (trunk): * benchmark/bm_so_fasta.rb: added.
ko1 2007-09-28 22:42:36 +0900 (Fri, 28 Sep 2007)

  New Revision: 13550

  Added files:
    trunk/benchmark/bm_so_fasta.rb
    trunk/benchmark/bm_so_k_nucleotide.rb
    trunk/benchmark/bm_so_reverse_complement.rb
    trunk/benchmark/make_fasta_output.rb
    trunk/benchmark/prepare_so_k_nucleotide.rb
    trunk/benchmark/prepare_so_reverse_complement.rb
  Modified files:
    trunk/ChangeLog

  Log:
    * benchmark/bm_so_fasta.rb: added.
    * benchmark/bm_so_k_nucleotide.rb: added.
    * benchmark/bm_so_reverse_complement.rb: added.
    * benchmark/make_fasta_output.rb: added.
    * benchmark/prepare_so_k_nucleotide.rb: added.
    * benchmark/prepare_so_reverse_complement.rb: added.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/benchmark/bm_so_reverse_complement.rb?revision=13550&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/benchmark/bm_so_fasta.rb?revision=13550&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13550&r2=13549
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/benchmark/prepare_so_reverse_complement.rb?revision=13550&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/benchmark/bm_so_k_nucleotide.rb?revision=13550&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/benchmark/make_fasta_output.rb?revision=13550&view=markup
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/benchmark/prepare_so_k_nucleotide.rb?revision=13550&view=markup

Index: ChangeLog
===================================================================
--- ChangeLog (revision 13549)
+++ ChangeLog (revision 13550)
@@ -1,3 +1,17 @@
+Fri Sep 28 22:33:47 2007 Koichi Sasada <ko1@a...>
+
+	* benchmark/bm_so_fasta.rb: added.
+
+	* benchmark/bm_so_k_nucleotide.rb: added.
+
+	* benchmark/bm_so_reverse_complement.rb: added.
+
+	* benchmark/make_fasta_output.rb: added.
+
+	* benchmark/prepare_so_k_nucleotide.rb: added.
+
+	* benchmark/prepare_so_reverse_complement.rb: added.
+
 Fri Sep 28 19:14:51 2007 Koichi Sasada <ko1@a...>
 
 	* benchmark/driver.rb: fix notations.
Index: benchmark/bm_so_reverse_complement.rb
===================================================================
--- benchmark/bm_so_reverse_complement.rb (revision 0)
+++ benchmark/bm_so_reverse_complement.rb (revision 13550)
@@ -0,0 +1,30 @@
+#!/usr/bin/ruby
+# The Great Computer Language Shootout
+# http://shootout.alioth.debian.org/
+#
+# Contributed by Peter Bjarke Olsen
+# Modified by Doug King
+
+seq=Array.new
+
+def revcomp(seq)
+  seq.reverse!.tr!('wsatugcyrkmbdhvnATUGCYRKMBDHVN','WSTAACGRYMKVHDBNTAACGRYMKVHDBN')
+  stringlen=seq.length
+  0.step(stringlen-1,60) {|x| print seq.slice(x,60) , "\n"}
+end
+
+input = open(File.join(File.dirname($0), 'fasta.output.2500000'), 'rb')
+
+while input.gets
+  if $_ =~ />/
+    if seq.length != 0
+      revcomp(seq.join)
+      seq=Array.new
+    end
+    puts $_
+  else
+    $_.sub(/\n/,'')
+    seq.push $_
+  end
+end
+revcomp(seq.join)
Index: benchmark/bm_so_fasta.rb
===================================================================
--- benchmark/bm_so_fasta.rb (revision 0)
+++ benchmark/bm_so_fasta.rb (revision 13550)
@@ -0,0 +1,81 @@
+# The Computer Language Shootout
+# http://shootout.alioth.debian.org/
+# Contributed by Sokolov Yura
+
+$last = 42.0
+def gen_random (max,im=139968,ia=3877,ic=29573)
+  (max * ($last = ($last * ia + ic) % im)) / im
+end
+
+alu =
+  "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
+  "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
+  "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
+  "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
+  "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
+  "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
+  "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
+
+iub = [
+  ["a", 0.27],
+  ["c", 0.12],
+  ["g", 0.12],
+  ["t", 0.27],
+
+  ["B", 0.02],
+  ["D", 0.02],
+  ["H", 0.02],
+  ["K", 0.02],
+  ["M", 0.02],
+  ["N", 0.02],
+  ["R", 0.02],
+  ["S", 0.02],
+  ["V", 0.02],
+  ["W", 0.02],
+  ["Y", 0.02],
+]
+homosapiens = [
+  ["a", 0.3029549426680],
+  ["c", 0.1979883004921],
+  ["g", 0.1975473066391],
+  ["t", 0.3015094502008],
+]
+
+def make_repeat_fasta(id, desc, src, n)
+  puts ">#{id} #{desc}"
+  v = nil
+  width = 60
+  l = src.length
+  s = src * ((n / l) + 1)
+  s.slice!(n, l)
+  puts(s.scan(/.{1,#{width}}/).join("\n"))
+end
+
+def make_random_fasta(id, desc, table, n)
+  puts ">#{id} #{desc}"
+  rand, v = nil,nil
+  width = 60
+  chunk = 1 * width
+  prob = 0.0
+  table.each{|v| v[1]= (prob += v[1])}
+  for i in 1..(n/width)
+    puts((1..width).collect{
+      rand = gen_random(1.0)
+      table.find{|v| v[1]>rand}[0]
+    }.join)
+  end
+  if n%width != 0
+    puts((1..(n%width)).collect{
+      rand = gen_random(1.0)
+      table.find{|v| v[1]>rand}[0]
+    }.join)
+  end
+end
+
+
+n = (ARGV[0] or 250_000).to_i
+
+make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
+make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
+make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
+
Index: benchmark/make_fasta_output.rb
===================================================================
--- benchmark/make_fasta_output.rb (revision 0)
+++ benchmark/make_fasta_output.rb (revision 13550)
@@ -0,0 +1,19 @@
+# prepare 'fasta.output'
+
+def prepare_fasta_output n
+  filebase = File.join(File.dirname($0), 'fasta.output')
+  script = File.join(File.dirname($0), 'bm_so_fasta.rb')
+  file = "#{filebase}.#{n}"
+
+  unless FileTest.exist?(file)
+    STDERR.puts "preparing #{file}"
+
+    open(file, 'w'){|f|
+      ARGV[0] = n
+      $stdout = f
+      load script
+      $stdout = STDOUT
+    }
+  end
+end
+
Index: benchmark/prepare_so_reverse_complement.rb
===================================================================
--- benchmark/prepare_so_reverse_complement.rb (revision 0)
+++ benchmark/prepare_so_reverse_complement.rb (revision 13550)
@@ -0,0 +1,2 @@
+require File.join(File.dirname(__FILE__), 'make_fasta_output')
+prepare_fasta_output(2_500_000)
Index: benchmark/bm_so_k_nucleotide.rb
===================================================================
--- benchmark/bm_so_k_nucleotide.rb (revision 0)
+++ benchmark/bm_so_k_nucleotide.rb (revision 13550)
@@ -0,0 +1,48 @@
+# The Computer Language Shootout
+# http://shootout.alioth.debian.org
+#
+# contributed by jose fco. gonzalez
+# modified by Sokolov Yura
+
+seq = String.new
+
+def frecuency( seq,length )
+  n, table = seq.length - length + 1, Hash.new(0)
+  f, i = nil, nil
+  (0 ... length).each do |f|
+    (f ... n).step(length) do |i|
+      table[seq[i,length]] += 1
+    end
+  end
+  [n,table]
+
+end
+
+def sort_by_freq( seq,length )
+  n,table = frecuency( seq,length )
+  a, b, v = nil, nil, nil
+  table.sort{|a,b| b[1] <=> a[1]}.each do |v|
+    puts "%s %.3f" % [v[0].upcase,((v[1]*100).to_f/n)]
+  end
+  puts
+end
+
+def find_seq( seq,s )
+  n,table = frecuency( seq,s.length )
+  puts "#{table[s].to_s}\t#{s.upcase}"
+end
+
+input = open(File.join(File.dirname($0), 'fasta.output.100000'), 'rb')
+
+line = input.gets while line !~ /^>THREE/
+line = input.gets
+
+while (line !~ /^>/) & line do
+  seq << line.chomp
+  line = input.gets
+end
+
+[1,2].each {|i| sort_by_freq( seq,i ) }
+
+%w(ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt).each{|s| find_seq( seq,s) }
+
Index: benchmark/prepare_so_k_nucleotide.rb
===================================================================
--- benchmark/prepare_so_k_nucleotide.rb (revision 0)
+++ benchmark/prepare_so_k_nucleotide.rb (revision 13550)
@@ -0,0 +1,2 @@
+require File.join(File.dirname(__FILE__), 'make_fasta_output')
+prepare_fasta_output(100_000)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml