Dogcows Code - chaz/p5-CGI-Ex/blob - samples/benchmark/bench_template_tag_parser.pl

   1 #!/usr/bin/perl -w
   2
   3 use strict;
   4 use Benchmark qw(timethese cmpthese countit timestr);
   5 use IO::Socket;
   6
   7 my $str;
   8 $str = "--[% one %][% two %]--\n";
   9 # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
  10 #   grammar:  4 wallclock secs ( 2.04 usr +  0.00 sys =  2.04 CPU) @ 36585.78/s (n=74635)
  11 #   index:  4 wallclock secs ( 2.12 usr +  0.00 sys =  2.12 CPU) @ 81146.23/s (n=172030)
  12 #   index2:  4 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 71674.76/s (n=150517)
  13 #   match:  4 wallclock secs ( 2.12 usr +  0.01 sys =  2.13 CPU) @ 57690.14/s (n=122880)
  14 #   split:  2 wallclock secs ( 2.06 usr +  0.00 sys =  2.06 CPU) @ 36230.58/s (n=74635)
  15 #            Rate   split grammar   match  index2   index
  16 # split   36231/s      --     -1%    -37%    -49%    -55%
  17 # grammar 36586/s      1%      --    -37%    -49%    -55%
  18 # match   57690/s     59%     58%      --    -20%    -29%
  19 # index2  71675/s     98%     96%     24%      --    -12%
  20 # index   81146/s    124%    122%     41%     13%      --
  21
  22 $str = ((" "x1000)."[% one %]\n")x10;
  23 # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
  24 #   grammar:  3 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 689.52/s (n=1448)
  25 #   index:  3 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 10239.52/s (n=21503)
  26 #   index2:  4 wallclock secs ( 2.13 usr +  0.00 sys =  2.13 CPU) @ 10095.31/s (n=21503)
  27 #   match:  4 wallclock secs ( 2.13 usr +  0.00 sys =  2.13 CPU) @ 6727.23/s (n=14329)
  28 #   split:  4 wallclock secs ( 2.14 usr +  0.00 sys =  2.14 CPU) @ 5023.83/s (n=10751)
  29 #            Rate grammar   split   match  index2   index
  30 # grammar   690/s      --    -86%    -90%    -93%    -93%
  31 # split    5024/s    629%      --    -25%    -50%    -51%
  32 # match    6727/s    876%     34%      --    -33%    -34%
  33 # index2  10095/s   1364%    101%     50%      --     -1%
  34 # index   10240/s   1385%    104%     52%      1%      --
  35
  36 #$str = ((" "x10)."[% one %]\n")x1000;
  37 # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
  38 #   grammar:  3 wallclock secs ( 2.10 usr +  0.01 sys =  2.11 CPU) @ 81.52/s (n=172)
  39 #   index:  4 wallclock secs ( 2.11 usr +  0.01 sys =  2.12 CPU) @ 207.55/s (n=440)
  40 #   index2:  4 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 209.52/s (n=440)
  41 #   match:  3 wallclock secs ( 2.07 usr +  0.00 sys =  2.07 CPU) @ 173.43/s (n=359)
  42 #   split:  4 wallclock secs ( 2.12 usr +  0.00 sys =  2.12 CPU) @ 91.98/s (n=195)
  43 #           Rate grammar   split   match   index  index2
  44 # grammar 81.5/s      --    -11%    -53%    -61%    -61%
  45 # split   92.0/s     13%      --    -47%    -56%    -56%
  46 # match    173/s    113%     89%      --    -16%    -17%
  47 # index    208/s    155%    126%     20%      --     -1%
  48 # index2   210/s    157%    128%     21%      1%      --
  49
  50 ###----------------------------------------------------------------###
  51
  52 ### use a regular expression to go through the string
  53 sub parse_match {
  54     my $new = '';
  55     my $START = quotemeta '[%';
  56     my $END   = quotemeta '%]';
  57
  58     my $pos;
  59     local pos($_[0]) = 0;
  60     while ($_[0] =~ / \G (.*?) $START (.*?) $END /gsx) {
  61         my ($begin, $tag) = ($1, $2);
  62         $pos = pos($_[0]);
  63         $new .= $begin;
  64         $new .= "($tag)";
  65     }
  66     return $pos ? $new . substr($_[0], $pos) : $_[0];
  67 }
  68
  69 ### good ole index - hard coded
  70 sub parse_index {
  71     my $new   = '';
  72
  73     my $last = 0;
  74     while (1) {
  75         my $i = index($_[0], '[%', $last);
  76         last if $i == -1;
  77         $new .= substr($_[0], $last, $i - $last),
  78         my $j   = index($_[0], '%]', $i + 2);
  79         die "Unclosed tag" if $j == -1;
  80         my $tag = substr($_[0], $i + 2, $j - ($i + 2));
  81         $new .= "($tag)";
  82         $last = $j + 2;
  83     }
  84     return $last ? $new . substr($_[0], $last) : $_[0];
  85 }
  86
  87 ### index searching - but configurable
  88 sub parse_index2 {
  89     my $new   = '';
  90     my $START = '[%';
  91     my $END   = '%]';
  92     my $len_s = length $START;
  93     my $len_e = length $END;
  94
  95     my $last = 0;
  96     while (1) {
  97         my $i = index($_[0], $START, $last);
  98         last if $i == -1;
  99         $new .= substr($_[0], $last, $i - $last),
 100         my $j = index($_[0], $END, $i + $len_s);
 101         $last = $j + $len_e;
 102         if ($j == -1) { # missing closing tag
 103             $last = length($_[0]);
 104             last;
 105         }
 106         my $tag = substr($_[0], $i + $len_s, $j - ($i + $len_s));
 107         $new .= "($tag)";
 108     }
 109     return $last ? $new . substr($_[0], $last) : $_[0];
 110 }
 111
 112 ### using a split method (several other split methods were also tried - but were slower)
 113 sub parse_split {
 114     my $new = '';
 115     my $START = quotemeta '[%';
 116     my $END   = quotemeta '%]';
 117
 118     my @all = split /($START .*? $END)/sx, $_[0];
 119     for my $piece (@all) {
 120         next if ! length $piece;
 121         if ($piece !~ /^$START (.*) $END$/sx) {
 122             $new .= $piece;
 123             next;
 124         }
 125         my $tag = $1;
 126         $new .= "($tag)";
 127     }
 128     return $new;
 129 }
 130
 131 ### a regex grammar type matcher
 132 sub parse_grammar {
 133     my $new = '';
 134     my $START = quotemeta '[%';
 135     my $END   = quotemeta '%]';
 136
 137     local pos($_[0]) = 0;
 138     while (1) {
 139         ### find the start tag
 140         last if $_[0] !~ /\G (.*?) $START /gcxs;
 141         $new .= $1;
 142
 143         if ($_[0] !~ /\G (.*?) $END /gcxs) {
 144             die "Unmatched $START tag";
 145         }
 146         $new .= "($1)";
 147     }
 148     return pos($_[0]) ? $new . substr($_[0], pos $_[0]) : $_[0];
 149 }
 150
 151 ### a regex grammar type matcher
 152 sub parse_grammar2 {
 153     my $new = '';
 154     my $START = quotemeta '[%';
 155     my $END   = quotemeta '%]';
 156
 157     local pos $_[0] = 0;
 158     my $last = 0;
 159     while (1) {
 160         ### find the start tag
 161         last if $_[0] !~ / ($START) /gcxs;
 162         my $i = pos $_[0];
 163         $new .= substr $_[0], $last, $i - length($1) - $last;
 164
 165         if ($_[0] !~ / ($END) /gcxs) {
 166             die "Unmatched $START tag";
 167         }
 168         $last = pos $_[0];
 169         my $j = $last - length $1;
 170         $new .= "(".substr($_[0], $i, $j - $i).")";
 171     }
 172     return pos($_[0]) ? $new . substr($_[0], pos $_[0]) : $_[0];
 173 }
 174
 175 ### use a regular expression to go through the string bruteforce
 176 sub parse_pos_array {
 177     my $new = '';
 178     my $START = '[%';
 179     my $END   = '%]';
 180
 181     local pos($_[0]) = 0;
 182     my @start1;
 183     my @start2;
 184     while ($_[0] =~ /(\Q$START\E)/g) { push @start1, $-[1]; push @start2, $+[1] }
 185
 186     local pos($_[0]) = 0;
 187     my @end1;
 188     my @end2;
 189     while ($_[0] =~ /(\Q$END\E)/g) { push @end1, $-[1]; push @end2, $+[1] }
 190
 191     my $last = 0;
 192     while (1) {
 193         last if ! @start1;
 194         my $i  = shift @start1;
 195         my $i2 = shift @start2;
 196
 197         $new .= substr($_[0], $last, $i - $last);
 198
 199         die "Unclosed tag" if ! @end1;
 200         my $j  = shift @end1;
 201         my $j2 = shift @end2;
 202
 203         my $tag = substr($_[0], $i2, $j - $i2);
 204         $new.= "($tag)";
 205
 206         $last = $j2;
 207     }
 208     return $last ? $new . substr($_[0], $last) : $_[0];
 209 }
 210
 211 ###----------------------------------------------------------------###
 212 ### check compliance
 213
 214 #print parse_match($str);
 215 #print "---\n";
 216 #print parse_split($str);
 217 #print "---\n";
 218 #print parse_grammar($str);
 219 #print "---\n";
 220 #print parse_index($str);
 221 #print "---\n";
 222 #print parse_pos_array($str);
 223 #exit;
 224 die "parse_split     didn't match" if parse_split($str)     ne parse_match($str);
 225 die "parse_grammar   didn't match" if parse_grammar($str)   ne parse_match($str);
 226 die "parse_grammar2  didn't match" if parse_grammar2($str)  ne parse_match($str);
 227 die "parse_index     didn't match" if parse_index($str)     ne parse_match($str);
 228 die "parse_index2    didn't match" if parse_index2($str)    ne parse_match($str);
 229 die "parse_pos_array didn't match" if parse_pos_array($str) ne parse_match($str);
 230 #exit;
 231
 232 ### and run them
 233 cmpthese timethese (-2, {
 234     index     => sub { parse_index($str) },
 235     index2    => sub { parse_index2($str) },
 236     match     => sub { parse_match($str) },
 237     split     => sub { parse_split($str) },
 238     grammar   => sub { parse_grammar($str) },
 239     grammar2  => sub { parse_grammar2($str) },
 240     pos_array => sub { parse_pos_array($str) },
 241 });