Dogcows Code - chaz/p5-CGI-Ex/blob - samples/benchmark/bench_template_tag_parser.pl

   1 #!/usr/bin/perl -w
   2
   3 use strict;
   4 use Benchmark qw(timethese cmpthese countit timestr);
   5 use IO::Socket;
   6
   7 my $str = "--[% one %][% two %]--\n";
   8 # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
   9 #   grammar:  4 wallclock secs ( 2.04 usr +  0.00 sys =  2.04 CPU) @ 36585.78/s (n=74635)
  10 #   index:  4 wallclock secs ( 2.12 usr +  0.00 sys =  2.12 CPU) @ 81146.23/s (n=172030)
  11 #   index2:  4 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 71674.76/s (n=150517)
  12 #   match:  4 wallclock secs ( 2.12 usr +  0.01 sys =  2.13 CPU) @ 57690.14/s (n=122880)
  13 #   split:  2 wallclock secs ( 2.06 usr +  0.00 sys =  2.06 CPU) @ 36230.58/s (n=74635)
  14 #            Rate   split grammar   match  index2   index
  15 # split   36231/s      --     -1%    -37%    -49%    -55%
  16 # grammar 36586/s      1%      --    -37%    -49%    -55%
  17 # match   57690/s     59%     58%      --    -20%    -29%
  18 # index2  71675/s     98%     96%     24%      --    -12%
  19 # index   81146/s    124%    122%     41%     13%      --
  20
  21 #my $str = ((" "x1000)."[% one %]\n")x10;
  22 # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
  23 #   grammar:  3 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 689.52/s (n=1448)
  24 #   index:  3 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 10239.52/s (n=21503)
  25 #   index2:  4 wallclock secs ( 2.13 usr +  0.00 sys =  2.13 CPU) @ 10095.31/s (n=21503)
  26 #   match:  4 wallclock secs ( 2.13 usr +  0.00 sys =  2.13 CPU) @ 6727.23/s (n=14329)
  27 #   split:  4 wallclock secs ( 2.14 usr +  0.00 sys =  2.14 CPU) @ 5023.83/s (n=10751)
  28 #            Rate grammar   split   match  index2   index
  29 # grammar   690/s      --    -86%    -90%    -93%    -93%
  30 # split    5024/s    629%      --    -25%    -50%    -51%
  31 # match    6727/s    876%     34%      --    -33%    -34%
  32 # index2  10095/s   1364%    101%     50%      --     -1%
  33 # index   10240/s   1385%    104%     52%      1%      --
  34
  35 #my $str = ((" "x10)."[% one %]\n")x1000;
  36 # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
  37 #   grammar:  3 wallclock secs ( 2.10 usr +  0.01 sys =  2.11 CPU) @ 81.52/s (n=172)
  38 #   index:  4 wallclock secs ( 2.11 usr +  0.01 sys =  2.12 CPU) @ 207.55/s (n=440)
  39 #   index2:  4 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 209.52/s (n=440)
  40 #   match:  3 wallclock secs ( 2.07 usr +  0.00 sys =  2.07 CPU) @ 173.43/s (n=359)
  41 #   split:  4 wallclock secs ( 2.12 usr +  0.00 sys =  2.12 CPU) @ 91.98/s (n=195)
  42 #           Rate grammar   split   match   index  index2
  43 # grammar 81.5/s      --    -11%    -53%    -61%    -61%
  44 # split   92.0/s     13%      --    -47%    -56%    -56%
  45 # match    173/s    113%     89%      --    -16%    -17%
  46 # index    208/s    155%    126%     20%      --     -1%
  47 # index2   210/s    157%    128%     21%      1%      --
  48
  49 ###----------------------------------------------------------------###
  50
  ### use a regular expression to go through the string
  ###
  ### Returns a copy of $_[0] in which every "[% tag %]" has been rewritten
  ### as "(tag)".  The string is read through $_[0] (no copy is taken), so
  ### the caller's pos() on that string is overwritten by this call.
  ### When no complete tag is found the string is returned unchanged; any
  ### text after the last complete tag (including an unclosed "[%") is
  ### copied through verbatim.
  sub parse_match {
      my $new   = '';
      my $START = quotemeta '[%';
      my $END   = quotemeta '%]';

      my $pos;
      # pos() cannot usefully be localized: "local pos(...)" has no effect
      # beyond a "Useless localization" warning under -w, so assign directly.
      pos($_[0]) = 0;
      while ($_[0] =~ / \G (.*?) $START (.*?) $END /gsx) {
          my ($begin, $tag) = ($1, $2);
          $pos  = pos($_[0]);          # remember where the last full tag ended
          $new .= $begin . "($tag)";
      }

      # $pos stays undef when nothing matched, so hand back the input as is
      return $pos ? $new . substr($_[0], $pos) : $_[0];
  }
  67
  ### good ole index - hard coded
  ###
  ### Returns a copy of $_[0] in which every "[% tag %]" has been rewritten
  ### as "(tag)", located with index() and the delimiters hard coded.
  ### When the string holds no tag it is returned unchanged.
  ### Dies with "Unclosed tag" when a "[%" has no following "%]".
  sub parse_index {
      my $new  = '';

      my $last = 0;                    # offset just past the previous tag
      while (1) {
          my $i = index($_[0], '[%', $last);
          last if $i == -1;

          # literal text between the previous tag and this one
          $new .= substr($_[0], $last, $i - $last);

          my $j = index($_[0], '%]', $i + 2);
          die "Unclosed tag" if $j == -1;

          my $tag = substr($_[0], $i + 2, $j - ($i + 2));
          $new .= "($tag)";
          $last = $j + 2;
      }

      # $last is still 0 when no tag was seen
      return $last ? $new . substr($_[0], $last) : $_[0];
  }
  85
  ### index searching - but configurable
  ###
  ### Same index() walk as the hard coded variant, but the delimiters live
  ### in variables and their lengths are computed rather than assumed.
  ### Returns a copy of $_[0] with every "[% tag %]" rewritten as "(tag)";
  ### a string without tags is returned unchanged.
  ### A missing closing delimiter does not die: parsing stops and everything
  ### from the unclosed start delimiter to the end of the string is left out
  ### of the result.
  sub parse_index2 {
      my $new   = '';
      my $START = '[%';
      my $END   = '%]';
      my $len_s = length $START;
      my $len_e = length $END;

      my $last = 0;                    # offset just past the previous tag
      while (1) {
          my $i = index($_[0], $START, $last);
          last if $i == -1;

          # literal text between the previous tag and this one
          $new .= substr($_[0], $last, $i - $last);

          my $j = index($_[0], $END, $i + $len_s);
          if ($j == -1) { # missing closing tag
              $last = length($_[0]);   # nothing more gets appended below
              last;
          }

          my $tag = substr($_[0], $i + $len_s, $j - ($i + $len_s));
          $new .= "($tag)";
          $last = $j + $len_e;
      }

      # $last is still 0 when no tag was seen
      return $last ? $new . substr($_[0], $last) : $_[0];
  }
 110
 ### using a split method (several other split methods were also tried - but were slower)
 ###
 ### Splits $_[0] on complete "[% ... %]" tags, keeping the tags themselves
 ### as list elements thanks to the capturing group, then rebuilds the
 ### string: tag pieces become "(contents)", everything else is copied as is.
 ### Returns the rebuilt string.
 sub parse_split {
     my $START = quotemeta '[%';
     my $END   = quotemeta '%]';

     # empty pieces (such as the leading field when the string starts with a
     # tag) match neither branch's intent and simply contribute nothing
     return join '', map {
         /^$START (.*) $END$/sx ? "($1)" : $_
     } split /($START .*? $END)/sx, $_[0];
 }
 129
 ### a regex grammar type matcher
 ###
 ### Walks $_[0] with \G-anchored matches, switching between two states:
 ### outside a tag it looks for the next "[%", inside a tag it accepts
 ### either the closing "%]" or a word (with optional surrounding
 ### whitespace), which is emitted as "(word)".
 ### Returns the rebuilt string.  The string is read through $_[0], so the
 ### caller's pos() on it is overwritten.
 ### Dies with "Unclosed tag" when the string ends inside a tag, and with
 ### "Unparsable tag contents ..." when a tag holds something that is
 ### neither a word nor the closing delimiter.
 sub parse_grammar {
     my $new   = '';
     my $START = quotemeta '[%';
     my $END   = quotemeta '%]';

     my $in_tag;
     # "local pos(...)" has no effect beyond a "Useless localization" warning
     # under -w, so assign directly.
     pos($_[0]) = 0;
     while (1) {
         ### find the start tag
         if (! $in_tag) {
             if ($_[0] =~ /\G (.*?) $START /gcxs) {
                 $new .= $1;
                 $in_tag = 1;
                 next;
             } else {
                 # /c kept pos() where the last match ended: copy the rest
                 $new .= substr $_[0], pos($_[0]);
                 last;
             }
         }

         ### end
         if ($_[0] =~ /\G $END /gcx) {
             $in_tag = 0;
             next;   # back to literal text; do not try to read a word here
         }

         ### a word inside the tag
         if ($_[0] =~ /\G (\s*\w+\s*) /gcx) {
             my $tag = $1;
             $new .= "($tag)";
             next;
         }

         ### nothing matched and pos() did not move - stop instead of spinning
         my $at = pos($_[0]);
         die "Unclosed tag" if $at >= length $_[0];
         die "Unparsable tag contents at offset $at";
     }
     return $new;
 }
 163
 164 ###----------------------------------------------------------------###
 ### check compliance
 ### every parser has to produce the same output as parse_match for $str
 ### before any timing is done

 #print parse_match($str);
 #print "---\n";
 #print parse_split($str);
 #print "---\n";
 #print parse_grammar($str);
 #print "---\n";
 #print parse_index($str);
 my @compliance = (
     ["parse_split   didn't match", \&parse_split  ],
     ["parse_grammar didn't match", \&parse_grammar],
     ["parse_index   didn't match", \&parse_index  ],
     ["parse_index2  didn't match", \&parse_index2 ],
 );
 for my $check (@compliance) {
     my ($complaint, $parser) = @$check;
     die $complaint if $parser->($str) ne parse_match($str);
 }
 #exit;

 ### and run them
 ### each contender runs for at least 2 CPU seconds, then the rates are
 ### printed as a comparison chart
 my %contenders = (
     index   => sub { parse_index($str) },
     index2  => sub { parse_index2($str) },
     match   => sub { parse_match($str) },
     split   => sub { parse_split($str) },
     grammar => sub { parse_grammar($str) },
 );
 my $timings = timethese(-2, \%contenders);
 cmpthese($timings);