CGI::Ex 2.00

[chaz/p5-CGI-Ex] / samples / benchmark / bench_template_tag_parser.pl
diff --git a/samples/benchmark/bench_template_tag_parser.pl b/samples/benchmark/bench_template_tag_parser.pl

new file mode 100644 (file)

index 0000000..68aa14c
--- /dev/null
+++ b/samples/benchmark/bench_template_tag_parser.pl
@@ -0,0 +1,187 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Benchmark qw(timethese cmpthese countit timestr);
+use IO::Socket;
+
+my $str = "--[% one %][% two %]--\n";
+# Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
+#   grammar:  4 wallclock secs ( 2.04 usr +  0.00 sys =  2.04 CPU) @ 36585.78/s (n=74635)
+#   index:  4 wallclock secs ( 2.12 usr +  0.00 sys =  2.12 CPU) @ 81146.23/s (n=172030)
+#   index2:  4 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 71674.76/s (n=150517)
+#   match:  4 wallclock secs ( 2.12 usr +  0.01 sys =  2.13 CPU) @ 57690.14/s (n=122880)
+#   split:  2 wallclock secs ( 2.06 usr +  0.00 sys =  2.06 CPU) @ 36230.58/s (n=74635)
+#            Rate   split grammar   match  index2   index
+# split   36231/s      --     -1%    -37%    -49%    -55%
+# grammar 36586/s      1%      --    -37%    -49%    -55%
+# match   57690/s     59%     58%      --    -20%    -29%
+# index2  71675/s     98%     96%     24%      --    -12%
+# index   81146/s    124%    122%     41%     13%      --
+
+#my $str = ((" "x1000)."[% one %]\n")x10;
+# Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
+#   grammar:  3 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 689.52/s (n=1448)
+#   index:  3 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 10239.52/s (n=21503)
+#   index2:  4 wallclock secs ( 2.13 usr +  0.00 sys =  2.13 CPU) @ 10095.31/s (n=21503)
+#   match:  4 wallclock secs ( 2.13 usr +  0.00 sys =  2.13 CPU) @ 6727.23/s (n=14329)
+#   split:  4 wallclock secs ( 2.14 usr +  0.00 sys =  2.14 CPU) @ 5023.83/s (n=10751)
+#            Rate grammar   split   match  index2   index
+# grammar   690/s      --    -86%    -90%    -93%    -93%
+# split    5024/s    629%      --    -25%    -50%    -51%
+# match    6727/s    876%     34%      --    -33%    -34%
+# index2  10095/s   1364%    101%     50%      --     -1%
+# index   10240/s   1385%    104%     52%      1%      --
+
+#my $str = ((" "x10)."[% one %]\n")x1000;
+# Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
+#   grammar:  3 wallclock secs ( 2.10 usr +  0.01 sys =  2.11 CPU) @ 81.52/s (n=172)
+#   index:  4 wallclock secs ( 2.11 usr +  0.01 sys =  2.12 CPU) @ 207.55/s (n=440)
+#   index2:  4 wallclock secs ( 2.10 usr +  0.00 sys =  2.10 CPU) @ 209.52/s (n=440)
+#   match:  3 wallclock secs ( 2.07 usr +  0.00 sys =  2.07 CPU) @ 173.43/s (n=359)
+#   split:  4 wallclock secs ( 2.12 usr +  0.00 sys =  2.12 CPU) @ 91.98/s (n=195)
+#           Rate grammar   split   match   index  index2
+# grammar 81.5/s      --    -11%    -53%    -61%    -61%
+# split   92.0/s     13%      --    -47%    -56%    -56%
+# match    173/s    113%     89%      --    -16%    -17%
+# index    208/s    155%    126%     20%      --     -1%
+# index2   210/s    157%    128%     21%      1%      --
+
+###----------------------------------------------------------------###
+
+### use a regular expression to go through the string
+sub parse_match {
+    my $new = '';
+    my $START = quotemeta '[%';
+    my $END   = quotemeta '%]';
+
+    my $pos;
+    local pos($_[0]) = 0;
+    while ($_[0] =~ / \G (.*?) $START (.*?) $END /gsx) {
+        my ($begin, $tag) = ($1, $2);
+        $pos = pos($_[0]);
+        $new .= $begin;
+        $new .= "($tag)";
+    }
+    return $pos ? $new . substr($_[0], $pos) : $_[0];
+}
+
+### good ole index - hard coded
+sub parse_index {
+    my $new   = '';
+
+    my $last = 0;
+    while (1) {
+        my $i = index($_[0], '[%', $last);
+        last if $i == -1;
+        $new .= substr($_[0], $last, $i - $last),
+        my $j   = index($_[0], '%]', $i + 2);
+        die "Unclosed tag" if $j == -1;
+        my $tag = substr($_[0], $i + 2, $j - ($i + 2));
+        $new .= "($tag)";
+        $last = $j + 2;
+    }
+    return $last ? $new . substr($_[0], $last) : $_[0];
+}
+
+### index searching - but configurable
+sub parse_index2 {
+    my $new   = '';
+    my $START = '[%';
+    my $END   = '%]';
+    my $len_s = length $START;
+    my $len_e = length $END;
+
+    my $last = 0;
+    while (1) {
+        my $i = index($_[0], $START, $last);
+        last if $i == -1;
+        $new .= substr($_[0], $last, $i - $last),
+        my $j = index($_[0], $END, $i + $len_s);
+        $last = $j + $len_e;
+        if ($j == -1) { # missing closing tag
+            $last = length($_[0]);
+            last;
+        }
+        my $tag = substr($_[0], $i + $len_s, $j - ($i + $len_s));
+        $new .= "($tag)";
+    }
+    return $last ? $new . substr($_[0], $last) : $_[0];
+}
+
+### using a split method (several other split methods were also tried - but were slower)
+sub parse_split {
+    my $new = '';
+    my $START = quotemeta '[%';
+    my $END   = quotemeta '%]';
+
+    my @all = split /($START .*? $END)/sx, $_[0];
+    for my $piece (@all) {
+        next if ! length $piece;
+        if ($piece !~ /^$START (.*) $END$/sx) {
+            $new .= $piece;
+            next;
+        }
+        my $tag = $1;
+        $new .= "($tag)";
+    }
+    return $new;
+}
+
+### a regex grammar type matcher
+sub parse_grammar {
+    my $new = '';
+    my $START = quotemeta '[%';
+    my $END   = quotemeta '%]';
+
+    my $in_tag;
+    local pos($_[0]) = 0;
+    while (1) {
+        ### find the start tag
+        if (! $in_tag) {
+            if ($_[0] =~ /\G (.*?) $START /gcxs) {
+                $new .= $1;
+                $in_tag = 1;
+                next;
+            } else {
+                $new .= substr $_[0], pos($_[0]);
+                last;
+            }
+        }
+
+        ### end
+        if ($_[0] =~ /\G $END /gcx) {
+            $in_tag = 0;
+        }
+
+        if ($_[0] =~ /\G (\s*\w+\s*) /gcx) {
+            my $tag = $1;
+            $new .= "($tag)";
+        }
+    }
+    return $new;
+}
+
+###----------------------------------------------------------------###
+### check compliance
+
+#print parse_match($str);
+#print "---\n";
+#print parse_split($str);
+#print "---\n";
+#print parse_grammar($str);
+#print "---\n";
+#print parse_index($str);
+die "parse_split   didn't match" if parse_split($str)   ne parse_match($str);
+die "parse_grammar didn't match" if parse_grammar($str) ne parse_match($str);
+die "parse_index   didn't match" if parse_index($str)   ne parse_match($str);
+die "parse_index2  didn't match" if parse_index2($str)  ne parse_match($str);
+#exit;
+
+### and run them
+cmpthese timethese (-2, {
+    index   => sub { parse_index($str) },
+    index2  => sub { parse_index2($str) },
+    match   => sub { parse_match($str) },
+    split   => sub { parse_split($str) },
+    grammar => sub { parse_grammar($str) },
+});