--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use Benchmark qw(timethese cmpthese countit timestr);
+use IO::Socket;
+
+### Sample input handed to every parse_* routine.  The commented-out
+### "#my $str = ..." lines further down are alternative inputs; each is
+### followed by the benchmark output that was recorded for it.
+my $str = "--[% one %][% two %]--\n";
+# Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
+# grammar: 4 wallclock secs ( 2.04 usr + 0.00 sys = 2.04 CPU) @ 36585.78/s (n=74635)
+# index: 4 wallclock secs ( 2.12 usr + 0.00 sys = 2.12 CPU) @ 81146.23/s (n=172030)
+# index2: 4 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 71674.76/s (n=150517)
+# match: 4 wallclock secs ( 2.12 usr + 0.01 sys = 2.13 CPU) @ 57690.14/s (n=122880)
+# split: 2 wallclock secs ( 2.06 usr + 0.00 sys = 2.06 CPU) @ 36230.58/s (n=74635)
+# Rate split grammar match index2 index
+# split 36231/s -- -1% -37% -49% -55%
+# grammar 36586/s 1% -- -37% -49% -55%
+# match 57690/s 59% 58% -- -20% -29%
+# index2 71675/s 98% 96% 24% -- -12%
+# index 81146/s 124% 122% 41% 13% --
+
+#my $str = ((" "x1000)."[% one %]\n")x10;
+# Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
+# grammar: 3 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 689.52/s (n=1448)
+# index: 3 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 10239.52/s (n=21503)
+# index2: 4 wallclock secs ( 2.13 usr + 0.00 sys = 2.13 CPU) @ 10095.31/s (n=21503)
+# match: 4 wallclock secs ( 2.13 usr + 0.00 sys = 2.13 CPU) @ 6727.23/s (n=14329)
+# split: 4 wallclock secs ( 2.14 usr + 0.00 sys = 2.14 CPU) @ 5023.83/s (n=10751)
+# Rate grammar split match index2 index
+# grammar 690/s -- -86% -90% -93% -93%
+# split 5024/s 629% -- -25% -50% -51%
+# match 6727/s 876% 34% -- -33% -34%
+# index2 10095/s 1364% 101% 50% -- -1%
+# index 10240/s 1385% 104% 52% 1% --
+
+#my $str = ((" "x10)."[% one %]\n")x1000;
+# Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds...
+# grammar: 3 wallclock secs ( 2.10 usr + 0.01 sys = 2.11 CPU) @ 81.52/s (n=172)
+# index: 4 wallclock secs ( 2.11 usr + 0.01 sys = 2.12 CPU) @ 207.55/s (n=440)
+# index2: 4 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 209.52/s (n=440)
+# match: 3 wallclock secs ( 2.07 usr + 0.00 sys = 2.07 CPU) @ 173.43/s (n=359)
+# split: 4 wallclock secs ( 2.12 usr + 0.00 sys = 2.12 CPU) @ 91.98/s (n=195)
+# Rate grammar split match index index2
+# grammar 81.5/s -- -11% -53% -61% -61%
+# split 92.0/s 13% -- -47% -56% -56%
+# match 173/s 113% 89% -- -16% -17%
+# index 208/s 155% 126% 20% -- -1%
+# index2 210/s 157% 128% 21% 1% --
+
+###----------------------------------------------------------------###
+
+### Regex driven scan: each iteration matches "leading text, [%, tag, %]"
+### starting where the previous match stopped (\G).
+###
+### The input is read through $_[0].  Every "[%tag%]" becomes "(tag)" and
+### the surrounding text is copied through unchanged.  When no complete
+### tag is found the input is returned as is.
+sub parse_match {
+    my $START = quotemeta '[%';
+    my $END   = quotemeta '%]';
+
+    my $out = '';
+    my $resume;    # offset just past the most recent closing delimiter
+    local pos($_[0]) = 0;
+    while ($_[0] =~ / \G (.*?) $START (.*?) $END /gsx) {
+        $resume = pos($_[0]);
+        $out .= $1 . "($2)";
+    }
+
+    # nothing matched - hand the input back untouched
+    return $_[0] if ! $resume;
+
+    # append whatever follows the last complete tag
+    return $out . substr($_[0], $resume);
+}
+
+### good ole index - hard coded
+###
+### Walks the string with index()/substr(), replacing every "[%tag%]"
+### with "(tag)".  The input is accessed directly through $_[0].
+###
+### Returns the rewritten string, or the input itself when it contains
+### no opening "[%".  Dies with "Unclosed tag" when a "[%" has no
+### following "%]".
+sub parse_index {
+    my $new = '';
+
+    my $last = 0;    # offset just past the previous closing "%]"
+    while (1) {
+        my $i = index($_[0], '[%', $last);
+        last if $i == -1;
+
+        # literal text between the previous tag and this one
+        # (this statement used to end in a comma, silently chaining it
+        # with the declaration below via the comma operator)
+        $new .= substr($_[0], $last, $i - $last);
+
+        my $j = index($_[0], '%]', $i + 2);
+        die "Unclosed tag" if $j == -1;
+
+        my $tag = substr($_[0], $i + 2, $j - ($i + 2));
+        $new .= "($tag)";
+        $last = $j + 2;
+    }
+
+    # $last is still 0 only when no tag was found at all
+    return $last ? $new . substr($_[0], $last) : $_[0];
+}
+
+### index searching - but configurable
+###
+### Same approach as the hard coded index() version, but the delimiters
+### live in $START/$END and their lengths are computed rather than
+### written as literal 2s.  The input is accessed through $_[0].
+###
+### Returns the rewritten string, or the input itself when it contains
+### no opening delimiter.  A tag that is opened but never closed does
+### not die: everything from that opening delimiter to the end of the
+### string is dropped from the result.
+sub parse_index2 {
+    my $new   = '';
+    my $START = '[%';
+    my $END   = '%]';
+    my $len_s = length $START;
+    my $len_e = length $END;
+
+    my $last = 0;    # offset just past the previous closing delimiter
+    while (1) {
+        my $i = index($_[0], $START, $last);
+        last if $i == -1;
+
+        # literal text between the previous tag and this one
+        # (this statement used to end in a comma instead of a semicolon)
+        $new .= substr($_[0], $last, $i - $last);
+
+        my $j = index($_[0], $END, $i + $len_s);
+        if ($j == -1) { # missing closing tag
+            # point past the end so the final substr() adds nothing
+            $last = length($_[0]);
+            last;
+        }
+
+        # only advance once we know the closing delimiter really exists
+        $last = $j + $len_e;
+        my $tag = substr($_[0], $i + $len_s, $j - ($i + $len_s));
+        $new .= "($tag)";
+    }
+
+    # $last is still 0 only when no opening delimiter was found at all
+    return $last ? $new . substr($_[0], $last) : $_[0];
+}
+
+### split based variant (several other split methods were also tried - but were slower)
+###
+### The input ($_[0]) is cut on complete "[%...%]" tags; because the
+### split pattern captures, the tags themselves come back as list
+### elements.  Tag pieces are rewritten to "(tag)", every other
+### non-empty piece is copied through, and the pieces are joined.
+sub parse_split {
+    my $START = quotemeta '[%';
+    my $END   = quotemeta '%]';
+
+    return join '',
+        map  { $_ =~ /^$START (.*) $END$/sx ? "($1)" : $_ }
+        grep { length $_ }
+        split /($START .*? $END)/sx, $_[0];
+}
+
+### a regex grammar type matcher
+###
+### Small state machine driven by \G-anchored matches with /c (so a failed
+### match leaves pos() where it was).  Outside a tag it copies text up to
+### the next "[%"; inside a tag it accepts either the closing "%]" or a
+### word with optional surrounding whitespace, which is emitted as
+### "(word)".  The input is accessed through $_[0].
+###
+### Returns the rewritten string.  Dies with "Unclosed or unparsable tag"
+### when, inside a tag, neither the closing delimiter nor a word can be
+### matched (previously this spun forever because nothing advanced pos()).
+sub parse_grammar {
+    my $new   = '';
+    my $START = quotemeta '[%';
+    my $END   = quotemeta '%]';
+
+    my $in_tag = 0;
+    local pos($_[0]) = 0;
+    while (1) {
+        ### find the start tag
+        if (! $in_tag) {
+            if ($_[0] =~ /\G (.*?) $START /gcxs) {
+                $new .= $1;
+                $in_tag = 1;
+                next;
+            }
+
+            # no further tags - copy the remainder and stop
+            $new .= substr $_[0], pos($_[0]);
+            last;
+        }
+
+        ### end
+        if ($_[0] =~ /\G $END /gcx) {
+            $in_tag = 0;
+            # restart in "outside a tag" mode; without this the word rule
+            # below would swallow plain text that follows the "%]"
+            next;
+        }
+
+        ### tag content
+        if ($_[0] =~ /\G (\s*\w+\s*) /gcx) {
+            $new .= "($1)";
+            next;
+        }
+
+        # neither rule matched, so pos() cannot advance any more
+        die "Unclosed or unparsable tag at offset " . (pos($_[0]) || 0);
+    }
+    return $new;
+}
+
+###----------------------------------------------------------------###
+### check compliance - every implementation must produce exactly what
+### parse_match produces for $str, otherwise the timings are meaningless
+
+#print parse_match($str);
+#print "---\n";
+#print parse_split($str);
+#print "---\n";
+#print parse_grammar($str);
+#print "---\n";
+#print parse_index($str);
+my $expected = parse_match($str);
+die "parse_split didn't match"   if parse_split($str)   ne $expected;
+die "parse_grammar didn't match" if parse_grammar($str) ne $expected;
+die "parse_index didn't match"   if parse_index($str)   ne $expected;
+die "parse_index2 didn't match"  if parse_index2($str)  ne $expected;
+#exit;
+
+### and run them - time each routine for at least 2 CPU seconds, then
+### print the comparison chart
+my $timings = timethese(-2, {
+    grammar => sub { parse_grammar($str) },
+    split   => sub { parse_split($str) },
+    match   => sub { parse_match($str) },
+    index2  => sub { parse_index2($str) },
+    index   => sub { parse_index($str) },
+});
+cmpthese($timings);