#!/usr/bin/perl -w use strict; use Benchmark qw(timethese cmpthese countit timestr); use IO::Socket; my $str = "--[% one %][% two %]--\n"; # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds... # grammar: 4 wallclock secs ( 2.04 usr + 0.00 sys = 2.04 CPU) @ 36585.78/s (n=74635) # index: 4 wallclock secs ( 2.12 usr + 0.00 sys = 2.12 CPU) @ 81146.23/s (n=172030) # index2: 4 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 71674.76/s (n=150517) # match: 4 wallclock secs ( 2.12 usr + 0.01 sys = 2.13 CPU) @ 57690.14/s (n=122880) # split: 2 wallclock secs ( 2.06 usr + 0.00 sys = 2.06 CPU) @ 36230.58/s (n=74635) # Rate split grammar match index2 index # split 36231/s -- -1% -37% -49% -55% # grammar 36586/s 1% -- -37% -49% -55% # match 57690/s 59% 58% -- -20% -29% # index2 71675/s 98% 96% 24% -- -12% # index 81146/s 124% 122% 41% 13% -- #my $str = ((" "x1000)."[% one %]\n")x10; # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds... # grammar: 3 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 689.52/s (n=1448) # index: 3 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 10239.52/s (n=21503) # index2: 4 wallclock secs ( 2.13 usr + 0.00 sys = 2.13 CPU) @ 10095.31/s (n=21503) # match: 4 wallclock secs ( 2.13 usr + 0.00 sys = 2.13 CPU) @ 6727.23/s (n=14329) # split: 4 wallclock secs ( 2.14 usr + 0.00 sys = 2.14 CPU) @ 5023.83/s (n=10751) # Rate grammar split match index2 index # grammar 690/s -- -86% -90% -93% -93% # split 5024/s 629% -- -25% -50% -51% # match 6727/s 876% 34% -- -33% -34% # index2 10095/s 1364% 101% 50% -- -1% # index 10240/s 1385% 104% 52% 1% -- #my $str = ((" "x10)."[% one %]\n")x1000; # Benchmark: running grammar, index, index2, match, split for at least 2 CPU seconds... # grammar: 3 wallclock secs ( 2.10 usr + 0.01 sys = 2.11 CPU) @ 81.52/s (n=172) # index: 4 wallclock secs ( 2.11 usr + 0.01 sys = 2.12 CPU) @ 207.55/s (n=440) # index2: 4 wallclock secs ( 2.10 usr + 0.00 sys = 2.10 CPU) @ 209.52/s (n=440) # match: 3 wallclock secs ( 2.07 usr + 0.00 sys = 2.07 CPU) @ 173.43/s (n=359) # split: 4 wallclock secs ( 2.12 usr + 0.00 sys = 2.12 CPU) @ 91.98/s (n=195) # Rate grammar split match index index2 # grammar 81.5/s -- -11% -53% -61% -61% # split 92.0/s 13% -- -47% -56% -56% # match 173/s 113% 89% -- -16% -17% # index 208/s 155% 126% 20% -- -1% # index2 210/s 157% 128% 21% 1% -- ###----------------------------------------------------------------### ### use a regular expression to go through the string sub parse_match { my $new = ''; my $START = quotemeta '[%'; my $END = quotemeta '%]'; my $pos; local pos($_[0]) = 0; while ($_[0] =~ / \G (.*?) $START (.*?) $END /gsx) { my ($begin, $tag) = ($1, $2); $pos = pos($_[0]); $new .= $begin; $new .= "($tag)"; } return $pos ? $new . substr($_[0], $pos) : $_[0]; } ### good ole index - hard coded sub parse_index { my $new = ''; my $last = 0; while (1) { my $i = index($_[0], '[%', $last); last if $i == -1; $new .= substr($_[0], $last, $i - $last), my $j = index($_[0], '%]', $i + 2); die "Unclosed tag" if $j == -1; my $tag = substr($_[0], $i + 2, $j - ($i + 2)); $new .= "($tag)"; $last = $j + 2; } return $last ? $new . substr($_[0], $last) : $_[0]; } ### index searching - but configurable sub parse_index2 { my $new = ''; my $START = '[%'; my $END = '%]'; my $len_s = length $START; my $len_e = length $END; my $last = 0; while (1) { my $i = index($_[0], $START, $last); last if $i == -1; $new .= substr($_[0], $last, $i - $last), my $j = index($_[0], $END, $i + $len_s); $last = $j + $len_e; if ($j == -1) { # missing closing tag $last = length($_[0]); last; } my $tag = substr($_[0], $i + $len_s, $j - ($i + $len_s)); $new .= "($tag)"; } return $last ? $new . substr($_[0], $last) : $_[0]; } ### using a split method (several other split methods were also tried - but were slower) sub parse_split { my $new = ''; my $START = quotemeta '[%'; my $END = quotemeta '%]'; my @all = split /($START .*? $END)/sx, $_[0]; for my $piece (@all) { next if ! length $piece; if ($piece !~ /^$START (.*) $END$/sx) { $new .= $piece; next; } my $tag = $1; $new .= "($tag)"; } return $new; } ### a regex grammar type matcher sub parse_grammar { my $new = ''; my $START = quotemeta '[%'; my $END = quotemeta '%]'; my $in_tag; local pos($_[0]) = 0; while (1) { ### find the start tag if (! $in_tag) { if ($_[0] =~ /\G (.*?) $START /gcxs) { $new .= $1; $in_tag = 1; next; } else { $new .= substr $_[0], pos($_[0]); last; } } ### end if ($_[0] =~ /\G $END /gcx) { $in_tag = 0; } if ($_[0] =~ /\G (\s*\w+\s*) /gcx) { my $tag = $1; $new .= "($tag)"; } } return $new; } ###----------------------------------------------------------------### ### check compliance #print parse_match($str); #print "---\n"; #print parse_split($str); #print "---\n"; #print parse_grammar($str); #print "---\n"; #print parse_index($str); die "parse_split didn't match" if parse_split($str) ne parse_match($str); die "parse_grammar didn't match" if parse_grammar($str) ne parse_match($str); die "parse_index didn't match" if parse_index($str) ne parse_match($str); die "parse_index2 didn't match" if parse_index2($str) ne parse_match($str); #exit; ### and run them cmpthese timethese (-2, { index => sub { parse_index($str) }, index2 => sub { parse_index2($str) }, match => sub { parse_match($str) }, split => sub { parse_split($str) }, grammar => sub { parse_grammar($str) }, });