Dogcows Code - chaz/p5-File-KDBX/blob - lib/File/KDBX/Util.pm

   1 package File::KDBX::Util;
   2 # ABSTRACT: Utility functions for working with KDBX files
   3
   4 use warnings;
   5 use strict;
   6
   7 use Crypt::PRNG qw(random_bytes random_string);
   8 use Encode qw(decode encode);
   9 use Exporter qw(import);
  10 use File::KDBX::Error;
  11 use List::Util 1.33 qw(any all);
  12 use Module::Load;
  13 use Ref::Util qw(is_arrayref is_coderef is_hashref is_ref is_refref is_scalarref is_ref);
  14 use Scalar::Util qw(blessed isdual looks_like_number readonly refaddr);
  15 use namespace::clean -except => 'import';
  16
  17 our $VERSION = '999.999'; # VERSION
  18
  19 our %EXPORT_TAGS = (
  20     assert      => [qw(assert_64bit)],
  21     clone       => [qw(clone clone_nomagic)],
  22     crypt       => [qw(pad_pkcs7)],
  23     debug       => [qw(dumper)],
  24     fork        => [qw(can_fork)],
  25     function    => [qw(memoize recurse_limit)],
  26     empty       => [qw(empty nonempty)],
  27     erase       => [qw(erase erase_scoped)],
  28     gzip        => [qw(gzip gunzip)],
  29     io          => [qw(read_all)],
  30     load        => [qw(load_optional load_xs try_load_optional)],
  31     search      => [qw(query search simple_expression_query)],
  32     text        => [qw(snakify trim)],
  33     uuid        => [qw(format_uuid generate_uuid is_uuid uuid)],
  34     uri         => [qw(split_url uri_escape_utf8 uri_unescape_utf8)],
  35 );
  36
  37 $EXPORT_TAGS{all} = [map { @$_ } values %EXPORT_TAGS];
  38 our @EXPORT_OK = @{$EXPORT_TAGS{all}};
  39
  40 my %OPS = (
  41     'eq'        =>  2, # binary
  42     'ne'        =>  2,
  43     'lt'        =>  2,
  44     'gt'        =>  2,
  45     'le'        =>  2,
  46     'ge'        =>  2,
  47     '=='        =>  2,
  48     '!='        =>  2,
  49     '<'         =>  2,
  50     '>'         =>  2,
  51     '<='        =>  2,
  52     '>='        =>  2,
  53     '=~'        =>  2,
  54     '!~'        =>  2,
  55     '!'         =>  1, # unary
  56     '!!'        =>  1,
  57     '-not'      =>  1, # special
  58     '-false'    =>  1,
  59     '-true'     =>  1,
  60     '-defined'  =>  1,
  61     '-undef'    =>  1,
  62     '-empty'    =>  1,
  63     '-nonempty' =>  1,
  64     '-or'       => -1,
  65     '-and'      => -1,
  66 );
  67 my %OP_NEG = (
  68     'eq'    =>  'ne',
  69     'ne'    =>  'eq',
  70     'lt'    =>  'ge',
  71     'gt'    =>  'le',
  72     'le'    =>  'gt',
  73     'ge'    =>  'lt',
  74     '=='    =>  '!=',
  75     '!='    =>  '==',
  76     '<'     =>  '>=',
  77     '>'     =>  '<=',
  78     '<='    =>  '>',
  79     '>='    =>  '<',
  80     '=~'    =>  '!~',
  81     '!~'    =>  '=~',
  82 );
  83
  84 =func assert_64bit
  85
  86     assert_64bit();
  87
  88 Throw if perl doesn't support 64-bit IVs.
  89
  90 =cut
  91
  92 sub assert_64bit() {
  93     require Config;
  94     $Config::Config{ivsize} < 8
  95         and throw "64-bit perl is required to use this feature.\n", ivsize => $Config::Config{ivsize};
  96 }
  97
  98 =func can_fork
  99
 100     $bool = can_fork;
 101
 102 Determine if perl can fork, with logic lifted from L<Test2::Util/CAN_FORK>.
 103
 104 =cut
 105
 106 sub can_fork {
 107     require Config;
 108     return 1 if $Config::Config{d_fork};
 109     return 0 if $^O ne 'MSWin32' && $^O ne 'NetWare';
 110     return 0 if !$Config::Config{useithreads};
 111     return 0 if $Config::Config{ccflags} !~ /-DPERL_IMPLICIT_SYS/;
 112     return 0 if $] < 5.008001;
 113     if ($] == 5.010000 && $Config::Config{ccname} eq 'gcc' && $Config::Config{gccversion}) {
 114         return 0 if $Config::Config{gccversion} !~ m/^(\d+)\.(\d+)/;
 115         my @parts = split(/[\.\s]+/, $Config::Config{gccversion});
 116         return 0 if $parts[0] > 4 || ($parts[0] == 4 && $parts[1] >= 8);
 117     }
 118     return 0 if $INC{'Devel/Cover.pm'};
 119     return 1;
 120 }
 121
 122 =func clone_nomagic
 123
 124     $clone = clone_nomagic($thing);
 125
 126 Clone deeply without keeping [most of] the magic.
 127
 128 B<NOTE:> At the moment the implementation is naïve and won't respond well to nontrivial data.
 129
 130 =cut
 131
 132 sub clone {
 133     require Storable;
 134     goto &Storable::dclone;
 135 }
 136
 137 sub clone_nomagic {
 138     my $thing = shift;
 139     if (is_arrayref($thing)) {
 140         my @arr = map { clone_nomagic($_) } @$thing;
 141         return \@arr;
 142     }
 143     elsif (is_hashref($thing)) {
 144         my %hash;
 145         $hash{$_} = clone_nomagic($thing->{$_}) for keys %$thing;
 146         return \%hash;
 147     }
 148     elsif (is_ref($thing)) {
 149         return clone($thing);
 150     }
 151     return $thing;
 152 }
 153
 154 =func dumper
 155
 156     $str = dumper $struct;
 157
 158 Like L<Data::Dumper> but slightly terser in some cases relevent to L<File::KDBX>.
 159
 160 =cut
 161
 162 sub dumper {
 163     require Data::Dumper;
 164     # avoid "once" warnings
 165     local $Data::Dumper::Deepcopy = $Data::Dumper::Deepcopy = 1;
 166     local $Data::Dumper::Deparse = $Data::Dumper::Deparse = 1;
 167     local $Data::Dumper::Indent = 1;
 168     local $Data::Dumper::Quotekeys = 0;
 169     local $Data::Dumper::Sortkeys = 1;
 170     local $Data::Dumper::Terse = 1;
 171     local $Data::Dumper::Trailingcomma = 1;
 172     local $Data::Dumper::Useqq = 1;
 173
 174     my @dumps;
 175     for my $struct (@_) {
 176         my $str = Data::Dumper::Dumper($struct);
 177
 178         # boolean
 179         $str =~ s/bless\( do\{\\\(my \$o = ([01])\)\}, 'boolean' \)/boolean($1)/gs;
 180         # Time::Piece
 181         $str =~ s/bless\([^\)]+?(\d+)'?,\s+\d+,?\s+\], 'Time::Piece' \)/Time::Piece->new($1)/gs;
 182
 183         print STDERR $str if !defined wantarray;
 184         push @dumps, $str;
 185         return $str;
 186     }
 187     return join("\n", @dumps);
 188 }
 189
 190 =func empty
 191
 192 =func nonempty
 193
 194     $bool = empty $thing;
 195
 196     $bool = nonempty $thing;
 197
 198 Test whether a thing is empty (or nonempty). An empty thing is one of these:
 199
 200 =for :list
 201 * nonexistent
 202 * C<undef>
 203 * zero-length string
 204 * zero-length array
 205 * hash with zero keys
 206 * reference to an empty thing (recursive)
 207
 208 Note in particular that zero C<0> is not considered empty because it is an actual value.
 209
 210 =cut
 211
 212 sub empty    {  _empty(@_) }
 213 sub nonempty { !_empty(@_) }
 214
 215 sub _empty {
 216     return 1 if @_ == 0;
 217     local $_ = shift;
 218     return !defined $_
 219         || $_ eq ''
 220         || (is_arrayref($_)  && @$_ == 0)
 221         || (is_hashref($_)   && keys %$_ == 0)
 222         || (is_scalarref($_) && (!defined $$_ || $$_ eq ''))
 223         || (is_refref($_)    && _empty($$_));
 224 }
 225
 226 =func erase
 227
 228     erase($string, ...);
 229     erase(\$string, ...);
 230
 231 Overwrite the memory used by one or more string.
 232
 233 =cut
 234
 235 # use File::KDBX::XS;
 236
 237 sub erase {
 238     # Only bother zeroing out memory if we have the last SvPV COW reference, otherwise we'll end up just
 239     # creating a copy and erasing the copy.
 240     # TODO - Is this worth doing? Need some benchmarking.
 241     for (@_) {
 242         if (!is_ref($_)) {
 243             next if !defined $_ || readonly $_;
 244             if (USE_COWREFCNT()) {
 245                 my $cowrefcnt = B::COW::cowrefcnt($_);
 246                 goto FREE_NONREF if defined $cowrefcnt && 1 < $cowrefcnt;
 247             }
 248             # if (__PACKAGE__->can('erase_xs')) {
 249             #     erase_xs($_);
 250             # }
 251             # else {
 252                 substr($_, 0, length($_), "\0" x length($_));
 253             # }
 254             FREE_NONREF: {
 255                 no warnings 'uninitialized';
 256                 undef $_;
 257             }
 258         }
 259         elsif (is_scalarref($_)) {
 260             next if !defined $$_ || readonly $$_;
 261             if (USE_COWREFCNT()) {
 262                 my $cowrefcnt = B::COW::cowrefcnt($$_);
 263                 goto FREE_REF if defined $cowrefcnt && 1 < $cowrefcnt;
 264             }
 265             # if (__PACKAGE__->can('erase_xs')) {
 266             #     erase_xs($$_);
 267             # }
 268             # else {
 269                 substr($$_, 0, length($$_), "\0" x length($$_));
 270             # }
 271             FREE_REF: {
 272                 no warnings 'uninitialized';
 273                 undef $$_;
 274             }
 275         }
 276         elsif (is_arrayref($_)) {
 277             erase(@$_);
 278             @$_ = ();
 279         }
 280         elsif (is_hashref($_)) {
 281             erase(values %$_);
 282             %$_ = ();
 283         }
 284         else {
 285             throw 'Cannot erase this type of scalar', type => ref $_, what => $_;
 286         }
 287     }
 288 }
 289
 290 =func erase_scoped
 291
 292     $scope_guard = erase_scoped($string, ...);
 293     $scope_guard = erase_scoped(\$string, ...);
 294     undef $scope_guard; # erase happens here
 295
 296 Get a scope guard that will cause scalars to be erased later (i.e. when the scope ends). This is useful if you
 297 want to make sure a string gets erased after you're done with it, even if the scope ends abnormally.
 298
 299 See L</erase>.
 300
 301 =cut
 302
 303 sub erase_scoped {
 304     my @args;
 305     for (@_) {
 306         !is_ref($_) || is_arrayref($_) || is_hashref($_) || is_scalarref($_)
 307             or throw 'Cannot erase this type of scalar', type => ref $_, what => $_;
 308         push @args, is_ref($_) ? $_ : \$_;
 309     }
 310     require Scope::Guard;
 311     return Scope::Guard->new(sub { erase(@args) });
 312 }
 313
 314 =func format_uuid
 315
 316     $string_uuid = format_uuid($raw_uuid);
 317     $string_uuid = format_uuid($raw_uuid, $delimiter);
 318
 319 Format a 128-bit UUID (given as a string of 16 octets) into a hexidecimal string, optionally with a delimiter
 320 to break up the UUID visually into five parts. Examples:
 321
 322     my $uuid = uuid('01234567-89AB-CDEF-0123-456789ABCDEF');
 323     say format_uuid($uuid);         # -> 0123456789ABCDEF0123456789ABCDEF
 324     say format_uuid($uuid, '-');    # -> 01234567-89AB-CDEF-0123-456789ABCDEF
 325
 326 This is the inverse of L</uuid>.
 327
 328 =cut
 329
 330 sub format_uuid {
 331     local $_    = shift // "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 332     my $delim   = shift // '';
 333     length($_) == 16 or throw 'Must provide a 16-bytes UUID', size => length($_), str => $_;
 334     return uc(join($delim, unpack('H8 H4 H4 H4 H12', $_)));
 335 }
 336
 337 =func generate_uuid
 338
 339     $uuid = generate_uuid;
 340     $uuid = generate_uuid(\%set);
 341     $uuid = generate_uuid(\&test_uuid);
 342
 343 Generate a new random UUID. It's pretty unlikely that this will generate a repeat, but if you're worried about
 344 that you can provide either a set of existing UUIDs (as a hashref where the keys are the elements of a set) or
 345 a function to check for existing UUIDs, and this will be sure to not return a UUID already in provided set.
 346 Perhaps an example will make it clear:
 347
 348     my %uuid_set = (
 349         uuid('12345678-9ABC-DEFG-1234-56789ABCDEFG') => 'whatever',
 350     );
 351     $uuid = generate_uuid(\%uuid_set);
 352     # OR
 353     $uuid = generate_uuid(sub { !$uuid_set{$_} });
 354
 355 Here, C<$uuid> can't be "12345678-9ABC-DEFG-1234-56789ABCDEFG". This example uses L</uuid> to easily pack
 356 a 16-byte UUID from a literal, but it otherwise is not a consequential part of the example.
 357
 358 =cut
 359
 360 sub generate_uuid {
 361     my $set  = @_ % 2 == 1 ? shift : undef;
 362     my %args = @_;
 363     my $test = $set //= $args{test};
 364     $test   = sub { !$set->{$_} } if is_hashref($test);
 365     $test //= sub { 1 };
 366     my $printable = $args{printable} // $args{print};
 367     local $_ = '';
 368     do {
 369         $_ = $printable ? random_string(16) : random_bytes(16);
 370     } while (!$test->($_));
 371     return $_;
 372 }
 373
 374 =func gunzip
 375
 376     $unzipped = gunzip($string);
 377
 378 Decompress an octet stream.
 379
 380 =cut
 381
 382 sub gunzip {
 383     load_optional('Compress::Raw::Zlib');
 384     local $_ = shift;
 385     my ($i, $status) = Compress::Raw::Zlib::Inflate->new(-WindowBits => 31);
 386     $status == Compress::Raw::Zlib::Z_OK()
 387         or throw 'Failed to initialize compression library', status => $status;
 388     $status = $i->inflate($_, my $out);
 389     $status == Compress::Raw::Zlib::Z_STREAM_END()
 390         or throw 'Failed to decompress data', status => $status;
 391     return $out;
 392 }
 393
 394 =func gunzip
 395
 396     $zipped = gzip($string);
 397
 398 Compress an octet stream.
 399
 400 =cut
 401
 402 sub gzip {
 403     load_optional('Compress::Raw::Zlib');
 404     local $_ = shift;
 405     my ($d, $status) = Compress::Raw::Zlib::Deflate->new(-WindowBits => 31, -AppendOutput => 1);
 406     $status == Compress::Raw::Zlib::Z_OK()
 407         or throw 'Failed to initialize compression library', status => $status;
 408     $status = $d->deflate($_, my $out);
 409     $status == Compress::Raw::Zlib::Z_OK()
 410         or throw 'Failed to compress data', status => $status;
 411     $status = $d->flush($out);
 412     $status == Compress::Raw::Zlib::Z_OK()
 413         or throw 'Failed to compress data', status => $status;
 414     return $out;
 415 }
 416
 417 =func is_uuid
 418
 419     $bool = is_uuid($thing);
 420
 421 Check if a thing is a UUID (i.e. scalar string of length 16).
 422
 423 =cut
 424
 425 sub is_uuid { defined $_[0] && !is_ref($_[0]) && length($_[0]) == 16 }
 426
 427 =func load_optional
 428
 429     $package = load_optional($package);
 430
 431 Load a module that isn't required but can provide extra functionality. Throw if the module is not available.
 432
 433 =cut
 434
 435 sub load_optional {
 436     for my $module (@_) {
 437         eval { load $module };
 438         if (my $err = $@) {
 439             warn $err if $ENV{DEBUG};
 440             throw "Missing dependency: Please install $module to use this feature.\n", module => $module;
 441         }
 442     }
 443     return wantarray ? @_ : $_[0];
 444 }
 445
 446 =func load_xs
 447
 448     $bool = load_xs();
 449     $bool = load_xs($version);
 450
 451 Attempt to load L<File::KDBX::XS>. Return truthy if C<XS> is loaded. If C<$version> is given, it will check
 452 that at least the given version is loaded.
 453
 454 =cut
 455
 456 sub load_xs {
 457     my $version = shift;
 458
 459     require File::KDBX;
 460
 461     my $has_xs = File::KDBX->can('XS_LOADED');
 462     return $has_xs->() && ($version ? eval { File::KDBX::XS->VERSION($version); 1 } : 1) if $has_xs;
 463
 464     my $try_xs = 1;
 465     $try_xs = 0 if $ENV{PERL_ONLY} || (exists $ENV{PERL_FILE_KDBX_XS} && !$ENV{PERL_FILE_KDBX_XS});
 466
 467     my $use_xs = 0;
 468     $use_xs = try_load_optional('File::KDBX::XS') if $try_xs;
 469
 470     *File::KDBX::XS_LOADED = *File::KDBX::XS_LOADED = $use_xs ? sub() { 1 } : sub() { 0 };
 471     return $version ? eval { File::KDBX::XS->VERSION($version); 1 } : 1;
 472 }
 473
 474 =func memoize
 475
 476     \&memoized_code = memoize(\&code, ...);
 477
 478 Memoize a function. Extra arguments are passed through to C<&code> when it is called.
 479
 480 =cut
 481
 482 sub memoize {
 483     my $func = shift;
 484     my @args = @_;
 485     my %cache;
 486     return sub { $cache{join("\0", grep { defined } @_)} //= $func->(@args, @_) };
 487 }
 488
 489 =func pad_pkcs7
 490
 491     $padded_string = pad_pkcs7($string, $block_size),
 492
 493 Pad a block using the PKCS#7 method.
 494
 495 =cut
 496
 497 sub pad_pkcs7 {
 498     my $data = shift // throw 'Must provide a string to pad';
 499     my $size = shift or throw 'Must provide block size';
 500
 501     0 <= $size && $size < 256
 502         or throw 'Cannot add PKCS7 padding to a large block size', size => $size;
 503
 504     my $pad_len = $size - length($data) % $size;
 505     $data .= chr($pad_len) x $pad_len;
 506 }
 507
 508 =func query
 509
 510     $query = query(@where);
 511     $query->(\%data);
 512
 513 Generate a function that will run a series of tests on a passed hashref and return true or false depending on
 514 if the data record in the hash matched the specified logic.
 515
 516 The logic can be specified in a manner similar to L<SQL::Abstract/"WHERE CLAUSES"> which was the inspiration
 517 for this function, but this code is distinct, supporting an overlapping but not identical feature set and
 518 having its own bugs.
 519
 520 See L<File::KDBX/QUERY> for examples.
 521
 522 =cut
 523
 524 sub query { _query(undef, '-or', \@_) }
 525
 526 =func read_all
 527
 528     $size = read_all($fh, my $buffer, $size);
 529     $size = read_all($fh, my $buffer, $size, $offset);
 530
 531 Like L<functions/read> but returns C<undef> if not all C<$size> bytes are read. This is considered an error,
 532 distinguishable from other errors by C<$!> not being set.
 533
 534 =cut
 535
 536 sub read_all($$$;$) { ## no critic (ProhibitSubroutinePrototypes)
 537     my $result = @_ == 3 ? read($_[0], $_[1], $_[2])
 538                          : read($_[0], $_[1], $_[2], $_[3]);
 539     return if !defined $result;
 540     return if $result != $_[2];
 541     return $result;
 542 }
 543
 544 =func recurse_limit
 545
 546     \&limited_code = recurse_limit(\&code);
 547     \&limited_code = recurse_limit(\&code, $max_depth);
 548     \&limited_code = recurse_limit(\&code, $max_depth, \&error_handler);
 549
 550 Wrap a function with a guard to prevent deep recursion.
 551
 552 =cut
 553
 554 sub recurse_limit {
 555     my $func        = shift;
 556     my $max_depth   = shift // 200;
 557     my $error       = shift // sub {};
 558     my $depth = 0;
 559     return sub { return $error->(@_) if $max_depth < ++$depth; $func->(@_) };
 560 };
 561
 562 =func search
 563
 564     # Generate a query on-the-fly:
 565     \@matches = search(\@records, @where);
 566
 567     # Use a pre-compiled query:
 568     $query = query(@where);
 569     \@matches = search(\@records, $query);
 570
 571     # Use a simple expression:
 572     \@matches = search(\@records, \'query terms', @fields);
 573     \@matches = search(\@records, \'query terms', $operator, @fields);
 574
 575     # Use your own subroutine:
 576     \@matches = search(\@records, \&query);
 577     \@matches = search(\@records, sub { $record = shift; ... });
 578
 579 Execute a linear search over an array of records using a L</query>. A "record" is usually a hash.
 580
 581 This is the search engine described with many examples at L<File::KDBX/QUERY>.
 582
 583 =cut
 584
 585 sub search {
 586     my $list    = shift;
 587     my $query   = shift;
 588     # my %args    = @_;
 589
 590     if (is_coderef($query) && !@_) {
 591         # already a query
 592     }
 593     elsif (is_scalarref($query)) {
 594         $query = simple_expression_query($$query, @_);
 595     }
 596     else {
 597         $query = query($query, @_);
 598     }
 599
 600     # my $limit = $args{limit};
 601
 602     my @match;
 603     for my $item (@$list) {
 604         push @match, $item if $query->($item);
 605         # last if defined $limit && $limit <= @match;
 606     }
 607     return \@match;
 608 }
 609
 610 =func simple_expression_query
 611
 612     $query = simple_expression_query($expression, @fields);
 613
 614 Generate a query, like L</query>, to be used with L</search> but built from a "simple expression" as
 615 L<described here|https://keepass.info/help/base/search.html#mode_se>.
 616
 617 An expression is a string with one or more space-separated terms. Terms with spaces can be enclosed in double
 618 quotes. Terms are negated if they are prefixed with a minus sign. A record must match every term on at least
 619 one of the given fields.
 620
 621 =cut
 622
 623 sub simple_expression_query {
 624     my $expr = shift;
 625     my $op   = @_ && ($OPS{$_[0] || ''} || 0) == 2 ? shift : '=~';
 626
 627     my $neg_op = $OP_NEG{$op};
 628     my $is_re  = $op eq '=~' || $op eq '!~';
 629
 630     require Text::ParseWords;
 631     my @terms = Text::ParseWords::shellwords($expr);
 632
 633     my @query = qw(-and);
 634
 635     for my $term (@terms) {
 636         my @subquery = qw(-or);
 637
 638         my $neg = $term =~ s/^-//;
 639         my $condition = [($neg ? $neg_op : $op) => ($is_re ? qr/\Q$term\E/i : $term)];
 640
 641         for my $field (@_) {
 642             push @subquery, $field => $condition;
 643         }
 644
 645         push @query, \@subquery;
 646     }
 647
 648     return query(\@query);
 649 }
 650
 651 =func snakify
 652
 653     $string = snakify($string);
 654
 655 Turn a CamelCase string into snake_case.
 656
 657 =cut
 658
 659 sub snakify {
 660     local $_ = shift;
 661     s/UserName/Username/g;
 662     s/([a-z])([A-Z0-9])/${1}_${2}/g;
 663     s/([A-Z0-9]+)([A-Z0-9])(?![A-Z0-9]|$)/${1}_${2}/g;
 664     return lc($_);
 665 }
 666
 667 =func split_url
 668
 669     ($scheme, $auth, $host, $port, $path, $query, $hash, $usename, $password) = split_url($url);
 670
 671 Split a URL into its parts.
 672
 673 For example, C<http://user:pass@localhost:4000/path?query#hash> gets split like:
 674
 675 =for :list
 676 * C<http>
 677 * C<user:pass>
 678 * C<host>
 679 * C<4000>
 680 * C</path>
 681 * C<?query>
 682 * C<#hash>
 683 * C<user>
 684 * C<pass>
 685
 686 =cut
 687
 688 sub split_url {
 689     local $_ = shift;
 690     my ($scheme, $auth, $host, $port, $path, $query, $hash) =~ m!
 691         ^([^:/\?\#]+) ://
 692         (?:([^\@]+)\@)
 693         ([^:/\?\#]*)
 694         (?::(\d+))?
 695         ([^\?\#]*)
 696         (\?[^\#]*)?
 697         (\#(.*))?
 698     !x;
 699
 700     $scheme = lc($scheme);
 701
 702     $host ||= 'localhost';
 703     $host = lc($host);
 704
 705     $path = "/$path" if $path !~ m!^/!;
 706
 707     $port ||= $scheme eq 'http' ? 80 : $scheme eq 'https' ? 433 : undef;
 708
 709     my ($username, $password) = split($auth, ':', 2);
 710
 711     return ($scheme, $auth, $host, $port, $path, $query, $hash, $username, $password);
 712 }
 713
 714 =func trim
 715
 716     $string = trim($string);
 717
 718 The ubiquitous C<trim> function. Removes all whitespace from both ends of a string.
 719
 720 =cut
 721
 722 sub trim($) { ## no critic (ProhibitSubroutinePrototypes)
 723     local $_ = shift // return;
 724     s/^\s*//;
 725     s/\s*$//;
 726     return $_;
 727 }
 728
 729 =func try_load_optional
 730
 731     $package = try_load_optional($package);
 732
 733 Try to load a module that isn't required but can provide extra functionality, and return true if successful.
 734
 735 =cut
 736
 737 sub try_load_optional {
 738     for my $module (@_) {
 739         eval { load $module };
 740         if (my $err = $@) {
 741             warn $err if $ENV{DEBUG};
 742             return;
 743         }
 744     }
 745     return @_;
 746 }
 747
 748 =func uri_escape_utf8
 749
 750     $string = uri_escape_utf8($string);
 751
 752 Percent-encode arbitrary text strings, like for a URI.
 753
 754 =cut
 755
 756 my %ESC = map { chr($_) => sprintf('%%%02X', $_) } 0..255;
 757 sub uri_escape_utf8 {
 758     local $_ = shift // return;
 759     $_ = encode('UTF-8', $_);
 760     # RFC 3986 section 2.3 unreserved characters
 761     s/([^A-Za-z0-9\-\._~])/$ESC{$1}/ge;
 762     return $_;
 763 }
 764
 765 sub uri_unescape_utf8 {
 766     local $_ = shift // return;
 767     s/\%([A-Fa-f0-9]{2})/chr(hex($1))/;
 768     return decode('UTF-8', $_);
 769 }
 770
 771 =func uuid
 772
 773     $raw_uuid = uuid($string_uuid);
 774
 775 Pack a 128-bit UUID (given as a hexidecimal string with optional C<->'s, like
 776 C<12345678-9ABC-DEFG-1234-56789ABCDEFG>) into a string of exactly 16 octets.
 777
 778 This is the inverse of L</format_uuid>.
 779
 780 =cut
 781
 782 sub uuid {
 783     local $_ = shift // return "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 784     s/-//g;
 785     /^[A-Fa-f0-9]{32}$/ or throw 'Must provide a formatted 128-bit UUID';
 786     return pack('H32', $_);
 787
 788 }
 789
 790 BEGIN {
 791     my $use_cowrefcnt = eval { require B::COW; 1 };
 792     *USE_COWREFCNT = $use_cowrefcnt ? sub() { 1 } : sub() { 0 };
 793 }
 794
 795 ### --------------------------------------------------------------------------
 796
 797 # Determine if an array looks like keypairs from a hash.
 798 sub _looks_like_keypairs {
 799     my $arr = shift;
 800     return 0 if @$arr % 2 == 1;
 801     for (my $i = 0; $i < @$arr; $i += 2) {
 802         return 0 if is_ref($arr->[$i]);
 803     }
 804     return 1;
 805 }
 806
 807 sub _is_operand_plain {
 808     local $_ = shift;
 809     return !(is_hashref($_) || is_arrayref($_));
 810 }
 811
 812 sub _query {
 813     # dumper \@_;
 814     my $subject = shift;
 815     my $op      = shift // throw 'Must specify a query operator';
 816     my $operand = shift;
 817
 818     return _query_simple($op, $subject) if defined $subject && !is_ref($op) && ($OPS{$subject} || 2) < 2;
 819     return _query_simple($subject, $op, $operand) if _is_operand_plain($operand);
 820     return _query_inverse(_query($subject, '-or', $operand)) if $op eq '-not' || $op eq '-false';
 821     return _query($subject, '-and', [%$operand]) if is_hashref($operand);
 822
 823     my @queries;
 824
 825     my @atoms = @$operand;
 826     while (@atoms) {
 827         if (_looks_like_keypairs(\@atoms)) {
 828             my ($atom, $operand) = splice @atoms, 0, 2;
 829             if (my $op_type = $OPS{$atom}) {
 830                 if ($op_type == 1 && _is_operand_plain($operand)) { # unary
 831                     push @queries, _query_simple($operand, $atom);
 832                 }
 833                 else {
 834                     push @queries, _query($subject, $atom, $operand);
 835                 }
 836             }
 837             elsif (!is_ref($atom)) {
 838                 push @queries, _query($atom, 'eq', $operand);
 839             }
 840         }
 841         else {
 842             my $atom = shift @atoms;
 843             if ($OPS{$atom}) {     # apply new operator over the rest
 844                 push @queries, _query($subject, $atom, \@atoms);
 845                 last;
 846             }
 847             else {  # apply original operator over this one
 848                 push @queries, _query($subject, $op, $atom);
 849             }
 850         }
 851     }
 852
 853     if (@queries == 1) {
 854         return $queries[0];
 855     }
 856     elsif ($op eq '-and') {
 857         return _query_all(@queries);
 858     }
 859     elsif ($op eq '-or') {
 860         return _query_any(@queries);
 861     }
 862     throw 'Malformed query';
 863 }
 864
 865 sub _query_simple {
 866     my $subject = shift;
 867     my $op      = shift // 'eq';
 868     my $operand = shift;
 869
 870     # these special operators can also act as simple operators
 871     $op = '!!' if $op eq '-true';
 872     $op = '!'  if $op eq '-false';
 873     $op = '!'  if $op eq '-not';
 874
 875     defined $subject or throw 'Subject is not set in query';
 876     $OPS{$op} >= 0   or throw 'Cannot use a non-simple operator in a simple query';
 877     if (empty($operand)) {
 878         if ($OPS{$op} < 2) {
 879             # no operand needed
 880         }
 881         # Allow field => undef and field => {'ne' => undef} to do the (arguably) right thing.
 882         elsif ($op eq 'eq' || $op eq '==') {
 883             $op = '-empty';
 884         }
 885         elsif ($op eq 'ne' || $op eq '!=') {
 886             $op = '-nonempty';
 887         }
 888         else {
 889             throw 'Operand is required';
 890         }
 891     }
 892
 893     my $field = sub { blessed $_[0] && $_[0]->can($subject) ? $_[0]->$subject : $_[0]->{$subject} };
 894
 895     my %map = (
 896         'eq'        => sub { local $_ = $field->(@_); defined && $_ eq $operand },
 897         'ne'        => sub { local $_ = $field->(@_); defined && $_ ne $operand },
 898         'lt'        => sub { local $_ = $field->(@_); defined && $_ lt $operand },
 899         'gt'        => sub { local $_ = $field->(@_); defined && $_ gt $operand },
 900         'le'        => sub { local $_ = $field->(@_); defined && $_ le $operand },
 901         'ge'        => sub { local $_ = $field->(@_); defined && $_ ge $operand },
 902         '=='        => sub { local $_ = $field->(@_); defined && $_ == $operand },
 903         '!='        => sub { local $_ = $field->(@_); defined && $_ != $operand },
 904         '<'         => sub { local $_ = $field->(@_); defined && $_ <  $operand },
 905         '>'         => sub { local $_ = $field->(@_); defined && $_ >  $operand },
 906         '<='        => sub { local $_ = $field->(@_); defined && $_ <= $operand },
 907         '>='        => sub { local $_ = $field->(@_); defined && $_ >= $operand },
 908         '=~'        => sub { local $_ = $field->(@_); defined && $_ =~ $operand },
 909         '!~'        => sub { local $_ = $field->(@_); defined && $_ !~ $operand },
 910         '!'         => sub { local $_ = $field->(@_); ! $_ },
 911         '!!'        => sub { local $_ = $field->(@_); !!$_ },
 912         '-defined'  => sub { local $_ = $field->(@_);  defined $_ },
 913         '-undef'    => sub { local $_ = $field->(@_); !defined $_ },
 914         '-nonempty' => sub { local $_ = $field->(@_); nonempty $_ },
 915         '-empty'    => sub { local $_ = $field->(@_); empty    $_ },
 916     );
 917
 918     return $map{$op} // throw "Unexpected operator in query: $op",
 919         subject     => $subject,
 920         operator    => $op,
 921         operand     => $operand;
 922 }
 923
 924 sub _query_inverse {
 925     my $query = shift;
 926     return sub { !$query->(@_) };
 927 }
 928
 929 sub _query_all {
 930     my @queries = @_;
 931     return sub {
 932         my $val = shift;
 933         all { $_->($val) } @queries;
 934     };
 935 }
 936
 937 sub _query_any {
 938     my @queries = @_;
 939     return sub {
 940         my $val = shift;
 941         any { $_->($val) } @queries;
 942     };
 943 }
 944
 945 1;