Dogcows Code - chaz/p5-File-KDBX/blob - lib/File/KDBX/Util.pm

   1 package File::KDBX::Util;
   2 # ABSTRACT: Utility functions for working with KDBX files
   3
   4 use warnings;
   5 use strict;
   6
   7 use Crypt::PRNG qw(random_bytes random_string);
   8 use Encode qw(decode encode);
   9 use Exporter qw(import);
  10 use File::KDBX::Constants qw(:bool);
  11 use File::KDBX::Error;
  12 use List::Util 1.33 qw(any all);
  13 use Module::Load;
  14 use Ref::Util qw(is_arrayref is_coderef is_hashref is_ref is_refref is_scalarref);
  15 use Scalar::Util qw(blessed looks_like_number readonly);
  16 use Time::Piece;
  17 use boolean;
  18 use namespace::clean -except => 'import';
  19
  20 our $VERSION = '999.999'; # VERSION
  21
  22 our %EXPORT_TAGS = (
  23     assert      => [qw(assert_64bit)],
  24     class       => [qw(extends has list_attributes)],
  25     clone       => [qw(clone clone_nomagic)],
  26     coercion    => [qw(to_bool to_number to_string to_time to_tristate to_uuid)],
  27     crypt       => [qw(pad_pkcs7)],
  28     debug       => [qw(dumper)],
  29     fork        => [qw(can_fork)],
  30     function    => [qw(memoize recurse_limit)],
  31     empty       => [qw(empty nonempty)],
  32     erase       => [qw(erase erase_scoped)],
  33     gzip        => [qw(gzip gunzip)],
  34     io          => [qw(is_readable is_writable read_all)],
  35     load        => [qw(load_optional load_xs try_load_optional)],
  36     search      => [qw(query search search_limited simple_expression_query)],
  37     text        => [qw(snakify trim)],
  38     uuid        => [qw(format_uuid generate_uuid is_uuid uuid UUID_NULL)],
  39     uri         => [qw(split_url uri_escape_utf8 uri_unescape_utf8)],
  40 );
  41
  42 $EXPORT_TAGS{all} = [map { @$_ } values %EXPORT_TAGS];
  43 our @EXPORT_OK = @{$EXPORT_TAGS{all}};
  44
  45 my %OPS = (
  46     'eq'        =>  2, # binary
  47     'ne'        =>  2,
  48     'lt'        =>  2,
  49     'gt'        =>  2,
  50     'le'        =>  2,
  51     'ge'        =>  2,
  52     '=='        =>  2,
  53     '!='        =>  2,
  54     '<'         =>  2,
  55     '>'         =>  2,
  56     '<='        =>  2,
  57     '>='        =>  2,
  58     '=~'        =>  2,
  59     '!~'        =>  2,
  60     '!'         =>  1, # unary
  61     '!!'        =>  1,
  62     '-not'      =>  1, # special
  63     '-false'    =>  1,
  64     '-true'     =>  1,
  65     '-defined'  =>  1,
  66     '-undef'    =>  1,
  67     '-empty'    =>  1,
  68     '-nonempty' =>  1,
  69     '-or'       => -1,
  70     '-and'      => -1,
  71 );
  72 my %OP_NEG = (
  73     'eq'    =>  'ne',
  74     'ne'    =>  'eq',
  75     'lt'    =>  'ge',
  76     'gt'    =>  'le',
  77     'le'    =>  'gt',
  78     'ge'    =>  'lt',
  79     '=='    =>  '!=',
  80     '!='    =>  '==',
  81     '<'     =>  '>=',
  82     '>'     =>  '<=',
  83     '<='    =>  '>',
  84     '>='    =>  '<',
  85     '=~'    =>  '!~',
  86     '!~'    =>  '=~',
  87 );
  88 my %ATTRIBUTES;
  89
  90 =func load_xs
  91
  92     $bool = load_xs();
  93     $bool = load_xs($version);
  94
  95 Attempt to load L<File::KDBX::XS>. Return truthy if C<XS> is loaded. If C<$version> is given, it will check
  96 that at least the given version is loaded.
  97
  98 =cut
  99
 100 my $XS_LOADED;
 101 sub load_xs {
 102     my $version = shift;
 103
 104     goto IS_LOADED if defined $XS_LOADED;
 105
 106     if ($ENV{PERL_ONLY} || (exists $ENV{PERL_FILE_KDBX_XS} && !$ENV{PERL_FILE_KDBX_XS})) {
 107         return $XS_LOADED = FALSE;
 108     }
 109
 110     $XS_LOADED = !!eval { require File::KDBX::XS; 1 };
 111
 112     IS_LOADED:
 113     {
 114         local $@;
 115         return $XS_LOADED if !$version;
 116         return !!eval { File::KDBX::XS->VERSION($version); 1 };
 117     }
 118 }
 119
 120 =func assert_64bit
 121
 122     assert_64bit();
 123
 124 Throw if perl doesn't support 64-bit IVs.
 125
 126 =cut
 127
 128 sub assert_64bit() {
 129     require Config;
 130     $Config::Config{ivsize} < 8
 131         and throw "64-bit perl is required to use this feature.\n", ivsize => $Config::Config{ivsize};
 132 }
 133
 134 =func can_fork
 135
 136     $bool = can_fork;
 137
 138 Determine if perl can fork, with logic lifted from L<Test2::Util/CAN_FORK>.
 139
 140 =cut
 141
 142 sub can_fork {
 143     require Config;
 144     return 1 if $Config::Config{d_fork};
 145     return 0 if $^O ne 'MSWin32' && $^O ne 'NetWare';
 146     return 0 if !$Config::Config{useithreads};
 147     return 0 if $Config::Config{ccflags} !~ /-DPERL_IMPLICIT_SYS/;
 148     return 0 if $] < 5.008001;
 149     if ($] == 5.010000 && $Config::Config{ccname} eq 'gcc' && $Config::Config{gccversion}) {
 150         return 0 if $Config::Config{gccversion} !~ m/^(\d+)\.(\d+)/;
 151         my @parts = split(/[\.\s]+/, $Config::Config{gccversion});
 152         return 0 if $parts[0] > 4 || ($parts[0] == 4 && $parts[1] >= 8);
 153     }
 154     return 0 if $INC{'Devel/Cover.pm'};
 155     return 1;
 156 }
 157
 158 =func clone
 159
 160     $clone = clone($thing);
 161
 162 Clone deeply. This is an unadorned alias to L<Storable> C<dclone>.
 163
 164 =cut
 165
 166 sub clone {
 167     require Storable;
 168     goto &Storable::dclone;
 169 }
 170
 171 =func clone_nomagic
 172
 173     $clone = clone_nomagic($thing);
 174
 175 Clone deeply without keeping [most of] the magic.
 176
 177 B<WARNING:> At the moment the implementation is naïve and won't respond well to nontrivial data or recursive
 178 structures.
 179
 180 =cut
 181
 182 sub clone_nomagic {
 183     my $thing = shift;
 184     if (is_arrayref($thing)) {
 185         my @arr = map { clone_nomagic($_) } @$thing;
 186         return \@arr;
 187     }
 188     elsif (is_hashref($thing)) {
 189         my %hash;
 190         $hash{$_} = clone_nomagic($thing->{$_}) for keys %$thing;
 191         return \%hash;
 192     }
 193     elsif (is_ref($thing)) {
 194         return clone($thing);
 195     }
 196     return $thing;
 197 }
 198
 199 =func dumper
 200
 201     $str = dumper $thing;
 202     dumper $thing;  # in void context, prints to STDERR
 203
  204 Like L<Data::Dumper> but slightly terser in some cases relevant to L<File::KDBX>.
 205
 206 =cut
 207
 208 sub dumper {
 209     require Data::Dumper;
 210     # avoid "once" warnings
 211     local $Data::Dumper::Deepcopy = $Data::Dumper::Deepcopy = 1;
 212     local $Data::Dumper::Deparse = $Data::Dumper::Deparse = 1;
 213     local $Data::Dumper::Indent = 1;
 214     local $Data::Dumper::Quotekeys = 0;
 215     local $Data::Dumper::Sortkeys = 1;
 216     local $Data::Dumper::Terse = 1;
 217     local $Data::Dumper::Trailingcomma = 1;
 218     local $Data::Dumper::Useqq = 1;
 219
 220     my @dumps;
 221     for my $struct (@_) {
 222         my $str = Data::Dumper::Dumper($struct);
 223
 224         # boolean
 225         $str =~ s/bless\( do\{\\\(my \$o = ([01])\)\}, 'boolean' \)/boolean($1)/gs;
 226         # Time::Piece
 227         $str =~ s/bless\([^\)]+?(\d+)'?,\s+\d+,?\s+\], 'Time::Piece' \),/
 228             "scalar gmtime($1), # " . scalar gmtime($1)->datetime/ges;
 229
 230         print STDERR $str if !defined wantarray;
 231         push @dumps, $str;
 232         return $str;
 233     }
 234     return join("\n", @dumps);
 235 }
 236
 237 =func empty
 238
 239 =func nonempty
 240
 241     $bool = empty $thing;
 242
 243     $bool = nonempty $thing;
 244
 245 Test whether a thing is empty (or nonempty). An empty thing is one of these:
 246
 247 =for :list
 248 * nonexistent
 249 * C<undef>
 250 * zero-length string
 251 * zero-length array
 252 * hash with zero keys
 253 * reference to an empty thing (recursive)
 254
 255 Note in particular that zero C<0> is not considered empty because it is an actual value.
 256
 257 =cut
 258
 259 sub empty    {  _empty(@_) }
 260 sub nonempty { !_empty(@_) }
 261
 262 sub _empty {
 263     return 1 if @_ == 0;
 264     local $_ = shift;
 265     return !defined $_
 266         || $_ eq ''
 267         || (is_arrayref($_)  && @$_ == 0)
 268         || (is_hashref($_)   && keys %$_ == 0)
 269         || (is_scalarref($_) && (!defined $$_ || $$_ eq ''))
 270         || (is_refref($_)    && _empty($$_));
 271 }
 272
 273 =func erase
 274
 275     erase($string, ...);
 276     erase(\$string, ...);
 277
  278 Overwrite the memory used by one or more strings.
 279
 280 =cut
 281
 282 BEGIN {
 283     if (load_xs) {
 284         *_CowREFCNT = \&File::KDBX::XS::CowREFCNT;
 285     }
 286     elsif (eval { require B::COW; 1 }) {
 287         *_CowREFCNT = \&B::COW::cowrefcnt;
 288     }
 289     else {
 290         *_CowREFCNT = sub { undef };
 291     }
 292 }
 293
 294 sub erase {
 295     # Only bother zeroing out memory if we have the last SvPV COW reference, otherwise we'll end up just
 296     # creating a copy and erasing the copy.
 297     # TODO - Is this worth doing? Need some benchmarking.
 298     for (@_) {
 299         if (!is_ref($_)) {
 300             next if !defined $_ || readonly $_;
 301             my $cowrefcnt = _CowREFCNT($_);
 302             goto FREE_NONREF if defined $cowrefcnt && 1 < $cowrefcnt;
 303             # if (__PACKAGE__->can('erase_xs')) {
 304             #     erase_xs($_);
 305             # }
 306             # else {
 307                 substr($_, 0, length($_), "\0" x length($_));
 308             # }
 309             FREE_NONREF: {
 310                 no warnings 'uninitialized';
 311                 undef $_;
 312             }
 313         }
 314         elsif (is_scalarref($_)) {
 315             next if !defined $$_ || readonly $$_;
 316             my $cowrefcnt = _CowREFCNT($$_);
 317             goto FREE_REF if defined $cowrefcnt && 1 < $cowrefcnt;
 318             # if (__PACKAGE__->can('erase_xs')) {
 319             #     erase_xs($$_);
 320             # }
 321             # else {
 322                 substr($$_, 0, length($$_), "\0" x length($$_));
 323             # }
 324             FREE_REF: {
 325                 no warnings 'uninitialized';
 326                 undef $$_;
 327             }
 328         }
 329         elsif (is_arrayref($_)) {
 330             erase(@$_);
 331             @$_ = ();
 332         }
 333         elsif (is_hashref($_)) {
 334             erase(values %$_);
 335             %$_ = ();
 336         }
 337         else {
 338             throw 'Cannot erase this type of scalar', type => ref $_, what => $_;
 339         }
 340     }
 341 }
 342
 343 =func erase_scoped
 344
 345     $scope_guard = erase_scoped($string, ...);
 346     $scope_guard = erase_scoped(\$string, ...);
 347     undef $scope_guard; # erase happens here
 348
 349 Get a scope guard that will cause scalars to be erased later (i.e. when the scope ends). This is useful if you
 350 want to make sure a string gets erased after you're done with it, even if the scope ends abnormally.
 351
 352 See L</erase>.
 353
 354 =cut
 355
 356 sub erase_scoped {
 357     throw 'Programmer error: Cannot call erase_scoped in void context' if !defined wantarray;
 358     my @args;
 359     for (@_) {
 360         !is_ref($_) || is_arrayref($_) || is_hashref($_) || is_scalarref($_)
 361             or throw 'Cannot erase this type of scalar', type => ref $_, what => $_;
 362         push @args, is_ref($_) ? $_ : \$_;
 363     }
 364     require Scope::Guard;
 365     return Scope::Guard->new(sub { erase(@args) });
 366 }
 367
 368 =func extends
 369
 370     extends $class;
 371
  372 Set up the current module to inherit from another module.
 373
 374 =cut
 375
 376 sub extends {
 377     my $parent  = shift;
 378     my $caller  = caller;
 379     load $parent;
 380     no strict 'refs'; ## no critic (ProhibitNoStrict)
 381     @{"${caller}::ISA"} = $parent;
 382 }
 383
 384 =func has
 385
 386     has $name => %options;
 387
 388 Create an attribute getter/setter. Possible options:
 389
 390 =for :list
 391 * C<is> - Either "rw" (default) or "ro"
 392 * C<default> - Default value
 393 * C<coerce> - Coercive function
 394
 395 =cut
 396
 397 sub has {
 398     my $name = shift;
 399     my %args = @_ % 2 == 1 ? (default => shift, @_) : @_;
 400
 401     my ($package, $file, $line) = caller;
 402
 403     my $d = $args{default};
 404     my $default = is_arrayref($d) ? sub { [@$d] } : is_hashref($d) ? sub { +{%$d} } : $d;
 405     my $coerce  = $args{coerce};
 406     my $is      = $args{is} || 'rw';
 407
 408     my $store = $args{store};
 409     ($store, $name) = split(/\./, $name, 2) if $name =~ /\./;
 410     push @{$ATTRIBUTES{$package} //= []}, $name;
 411
 412     my $store_code = '';
 413     $store_code = qq{->$store} if $store;
 414     my $member = qq{\$_[0]$store_code\->{'$name'}};
 415
 416     my $default_code = is_coderef $default ? q{scalar $default->($_[0])}
 417                         : defined $default ? q{$default}
 418                                            : q{undef};
 419     my $get = qq{$member //= $default_code;};
 420
 421     my $set = '';
 422     if ($is eq 'rw') {
 423         $set = is_coderef $coerce ? qq{$member = scalar \$coerce->(\@_[1..\$#_]) if \$#_;}
 424                 : defined $coerce ? qq{$member = do { local @_ = (\@_[1..\$#_]); $coerce } if \$#_;}
 425                                   : qq{$member = \$_[1] if \$#_;};
 426     }
 427
 428     $line -= 4;
 429     my $code = <<END;
 430 # line $line "$file"
 431 sub ${package}::${name} {
 432     return $default_code if !Scalar::Util::blessed(\$_[0]);
 433     $set
 434     $get
 435 }
 436 END
 437     eval $code; ## no critic (ProhibitStringyEval)
 438 }
 439
 440 =func format_uuid
 441
 442     $string_uuid = format_uuid($raw_uuid);
 443     $string_uuid = format_uuid($raw_uuid, $delimiter);
 444
  445 Format a 128-bit UUID (given as a string of 16 octets) into a hexadecimal string, optionally with a delimiter
 446 to break up the UUID visually into five parts. Examples:
 447
 448     my $uuid = uuid('01234567-89AB-CDEF-0123-456789ABCDEF');
 449     say format_uuid($uuid);         # -> 0123456789ABCDEF0123456789ABCDEF
 450     say format_uuid($uuid, '-');    # -> 01234567-89AB-CDEF-0123-456789ABCDEF
 451
 452 This is the inverse of L</uuid>.
 453
 454 =cut
 455
 456 sub format_uuid {
 457     local $_    = shift // "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 458     my $delim   = shift // '';
 459     length($_) == 16 or throw 'Must provide a 16-bytes UUID', size => length($_), str => $_;
 460     return uc(join($delim, unpack('H8 H4 H4 H4 H12', $_)));
 461 }
 462
 463 =func generate_uuid
 464
 465     $uuid = generate_uuid;
 466     $uuid = generate_uuid(\%set);
 467     $uuid = generate_uuid(\&test_uuid);
 468
 469 Generate a new random UUID. It's pretty unlikely that this will generate a repeat, but if you're worried about
 470 that you can provide either a set of existing UUIDs (as a hashref where the keys are the elements of a set) or
 471 a function to check for existing UUIDs, and this will be sure to not return a UUID already in provided set.
 472 Perhaps an example will make it clear:
 473
 474     my %uuid_set = (
  475         uuid('12345678-9ABC-DEF0-1234-56789ABCDEF0') => 'whatever',
  476     );
  477     $uuid = generate_uuid(\%uuid_set);
  478     # OR
  479     $uuid = generate_uuid(sub { !$uuid_set{$_} });
  480
  481 Here, C<$uuid> can't be "12345678-9ABC-DEF0-1234-56789ABCDEF0". This example uses L</uuid> to easily pack
  482 a 16-byte UUID from a literal, but it otherwise is not a consequential part of the example.
 483
 484 =cut
 485
 486 sub generate_uuid {
 487     my $set  = @_ % 2 == 1 ? shift : undef;
 488     my %args = @_;
 489     my $test = $set //= $args{test};
 490     $test   = sub { !$set->{$_} } if is_hashref($test);
 491     $test //= sub { 1 };
 492     my $printable = $args{printable} // $args{print};
 493     local $_ = '';
 494     do {
 495         $_ = $printable ? random_string(16) : random_bytes(16);
 496     } while (!$test->($_));
 497     return $_;
 498 }
 499
 500 =func gunzip
 501
 502     $unzipped = gunzip($string);
 503
 504 Decompress an octet stream.
 505
 506 =cut
 507
 508 sub gunzip {
 509     load_optional('Compress::Raw::Zlib');
 510     local $_ = shift;
 511     my ($i, $status) = Compress::Raw::Zlib::Inflate->new(-WindowBits => 31);
 512     $status == Compress::Raw::Zlib::Z_OK()
 513         or throw 'Failed to initialize compression library', status => $status;
 514     $status = $i->inflate($_, my $out);
 515     $status == Compress::Raw::Zlib::Z_STREAM_END()
 516         or throw 'Failed to decompress data', status => $status;
 517     return $out;
 518 }
 519
 520 =func gzip
 521
 522     $zipped = gzip($string);
 523
 524 Compress an octet stream.
 525
 526 =cut
 527
 528 sub gzip {
 529     load_optional('Compress::Raw::Zlib');
 530     local $_ = shift;
 531     my ($d, $status) = Compress::Raw::Zlib::Deflate->new(-WindowBits => 31, -AppendOutput => 1);
 532     $status == Compress::Raw::Zlib::Z_OK()
 533         or throw 'Failed to initialize compression library', status => $status;
 534     $status = $d->deflate($_, my $out);
 535     $status == Compress::Raw::Zlib::Z_OK()
 536         or throw 'Failed to compress data', status => $status;
 537     $status = $d->flush($out);
 538     $status == Compress::Raw::Zlib::Z_OK()
 539         or throw 'Failed to compress data', status => $status;
 540     return $out;
 541 }
 542
 543 =func is_readable
 544
 545 =func is_writable
 546
 547     $bool = is_readable($mode);
 548     $bool = is_writable($mode);
 549
  550 Determine if an C<fopen>-style mode is readable, writable or both.
 551
 552 =cut
 553
 554 sub is_readable { $_[0] !~ /^[aw]b?$/ }
 555 sub is_writable { $_[0] !~ /^rb?$/ }
 556
 557 =func is_uuid
 558
 559     $bool = is_uuid($thing);
 560
 561 Check if a thing is a UUID (i.e. scalar string of length 16).
 562
 563 =cut
 564
 565 sub is_uuid { defined $_[0] && !is_ref($_[0]) && length($_[0]) == 16 }
 566
 567 =func list_attributes
 568
 569     @attributes = list_attributes($package);
 570
 571 Get a list of attributes for a class.
 572
 573 =cut
 574
 575 sub list_attributes {
 576     my $package = shift;
 577     return @{$ATTRIBUTES{$package} // []};
 578 }
 579
 580 =func load_optional
 581
 582     $package = load_optional($package);
 583
 584 Load a module that isn't required but can provide extra functionality. Throw if the module is not available.
 585
 586 =cut
 587
 588 sub load_optional {
 589     for my $module (@_) {
 590         eval { load $module };
 591         if (my $err = $@) {
 592             warn $err if $ENV{DEBUG};
 593             throw "Missing dependency: Please install $module to use this feature.\n", module => $module;
 594         }
 595     }
 596     return wantarray ? @_ : $_[0];
 597 }
 598
 599 =func memoize
 600
 601     \&memoized_code = memoize(\&code, ...);
 602
 603 Memoize a function. Extra arguments are passed through to C<&code> when it is called.
 604
 605 =cut
 606
 607 sub memoize {
 608     my $func = shift;
 609     my @args = @_;
 610     my %cache;
 611     return sub { $cache{join("\0", grep { defined } @_)} //= $func->(@args, @_) };
 612 }
 613
 614 =func pad_pkcs7
 615
  616     $padded_string = pad_pkcs7($string, $block_size);
 617
 618 Pad a block using the PKCS#7 method.
 619
 620 =cut
 621
 622 sub pad_pkcs7 {
 623     my $data = shift // throw 'Must provide a string to pad';
 624     my $size = shift or throw 'Must provide block size';
 625
 626     0 <= $size && $size < 256
 627         or throw 'Cannot add PKCS7 padding to a large block size', size => $size;
 628
 629     my $pad_len = $size - length($data) % $size;
 630     $data .= chr($pad_len) x $pad_len;
 631 }
 632
 633 =func query
 634
 635     $query = query(@where);
 636     $query->(\%data);
 637
 638 Generate a function that will run a series of tests on a passed hashref and return true or false depending on
 639 if the data record in the hash matched the specified logic.
 640
 641 The logic can be specified in a manner similar to L<SQL::Abstract/"WHERE CLAUSES"> which was the inspiration
 642 for this function, but this code is distinct, supporting an overlapping but not identical feature set and
 643 having its own bugs.
 644
 645 See L<File::KDBX/QUERY> for examples.
 646
 647 =cut
 648
 649 sub query { _query(undef, '-or', \@_) }
 650
 651 =func read_all
 652
 653     $size = read_all($fh, my $buffer, $size);
 654     $size = read_all($fh, my $buffer, $size, $offset);
 655
  656 Like L<perlfunc/read> but returns C<undef> if not all C<$size> bytes are read. This is considered an error,
 657 distinguishable from other errors by C<$!> not being set.
 658
 659 =cut
 660
 661 sub read_all($$$;$) { ## no critic (ProhibitSubroutinePrototypes)
 662     my $result = @_ == 3 ? read($_[0], $_[1], $_[2])
 663                          : read($_[0], $_[1], $_[2], $_[3]);
 664     return if !defined $result;
 665     return if $result != $_[2];
 666     return $result;
 667 }
 668
 669 =func recurse_limit
 670
 671     \&limited_code = recurse_limit(\&code);
 672     \&limited_code = recurse_limit(\&code, $max_depth);
 673     \&limited_code = recurse_limit(\&code, $max_depth, \&error_handler);
 674
 675 Wrap a function with a guard to prevent deep recursion.
 676
 677 =cut
 678
 679 sub recurse_limit {
 680     my $func        = shift;
 681     my $max_depth   = shift // 200;
 682     my $error       = shift // sub {};
 683     my $depth = 0;
 684     return sub { return $error->(@_) if $max_depth < ++$depth; $func->(@_) };
 685 };
 686
 687 =func search
 688
 689     # Generate a query on-the-fly:
 690     \@matches = search(\@records, @where);
 691
 692     # Use a pre-compiled query:
 693     $query = query(@where);
 694     \@matches = search(\@records, $query);
 695
 696     # Use a simple expression:
 697     \@matches = search(\@records, \'query terms', @fields);
 698     \@matches = search(\@records, \'query terms', $operator, @fields);
 699
 700     # Use your own subroutine:
 701     \@matches = search(\@records, \&query);
 702     \@matches = search(\@records, sub { $record = shift; ... });
 703
 704 Execute a linear search over an array of records using a L</query>. A "record" is usually a hash.
 705
 706 This is the search engine described with many examples at L<File::KDBX/QUERY>.
 707
 708 =cut
 709
 710 sub search {
 711     my $list    = shift;
 712     my $query   = shift;
 713
 714     if (is_coderef($query) && !@_) {
 715         # already a query
 716     }
 717     elsif (is_scalarref($query)) {
 718         $query = simple_expression_query($$query, @_);
 719     }
 720     else {
 721         $query = query($query, @_);
 722     }
 723
 724     my @match;
 725     for my $item (@$list) {
 726         push @match, $item if $query->($item);
 727     }
 728     return \@match;
 729 }
 730
 731 =for Pod::Coverage search_limited
 732
 733 =cut
 734
 735 sub search_limited {
 736     my $list    = shift;
 737     my $query   = shift;
 738     my $limit   = shift // 1;
 739
 740     if (is_coderef($query) && !@_) {
 741         # already a query
 742     }
 743     elsif (is_scalarref($query)) {
 744         $query = simple_expression_query($$query, @_);
 745     }
 746     else {
 747         $query = query($query, @_);
 748     }
 749
 750     my @match;
 751     for my $item (@$list) {
 752         push @match, $item if $query->($item);
 753         last if $limit <= @match;
 754     }
 755     return \@match;
 756 }
 757
 758 =func simple_expression_query
 759
 760     $query = simple_expression_query($expression, @fields);
 761
 762 Generate a query, like L</query>, to be used with L</search> but built from a "simple expression" as
 763 L<described here|https://keepass.info/help/base/search.html#mode_se>.
 764
 765 An expression is a string with one or more space-separated terms. Terms with spaces can be enclosed in double
 766 quotes. Terms are negated if they are prefixed with a minus sign. A record must match every term on at least
 767 one of the given fields.
 768
 769 =cut
 770
 771 sub simple_expression_query {
 772     my $expr = shift;
 773     my $op   = @_ && ($OPS{$_[0] || ''} || 0) == 2 ? shift : '=~';
 774
 775     my $neg_op = $OP_NEG{$op};
 776     my $is_re  = $op eq '=~' || $op eq '!~';
 777
 778     require Text::ParseWords;
 779     my @terms = Text::ParseWords::shellwords($expr);
 780
 781     my @query = qw(-and);
 782
 783     for my $term (@terms) {
 784         my @subquery = qw(-or);
 785
 786         my $neg = $term =~ s/^-//;
 787         my $condition = [($neg ? $neg_op : $op) => ($is_re ? qr/\Q$term\E/i : $term)];
 788
 789         for my $field (@_) {
 790             push @subquery, $field => $condition;
 791         }
 792
 793         push @query, \@subquery;
 794     }
 795
 796     return query(\@query);
 797 }
 798
 799 =func snakify
 800
 801     $string = snakify($string);
 802
 803 Turn a CamelCase string into snake_case.
 804
 805 =cut
 806
 807 sub snakify {
 808     local $_ = shift;
 809     s/UserName/Username/g;
 810     s/([a-z])([A-Z0-9])/${1}_${2}/g;
 811     s/([A-Z0-9]+)([A-Z0-9])(?![A-Z0-9]|$)/${1}_${2}/g;
 812     return lc($_);
 813 }
 814
 815 =func split_url
 816
  817     ($scheme, $auth, $host, $port, $path, $query, $hash, $username, $password) = split_url($url);
 818
 819 Split a URL into its parts.
 820
 821 For example, C<http://user:pass@localhost:4000/path?query#hash> gets split like:
 822
 823 =for :list
 824 * C<http>
 825 * C<user:pass>
  826 * C<localhost>
 827 * C<4000>
 828 * C</path>
 829 * C<?query>
 830 * C<#hash>
 831 * C<user>
 832 * C<pass>
 833
 834 =cut
 835
 836 sub split_url {
 837     local $_ = shift;
 838     my ($scheme, $auth, $host, $port, $path, $query, $hash) =~ m!
 839         ^([^:/\?\#]+) ://
 840         (?:([^\@]+)\@)
 841         ([^:/\?\#]*)
 842         (?::(\d+))?
 843         ([^\?\#]*)
 844         (\?[^\#]*)?
 845         (\#(.*))?
 846     !x;
 847
 848     $scheme = lc($scheme);
 849
 850     $host ||= 'localhost';
 851     $host = lc($host);
 852
 853     $path = "/$path" if $path !~ m!^/!;
 854
 855     $port ||= $scheme eq 'http' ? 80 : $scheme eq 'https' ? 433 : undef;
 856
 857     my ($username, $password) = split($auth, ':', 2);
 858
 859     return ($scheme, $auth, $host, $port, $path, $query, $hash, $username, $password);
 860 }
 861
 862 =func to_bool
 863
 864 =func to_number
 865
 866 =func to_string
 867
 868 =func to_time
 869
 870 =func to_tristate
 871
 872 =func to_uuid
 873
 874 Various typecasting / coercive functions.
 875
 876 =cut
 877
 878 sub to_bool   { $_[0] // return; boolean($_[0]) }
 879 sub to_number { $_[0] // return; 0+$_[0] }
 880 sub to_string { $_[0] // return; "$_[0]" }
 881 sub to_time   {
 882     $_[0] // return;
 883     return gmtime($_[0]) if looks_like_number($_[0]);
 884     return Time::Piece->strptime($_[0], '%Y-%m-%d %H:%M:%S') if !blessed $_[0];
 885     return $_[0];
 886 }
 887 sub to_tristate { $_[0] // return; boolean($_[0]) }
 888 sub to_uuid {
 889     my $str = to_string(@_) // return;
 890     return sprintf('%016s', $str) if length($str) < 16;
 891     return substr($str, 0, 16) if 16 < length($str);
 892     return $str;
 893 }
 894
 895 =func trim
 896
 897     $string = trim($string);
 898
 899 The ubiquitous C<trim> function. Removes all whitespace from both ends of a string.
 900
 901 =cut
 902
 903 sub trim($) { ## no critic (ProhibitSubroutinePrototypes)
 904     local $_ = shift // return;
 905     s/^\s*//;
 906     s/\s*$//;
 907     return $_;
 908 }
 909
 910 =func try_load_optional
 911
 912     $package = try_load_optional($package);
 913
 914 Try to load a module that isn't required but can provide extra functionality, and return true if successful.
 915
 916 =cut
 917
 918 sub try_load_optional {
 919     for my $module (@_) {
 920         eval { load $module };
 921         if (my $err = $@) {
 922             warn $err if $ENV{DEBUG};
 923             return;
 924         }
 925     }
 926     return @_;
 927 }
 928
 929 =func uri_escape_utf8
 930
 931     $string = uri_escape_utf8($string);
 932
 933 Percent-encode arbitrary text strings, like for a URI.
 934
 935 =cut
 936
 937 my %ESC = map { chr($_) => sprintf('%%%02X', $_) } 0..255;
 938 sub uri_escape_utf8 {
 939     local $_ = shift // return;
 940     $_ = encode('UTF-8', $_);
 941     # RFC 3986 section 2.3 unreserved characters
 942     s/([^A-Za-z0-9\-\._~])/$ESC{$1}/ge;
 943     return $_;
 944 }
 945
 946 =func uri_unescape_utf8
 947
 948     $string = uri_unescape_utf8($string);
 949
 950 Inverse of L</uri_escape_utf8>.
 951
 952 =cut
 953
 954 sub uri_unescape_utf8 {
 955     local $_ = shift // return;
 956     s/\%([A-Fa-f0-9]{2})/chr(hex($1))/;
 957     return decode('UTF-8', $_);
 958 }
 959
 960 =func uuid
 961
 962     $raw_uuid = uuid($string_uuid);
 963
  964 Pack a 128-bit UUID (given as a hexadecimal string with optional C<->'s, like
  965 C<12345678-9ABC-DEF0-1234-56789ABCDEF0>) into a string of exactly 16 octets.
 966
 967 This is the inverse of L</format_uuid>.
 968
 969 =cut
 970
 971 sub uuid {
 972     local $_ = shift // return "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 973     s/-//g;
 974     /^[A-Fa-f0-9]{32}$/ or throw 'Must provide a formatted 128-bit UUID';
 975     return pack('H32', $_);
 976
 977 }
 978
 979 =func UUID_NULL
 980
 981 Get the null UUID (i.e. string of 16 null bytes).
 982
 983 =cut
 984
 985 sub UUID_NULL() { "\0" x 16 }
 986
 987 ### --------------------------------------------------------------------------
 988
 989 # Determine if an array looks like keypairs from a hash.
 990 sub _looks_like_keypairs {
 991     my $arr = shift;
 992     return 0 if @$arr % 2 == 1;
 993     for (my $i = 0; $i < @$arr; $i += 2) {
 994         return 0 if is_ref($arr->[$i]);
 995     }
 996     return 1;
 997 }
 998
 999 sub _is_operand_plain {
1000     local $_ = shift;
1001     return !(is_hashref($_) || is_arrayref($_));
1002 }
1003
# Recursively compile a (subject, operator, operand) triple into a query function.
# Plain operands become simple tests; hash and array operands are walked and their parts are combined
# with "-and" or "-or".
sub _query {
    my ($subject, $op, $operand) = @_;
    defined $op or throw 'Must specify a query operator';

    # A subject that is itself a unary or logical operator is applied to $op as the field name.
    return _query_simple($op, $subject) if defined $subject && !is_ref($op) && ($OPS{$subject} || 2) < 2;
    return _query_simple($subject, $op, $operand) if _is_operand_plain($operand);
    return _query_inverse(_query($subject, '-or', $operand)) if $op eq '-not' || $op eq '-false';
    # A hash operand is treated as a list of key/value pairs that must all match.
    return _query($subject, '-and', [%$operand]) if is_hashref($operand);

    my @queries;

    my @pending = @$operand;
    while (@pending) {
        if (_looks_like_keypairs(\@pending)) {
            my ($head, $arg) = splice @pending, 0, 2;
            if (my $arity = $OPS{$head}) {
                if ($arity == 1 && _is_operand_plain($arg)) {
                    # unary operator applied directly to a field name
                    push @queries, _query_simple($arg, $head);
                }
                else {
                    push @queries, _query($subject, $head, $arg);
                }
            }
            elsif (!is_ref($head)) {
                # "field => value" pair; the key becomes the subject
                push @queries, _query($head, 'eq', $arg);
            }
        }
        else {
            my $head = shift @pending;
            if ($OPS{$head}) {
                # apply new operator over the rest
                push @queries, _query($subject, $head, \@pending);
                last;
            }
            # apply original operator over this one
            push @queries, _query($subject, $op, $head);
        }
    }

    return $queries[0]          if @queries == 1;
    return _query_all(@queries) if $op eq '-and';
    return _query_any(@queries) if $op eq '-or';
    throw 'Malformed query';
}
1056
# Build a test for a single field. The field is read by calling the method named $subject when the
# record is an object that can do it, otherwise by looking up the hash key $subject.
sub _query_simple {
    my ($subject, $op, $operand) = @_;
    $op //= 'eq';

    # these special operators can also act as simple operators
    my %alias = ('-true' => '!!', '-false' => '!', '-not' => '!');
    $op = $alias{$op} if exists $alias{$op};

    defined $subject or throw 'Subject is not set in query';
    $OPS{$op} >= 0   or throw 'Cannot use a non-simple operator in a simple query';

    # Operators that need an operand but did not get one.
    if (empty($operand) && !($OPS{$op} < 2)) {
        # Allow field => undef and field => {'ne' => undef} to do the (arguably) right thing.
        if ($op eq 'eq' || $op eq '==') {
            $op = '-empty';
        }
        elsif ($op eq 'ne' || $op eq '!=') {
            $op = '-nonempty';
        }
        else {
            throw 'Operand is required';
        }
    }

    # Fetch the field value from a record, always as a single scalar.
    my $value_of = sub {
        my ($record) = @_;
        my $value = blessed($record) && $record->can($subject) ? $record->$subject : $record->{$subject};
        return $value;
    };

    # Wrap a comparison so that it sees the field value in $_ and is false when that value is undef.
    my $when_defined = sub {
        my ($compare) = @_;
        return sub { local $_ = $value_of->(@_); defined && $compare->() };
    };

    my %test_for = (
        'eq'        => $when_defined->(sub { $_ eq $operand }),
        'ne'        => $when_defined->(sub { $_ ne $operand }),
        'lt'        => $when_defined->(sub { $_ lt $operand }),
        'gt'        => $when_defined->(sub { $_ gt $operand }),
        'le'        => $when_defined->(sub { $_ le $operand }),
        'ge'        => $when_defined->(sub { $_ ge $operand }),
        '=='        => $when_defined->(sub { $_ == $operand }),
        '!='        => $when_defined->(sub { $_ != $operand }),
        '<'         => $when_defined->(sub { $_ <  $operand }),
        '>'         => $when_defined->(sub { $_ >  $operand }),
        '<='        => $when_defined->(sub { $_ <= $operand }),
        '>='        => $when_defined->(sub { $_ >= $operand }),
        '=~'        => $when_defined->(sub { $_ =~ $operand }),
        '!~'        => $when_defined->(sub { $_ !~ $operand }),
        '!'         => sub {  !$value_of->(@_) },
        '!!'        => sub { !!$value_of->(@_) },
        '-defined'  => sub { my $value = $value_of->(@_);  defined $value },
        '-undef'    => sub { my $value = $value_of->(@_); !defined $value },
        '-nonempty' => sub { my $value = $value_of->(@_); nonempty($value) },
        '-empty'    => sub { my $value = $value_of->(@_); empty($value) },
    );

    return $test_for{$op} // throw "Unexpected operator in query: $op",
        subject     => $subject,
        operator    => $op,
        operand     => $operand;
}
1115
# Wrap a query so that it returns the logical negation of the original.
sub _query_inverse {
    my ($inner) = @_;
    return sub { return !$inner->(@_) };
}
1120
# Combine queries into one that is true only when every one of them is true for the record.
sub _query_all {
    my @tests = @_;
    return sub {
        my ($record) = @_;
        return all { $_->($record) } @tests;
    };
}
1128
# Combine queries into one that is true when at least one of them is true for the record.
sub _query_any {
    my @tests = @_;
    return sub {
        my ($record) = @_;
        return any { $_->($record) } @tests;
    };
}
1136
1137 1;