package autobox::Transform; use strict; use warnings; use 5.010; use parent qw/autobox/; our $VERSION = "1.034"; =head1 NAME autobox::Transform - Autobox methods to transform Arrays and Hashes =head1 CONTEXT L provides the ability to call methods on native types, e.g. strings, arrays, and hashes as if they were objects. L provides the basic methods for Perl core functions like C, C, and C. This module, C, provides higher level and more specific methods to transform and manipulate arrays and hashes, in particular when the values are hashrefs or objects. =head1 SYNOPSIS use autobox::Core; # map, uniq, sort, join, sum, etc. use autobox::Transform; =head2 Arrays # use autobox::Core for ->map etc. # filter (like a more versatile grep) $book_locations->filter(); # true values $books->filter(sub { $_->is_in_library($library) }); $book_names->filter( qr/lord/i ); $book_genres->filter("scifi"); $book_genres->filter({ fantasy => 1, scifi => 1 }); # hash key exists # reject: the inverse of filter $book_genres->reject("fantasy"); # order (like a more succinct sort) $book_genres->order; $book_genres->order("desc"); $book_prices->order([ "num", "desc" ]); $books->order([ sub { $_->{price} }, "desc", "num" ]); $log_lines->order([ num => qr/pid: "(\d+)"/ ]); $books->order( [ sub { $_->{price} }, "desc", "num" ] # first price sub { $_->{name} }, # then name ); # group (aggregate) array into hash $book_genres->group; # "Sci-fi" => "Sci-fi" $book_genres->group_count; # "Sci-fi" => 3 $book_genres->group_array; # "Sci-fi" => [ "Sci-fi", "Sci-fi", "Sci-fi"] # Flatten arrayrefs-of-arrayrefs $authors->map_by("books") # ->books returns an arrayref # [ [ $book1, $book2 ], [ $book3 ] ] $authors->map_by("books")->flat; # [ $book1, $book2, $book3 ] # Return reference, even in list context, e.g. in a parameter list $book_locations->filter()->to_ref; # Return array, even in scalar context @books->to_array; # Turn paired items into a hash @titles_books->to_hash; =head2 Arrays with hashrefs/objects # $books and $authors below are arrayrefs with either objects or # hashrefs (the call syntax is the same). These have methods/hash # keys like C<$book->genre()>, C<$book->{is_sold_out}>, # C<$book->is_in_library($library)>, etc. $books->map_by("genre"); $books->map_by([ price_with_tax => $tax_pct ]); $books->filter_by("is_sold_out"); $books->filter_by([ is_in_library => $library ]); $books->filter_by([ price_with_tax => $rate ], sub { $_ > 56.00 }); $books->filter_by("price", sub { $_ > 56.00 }); $books->filter_by("author", "James A. Corey"); $books->filter_by("author", qr/corey/i); # grep_by is an alias for filter_by $books->grep_by("is_sold_out"); # reject_by: the inverse of filter_by $books->reject_by("is_sold_out"); $books->uniq_by("id"); $books->order_by("name"); $books->order_by(name => "desc"); $books->order_by(price => "num"); $books->order_by(price => [ "num", "desc" ]); $books->order_by(name => [ sub { uc($_) }, "desc" ]); $books->order_by([ price_with_tax => $rate ] => "num"); $books->order_by( author => "str", # first by author price => [ "num", "desc" ], # then by price, most expensive first ); $books->order_by( author => [ "desc", sub { uc($_) } ], [ price_with_tax => $rate ] => [ "num", "desc" ], "name", ); $books->group_by("title"), # { # "Leviathan Wakes" => $books->[0], # "Caliban's War" => $books->[1], # "The Tree-Body Problem" => $books->[2], # "The Name of the Wind" => $books->[3], # }, $authors->group_by([ publisher_affiliation => "with" ]), # { # 'James A. Corey with Orbit' => $authors->[0], # 'Cixin Liu with Head of Zeus' => $authors->[1], # 'Patrick Rothfuss with Gollanz' => $authors->[2], # }, $books->group_by_count("genre"), # { # "Sci-fi" => 3, # "Fantasy" => 1, # }, my $genre_books = $books->group_by_array("genre"); # { # "Sci-fi" => [ $sf_book_1, $sf_book_2, $sf_book_3 ], # "Fantasy" => [ $fantasy_book_1 ], # }, =head2 Hashes # map over each pair # e.g. Upper-case the genre name, and make the count say "n books" # (return a key => value pair) $genre_count->map_each(sub { uc( $_[0] ) => "$_ books" }); # { # "FANTASY" => "1 books", # "SCI-FI" => "3 books", # }, # map over each value # e.g. Make the count say "n books" # (return the new value) $genre_count->map_each_value(sub { "$_ books" }); # { # "Fantasy" => "1 books", # "Sci-fi" => "3 books", # }, # map each pair into an array # e.g. Transform each pair to the string "n: genre" # (return list of items) $genre_count->map_each_to_array(sub { "$_: $_[0]" }); # [ "1: Fantasy", "3: Sci-fi" ] # filter each pair # Genres with more than five books $genre_count->filter_each(sub { $_ > 5 }); # filter out each pair # Genres with no more than five books $genre_count->reject_each(sub { $_ > 5 }); # Return reference, even in list context, e.g. in a parameter list %genre_count->to_ref; # Return hash, even in scalar context $author->book_count->to_hash; # Turn key-value pairs into an array %isbn__book->to_array; =head2 Combined examples my $order_authors = $order->books ->filter_by("title", qr/^The/) ->uniq_by("isbn") ->map_by("author") ->uniq_by("name") ->order_by(publisher => "str", name => "str") ->map_by("name")->uniq->join(", "); my $total_order_amount = $order->books ->reject_by("is_sold_out") ->filter_by([ covered_by_vouchers => $vouchers ], sub { ! $_ }) ->map_by([ price_with_tax => $tax_pct ]) ->sum; =cut use true; use Carp; sub import { my $self = shift; $self->SUPER::import( ARRAY => "autobox::Transform::Array" ); $self->SUPER::import( HASH => "autobox::Transform::Hash" ); } sub throw { my ($error) = @_; ###JPL: remove lib $error =~ s/ at [\\\/\w ]*?\bautobox.Transform\.pm line \d+\.\n?$//; local $Carp::CarpLevel = 1; croak($error); } # Normalize the two method calling styles for accessor + args: # $acessor, $args_arrayref # or # $acessor_and_args_arrayref sub _normalized_accessor_args_subref { my ($accessor, $args, $subref) = @_; # Note: unfortunately, this won't allow the $subref (modifier) to # become an arrayref later on when we do many types of modifiers # (string eq, qr regex match, sub call, arrayref in) for # filtering. # # That has to happen after the deprecation has expired and the old # syntax is removed. if(ref($args) eq "CODE") { $subref = $args; # Move down one step $args = undef; } if(ref($accessor) eq "ARRAY") { ($accessor, my @args) = @$accessor; $args = \@args; } return ($accessor, $args, $subref); } ###JPL: rename subref to predicate # Normalize the two method calling styles for accessor + args: # $acessor, $args_arrayref, $modifier # or # $acessor_and_args_arrayref, $modifier sub _normalized_accessor_args_predicate { my ($accessor, $args, $subref) = @_; # Note: unfortunately, this won't allow the $subref (modifier) to # be an arrayref, or undef for many types of modifiers (string eq, # qr regex match, sub call, arrayref in) for filtering. # # That has to happen after the deprecation has expired and the old # syntax is removed. if(defined($args) && ref($args) ne "ARRAY") { $subref = $args; # Move down one step $args = undef; } if(ref($accessor) eq "ARRAY") { ($accessor, my @args) = @$accessor; $args = \@args; } return ($accessor, $args, $subref); } sub _predicate { my ($name, $predicate, $default_predicate) = @_; # No predicate, use default is_true defined($predicate) or return $default_predicate; # scalar, do string eq my $type = ref($predicate) or return sub { $predicate eq $_ }; $type eq "CODE" and return $predicate; $type eq "Regexp" and return sub { $_ =~ $predicate }; $type eq "HASH" and return sub { exists $predicate->{ $_ } }; # Invalid predicate Carp::croak("->$name() \$predicate: ($predicate) is not one of: subref, string, regex"); } =head1 DESCRIPTION C provides high level autobox methods you can call on arrays, arrayrefs, hashes and hashrefs. =head2 Transforming lists of objects vs list of hashrefs C, C C etc. (all methods named C<*_by>) work with sets of hashrefs or objects. These methods are called the same way regardless of whether the array contains objects or hashrefs. The items in the list must be either all objects or all hashrefs. If the array contains hashrefs, the hash key is looked up on each item. If the array contains objects, a method is called on each object (possibly with the arguments provided). =head3 Calling accessor methods with arguments For method calls, it's possible to provide arguments to the method. Consider C: $array->map_by($accessor) If the $accessor is a string, it's a simple method call. # method call without args $books->map_by("price") # becomes $_->price() or $_->{price} If the $accessor is an arrayref, the first item is the method name, and the rest of the items are the arguments to the method. # method call with args $books->map_by([ price_with_discount => 5.0 ]) # becomes $_->price_with_discount(5.0) =head3 Deprecated syntax There is an older syntax for calling methods with arguments. It was abandoned to open up more powerful ways to use grep/filter type methods. Here it is for reference, in case you run into existing code. $array->filter_by($accessor, $args, $subref) $books->filter_by("price_with_discount", [ 5.0 ], sub { $_ < 15.0 }) Call the method $accessor on each object using the arguments in the $args arrayref like so: $object->$accessor(@$args) I, and planned for removal in version 2.000, so if you have code with the old call style, please: =over 4 =item Replace your existing code with the new style as soon as possible. The change is trivial and the code easily found by grep/ack. =item If need be, pin your version to < 2.000 in your cpanfile, dist.ini or whatever you use to avoid upgrading modules to incompatible versions. =back =head2 Filter predicates There are several methods that filter items, e.g. C<@array-Efilter> (duh), C<@array-Efilter_by>, and C<%hash-Efilter_each>. These methods take a C<$predicate> argument to determine which items to retain or filter out. The C family of methods do the opposite, and I items that match the predicate, i.e. the opposite of the filter methods. If $predicate is an I, it is compared to each value with C. $books->filter_by("author", "James A. Corey"); If $predicate is a I, it is compared to each value with C<=~>. $books->reject_by("author", qr/Corey/); If $predicate is a I, values in @array are retained if the $predicate hash key C (the hash values are irrelevant). $books->filter_by( "author", { "James A. Corey" => undef, "Cixin Liu" => 0, "Patrick Rothfuss" => 1, }, ); If $predicate is a I, the subref is called for each value to check whether this item should remain in the list. The $predicate subref should return a true value to remain. C<$_> is set to the current $value. $authors->filter_by(publisher => sub { $_->name =~ /Orbit/ }); =head2 Sorting using order and order_by Let's first compare how sorting is done with Perl's C and autobox::Transform's C/C. =head3 Sorting with sort =over 4 =item * provide a sub that returns the comparison outcome of two values: C<$a> and C<$b> =item * in case of a tie, provide another comparison of $a and $b =back # If the name is the same, compare age (oldest first) sort { uc( $a->{name} ) cmp uc( $b->{name} ) # first comparison || int( $b->{age} / 10 ) <=> int( $a->{age} / 10 ) # second comparison } @users (note the opposite order of C<$a> and C<$b> for the age comparison, something that's often difficult to discern at a glance) =head3 Sorting with order, order_by =over 4 =item * Provide order options for how one value should be compared with the others: =over 8 =item * how to compare (C or C<<=E>) =item * which direction to sort (Cending or Cending) =item * which value to compare, using a regex or subref, e.g. by uc($_) =back =item * In case of a tie, provide another comparison =back # If the name is the same, compare age (oldest first) # ->order @users->order( sub { uc( $_->{name} ) }, # first comparison [ "num", sub { int( $_->{age} / 10 ) }, "desc" ], # second comparison ) # ->order_by @users->order_by( name => sub { uc }, # first comparison age => [ num => desc => sub { int( $_ / 10 ) } ], # second comparison ) =head3 Comparison Options If there's only one option for a comparison (e.g. C), provide a single option (string/regex/subref) value. If there are many options, provide them in an arrayref in any order. =head3 Comparison operator =over 4 =item * C<"str"> (cmp) - default =item * C<"num"> (<=>) =back =head3 Sort order =over 4 =item * C<"asc"> (ascending) - default =item * C<"desc"> (descending) =back =head3 The value to compare =over 4 =item * A subref - default is: C =over 8 =item * The return value is used in the comparison =back =item * A regex, e.g. C =over 8 =item * The value of C are used in the comparison (C<@captured_groups> are C<$1>, C<$2>, C<$3> etc.) =back =back =head3 Examples of a single comparison # order: the first arg is the comparison options (one or an # arrayref with many options) ->order() # Defaults to str, asc, $_, just like sort ->order("num") ->order(sub { uc($_) }) # compare captured matches, e.g. "John" and "Doe" as "JohnDoe" ->order( qr/first_name: (\w+), last_name: (\w+)/ ) ->order([ num => qr/id: (\d+)/ ]) ->order([ sub { int($_) }, "num" ]) # order_by: the first arg is the accessor, just like with # map_by. Second arg is the comparison options (one or an arrayref # with many options) ->order_by("id") ->order_by("id", "num") ->order_by("id", [ "num", "desc" ]) ->order_by("name", sub { uc($_) }) ->order_by(log_line => qr/first_name: (\w+), last_name: (\w+)/ ) ->order_by("log_line", [ num => qr/id: (\d+)/ ]) ->order_by(age => [ sub { int($_) }, "num" ]) # compare int( $a->age_by_interval(10) ) ->order_by([ age_by_interval => 10 ] => [ sub { int($_) }, "num" ]) # compare uc( $a->name_with_title($title) ) ->order_by([ name_with_title => $title ], sub { uc($_) }) =head3 Examples of fallback comparisons When the first comparison is a tie, the subsequent ones are used. # order: list of comparison options (one or an arrayref with many # options, per comparison) ->order( [ sub { $_->{price} }, "num" ], # First a numeric comparison of price [ sub { $_->{name} }, "desc" ], # or if same, a reverse comparison of the name ) ->order( [ sub { uc($_) }, "desc" ], "str", ) ->order( qr/type: (\w+)/, [ num => desc => qr/duration: (\d+)/ ] [ num => sub { /id: (\d+)/ } ], "str", ) # order_by: pairs of accessor-comparison options ->order_by( price => "num", # First a numeric comparison of price name => "desc", # or if same, a reverse comparison of the name ) ->order_by( price => [ "num", "desc" ], name => "str", ) # accessor is a method call with arg: $_->price_with_discount($discount) ->order_by( [ price_with_discount => $discount ] => [ "num", "desc" ], name => [ str => sub { uc($_) } ], "id", ) =head2 List and Scalar Context Almost all of the methods are context sensitive, i.e. they return a list in list context and an arrayref in scalar context, just like L. B: I When in doubt, assume they work like C and C (i.e. return a list), and convert the return value to references where you might have an non-obvious list context. E.g. =head3 Incorrect $self->my_method( # Wrong, this is list context and wouldn't return an array ref books => $books->filter_by("is_published"), ); =head3 Correct $self->my_method( # Correct, put the returned list in an anonymous array ref books => [ $books->filter_by("is_published") ], ); $self->my_method( # Correct, ensure scalar context to get an array ref books => scalar $books->filter_by("is_published"), ); # Probably the nicest, since ->to_ref goes at the end $self->my_method( # Correct, use ->to_ref to ensure an array ref is returned books => $books->filter_by("is_published")->to_ref, ); =head1 METHODS ON ARRAYS =cut package # hide from PAUSE autobox::Transform::Array; use autobox::Core; use Sort::Maker (); use List::MoreUtils (); =head2 @array->filter($predicate = *is_true_subref*) : @array | @$array Similar to Perl's C, return an C<@array> with values for which $predicate yields a true value. $predicate can be a subref, string, undef, regex, or hashref. See L. The default (no C<$predicate>) is a subref which retains true values in the @array. =head3 Examples my @apples = $fruit->filter("apple"); my @any_apple = $fruit->filter( qr/apple/i ); my @publishers = $authors->filter( sub { $_->publisher->name =~ /Orbit/ }, ); =head3 filter and grep L's C method takes a subref, just like this method. C also supports the other predicate types, like string, regex, etc. =cut sub filter { my $array = shift; my ($predicate) = @_; my $subref = autobox::Transform::_predicate( "filter", $predicate, sub { !! $_ }, ); my $result = eval { [ CORE::grep { $subref->( $_ ) } @$array ] } or autobox::Transform::throw($@); return wantarray ? @$result : $result; } =head2 @array->reject($predicate = *is_false_subref*) : @array | @$array Similar to the Unix command C, return an @array with values for which C<$predicate> yields a I value. $predicate can be a subref, string, undef, regex, or hashref. See L. The default (no $predicate) is a subref which I true values in the C<@array>. Examples: my @apples = $fruit->reject("apple"); my @any_apple = $fruit->reject( qr/apple/i ); my @publishers = $authors->reject( sub { $_->publisher->name =~ /Orbit/ }, ); =cut sub reject { my $array = shift; my ($predicate) = @_; my $subref = autobox::Transform::_predicate( "reject", $predicate, sub { !! $_ }, ); my $result = eval { [ CORE::grep { ! $subref->( $_ ) } @$array ] } or autobox::Transform::throw($@); return wantarray ? @$result : $result; } my $option__group = { str => "operator", num => "operator", asc => "direction", desc => "direction", }; sub _group__value_from_order_options { my ($method_name, $options) = @_; my $group__value = {}; for my $option (grep { $_ } @$options) { my $group; my $ref_option = ref($option); ( $ref_option eq "CODE" ) and $group = "extract"; if ( $ref_option eq "Regexp" ) { my $regex = $option; $option = sub { join("", m/$regex/) }; $group = "extract"; } $group ||= $option__group->{ $option } or Carp::croak("->$method_name(): Invalid comparison option ($option), did you mean ->order_by('$option')?"); exists $group__value->{ $group } and Carp::croak("->$method_name(): Conflicting comparison options: ($group__value->{ $group }) and ($option)"); $group__value->{ $group } = $option; } return $group__value; } my $transform__sorter = { str => "string", num => "number", asc => "ascending", desc => "descending", }; sub _sorter_from_comparisons { my ($method_name, $comparisons) = @_; my @sorter_keys; my @extracts; for my $options (@$comparisons) { ref($options) eq "ARRAY" or $options = [ $options ]; # Check one comparison my $group__value = _group__value_from_order_options( $method_name, $options, ); my $operator = $group__value->{operator} // "str"; my $direction = $group__value->{direction} // "asc"; my $extract = $group__value->{extract} // sub { $_ }; my $sorter_operator = $transform__sorter->{$operator}; my $sorter_direction = $transform__sorter->{$direction}; push(@extracts, $extract); my $extract_index = @extracts; push( @sorter_keys, $sorter_operator => [ $sorter_direction, # Sort this one by the extracted value code => "\$_->[ $extract_index ]", ], ); } my $sorter = Sort::Maker::make_sorter( "plain", "ref_in", "ref_out", @sorter_keys, ) or Carp::croak(__PACKAGE__ . " internal error: $@"); return ($sorter, \@extracts); } sub _item_values_array_from_array_item_extracts { my ($array, $extracts) = @_; # Custom Schwartzian Transform where each array item is arrayref of: # 0: $array item; rest 1..n : comparison values # The sorter keys are simply indexed into the nth value return [ map { ## no critic my $item = $_; [ $item, # array item to compare map { my $extract = $_; local $_ = $item; $extract->(); } @$extracts, # comparison values for array item ]; } @$array ]; } sub _item_values_array_from_map_by_extracts { my ($array, $accessors, $extracts) = @_; # Custom Schwartzian Transform where each array item is arrayref of: # 0: $array item; rest 1..n : comparison values # The sorter keys are simply indexed into the nth value my $accessor_values = $accessors->map( sub { [ map_by($array, $_) ] } ); return [ map { ## no critic my $item = $_; my $accessor_index = 0; [ $item, # array item to compare map { my $extract = $_; my $value = shift @{$accessor_values->[ $accessor_index++ ]}; local $_ = $value; $extract->(); } @$extracts, # comparison values for array item ]; } @$array ]; } =head2 @array->order(@comparisons = ("str")) : @array | @$array Return C<@array> ordered according to the C<@comparisons>. The default comparison is the same as the default sort, e.g. a normal string comparison of the C<@array> values. If the first item in C<@comparison> ends in a tie, the next one is used, etc. Each I consists of a single I