# The Perl Advent Calendar needs more articles for 2022. Submit your idea today!
package Scrappy::Project;

# Package version. Assigned inside BEGIN, i.e. at compile time, so the value
# is already set when the `use` statements further down are processed.
BEGIN { $Scrappy::Project::VERSION = '0.94112090' }

use Carp;
use File::Find::Rule;
use Scrappy;
use Moose::Role;

# Read-only attribute populated at construction time.
#
# The default builder first creates a fresh Scrappy instance and stores it
# through the `scraper` accessor.  It then yields the return value of the
# consumer's `setup` method when the class has a method of that name, and the
# project object itself otherwise.
#
# NOTE(review): when no `setup` method exists the object ends up holding a
# reference to itself in `app` -- looks like a reference cycle; confirm whether
# that matters for long-running processes.
has app => (
    is      => 'ro',
    isa     => 'Any',
    default => sub {
        my ($project) = @_;

        $project->scraper(Scrappy->new);

        return $project->setup if $project->meta->has_method('setup');
        return $project;
    }
);

# Read-only attribute: array reference of parser module names found on disk.
#
# For a consuming class "My::Project" every directory in @INC is checked for a
# "My/Project" sub-directory; each "*.pm" file below it becomes one entry,
# e.g. "My/Project/Foo/Bar.pm" => "My::Project::Foo::Bar".
#
# Returns an array reference of unique module names in sorted order.
has parsers => (
    is      => 'ro',
    isa     => 'Any',
    default => sub {
        my $self  = shift;
        my $class = ref $self;

        # "My::Project" -> "My/Project" (path relative to an @INC entry)
        (my $class_dir = $class) =~ s{::}{/}g;

        my %seen;
        my @parsers;

        for my $inc (@INC) {

            # @INC may contain code refs or objects (import hooks); only
            # plain directory names can be searched.
            next if ref $inc;

            my $dir = "$inc/$class_dir";
            next unless -d $dir;

            # Ask for paths relative to $dir.  Matching the class path
            # against the absolute file name would pick up the wrong
            # segment whenever the @INC directory itself contains it.
            my @found =
              File::Find::Rule->file()->name('*.pm')->relative->in($dir);

            for my $rel (@found) {
                (my $stem = $rel) =~ s{\.pm\z}{};
                next unless length $stem;

                my $plug = join '::', $class, split m{[\\/]+}, $stem;

                # The same module may be installed under several @INC
                # entries; list it only once.
                push @parsers, $plug unless $seen{$plug}++;
            }
        }

        return [sort @parsers];
    }
);

# Read-only attribute: hash reference used to look parser names up.
#
# Every name returned by `parsers` is stored twice -- once under the name as
# is and once under its lower-cased form -- and both keys map to the original
# name.
#
# NOTE(review): the default reads `parsers` during construction, so it relies
# on that attribute already being populated -- confirm initialisation order.
has registry => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub {
        my ($self) = @_;
        my %lookup;

        for my $name (@{$self->parsers}) {
            $lookup{$name}     = $name;
            $lookup{lc($name)} = $name;
        }

        return \%lookup;
    }
);

# Read-write hash reference, empty by default.  parse_document() appends each
# parsed record to the list stored under the matching route.
has records => (is => 'rw', isa => 'HashRef', default => sub { return {} });

# Read-write hash reference, empty by default.  route() stores one options
# hash per route pattern here.
has routes => (is => 'rw', isa => 'HashRef', default => sub { return {} });

# Read-write slot for the Scrappy instance used for fetching and matching.
# It has no default of its own; the `app` default assigns it.
has scraper => (is => 'rw', isa => 'Scrappy');

# Register a route and the parser responsible for it.
#
# Call forms:
#   $self->route($route, $parser);
#   $self->route($route, parser => $parser, %other_options);
#
# The parser may be given fully qualified (starting with the consumer's class
# name followed by "::") or as a shortcut that is expanded below the
# consumer's class: "foo" => "Class::Foo", "foo-bar" => "Class::Foo::Bar",
# "foo_bar" => "Class::FooBar".
#
# The options (minus the route itself) are stored in $self->routes under the
# route.  Croaks when no route or no parser was supplied.  Returns $self so
# that calls can be chained.
sub route {
    my $self    = shift;
    my $options = {};

    # basic definition: route($route, $parser)
    ($options->{route}, $options->{parser}) = @_ if scalar @_ == 2;

    # odd definition: route($route, key => value, ...)
    if (@_ % 2) {
        my $route = shift;
        $options = {@_};
        $options->{route} = $route;
    }

    # check route and parser spec
    croak "Error defining route, must have a route and parser assignment"
      unless $options->{route} && $options->{parser};

    # convert parser from shortcut if used.  The pattern is built up front:
    # `$x !~ ref($self) . "::"` parses as `($x !~ ref($self)) . "::"`, which
    # is always true and would prefix fully-qualified names a second time.
    my $class = ref $self;

    unless ($options->{parser} =~ /\A\Q$class\E::/) {

        my $parser = $options->{parser};

        # make fully-qualified parser name
        $parser = ucfirst $parser;
        $parser = join("::", map(ucfirst, split '-', $parser))
          if $parser =~ /\-/;
        $parser = join("", map(ucfirst, split '_', $parser))
          if $parser =~ /\_/;

        $options->{parser} = "${class}::$parser";
    }

    # the route is the key, so it is not repeated inside the stored options
    my $route = delete $options->{route};
    $self->routes->{$route} = $options;

    return $self;
}

# Run the parser registered for the first route that matches $url.
#
# Each route pattern is offered to the scraper's page_match(); on the first
# true result the route's parser module is loaded and instantiated, it is
# handed the scraper, and its parse() method is called with the match result.
# The record it returns is appended to $self->records under the route and
# also returned to the caller.
#
# Croaks when $url is false or when no routes are defined.
# Returns 0 when no route matches.
sub parse_document {
    my ($self, $url) = @_;
    my $scraper = $self->scraper;

    $url or croak("Unable to fetch document, URL is not defined");
    scalar(keys %{$self->routes})
      or croak("Can't parse document, No routes defined");

    # try to match against route(s)
    for my $pattern (keys %{$self->routes}) {
        my $matched = $scraper->page_match($pattern, $url);
        next unless $matched;

        my $parser_class = $self->routes->{$pattern}->{parser};

        # Kept from the original.  Both pragmas are lexically scoped, so
        # they do not reach into the module loaded by `require`.
        no warnings 'redefine';
        no strict 'refs';

        # Foo::Bar -> Foo/Bar.pm
        my $file = $parser_class;
        $file =~ s/::/\//g;
        $file .= ".pm";

        require $file;

        my $instance = $parser_class->new;
        $instance->scraper($scraper);

        if (!defined $self->records->{$pattern}) {
            $self->records->{$pattern} = [];
        }

        my $record = $instance->parse($matched);
        push @{$self->records->{$pattern}}, $record;

        return $record;
    }

    return 0;
}

# Work through the scraper's queue, starting at $starting_url.
#
# May be called on an object or on a class name; in the latter case a new
# instance is created with ->new first.  Every URL taken from the queue is
# fetched with the scraper, and parse_document() is called for it only when
# the page was loaded, is HTML and has status 200.
#
# Croaks when no starting URL is given.
# Returns $self->records.
sub crawl {
    my ($class, $starting_url) = @_;

    my $self = $class;
    $self = $class->new unless ref $class;

    $starting_url or croak("Error, can't execute without a starting url");

    my $queue = $self->scraper->queue;
    $queue->add($starting_url);

    while (my $url = $queue->next) {
        $self->scraper->get($url);

        # parse document data, but only for successfully loaded HTML pages
        next unless $self->scraper->page_loaded;
        next unless $self->scraper->page_ishtml;
        next unless $self->scraper->page_status == 200;

        $self->parse_document($url);
    }

    return $self->records;
}

1;