# TODO: If the model supports caching roles, add headers and check for http 304 # TODO: Implement and support Accept-Language models (port from RDF::Trine::Store::LanguagePreference) # TODO: Add next/prev link headers if query is paged # TODO: Add configuration and link headers to indicate LDF/SPARQL mirrors use v5.14; use warnings; package AtteanX::Endpoint { our $VERSION = "0.002"; } package AtteanX::Error { use Moo; use Types::Standard qw(Str HashRef); use namespace::clean; has 'message' => (is => 'ro', isa => Str, required => 1); has 'details' => (is => 'ro', isa => HashRef, default => sub { +{} }); has 'uri' => (is => 'ro', isa => Str); } package AtteanX::Endpoint::Error { use Moo; extends 'AtteanX::Error'; use Types::Standard qw(Int); use namespace::clean; has 'code' => (is => 'ro', isa => Int, required => 1); } package AtteanX::Endpoint::ClientError { use Moo; extends 'AtteanX::Endpoint::Error'; use Types::Standard qw(Int); use namespace::clean; has 'code' => (is => 'ro', isa => Int, default => 400); } package AtteanX::Endpoint::ServerError { use Moo; extends 'AtteanX::Endpoint::Error'; use Types::Standard qw(Int); use namespace::clean; has 'code' => (is => 'ro', isa => Int, default => 500); } package Plack::App::AtteanX::Endpoint 0.002 { use parent qw(Plack::Component); use Plack::Request; sub configure { my $self = shift; $self->{config} = shift; return $self; } sub prepare_app { my $self = shift; my $config = $self->{config}; $self->{endpoint} = eval { AtteanX::Endpoint->new( $config ) }; if ($@) { warn $@; } } sub call { my($self, $env) = @_; my $req = Plack::Request->new($env); unless ($req->method =~ /^(GET|HEAD|POST)$/) { return [ 405, [ 'Content-type', 'text/plain' ], [ 'Method not allowed' ] ]; } my $ep = $self->{endpoint}; my $resp = $ep->run( $req ); return $resp->finalize; } } =head1 NAME AtteanX::Endpoint - SPARQL 1.1 Protocol Endpoint =head1 VERSION This document describes AtteanX::Endpoint version 0.002 =head1 SYNOPSIS use v5.14; use Attean; =head1 DESCRIPTION The AtteanX::Endpoint class implements a PSGI SPARQL Protocol endpoint. =head1 ATTRIBUTES =over 4 =item C<< planner >> =item C<< model >> =item C<< conf >> A hash reference containing configuration data for the endpoint. For example: { endpoint => { service_description => { named_graphs => 1, default => 1, }, html => { embed_images => 1, image_width => 200, resource_links => 1, }, load_data => 0, update => 0, } } =item C<< graph >> The L of the graph in the model that represents the default graph. =back =head1 METHODS =over 4 =cut package AtteanX::Endpoint { use Moo; use Attean; use TryCatch; use JSON; use Encode; use Plack::Request; use Plack::Response; use Scalar::Util qw(blessed refaddr); use List::MoreUtils qw(any); use File::ShareDir qw(dist_dir); use HTTP::Negotiate qw(choose); use IO::Compress::Gzip qw(gzip); use HTML::HTML5::Writer qw(DOCTYPE_XHTML_RDFA); use Carp qw(croak); use Types::Standard qw(ConsumerOf CodeRef HashRef ArrayRef Str Int); # use IO::Handle; # use Digest::MD5 qw(md5_base64); use XML::LibXML 1.70; # use RDF::RDFa::Generator 0.102; # use Hash::Merge::Simple qw/ merge /; # use Fcntl qw(:flock SEEK_END); use namespace::clean; with 'MooX::Log::Any'; has 'planner' => ( is => 'ro', isa => ConsumerOf['Attean::API::QueryPlanner'], required => 1, default => sub { Attean::IDPQueryPlanner->new(); } ); has 'model' => (is => 'ro', isa => ConsumerOf['Attean::API::Model'], required => 1); has 'conf' => (is => 'ro', isa => HashRef, required => 1); has 'graph' => (is => 'ro', isa => ConsumerOf['Attean::API::IRI'], required => 1); sub BUILDARGS { my $class = shift; my @params = @_; my %args; if (blessed($params[0]) and $params[0]->does('Attean::API::Model')) { # ->new( $model, \%conf ) $args{ model } = shift @params; $args{ conf } = shift @params; $args{ graph } = Attean::IRI->new('http://example.org/graph'); } elsif (any { blessed($_) && $_->does('Attean::API::Model') } @params) { # Assume the buildargs can be taken directly return $class->SUPER::BUILDARGS(@params); } else { # ->new( \%conf ) my $conf = shift @params; my $store_conf = $conf->{store}; my ($name, $file) = split(';', $store_conf, 2); my $sclass = Attean->get_store($name)->new(); my $store = $sclass->new(); my $model = Attean::MutableQuadModel->new( store => $store ); my $graph = Attean::IRI->new('http://example.org/graph'); if (defined($file) and length($file)) { $graph = Attean::IRI->new('file://' . File::Spec->rel2abs($file)); open(my $fh, '<:encoding(UTF-8)', $file) or die $!; #$self->log->debug("Parsing data from $file..."); my $pclass = Attean->get_parser( filename => $file ) // 'AtteanX::Parser::Turtle'; my $parser = $pclass->new(base => $graph); my $iter = $parser->parse_iter_from_io($fh); my $quads = $iter->as_quads($graph); $model->add_iter($quads); } $args{ model } = $model; $args{ conf } = $conf; $args{ graph } = $graph; } return $class->SUPER::BUILDARGS(%args); } =item C<< run ( $request ) >> Run the SPARQL request contained in the given C<< $request >> object and return a response object. =cut sub run { my $self = shift; my $req = shift; try { return $self->_run($req, @_); } catch (AtteanX::Endpoint::Error $e) { my $resp = Plack::Response->new; my $code = $e->code; my $status = $e->message; my $error = { title => $status, describedby => $e->uri, }; if (my $d = $e->details) { $error->{details} = $d; } my @variants = ( ['text/plain', 0.98, 'text/plain'], ['application/json-problem', 0.99, 'application/json-problem'], ); my $headers = $req->headers; my $stype = choose( \@variants, $headers ) || 'text/plain'; if ($stype eq 'application/json-problem') { $resp->headers->content_type( 'application/json-problem' ); $resp->status($code); my $content = encode_json($error); $resp->body($content); } else { $resp->headers->content_type( 'text/plain' ); $resp->status($code); my @messages = grep { defined($_) } @{ $error }{ qw(title detail) }; my $content = join("\n\n", $status, @messages); $resp->body($content); } return $resp; } } sub _run { my $self = shift; my $req = shift; my $config = $self->{conf}; my $endpoint_path = $config->{endpoint}{endpoint_path} || '/sparql'; my $model = $self->{model}; my $response = Plack::Response->new; our $VERSION; my $server = "AtteanX::Endpoint/$VERSION"; $server .= " " . $response->headers->header('Server') if defined($response->headers->header('Server')); $response->headers->header('Server' => $server); unless ($req->path eq $endpoint_path) { my $content; my $path = $req->path_info; $path =~ s#^/##; my $dir = $ENV{ATTEAN_ENDPOINT_SHAREDIR} || File::Spec->catdir((eval { dist_dir('AtteanX-Endpoint') } || 'share'), 'endpoint'); my $abs = File::Spec->rel2abs($dir); my $file = File::Spec->catfile($abs, 'www', $path); if (-r $file) { open( my $fh, '<', $file ) or croak $!; $response->status(200); $content = $fh; } else { my $path = $req->path; $response->status(404); $content = <<"END"; \n\n404 Not Found\n\n

Not Found

\n

The requested URL $path was not found on this server.

\n END } $response->body($content); return $response; } my $headers = $req->headers; my $type = $headers->header('Accept') || 'application/sparql-results+xml'; if (my $t = $req->param('media-type')) { $type = $t; $headers->header('Accept' => $type); } my $ae = $req->headers->header('Accept-Encoding') || ''; my $sparql; my $content; my $ct = $req->header('Content-type'); if ($req->method !~ /^(GET|POST)$/i) { my $method = uc($req->method); $content = "Unexpected method $method (expecting GET or POST)"; $self->log_error( $req, $content ); $response->header('Allow' => 'GET, POST'); die AtteanX::Endpoint::ClientError->new(code => 405, message => 'Method not allowed', uri => 'http://id.kasei.us/rdf-endpoint/error/bad_http_method'); } elsif (defined($ct) and $ct eq 'application/sparql-query') { $sparql = $req->content; } elsif (defined($ct) and $ct eq 'application/sparql-update') { if ($config->{endpoint}{update} and $req->method eq 'POST') { $sparql = $req->content; } } elsif ($req->param('query')) { my @sparql = $req->param('query'); if (scalar(@sparql) > 1) { $content = "More than one query string submitted"; $self->log_error( $req, $content ); die AtteanX::Endpoint::ClientError->new(code => 400, message => 'Multiple query strings not allowed', uri => 'http://id.kasei.us/rdf-endpoint/error/multiple_queries'); } else { $sparql = $sparql[0]; } } elsif ($req->param('update')) { my @sparql = $req->param('update'); if (scalar(@sparql) > 1) { $content = "More than one update string submitted"; $self->log_error( $req, $content ); die AtteanX::Endpoint::ClientError->new(code => 400, message => 'Multiple update strings not allowed', uri => 'http://id.kasei.us/rdf-endpoint/error/multiple_updates'); } if ($config->{endpoint}{update} and $req->method eq 'POST') { $sparql = $sparql[0]; } elsif ($req->method ne 'POST') { my $method = $req->method; $content = "Update operations must use POST"; $self->log_error( $req, $content ); $response->header('Allow' => 'POST'); die AtteanX::Endpoint::ClientError->new(code => 405, message => "$method Not Allowed for Update Operation", uri => 'http://id.kasei.us/rdf-endpoint/error/bad_http_method_update'); } } if ($sparql) { my %args; $args{ update } = 1 if ($config->{endpoint}{update} and $req->method eq 'POST'); $args{ load_data } = 1 if ($config->{endpoint}{load_data}); my $protocol_specifies_update_dataset = 0; { my @default = $req->param('default-graph-uri'); my @named = $req->param('named-graph-uri'); if (scalar(@default) or scalar(@named)) { delete $args{ load_data }; # TODO: handle custom-dataset $self->log->warn('custom query datasets not supported yet'); # $model = Attean::MutableQuadModel->new( store => Attean->get_store('Memory')->new() ); # foreach my $url (@named) { # RDF::Trine::Parser->parse_url_into_model( $url, $model, context => iri($url) ); # } # foreach my $url (@default) { # RDF::Trine::Parser->parse_url_into_model( $url, $model ); # } } } { my @default = $req->param('using-graph-uri'); my @named = $req->param('using-named-graph-uri'); if (scalar(@named) or scalar(@default)) { $protocol_specifies_update_dataset = 1; # TODO: handle custom-dataset $self->log->warn('custom update datasets not supported yet'); # $model = RDF::Trine::Model::Dataset->new( $model ); # $model->push_dataset( default => \@default, named => \@named ); } } # my $match = $headers->header('if-none-match') || ''; # my $etag = md5_base64( join('#', $self->run_tag, $model->etag, $type, $ae, $sparql) ); # if (length($match)) { # if (defined($etag) and ($etag eq $match)) { # $response->status(304); # return $response; # } # } my $base = $req->base; my $parser = Attean->get_parser('SPARQL')->new(base => $base); $parser->update(1) if ($args{update}); my ($algebra) = eval { $args{update} ? $parser->parse_update($sparql, base => $base) : $parser->parse($sparql, base => $base) }; if ($@ or not($algebra)) { my $error = $@ || 'Internal error'; $self->log_error( $req, $error ); my $eclass = ($error =~ /Syntax/) ? 'AtteanX::Endpoint::ClientError' : 'AtteanX::Endpoint::ServerError'; if ($req->method ne 'POST' and $error =~ /read-only queries/sm) { $error = 'Updates must use a HTTP POST request.'; die $eclass->new(message => 'Updates must use a HTTP POST request', uri => 'http://id.kasei.us/rdf-endpoint/error/bad_http_method_update'); } else { die $eclass->new(message => 'SPARQL query/update parse error', uri => 'http://id.kasei.us/rdf-endpoint/error/parse_error', details => { error => $error, sparql => $sparql }); } } else { $self->log_query( $req, $sparql ); # TODO: handle case where query specifies update dataset # if ($protocol_specifies_update_dataset and $query->specifies_update_dataset) { # my $method = $req->method; # $content = "Update operations cannot specify a dataset in both the query and with protocol parameters"; # $self->log_error( $req, $content ); # die AtteanX::Endpoint::ClientError->new(code => 400, message => 'Multiple datasets specified for update', uri => 'http://id.kasei.us/rdf-endpoint/error/update_specifies_multiple_datasets'); # } if ($self->log->is_trace) { $self->log->trace("Algebra:\n" . $algebra->as_string); } my $graph = $self->graph; my $default_graphs = [$graph]; my $planner = $self->planner; if ($self->log->is_trace) { $self->log->debug('Planning with default graphs:'); foreach my $g (@$default_graphs) { $self->log->trace($g->as_string); } } my $plan = $planner->plan_for_algebra($algebra, $model, $default_graphs); if ($self->log->is_debug) { $self->log->debug("Plan:\n" . $plan->as_string); } eval { my $iter = $plan->evaluate($model); $response->status(200); my $sclass = Attean->negotiate_serializer(request_headers => $headers) // Attean->get_serializer('sparqlxml'); $self->log->debug("Serializer class: $sclass"); my $s = $sclass->new(); $content = $s->serialize_iter_to_bytes($iter); my $stype = $s->canonical_media_type; $response->headers->content_type($stype); }; if ($@) { my $error = $@; $self->log->fatal($error); die AtteanX::Endpoint::ServerError->new(code => 500, message => 'SPARQL query/update execution error', uri => 'http://id.kasei.us/rdf-endpoint/error/execution_error', details => { error => $@, sparql => $sparql }); } } } elsif ($req->method eq 'POST') { $content = "POST without recognized query or update"; $self->log_error( $req, $content ); die AtteanX::Endpoint::ClientError->new(message => 'Missing SPARQL Query/Update String', uri => 'http://id.kasei.us/rdf-endpoint/error/missing_sparql_string'); } else { my $stype = 'text/html'; my $dir = $ENV{ATTEAN_ENDPOINT_SHAREDIR} || File::Spec->catdir((eval { dist_dir('AtteanX-Endpoint') } || 'share'), 'endpoint'); my $template = File::Spec->catfile($dir, 'index.html'); my $parser = XML::LibXML->new(validation => 0, suppress_errors => 1, no_network => 1, recover => 2); my $doc = $parser->parse_file( $template ); # my $gen = RDF::RDFa::Generator->new( style => 'HTML::Head'); # $gen->inject_document($doc, $sdmodel); my $writer = HTML::HTML5::Writer->new( markup => 'xhtml', doctype => DOCTYPE_XHTML_RDFA ); $content = encode_utf8( $writer->document($doc) ); $response->status(200); $response->headers->content_type('text/html'); } $content = $response->body || $content; my $length = 0; my %ae = map { $_ => 1 } split(/\s*,\s*/, $ae); if ($ae{'gzip'}) { my $orig = length($content); my ($rh, $wh); pipe($rh, $wh); if (ref($content)) { gzip $content => $wh; } else { gzip \$content => $wh; } close($wh); my $body = do { local($/) = undef; <$rh> }; $self->log->info("Compressed $orig bytes to " . length($body) . " bytes"); $length = bytes::length($body); $response->headers->header('Content-Encoding' => 'gzip'); $response->headers->header('Content-Length' => $length); $response->body( $body ) unless ($req->method eq 'HEAD'); } else { local($/) = undef; my $body = ref($content) ? <$content> : $content; $length = bytes::length($body); $response->headers->header('Content-Length' => $length); $response->body( $body ) unless ($req->method eq 'HEAD'); } return $response; } =item C<< log_query ( $request, $sparql ) >> Log the C<< $sparql >> query string after having been parsed from the C<< $request >> but before evaluation. =cut sub log_query { my $self = shift; my $req = shift; my $message = shift; $self->log->info("SPARQL query:\n" . $message); $self->_log( $req, { level => 'info', message => $message } ); } =item C<< log_error ( $message ) >> =cut sub log_error { my $self = shift; my $req = shift; my $message = shift; $self->log->error($message); $self->_log( $req, { level => 'error', message => $message } ); } sub _log { my $self = shift; my $req = shift; my $data = shift; my $logger = $req->logger || sub {}; $logger->($data); } sub _set_response_error { my $self = shift; my $req = shift; my $resp = shift; my $code = shift; my $error = shift; my @variants = ( ['text/plain', 1.0, 'text/plain'], ['application/json-problem', 0.99, 'application/json-problem'], ); my $headers = $req->headers; my $stype = choose( \@variants, $headers ) || 'text/plain'; if ($stype eq 'application/json-problem') { $resp->headers->content_type( 'application/json-problem' ); $resp->status($code); my $content = encode_json($error); $resp->body($content); } else { $resp->headers->content_type( 'text/plain' ); $resp->status($code); my @messages = grep { defined($_) } @{ $error }{ qw(title detail) }; my $content = join("\n\n", @messages); $resp->body($content); } return; } } 1; __END__ =back =head1 BUGS Please report any bugs or feature requests to through the GitHub web interface at L. =head1 SEE ALSO L =head1 AUTHOR Gregory Todd Williams C<< >> =head1 COPYRIGHT Copyright (c) 2016 Gregory Todd Williams. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut