#
# Copyright (c) 2006 IBM Corporation.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Eclipse Public License v1.0
# which accompanies this distribution, and is available at
# http://www.eclipse.org/legal/epl-v10.html
#
# File: $Source: /var/lib/cvs/ODO/lib/ODO/Parser/NTriples.pm,v $
# Created by: Stephen Evanchik( <a href="mailto:evanchik@us.ibm.com">evanchik@us.ibm.com </a>)
# Created on: 10/01/2006
# Revision: $Id: NTriples.pm,v 1.2 2009-11-25 17:54:26 ubuntu Exp $
#
# Contributors:
# IBM Corporation - initial API and implementation
#
package ODO::Parser::NTriples;
use strict;
use warnings;
use ODO::Exception;
use ODO::Node;
use ODO::Statement;
use vars qw /$VERSION/;
# Derive the module version from the CVS "Revision" keyword: the major and
# minor numbers are captured and formatted as "major.minor" with a two-digit
# minor part (revision 1.2 becomes "1.02").
$VERSION = sprintf "%d.%02d", q$Revision: 1.2 $ =~ /: (\d+)\.(\d+)/;
use base qw/ODO::Parser/;
# Set up the base_uri() and statements() accessors that the methods below
# call. NOTE(review): mk_accessors is not defined in this file; presumably it
# is inherited through ODO::Parser -- confirm.
__PACKAGE__->mk_accessors(qw/base_uri statements/);
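=head1 NAME

ODO::Parser::NTriples - Parser for statements serialized in N-Triples format.

=head1 SYNOPSIS

    use ODO::Parser::NTriples;

    my $statements = ODO::Parser::NTriples->parse_file('some/path/to/data.ntriples');

    my $rdf = ' ... ntriples format here ... ';
    my $other_statements = ODO::Parser::NTriples->parse($rdf);

=head1 DESCRIPTION

Reads RDF statements written one per line in N-Triples syntax and turns
each of them into an L<ODO::Statement>. Blank lines and comment lines
(starting with C<#>) are skipped.

=head1 CONSTRUCTOR

The parsing methods may be called directly on the class, as shown in the
synopsis; a parser object is then created for that call from the optional
parameters.

=head1 METHODS

=over

=item parse( $rdf, [ base_uri=> $base_uri ] )

Parses the N-Triples text in C<$rdf>, which may also be a reference to an
array holding one line per element, and returns a reference to the array of
statements collected by the parser.

=cut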
# Parse N-Triples text and return the statements collected by the parser.
#
# May be invoked on an existing parser object or as a class method; in the
# latter case a parser is constructed from %parameters first.
#
# Parameters:
#   $rdf        - the text to parse, or an array reference holding one line
#                 per element. Anything that is not an array reference is
#                 handed to _split_text() to be broken into lines.
#   %parameters - constructor arguments (for example base_uri); only used
#                 when no parser object exists yet.
#
# Returns the value of the statements() accessor after parsing.
# Exceptions thrown by _make_statement() for malformed lines propagate to the
# caller.
sub parse {
    my ( $self, $rdf, %parameters ) = @_;

    # Instantiate the class the method was invoked on, so that a subclass
    # keeps its own overrides; fall back to this package when no class name
    # was supplied at all.
    unless ( ref $self ) {
        my $class = $self || __PACKAGE__;
        $self = $class->new(%parameters);
    }

    $rdf = $self->_split_text($rdf)
        unless UNIVERSAL::isa( $rdf, 'ARRAY' );

    foreach my $line ( @{$rdf} ) {
        # Blank and comment lines yield no statement and are skipped.
        my $statement = $self->_make_statement($line);
        next unless $statement;

        $self->add_statement($statement);
    }

    return $self->statements();
}
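
=item parse_file( $filename, [ base_uri=> $base_uri ] )

Parses the N-Triples file C<$filename> line by line and returns a reference
to the array of statements collected by the parser. An
C<ODO::Exception::File::Missing> exception is thrown when the file does not
exist.

=cut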
# Parse an N-Triples file and return the statements collected by the parser.
#
# May be invoked on an existing parser object or as a class method; in the
# latter case a parser is constructed from %parameters first.
#
# Parameters:
#   $filename   - path of the file to read.
#   %parameters - constructor arguments (for example base_uri); only used
#                 when no parser object exists yet.
#
# Returns the value of the statements() accessor after parsing.
# Throws ODO::Exception::File::Missing when the file does not exist or cannot
# be opened; exceptions from _make_statement() for malformed lines propagate.
sub parse_file {
    my ( $self, $filename, %parameters ) = @_;

    # Instantiate the class the method was invoked on, so that a subclass
    # keeps its own overrides; fall back to this package when no class name
    # was supplied at all.
    unless ( ref $self ) {
        my $class = $self || __PACKAGE__;
        $self = $class->new(%parameters);
    }

    ODO::Exception::File::Missing->throw( error => "Could not locate file: $filename" )
        unless -e $filename;

    # Three-argument open on a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle is released automatically
    # even if a malformed line makes _make_statement() throw.
    # File::Missing is reused for open failures because it is the only
    # file-related exception class this module refers to.
    open( my $rdf_fh, '<', $filename )
        or ODO::Exception::File::Missing->throw(
            error => "Could not open file: $filename: $!" );

    # Read line by line instead of loading the whole file into a list.
    while ( defined( my $line = <$rdf_fh> ) ) {
        # Remove the line terminator, whether LF or CRLF, so that a carriage
        # return never ends up as part of the statement text.
        $line =~ s/[\r\n]+\z//;

        my $statement = $self->_make_statement($line);
        next unless $statement;

        $self->add_statement($statement);
    }

    close($rdf_fh);

    return $self->statements();
}
# Prepare a parser object for use: store the base URI taken from the
# configuration hash and start with an empty statement list.
#
# Parameters:
#   $config - hash reference; only its 'base_uri' entry is read here.
#
# Returns the parser object itself.
sub init {
    my $self   = shift;
    my $config = shift;

    my $base_uri = $config->{'base_uri'};
    $self->base_uri($base_uri);

    # Every initialisation gets its own, fresh array.
    my @no_statements = ();
    $self->statements( \@no_statements );

    return $self;
}
# Break a block of N-Triples text into its individual lines.
#
# Parameters:
#   $rdf_text - the text itself, or a reference to a scalar holding it (the
#               form shown in the SYNOPSIS).
#
# Returns a reference to an array with one element per line; CRLF, LF and CR
# are all accepted as line terminators. Undefined input yields an empty
# array.
sub _split_text {
    my ( undef, $rdf_text ) = @_;

    # A scalar reference would otherwise be stringified ("SCALAR(0x...)") and
    # parsed as if that were the document.
    $rdf_text = ${$rdf_text}
        if UNIVERSAL::isa( $rdf_text, 'SCALAR' );

    return [] unless defined $rdf_text;

    my @statements = split( /\r\n|\n|\r/, $rdf_text );
    return \@statements;
}
# Turn one line of N-Triples text into an ODO::Statement.
#
# Parameters:
#   $line - a single line without its line terminator.
#
# Returns the statement, or undef for input that carries no statement: a
# false value (undef, empty string, "0"), a line of only spaces/tabs, or a
# comment line (optionally indented).
#
# Throws ODO::Exception::Parameter::Invalid when the line does not end in
# whitespace followed by '.', when it lacks one of its three components, or
# when a node or the statement cannot be created.
sub _make_statement {
    my ( $self, $line ) = @_;

    return undef
        unless $line && $line !~ /^[ \t]*(?:#.*)?$/;

    ODO::Exception::Parameter::Invalid->throw(
        error => "Line does not end with a '.': '$line'" )
        unless $line =~ m/.*[\ \t]+\.[\ \t]*$/;

    # Subject and predicate never contain whitespace. The object is either a
    # double-quoted literal (which may contain spaces and backslash-escaped
    # quotes) together with whatever suffix is attached to it, or a single
    # whitespace-free token. Leading whitespace is tolerated and anything
    # after the object is ignored.
    my ( $s, $p, $o ) = $line =~ m/
        ^ [ \t]*
        (\S+) [ \t]+
        (\S+) [ \t]+
        ( "(?:[^"\\]|\\.)*" \S* | \S+ )
    /x;

    # With only two components present the terminating '.' would be picked
    # up as the object; treat that as a missing component.
    ODO::Exception::Parameter::Invalid->throw(
        error => 'Could not split line in to components of a statement' )
        unless defined $o && $o ne '.';

    $s = $self->make_node($s);
    $p = $self->make_node($p);
    $o = $self->make_node($o);

    ODO::Exception::Parameter::Invalid->throw(
        error => "Could not create ODO::Node's for the raw text" )
        unless UNIVERSAL::isa( $s, 'ODO::Node' )
            && UNIVERSAL::isa( $p, 'ODO::Node' )
            && UNIVERSAL::isa( $o, 'ODO::Node' );

    my $t = ODO::Statement->new( 's' => $s, 'p' => $p, 'o' => $o );

    # Same exception class and 'error' key as every other failure above.
    ODO::Exception::Parameter::Invalid->throw(
        error => "Could not create statement for: $s, $p, $o" )
        unless UNIVERSAL::isa( $t, 'ODO::Statement' );

    return $t;
}
# Append one statement to the list held by the statements() accessor.
#
# Parameters:
#   $statement - the value to append; it is stored as given.
#
# Returns the result of push, i.e. the number of statements now in the list.
sub add_statement {
    my $self      = shift;
    my $statement = shift;

    return push @{ $self->statements() }, $statement;
}
# Build the node object for one raw N-Triples term.
#
# Parameters:
#   $node - the raw text of the term.
#
# Returns
#   - an ODO::Node::Blank built from the label following "_:",
#   - an ODO::Node::Resource built from the text between "<" and ">", or
#   - for anything else, whatever make_literal() returns for the raw text.
sub make_node {
    my ( $self, $node ) = @_;

    # Blank node: "_:" followed by a label.
    return ODO::Node::Blank->new($1)
        if $node =~ /^_:(\w+|\d+)/;

    # Resource: the whole term is enclosed in angle brackets.
    return ODO::Node::Resource->new($1)
        if $node =~ /^<(.*)>$/;

    # Neither of the above, so it is handled as a literal.
    return $self->make_literal($node);
}
# Build an ODO::Node::Literal from the raw text of an N-Triples literal.
#
# Recognised forms:
#   "text"^^<datatype-uri>   value "text", datatype set to the URI (the
#                            enclosing angle brackets, if any, are removed)
#   "text"@lang              value "text", language set to the tag
#   "text"                   value "text"
#
# Anything else is still accepted, as before: the value is the text between
# the first and the last double quote if there are any, and the raw text
# otherwise. Escape sequences inside the quotes are kept as written.
#
# Parameters:
#   $raw - the raw literal text.
#
# Returns the literal node, or undef when $raw is undefined or empty.
sub make_literal {
    my ( $self, $raw ) = @_;

    return undef
        unless defined $raw && length $raw;

    my $literal = ODO::Node::Literal->new();
    my $value;

    # split('^^', ...) cannot be used to find the datatype separator: its
    # first argument is a pattern, and '^^' is just a pair of start-of-string
    # anchors that never splits anything. Match the separators literally.
    if ( my ( $text, $datatype ) =
        $raw =~ /\A\s*"(.*)"\^\^<?([^<>\s]+)>?\s*\z/ )
    {
        $value = $text;
        $literal->datatype($datatype);
    }
    elsif ( my ( $tagged, $language ) =
        $raw =~ /\A\s*"(.*)"\@([A-Za-z]+(?:-[A-Za-z0-9]+)*)\s*\z/ )
    {
        $value = $tagged;
        $literal->language($language);
    }
    elsif ( $raw =~ /"(.*)"/ ) {
        $value = $1;
    }
    else {
        $value = $raw;
    }

    $literal->value($value);

    return $literal;
}

=back

=head1 COPYRIGHT

Copyright (c) 2006 IBM Corporation.

All rights reserved. This program and the accompanying materials
are made available under the terms of the Eclipse Public License v1.0
which accompanies this distribution, and is available at
http://www.eclipse.org/legal/epl-v10.html

=cut
1;
__END__