=encoding utf8

=head1 NAME

IRI - Internationalized Resource Identifiers

=head1 VERSION

This document describes IRI version 0.011

=head1 SYNOPSIS

  use IRI;
  
  my $i = IRI->new(value => 'https://example.org:80/index#frag');
  say $i->scheme; # 'https'
  say $i->path; # '/index'

  my $base = IRI->new(value => "http://www.hestebedg\x{e5}rd.dk/");
  my $i = IRI->new(value => '#frag', base => $base);
  say $i->abs; # 'http://www.hestebedgÄrd.dk/#frag'

  # Defer parsing of the IRI until necessary
  my $i = IRI->new(value => "http://www.hestebedg\x{e5}rd.dk/", lazy => 1);
  say $i->path; # path is parsed here

=head1 DESCRIPTION

The IRI module provides an object representation for Internationalized
Resource Identifiers (IRIs) as defined by
L<RFC 3987|http://www.ietf.org/rfc/rfc3987.txt> and supports their parsing,
serializing, and base resolution.

=head1 ATTRIBUTES

=over 4

=item C<< lazy >>

A boolean value indicating whether the IRI should be parsed (and validated)
during object construction (false), or parsed only when an IRI component is
accessed (true). If no components are ever needed (e.g. an IRI is constructed
with a C<< value >> and C<< value >> is the only accessor ever called), no
parsing will take place.

=back

=head1 METHODS

=over 4

=item C<< as_string >>

Returns the absolute IRI string resolved against the base IRI, if present;
the relative IRI string otherwise.

=item C<< abs >>

Returns the absolute IRI string (resolved against the base IRI if present).

=item C<< scheme >>

=item C<< host >>

=item C<< port >>

=item C<< user >>

=item C<< path >>

=item C<< fragment >>

=item C<< query >>

Returns the respective component of the parsed IRI.

=cut

{ 
	package IRI; 
	use v5.10.1;
	use warnings;
	our $VERSION	= '0.011';
	use Moo;
	use MooX::HandlesVia;
	use Types::Standard qw(Str InstanceOf HashRef Bool);
	use Scalar::Util qw(blessed);
	
# 	class_type 'URI';
# 	coerce 'IRI' => from 'Str' => via { IRI->new( value => $_ ) };
# 	coerce 'IRI' => from 'URI' => via { IRI->new( value => $_->as_string ) };

	has 'lazy' => (is => 'ro', isa => Bool, default => 0);
	has '_initialized' => (is => 'rw', isa => Bool, default => 0, init_arg => undef);
	has 'base' => (is => 'ro', isa => InstanceOf['IRI'], predicate => 'has_base', coerce => sub {
		my $base	= shift;
		if (blessed($base)) {
			if ($base->isa('IRI')) {
				return $base;
			} elsif ($base->isa('URI')) {
				return IRI->new( value => $base->as_string );
			}
		} else {
			return IRI->new($base);
		}
	});
	has 'value' => (is => 'ro', isa => Str, default => '');
	has 'components' => (is => 'ro', writer => '_set_components');
	has 'abs' => (is => 'ro', lazy => 1, builder => '_abs');
	has 'resolved_components' => (
		is		=> 'ro',
		isa		=> HashRef,
		lazy	=> 1,
		builder	=> '_resolved_components',
		predicate => 1,
		handles_via	=> 'Hash',
		handles	=> {
			authority	=>  [ accessor => 'authority' ],
			scheme		=>  [ accessor => 'scheme' ],
			host		=>  [ accessor => 'host' ],
			port		=>  [ accessor => 'port' ],
			user		=>  [ accessor => 'user' ],
			path		=>  [ accessor => 'path' ],
			fragment	=>  [ accessor => 'fragment' ],
			query		=>  [ accessor => 'query' ],
		},
	);

	around BUILDARGS => sub {
		my $orig 	= shift;
		my $class	= shift;
		if (scalar(@_) == 1) {
			return $class->$orig(value => shift);
		}
		return $class->$orig(@_);
	};
	
	sub BUILD {
		my $self	= shift;
		if ($self->has_resolved_components) {
		  $self->_set_components($self->resolved_components);
		  $self->_initialized(1);
		} else {
		  unless ($self->lazy) {
			 my $comp	= $self->_parse_components($self->value);
		  }
		}
	 }
	
	before [qw(components as_string abs resolved_components scheme host port user path fragment query)] => sub {
		my $self	= shift;
		if (not $self->_initialized) {
# 			warn "Lazily initializing IRI";
			my $comp	= $self->_parse_components($self->value);
		}
	};

	# These regexes are (mostly) from the syntax grammar in RFC 3987
	my $HEXDIG			= qr<[0-9A-F]>o;
	my $ALPHA			= qr<[A-Za-z]>o;
	my $subdelims		= qr<[!\$&'()*+,;=]>xo;
	my $gendelims		= qr<[":/?#@] | \[ | \]>xo;
	my $reserved		= qr<${gendelims} | ${subdelims}>o;
	my $unreserved		= qr<${ALPHA} | [0-9] | [-._~]>xo;
	my $pctencoded		= qr<%[0-9A-Fa-f]{2}>o;
	my $decoctet		= qr<
							[0-9]			# 0-9
						|	[1-9][0-9]		# 10-99
						|	1 [0-9]{2}		# 100-199
						|	2 [0-4] [0-9]	# 200-249
						|	25 [0-5]		# 250-255
						>xo;
	my $IPv4address		= qr<
							# IPv4address
							${decoctet}[.]${decoctet}[.]${decoctet}[.]${decoctet}
						>xo;
	my $h16				= qr<${HEXDIG}{1,4}>o;
	my $ls32			= qr<
							( ${h16} : ${h16} )
						|	${IPv4address}
						>xo;
	my $IPv6address		= qr<
							# IPv6address
							(								 ( ${h16} : ){6} ${ls32})
						| (							  :: ( ${h16} : ){5} ${ls32})
						| ((					${h16} )? :: ( ${h16} : ){4} ${ls32})
						| (( ( ${h16} : ){0,1} ${h16} )? :: ( ${h16} : ){3} ${ls32})
						| (( ( ${h16} : ){0,2} ${h16} )? :: ( ${h16} : ){2} ${ls32})
						| (( ( ${h16} : ){0,3} ${h16} )? ::   ${h16} :		 ${ls32})
						| (( ( ${h16} : ){0,4} ${h16} )? ::				 ${ls32})
						| (( ( ${h16} : ){0,5} ${h16} )? ::				 ${h16})
						| (( ( ${h16} : ){0,6} ${h16} )? ::)
						>xo;
	my $IPvFuture		= qr<v (${HEXDIG})+ [.] ( ${unreserved} | ${subdelims} | : )+>xo;
	my $IPliteral		= qr<\[
							# IPliteral
							(${IPv6address} | ${IPvFuture})
							\]
						>xo;
	my $port			= qr<(?<port>[0-9]*)>o;
	my $scheme			= qr<(?<scheme>${ALPHA} ( ${ALPHA} | [0-9] | [+] | [-] | [.] )*)>xo;
	my $iprivate		= qr<[\x{E000}-\x{F8FF}] | [\x{F0000}-\x{FFFFD}] | [\x{100000}-\x{10FFFD}]>xo;
	my $ucschar			= qr<
							[\x{a0}-\x{d7ff}] | [\x{f900}-\x{fdcf}] | [\x{fdf0}-\x{ffef}]
						|	[\x{10000}-\x{1FFFD}] | [\x{20000}-\x{2FFFD}] | [\x{30000}-\x{3FFFD}]
						|	[\x{40000}-\x{4FFFD}] | [\x{50000}-\x{5FFFD}] | [\x{60000}-\x{6FFFD}]
						|	[\x{70000}-\x{7FFFD}] | [\x{80000}-\x{8FFFD}] | [\x{90000}-\x{9FFFD}]
						|	[\x{A0000}-\x{AFFFD}] | [\x{B0000}-\x{BFFFD}] | [\x{C0000}-\x{CFFFD}]
						|	[\x{D0000}-\x{DFFFD}] | [\x{E1000}-\x{EFFFD}]
						>xo;
	my $iunreserved		= qr<${ALPHA}|[0-9]|[-._~]|${ucschar}>o;
	my $ipchar			= qr<(${iunreserved})|(${pctencoded})|(${subdelims})|:|@>o;
	my $ifragment		= qr<(?<fragment>(${ipchar}|/|[?])*)>o;
	my $iquery			= qr<(?<query>(${ipchar}|${iprivate}|/|[?])*)>o;
	my $isegmentnznc	= qr<(${iunreserved}|${pctencoded}|${subdelims}|@)+ # non-zero-length segment without any colon ":"
						>xo;
	my $isegmentnz		= qr<${ipchar}+>o;
	my $isegment		= qr<${ipchar}*>o;
	my $ipathempty		= qr<>o;
	my $ipathrootless	= qr<(?<path>${isegmentnz}(/${isegment})*)>o;
	my $ipathnoscheme	= qr<(?<path>${isegmentnznc}(/${isegment})*)>o;
	my $ipathabsolute	= qr<(?<path>/(${isegmentnz}(/${isegment})*)?)>o;
	my $ipathabempty	= qr<(?<path>(/${isegment})*)>o;
	my $ipath			= qr<
							${ipathabempty}		# begins with "/" or is empty
						|	${ipathabsolute}	# begins with "/" but not "//"
						|	${ipathnoscheme}	# begins with a non-colon segment
						|	${ipathrootless}	# begins with a segment
						|	${ipathempty}		# zero characters
						>xo;
	my $iregname		= qr<(${iunreserved}|${pctencoded}|${subdelims})*>o;
	my $ihost			= qr<(?<host>${IPliteral}|${IPv4address}|${iregname})>o;
	my $iuserinfo		= qr<(?<user>(${iunreserved}|${pctencoded}|${subdelims}|:)*)>o;
	my $iauthority		= qr<(?<authority>(${iuserinfo}@)?${ihost}(:${port})?)>o;
	my $irelativepart	= qr<
							(//${iauthority}${ipathabempty})
						|	${ipathabsolute}
						|	${ipathnoscheme}
						|	${ipathempty}
						>xo;
	my $irelativeref	= qr<${irelativepart}([?]${iquery})?(#${ifragment})?>o;
	my $ihierpart		= qr<(//${iauthority}${ipathabempty})|(${ipathabsolute})|(${ipathrootless})|(${ipathempty})>o;
	my $absoluteIRI		= qr<${scheme}:${ihierpart}([?]${iquery})?>o;
	my $IRI				= qr<${scheme}:${ihierpart}([?]${iquery})?(#${ifragment})?>o;
	my $IRIreference	= qr<${IRI}|${irelativeref}>o;
	sub _parse_components {
		my $self	= shift;
		my $v		= shift;
		my $c;
		
		if ($v =~ /^${IRIreference}$/o) {
			%$c = %+;
		} else {
			use Data::Dumper;
			die "Not a valid IRI? " . Dumper($v);
		}
		
		$c->{path}	//= '';
		$self->_set_components($c);
		$self->_initialized(1);
	}
	
	sub _merge {
		my $self	= shift;
		my $base	= shift;
		
		my $bc		= $base->components;
		my $c		= $self->components;
		my $base_has_authority	= ($bc->{user} or $bc->{port} or defined($bc->{host}));
		if ($base_has_authority and not($bc->{path})) {
			return "/" . $c->{path};
		} else {
			my $bp	= $bc->{path};
			my @pathParts	= split('/', $bp, -1);	# -1 limit means $path='/' splits into ('', '')
			pop(@pathParts);
			push(@pathParts, $c->{path});
			my $path	= join('/', @pathParts);
			return $path;
		}
	}

	sub _remove_dot_segments {
		my $self	= shift;
		my $input	= shift;
		my @output;
		while (length($input)) {
			if ($input =~ m<^[.][.]/>) {
				substr($input, 0, 3)	= '';
			} elsif ($input =~ m<^[.]/>) {
				substr($input, 0, 2)	= '';
			} elsif ($input =~ m<^/[.]/>) {
				substr($input, 0, 3)	= '/';
			} elsif ($input eq '/.') {
				$input	= '/';
			} elsif ($input =~ m<^/[.][.]/>) {
				substr($input, 0, 4)	= '/';
				pop(@output);
			} elsif ($input eq '/..') {
				$input	= '/';
				pop(@output);
			} elsif ($input eq '.') {
				$input	= '';
			} elsif ($input eq '..') {
				$input	= '';
			} else {
				my $leadingSlash	= ($input =~ m<^/>);
				if ($leadingSlash) {
					substr($input, 0, 1)	= '';
				}
				my ($part, @parts)	= split('/', $input, -1);
				$part	//= '';
				if (scalar(@parts)) {
					unshift(@parts, '');
				}
				$input	= join('/', @parts);
				if ($leadingSlash) {
					$part	= "/$part";
				}
				push(@output, $part);
			}
		}
		my $newPath = join('', @output);
		return $newPath;
	}

	sub _resolved_components {
		my $self	= shift;
		my $value	= $self->value;
		if ($self->has_base and not($self->components->{scheme})) {
			# Resolve IRI relative to the base IRI
			my $base	= $self->base;
			my $v		= $self->value;
			my $bv		= $base->value;
# 			warn "resolving IRI <$v> relative to the base IRI <$bv>";
			my %components	= %{ $self->components };
			my %base		= %{ $base->components };
			my %target;
			
			if ($components{scheme}) {
				foreach my $k (qw(scheme user port host path query)) {
					if (exists $components{$k}) {
						$target{$k} = $components{$k};
					}
				}
			} else {
				if ($components{user} or $components{port} or defined($components{host})) {
					foreach my $k (qw(scheme user port host query)) {
						if (exists $components{$k}) {
							$target{$k} = $components{$k};
						}
					}
					my $path		= $components{path};
					$target{path}	= $self->_remove_dot_segments($path);
				} else {
					if ($components{path} eq '') {
						$target{path}	= $base{path};
						if ($components{query}) {
							$target{query}	= $components{query};
						} else {
							if ($base{query}) {
								$target{query}	= $base{query};
							}
						}
					} else {
						if ($components{path} =~ m<^/>) {
							my $path		= $components{path};
							$target{path}	= $self->_remove_dot_segments($path);
						} else {
							my $path		= $self->_merge($base);
							$target{path}	= $self->_remove_dot_segments($path);
						}
						if (defined($components{query})) {
							$target{query}	= $components{query};
						}
					}
					if ($base{user} or $base{port} or defined($base{host})) {
						foreach my $k (qw(user port host)) {
							if (exists $base{$k}) {
								$target{$k} = $base{$k};
							}
						}
					}
				}
				if (defined($base{scheme})) {
					$target{scheme} = $base{scheme};
				}
			}
			
			if (defined($components{fragment})) {
				$target{fragment}	= $components{fragment};
			}
			
			return \%target;
		}
		return $self->components;
	}
	
	sub _abs {
		my $self	= shift;
		my $value	= $self->_string_from_components( $self->resolved_components );
		return $value;
	}

=item C<< rel ( $base ) >>

Returns a new relative IRI object which, when resolved against the C<< $base >>
IRI, is equal to this IRI.

=cut

	sub rel {
		# based on code in URI <https://metacpan.org/source/OALDERS/URI-1.76/lib/URI/_generic.pm#L191>
		my $self	= shift;
		my $base	= shift;
		my $rel		= IRI->new(value => $self->abs);
		
		if (($base->scheme // '') ne ($rel->scheme // '')) {
			return IRI->new(value => $rel->abs);
		}

		my $scheme = $rel->scheme;
		my $auth   = $rel->authority;
		my $path   = $rel->path;
		
		if (!defined($scheme) and !defined($auth)) {
			return $rel;
		}

		my $bscheme = $base->scheme;
		my $bauth   = $base->authority;
		my $bpath   = $base->path;

		for ($bscheme, $bauth, $auth) {
			$_ = '' unless defined($_);
		}
		
		if ($scheme eq $bscheme) {
			$rel->scheme(undef);
		}
		
		unless ($scheme eq $bscheme and $auth eq $bauth) {
			return IRI->new(value => $rel->_abs);
		}

		for ($path, $bpath) {
			$_ = "/$_" unless m{^/};
		}

		# Make it relative by eliminating:
		# the scheme,
		$rel->scheme(undef);

		# ... and authority
		$rel->host(undef);
		$rel->port(undef);
		$rel->user(undef);
		
		
		my @rparts	= split('/', $path);
		my @bparts	= split('/', $bpath);
		shift(@rparts);
		shift(@bparts);
		if (scalar(@rparts) and (scalar(@bparts) and $rparts[0] ne $bparts[0])) {
			# use an absolute path, because $rel differs from $base at the very beginning
		} else {
			# This loop is based on code from Nicolai Langfeldt <janl@ifi.uio.no>.
			# First we calculate common initial path components length ($li).
			my $li = 1;
			while (1) {
				my $i = index($path, '/', $li);
				last if $i < 0 ||
					$i != index($bpath, '/', $li) ||
					substr($path,$li,$i-$li) ne substr($bpath,$li,$i-$li);
				$li=$i+1;
			}
		
			# then we nuke it from both paths
			substr($path, 0,$li) = '';
			substr($bpath,0,$li) = '';


			if ($path eq $bpath) {
				$rel->path('');
				if (defined($rel->query) and defined($base->query)) {
					if ($rel->query eq $base->query) {
						$rel->query(undef);
					} else {
						#
					}
				} elsif (defined($rel->query)) {
					#
				} elsif (defined($base->query)) {
					$rel->path($path);
				} else {
					#
				}
			} else {
				# Add one "../" for each path component left in the base path
				$path = ('../' x $bpath =~ tr|/|/|) . $path;
				$path = "./" if $path eq '';
				$rel->path($path);
			}
		}
		return IRI->new(value => $rel->_abs);
	}

	sub as_string {
		my $self	= shift;
		if ($self->has_base || $self->has_resolved_components) {
			return $self->abs;
		} else {
			return $self->value;
		}
	}
	
	sub _string_from_components {
		my $self		= shift;
		my $components	= shift;
		my $iri			= "";
		if (my $s = $components->{scheme}) {
			$iri	.= "${s}:";
		}
		
		if ($components->{user} or $components->{port} or defined($components->{host})) {
			# has authority
			$iri .= "//";
			if (my $u = $components->{user}) {
				$iri	.= sprintf('%s@', $u);
			}
			if (defined(my $h = $components->{host})) {
				$iri	.= $h // '';
			}
			if (my $p = $components->{port}) {
				$iri	.= ":$p";
			}
		}
		
		if (defined(my $p = $components->{path})) {
			$iri	.= $p;
		}
		
		if (defined(my $q = $components->{query})) {
			$iri	.= '?' . $q;
		}
		
		if (defined(my $f = $components->{fragment})) {
			$iri	.= '#' . $f;
		}
		
		return $iri;
	}
	
	sub _encode {
		my $str	= shift;
		$str	=~ s~([%])~'%' . sprintf('%02x', ord($1))~ge;	# gen-delims
		$str	=~ s~([/:?#@]|\[|\])~'%' . sprintf('%02x', ord($1))~ge;	# gen-delims
		$str	=~ s~([$!&'()*+,;=])~'%' . sprintf('%02x', ord($1))~ge;	# sub-delims
		return $str;
	}
	
	sub _unencode {
		my $str	= shift;
		if (defined($str)) {
			$str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
		}
		return $str;
	}
	
=item C<< query_form >>

Returns a HASH of key-value mappings for the unencoded, parsed query form data.

=cut

	sub query_form {
		my $self	= shift;
		my $q		= $self->query // return;
		my @pairs	= split(/&/, $q);
		return map { _unencode($_) } map { split(/=/, $_) } @pairs;
	}

=item C<< set_query_param ( $key => $value ) >>

sets the respective query form value and returns a new L<IRI> object.

=cut

	sub set_query_param {
		my $self	= shift;
		my $q		= $self->query // return;
		my %map		= map { _unencode($_) } map { split(/=/, $_) } split(/&/, $q);
		while (my ($k, $v)	= splice(@_, 0, 2)) {
			$map{$k}	= $v;
		}
		
		my %c		= %{ $self->components };
		my @pairs	= map { join('=', (_encode($_), _encode($map{$_}))) } keys %map;
		warn Dumper(\@pairs);
		$c{query}	= join('&', @pairs);
		
		my $v		= $self->_string_from_components(\%c);
		return $self->new( value => $v );
	}
}

1;

__END__

=back

=head1 SEE ALSO

L<http://www.ietf.org/rfc/rfc3987.txt>

=head1 AUTHOR

Gregory Todd Williams  C<< <gwilliams@cpan.org> >>

=head1 COPYRIGHT

Copyright (c) 2014--2018 Gregory Todd Williams. This
program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=cut