package JSONSchema::Validator::Format;

# ABSTRACT: Formats of JSON Schema specification

use strict;
use warnings;
use Time::Piece;

use Scalar::Util 'looks_like_number';

our @ISA = 'Exporter';
our @EXPORT_OK = qw(
    validate_date_time validate_date validate_time
    validate_email validate_hostname
    validate_idn_email
    validate_uuid
    validate_ipv4 validate_ipv6
    validate_byte
    validate_int32 validate_int64
    validate_float validate_double
    validate_regex
    validate_json_pointer validate_relative_json_pointer
    validate_uri validate_uri_reference
    validate_iri validate_iri_reference
    validate_uri_template
);

my $DATE_PATTERN = qr/(\d{4})-(\d\d)-(\d\d)/;
my $TIME_PATTERN = qr/(\d\d):(\d\d):(\d\d)(?:\.\d+)?/;
my $ZONE_PATTERN = qr/[zZ]|([+-])(\d\d):(\d\d)/;
my $DATETIME_PATTERN = qr/^${DATE_PATTERN}[tT ]${TIME_PATTERN}(?:${ZONE_PATTERN})?$/;
my $DATE_PATTERN_FULL = qr/\A${DATE_PATTERN}\z/;
my $TIME_PATTERN_FULL = qr/\A${TIME_PATTERN}(?:${ZONE_PATTERN})?\z/;
my $HEX_PATTERN = qr/[0-9A-Fa-f]/;
my $UUID_PATTERN = qr/\A${HEX_PATTERN}{8}-${HEX_PATTERN}{4}-${HEX_PATTERN}{4}-[089abAB]${HEX_PATTERN}{3}-${HEX_PATTERN}{12}\z/;
my $IPV4_OCTET_PATTERN = qr/\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]/;
my $IPV4_PATTERN = qr/${IPV4_OCTET_PATTERN}(?:\.${IPV4_OCTET_PATTERN}){3}/;
my $IPV4_FINAL_PATTERN = qr/\A${IPV4_PATTERN}\z/;
my $IPV6_SINGLE_PATTERN = qr/\A(?:${HEX_PATTERN}{1,4}:){7}${HEX_PATTERN}{1,4}\z/;
my $IPV6_GROUP_PATTERN = qr/(?:${HEX_PATTERN}{1,4}:)*${HEX_PATTERN}{1,4}/;
my $IPV6_MULTI_GROUP_PATTERN = qr/\A(?:${IPV6_GROUP_PATTERN}|)::(?:${IPV6_GROUP_PATTERN}|)\z/;
my $IPV6_SINGLE_IPV4_PATTERN = qr/\A((?:${HEX_PATTERN}{1,4}:){6})((?:\d{1,3}\.){3}\d{1,3})\z/;
my $IPV6_MULTI_GROUP_IPV4_PATTERN = qr/\A((?:${IPV6_GROUP_PATTERN}|)::(?:${IPV6_GROUP_PATTERN}:|))((?:\d{1,3}\.){3}\d{1,3})\z/;
my $BASE64_PATTERN = qr/\A(?:|[A-Za-z0-9\+\/]+=?=?)\z/;
my $INTEGER_PATTERN = qr/\A[\+\-]?\d+\z/;
my $UCSCHAR_PATTERN = qr/
    [\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}] |
    [\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}] |
    [\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}] |
    [\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}] |
    [\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}] |
    [\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}]
/x;
my $IPRIVATE_PATTERN = qr/[\x{E000}-\x{F8FF}\x{F0000}-\x{FFFFD}\x{100000}-\x{10FFFD}]/;
my $IPV6_PATTERN = do {
    my $HEXDIG = qr/[A-Fa-f0-9]/;
    my $h16 = qr/${HEXDIG}{1,4}/;
    my $ls32 = qr/(?:${h16}:${h16})|${IPV4_PATTERN}/;
    qr/
                                        (?:${h16}:){6} ${ls32} |
                                        :: (?:${h16}:){5} ${ls32} |
        (?:                 ${h16})? :: (?:${h16}:){4} ${ls32} |
        (?:(?:${h16}:){0,1} ${h16})? :: (?:${h16}:){3} ${ls32} |
        (?:(?:${h16}:){0,2} ${h16})? :: (?:${h16}:){2} ${ls32} |
        (?:(?:${h16}:){0,3} ${h16})? :: (?:${h16}:){1} ${ls32} |
        (?:(?:${h16}:){0,4} ${h16})? ::                ${ls32} |
        (?:(?:${h16}:){0,5} ${h16})? ::                ${h16} |
        (?:(?:${h16}:){0,6} ${h16})? ::
    /x;
};
my $IPV6_FINAL_PATTERN = qr/\A${IPV6_PATTERN}\z/;

my $HOSTNAME_PATTERN = do {
    my $ldh_str = qr/(?:[A-Za-z0-9\-])+/;
    my $label = qr/[A-Za-z](?:(?:${ldh_str})?[A-Za-z0-9])?/;
    qr/\A${label}(?:\.${label})*\z/;
};

my $EMAIL_PATTERN = do {
    use re 'eval';
    my $obs_NO_WS_CTL = qr/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/;
    my $obs_qp = qr/\\(?:\x00|${obs_NO_WS_CTL}|\n|\r)/;
    my $quoted_pair = qr/\\(?:[\x21-\x7e]|[ \t])|${obs_qp}/;
    my $obs_FWS = qr/[ \t]+(?:\r\n[ \t]+)*/;
    my $FWS = qr/(?:[ \t]*\r\n)?[ \t]+|${obs_FWS}/;
    my $ctext = qr/[\x21-\x27\x2a-\x5b\x5d-\x7e]|${obs_NO_WS_CTL}/;
    my $comment;
    $comment = qr/\((?:(?:${FWS})?(?:${ctext}|${quoted_pair}|(??{$comment})))*(?:${FWS})?\)/;
    my $CFWS = qr/(?:(?:${FWS})?${comment})+(?:${FWS})?|${FWS}/;
    my $atext = qr/[A-Za-z0-9!#\$\%&'*+\/=?\^_`{|}~\-]/;
    my $dot_atom_text = qr/(?:${atext})+(?:\.(?:${atext})+)*/;
    my $dot_atom = qr/(?:${CFWS})?${dot_atom_text}(?:${CFWS})?/;
    my $obs_dtext = qr/${obs_NO_WS_CTL}|${quoted_pair}/;
    my $dtext = qr/[\x21-\x5a\x5e-\x7e]|${obs_dtext}/;
    my $domain_literal = qr/(?:${CFWS})?\[(?:(?:${FWS})?${dtext})*(?:${FWS})?\](?:${CFWS})?/;
    my $obs_qtext = $obs_NO_WS_CTL;
    my $qtext = qr/[\x21\x23-\x5b\x5d-\x7e]|${obs_qtext}/;
    my $qcontent = qr/${qtext}|${quoted_pair}/;
    my $quoted_string = qr/(?:${CFWS})?\x22(?:(?:${FWS})?${qcontent})*(?:${FWS})?\x22(?:${CFWS})?/;
    my $atom = qr/(?:${CFWS})?(?:${atext})+(?:${CFWS})?/;
    my $word = qr/${atom}|${quoted_string}/;
    my $obs_local_part = qr/${word}(?:\.${word})*/;
    my $local_part = qr/${dot_atom}|${quoted_string}|${obs_local_part}/;
    my $obs_domain = qr/${atom}(?:\.${atom})*/;
    my $domain = qr/${dot_atom}|${domain_literal}|${obs_domain}/;
    qr/\A${local_part}\@${domain}\z/;
};

my $IDN_EIMAIL_PATTERN = do {
    # from rfc3629 UTF-{1,4} given in octet sequence of utf8
    # transform it to unicode number
    my $UTF8_non_ascii = qr/
        [\x80-\x{D7FF}] | [\x{E000}-\x{FDCF}] | [\x{FDF0}-\x{FFFD}] |
        [\x{10000}-\x{1FFFD}] | [\x{20000}-\x{2FFFD}] | [\x{30000}-\x{3FFFD}] |
        [\x{40000}-\x{4fffd}] | [\x{50000}-\x{5fffd}] | [\x{60000}-\x{6fffd}] |
        [\x{70000}-\x{7fffd}] | [\x{80000}-\x{8fffd}] | [\x{90000}-\x{9fffd}] |
        [\x{a0000}-\x{afffd}] | [\x{b0000}-\x{bfffd}] | [\x{c0000}-\x{cfffd}] |
        [\x{d0000}-\x{dfffd}] | [\x{e0000}-\x{efffd}] | [\x{f0000}-\x{ffffd}] |
        [\x{100000}-\x{10fffd}]
    /x;
    my $atext = qr/[A-Za-z0-9!#\$\%&'*+\/=?\^_`{|}~\-]|${UTF8_non_ascii}/;
    my $quoted_pairSMTP = qr/\x5c[\x20-\x7e]/;
    my $qtextSMTP = qr/[\x20\x21\x23-\x5b\x5d-\x7e]|${UTF8_non_ascii}/;
    my $QcontentSMTP = qr/${qtextSMTP}|${quoted_pairSMTP}/;
    my $quoted_string = qr/\x22(?:${QcontentSMTP})*\x22/;
    my $atom = qr/(?:${atext})+/;
    my $dot_string = qr/${atom}(?:\.${atom})*/;
    my $local_part = qr/${dot_string}|${quoted_string}/;
    my $let_dig = qr/[A-Za-z0-9]/;
    my $ldh_str = qr/(?:[A-Za-z0-9\-])*${let_dig}/;
    my $Standardized_tag = qr/${ldh_str}/;
    my $dcontent = qr/[\x21-\x5a\x5e-\x7e]/;
    my $General_address_literal = qr/${Standardized_tag}:(?:${dcontent})+/;
    my $IPv6_address_literal = qr/IPv6:${IPV6_PATTERN}/;
    my $address_literal = qr/\[(?:${IPV4_PATTERN}|${IPv6_address_literal}|${General_address_literal})\]/;
    my $sub_domain = qr/${let_dig}(?:${ldh_str})?|(?:${UCSCHAR_PATTERN})*/; # couldn't find ABNF for U-label from rfc5890 use ucschar instead
    my $domain = qr/${sub_domain}(?:\.${sub_domain})*/;
    qr/\A${local_part}\@(?:${domain}|${address_literal})\z/;
};

sub URI_IRI_REGEXP_BUILDER {
    my $is_iri = shift;

    my $alpha = qr/[A-Za-z]/;
    my $HEXDIG = qr/[A-Fa-f0-9]/;
    my $h16 = qr/${HEXDIG}{1,4}/;
    my $sub_delims = qr/[!\$&'\(\)\*\+,;=]/;
    my $gen_delims = qr/[:\/\?#\[\]\@]/;
    my $reserved = qr/${gen_delims}|${sub_delims}/;
    my $unreserved = qr/${alpha}|\d|\-|\.|_|~/;
    my $iunreserved = $unreserved;
    if ($is_iri) {
        $iunreserved = qr/${alpha}|\d|\-|\.|_|~|${UCSCHAR_PATTERN}/;
    }
    my $pct_encoded = qr/\%${HEXDIG}${HEXDIG}/;
    my $pchar = qr/${iunreserved}|${pct_encoded}|${sub_delims}|:|\@/;
    my $fragment = qr/(?:${pchar}|\/|\?)*/;
    my $query = qr/(?:${pchar}|\/|\?)*/;
    if ($is_iri) {
        $query = qr/(?:${pchar}|${IPRIVATE_PATTERN}|\/|\?)*/;
    }
    my $segment_nz_nc = qr/(?:${iunreserved}|${pct_encoded}|${sub_delims}|\@)+/;
    my $segment_nz = qr/(?:${pchar})+/;
    my $segment = qr/(?:${pchar})*/;
    my $path_rootless = qr/${segment_nz}(?:\/${segment})*/;
    my $path_noscheme = qr/${segment_nz_nc}(?:\/${segment})*/;
    my $path_absolute = qr/\/(?:${segment_nz}(?:\/${segment})*)?/;
    my $path_abempty = qr/(?:\/${segment})*/;
    my $reg_name = qr/(?:${iunreserved}|${pct_encoded}|${sub_delims})*/;
    my $IPvFuture = qr/v${HEXDIG}+\.(?:${unreserved}|${sub_delims}|:)+/; # must be unreserved, not iunreserved
    my $IP_literal = qr/\[(?:${IPV6_PATTERN}|${IPvFuture})\]/;
    my $port = qr/\d*/;
    my $host = qr/${IP_literal}|${IPV4_PATTERN}|${reg_name}/;
    my $userinfo = qr/(?:${iunreserved}|${pct_encoded}|${sub_delims}|:)*/;
    my $authority = qr/(?:${userinfo}\@)?${host}(?::${port})?/;
    my $scheme = qr/${alpha}(?:${alpha}|\d|\+|\-|\.)*/;
    my $hier_part = qr!//${authority}${path_abempty}|${path_absolute}|${path_rootless}|!;
    my $uri = qr/\A${scheme}:${hier_part}(?:\?${query})?(?:#${fragment})?\z/;
    my $relative_part = qr!//${authority}${path_abempty}|${path_absolute}|${path_noscheme}|!;
    my $relative_ref = qr/\A${relative_part}(?:\?${query})?(?:#${fragment})?\z/;
    my $uri_reference = qr/${uri}|${relative_ref}/;
    ($uri, $uri_reference);
}

my ($URI_PATTERN, $URI_REFERENCE_PATTERN) = URI_IRI_REGEXP_BUILDER(0);
my ($IRI_PATTERN, $IRI_REFERENCE_PATTERN) = URI_IRI_REGEXP_BUILDER(1);

my $URI_TEMPLATE_PATTERN = do {
    my $alpha = qr/[A-Za-z]/;
    my $HEXDIG = qr/[A-Fa-f0-9]/;
    my $pct_encoded = qr/\%${HEXDIG}${HEXDIG}/;
    my $unreserved = qr/${alpha}|\d|\-|\.|_|~/;
    my $sub_delims = qr/[!\$&'\(\)\*\+,;=]/;
    my $gen_delims = qr/[:\/\?#\[\]\@]/;
    my $reserved = qr/${gen_delims}|${sub_delims}/;
    my $explode = qr/\*/;
    my $max_length = qr/[1-9]\d{0,3}/;
    my $prefix = qr/:${max_length}/;
    my $modifier_level4 = qr/${prefix}|${explode}/;
    my $varchar = qr/${alpha}|\d|_|${pct_encoded}/;
    my $varname = qr/${varchar}(?:\.?${varchar})*/;
    my $varspec = qr/${varname}(?:${modifier_level4})?/;
    my $variable_list = qr/${varspec}(?:,${varspec})*/;
    my $op_reserve = qr/[=,!\@\|]/;
    my $op_level3 = qr/[\.\/;\?&]/;
    my $op_level2 = qr/[\+#]/;
    my $operator = qr/${op_level2}|${op_level3}|${op_reserve}/;
    my $expression = qr/\{(?:${operator})?${variable_list}\}/;
    my $literals = qr/
        [\x21\x23\x24\x26\x28-\x3B\x3D\x3F-\x5B] |
        [\x5D\x5F\x61-\x7A\x7E] |
        ${UCSCHAR_PATTERN} |
        ${IPRIVATE_PATTERN} |
        ${pct_encoded}
    /x;
    qr/\A(?:${literals}|${expression})*\z/;
};

sub validate_date_time {
    my @dt = $_[0] =~ $DATETIME_PATTERN;

    my ($Y, $m, $d, $H, $M, $S, $sign, $HH, $MM) = @dt;

    my $r = _validate_date($Y, $m, $d);
    return 0 unless $r;

    $r = _validate_time($H, $M, $S, $sign, $HH, $MM);
    return 0 unless $r;

    return 1;
}

sub validate_date {
    my @dt = $_[0] =~ $DATE_PATTERN_FULL;
    return _validate_date(@dt);
}

sub _validate_date {
    my ($Y, $m, $d) = @_;

    for ($Y, $m, $d) {
        return 0 unless defined $_;
    }

    my $date2;
    eval { $date2 = Time::Piece->strptime("$Y-$m-$d", '%Y-%m-%d'); };
    return 0 if $@;

    # need to recheck values (test 2019-02-30)
    return 0 unless $date2->year == $Y;
    return 0 unless $date2->mon == $m;
    return 0 unless $date2->mday == $d;

    return 1;
}

sub validate_time {
    my @dt = $_[0] =~ $TIME_PATTERN_FULL;
    return _validate_time(@dt);
}

sub _validate_time {
    my ($H, $M, $S, $sign, $HH, $MM) = @_;

    for ($H, $M, $S) {
        return 0 unless defined $_;
    }

    return 0 if $H > 23;
    return 0 if $M > 59;
    return 0 if $S > 60;

    if ($HH && $MM) {
        return 0 if $HH > 23;
        return 0 if $MM > 59;
    }

    return 1;
}

sub validate_uuid {
    # from rfc4122
    # Today, there are versions 1-5. Version 6-F for future use.
    # [089abAB] - variants
    return $_[0] =~ $UUID_PATTERN ? 1 : 0;
}

sub validate_ipv4 {
    # from rfc2673
    return $_[0] =~ $IPV4_FINAL_PATTERN ? 1 : 0;
}

sub validate_ipv6 {
    # from rfc2373
    return $_[0] =~ $IPV6_FINAL_PATTERN ? 1 : 0;
}

sub validate_hostname {
    # from rfc1034
    my $hostname = shift;
    return 0 if length $hostname > 255;

    # remove root empty label
    $hostname =~ s/\.\z//;

    return 0 unless $hostname =~ $HOSTNAME_PATTERN;

    my @labels = split /\./, $hostname, -1;
    my @filtered = grep { length() <= 63 } @labels;
    return 0 unless scalar(@labels) == scalar(@filtered);
    return 1;
}

sub validate_email {
    # from rfc5322 section 3.4.1 addr-spec
    # not compatible with rfc5321 section 4.1.2 Mailbox
    return $_[0] =~ $EMAIL_PATTERN ? 1 : 0;
}

sub validate_idn_email {
    # from rfc6531 section 3.3 which extend rfc5321 section 4.1.2
    # not compatible with rfc5322 section 3.4.1 add-spec
    return $_[0] =~ $IDN_EIMAIL_PATTERN ? 1 : 0;
}

sub validate_byte {
    return 0 if length($_[0]) % 4 != 0;
    return 1 if $_[0] =~ $BASE64_PATTERN;
    return 0;
}

sub validate_int32 {
    return _validate_int_32_64($_[0], '214748364');
}

sub validate_int64 {
    return _validate_int_32_64($_[0], '922337203685477580');
}

sub _validate_int_32_64 {
    my ($num, $abs) = @_;
    return 0 unless $num =~ $INTEGER_PATTERN;

    my $sign = index($num, '-') == -1 ? 1 : -1;
    $num =~ s/\A[\+\-]?0*//;

    my $length_num = length $num;
    my $length_abs = 1 + length $abs;

    return 0 if $length_num > $length_abs;
    return 1 if $length_num < $length_abs;

    return 1 if $sign > 0 && (($abs . '7') cmp $num) >= 0;
    return 1 if $sign < 0 && (($abs . '8') cmp $num) >= 0;
    return 0;
}

sub validate_json_pointer {
    # from rfc6901:
    # CORE::state $pointer_regexp = do {
    #     my $escaped = qr/~[01]/;
    #     my $unescaped = qr/\x00-\x2e\x30-\x7d\x7f-\x10FFFF/;
    #     my $reference_token = qr/(?:${unescaped}|${escaped})*/;
    #     qr/(?:\/${reference_token})*/;
    # };

    # more simple solution:
    return 1 if $_[0] eq '';
    return 0 unless index($_[0], '/') == 0;
    return 0 if $_[0] =~ m/~(?:[^01]|\z)/;
    return 1;
}

sub validate_relative_json_pointer {
    # from draft-handrews-relative-json-pointer-01:
    # CORE::state $pointer_regexp = do {
    #     my $non_negative_integer = qr/0|[1-9][0-9]*/;
    #     my $relative_json_pointer = qr/${non_negative_integer}(?:#|${json_pointer})/;
    # };

    # more simple solution:
    my ($integer, $pointer) = $_[0] =~ m/\A(0|[1-9][0-9]*)(.*)\z/s;
    return 0 unless defined $integer;
    return 1 if $pointer eq '#';
    return validate_json_pointer($pointer);
}

sub validate_uri {
    # from rfc3986 Appendix A.
    return $_[0] =~ $URI_PATTERN ? 1 : 0;
}

sub validate_uri_reference {
    # from rfc3986 Appendix A.
    return $_[0] =~ $URI_REFERENCE_PATTERN ? 1 : 0;
}

sub validate_iri {
    # from rfc3987 section 2.2
    return $_[0] =~ $IRI_PATTERN ? 1 : 0;
}

sub validate_iri_reference {
    # from rfc3987 section 2.2
    return $_[0] =~ $IRI_REFERENCE_PATTERN ? 1 : 0;
}

sub validate_uri_template {
    # from rfc6570
    return $_[0] =~ $URI_TEMPLATE_PATTERN ? 1 : 0;
}

# validators below need to be improved

# no difference between double and float
sub validate_float {
    return 0 if $_[0] =~ m/\A\s+|\s+\z/;
    return 0 unless looks_like_number $_[0];
    return 1;
}

sub validate_double {
    return validate_float($_[0]);
}

# match perl regex but need ecma-262 regex
sub validate_regex {
    return eval { qr/$_[0]/; } ? 1 : 0;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

JSONSchema::Validator::Format - Formats of JSON Schema specification

=head1 VERSION

version 0.008

=head1 AUTHORS

=over 4

=item *

Alexey Stavrov <logioniz@ya.ru>

=item *

Ivan Putintsev <uid@rydlab.ru>

=item *

Anton Fedotov <tosha.fedotov.2000@gmail.com>

=item *

Denis Ibaev <dionys@gmail.com>

=item *

Andrey Khozov <andrey@rydlab.ru>

=back

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2021 by Alexey Stavrov.

This is free software, licensed under:

  The MIT (X11) License

=cut