package JSONSchema::Validator::Format; # ABSTRACT: Formats of JSON Schema specification use strict; use warnings; use Time::Piece; use Scalar::Util 'looks_like_number'; our @ISA = 'Exporter'; our @EXPORT_OK = qw( validate_date_time validate_date validate_time validate_email validate_hostname validate_idn_email validate_uuid validate_ipv4 validate_ipv6 validate_byte validate_int32 validate_int64 validate_float validate_double validate_regex validate_json_pointer validate_relative_json_pointer validate_uri validate_uri_reference validate_iri validate_iri_reference validate_uri_template ); my $DATE_PATTERN = qr/(\d{4})-(\d\d)-(\d\d)/; my $TIME_PATTERN = qr/(\d\d):(\d\d):(\d\d)(?:\.\d+)?/; my $ZONE_PATTERN = qr/[zZ]|([+-])(\d\d):(\d\d)/; my $DATETIME_PATTERN = qr/^${DATE_PATTERN}[tT ]${TIME_PATTERN}(?:${ZONE_PATTERN})?$/; my $DATE_PATTERN_FULL = qr/\A${DATE_PATTERN}\z/; my $TIME_PATTERN_FULL = qr/\A${TIME_PATTERN}(?:${ZONE_PATTERN})?\z/; my $HEX_PATTERN = qr/[0-9A-Fa-f]/; my $UUID_PATTERN = qr/\A${HEX_PATTERN}{8}-${HEX_PATTERN}{4}-${HEX_PATTERN}{4}-[089abAB]${HEX_PATTERN}{3}-${HEX_PATTERN}{12}\z/; my $IPV4_OCTET_PATTERN = qr/\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]/; my $IPV4_PATTERN = qr/${IPV4_OCTET_PATTERN}(?:\.${IPV4_OCTET_PATTERN}){3}/; my $IPV4_FINAL_PATTERN = qr/\A${IPV4_PATTERN}\z/; my $IPV6_SINGLE_PATTERN = qr/\A(?:${HEX_PATTERN}{1,4}:){7}${HEX_PATTERN}{1,4}\z/; my $IPV6_GROUP_PATTERN = qr/(?:${HEX_PATTERN}{1,4}:)*${HEX_PATTERN}{1,4}/; my $IPV6_MULTI_GROUP_PATTERN = qr/\A(?:${IPV6_GROUP_PATTERN}|)::(?:${IPV6_GROUP_PATTERN}|)\z/; my $IPV6_SINGLE_IPV4_PATTERN = qr/\A((?:${HEX_PATTERN}{1,4}:){6})((?:\d{1,3}\.){3}\d{1,3})\z/; my $IPV6_MULTI_GROUP_IPV4_PATTERN = qr/\A((?:${IPV6_GROUP_PATTERN}|)::(?:${IPV6_GROUP_PATTERN}:|))((?:\d{1,3}\.){3}\d{1,3})\z/; my $BASE64_PATTERN = qr/\A(?:|[A-Za-z0-9\+\/]+=?=?)\z/; my $INTEGER_PATTERN = qr/\A[\+\-]?\d+\z/; my $UCSCHAR_PATTERN = qr/ [\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}] | [\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}] | [\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}] | [\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}] | [\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}] | [\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}] /x; my $IPRIVATE_PATTERN = qr/[\x{E000}-\x{F8FF}\x{F0000}-\x{FFFFD}\x{100000}-\x{10FFFD}]/; my $IPV6_PATTERN = do { my $HEXDIG = qr/[A-Fa-f0-9]/; my $h16 = qr/${HEXDIG}{1,4}/; my $ls32 = qr/(?:${h16}:${h16})|${IPV4_PATTERN}/; qr/ (?:${h16}:){6} ${ls32} | :: (?:${h16}:){5} ${ls32} | (?: ${h16})? :: (?:${h16}:){4} ${ls32} | (?:(?:${h16}:){0,1} ${h16})? :: (?:${h16}:){3} ${ls32} | (?:(?:${h16}:){0,2} ${h16})? :: (?:${h16}:){2} ${ls32} | (?:(?:${h16}:){0,3} ${h16})? :: (?:${h16}:){1} ${ls32} | (?:(?:${h16}:){0,4} ${h16})? :: ${ls32} | (?:(?:${h16}:){0,5} ${h16})? :: ${h16} | (?:(?:${h16}:){0,6} ${h16})? :: /x; }; my $IPV6_FINAL_PATTERN = qr/\A${IPV6_PATTERN}\z/; my $HOSTNAME_PATTERN = do { my $ldh_str = qr/(?:[A-Za-z0-9\-])+/; my $label = qr/[A-Za-z](?:(?:${ldh_str})?[A-Za-z0-9])?/; qr/\A${label}(?:\.${label})*\z/; }; my $EMAIL_PATTERN = do { use re 'eval'; my $obs_NO_WS_CTL = qr/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/; my $obs_qp = qr/\\(?:\x00|${obs_NO_WS_CTL}|\n|\r)/; my $quoted_pair = qr/\\(?:[\x21-\x7e]|[ \t])|${obs_qp}/; my $obs_FWS = qr/[ \t]+(?:\r\n[ \t]+)*/; my $FWS = qr/(?:[ \t]*\r\n)?[ \t]+|${obs_FWS}/; my $ctext = qr/[\x21-\x27\x2a-\x5b\x5d-\x7e]|${obs_NO_WS_CTL}/; my $comment; $comment = qr/\((?:(?:${FWS})?(?:${ctext}|${quoted_pair}|(??{$comment})))*(?:${FWS})?\)/; my $CFWS = qr/(?:(?:${FWS})?${comment})+(?:${FWS})?|${FWS}/; my $atext = qr/[A-Za-z0-9!#\$\%&'*+\/=?\^_`{|}~\-]/; my $dot_atom_text = qr/(?:${atext})+(?:\.(?:${atext})+)*/; my $dot_atom = qr/(?:${CFWS})?${dot_atom_text}(?:${CFWS})?/; my $obs_dtext = qr/${obs_NO_WS_CTL}|${quoted_pair}/; my $dtext = qr/[\x21-\x5a\x5e-\x7e]|${obs_dtext}/; my $domain_literal = qr/(?:${CFWS})?\[(?:(?:${FWS})?${dtext})*(?:${FWS})?\](?:${CFWS})?/; my $obs_qtext = $obs_NO_WS_CTL; my $qtext = qr/[\x21\x23-\x5b\x5d-\x7e]|${obs_qtext}/; my $qcontent = qr/${qtext}|${quoted_pair}/; my $quoted_string = qr/(?:${CFWS})?\x22(?:(?:${FWS})?${qcontent})*(?:${FWS})?\x22(?:${CFWS})?/; my $atom = qr/(?:${CFWS})?(?:${atext})+(?:${CFWS})?/; my $word = qr/${atom}|${quoted_string}/; my $obs_local_part = qr/${word}(?:\.${word})*/; my $local_part = qr/${dot_atom}|${quoted_string}|${obs_local_part}/; my $obs_domain = qr/${atom}(?:\.${atom})*/; my $domain = qr/${dot_atom}|${domain_literal}|${obs_domain}/; qr/\A${local_part}\@${domain}\z/; }; my $IDN_EIMAIL_PATTERN = do { # from rfc3629 UTF-{1,4} given in octet sequence of utf8 # transform it to unicode number my $UTF8_non_ascii = qr/ [\x80-\x{D7FF}] | [\x{E000}-\x{FDCF}] | [\x{FDF0}-\x{FFFD}] | [\x{10000}-\x{1FFFD}] | [\x{20000}-\x{2FFFD}] | [\x{30000}-\x{3FFFD}] | [\x{40000}-\x{4fffd}] | [\x{50000}-\x{5fffd}] | [\x{60000}-\x{6fffd}] | [\x{70000}-\x{7fffd}] | [\x{80000}-\x{8fffd}] | [\x{90000}-\x{9fffd}] | [\x{a0000}-\x{afffd}] | [\x{b0000}-\x{bfffd}] | [\x{c0000}-\x{cfffd}] | [\x{d0000}-\x{dfffd}] | [\x{e0000}-\x{efffd}] | [\x{f0000}-\x{ffffd}] | [\x{100000}-\x{10fffd}] /x; my $atext = qr/[A-Za-z0-9!#\$\%&'*+\/=?\^_`{|}~\-]|${UTF8_non_ascii}/; my $quoted_pairSMTP = qr/\x5c[\x20-\x7e]/; my $qtextSMTP = qr/[\x20\x21\x23-\x5b\x5d-\x7e]|${UTF8_non_ascii}/; my $QcontentSMTP = qr/${qtextSMTP}|${quoted_pairSMTP}/; my $quoted_string = qr/\x22(?:${QcontentSMTP})*\x22/; my $atom = qr/(?:${atext})+/; my $dot_string = qr/${atom}(?:\.${atom})*/; my $local_part = qr/${dot_string}|${quoted_string}/; my $let_dig = qr/[A-Za-z0-9]/; my $ldh_str = qr/(?:[A-Za-z0-9\-])*${let_dig}/; my $Standardized_tag = qr/${ldh_str}/; my $dcontent = qr/[\x21-\x5a\x5e-\x7e]/; my $General_address_literal = qr/${Standardized_tag}:(?:${dcontent})+/; my $IPv6_address_literal = qr/IPv6:${IPV6_PATTERN}/; my $address_literal = qr/\[(?:${IPV4_PATTERN}|${IPv6_address_literal}|${General_address_literal})\]/; my $sub_domain = qr/${let_dig}(?:${ldh_str})?|(?:${UCSCHAR_PATTERN})*/; # couldn't find ABNF for U-label from rfc5890 use ucschar instead my $domain = qr/${sub_domain}(?:\.${sub_domain})*/; qr/\A${local_part}\@(?:${domain}|${address_literal})\z/; }; sub URI_IRI_REGEXP_BUILDER { my $is_iri = shift; my $alpha = qr/[A-Za-z]/; my $HEXDIG = qr/[A-Fa-f0-9]/; my $h16 = qr/${HEXDIG}{1,4}/; my $sub_delims = qr/[!\$&'\(\)\*\+,;=]/; my $gen_delims = qr/[:\/\?#\[\]\@]/; my $reserved = qr/${gen_delims}|${sub_delims}/; my $unreserved = qr/${alpha}|\d|\-|\.|_|~/; my $iunreserved = $unreserved; if ($is_iri) { $iunreserved = qr/${alpha}|\d|\-|\.|_|~|${UCSCHAR_PATTERN}/; } my $pct_encoded = qr/\%${HEXDIG}${HEXDIG}/; my $pchar = qr/${iunreserved}|${pct_encoded}|${sub_delims}|:|\@/; my $fragment = qr/(?:${pchar}|\/|\?)*/; my $query = qr/(?:${pchar}|\/|\?)*/; if ($is_iri) { $query = qr/(?:${pchar}|${IPRIVATE_PATTERN}|\/|\?)*/; } my $segment_nz_nc = qr/(?:${iunreserved}|${pct_encoded}|${sub_delims}|\@)+/; my $segment_nz = qr/(?:${pchar})+/; my $segment = qr/(?:${pchar})*/; my $path_rootless = qr/${segment_nz}(?:\/${segment})*/; my $path_noscheme = qr/${segment_nz_nc}(?:\/${segment})*/; my $path_absolute = qr/\/(?:${segment_nz}(?:\/${segment})*)?/; my $path_abempty = qr/(?:\/${segment})*/; my $reg_name = qr/(?:${iunreserved}|${pct_encoded}|${sub_delims})*/; my $IPvFuture = qr/v${HEXDIG}+\.(?:${unreserved}|${sub_delims}|:)+/; # must be unreserved, not iunreserved my $IP_literal = qr/\[(?:${IPV6_PATTERN}|${IPvFuture})\]/; my $port = qr/\d*/; my $host = qr/${IP_literal}|${IPV4_PATTERN}|${reg_name}/; my $userinfo = qr/(?:${iunreserved}|${pct_encoded}|${sub_delims}|:)*/; my $authority = qr/(?:${userinfo}\@)?${host}(?::${port})?/; my $scheme = qr/${alpha}(?:${alpha}|\d|\+|\-|\.)*/; my $hier_part = qr!//${authority}${path_abempty}|${path_absolute}|${path_rootless}|!; my $uri = qr/\A${scheme}:${hier_part}(?:\?${query})?(?:#${fragment})?\z/; my $relative_part = qr!//${authority}${path_abempty}|${path_absolute}|${path_noscheme}|!; my $relative_ref = qr/\A${relative_part}(?:\?${query})?(?:#${fragment})?\z/; my $uri_reference = qr/${uri}|${relative_ref}/; ($uri, $uri_reference); } my ($URI_PATTERN, $URI_REFERENCE_PATTERN) = URI_IRI_REGEXP_BUILDER(0); my ($IRI_PATTERN, $IRI_REFERENCE_PATTERN) = URI_IRI_REGEXP_BUILDER(1); my $URI_TEMPLATE_PATTERN = do { my $alpha = qr/[A-Za-z]/; my $HEXDIG = qr/[A-Fa-f0-9]/; my $pct_encoded = qr/\%${HEXDIG}${HEXDIG}/; my $unreserved = qr/${alpha}|\d|\-|\.|_|~/; my $sub_delims = qr/[!\$&'\(\)\*\+,;=]/; my $gen_delims = qr/[:\/\?#\[\]\@]/; my $reserved = qr/${gen_delims}|${sub_delims}/; my $explode = qr/\*/; my $max_length = qr/[1-9]\d{0,3}/; my $prefix = qr/:${max_length}/; my $modifier_level4 = qr/${prefix}|${explode}/; my $varchar = qr/${alpha}|\d|_|${pct_encoded}/; my $varname = qr/${varchar}(?:\.?${varchar})*/; my $varspec = qr/${varname}(?:${modifier_level4})?/; my $variable_list = qr/${varspec}(?:,${varspec})*/; my $op_reserve = qr/[=,!\@\|]/; my $op_level3 = qr/[\.\/;\?&]/; my $op_level2 = qr/[\+#]/; my $operator = qr/${op_level2}|${op_level3}|${op_reserve}/; my $expression = qr/\{(?:${operator})?${variable_list}\}/; my $literals = qr/ [\x21\x23\x24\x26\x28-\x3B\x3D\x3F-\x5B] | [\x5D\x5F\x61-\x7A\x7E] | ${UCSCHAR_PATTERN} | ${IPRIVATE_PATTERN} | ${pct_encoded} /x; qr/\A(?:${literals}|${expression})*\z/; }; sub validate_date_time { my @dt = $_[0] =~ $DATETIME_PATTERN; my ($Y, $m, $d, $H, $M, $S, $sign, $HH, $MM) = @dt; my $r = _validate_date($Y, $m, $d); return 0 unless $r; $r = _validate_time($H, $M, $S, $sign, $HH, $MM); return 0 unless $r; return 1; } sub validate_date { my @dt = $_[0] =~ $DATE_PATTERN_FULL; return _validate_date(@dt); } sub _validate_date { my ($Y, $m, $d) = @_; for ($Y, $m, $d) { return 0 unless defined $_; } my $date2; eval { $date2 = Time::Piece->strptime("$Y-$m-$d", '%Y-%m-%d'); }; return 0 if $@; # need to recheck values (test 2019-02-30) return 0 unless $date2->year == $Y; return 0 unless $date2->mon == $m; return 0 unless $date2->mday == $d; return 1; } sub validate_time { my @dt = $_[0] =~ $TIME_PATTERN_FULL; return _validate_time(@dt); } sub _validate_time { my ($H, $M, $S, $sign, $HH, $MM) = @_; for ($H, $M, $S) { return 0 unless defined $_; } return 0 if $H > 23; return 0 if $M > 59; return 0 if $S > 60; if ($HH && $MM) { return 0 if $HH > 23; return 0 if $MM > 59; } return 1; } sub validate_uuid { # from rfc4122 # Today, there are versions 1-5. Version 6-F for future use. # [089abAB] - variants return $_[0] =~ $UUID_PATTERN ? 1 : 0; } sub validate_ipv4 { # from rfc2673 return $_[0] =~ $IPV4_FINAL_PATTERN ? 1 : 0; } sub validate_ipv6 { # from rfc2373 return $_[0] =~ $IPV6_FINAL_PATTERN ? 1 : 0; } sub validate_hostname { # from rfc1034 my $hostname = shift; return 0 if length $hostname > 255; # remove root empty label $hostname =~ s/\.\z//; return 0 unless $hostname =~ $HOSTNAME_PATTERN; my @labels = split /\./, $hostname, -1; my @filtered = grep { length() <= 63 } @labels; return 0 unless scalar(@labels) == scalar(@filtered); return 1; } sub validate_email { # from rfc5322 section 3.4.1 addr-spec # not compatible with rfc5321 section 4.1.2 Mailbox return $_[0] =~ $EMAIL_PATTERN ? 1 : 0; } sub validate_idn_email { # from rfc6531 section 3.3 which extend rfc5321 section 4.1.2 # not compatible with rfc5322 section 3.4.1 add-spec return $_[0] =~ $IDN_EIMAIL_PATTERN ? 1 : 0; } sub validate_byte { return 0 if length($_[0]) % 4 != 0; return 1 if $_[0] =~ $BASE64_PATTERN; return 0; } sub validate_int32 { return _validate_int_32_64($_[0], '214748364'); } sub validate_int64 { return _validate_int_32_64($_[0], '922337203685477580'); } sub _validate_int_32_64 { my ($num, $abs) = @_; return 0 unless $num =~ $INTEGER_PATTERN; my $sign = index($num, '-') == -1 ? 1 : -1; $num =~ s/\A[\+\-]?0*//; my $length_num = length $num; my $length_abs = 1 + length $abs; return 0 if $length_num > $length_abs; return 1 if $length_num < $length_abs; return 1 if $sign > 0 && (($abs . '7') cmp $num) >= 0; return 1 if $sign < 0 && (($abs . '8') cmp $num) >= 0; return 0; } sub validate_json_pointer { # from rfc6901: # CORE::state $pointer_regexp = do { # my $escaped = qr/~[01]/; # my $unescaped = qr/\x00-\x2e\x30-\x7d\x7f-\x10FFFF/; # my $reference_token = qr/(?:${unescaped}|${escaped})*/; # qr/(?:\/${reference_token})*/; # }; # more simple solution: return 1 if $_[0] eq ''; return 0 unless index($_[0], '/') == 0; return 0 if $_[0] =~ m/~(?:[^01]|\z)/; return 1; } sub validate_relative_json_pointer { # from draft-handrews-relative-json-pointer-01: # CORE::state $pointer_regexp = do { # my $non_negative_integer = qr/0|[1-9][0-9]*/; # my $relative_json_pointer = qr/${non_negative_integer}(?:#|${json_pointer})/; # }; # more simple solution: my ($integer, $pointer) = $_[0] =~ m/\A(0|[1-9][0-9]*)(.*)\z/s; return 0 unless defined $integer; return 1 if $pointer eq '#'; return validate_json_pointer($pointer); } sub validate_uri { # from rfc3986 Appendix A. return $_[0] =~ $URI_PATTERN ? 1 : 0; } sub validate_uri_reference { # from rfc3986 Appendix A. return $_[0] =~ $URI_REFERENCE_PATTERN ? 1 : 0; } sub validate_iri { # from rfc3987 section 2.2 return $_[0] =~ $IRI_PATTERN ? 1 : 0; } sub validate_iri_reference { # from rfc3987 section 2.2 return $_[0] =~ $IRI_REFERENCE_PATTERN ? 1 : 0; } sub validate_uri_template { # from rfc6570 return $_[0] =~ $URI_TEMPLATE_PATTERN ? 1 : 0; } # validators below need to be improved # no difference between double and float sub validate_float { return 0 if $_[0] =~ m/\A\s+|\s+\z/; return 0 unless looks_like_number $_[0]; return 1; } sub validate_double { return validate_float($_[0]); } # match perl regex but need ecma-262 regex sub validate_regex { return eval { qr/$_[0]/; } ? 1 : 0; } 1; __END__ =pod =encoding UTF-8 =head1 NAME JSONSchema::Validator::Format - Formats of JSON Schema specification =head1 VERSION version 0.010 =head1 AUTHORS =over 4 =item * Alexey Stavrov =item * Ivan Putintsev =item * Anton Fedotov =item * Denis Ibaev =item * Andrey Khozov =back =head1 COPYRIGHT AND LICENSE This software is Copyright (c) 2021 by Alexey Stavrov. This is free software, licensed under: The MIT (X11) License =cut