package JSON::MaybeUTF8;
# ABSTRACT: Simple wrapper for explicit JSON Unicode text/UTF-8 byte functions

use strict;
use warnings;

our $VERSION = '2.000';

=head1 NAME

JSON::MaybeUTF8 - provide explicit text/UTF-8 JSON functions

=head1 SYNOPSIS

 use JSON::MaybeUTF8 qw(:v1);
 binmode STDOUT, ':encoding(UTF-8)';
 binmode STDERR, ':raw';
 (*STDOUT)->print(encode_json_text({ text => '...' }));
 (*STDERR)->print(encode_json_utf8({ text => '...' }));

=head1 DESCRIPTION

Combines L<JSON::MaybeXS> with L<Unicode::UTF8> to provide
4 functions that handle the combinations of JSON and UTF-8
encoding/decoding.

The idea is to make the UTF-8-or-not behaviour more explicit
in code that deals with multiple transport layers such as
database, cache and I/O.

This is a trivial wrapper around two other modules.

=cut

use feature qw(state);

use JSON::MaybeXS;
use Unicode::UTF8 qw(encode_utf8 decode_utf8);

use Exporter qw(import export_to_level);

=head2 BOM removal

The C<< $JSON::Maybe::UTF8::REMOVE_BOM >> flag is B<set by default> due
to L<https://github.com/rurban/Cpanel-JSON-XS/issues/125>. If you would
prefer to disable this, add C<< $JSON::Maybe::UTF8::REMOVE_BOM = 0; >>
in your code.

Note that this only affects things when L<Cpanel::JSON::XS> is used (preferred by L<JSON::MaybeXS>
if it can be loaded).

=cut

our $REMOVE_BOM = 1;

our @EXPORT_OK = qw(
    decode_json_utf8
    encode_json_utf8
    decode_json_text
    encode_json_text
    format_json_text
);
our %EXPORT_TAGS = (
    v1 => [ qw(
        decode_json_utf8
        encode_json_utf8
        decode_json_text
        encode_json_text
    ) ],
    v2 => [ @EXPORT_OK ],
);

=head2 decode_json_utf8

Given a UTF-8-encoded JSON byte string, returns a Perl data
structure. May optionally remove the UTF-8 L<BOM|https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8>
if it exists.

=cut

sub decode_json_utf8 {
    state $json = JSON::MaybeXS->new;
    die 'bad json state' if $json->get_utf8;
    return $json->decode_utf8($_[0]) unless $REMOVE_BOM;
    (my $txt = decode_utf8(shift)) =~ s{^\x{feff}}{};
    return $json->decode($txt);
}

=head2 encode_json_utf8

Given a Perl data structure, returns a UTF-8-encoded JSON
byte string.

=cut

sub encode_json_utf8 {
    state $json = JSON::MaybeXS->new;
    die 'bad json state' if $json->get_utf8;
    encode_utf8($json->encode(shift))
}

=head2 decode_json_text

Given a JSON string composed of Unicode characters (in
Perl's internal encoding), returns a Perl data structure.

=cut

sub decode_json_text {
    state $json = JSON::MaybeXS->new;
    die 'bad json state' if $json->get_utf8;
    my $txt = shift;
    $txt =~ s{^\x{feff}}{} if $REMOVE_BOM;
    $json->decode($txt);
}

=head2 encode_json_text

Given a Perl data structure, returns a JSON string composed
of Unicode characters (in Perl's internal encoding).

=cut

sub encode_json_text {
    state $json = JSON::MaybeXS->new;
    die 'bad json state' if $json->get_utf8;
    $json->encode(shift)
}

=head2 encode_json_text

Given a Perl data structure, returns a formatted JSON string composed
of Unicode characters (in Perl's internal encoding).

This is functionally identical to L</encode_json_text>, but with
indentation to make it readable, and with defined key ordering which
should make it easier to C<diff> two different data structures.

=cut

sub format_json_text {
    state $json = JSON::MaybeXS->new(
        pretty    => 1,
        canonical => 1,
    );
    die 'bad json state' if $json->get_utf8;
    $json->encode(shift)
}

1;

=head1 AUTHOR

Tom Molesworth <TEAM@cpan.org>

=head1 LICENSE

Copyright Tom Molesworth 2017-2021. Licensed under the same terms as Perl itself.