#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#define NEED_sv_2pv_flags
#include "ppport.h"

/*
 * Decode an application/x-www-form-urlencoded byte string.
 *
 *   s, len  input octets (not required to be NUL-terminated within len)
 *   dsv     destination scalar; when NULL a new mortal scalar is created
 *
 * '+' becomes a space and "%XX" (two hex digits, either case) becomes the
 * byte 0xXX.  A '%' that is not followed by two hex digits is copied
 * through literally, as are the bytes after it.  The result is never longer
 * than the input, so len + 1 bytes (including the trailing NUL) suffice.
 *
 * Returns the destination scalar, flagged as a plain string via SvPOK_only.
 */
static SV *
url_decode(pTHX_ const char *s, const STRLEN len, SV *dsv) {
    /* Maps a byte to its hex digit value, or 0xFF when it is not a hex digit. */
    #define __ 0xFF
    static const U8 hexval[0x100] = {
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 00-0F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 10-1F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 20-2F */
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,__,__,__,__,__,__, /* 30-3F */
        __,10,11,12,13,14,15,__,__,__,__,__,__,__,__,__, /* 40-4F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 50-5F */
        __,10,11,12,13,14,15,__,__,__,__,__,__,__,__,__, /* 60-6F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 70-7F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 80-8F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 90-9F */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* A0-AF */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* B0-BF */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* C0-CF */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* D0-DF */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* E0-EF */
        __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* F0-FF */
    };
    #undef __
    const char * const end = s + len;
    /*
     * A "%XX" escape needs two bytes after the '%', so only positions
     * before end - 2 may start one.  For inputs shorter than three bytes
     * the limit is the start of the input; computing s + len - 2 there
     * would form a pointer in front of the buffer.
     */
    const char * const esc_end = (len > 2) ? end - 2 : s;
    char *d;

    if (!dsv)
        dsv = sv_newmortal();

    SvUPGRADE(dsv, SVt_PV);
    d = SvGROW(dsv, len + 1);

    /* Main part: every '%' seen here has at least two bytes following it. */
    for (; s < esc_end; s++, d++) {
        const U8 c = *s;
        if (c == '+')
            *d = ' ';
        else if (c != '%')
            *d = c;
        else {
            const U8 v1 = hexval[(U8)*++s];
            const U8 v2 = hexval[(U8)*++s];
            /* Valid digits are 0..15, so the OR is 0xFF only if one is invalid. */
            if ((v1 | v2) != 0xFF)
                *d = (v1 << 4) | v2;
            else {
                /* Not an escape: emit '%' and rescan the two bytes after it. */
                *d = c;
                s -= 2;
            }
        }
    }

    /* Tail (at most two bytes): too short for an escape, only '+' is special. */
    for (; s < end; s++, d++) {
        const U8 c = *s;
        if (c == '+')
            *d = ' ';
        else
            *d = c;
    }

    *d = 0;
    SvCUR_set(dsv, d - SvPVX(dsv));
    SvPOK_only(dsv);
    return dsv;
}

/*
 * URL-decode s/len via url_decode() and then interpret the decoded octets
 * as UTF-8 with sv_utf8_decode().  Croaks when sv_utf8_decode() reports
 * failure; otherwise returns the decoded scalar.
 */
static SV *
url_decode_utf8(pTHX_ const char *s, const STRLEN len, SV *dsv) {
    SV * const decoded = url_decode(aTHX_ s, len, dsv);

    if (!sv_utf8_decode(decoded)) {
        croak("Malformed UTF-8 in URL decoded string");
    }

    return decoded;
}

/*
 * Encode len octets at s as application/x-www-form-urlencoded text.
 *
 * Bytes flagged in url_unreserved (ASCII letters, digits, '-', '.', '_',
 * '~') are copied unchanged, a space becomes '+', and every other byte is
 * written as '%' followed by two upper-case hex digits.  The output is
 * stored in dsv (a new mortal scalar when dsv is NULL), which is returned.
 */
static SV *
url_encode(pTHX_ const char *s, const STRLEN len, SV *dsv) {
    static const char xdigit[0x10] = "0123456789ABCDEF";
    /* 1 marks a byte that is emitted as-is. */
    static const U8 url_unreserved[0x100] = {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x00-0x0F */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x10-0x1F */
        0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, /* 0x20-0x2F */
        1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0x30-0x3F */
        0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4F */
        1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 0x50-0x5F */
        0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6F */
        1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0, /* 0x70-0x7F */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8F */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9F */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xAF */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xB0-0xBF */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xC0-0xCF */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xD0-0xDF */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xE0-0xEF */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xF0-0xFF */
    };
    const U8 *src = (const U8 *)s;
    const U8 * const stop = src + len;
    char *out;

    if (dsv == NULL) {
        dsv = sv_newmortal();
    }

    SvUPGRADE(dsv, SVt_PV);
    /* Worst case: every input byte expands to "%XX", plus the trailing NUL. */
    out = SvGROW(dsv, len * 3 + 1);

    while (src < stop) {
        const U8 byte = *src++;

        if (url_unreserved[byte]) {
            *out++ = (char)byte;
        }
        else if (byte == ' ') {
            *out++ = '+';
        }
        else {
            out[0] = '%';
            out[1] = xdigit[byte >> 4];
            out[2] = xdigit[byte & 15];
            out += 3;
        }
    }

    *out = 0;
    SvCUR_set(dsv, out - SvPVX(dsv));
    SvPOK_only(dsv);
    return dsv;
}

/*
 * Report whether the len bytes at s contain a '+' or a '%', i.e. whether
 * url_decode() would change anything.
 */
static bool
url_encoded(const char *s, const STRLEN len) {
    const char * const stop = s + len;

    while (s < stop) {
        const char ch = *s++;
        if (ch == '+' || ch == '%')
            return TRUE;
    }
    return FALSE;
}

/*
 * Parsing context handed to url_params_each().
 */
typedef struct _ust {
    /* Decoder applied to keys and values (url_decode or url_decode_utf8). */
    SV * (*decode) (pTHX_ const char *, const STRLEN, SV *);
    /* Called once per parameter: context, key, key length, key-is-UTF-8
     * flag, raw (still encoded) value or NULL, value length. */
    void (*cb)     (pTHX_ const struct _ust *, const char *, STRLEN, bool, const char *, STRLEN);
    /* Callback-specific payload: the AV/HV being filled, or the CV to call. */
    SV *sv;
} ust_t;

/*
 * Split the len bytes at cur into parameters and report each one to u->cb.
 *
 * Parameters are separated by '&' or ';'.  Within a parameter the first '='
 * separates key from value; a parameter without '=' is reported with a NULL
 * value.  The key is decoded here (only when it contains '+' or '%', or
 * always when the UTF-8 decoder is selected); the value is passed to the
 * callback still encoded, together with its length.  When the input ends
 * with a separator, one extra parameter with an empty key and NULL value is
 * reported.
 */
static void
url_params_each(pTHX_ const char *cur, const STRLEN len, const ust_t *u) {
    const char * const end = cur + len;
    const char *key, *val;
    STRLEN klen, vlen;
    SV *tmpsv = NULL;       /* scratch scalar reused for every decoded key */
    bool is_utf8 = FALSE;

    while (cur < end) {
        /* Key: runs up to '=', a separator, or the end of input. */
        for (key = cur; cur < end; cur++) {
            const char c = *cur;
            if (c == '=' || c == '&' || c == ';')
                break;
        }
        klen = cur - key;

        /* Only look at *cur when the scan stopped inside the input; at
         * cur == end there is no byte that belongs to us. */
        if (cur < end && *cur == '=') {
            /* Value: everything after '=' up to the next separator. */
            for (val = ++cur; cur < end; cur++) {
                const char c = *cur;
                if (c == '&' || c == ';')
                    break;
            }
            vlen = cur - val;
        }
        else {
            val  = NULL;
            vlen = 0;
        }

        if (u->decode == &url_decode_utf8 || url_encoded(key, klen)) {
            tmpsv   = u->decode(aTHX_ key, klen, tmpsv);
            key     = (const char *)SvPVX(tmpsv);
            klen    = SvCUR(tmpsv);
            if (u->decode == &url_decode_utf8)
                is_utf8 = SvUTF8(tmpsv);
        }
        u->cb(aTHX_ u, key, klen, is_utf8, val, vlen);

        /* Step over the separator; stop instead of advancing past end. */
        if (cur >= end)
            break;
        cur++;
    }

    /* A trailing separator introduces one final, empty parameter. */
    if (len) {
        const char c = end[-1];
        if (c == '&' || c == ';')
            u->cb(aTHX_ u, "", 0, FALSE, NULL, 0);
    }
}

/*
 * Callback for url_params_mixed: u->sv is the result hash.
 *
 * The first occurrence of a key stores the decoded value directly.  On a
 * repeated key the existing entry is turned into an array reference (unless
 * it already is one) and the new value is appended to that array.  A NULL v
 * leaves the new value scalar untouched.
 */
static void
url_params_mixed_cb(pTHX_ const ust_t *u, const char *k, STRLEN klen, bool is_utf8, const char *v, STRLEN vlen) {
    HV * const params = (HV *)u->sv;
    /* A negative key length tells the hv_* functions the key is UTF-8. */
    const I32 hklen = is_utf8 ? -klen : klen;
    SV **slot;

    if (hv_exists(params, k, hklen)) {
        SV * const item = newSV(0);
        AV *list;

        slot = hv_fetch(params, k, hklen, 0);
        if (SvROK(*slot)) {
            list = (AV *)SvRV(*slot);
        }
        else {
            /* Second occurrence: move the existing scalar into a new array. */
            SV * const first = *slot;
            list = newAV();
            *slot = newRV_noinc((SV *)list);
            av_push(list, first);
        }
        av_push(list, item);
        if (v) {
            u->decode(aTHX_ v, vlen, item);
        }
    }
    else {
        /* First occurrence: create the entry and decode straight into it. */
        slot = hv_fetch(params, k, hklen, 1);
        if (v) {
            u->decode(aTHX_ v, vlen, *slot);
        }
    }
}

/*
 * Callback for url_params_multi: u->sv is the result hash, in which every
 * key maps to an array reference holding all values seen for that key.
 * A NULL v appends a value scalar that is left untouched.
 */
static void
url_params_multi_cb(pTHX_ const ust_t *u, const char *k, STRLEN klen, bool is_utf8, const char *v, STRLEN vlen) {
    HV * const params = (HV *)u->sv;
    /* A negative key length tells hv_fetch the key is UTF-8. */
    const I32 hklen = is_utf8 ? -klen : klen;
    SV ** const slot = hv_fetch(params, k, hklen, 1);
    SV * const item = newSV(0);
    AV *list;

    if (!SvROK(*slot)) {
        /* Freshly created entry: replace the placeholder with a new array ref. */
        list = newAV();
        SvREFCNT_dec(*slot);
        *slot = newRV_noinc((SV *)list);
    }
    else {
        list = (AV *)SvRV(*slot);
    }

    av_push(list, item);
    if (v) {
        u->decode(aTHX_ v, vlen, item);
    }
}

/*
 * Callback for url_params_flat: u->sv is the result array, to which the key
 * and then the value are appended.  A NULL v appends a value scalar that is
 * left untouched.
 */
static void
url_params_flat_cb(pTHX_ const ust_t *u, const char *k, STRLEN klen, bool is_utf8, const char *v, STRLEN vlen) {
    AV * const list = (AV *)u->sv;
    SV * const name = newSVpvn(k, klen);
    SV * const value = newSV(0);

    if (is_utf8) {
        SvUTF8_on(name);
    }

    av_push(list, name);
    av_push(list, value);

    if (v) {
        u->decode(aTHX_ v, vlen, value);
    }
}

/*
 * Callback for url_params_each: u->sv is the Perl code to invoke.  Builds
 * mortal key and value scalars (the value is decoded only when v is not
 * NULL) and calls u->sv with them as its two arguments, discarding whatever
 * it returns.
 */
static void
url_params_each_cb(pTHX_ const ust_t *u, const char *k, STRLEN klen, bool is_utf8, const char *v, STRLEN vlen) {
    dSP;
    SV * const name  = sv_2mortal(newSVpvn(k, klen));
    SV * const value = sv_2mortal(newSV(0));

    if (is_utf8) {
        SvUTF8_on(name);
    }
    if (v) {
        u->decode(aTHX_ v, vlen, value);
    }

    ENTER;
    SAVETMPS;

    /* Argument list for the callback: (key, value). */
    PUSHMARK(SP);
    EXTEND(SP, 2);
    PUSHs(name);
    PUSHs(value);
    PUTBACK;

    call_sv(u->sv, G_DISCARD);

    FREETMPS;
    LEAVE;
}


MODULE = URL::Encode::XS   PACKAGE = URL::Encode::XS

PROTOTYPES: DISABLE

void
url_decode(octets)
    SV *octets
  ALIAS:
    URL::Encode::XS::url_decode      = 0
    URL::Encode::XS::url_decode_utf8 = 1
    URL::Encode::XS::url_encode      = 2
  PREINIT:
    dXSTARG;
    const char *s;
    STRLEN len;
  PPCODE:
    /* One XSUB body shared by three Perl-visible functions; ix holds the
     * ALIAS value of the name that was called.  The result is written
     * into TARG and pushed as the single return value. */
    SvGETMAGIC(octets);
    /* The helpers operate on octets: a UTF-8 flagged argument is copied and
     * the copy downgraded; the call croaks when the downgrade fails. */
    if (SvUTF8(octets)) {
        octets = sv_mortalcopy(octets);
        if (!sv_utf8_downgrade(octets, 1))
            croak("Wide character in octet string");
    }
    s = SvPV_nomg_const(octets, len);
    switch (ix) {
        case 0:
            url_decode(aTHX_ s, len, TARG);
            break;
        case 1:
            url_decode_utf8(aTHX_ s, len, TARG);
            break;
        case 2:
            url_encode(aTHX_ s, len, TARG);
            break;
    }
    PUSHTARG;

void
url_encode_utf8(string)
    SV *string
  PREINIT:
    dXSTARG;
    const char *s;
    STRLEN len;
  PPCODE:
    /* URL-encode the UTF-8 representation of string into TARG. */
    SvGETMAGIC(string);
    /* A string without the UTF-8 flag is copied and the copy converted to
     * UTF-8 octets; a flagged string has its buffer passed on as it is. */
    if (!SvUTF8(string)) {
        string = sv_mortalcopy(string);
        sv_utf8_encode(string);
    }
    s = SvPV_nomg_const(string, len);
    url_encode(aTHX_ s, len, TARG);
    PUSHTARG;

void
url_params_flat(octets, utf8=FALSE)
    SV *octets
    bool utf8
  ALIAS:
    URL::Encode::XS::url_params_flat  = 0
    URL::Encode::XS::url_params_mixed = 1
    URL::Encode::XS::url_params_multi = 2
  PREINIT:
    const char *s;
    STRLEN len;
    ust_t u;
  PPCODE:
    /* Parse a query string into a new container and return a reference to
     * it.  ix (the ALIAS value) selects the container type and the callback
     * that fills it; utf8 selects the decoder used for keys and values. */
    SvGETMAGIC(octets);
    /* Parsing works on octets: a UTF-8 flagged argument is copied and the
     * copy downgraded; the call croaks when the downgrade fails. */
    if (SvUTF8(octets)) {
        octets = sv_mortalcopy(octets);
        if (!sv_utf8_downgrade(octets, 1))
            croak("Wide character in octet string");
    }

    u.decode = utf8 ? &url_decode_utf8 : &url_decode;
    switch(ix) {
        case 0:
            /* flat: array of alternating keys and values */
            u.cb = &url_params_flat_cb;
            u.sv = (SV *)newAV();
            break;
        case 1:
            /* mixed: hash whose values are scalars or array references */
            u.cb = &url_params_mixed_cb;
            u.sv = (SV *)newHV();
            break;
        case 2:
            /* multi: hash whose values are always array references */
            u.cb = &url_params_multi_cb;
            u.sv = (SV *)newHV();
            break;
    }
    s = SvPV_nomg_const(octets, len);
    /* The mortal reference is placed in the return slot before parsing
     * starts -- presumably so the container is released if decoding croaks. */
    ST(0) = sv_2mortal(newRV_noinc(u.sv));
    url_params_each(aTHX_ s, len, &u);
    XSRETURN(1);


void
url_params_each(octets, callback, utf8=FALSE)
    SV *octets
    CV *callback
    bool utf8
  PREINIT:
    const char *s;
    STRLEN len;
    ust_t u;
  PPCODE:
    /* Parse a query string and invoke callback once per parameter (via
     * url_params_each_cb); utf8 selects the decoder for keys and values. */
    SvGETMAGIC(octets);
    /* Parsing works on octets: a UTF-8 flagged argument is copied and the
     * copy downgraded; the call croaks when the downgrade fails. */
    if (SvUTF8(octets)) {
        octets = sv_mortalcopy(octets);
        if (!sv_utf8_downgrade(octets, 1))
            croak("Wide character in octet string");
    }
    s = SvPV_nomg_const(octets, len);
    u.decode = utf8 ? &url_decode_utf8 : &url_decode;
    u.cb     = &url_params_each_cb;
    u.sv     = (SV *)callback;
    url_params_each(aTHX_ s, len, &u);