/*
    Minimalistic C implementation of Florian Loitsch's Grisu-algorithm
    (http://florian.loitsch.com/publications/dtoa-pldi2010.pdf).

    Grisu converts floating point numbers to an optimal decimal string
    representation without loss of precision.
*/
/*
Copyright (c) 2013 Andreas Samoljuk

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/


#include <stdint.h>
#include <string.h>

#ifdef _MSC_VER
#define inline __inline
#endif

/* When defined, zero and integer-valued results are written with a trailing
   ".0" (see the DOT_ZERO sections in emit_digits() and filter_special()). */
#define DOT_ZERO

/*
#include "powers.h"
*/
/*--------------------------------------------------------------------------*/
/* Number of entries in the powers_ten / powers_ten_exp tables. */
#define npowers     87
/* Decimal-exponent distance between two consecutive table entries. */
#define steppowers  8
/* Decimal exponent that belongs to table entry 0. */
#define firstpower -348 /* 10 ^ -348 */

/* find_cachedpow10() selects the entry for which
   (exp + powers_ten_exp[idx] + 64) lies within [expmin, expmax]. */
#define expmax     -32
#define expmin     -60


/* Floating point value split into an integer significand and a binary
   exponent, as produced by build_fp(): value = frac * 2^exp. */
typedef struct Fp {
    uint64_t frac;  /* significand */
    int exp;        /* power-of-two exponent applied to frac */
} Fp;

/* 64-bit significands of the cached powers of ten.  Entry i pairs with
   powers_ten_exp[i]; find_cachedpow10() reports its decimal exponent as
   firstpower + i * steppowers. */
static uint64_t powers_ten[ 87 ] = {
    18054884314459144840U, 13451937075301367670U,
    10022474136428063862U, 14934650266808366570U,
    11127181549972568877U, 16580792590934885855U,
    12353653155963782858U, 18408377700990114895U,
    13715310171984221708U, 10218702384817765436U,
    15227053142812498563U, 11345038669416679861U,
    16905424996341287883U, 12595523146049147757U,
    9384396036005875287U,  13983839803942852151U,
    10418772551374772303U, 15525180923007089351U,
    11567161174868858868U, 17236413322193710309U,
    12842128665889583758U, 9568131466127621947U,
    14257626930069360058U, 10622759856335341974U,
    15829145694278690180U, 11793632577567316726U,
    17573882009934360870U, 13093562431584567480U,
    9755464219737475723U,  14536774485912137811U,
    10830740992659433045U, 16139061738043178685U,
    12024538023802026127U, 17917957937422433684U,
    13349918974505688015U, 9946464728195732843U,
    14821387422376473014U, 11042794154864902060U,
    16455045573212060422U, 12259964326927110867U,
    18268770466636286478U, 13611294676837538539U,
    10141204801825835212U, 15111572745182864684U,
    11258999068426240000U, 16777216000000000000U,
    12500000000000000000U, 9313225746154785156U,
    13877787807814456755U, 10339757656912845936U,
    15407439555097886824U, 11479437019748901445U,
    17105694144590052135U, 12744735289059618216U,
    9495567745759798747U,  14149498560666738074U,
    10542197943230523224U, 15709099088952724970U,
    11704190886730495818U, 17440603504673385349U,
    12994262207056124023U, 9681479787123295682U,
    14426529090290212157U, 10748601772107342003U,
    16016664761464807395U, 11933345169920330789U,
    17782069995880619868U, 13248674568444952270U,
    9871031767461413346U,  14708983551653345445U,
    10959046745042015199U, 16330252207878254650U,
    12166986024289022870U, 18130221999122236476U,
    13508068024458167312U, 10064294952495520794U,
    14996968138956309548U, 11173611982879273257U,
    16649979327439178909U, 12405201291620119593U,
    9242595204427927429U,  13772540099066387757U,
    10261342003245940623U, 15290591125556738113U,
    11392378155556871081U, 16975966327722178521U,
    12648080533535911531U
};

/* Binary exponents that go with the significands in powers_ten:
   cached power i is powers_ten[i] * 2^powers_ten_exp[i]. */
static int16_t powers_ten_exp[ 87 ] =
{
    -1220,-1193,
    -1166,-1140,
    -1113,-1087,
    -1060,-1034,
    -1007,-980,
    -954, -927,
    -901, -874,
    -847, -821,
    -794, -768,
    -741, -715,
    -688, -661,
    -635, -608,
    -582, -555,
    -529, -502,
    -475, -449,
    -422, -396,
    -369, -343,
    -316, -289,
    -263, -236,
    -210, -183,
    -157, -130,
    -103, -77,
    -50,  -24,
      3,   30,
     56,   83,
    109,  136,
    162,  189,
    216,  242,
    269,  295,
    322,  348,
    375,  402,
    428,  455,
    481,  508,
    534,  561,
    588,  614,
    641,  667,
    694,  720,
    747,  774,
    800,  827,
    853,  880,
    907,  933,
    960,  986,
    1013, 1039,
    1066
};

/*
    Select the cached power of ten that brings a value with binary exponent
    `exp` into the [expmin, expmax] window after multiplication.

    \param exp   Binary exponent of the (normalized) value to be scaled.
    \param k     Receives the decimal exponent of the selected power.
    \param out   Receives significand and binary exponent of that power.
*/
static void find_cachedpow10(int exp, int* k, Fp* out)
{
    /* log10(2): turns a binary exponent into an estimated decimal one. */
    const double log10_of_2 = 0.30102999566398114;

    int guess = -(exp + npowers) * log10_of_2;
    int idx = (guess - firstpower) / steppowers;

    /* The estimate may be off by a few entries; walk to the matching one. */
    for(;;) {
        int scaled = exp + powers_ten_exp[idx] + 64;

        if(scaled < expmin) {
            ++idx;
        } else if(scaled > expmax) {
            --idx;
        } else {
            break;
        }
    }

    *k = (firstpower + idx * steppowers);

    out->frac = powers_ten[idx];
    out->exp  = powers_ten_exp[idx];
}
/*--------------------------------------------------------------------------*/

/* Bit fields of a 64-bit double as read by get_dbits(). */
#define fracmask  0x000FFFFFFFFFFFFFU /* low 52 bits: stored fraction */
#define expmask   0x7FF0000000000000U /* bits 52..62: biased exponent */
#define hiddenbit 0x0010000000000000U /* bit 52: implicit leading one */
#define signmask  0x8000000000000000U /* bit 63: sign */
/* Subtracted from the biased exponent in build_fp() so that the significand
   can be treated as an integer. */
#define expbias   (1023 + 52)

/* NOTE: both macros evaluate their arguments more than once. */
#define absv(n) ((n) < 0 ? -(n) : (n))
#define minv(a, b) ((a) < (b) ? (a) : (b))

/* Powers of ten in descending order: tens[i] == 10^(19 - i). */
static uint64_t tens[20] = {
    10000000000000000000U,  /* 10^19 */
    1000000000000000000U,   /* 10^18 */
    100000000000000000U,    /* 10^17 */
    10000000000000000U,     /* 10^16 */
    1000000000000000U,      /* 10^15 */
    100000000000000U,       /* 10^14 */
    10000000000000U,        /* 10^13 */
    1000000000000U,         /* 10^12 */
    100000000000U,          /* 10^11 */
    10000000000U,           /* 10^10 */
    1000000000U,            /* 10^9  */
    100000000U,             /* 10^8  */
    10000000U,              /* 10^7  */
    1000000U,               /* 10^6  */
    100000U,                /* 10^5  */
    10000U,                 /* 10^4  */
    1000U,                  /* 10^3  */
    100U,                   /* 10^2  */
    10U,                    /* 10^1  */
    1U                      /* 10^0  */
};

/* Return the raw 64-bit object representation of a double. */
static inline uint64_t get_dbits(double d)
{
    uint64_t raw;

    memcpy(&raw, &d, sizeof raw);
    return raw;
}

/*
    Split a double into integer significand and binary exponent.

    A non-zero exponent field gets the hidden bit added to the fraction and
    the bias removed; a zero exponent field (subnormal) keeps the bare
    fraction and uses the fixed exponent -expbias + 1.
*/
static Fp build_fp(double d)
{
    const uint64_t raw = get_dbits(d);
    const int biased = (raw & expmask) >> 52;
    Fp result;

    result.frac = raw & fracmask;

    if(biased) {
        result.frac += hiddenbit;
        result.exp = biased - expbias;
    } else {
        result.exp = -expbias + 1;
    }

    return result;
}

/*
    Shift the significand left until its top bit (bit 63) is set, lowering
    the exponent by the same amount so the represented value is unchanged.
    The significand must be non-zero, otherwise the first loop never ends.
*/
static void normalize(Fp* fp)
{
    /* Distance from the hidden-bit position (bit 52) to bit 63. */
    const int to_top = 64 - 52 - 1;

    /* Bring the leading one up to the hidden-bit position first. */
    for(; !(fp->frac & hiddenbit); fp->exp--) {
        fp->frac <<= 1;
    }

    fp->frac <<= to_top;
    fp->exp -= to_top;
}

/*
    Compute the boundaries around fp, both expressed with the same exponent.

    \param fp     Value as returned by build_fp() (not yet normalized).
    \param lower  Receives the lower boundary.
    \param upper  Receives the upper boundary, with bit 63 set.
*/
static void get_normalized_boundaries(Fp* fp, Fp* lower, Fp* upper)
{
    const uint64_t upper_top = hiddenbit << 1;
    const int pad = 64 - 52 - 2;
    int low_shift;

    /* upper = (2 * frac + 1) * 2^(exp - 1) */
    upper->frac = (fp->frac << 1) + 1;
    upper->exp  = fp->exp - 1;

    for(; (upper->frac & upper_top) == 0; upper->exp--) {
        upper->frac <<= 1;
    }

    upper->frac <<= pad;
    upper->exp -= pad;

    /* When frac is exactly the hidden bit the lower boundary is computed
       with one extra bit of resolution. */
    low_shift = (fp->frac == hiddenbit) ? 2 : 1;
    lower->frac = (fp->frac << low_shift) - 1;
    lower->exp = fp->exp - low_shift;

    /* Re-express lower with upper's exponent. */
    lower->frac <<= lower->exp - upper->exp;
    lower->exp = upper->exp;
}

/*
    Multiply two Fp values, keeping the upper 64 bits of the 128-bit
    significand product (rounded) and adjusting the exponent by 64.
*/
static Fp multiply(Fp* a, Fp* b)
{
    const uint64_t low32 = 0x00000000FFFFFFFF;

    const uint64_t a_hi = a->frac >> 32;
    const uint64_t a_lo = a->frac & low32;
    const uint64_t b_hi = b->frac >> 32;
    const uint64_t b_lo = b->frac & low32;

    /* The four 32x32 -> 64 bit partial products. */
    const uint64_t hi_hi = a_hi * b_hi;
    const uint64_t hi_lo = a_hi * b_lo;
    const uint64_t lo_hi = a_lo * b_hi;
    const uint64_t lo_lo = a_lo * b_lo;

    uint64_t middle = (hi_lo & low32) + (lo_hi & low32) + (lo_lo >> 32);
    Fp product;

    /* round up */
    middle += 1U << 31;

    product.frac = hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (middle >> 32);
    product.exp  = a->exp + b->exp + 64;
    return product;
}

/*
    Repeatedly decrement the last generated digit while doing so moves the
    remainder closer to `frac` without leaving the `delta` interval.

    \param digits   Digit buffer.
    \param ndigits  Number of digits in the buffer; digits[ndigits - 1] is
                    the one that gets adjusted.
    \param delta    Width of the acceptable interval.
    \param rem      Current remainder.
    \param kappa    Weight of one step of the last digit.
    \param frac     Target distance.
*/
static void round_digit(char* digits, int ndigits, uint64_t delta, uint64_t rem, uint64_t kappa, uint64_t frac)
{
    for(;;) {
        /* Target already reached or passed. */
        if(rem >= frac)
            break;

        /* Another step would leave the interval. */
        if(delta - rem < kappa)
            break;

        /* Another step would overshoot and not end up any closer. */
        if(rem + kappa >= frac && frac - rem <= rem + kappa - frac)
            break;

        digits[ndigits - 1] -= 1;
        rem += kappa;
    }
}

/*
    Produce decimal digits for the scaled value.

    \param fp      Scaled value.
    \param upper   Scaled upper boundary; its (negative) exponent defines
                   where the integral/fractional split is made.
    \param lower   Scaled lower boundary.
    \param digits  Receives the digit characters (not NUL terminated).
    \param K       Decimal exponent; the exponent of the last emitted digit
                   is added to it.

    \return Number of digits written.
*/
static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
{
    const int shift = -upper->exp;
    const uint64_t one = 1ULL << shift;
    const uint64_t frac_mask = one - 1;
    const uint64_t wfrac = upper->frac - fp->frac;
    uint64_t delta = upper->frac - lower->frac;
    uint64_t integral = upper->frac >> shift;
    uint64_t fractional = upper->frac & frac_mask;
    int count = 0;
    int kappa = 10;

    /* Integral part: divisors run from 10^9 (tens[10]) down to 10^0. */
    while(kappa > 0) {
        const uint64_t div = tens[20 - kappa];
        const unsigned digit = integral / div;
        uint64_t rest;

        /* Leading zeros are skipped. */
        if(digit || count) {
            digits[count++] = digit + '0';
        }

        integral -= digit * div;
        kappa--;

        rest = (integral << shift) + fractional;
        if(rest <= delta) {
            *K += kappa;
            round_digit(digits, count, delta, rest, div << shift, wfrac);

            return count;
        }
    }

    /* Fractional part: one digit per multiplication by ten.  kappa is
       negative here, so tens[19 + kappa] starts at 10 and grows by a factor
       of ten per digit. */
    for(;;) {
        unsigned digit;

        fractional *= 10;
        delta *= 10;
        kappa--;

        digit = fractional >> shift;
        if(digit || count) {
            digits[count++] = digit + '0';
        }

        fractional &= frac_mask;
        if(fractional < delta) {
            *K += kappa;
            round_digit(digits, count, delta, fractional, one,
                        wfrac * tens[19 + kappa]);

            return count;
        }
    }
}

/*
    Convert a finite, non-zero double into decimal digits and a decimal
    exponent.

    \param d       Value to convert (sign is ignored).
    \param digits  Receives the digit characters (not NUL terminated).
    \param K       Receives the decimal exponent of the last digit.

    \return Number of digits written.
*/
static int grisu2(double d, char* digits, int* K)
{
    Fp value = build_fp(d);
    Fp low, high;
    Fp pow10;
    int dec_exp;

    /* Boundaries are derived from the un-normalized value. */
    get_normalized_boundaries(&value, &low, &high);
    normalize(&value);

    find_cachedpow10(high.exp, &dec_exp, &pow10);

    value = multiply(&value, &pow10);
    high  = multiply(&high,  &pow10);
    low   = multiply(&low,   &pow10);

    /* Shrink the interval by one unit on each side. */
    low.frac++;
    high.frac--;

    *K = -dec_exp;

    return generate_digits(&value, &high, &low, digits, K);
}

/*
    Format a digit string plus decimal exponent as text.

    Three layouts are used: plain integer (digits followed by K zeros, plus
    ".0" when DOT_ZERO is defined), plain decimal (for small negative K) and
    scientific notation ("d.ddde+XX").

    The output never exceeds 24 - neg bytes, matching the `char dest[24]`
    contract of the public converters: an integer that would not fit is
    written in scientific notation instead.

    \param digits   Digit characters (not NUL terminated).
    \param ndigits  Number of digits.
    \param dest     Output position (after an optional sign).
    \param K        Decimal exponent of the last digit.
    \param neg      1 if a sign character was already written, else 0.

    \return Number of bytes written to dest (no NUL terminator is added).
*/
static int emit_digits(char* digits, int ndigits, char* dest, int K, int neg)
{
    /* Bytes left in the caller's 24-byte buffer. */
    const int room = 24 - neg;
    /* Decimal exponent of the first digit; computed before ndigits may be
       clamped below so that its sign and magnitude always agree. */
    const int sci = K + ndigits - 1;
    int exp = sci < 0 ? -sci : sci;
#ifdef DOT_ZERO
    const int int_len = ndigits + K + 2;
#else
    const int int_len = ndigits + K;
#endif
    int idx;
    int cent;

    /* write plain integer (only if it fits into the output buffer) */
    if(K >= 0 && exp < ndigits + 7 && int_len <= room) {
        memcpy(dest, digits, ndigits);
        memset(dest + ndigits, '0', K);
#ifdef DOT_ZERO
        dest[ndigits + K]     = '.';
        dest[ndigits + K + 1] = '0';
#endif
        return int_len;
    }

    /* write decimal w/o scientific notation */
    if(K < 0 && (K > -7 || exp < 4)) {
        /* Number of digits in front of the decimal point (K is negative). */
        int offset = ndigits + K;

        /* fp < 1.0 -> write leading zero */
        if(offset <= 0) {
            int zeros = -offset;

            dest[0] = '0';
            dest[1] = '.';
            memset(dest + 2, '0', zeros);
            memcpy(dest + 2 + zeros, digits, ndigits);

            return ndigits + 2 + zeros;
        }

        /* fp > 1.0 */
        memcpy(dest, digits, offset);
        dest[offset] = '.';
        memcpy(dest + offset + 1, digits + offset, ndigits - offset);

        return ndigits + 1;
    }

    /* write decimal w/ scientific notation */
    if(ndigits > 18 - neg) {
        ndigits = 18 - neg;
    }

    idx = 0;
    dest[idx++] = digits[0];

    if(ndigits > 1) {
        dest[idx++] = '.';
        memcpy(dest + idx, digits + 1, ndigits - 1);
        idx += ndigits - 1;
    }

    dest[idx++] = 'e';
    dest[idx++] = sci < 0 ? '-' : '+';

    /* Exponent without leading zeros: one to three digits. */
    cent = 0;

    if(exp > 99) {
        cent = exp / 100;
        dest[idx++] = cent + '0';
        exp -= cent * 100;
    }
    if(exp > 9) {
        int dec = exp / 10;
        dest[idx++] = dec + '0';
        exp -= dec * 10;

    } else if(cent) {
        dest[idx++] = '0';
    }

    dest[idx++] = exp % 10 + '0';

    return idx;
}

/*
    Handle the values that are not passed to grisu2(): zero, infinity and
    NaN.

    \param fp    Value to inspect.
    \param dest  Output position (after an optional sign).

    \return Number of bytes written, or 0 if fp is an ordinary value and
            nothing was written.
*/
static int filter_special(double fp, char* dest)
{
    uint64_t bits;

    if(fp == 0.0) {
#ifdef DOT_ZERO
        memcpy(dest, "0.0", 3);
        return 3;
#else
        dest[0] = '0';
        return 1;
#endif
    }

    bits = get_dbits(fp);

    /* Only an all-ones exponent field marks inf/nan. */
    if((bits & expmask) != expmask) {
        return 0;
    }

    /* A non-zero fraction distinguishes nan from inf. */
    memcpy(dest, (bits & fracmask) ? "nan" : "inf", 3);
    return 3;
}

/*
    Write the decimal representation of a double.

    \param d     Value to convert.
    \param dest  Output buffer; the result is NOT NUL terminated.

    \return Number of bytes written to dest.
*/
int fpconv_dtoa(double d, char dest[24])
{
    char digits[18];
    const int neg = (get_dbits(d) & signmask) != 0;
    char* out = dest;
    int K = 0;
    int special, ndigits;

    if(neg) {
        *out++ = '-';
    }

    /* zero, inf and nan are written directly */
    special = filter_special(d, out);
    if(special) {
        return neg + special;
    }

    ndigits = grisu2(d, digits, &K);

    return neg + emit_digits(digits, ndigits, out, K, neg);
}

/*--------------------------------------------------------------------------*/

/*
    Round a digit string half-up at a given position.

    \param digits    Pointer to string of digits; digits[-1] must be writable
                     because a carry out of the first digit stores a '1'
                     there.
    \param pos       Index of the digit that decides the rounding.  The
                     digits before it are the ones that get modified.

    \return Zero if digits are unmodified (digits[pos] is below '5'), 1 if
            the digits before pos were incremented in place, and -1 if all
            of them wrapped to '0' and a '1' was stored at digits[-1].
*/
static int fpc_round_up( char* digits, int pos )
{
    int i;

    if( digits[pos] < '5' )
        return 0;

    /* Propagate the carry towards the front; every '9' wraps to '0'. */
    for( i = pos - 1; i >= 0; --i )
    {
        if( digits[i] != '9' )
        {
            digits[i] += 1;
            return 1;
        }
        digits[i] = '0';
    }

    digits[ -1 ] = '1';
    return -1;
}


/* Maximum number of significant digits fpconv_ftoa() keeps. */
#define RPOS    9

/*
    Like fpconv_dtoa(), but the digit string is rounded to at most RPOS
    significant digits before it is formatted.

    \param d     Value to convert.
    \param dest  Output buffer; the result is NOT NUL terminated.

    \return Number of bytes written to dest.
*/
int fpconv_ftoa(double d, char dest[24])
{
    /* One spare byte in front for the carry digit of fpc_round_up(). */
    char dbuf[18 + 1];
    char* digits = dbuf + 1;
    const int neg = (get_dbits(d) & signmask) != 0;
    char* out = dest;
    int K = 0;
    int special, ndigits;

    if(neg) {
        *out++ = '-';
    }

    special = filter_special(d, out);
    if(special) {
        return neg + special;
    }

    ndigits = grisu2(d, digits, &K);

    if( ndigits > RPOS ) {
        int carry;

        /* Drop everything behind the first RPOS digits... */
        K += ndigits - RPOS;
        ndigits = RPOS;
        /* ...and round on the first dropped digit. */
        carry = fpc_round_up( digits, RPOS );

        /* Remove trailing zeros, but always keep two digits. */
        while( ndigits > 2 && digits[ndigits - 1] == '0' )
        {
            --ndigits;
            ++K;
        }

        /* A '1' was prefixed: start one byte earlier and drop the last
           digit to keep the digit count. */
        if( carry < 0 )
        {
            --digits;
            ++K;
        }
    }

    return neg + emit_digits(digits, ndigits, out, K, neg);
}


#ifdef UNIT_TEST
/*
    gcc -DUNIT_TEST -O3 fpconv.c

    0x7f800001-0x7fffffff  nan -> 7fc00000
    0xff800001-0xffffffff -nan -> 7fc00000
*/

#include <stdio.h>
#include <stdlib.h>

/* Gives the unit test access to a float both as a value and as its raw
   32-bit pattern, so that every bit pattern can be enumerated. */
union UIntFloat
{
    uint32_t i;     /* raw bits */
    float f;        /* same storage read as a float */
};

/*
    Round-trip test for fpconv_ftoa() over a range of float bit patterns.

    Usage: prog [first [last]]   (bit patterns, any base accepted by strtoul)

    Every pattern in [first, last] is converted to text and parsed back with
    strtod().  A result that differs from the input counts as "modified";
    if converting the modified value once more does not reproduce it, it
    counts as "failed".
*/
int main( int argc, char** argv )
{
    union UIntFloat uf;
    union UIntFloat c1;
    union UIntFloat c2;
    /* Sized generously: the converters write up to 24 bytes by contract and
       a NUL terminator is appended here. */
    char str[32];
    char str2[32];
    char tmp[32];
    uint64_t total = 0;
    uint64_t modified = 0;
    uint64_t failed = 0;
    uint64_t per;
    uint32_t rangeEnd = 0x7fffffff;
    int len;

    uf.i = 0;
    if( argc > 1 )
    {
        /* strtoul: patterns above 0x7fffffff must not be clamped where
           long is 32 bits wide. */
        uf.i = (uint32_t) strtoul( argv[1], NULL, 0 );
        if( argc > 2 )
            rangeEnd = (uint32_t) strtoul( argv[2], NULL, 0 );
    }
    printf( "Converting 0x%08x through 0x%08x\n",
            (unsigned) uf.i, (unsigned) rangeEnd );

    if( uf.i <= rangeEnd )
    {
        for( ;; )
        {
            len = fpconv_ftoa( (double) uf.f, str );
            str[ len ] = '\0';
            c1.f = (float) strtod( str, 0 );

            if( c1.i != uf.i )
            {
                ++modified;

                len = fpconv_dtoa( (double) uf.f, tmp );
                tmp[ len ] = '\0';
                printf( "modified %08x  %08x  %s  %s\n",
                        (unsigned) uf.i, (unsigned) c1.i, str, tmp );

                len = fpconv_ftoa( (double) c1.f, str2 );
                str2[ len ] = '\0';
                c2.f = (float) strtod( str2, 0 );

                if( c2.i != c1.i )
                {
                    printf( "failed %08x  %08x  %s  %s\n",
                            (unsigned) c1.i, (unsigned) c2.i, str, str2 );
                    ++failed;
                }
            }
            ++total;

            if( (uf.i & 0x000fffff) == 0 )
            {
                per = modified * 100 / total;
                printf( "completed %08x: tot %llu, mod %llu (~%d%%), fail %llu\n",
                        (unsigned) uf.i,
                        (unsigned long long) total,
                        (unsigned long long) modified,
                        (int) per,
                        (unsigned long long) failed );
            }

            /* Test before incrementing so that a range ending at 0xffffffff
               terminates instead of wrapping around. */
            if( uf.i == rangeEnd )
                break;
            ++uf.i;
        }
    }

    /* An empty range must not divide by zero. */
    per = total ? modified * 100 / total : 0;
    printf( "fpconv_ftoa conversions: %llu, modified %llu (~%d%%), failed %llu\n",
            (unsigned long long) total,
            (unsigned long long) modified,
            (int) per,
            (unsigned long long) failed );
    return 0;
}
#endif
