#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include "pgemu.h"

// Rounding offset added to a double before it is converted to an integer
// (see its use in conv_cdouble_to_log()). It is 0.0 when ICC_RCD is defined
// and 0.5 otherwise.
// NOTE(review): presumably ICC_RCD builds rely on the compiler switch named
// below to change how the conversion itself rounds -- confirm.
#ifdef ICC_RCD
#define ONEHALF (0.0) // Intel CC with -rcd switch
#else
#define ONEHALF (0.5) // standard C
#endif

// utilities for internal use.
// Forward declarations of the file-local helpers so that the public pg_*()
// functions below can call them before their definitions appear.
// NOTE(review): the parameter names 'sing' (warn_float) and 'coedd1stp'
// (interpolate_2nd) look like typos for 'sign' and 'coeff1stp'.
static UINT64 conv_cdouble_to_float(double dsrc, int wexp, int wman);
static UINT64 conv_cdouble_to_log(double dsrc, int wexp, int wman);
static double conv_float_to_cdouble(UINT64 src, int wexp, int wman);
static UINT64 pow_float(UINT64 src, int n, int m, int wexp, int wman, int wentry);
static UINT64 conv_int_to_float(UINT64 src, int w, int wexp, int wman);
static UINT64 exp_offset(int wexp);
static void warn_float(int level, UINT64 sing, UINT64 exp, UINT64 man);
static UINT64 compose_float(UINT64 sign, UINT64 exp, UINT64 man, int wexp, int wman);
static void decompose_float(UINT64 src, int wexp, int wman, UINT64 *signp, UINT64 *expp, UINT64 *manp);
static UINT64 compose_log(UINT64 sign, UINT64 exp, UINT64 man, int wexp, int wman);
static void decompose_log(UINT64 src, int wexp, int wman, UINT64 *signp, UINT64 *expp, UINT64 *manp);
static UINT64 round_ubf1(UINT64 src, UINT64 wsrc, UINT64 wdst);
static int priority_encoder(UINT64 src, int w);
static int width_of(int value);
static void create_pow_table(int sign, int n, int m, int depth, int wman,
                              UINT64 **expp, UINT64 **man0thp, UINT64 **man1stp, UINT64 **man2ndp,
                              int *wexpp, int *wman0thp, int *wman1stp, int *wman2ndp);
static void allocate_pow_table(int sign, int n, int m, int depth,
                               UINT64 **expp, UINT64 **man0thp, UINT64 **man1stp, UINT64 **man2ndp);
static void interpolate_2nd(double f0, double fhalf, double f1, double dx, double *coedd1stp, double *coeff2ndp);
static void showusage(char *programname);
static void width_match_or_die(char *fname, int w1, int m1, int w2, int m2);

// test routines.
// Each of these is registered in the testmode[] table below.
static void test_conv_cdouble_to_float(void);
static void test_pow_float(void);
static void plot_pow_float(void);
static void generate_pow_table(void);
static void test_conv_int_to_float(void);
static void test_conv_float_to_cdouble(void);
static void generate_conv_float_to_log_table(void);
static void generate_conv_log_to_float_table(void);
static void test_conv_cdouble_to_log(void);

// Print a printf-style message to stderr when 'lv' is less than or equal to
// warn_level. Uses the GNU named variadic macro form ('args...' / '## args').
// NOTE(review): the expansion is a bare 'if' statement that already ends in
// ';', so it is not safe as the body of an unbraced if/else, and 'lv' is not
// parenthesized.
#define WARN(lv, fmt, args...) if (lv <= warn_level) fprintf(stderr, fmt, ## args);
static int warn_level = 2; // warning message output level. the higher the more verbose.

// One entry of the testmode[] table: a routine paired with a one-line
// description of what it does.
typedef struct {
    void (*func)();  // routine for this mode (declared with an unspecified parameter list).
    char *usage;     // human-readable description of the mode.
} TestMode;

static TestMode testmode[] = {
    test_conv_cdouble_to_float, "test ieee754double -> float conversion.",
    test_pow_float,             "test power function.",
    plot_pow_float ,            "plot power function outputs.",
    generate_pow_table,         "generate .mif file for power function table",
    test_conv_int_to_float,     "test int -> float conversion.",
    test_conv_float_to_cdouble, "test float -> ieee754double conversion.",
    generate_conv_float_to_log_table, "generate .mif file for float -> log conversion table",
    generate_conv_log_to_float_table, "generate .mif file for log -> float conversion table",
    test_conv_cdouble_to_log,   "test ieee754double -> log conversion.",
};


/*
 * global functions.
 * intended for use with emulation functions generated by 'pgdl2emulator'.
 */

// Delay element: in this emulation it simply copies 'src' to '*dstp'.
void
pg_delay(UINT64 src, UINT64 *dstp)
{
    *dstp = src;
}

// Two-way selector: stores 'srca' in '*dstp' when 'cond' equals 1, and
// 'srcb' for every other value of 'cond'. The width/mantissa arguments
// (w1..w4, m1..m4) are accepted but not used here.
void
pg_selector(UINT64 cond, int w1, int m1, UINT64 srca, int w2, int m2, UINT64 srcb, int w3, int m3,
            UINT64 *dstp, int w4, int m4)
{
    *dstp = (cond == 1) ? srca : srcb;
}

/*
 *  Unsigned integer comparator.
 *
 *  Returns 1 when the relation selected by 'mode' holds between srca and
 *  srcb, and 0 otherwise:
 *
 *    mode       "L"    "E"     "G"    "GE"    "NE"    "LE"
 *    relation   a<b    a==b    a>b    a>=b    a!=b    a<=b
 *
 *  Each mode name is accepted either all upper case or all lower case.
 *  Any other string prints an error to stderr and calls exit(1).
 */
static int
comp_int(UINT64 srca, UINT64 srcb, char *mode)
{
    if (strcmp(mode, "L") == 0 || strcmp(mode, "l") == 0) {
        return srca < srcb;
    }
    if (strcmp(mode, "E") == 0 || strcmp(mode, "e") == 0) {
        return srca == srcb;
    }
    if (strcmp(mode, "G") == 0 || strcmp(mode, "g") == 0) {
        return srca > srcb;
    }
    if (strcmp(mode, "GE") == 0 || strcmp(mode, "ge") == 0) {
        return srca >= srcb;
    }
    if (strcmp(mode, "NE") == 0 || strcmp(mode, "ne") == 0) {
        return srca != srcb;
    }
    if (strcmp(mode, "LE") == 0 || strcmp(mode, "le") == 0) {
        return srca <= srcb;
    }

    // no known mode matched.
    fprintf(stderr, "Error: comp_int: invalid comparation mode '%s'.\n", mode);
    exit(1);
}

/*
 * Signed comparator for w1-bit operands whose most significant bit is a
 * sign bit and whose remaining w1-1 bits are compared as a magnitude.
 *
 * Stores 1 in *dstp when the relation selected by 'mode' (see comp_int())
 * holds between srca and srcb, and 0 otherwise. The operand formats must
 * match (checked by width_match_or_die()). w3 and m3 are not used.
 * An invalid 'mode' is reported by comp_int(), which terminates the process.
 *
 * Ordering rules:
 *   - both sign bits clear : the magnitudes are compared directly.
 *   - sign bits differ     : the operand with its sign bit set is the smaller.
 *   - both sign bits set   : the magnitudes are compared with the operands
 *                            swapped, i.e. the larger magnitude is the
 *                            smaller value.
 *
 * The last rule replaces an earlier "negate the magnitude comparison"
 * step. The two agree whenever the magnitudes differ, but negation gave
 * wrong answers for two identical negative operands (L and G returned 1,
 * GE and LE returned 0).
 *
 * NOTE(review): a value with only the sign bit set is still ordered below
 * an all-zero value, as before.
 */
void
pg_comp_int(UINT64 srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3, char *mode)
{
    UINT64 mask, usrca, usrcb;
    int signa, signb, ret;

    width_match_or_die("pg_comp_int", w1, m1, w2, m2);

    mask = (UINT64)1 << (w1-1);       // sign-bit position.
    signa = (srca & mask) ? 1 : 0;
    signb = (srcb & mask) ? 1 : 0;
    usrca = srca & (mask - 1);        // magnitude bits below the sign bit.
    usrcb = srcb & (mask - 1);

    if (signa != signb) {
        // Only the signs matter. A set sign bit means "smaller", hence the
        // swapped arguments. comp_int() also rejects an invalid mode here.
        ret = comp_int((UINT64)signb, (UINT64)signa, mode);
    }
    else if (signa) {
        // both negative: the order of the magnitudes is reversed.
        ret = comp_int(usrcb, usrca, mode);
    }
    else {
        // both non-negative.
        ret = comp_int(usrca, usrcb, mode);
    }

    *dstp = ret;
}

// Float comparator: forwards all arguments unchanged to pg_comp_int(), so
// floats are compared with exactly the same sign-bit/remaining-bits rule.
void
pg_comp_float(UINT64 srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3, 
            char *mode)
{
    pg_comp_int(srca, w1, m1, srcb, w2, m2, dstp, w3, m3, mode);
}

// Log-format comparator: forwards all arguments unchanged to pg_comp_int().
void
pg_comp_log(UINT64 srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3, 
            char *mode)
{
    pg_comp_int(srca, w1, m1, srcb, w2, m2, dstp, w3, m3, mode);
}


#if 1 // new version

/*
 * hidden-1 & mantissa are passed to DSP.
 */

/*
 * Multiply two floats and store the product in *dstp.
 *
 * All three operands must share one format (checked by
 * width_match_or_die()): w1 bits in total, m1 mantissa bits and
 * e1 = w1 - m1 - 1 exponent bits.
 *
 * The result sign is the XOR of the input signs. The result exponent is
 * expa + expb - 2^(e1-1), plus one if the mantissa product carries out.
 * The mantissas are multiplied with their hidden one attached and reduced
 * to m3+1 bits with round_ubf1(). If either input has exp == 0 and
 * man == 0, the result is forced to 0.
 *
 * Exits with an error if the rounded mantissa has no bit at or above
 * position m3.
 *
 * NOTE(review): the product of two (m1+1)-bit values is held in a UINT64,
 * so this presumably requires 2*(m1+1) <= 64 -- confirm against callers.
 */
void
pg_mul_float(UINT64  srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3)
{
    UINT64 signa, expa, mana;
    UINT64 signb, expb, manb;
    UINT64 sign, exp, man;
    int e1 = w1 - m1 - 1;
    int nonzero;

    width_match_or_die("pg_mul_float", w1, m1, w2, m2);
    width_match_or_die("pg_mul_float", w1, m1, w3, m3);

    decompose_float(srca, e1, m1, &signa, &expa, &mana);
    decompose_float(srcb, e1, m1, &signb, &expb, &manb);

    sign = signa ^ signb;

    // a zero operand (exp == 0 and man == 0) forces a zero result.
    if ((expa == 0 && mana == 0) || (expb == 0 && manb == 0)) {
        nonzero = 0;
    }
    else {
        nonzero = 1;
    }

    // both exponents carry the offset; subtract one copy of it.
    exp = expa + expb;
    exp -= (UINT64)1 << (e1 - 1);

    // add hidden one.
    mana  |= ((UINT64)1 << m1);
    manb  |= ((UINT64)1 << m2);

    // multiply mantissa parts (w hidden one).
    man = mana * manb;

    //    fprintf(stderr, "srca: 0x%016llx  mana: 0x%016llx  expa: %d\n", srca, mana, (int)expa);
    //    fprintf(stderr, "srcb: 0x%016llx  manb: 0x%016llx  expb: %d\n", srcb, manb, (int)expb);
    //    fprintf(stderr, "man : 0x%016llx  exp: %d\n", man, (int)exp);

    // the product has either 2*(m1+1) or 2*(m1+1)-1 significant bits.
    if (man >> (m1 + m2 + 1)) { // MSB carried out
        man = round_ubf1(man, (m1 + 1) + (m2 + 1), m3 + 1);
        exp++;
    }
    else {
        man = round_ubf1(man, (m1 + 1) + (m2 + 1) - 1, m3 + 1);
    }
    // sanity check: the hidden-one position (bit m3) or above must be set.
    if (! (man >> m3)) {
        fprintf(stderr, "Error: pg_mul_float: something going wrong in normalization procedure.\n");
        exit(1);
    }
    man  &= ((UINT64)1 << m1) - 1; // remove hidden one.

    if (nonzero) {
        *dstp = compose_float(sign, exp, man, e1, m1);
    }
    else {
        *dstp = 0;
    }
}

#else // old version

/*
 * mantissa without hidden-1 is passed to DSP.
 *
 * (1 + ma) * (1 + mb) -> 1 + ma + mb + ma * mb
 *                     = round(ma * mb) + (ma + mb) + 1
 */

/*
 * Older variant of pg_mul_float(). It is compiled out while the
 * enclosing '#if 1' selects the newer version.
 *
 * The mantissas are multiplied without their hidden one. The full product
 * (1 + ma) * (1 + mb) is then rebuilt as
 * round(ma * mb) + (ma + mb) + 1, with the "+ 1" added at bit m1.
 * Sign, exponent and zero handling match the newer version.
 */
void
pg_mul_float(UINT64  srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3)
{
    UINT64 signa, expa, mana;
    UINT64 signb, expb, manb;
    UINT64 sign, exp, man;
    int e1 = w1 - m1 - 1;
    int nonzero;

    width_match_or_die("pg_mul_float", w1, m1, w2, m2);
    width_match_or_die("pg_mul_float", w1, m1, w3, m3);

    decompose_float(srca, e1, m1, &signa, &expa, &mana);
    decompose_float(srcb, e1, m1, &signb, &expb, &manb);

    sign = signa ^ signb;

    // a zero operand (exp == 0 and man == 0) forces a zero result.
    if ((expa == 0 && mana == 0) || (expb == 0 && manb == 0)) {
        nonzero = 0;
    }
    else {
        nonzero = 1;
    }

    // both exponents carry the offset; subtract one copy of it.
    exp = expa + expb;
    exp -= (UINT64)1 << (e1 - 1);

    // multiply mantissa parts (w/o hidden one).
    man = mana * manb;

    //    fprintf(stderr, "srca: 0x%016llx  mana: 0x%016llx  expa: %d\n", srca, mana, (int)expa);
    //    fprintf(stderr, "srcb: 0x%016llx  manb: 0x%016llx  expb: %d\n", srcb, manb, (int)expb);
    //    fprintf(stderr, "man : 0x%016llx  exp: %d\n", man, (int)exp);

    man = round_ubf1(man, 2 * m1, m1);  // ma * mb reduced to m1 bits.
    man += (mana + manb);               // + ma + mb
    man += (UINT64)1 << m1;             // + 1 (the hidden one).
    if (man >> (m1 + 1)) { // MSB carried out
        man = round_ubf1(man, m1 + 2, m1 + 1);
        exp++;
    }

    //    fprintf(stderr, "man : 0x%016llx  exp: %d\n", man, (int)exp);

    // sanity check: the hidden-one position (bit m1) or above must be set.
    if (! (man >> m1)) {
        fprintf(stderr, "Error: pg_mul_float: something going wrong in normalization procedure.\n");
        exit(1);
    }
    man  &= ((UINT64)1 << m1) - 1; // remove hidden one.

    if (nonzero) {
        *dstp = compose_float(sign, exp, man, e1, m1);
    }
    else {
        *dstp = 0;
    }
}

#endif // pg_mul_float

/*
 * Add two floats and store the sum in *dstp.
 *
 * All three operands must share one format (checked by
 * width_match_or_die()): w1 bits in total, m1 mantissa bits and
 * e1 = w1 - m1 - 1 exponent bits.
 *
 * Steps:
 *   1. The operands are swapped if needed so that 'a' has the larger
 *      (exp, man) pair. The result takes a's sign and exponent.
 *   2. The hidden one is attached to both mantissas. b's mantissa is then
 *      reduced with round_ubf1() to the bits that remain once its exponent
 *      is aligned with a's.
 *   3. Equal signs: the mantissas are added, and a carry-out is folded
 *      back with round_ubf1() and exp++.
 *      Different signs: b is subtracted from a. The result is renormalized
 *      using the leading-one position from priority_encoder(), and the
 *      exponent is lowered by the same amount.
 *   4. The result is forced to 0 when both inputs are zero (exp == 0 and
 *      man == 0) or when the operands cancel exactly.
 */
void
pg_add_float(UINT64  srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3)
{
    UINT64 signa, expa, mana;
    UINT64 signb, expb, manb;
    UINT64 sign, exp, man;
    int nza, nzb;
    int isadder, swap, one_at;
    int e1 = w1 - m1 - 1;
    int nonzero;

    width_match_or_die("pg_add_float", w1, m1, w2, m2);
    width_match_or_die("pg_add_float", w1, m1, w3, m3);

    decompose_float(srca, e1, m1, &signa, &expa, &mana);
    decompose_float(srcb, e1, m1, &signb, &expb, &manb);

    //    fprintf(stderr, "w:%d e:%d m:%d\n", w1, e1, m1);
    //    fprintf(stderr, "srca: %lld %llx %016llx\n", signa, expa, mana);
    //    fprintf(stderr, "srcb: %lld %llx %016llx\n", signb, expb, manb);


    isadder = (signa ^ signb ? 0 : 1); // perform addition if isadder == 1, subtraction if 0.

    // nza / nzb: 1 when the corresponding operand is not the all-zero encoding.
    nza = (expa == 0 && mana == 0) ? 0 : 1;
    nzb = (expb == 0 && manb == 0) ? 0 : 1;

    // exact cancellation, or both operands zero, gives a zero result.
    if ((expa == expb && mana == manb && !isadder) || (!nza && !nzb)) {
        nonzero = 0;
    }
    else {
        nonzero = 1;
    }

    // make 'a' the operand with the larger magnitude.
    swap = 0;
    if ((expa < expb) ||
        (expa == expb && mana < manb)) {
        swap = 1;
    }

    if (swap) {
        UINT64 tmp;

        tmp   = signa;
        signa = signb;
        signb = tmp;

        tmp  = expa;
        expa = expb;
        expb = tmp;

        tmp  = mana;
        mana = manb;
        manb = tmp;
    }
    sign = signa;
    exp  = expa;

    // append hidden one.
    mana |= (UINT64)1 << m1;
    manb |= (UINT64)1 << m1;

    // shift manb so that exponent of b matches that of a.
    // width_left is the number of bits of manb that survive the alignment.
    int width_left = m1 + 1 - (expa - expb);

    if (width_left > 0) {
	//	fprintf(stderr, "        manb:%llx\n", manb);
        manb = round_ubf1(manb, m1 + 1, width_left);
	//	fprintf(stderr, "rounded manb:%llx  left:%d  expa:%d  expb:%d\n", manb, width_left, expa, expb);
    }
    else if (width_left > - (m1 + 1)) {
	manb = nzb; // manb is shifted out. but it should not be ignored.
    }
    else {
	manb = 0;
    }

    if (isadder) {
        man = mana + manb;
	//	fprintf(stderr, "mana:%llx + manb:%llx = man:%llx\n", mana, manb, man);

        if (man >> (m1 + 1)) { // MSB carried out.
	    man = round_ubf1(man, m1 + 2, m1 + 1);
            exp++;
	    //	    fprintf(stderr, "shifted man:%llx\n", man);
        }
    }
    else {
        man = mana - manb;

	//	fprintf(stderr, "mana:%llx - manb:%llx = man:%llx\n", mana, manb, man);
        // locate the leading one, then widen the (one_at + 1)-bit value
        // back to m1 + 1 bits and lower the exponent to compensate.
        one_at = priority_encoder(man, m1 + 1);
	//        man = round_ubf1(man, m1 + 1, one_at + 1);
        man = round_ubf1(man, one_at + 1, m1 + 1);
        exp -= (m1 - one_at);

	//	fprintf(stderr, "shifted man:%llx  exp:%llx\n", man, exp);
    }
    man  &= ((UINT64)1 << m1) - 1; // remove hidden one.

    if (nonzero) {
        *dstp = compose_float(sign, exp, man, e1, m1);
    }
    else {
        *dstp = 0;
    }

    //    fprintf(stderr, "dst : %lld %llx %016llx\n", sign, exp, man);
    //    fprintf(stderr, "nonzero:%d  isadder:%d\n", nonzero, isadder);
    //    fprintf(stderr, "*dstp : %016llx\n\n", *dstp);
}

// Float subtraction: computes srca - srcb by flipping the sign field of
// srcb and handing both operands to pg_add_float().
void
pg_sub_float(UINT64  srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3)
{
    const int wexp_b = w2 - m2 - 1;
    UINT64 sign, exp, man;
    UINT64 negated;

    // take srcb apart, invert its sign (0 -> 1, anything else -> 0), rebuild.
    decompose_float(srcb, wexp_b, m2, &sign, &exp, &man);
    negated = compose_float(!sign, exp, man, wexp_b, m2);

    pg_add_float(srca, w1, m1, negated, w2, m2, dstp, w3, m3);
}

// Integer addition: *dstp = srca + srcb in UINT64 arithmetic.
// Only the operand widths w1 and w2 are checked against each other (the
// mantissa arguments of width_match_or_die() are passed as 0). d1, d2, w3
// and d3 are not used, and the sum is not masked to any width here.
void
pg_add_int(UINT64 srca, int w1, int d1, UINT64 srcb, int w2, int d2, UINT64 *dstp, int w3, int d3)
{
    width_match_or_die("pg_add_int", w1, 0, w2, 0);
    *dstp = srca + srcb;
}

// Integer subtraction: *dstp = srca - srcb in UINT64 arithmetic.
// Only the operand widths w1 and w2 are checked against each other (the
// mantissa arguments of width_match_or_die() are passed as 0). d1, d2, w3
// and d3 are not used, and the difference is not masked to any width here.
void
pg_sub_int(UINT64 srca, int w1, int d1, UINT64 srcb, int w2, int d2, UINT64 *dstp, int w3, int d3)
{
    width_match_or_die("pg_sub_int", w1, 0, w2, 0);
    *dstp = srca - srcb;
}

/*
 * Integer accumulator.
 *
 * src  : int of 2's complement.
 * dstp : int of 2's complement; it is both read (the running total) and
 *        written (the new total).
 * clear: when nonzero the running total is restarted from 0 before 'src'
 *        is added.
 *
 * The width arguments (w1, d1, w2, d2) are not used.
 */
void
pg_inc_int(UINT64  src , int w1, int d1, UINT64 *dstp, int w2, int d2, int clear)
{
    UINT64 total = clear ? 0 : *dstp;

    *dstp = total + src;
}

// Float accumulator: adds 'src' to the running total held in *dstp using
// pg_add_float(), with the result in the accumulator's own format (w2, m2).
// A nonzero 'clear' resets the running total to 0 first.
void
pg_inc_float(UINT64  src , int w1, int m1, UINT64 *dstp, int w2, int m2, int clear)
{
    if (clear) {
        *dstp = 0;
    }

    // the current total is passed by value, so it is read before
    // pg_add_float() overwrites *dstp.
    pg_add_float(src, w1, m1, *dstp, w2, m2, dstp, w2, m2);
}

// Public wrapper around pow_float(). The exponent width is derived as
// width - mantissa - 1, and 'resolution' is passed as pow_float()'s
// 'wentry' argument.
void
pg_pow_float(UINT64 src, UINT64 *dstp, int width, int mantissa, int n, int m, int resolution)
{
    *dstp = pow_float(src, n, m, width - mantissa - 1, mantissa, resolution);
}

/*
 * Scale a float by a power of two by adjusting its exponent field.
 *
 * 'direction' must be +1 (exponent increased by 'distance') or -1
 * (exponent decreased by 'distance'). Any other value is a fatal error.
 * Source and destination formats must match. An input with exp == 0 and
 * man == 0 yields 0. d2a and d2b are not used.
 */
void
pg_shift_float(UINT64 src, int w1, int m1, UINT64 distance, int d2a, int d2b, UINT64 *dstp, int w3, int m3, int direction)
{
    const int wexp = w1 - m1 - 1;
    UINT64 sign, exp, man;

    width_match_or_die("pg_shift_float", w1, m1, w3, m3);

    if (!(direction == 1 || direction == -1)) {
        fprintf(stderr, "Error: pg_shift_float: illegal direction (%d).\n", direction);
        exit(1);
    }

    decompose_float(src, wexp, m1, &sign, &exp, &man);

    // zero stays zero whatever the shift.
    if (exp == 0 && man == 0) {
        *dstp = 0;
        return;
    }

    // shift to the left if 'direction' == +1, shift to the right if -1.
    exp += direction * distance;
    *dstp = compose_float(sign, exp, man, wexp, m1);
}

/*
 * Convert a float (w1 bits, m1 mantissa bits) to a two's-complement
 * integer and store it in *dstp.
 *
 * The magnitude is the mantissa with its hidden one, shifted according to
 * the unbiased exponent and truncated (no rounding). It is then masked to
 * the low w2-1 bits. A set sign field negates the masked magnitude in
 * 64-bit two's complement. A negative unbiased exponent gives 0.
 *
 * When the unbiased exponent is >= w2-1 a warning is printed and the
 * conversion continues; the result has then lost its upper bits. A left
 * shift that would equal or exceed the width of the 64-bit working word is
 * undefined in C, so that case is handled explicitly: every bit has left
 * the word and the magnitude becomes 0.
 *
 * d2 is not used.
 */
void
pg_conv_float_to_int(UINT64   src, int w1, int m1, UINT64 *dstp, int w2, int d2)
{
    UINT64 sign, exp, man, dst;
    int e1 = w1 - m1 - 1;
    int iexp, shift;
    const int nbits = (int)(8 * sizeof man); // width of the working word.

    decompose_float(src, e1, m1, &sign, &exp, &man);

    iexp = exp;
    iexp -= (UINT64)1 << (e1 - 1); // remove offset.

    if (iexp >= w2 - 1) {
        fprintf(stderr, "Warning: pg_conv_float_to_int: too large exponent (%d).\n"
                "conversion result would not fit in %d-bit causing overflow.\n", iexp, w2);
        fprintf(stderr, "src:0x%016llx\n", src);
        //        exit(1);
    }

    man |= (UINT64)1 << m1; // append hidden one.
    shift = iexp - m1;      // >0: shift left, <0: shift right.
    if (shift >= nbits) {
        man = 0;            // everything shifted out; avoids an undefined shift.
    }
    else if (shift >= 0) {
        man <<= shift;
    }
    else if (iexp >= 0) {
        man >>= -shift;     // drops the fractional bits.
    }
    else {
        man = 0;            // |value| < 1.
    }

    man &= ((UINT64)1 << (w2 - 1)) - 1;
    if (sign) {
        dst = ~man + 1;
    }
    else {
        dst = man;
    }

    *dstp = dst;
}

/*
 * Convert a float from format (w1, m1) to format (w2, m2).
 *
 * The destination exponent field must be at least as wide as the source
 * one; otherwise this is a fatal error. When it is wider, the exponent is
 * re-biased from the source offset to the destination offset. A narrower
 * destination mantissa is produced with round_ubf1(); a wider or equal one
 * by shifting left.
 *
 * NOTE(review): there is no special case for an all-zero input, so with
 * e1 < e2 the re-biased exponent of a zero input is nonzero -- confirm
 * that this is intended.
 */
void
pg_conv_float_to_float(UINT64 src, int w1, int m1, UINT64 *dstp, int w2, int m2)
{
    const int src_wexp = w1 - m1 - 1;
    const int dst_wexp = w2 - m2 - 1;
    UINT64 sign, exp, man;

    decompose_float(src, src_wexp, m1, &sign, &exp, &man);

    if (src_wexp > dst_wexp) {
        fprintf(stderr, "Error: pg_conv_float_to_float: "
                "exponent of input must have smaller or equal width of that of output.\n");
        exit(1);
    }
    if (src_wexp < dst_wexp) {
        // swap the source offset for the destination offset.
        exp = exp - ((UINT64)1 << (src_wexp - 1)) + ((UINT64)1 << (dst_wexp - 1));
    }

    man = (m1 > m2) ? round_ubf1(man, m1, m2) : (man << (m2 - m1));

    *dstp = compose_float(sign, exp, man, dst_wexp, m2);
}

/*
 * Convert a float (w1, m1) to the logarithmic format (w2, m2).
 *
 * The sign is copied and the exponent is re-biased from the source offset
 * to the destination offset. The fraction is computed in double precision
 * as log2(1 + (man + 0.5) / 2^m1), scaled by 2^m2, and converted to an
 * integer after adding 0.5. If that reaches 2^m2, the exponent is
 * incremented and the fraction must wrap to exactly 0; anything else is a
 * fatal error.
 */
void
pg_conv_float_to_log(UINT64 src, int w1, int m1, UINT64 *dstp, int w2, int m2)
{
    const int wexp_in  = w1 - m1 - 1;
    const int wexp_out = w2 - m2 - 1;
    const int float_steps = (1<<m1);  // number of float mantissa codes (w/o hidden1).
    const int log_steps   = (1<<m2);  // number of log mantissa codes.
    UINT64 sign, exp, man_in, man_out;
    double linear, log2val;

    decompose_float(src, wexp_in, m1, &sign, &exp, &man_in);

    // remove the source offset, add the destination offset.
    exp = exp - ((UINT64)1 << (wexp_in - 1)) + ((UINT64)1 << (wexp_out - 1));

    linear  = 1.0 + (double)(man_in+0.5) / float_steps;
    log2val = log(linear) / log(2.0);
    man_out = (log2val * log_steps + 0.5);

    if (log_steps <= man_out) {
        // the fraction rounded up to 1.0: carry into the exponent.
        exp++;
        man_out &= ((UINT64)1 << m2) - 1;
        if (man_out) {
            fprintf(stderr, "Error: pg_conv_float_to_log: manb (0x" UINT64XFMT
                    ") should be zero, but not.\n", man_out);
            exit(1);
        }
    }

    *dstp = compose_log(sign, exp, man_out, wexp_out, m2);
}

/*
 * Convert a logarithmic-format value (w1, m1) to a float (w2, m2).
 *
 * The sign is copied and the exponent is re-biased from the source offset
 * to the destination offset. The float mantissa is computed in double
 * precision as 2^(man / 2^m1) - 1, scaled by 2^m2, and converted to an
 * integer after adding 0.5.
 */
void
pg_conv_log_to_float(UINT64 src, int w1, int m1, UINT64 *dstp, int w2, int m2)
{
    const int wexp_in  = w1 - m1 - 1;
    const int wexp_out = w2 - m2 - 1;
    const int log_steps   = (1<<m1);  // number of log mantissa codes.
    const int float_steps = (1<<m2);  // number of float mantissa codes (w/o hidden1).
    UINT64 sign, exp, man_in, man_out;
    double log2frac, linear;

    decompose_log(src, wexp_in, m1, &sign, &exp, &man_in);

    // remove the source offset, add the destination offset.
    exp = exp - ((UINT64)1 << (wexp_in - 1)) + ((UINT64)1 << (wexp_out - 1));

    log2frac = (double)man_in / log_steps;
    linear   = (pow(2.0, log2frac) - 1.0);
    man_out  = (linear * float_steps + 0.5);

    *dstp = compose_float(sign, exp, man_out, wexp_out, m2);
}

/*
 * Convert a logarithmic-format value from format (w1, m1) to (w2, m2).
 *
 * The destination exponent field must be at least as wide as the source
 * one; otherwise this is a fatal error. When it is wider, the exponent is
 * re-biased from the source offset to the destination offset. A narrower
 * destination mantissa is produced with round_ubf1(); a wider or equal one
 * by shifting left.
 */
void
pg_conv_log_to_log(UINT64 src, int w1, int m1, UINT64 *dstp, int w2, int m2)
{
    const int src_wexp = w1 - m1 - 1;
    const int dst_wexp = w2 - m2 - 1;
    UINT64 sign, exp, man;

    decompose_log(src, src_wexp, m1, &sign, &exp, &man);

    if (src_wexp > dst_wexp) {
        fprintf(stderr, "Error: pg_conv_log_to_log: "
                "exponent of input must have smaller or equal width of that of output.\n");
        exit(1);
    }
    if (src_wexp < dst_wexp) {
        // swap the source offset for the destination offset.
        exp = exp - ((UINT64)1 << (src_wexp - 1)) + ((UINT64)1 << (dst_wexp - 1));
    }

    man = (m1 > m2) ? round_ubf1(man, m1, m2) : (man << (m2 - m1));

    *dstp = compose_log(sign, exp, man, dst_wexp, m2);
}

/*
 * Change the width of an integer from w1 bits to w2 bits.
 *
 *   w1 == w2 : the value is passed through unchanged.
 *   w1 >  w2 : only the low w2 bits are kept.
 *   w1 <  w2 : if the bits of 'src' from position w1-1 upward are nonzero
 *              (the sign bit, for an input confined to w1 bits), every
 *              bit from position w1 upward is set in the 64-bit result;
 *              otherwise the value is passed through unchanged.
 *
 * m1 and m2 are not used.
 */
void
pg_conv_int_to_int(UINT64 src, int w1, int m1, UINT64 *dstp, int w2, int m2)
{
    UINT64 result = src;

    if (w1 > w2) {
        result = src & (((UINT64)1 << w2) - 1); // take lower w2 bits.
    }
    else if (w1 < w2) {
        int negative = src >> (w1 - 1);

        if (negative) {
            // e.g. 0xffff_ffff_ff00_0000 ORed onto the w1-bit value.
            result = src | ((UINT64)-1 ^ (((UINT64)1 << w1) - 1));
        }
    }

    *dstp = result;
}

/*
 * Square root in the logarithmic format: the exponent (with its offset
 * removed) and the mantissa are treated as one fixed-point number, which
 * is halved with a one-bit right shift before the offset is restored.
 *
 * Source and destination formats must match. A set sign field only
 * produces a warning; the sign is copied to the result unchanged.
 */
void
pg_sqrt_log_log(UINT64 src, int w1, int m1, UINT64 *dstp, int w2, int m2)
{
    const int e1 = w1 - m1 - 1;
    const int e2 = w2 - m2 - 1;
    UINT64 sign, exp, man;
    UINT64 offset, halved;

    width_match_or_die("pg_sqrt_log_log", w1, m1, w2, m2);

    decompose_log(src, e1, m1, &sign, &exp, &man);
    if (sign) {
        fprintf(stderr, "Warning: pg_sqrt_log_log: negative src (0x" UINT64XFMT ").\n", src);
    }

    offset = (UINT64)1 << (e1 - 1);
    halved = (((exp - offset) << m1) | man) >> 1;   // log2(x) / 2

    exp = (halved >> m1) + offset;                  // add offset.
    man = halved & (((UINT64)1 << m1) - 1);

    *dstp = compose_log(sign, exp, man, e2, m2);
}

/*
 * Multiply two logarithmic-format values: the concatenated
 * exponent/mantissa fields are added and one copy of the exponent offset
 * is removed from the sum. The result sign is the XOR of the input signs.
 *
 * All three operands must share one format. If either input has exp == 0
 * and man == 0, the result is 0.
 */
void
pg_mul_log(UINT64  srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3)
{
    const int e1 = w1 - m1 - 1;
    const UINT64 man_mask = ((UINT64)1 << m1) - 1;
    UINT64 signa, expa, mana;
    UINT64 signb, expb, manb;
    UINT64 sum, exp, man;

    width_match_or_die("pg_mul_log", w1, m1, w2, m2);
    width_match_or_die("pg_mul_log", w1, m1, w3, m3);

    decompose_log(srca, e1, m1, &signa, &expa, &mana);
    decompose_log(srcb, e1, m1, &signb, &expb, &manb);

    // a zero operand gives a zero product.
    if ((expa == 0 && mana == 0) || (expb == 0 && manb == 0)) {
        *dstp = 0;
        return;
    }

    sum = ((expa << m1) | mana) + ((expb << m1) | manb); // perform multiplication.
    exp = (sum >> m1) - ((UINT64)1 << (e1 - 1));         // remove offset.
    man = sum & man_mask;

    *dstp = compose_log(signa ^ signb, exp, man, e1, m1);
}

/*
 * Divide two logarithmic-format values (srca / srcb): the concatenated
 * exponent/mantissa field of b is subtracted from that of a and the
 * exponent offset is added back. The result sign is the XOR of the input
 * signs.
 *
 * All three operands must share one format. A zero divisor (exp == 0 and
 * man == 0) and an exponent underflow (expa + offset < expb) each only
 * print a warning. If either input is zero, the result is 0.
 */
void
pg_div_log(UINT64  srca, int w1, int m1, UINT64 srcb, int w2, int m2, UINT64 *dstp, int w3, int m3)
{
    const int e1 = w1 - m1 - 1;
    UINT64 signa, expa, mana;
    UINT64 signb, expb, manb;
    UINT64 offset, diff, exp, man;
    int a_is_zero, b_is_zero;

    width_match_or_die("pg_div_log", w1, m1, w2, m2);
    width_match_or_die("pg_div_log", w1, m1, w3, m3);

    decompose_log(srca, e1, m1, &signa, &expa, &mana);
    decompose_log(srcb, e1, m1, &signb, &expb, &manb);

    a_is_zero = (expa == 0 && mana == 0);
    b_is_zero = (expb == 0 && manb == 0);

    if (b_is_zero) {
        fprintf(stderr, "Warning: pg_div_log: division by zero (0x"
                UINT64XFMT ").\n", srcb);
    }

    offset = ((UINT64)1 << (e1 - 1));
    if (expa + offset < expb) {
        fprintf(stderr, "Warning: pg_div_log: exponent underflow\n"
                "expa:" UINT64DFMT " expb:" UINT64DFMT " offset:" UINT64DFMT
                " expa-expb+offset:" UINT64DFMT "\n",
                expa, expb, offset, expa - expb + offset);
    }

    if (a_is_zero || b_is_zero) {
        *dstp = 0;
        return;
    }

    diff = ((expa << m1) | mana) - ((expb << m1) | manb); // perform division.
    exp  = (diff >> m1) + offset;                         // add offset.
    man  = diff & (((UINT64)1 << m1) - 1);

    *dstp = compose_log(signa ^ signb, exp, man, e1, m1);
}


// Public wrapper around conv_int_to_float(): converts a w1-bit integer to a
// float with m2 mantissa bits and w2 - m2 - 1 exponent bits. d1 is not used.
void
pg_conv_int_to_float(UINT64  src, int w1, int d1, UINT64 *dstp, int w2, int m2)
{
    *dstp = conv_int_to_float(src, w1, w2 - m2 - 1, m2);
}

// Public wrapper around conv_cdouble_to_float(): converts a C double to a
// float with m1 mantissa bits and w1 - m1 - 1 exponent bits.
void
pg_conv_cdouble_to_float(double src, UINT64 *dstp, int w1, int m1)
{
    *dstp = conv_cdouble_to_float(src, w1 - m1 - 1, m1);
}

// Public wrapper around conv_cdouble_to_log(): converts a C double to the
// logarithmic format, passing w1 - m1 - 1 as the exponent width and m1 as
// the mantissa width.
void
pg_conv_cdouble_to_log(double src, UINT64 *dstp, int w1, int m1)
{
    *dstp = conv_cdouble_to_log(src, w1 - m1 - 1, m1);
}

// Public wrapper around conv_float_to_cdouble(): converts a float with m1
// mantissa bits and w1 - m1 - 1 exponent bits to a C double.
void
pg_conv_float_to_cdouble(UINT64 src, int w1, int m1, double *dstp)
{
    *dstp = conv_float_to_cdouble(src, w1 - m1 - 1, m1);
}

/*
 * local functions.
 * intended for internal use only.
 */

/*
 * Convert a C double to a float with 'wexp' exponent bits and 'wman'
 * mantissa bits.
 *
 * |dsrc| is split with frexp() and the significand is scaled to wman
 * fractional bits, then rounded with rint(). If the rounding carries out
 * of the hidden-one position, the mantissa is shifted back and the
 * exponent incremented. The exponent is stored with an offset of
 * 2^(wexp-1). An input of exactly 0.0 returns 0.
 *
 * Progress messages are printed through WARN() at levels 2 and 3.
 */
static UINT64
conv_cdouble_to_float(double dsrc, int wexp, int wman)
{
    UINT64 sign, exp, man, dst;
    int iexp;
    double dman, asrc;

    WARN(3, "\nconv_cdouble_to_float():\n");
    WARN(3, "dsrc: "    DOUBLEFMT "  " , dsrc);
    WARN(3, "wexp: "   INT32DFMT "  " , wexp);
    WARN(3, "wman: "   INT32DFMT "  " , wman);
    WARN(3, "\n");

    // sign is 1 for anything not strictly positive. Zero is handled
    // separately below.
    sign = (dsrc > 0.0) ? 0 : 1;
    asrc = (dsrc > 0.0) ? dsrc : -dsrc;
    dman = frexp(asrc, &iexp); // 0.5 <= dman < 1.0 for nonzero finite input
    iexp--;
    dman *= 2.0;               // 1.0 <= dman < 2.0
    man = rint(ldexp(dman, wman)); // hidden one ends up at bit 'wman'.
    WARN(3, "man w/ hidden-1: "    UINT64XFMT "  " , man);
    WARN(3, "\n");
    // rounding can reach 2^(wman+1); renormalize in that case.
    if (man >> (wman + 1)) {
        iexp++;
        man >>= 1;
        WARN(2, "man shifted 1-bit to the right. man:" UINT64XFMT "\n", man);
    }
    man  &= ((UINT64)1 << wman) - 1; // remove hidden one.
    exp = (UINT64)(iexp + ((UINT64)1 << (wexp - 1))); // add offset.
    dst = (dsrc == 0.0) ? 0 : compose_float(sign, exp, man, wexp, wman);

    warn_float(3, sign, exp, man);
    WARN(3, "return: " UINT64XFMT "  " , dst);
    WARN(3, "\n\n");

    return dst;
}

// Write the 64 bits of 'dst' into 'bdst' as '0'/'1' characters, most
// significant bit first, followed by a terminating NUL. 'bdst' must have
// room for at least 65 characters.
static void
uint64_to_binary(UINT64 dst, char *bdst)
{
    char *out = bdst;
    UINT64 probe;

    // walk a single-bit mask from bit 63 down to bit 0.
    for (probe = (UINT64)1 << 63; probe != 0; probe >>= 1) {
        *out++ = (dst & probe) ? '1' : '0';
    }
    *out = 0;
}

/*
 * Convert a C double to the logarithmic format with 'wexp' exponent bits
 * and 'wman' mantissa bits.
 *
 * The result is log2(|dsrc|) scaled by 2^wman, with ONEHALF added before
 * the conversion to an integer, plus an offset of 2^(wexp+wman-1). For a
 * negative input, bit (wexp + wman) is set as the sign.
 *
 * Returns 0 for:
 *   - an input of exactly 0.0,
 *   - a scaled logarithm that is negative after conversion to an integer,
 *   - a result that would reach the sign bit (a warning is printed).
 *
 * NOTE(review): the "negative" test runs before the offset is added, so
 * inputs with magnitude roughly below 1 return 0 even though the offset
 * could represent them -- confirm that this is intended.
 * NOTE(review): the local 'sign' is assigned but never read.
 */
static UINT64
conv_cdouble_to_log(double dsrc, int wexp, int wman)
{
    UINT64 sign, dst = 0;
    UINT64 signmask;
    INT64 logpart;
    double asrc;
    int width = wexp + wman + 2;
    char bdst[128];

    WARN(3, "\nconv_cdouble_to_log():\n");
    WARN(3, "dsrc: "    DOUBLEFMT "  " , dsrc);
    WARN(3, "wexp: "   INT32DFMT "  " , wexp);
    WARN(3, "wman: "   INT32DFMT "  " , wman);
    WARN(3, "\n");

    // log(2.0) is computed once and cached across calls.
    static double l2 = 0.0;
    if (l2 == 0.0) {
	l2 = log(2.0);
    }
    signmask = (UINT64)1 << (width - 2); // bit (wexp + wman).

    sign = (dsrc > 0.0) ? 0 : 1;
    asrc = (dsrc > 0.0) ? dsrc : -dsrc;

    if (asrc == 0.0) { // a zero is mapped to a zero.
        return 0;
    }

    if (dsrc < 0.0) {
        dst |= signmask;
    }
    // log2(|dsrc|) as a fixed-point number with 'wman' fractional bits.
    logpart = ONEHALF + ((double)((UINT64)1 << wman)) * (log(asrc) / l2);

    if (logpart < 0) { // a very small input is mapped to a zero.
        return 0;
    }

    logpart += (INT64)1 << (wexp + wman - 1); // add offset.

    if (logpart >= signmask) {
        fprintf(stderr, "Warning: conv_cdouble_to_log: too large src %e\n", dsrc);
        dst = 0; // a very large input is mapped to a zero.
        return dst;
    }

    dst |= logpart;

    // binary rendering used only by the level-3 message below.
    uint64_to_binary(dst, bdst);

    WARN(3, "return: " UINT64XFMT "  (%s)" , dst, bdst);
    WARN(3, "\n\n");

    return dst;
}


// Scan downward from bit position w-1 and return the first position p > 0
// for which (src >> p) is nonzero. Returns 0 when no such position exists
// (src is 0 or 1), and returns w-1 unchanged when w-1 <= 0.
// The result is also reported through WARN() at level 4.
static int
priority_encoder(UINT64 src, int w)
{
    int pos = w - 1;

    while (pos > 0 && !(src >> pos)) {
        pos--;
    }
    WARN(4, "priority_encoder: src: " UINT64XFMT "  one_at: %d\n", src, pos);
    return pos;
}

/*
 * Convert a w-bit two's-complement integer to a float with 'wexp'
 * exponent bits and 'wman' mantissa bits.
 *
 * The sign is taken from bit w-1 and the magnitude is reduced to w-1 bits.
 * The position of the leading one (priority_encoder()) becomes the
 * exponent, stored with exp_offset(wexp) added. The magnitude is resized
 * to wman+1 bits with round_ubf1() and the hidden one is removed.
 * A zero input returns 0.
 *
 * NOTE(review): for the most negative w-bit value the magnitude masks to
 * 0, so that input does not appear to convert to its true magnitude --
 * confirm against callers.
 */
static UINT64
conv_int_to_float(UINT64 src, int w, int wexp, int wman)
{
    UINT64 sign, exp, man, dst;
    UINT64 asrc;
    int one_at, iexp;

    WARN(3, "\nconv_int_to_float():\n");
    WARN(3, "src: "    UINT64XFMT "  w: %d  ", src, w);
    WARN(3, "wexp: %d  wman: %d\n" , wexp, wman);

    sign = (src >> (w - 1)) ? 1 : 0; // check MSB of src.
    asrc = src;
    if (sign) {
        // two's-complement negation to obtain the magnitude.
        asrc = ~src;
        asrc++;
    }
    asrc &= ((UINT64)1 << (w - 1)) - 1; // take lower w-1 bits.
    WARN(3, "|src|: " INT64XFMT "\n", asrc);

    // the leading-one position is the unbiased exponent.
    one_at = priority_encoder(asrc, w - 1);
    iexp = one_at;
    exp = iexp + exp_offset(wexp);

    // resize the (one_at + 1)-bit magnitude to wman + 1 bits.
    man = round_ubf1(asrc, one_at + 1, wman + 1);
    man &= ((UINT64)1 << wman) - 1; // remove hidden-1 bit.

    dst = (src == (UINT64)0) ? 0 : compose_float(sign, exp, man, wexp, wman);

    // decompose again so the diagnostics show what was actually encoded.
    decompose_float(dst, wexp, wman, &sign, &exp, &man);
    warn_float(3, sign, exp, man);
    WARN(3, "return: " UINT64XFMT "  " , dst);
    WARN(3, "\n\n");

    return dst;
}

/*
 * Convert a float with 'wexp' exponent bits and 'wman' mantissa bits to a
 * C double.
 *
 * The mantissa with its hidden one is scaled with ldexp() by
 * exp - exp_offset(wexp) - wman. An input with exp == 0 and man == 0 maps
 * to 0.0. A nonzero sign field negates the result.
 *
 * Progress messages are printed through WARN() at levels 3 and 4.
 */
static double
conv_float_to_cdouble(UINT64 src, int wexp, int wman)
{
    UINT64 sign, exp, man;
    double ddst;

    WARN(3, "\nconv_float_to_cdouble():\n");
    WARN(3, "src: "    INT64XFMT "  " , src);
    WARN(3, "wexp: "   INT32DFMT "  " , wexp);
    WARN(3, "wman: "   INT32DFMT "  " , wman);
    WARN(3, "\n");

    decompose_float(src, wexp, wman, &sign, &exp, &man);
    warn_float(4, sign, exp, man);
    WARN(4, "\n\n");

    ddst = ((UINT64)1 << wman) | man; // add hidden-1 bit.
    // remove the exponent offset and account for the wman fractional bits.
    ddst = ldexp(ddst, (int)exp - exp_offset(wexp) - wman);

    // the all-zero exponent/mantissa encoding means zero.
    if (exp == 0 && man == 0) ddst = 0.0;
    if (sign) ddst *= -1;

    WARN(3, "return: " DOUBLEFMT "  " , ddst);
    WARN(3, "\n\n");

    return ddst;
}

/*
 * calculate pow(src, n/m) on a floating-point word src
 * (wexp-bit exponent, wman-bit mantissa without hidden-1 bit).
 *
 * the exponent is divided by |m| (a shift by log2(|m|)) and multiplied by |n|.
 * the mantissa is obtained by a table lookup followed by a 2nd-order
 * interpolation. the table has (1 << wentry) entries and is addressed by
 * the lower log2(|m|) bits of the unbiased exponent concatenated with the
 * upper bits of the mantissa. tables are built by create_pow_table(), which
 * aborts unless n/m is negative and |m| is a power of 2.
 *
 * src must not be negative; the program exits with an error otherwise.
 */
static UINT64
pow_float(UINT64 src, int n, int m, int wexp, int wman, int wentry)
{
    static UINT64 *exp_table = NULL;
    static UINT64 *man0th_table = NULL;
    static UINT64 *man1st_table = NULL;
    static UINT64 *man2nd_table = NULL;
    UINT64 sign, exp, man, dst;
    UINT64 entry, entry_exp, entry_man;
    UINT64 man_lowerbits;
    UINT64 delta_exp, delta0th, delta1st;
    UINT64 coeff0th, coeff1st, coeff2nd;
    int an, am, snm;
    int wentry_exp, wentry_man, wman_lowerbits;
    int wexp_table, wman0th_table, wman1st_table, wman2nd_table;

    WARN(3, "\npow_float():\n");
    WARN(3, "src: "    UINT64XFMT "  " , src);
    WARN(3, "n: "      INT32DFMT "  " , n);
    WARN(3, "m: "      INT32DFMT "  " , m);
    WARN(3, "wexp: "   INT32DFMT "  " , wexp);
    WARN(3, "wman: "   INT32DFMT "  " , wman);
    WARN(3, "\n");

    // src is an unsigned bit pattern, so its sign has to be taken from
    // the decoded sign bit (comparing src itself with 0 can never be true).
    // a word with exponent and mantissa both 0 is zero and is accepted.
    decompose_float(src, wexp, wman, &sign, &exp, &man);
    if (sign && (exp != 0 || man != 0)) {
        fprintf(stderr, "Error: pow_float: src (" DOUBLEFMT ") must be positive.\n",
                conv_float_to_cdouble(src, wexp, wman));
        exit(1);
    }

    snm = (n * m < 0) ? -1 : 1;    // sign of n/m

    an  = abs(n);
    am  = abs(m);
    WARN(3, "sign n/m:%d  abs n:%d  abs m:%d\n", snm, an, am);

    warn_float(3, sign, exp, man);

    /*
     * exponent part
     */
    exp -= exp_offset(wexp);
    WARN(4, "remove offset. exp: " UINT64XFMT "\n", exp);

    wentry_exp = log2(am);                             // lower bits of exp are used as
    entry_exp = exp & (((UINT64)1 << wentry_exp) - 1); // a part of mantissa-table entry.

    exp >>= wentry_exp;
    WARN(4, "divide by %d: " UINT64XFMT "\n", am, exp);
    exp *= an;
    WARN(4, "multiply by %d: " UINT64XFMT "\n", an, exp);
    if (snm < 0) {
        exp = exp_offset(wexp) - exp;
    }
    else {
        exp = exp_offset(wexp) + exp;
    }
    WARN(4, "append sign & offset: " UINT64XFMT " (" UINT64DFMT ")\n", exp, exp);

    /*
     * mantissa part
     */

    // (re)build the tables. n, m and wentry are validated in there.
    create_pow_table(snm, an, am, wentry, wman,
                      &exp_table, &man0th_table, &man1st_table, &man2nd_table,
                      &wexp_table, &wman0th_table, &wman1st_table, &wman2nd_table);

    // calculate table entry
    wentry_man = wentry - wentry_exp;
    entry_man  = man >> (wman - wentry_man);
    entry      = (entry_exp << wentry_man) | entry_man;
    WARN(4, "wentry:%d wentry_exp:%d  wentry_man:%d\n", wentry, wentry_exp, wentry_man);
    WARN(3,
         "entry:"     UINT64XFMT "  "
         "entry_exp:" UINT64XFMT "  "
         "entry_man:" UINT64XFMT "  \n", entry, entry_exp, entry_man);

    // table lookup. mantissa bits not used for the entry drive the interpolation.
    wman_lowerbits = wman - wentry_man;
    man_lowerbits = man & (((UINT64)1 << wman_lowerbits) - 1);
    delta_exp = exp_table[entry];
    coeff0th  = man0th_table[entry];
    coeff0th |= (UINT64)1 << wman; // add hidden-1 bit.
    coeff1st  = man1st_table[entry];
    coeff2nd  = man2nd_table[entry];

    // 2nd-order interpolation
    delta1st  = (coeff2nd * man_lowerbits) >> wman_lowerbits; // take higher coeff2nd'length bits.
    coeff1st -= delta1st;
    delta0th  = (coeff1st * man_lowerbits) >> wman_lowerbits; // take higher coeff1st'length bits.
    coeff0th -= delta0th;

    // normalization: the subtraction may have cleared the hidden-1 bit.
    if (!(coeff0th >> wman)) {
        coeff0th <<=1;
        exp--;
        WARN(3, "coeff0th shifted 1-bit to the left. man:" UINT64XFMT "\n", coeff0th);
    }

    man = coeff0th;
    exp -= delta_exp;
    dst = compose_float(sign, exp, man, wexp, wman);

    decompose_float(dst, wexp, wman, &sign, &exp, &man);
    warn_float(3, sign, exp, man);
    WARN(3, "return: " UINT64XFMT "  " , dst);
    WARN(3, "\n\n");

    return dst;
}

/*
 * offset (bias) added to the exponent of a floating-point word
 * having a wexp-bit exponent field: 2^(wexp-1).
 */
static UINT64
exp_offset(int wexp)
{
    UINT64 offset = 1;

    offset <<= wexp - 1;
    return offset;
}

/*
 * print sign, exponent (decimal and hex) and mantissa (hex) of a decomposed
 * floating-point word through WARN() at the given warning level,
 * followed by a newline.
 */
static void
warn_float(int level, UINT64 sign, UINT64 exp, UINT64 man)
{
    WARN(level, "sign: "   UINT64DFMT "  " , sign);
    WARN(level, "exp: "    UINT64DFMT "  (" UINT64XFMT ")  ", exp, exp);
    WARN(level, "man: "    UINT64XFMT "  " , man);
    WARN(level, "\n");
}

/*
 * pack sign, exp and man into a floating-point word laid out as
 *
 *   [wexp+wman]        : sign (1 bit)
 *   [wexp+wman-1:wman] : exp  (wexp bits)
 *   [wman-1:0]         : man  (wman bits)
 *
 * bits of the arguments that do not fit into their fields are discarded.
 */
static UINT64
compose_float(UINT64 sign, UINT64 exp, UINT64 man, int wexp, int wman)
{
    UINT64 word;

    WARN(4, "compose_float\n");
    warn_float(4, sign, exp, man);
    WARN(4, "wexp:%d  wman:%d\n", wexp, wman);

    word = ((sign & (UINT64)1) << (wexp + wman))
         | ((exp & (((UINT64)1 << wexp) - 1)) << wman)
         | (man & (((UINT64)1 << wman) - 1));

    // report the fields as they actually ended up in the packed word.
    decompose_float(word, wexp, wman, &sign, &exp, &man);
    warn_float(4, sign, exp, man);

    return word;
}

/*
 * unpack a floating-point word src into its sign (1 bit, at bit wexp+wman),
 * exponent (wexp bits, starting at bit wman) and mantissa (lower wman bits).
 * the fields are stored to *signp, *expp and *manp, respectively.
 */
static void
decompose_float(UINT64 src, int wexp, int wman, UINT64 *signp, UINT64 *expp, UINT64 *manp)
{
    const UINT64 exp_mask = ((UINT64)1 << wexp) - 1;
    const UINT64 man_mask = ((UINT64)1 << wman) - 1;

    *signp = (src >> (wexp + wman)) & (UINT64)1;
    *expp  = (src >> wman) & exp_mask;
    *manp  = src & man_mask;
}

/*
 * pack sign, exp and man into a word of logarithmic format.
 *
 * compose_log is merely an alias of compose_float for now,
 * but this may change in the future.
 * callers of this function should not rely on this fact.
 */
static UINT64
compose_log(UINT64 sign, UINT64 exp, UINT64 man, int wexp, int wman)
{
    return compose_float(sign, exp, man, wexp, wman);
}

/*
 * unpack a word of logarithmic format into *signp, *expp and *manp.
 *
 * currently this just forwards to decompose_float;
 * callers should not rely on the two formats being identical.
 */
static void
decompose_log(UINT64 src, int wexp, int wman, UINT64 *signp, UINT64 *expp, UINT64 *manp)
{
    decompose_float(src, wexp, wman, signp, expp, manp);
}

/*
 * unbiased force-1 rounding.
 *
 * convert src of wsrc-bit width into wdst-bit width.
 *   wsrc <= wdst : zeros are appended below the lsb (none if the widths match).
 *   wsrc >  wdst : the lower (wsrc - wdst) bits are dropped. if any of the
 *                  dropped bits is 1, the lsb of the result is forced to 1.
 */
static UINT64
round_ubf1(UINT64 src, UINT64 wsrc, UINT64 wdst)
{
    UINT64 ndrop, newlsb;

    if (wsrc <= wdst) {
        return src << (wdst - wsrc);
    }

    ndrop  = wsrc - wdst;
    newlsb = (UINT64)1 << ndrop;   // bit which becomes the lsb of the result.
    if ((src & (newlsb - 1)) != 0) {
        src |= newlsb;
    }

    return src >> ndrop;
}

/*
 * returns necessary bit width to express value.
 * used by perl text embedded in the template files.
 *
 * for |value| > 1 the result is the bit length of (|value| - 1),
 * i.e. floor(log2(|value| - 1)) + 1. for |value| <= 1 the result is 1.
 *
 * the bit length is counted with integer shifts so that the result does
 * not depend on the rounding of a floating-point log2(), and the magnitude
 * is taken in unsigned arithmetic so that the most negative int is handled
 * without overflow.
 */
static int
width_of(int value)
{
    unsigned int magnitude;
    unsigned int rest;
    int width;

    magnitude = (value < 0) ? 0u - (unsigned int)value : (unsigned int)value;
    if (magnitude <= 1) {
        return 1;
    }

    width = 0;
    for (rest = magnitude - 1; rest != 0; rest >>= 1) {
        width++;
    }

    return width;
}

/*
 * create tables used to calculate mantissa part of pow(src, sign * n/m).
 *
 * table entry:
 *   [depth-1 : depth-log2(m)] : lower  log2(m)       bits of src's exponent.
 *   [depth-log2(m)-1 : 0]     : higher depth-log2(m) bits of src's mantissa.
 *
 * table output: following arrays of size (1 << depth).
 *
 *   ------------------------
 *   array       width
 *   ------------------------
 *   exp[ ]      *wexpp
 *   man0th[ ]   *wman0thp
 *   man1st[ ]   *wman1stp
 *   man2nd[ ]   *wman2ndp
 *   ------------------------
 *
 * the arrays are (re)allocated by allocate_pow_table() through
 * expp, man0thp, man1stp and man2ndp, and are filled on every call.
 *
 * restrictions (the program exits with an error message otherwise):
 *   - n and m must be positive.
 *   - sign must not be positive, i.e., only negative power index is supported.
 *   - m must be a power of 2.
 *   - the table must have at least m entries (1 << depth >= m).
 */
static void
create_pow_table(int sign, int n, int m, int depth, int wman,
                  UINT64 **expp, UINT64 **man0thp, UINT64 **man1stp, UINT64 **man2ndp,
                  int *wexpp, int *wman0thp, int *wman1stp, int *wman2ndp)
{
    UINT64 *exp, *man0th, *man1st, *man2nd;
    int i, segment, ngps;
    int size = 1 << depth;
    double pindex = (double)sign * n / m;
    double fscale = (UINT64)1 << wman;
    double x, dx;
    double f0, fhalf, f1;
    double dman1st, dman2nd;
    int exp_adjust;
    UINT64 exp_max = (UINT64)0, man0th_max = (UINT64)0, man1st_max = (UINT64)0, man2nd_max = (UINT64)0;

    WARN(4, "create_pow_table\n");
    WARN(4, "pindex:%f depth:%d  size:%d  fscale: %f\n", pindex, depth, size, fscale);

    // validate the parameters before anything is allocated or divided.
    if (n <= 0 || m <= 0) {
        fprintf(stderr, "Error: create_pow_table: n & m (numerator & denominator of power index)\n"
                "must be positive.\n");
        exit(1);
    }

    if (sign > 0) {
        fprintf(stderr, "Error: create_pow_table: does not accept positive power index "
                "(%d/%d) so far. \n", n, m);
        exit(1);
    }

    if ((m & (m - 1)) != 0) {
        fprintf(stderr, "Error: create_pow_table: m (denominator of power index)\n"
                "must be a power of 2. i.e. {m|1, 2, 4, 8, ...}\n");
        exit(1);
    }

    ngps = size / m; // number of grids per segment.
    if (ngps == 0) {
        // each of the m segments needs at least one grid point,
        // otherwise 'i / ngps' below would divide by zero.
        fprintf(stderr, "Error: create_pow_table: table size (%d) must not be smaller than\n"
                "m (%d, denominator of power index).\n", size, m);
        exit(1);
    }

    // allocate new tables, if necessary.
    allocate_pow_table(sign, n, m, depth, expp, man0thp, man1stp, man2ndp);
    exp    = *expp;
    man0th = *man0thp;
    man1st = *man1stp;
    man2nd = *man2ndp;

    for (i = 0; i < size; i++) {
        segment = i / ngps;                 // 2^segment <= x < 2^(segment + 1)
        dx = (double)(1 << segment) / ngps;
        x = (double)(1 << segment) + (i - ngps * segment) * dx;
        WARN(5, "i: %5d  ngps:%d  segment:%d  dx:%f  x:%f\n",
             i, ngps, segment, dx, x);

        // sample the function at both ends and at the middle of the grid,
        // and fit a quadratic to them.
        f0    = pow(x + dx * 0.0, pindex);
        fhalf = pow(x + dx * 0.5, pindex);
        f1    = pow(x + dx * 1.0, pindex);
        interpolate_2nd(f0, fhalf, f1, dx, &dman1st, &dman2nd);

        // normalize so that 1.0 <= f0 < 2.0, and remember the shift count.
        exp_adjust = 0;
        while (f0 < 1.0) {
            exp_adjust++;
            f0       *= 2;
            dman1st *= 2;
            dman2nd *= 2;
        }

        exp[i] = exp_adjust;
        man0th[i]  =  rint(f0      * fscale);
        man0th[i] &=  ((UINT64)1 << wman) - 1; // remove hidden-1 bit.
        man1st[i]  = -rint(dman1st * fscale);  // stored with its sign flipped.
        man2nd[i]  =  rint(dman2nd * fscale);

        if (exp[i] > exp_max)       exp_max    = exp[i];
        if (man0th[i] > man0th_max) man0th_max = man0th[i];
        if (man1st[i] > man1st_max) man1st_max = man1st[i];
        if (man2nd[i] > man2nd_max) man2nd_max = man2nd[i];

        WARN(5, "i:%4d  x:%le  dx:%le  ", i, x, dx);
        WARN(5, "f:%le  df:%le  df1:%le\n", f0, dman1st, dman2nd);
        WARN(5, "exp:%3lld  man0th:0x%llx  man1st:0x%llx  man2nd:0x%llx\n\n",
             exp[i], man0th[i], man1st[i], man2nd[i]);

    }
    WARN(4,
         "exp_max:" UINT64XFMT "  man0th_max:" UINT64XFMT
         "  man1st_max:" UINT64XFMT "  man2nd_max:" UINT64XFMT "\n",
         exp_max, man0th_max, man1st_max, man2nd_max);

    if (man0th_max > ((UINT64)1 << (wman + 1))) {
        fprintf(stderr,
                "Error: create_pow_table: necessary width of man0th "
                "exceeds width of (mantissa + hidden-1). abort.\n"
                "man0th_max:" UINT64XFMT "  wman:%d\n", man0th_max, wman);
        exit(1);
    }

    // report the bit width of each table.
    *wexpp    = width_of(exp_max);
    *wman0thp = wman;
    *wman1stp = width_of(man1st_max);
    *wman2ndp = width_of(man2nd_max);

    WARN(4, "necessary table width:  exp:%d  man0th:%d  man1st:%d  man2nd:%d\n",
         *wexpp, *wman0thp, *wman1stp, *wman2ndp);

    fflush(stdout);
}

/*
 * fit a quadratic to three samples of a function f taken at x (f0),
 * x + dx/2 (fhalf) and x + dx (f1), and return its 1st- and 2nd-order
 * coefficients with respect to the normalized offset t (0 <= t <= 1):
 *
 *   f(x + t * dx) ~ f0 + (*coeff1stp) * t + (*coeff2ndp) * t * t
 */
static void
interpolate_2nd(double f0, double fhalf, double f1, double dx,
                double *coeff1stp, double *coeff2ndp)
{
    // 2nd derivative, estimated from the three samples.
    double curvature = 4 * (f0 + f1 - 2 * fhalf) / (dx * dx);

    // mean slope over [x, x + dx], and the slope extrapolated back to x.
    double mean_slope = (f1 - f0) / dx;
    double slope_at_x = mean_slope - curvature * dx * 0.5;

    *coeff1stp = slope_at_x * dx;
    *coeff2ndp = curvature * dx * dx * 0.5;
}

/*
 * allocate four tables of (1 << depth) entries each, for the power index
 * sign * n/m, and store them to *expp, *man0thp, *man1stp and *man2ndp.
 * the tables are zero-cleared. exits with an error if allocation fails.
 *
 * if *expp is not NULL, the caller's old tables are either reused or freed:
 * they are reused (nothing is done) only if they are the very tables
 * handed out by the latest allocation and all of sign, n, m and depth are
 * the same as then. the parameters of the latest allocation are kept in
 * statics shared by all callers, so the table pointer is checked as well;
 * otherwise a caller holding tables of a different size could be told to
 * reuse them just because another caller allocated with these parameters.
 */
static void
allocate_pow_table(int sign, int n, int m, int depth,
                   UINT64 **expp, UINT64 **man0thp, UINT64 **man1stp, UINT64 **man2ndp)
{
    static int sign0  = 0;
    static int n0     = 0;
    static int m0     = 0;
    static int depth0 = 0;
    static UINT64 *exp0 = NULL; // exp table handed out by the latest allocation.
    size_t size;

    // reuse old table.
    if (*expp != NULL && *expp == exp0 &&
        sign == sign0 && n == n0 && m == m0 && depth == depth0) {
        return;
    }

    // free old tables.
    if (*expp != NULL) {
        free(*expp);
        free(*man0thp);
        free(*man1stp);
        free(*man2ndp);
    }

    // allocate new tables.
    size = (size_t)1 << depth;

    *expp    = calloc(size, sizeof **expp);
    *man0thp = calloc(size, sizeof **man0thp);
    *man1stp = calloc(size, sizeof **man1stp);
    *man2ndp = calloc(size, sizeof **man2ndp);
    if (*expp == NULL || *man0thp == NULL || *man1stp == NULL || *man2ndp == NULL) {
        perror("Error: allocate_pow_table");
        exit(1);
    }

    // save info of new ones.
    sign0  = sign;
    n0     = n;
    m0     = m;
    depth0 = depth;
    exp0   = *expp;
}

/*
 * make sure that input and output have the same total width (w1 vs. w2)
 * and the same mantissa width (m1 vs. m2).
 * if not, print an error message prefixed with fname and exit.
 * the total width is examined first.
 */
static void
width_match_or_die(char *fname, int w1, int m1, int w2, int m2)
{
    if (w1 == w2 && m1 == m2) {
        return;
    }

    if (w1 != w2) {
        fprintf(stderr, "Error: %s: widths of input (%d) and output (%d) do not match.\n", fname, w1, w2);
    }
    else {
        fprintf(stderr, "Error: %s: widths of mantissa of input (%d) and output (%d) do not match.\n", fname, m1, m2);
    }
    exit(1);
}


/*
 * test routines
 */

/*
 * interactive test of conv_cdouble_to_float().
 * reads src, wexp and wman from stdin and performs the conversion.
 * the result is not printed here; it is reported by the WARN() output
 * of conv_cdouble_to_float() itself.
 * exits with an error if the parameters cannot be read.
 */
static void
test_conv_cdouble_to_float(void)
{
    double src;
    int wexp, wman;

    fprintf(stderr, "conv_cdouble_to_float() double src, int wexp, int wman: ");
    if (scanf(DOUBLEFMT INT32DFMT INT32DFMT, &src, &wexp, &wman) != 3) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: test_conv_cdouble_to_float: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");
    (void)conv_cdouble_to_float(src, wexp, wman);
}


/*
 * interactive test of conv_float_to_cdouble().
 * reads src, wexp and wman from stdin, converts src into a C double
 * and prints both to stdout.
 * exits with an error if the parameters cannot be read.
 */
static void
test_conv_float_to_cdouble(void)
{
    double ddst;
    int wexp, wman;
    UINT64 src;

    fprintf(stderr, "conv_float_to_cdouble() UINT64 src, int wexp, int wman: ");
    if (scanf(UINT64XFMT " %d %d", &src, &wexp, &wman) != 3) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: test_conv_float_to_cdouble: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");

    ddst = conv_float_to_cdouble(src, wexp, wman);

    printf("src: " UINT64XFMT "  ->  dst: %e\n", src, ddst);
}


/*
 * interactive test of pow_float().
 * reads src, n, m, wexp, wman and wentry from stdin, calculates
 * pow(src, n/m) in the floating-point format and prints the source
 * and the result as C doubles to stdout.
 * exits with an error if the parameters cannot be read.
 */
static void
test_pow_float(void)
{
    double dsrc, ddst;
    int n, m, wexp, wman, wentry;
    UINT64 src, dst;

    fprintf(stderr, "pow_float() double src, int n, int m, int wexp, int wman, int wentry: ");
    if (scanf(DOUBLEFMT INT32DFMT INT32DFMT INT32DFMT INT32DFMT INT32DFMT,
              &dsrc, &n, &m, &wexp, &wman, &wentry) != 6) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: test_pow_float: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");

    // double -> float format -> pow -> double.
    src  = conv_cdouble_to_float(dsrc, wexp, wman);
    dst  = pow_float(src, n, m, wexp, wman, wentry);
    ddst = conv_float_to_cdouble(dst, wexp, wman);

    printf("src: %f  ->  dst: %f\n", dsrc, ddst);
}

/*
 * tabulate the error of pow_float() against pow() of the C library.
 *
 * reads n, m, wexp, wman and wentry from stdin. x is swept from 1.0 up to
 * (but not including) 2^m, multiplied by (1 + 1/2048) at each step.
 * for each x, one line consisting of x, pow_float result, exact value,
 * absolute error and relative error is printed to stdout.
 * exits with an error if the parameters cannot be read.
 */
static void
plot_pow_float(void)
{
    double x, dx, y, y_exact;
    double lowerlimit, upperlimit;
    int n, m, wexp, wman, wentry;
    UINT64 src, dst;

    fprintf(stderr, "pow_float() int n, int m, int wexp, int wman, int wentry: ");
    if (scanf(INT32DFMT INT32DFMT INT32DFMT INT32DFMT INT32DFMT,
              &n, &m, &wexp, &wman, &wentry) != 5) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: plot_pow_float: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");

    lowerlimit = 1.0;
    upperlimit = pow(2.0, m);
    dx = 1.0 / 2048.0;
    printf("# x    y    y_exact    err_abs    err_rel\n");
    for (x = lowerlimit; x < upperlimit; x *= (1.0 + dx)) {
        src  = conv_cdouble_to_float(x, wexp, wman);
        dst  = pow_float(src, n, m, wexp, wman, wentry);
        y = conv_float_to_cdouble(dst, wexp, wman);
        y_exact = pow(x, (double)n / m);
        // the relative error falls back to the absolute one if y_exact is 0.
        printf("%e  %e  %e  % e  % e\n",
               x, y, y_exact, (y - y_exact),
               (y - y_exact) / (y_exact == 0.0 ? 1.0 : y_exact));
    }
}

/*
 * generate the table used by pow_float() and dump it to stdout in a
 * memory-initialization text format (DEPTH, WIDTH, radices and a CONTENT block).
 *
 * reads n, m, wexp, wman, wentry and a file name from stdin. the four tables
 * created by create_pow_table() are packed into one word per entry:
 *
 *   exp | man0th | man1st | man2nd   (man2nd at the lsb side)
 *
 * each line of the content holds the address and the packed word, followed
 * by the four fields individually after " -- ". widths of the four fields
 * are written to the file given by the file name, so that they can be
 * picked up later.
 *
 * exits with an error if the parameters cannot be read or the file
 * cannot be written.
 */
static void
generate_pow_table(void)
{
    static UINT64 *exp_table = NULL;
    static UINT64 *man0th_table = NULL;
    static UINT64 *man1st_table = NULL;
    static UINT64 *man2nd_table = NULL;
    UINT64 exp, man0th, man1st, man2nd, tableval;
    int width_exp, width_man0th, width_man1st, width_man2nd, width;
    int shift_exp, shift_man0th, shift_man1st;
    int n, m, wexp, wman, wentry;
    int an, am, snm;
    int size, i;
    char tmpfilename[256];
    FILE *fp;

    // get parameters from stdin.
    // the file name is limited to the size of its buffer (255 chars + NUL).
    fprintf(stderr, "generate_pow_table() int n, int m, int wexp, int wman, int wentry, char *tmpfilename: ");
    if (scanf(INT32DFMT INT32DFMT INT32DFMT INT32DFMT INT32DFMT " %255s",
              &n, &m, &wexp, &wman, &wentry, tmpfilename) != 6) {
        fprintf(stderr, "\nError: generate_pow_table: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");

    // create tables.
    snm = (n * m < 0) ? -1 : 1;    // sign of n/m
    an  = abs(n);
    am  = abs(m);
    create_pow_table(snm, an, am, wentry, wman,
                      &exp_table, &man0th_table, &man1st_table, &man2nd_table,
                      &width_exp, &width_man0th, &width_man1st, &width_man2nd);

    // combine the tables and dump.
    size = 1 << wentry;

    width = width_exp + width_man0th + width_man1st + width_man2nd;
    shift_exp    = width_man0th + width_man1st + width_man2nd;
    shift_man0th = width_man1st + width_man2nd;
    shift_man1st = width_man2nd;

    printf("DEPTH = %d;\n", size);
    printf("WIDTH = %d;\n", width);
    printf("\n");
    printf("ADDRESS_RADIX = HEX;\n");
    printf("DATA_RADIX = HEX;\n");
    printf("\n");
    printf("CONTENT\n");
    printf("  BEGIN\n");
    for (i = 0; i < size; i++) {

        // get values from tables.
        exp       = exp_table[i];
        man0th    = man0th_table[i];
        man1st    = man1st_table[i];
        man2nd    = man2nd_table[i];

        // mask them.
        exp      &= ((UINT64)1 << width_exp)    - 1;
        man0th   &= ((UINT64)1 << width_man0th) - 1;
        man1st   &= ((UINT64)1 << width_man1st) - 1;
        man2nd   &= ((UINT64)1 << width_man2nd) - 1;

        // shift and combine them.
        tableval  = exp    << shift_exp;
        tableval |= man0th << shift_man0th;
        tableval |= man1st << shift_man1st;
        tableval |= man2nd;

        // every number is zero-padded to the hex digits its width requires.
        printf("    %0*X : %0*llX;",
               (wentry - 1) / 4 + 1, (unsigned int)i,
               (width  - 1) / 4 + 1, (unsigned long long)tableval);
        printf("  --  %0*llX  %0*llX  %0*llX  %0*llX",
               (width_exp    - 1) / 4 + 1, (unsigned long long)exp,
               (width_man0th - 1) / 4 + 1, (unsigned long long)man0th,
               (width_man1st - 1) / 4 + 1, (unsigned long long)man1st,
               (width_man2nd - 1) / 4 + 1, (unsigned long long)man2nd);

        printf("\n");
    }
    printf("  END;\n");

    // leave the widths of the four fields in the file.
    fp = fopen(tmpfilename, "w");
    if (!fp) {
        perror("Error: generate_pow_table");
        exit(1);
    }
    fprintf(fp, "%d %d %d %d\n", width_exp, width_man0th, width_man1st, width_man2nd);
    if (fclose(fp) != 0) {
        perror("Error: generate_pow_table");
        exit(1);
    }
}

/*
 * interactive test of conv_int_to_float().
 * reads src, w, wexp and wman from stdin and performs the conversion.
 * the result is not printed here; it is reported by the WARN() output
 * of conv_int_to_float() itself.
 * exits with an error if the parameters cannot be read.
 */
static void
test_conv_int_to_float(void)
{
    UINT64 src;
    int w, wexp, wman;

    fprintf(stderr, "conv_int_to_float() UINT64 src (in 0x... format), int w, int wexp, int wman: ");
    if (scanf(UINT64XFMT INT32DFMT INT32DFMT INT32DFMT, &src, &w, &wexp, &wman) != 4) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: test_conv_int_to_float: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");
    (void)conv_int_to_float(src, w, wexp, wman);
}

/*
 * generate a table which converts fractional part of a number expressed in
 * floating-point format into mantissa of that number in logarithmic format.
 *
 * reads fman and lman from stdin and dumps the table of (1 << fman) entries
 * to stdout in a memory-initialization text format. entry i holds
 * log2(1 + (i + 0.5) / 2^fman), scaled by 2^lman and rounded to the
 * nearest integer.
 *
 * exits with an error if the parameters cannot be read.
 */
static void
generate_conv_float_to_log_table(void)
{
    double fixed;
    double logarithmic;
    int i, j;
    int fmax, lmax;
    int fman, lman;

    // get parameters from stdin.
    // mantissa of float will have fman bits (w/o hidden1).
    // mantissa of log will have lman bits .
    fprintf(stderr, "generate_conv_float_to_log_table() int fman, int lman: ");
    if (scanf(INT32DFMT INT32DFMT, &fman, &lman) != 2) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: generate_conv_float_to_log_table: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");

    printf("DEPTH = %d;\n", 1<<fman);
    printf("WIDTH = %d;\n", lman + 1); // an extra 1 bit indicates output overflow at input very close to 2.0.
    printf("\n");
    printf("ADDRESS_RADIX = HEX;\n");
    printf("DATA_RADIX = HEX;\n");
    printf("\n");
    printf("CONTENT\n");
    printf("  BEGIN\n");

    fmax = (1<<fman);
    lmax = (1<<lman);
    for (i = 0; i < fmax; i++) {
        fixed = 1.0 + (double)(i+0.5) / fmax;   // middle of the i-th interval of [1.0, 2.0).
        logarithmic = log(fixed) / log(2.0);
        j = (logarithmic * lmax + 0.5);         // round to the nearest.
#if 1
        printf("    %X : %X ;\n",i, j);
#else
        printf("%8.7lf    %8.7lf        0x%03x    0x%02x\n",
               fixed, logarithmic, i, j);
#endif
    }
    printf("  END;\n");
}


/*
 * generate a table which converts mantissa of a number expressed in
 * logarithmic format into that of the number in floating-point format.
 *
 * reads lman and fman from stdin and dumps the table of (1 << lman) entries
 * to stdout in a memory-initialization text format. entry i holds
 * (2^(i / 2^lman) - 1), scaled by 2^fman, rounded to the nearest integer,
 * and masked to fman bits.
 *
 * exits with an error if the parameters cannot be read.
 */
static void
generate_conv_log_to_float_table(void)
{
    double fixed;
    double logarithmic;
    int i, j;
    int fmax, lmax;
    int fman, lman;

    // get parameters from stdin.
    // mantissa of log will have lman bits .
    // mantissa of float will have fman bits (w/o hidden1).
    fprintf(stderr, "generate_conv_log_to_float_table() int lman, int fman: ");
    if (scanf(INT32DFMT INT32DFMT, &lman, &fman) != 2) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: generate_conv_log_to_float_table: failed to read parameters.\n");
        exit(1);
    }
    fprintf(stderr, "\n\n");

    printf("DEPTH = %d;\n", 1<<lman);
    printf("WIDTH = %d;\n", fman);
    printf("\n");
    printf("ADDRESS_RADIX = HEX;\n");
    printf("DATA_RADIX = HEX;\n");
    printf("\n");
    printf("CONTENT\n");
    printf("  BEGIN\n");

    lmax = (1<<lman);
    fmax = (1<<fman);
    for (i = 0; i < lmax; i++) {
        logarithmic = (double)i / lmax;
        fixed = (pow(2.0, logarithmic) - 1.0);
        j = (fixed * fmax + 0.5);               // round to the nearest.

#if 1
        // the masked value is a UINT64 expression; narrow it to match %X.
        printf("    %X : %X ;\n", i,
               (unsigned int)((((UINT64)1 << fman) - 1) & j));
#else
        printf("%8.7lf    %8.7lf        0x%03x    0x%02x\n",
               fixed, logarithmic, i, j);
#endif
    }
    printf("  END;\n");
}

/*
 * interactive test of conv_cdouble_to_log().
 * reads src, wexp and wman from stdin and performs the conversion.
 * the result is not printed here.
 * exits with an error if the parameters cannot be read.
 */
static void
test_conv_cdouble_to_log(void)
{
    double src;
    int wexp, wman;

    fprintf(stderr, "conv_cdouble_to_log() double src, int wexp, int wman: ");
    if (scanf(DOUBLEFMT INT32DFMT INT32DFMT, &src, &wexp, &wman) != 3) {
        // do not go on with uninitialized parameters.
        fprintf(stderr, "\nError: test_conv_cdouble_to_log: failed to read parameters.\n");
        exit(1);
    }
    (void)conv_cdouble_to_log(src, wexp, wman);
}

/*
 * set the file-level variable warn_level to the given level.
 * presumably WARN() compares its first argument against this value
 * to decide whether to print -- confirm against the definition of WARN.
 */
static void
set_warn_level(int level)
{
    warn_level = level;
}

/*
 * print command-line usage to stderr, followed by the list of
 * available test programs (ID and usage text of each testmode[] entry).
 */
static void
showusage(char *programname)
{
    int nitems = sizeof(testmode)/sizeof(testmode[0]);
    int id = 0;

    fprintf(stderr, "usage: %s <test_program_ID> [warning_level]\n", programname);
    while (id < nitems) {
        fprintf(stderr, "  %2d) %s\n", id, testmode[id].usage);
        id++;
    }
}

#if MAINFUNC

/*
 * entry point of the test driver.
 *
 *   argv[1] : ID of the test program to run (index into testmode[]).
 *   argv[2] : warning level (optional, defaults to 0).
 *
 * both arguments must be decimal integers without trailing garbage.
 * if an argument is missing, malformed or out of range, the usage is
 * shown and the program exits with status 1.
 */
int
main(int argc, char **argv)
{
    const long nmodes = (long)(sizeof(testmode)/sizeof(testmode[0]));
    long mode;
    long wlevel = 0;
    char *end;

    if (argc < 2) {
        showusage(argv[0]);
        exit (1);
    }

    // strtol is used instead of atoi so that a non-numeric argument
    // is rejected rather than silently taken as 0.
    errno = 0;
    mode = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        mode < 0 || nmodes <= mode) {
        showusage(argv[0]);
        exit (1);
    }

    if (argc > 2) {
        errno = 0;
        wlevel = strtol(argv[2], &end, 10);
        if (errno != 0 || end == argv[2] || *end != '\0' ||
            (long)(int)wlevel != wlevel) {
            showusage(argv[0]);
            exit (1);
        }
    }
    set_warn_level((int)wlevel);

    testmode[mode].func();

    exit (0);
}

#endif // MAINFUNC
