/*
 * <libname>.c: <prefix> emulator user library.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <errno.h>
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <<prefix>util.h>

/*
 * ONEHALF is 0.0 when ICC_RCD is defined and 0.5 otherwise.
 * It is not referenced by the fixed part of this file; presumably the
 * generated conversion code uses it as a rounding offset -- TODO confirm.
 */
#ifdef ICC_RCD
#define ONEHALF (0.0) // Intel CC with -rcd switch
#else
#define ONEHALF (0.5) // standard C
#endif

/*
 * WARN(lv, fmt, ...): print a formatted message to stderr when lv is not
 * greater than warn_level.
 * NOTE(review): the expansion is an unbraced `if` that already ends in a
 * semicolon, so `if (c) WARN(...); else ...` does not parse. The named
 * variadic parameter (`args...`) and `## args` are GNU extensions.
 */
#define WARN(lv, fmt, args...) if (lv <= warn_level) fprintf(stderr, fmt, ## args);
static int warn_level = 2; // warning message output level. the higher the more verbose.

// NPIPE is the value reported by the get_number_of_pipelines entry points.
#define NPIPE 1
// JMEMSIZE is the value reported by the get_jmemsize entry points.
#define JMEMSIZE <JWORDS>

// Written by the set_n entry points; upper bound of the j loop in processor().
static UINT32 Nbodies;

/*
 * raw bit data
 */
<COEFFVARS>
<IPVARS>
<JPVARS>
<FOVARS>

/*
 * local function prototypes
 */
// Calls pipeline() Nbodies times for each i in [0, ni).
static void processor(int ni);
// Pipeline emulator. processor() passes run_begin = 1 on the first call of
// each i and 0 on the following calls.
static void pipeline(<PIPEARGS>
    int run_begin);

// COEFF conversion
<COEFFCONV_PROTOTYPE>
// JP conversion
<JPCONV_PROTOTYPE>
// IP conversion
<IPCONV_PROTOTYPE>
// FO conversion
<FOCONV_PROTOTYPE>

<FOUND_COEFFSET>
/*
 * Coefficient setter for device devid.
 * The whole body after the declaration of k is supplied by the code
 * generator; presumably it converts the arguments into the raw bit data
 * declared at the top of this file -- TODO confirm against the generator.
 */
void
<prefix>_set_coeffMC(int devid, <COEFFARGS>)
{
    int k; // not used by the fixed part; available to the generated body
<COEFFCONV>
}
</FOUND_COEFFSET>

<FOUND_JPSET>
/*
 * j-particle setter for device devid.
 * Runs the generated conversion code once for every j in [0, nj).
 * devid and adr are not referenced by the fixed part of this function;
 * whether the generated code uses them cannot be told from here.
 */
void
<prefix>_set_jpMC(int devid, int adr, int nj, <JPARGS>)
{
    int j, k; // k is not used by the fixed part; available to the generated body

    for (j = 0; j < nj; j++) {
<JPCONV>
    }
}
</FOUND_JPSET>

<FOUND_IPSET>
/*
 * i-particle setter for device devid.
 * Runs the generated conversion code once for every i in [0, ni).
 * devid is not referenced by the fixed part of this function.
 */
void
<prefix>_set_ipMC(int devid, int ni, <IPARGS>)
{
    int i, k; // k is not used by the fixed part; available to the generated body

    for (i = 0; i < ni; i++) {
<IPCONV>
    }
}
</FOUND_IPSET>

<FOUND_FOSET>
/*
 * Result getter for device devid.
 * This is where the emulated calculation actually happens: processor(ni)
 * is run first, then the generated conversion code is executed once for
 * every i in [0, ni).
 * devid is not referenced by the fixed part of this function.
 */
void
<prefix>_get_foutMC(int devid, int ni, <FOARGS>)
{
    int i, k; // k is not used by the fixed part; available to the generated body

    processor(ni);
    for (i = 0; i < ni; i++) {
<FOCONV>
    }
}
</FOUND_FOSET>

/*
 * Open device devid. Intentionally a no-op in this emulator library.
 */
void
<prefix>_openMC(int devid)
{
    (void)devid; // nothing to open
}

/*
 * Close device devid. Intentionally a no-op in this emulator library.
 */
void
<prefix>_closeMC(int devid)
{
    (void)devid; // nothing to close
}

/*
 * Start a run on device devid. Intentionally a no-op here: the emulated
 * calculation is carried out by processor(), which the fout getter calls.
 */
void
<prefix>_runMC(int devid)
{
    (void)devid; // nothing to start
}

/*
 * Record n as the number of pipeline() calls processor() makes per i.
 * The value is stored in the file-scope Nbodies; devid is ignored.
 */
void
<prefix>_set_nMC(int devid, int n)
{
    (void)devid;
    Nbodies = (UINT32)n;
}

/*
 * Report the number of pipelines, which is the constant NPIPE for every
 * devid.
 */
int
<prefix>_get_number_of_pipelinesMC(int devid)
{
    (void)devid;
    return NPIPE;
}

/*
 * Report the j-memory size, which is the constant JMEMSIZE for every devid.
 */
int
<prefix>_get_jmemsizeMC(int devid)
{
    (void)devid;
    return JMEMSIZE;
}

/*
 * Single-device convenience wrapper: forwards to the MC variant with
 * device id 0.
 */
void
<prefix>_open(void)
{
    <prefix>_openMC(0);
}

/*
 * Single-device convenience wrapper: forwards to the MC variant with
 * device id 0.
 */
void
<prefix>_close(void)
{
    <prefix>_closeMC(0);
}

<FOUND_COEFFSET>
/*
 * Single-device convenience wrapper: forwards its arguments to the MC
 * variant with device id 0. The forwarded argument list is generated.
 */
void
<prefix>_set_coeff(<COEFFARGS>)
{
    int devid = 0;
    <prefix>_set_coeffMC(devid<COEFFCALL>);
}
</FOUND_COEFFSET>

<FOUND_JPSET>
/*
 * Single-device convenience wrapper: forwards adr, nj and the generated
 * argument list to the MC variant with device id 0.
 */
void
<prefix>_set_jp(int adr, int nj, <JPARGS>)
{
    int devid = 0;
    // Not referenced by the fixed part; presumably the generated argument
    // list indexes with j0 -- TODO confirm against the generator.
    int j0 = 0;
    <prefix>_set_jpMC(devid, adr, nj<JPCALL>);
}
</FOUND_JPSET>

<FOUND_IPSET>
/*
 * Single-device convenience wrapper: forwards ni and the generated
 * argument list to the MC variant with device id 0.
 */
void
<prefix>_set_ip(int ni, <IPARGS>)
{
    int devid = 0;
    <prefix>_set_ipMC(devid, ni<IPCALL>);
}
</FOUND_IPSET>

/*
 * Single-device convenience wrapper: forwards to the MC variant with
 * device id 0.
 */
void
<prefix>_run(void)
{
    <prefix>_runMC(0);
}

/*
 * Single-device convenience wrapper: passes nj to the MC variant with
 * device id 0.
 */
void
<prefix>_set_n(int nj)
{
    <prefix>_set_nMC(0, nj);
}

<FOUND_FOSET>
/*
 * Single-device convenience wrapper: forwards ni and the generated
 * argument list to the MC variant with device id 0.
 */
void
<prefix>_get_fout(int ni, <FOARGS>)
{
    int devid = 0;
    <prefix>_get_foutMC(devid, ni<FOCALL>);
}
</FOUND_FOSET>

/*
 * Single-device convenience wrapper: returns what the MC variant reports
 * for device id 0.
 */
int
<prefix>_get_number_of_pipelines(void)
{
    return <prefix>_get_number_of_pipelinesMC(0);
}

/*
 * Single-device convenience wrapper: returns what the MC variant reports
 * for device id 0.
 */
int
<prefix>_get_jmemsize(void)
{
    int devid = 0;
    // The result must be returned explicitly; falling off the end of a
    // non-void function hands the caller an indeterminate value.
    return <prefix>_get_jmemsizeMC(devid);
}

<FOUND_IPSET>
<FOUND_FOSET>
/*
 * Process ni i-particles in chunks of at most np, where np is the value
 * returned by the get_number_of_pipelines wrapper. For each chunk the
 * sequence is: set_ip, run, get_fout. The per-chunk argument lists are
 * generated; presumably they offset the caller's arrays by `off` --
 * TODO confirm against the generator.
 */
void
<prefix>_calculate_fout_on_ip(<IPARGS>, <FOARGS>, int ni)
{
    int off, nii, np;

    np = <prefix>_get_number_of_pipelines();

    for (off = 0; off < ni; off += np) {
	// The last chunk may be shorter than np.
	nii = np;
	if (off+nii > ni) {
	    nii = ni - off;
	}

	<prefix>_set_ip(nii<IPCALL_OFF>);
	<prefix>_run();
	<prefix>_get_fout(nii<FOCALL_OFF>);
    }
}
</FOUND_FOSET>
</FOUND_IPSET>

<COEFFRANGE_DEFINITION>
<JPRANGE_DEFINITION>
<IPRANGE_DEFINITION>
<FORANGE_DEFINITION>

<COEFFCONV_DEFINITION>
<JPCONV_DEFINITION>
<IPCONV_DEFINITION>
<FOCONV_DEFINITION>

<OLD_API_EXISTS>
/*
 * API for backward compatibility.
 */

// Number of i-slots per device in the old API: the second dimension of the
// Eps2 table and the limit asserted on ni by the eps setters. Equals NPIPE.
#define NIPMAX NPIPE

</OLD_API_EXISTS>

<OLD_API_FOUND_ETA>
/*
 * Old API: forwards eta as the only argument of the set_coeff wrapper.
 */
void
<prefix>_set_eta(double eta)
{
    <prefix>_set_coeff(eta);
}

/*
 * Old API: forwards devid and eta to the set_coeffMC entry point.
 */
void
<prefix>_set_etaMC(int devid, double eta)
{
    <prefix>_set_coeffMC(devid, eta);
}
</OLD_API_FOUND_ETA>

<OLD_API_FOUND_XMJ>
/*
 * Old API: forwards adr, nj and a generated argument list to the set_jp
 * wrapper. Presumably the generated list is built from xj, mj and j0 --
 * TODO confirm against the generator.
 */
void
<prefix>_set_xmj(int adr, int nj, double (*xj)[3], double *mj)
{
    int j0 = 0; // not referenced by the fixed part of this function
    <prefix>_set_jp(adr, nj<JPCALL>);
}

/*
 * Old API: forwards devid, adr, nj and a generated argument list to the
 * set_jpMC entry point. Presumably the generated list is built from xj, mj
 * and j0 -- TODO confirm against the generator.
 */
void
<prefix>_set_xmjMC(int devid, int adr, int nj, double (*xj)[3], double *mj)
{
    int j0 = 0; // not referenced by the fixed part of this function
    <prefix>_set_jpMC(devid, adr, nj<JPCALL>);
}
</OLD_API_FOUND_XMJ>
<OLD_API_FOUND_EPS2>

// Squared eps values, one row per device and one entry per i-slot. Filled by
// the eps/eps2 setters below and passed to set_ipMC by the set_xiMC entry point.
static double Eps2[NHIB][NIPMAX];

/*
 * Old API, single-device wrapper: forwards ni and xi to the MC variant with
 * device id 0.
 */
void
<prefix>_set_xi(int ni, double (*xi)[3])
{
    <prefix>_set_xiMC(0, ni, xi);
}

/*
 * Old API: sets the i-particles of device devid by calling set_ipMC with xi
 * and the device's row of the Eps2 table.
 * NOTE(review): devid is used as an index into Eps2 without a range check.
 */
void
<prefix>_set_xiMC(int devid, int ni, double (*xi)[3])
{
    <prefix>_set_ipMC(devid, ni, xi, Eps2[devid]);
}

/*
 * Old API, single-device wrapper: forwards ni and eps to the MC variant
 * with device id 0.
 */
void
<prefix>_set_eps(int ni, double *eps)
{
    <prefix>_set_epsMC(0, ni, eps);
}

/*
 * Old API: store eps[i]^2 into the Eps2 row of device devid for every i in
 * [0, ni). ni must not exceed NIPMAX (checked with assert).
 */
void
<prefix>_set_epsMC(int devid, int ni, double *eps)
{
    double *row;
    int slot;

    assert(ni <= NIPMAX);

    row = Eps2[devid];
    for (slot = 0; slot < ni; slot++) {
        double e = eps[slot];

        row[slot] = e * e;
    }
}

/*
 * Old API, single-device wrapper: checks ni against NIPMAX, then forwards
 * ni and eps2 to the MC variant with device id 0.
 */
void
<prefix>_set_eps2(int ni, double *eps2)
{
    assert(ni <= NIPMAX);
    <prefix>_set_eps2MC(0, ni, eps2);
}

/*
 * Old API: copy eps2[i] into the Eps2 row of device devid for every i in
 * [0, ni). ni must not exceed NIPMAX (checked with assert).
 */
void
<prefix>_set_eps2MC(int devid, int ni, double *eps2)
{
    double *row;
    int slot;

    assert(ni <= NIPMAX);

    row = Eps2[devid];
    for (slot = 0; slot < ni; slot++) {
        row[slot] = eps2[slot];
    }
}

/*
 * Old API, single-device wrapper: forwards eps to the MC variant with
 * device id 0.
 */
void
<prefix>_set_eps_to_all(double eps)
{
    <prefix>_set_eps_to_allMC(0, eps);
}

/*
 * Old API: fill all NIPMAX entries of the Eps2 row of device devid with
 * eps * eps.
 */
void
<prefix>_set_eps_to_allMC(int devid, double eps)
{
    const double squared = eps * eps;
    double *row = Eps2[devid];
    int slot = 0;

    while (slot < NIPMAX) {
        row[slot] = squared;
        slot++;
    }
}

/*
 * Old API, single-device wrapper: forwards eps2 to the MC variant with
 * device id 0.
 */
void
<prefix>_set_eps2_to_all(double eps2)
{
    <prefix>_set_eps2_to_allMC(0, eps2);
}

/*
 * Old API: fill all NIPMAX entries of the Eps2 row of device devid with
 * eps2.
 */
void
<prefix>_set_eps2_to_allMC(int devid, double eps2)
{
    double *row = Eps2[devid];
    int slot = 0;

    while (slot < NIPMAX) {
        row[slot] = eps2;
        slot++;
    }
}

</OLD_API_FOUND_EPS2>

<OLD_API_FOUND_A>

/*
 * Old API: forwards ni, a and a generated trailing argument to the get_fout
 * wrapper. Whether p is passed on depends on that generated argument --
 * cannot be told from here.
 */
void
<prefix>_get_force(int ni, double (*a)[3], double *p)
{
    <prefix>_get_fout(ni, a <OLD_API_P>);
}

/*
 * Old API: forwards devid, ni, a and a generated trailing argument to the
 * get_foutMC entry point. Whether p is passed on depends on that generated
 * argument -- cannot be told from here.
 */
void
<prefix>_get_forceMC(int devid, int ni, double (*a)[3], double *p)
{
    <prefix>_get_foutMC(devid, ni, a <OLD_API_P>);
}

</OLD_API_FOUND_A>

<OLD_API_FOUND_EPS2_A>

/*
 * Old API: process ni positions in chunks no larger than the number of
 * pipelines. For each chunk the sequence is set_xi, run, get_force, with
 * xi, a and p all advanced to the first element of the chunk.
 */
void
<prefix>_calculate_force_on_x(double (*xi)[3], double (*a)[3], double *p, int ni)
{
    const int npipe = <prefix>_get_number_of_pipelines();
    int first;

    for (first = 0; first < ni; first += npipe) {
        // The last chunk may be shorter than npipe.
        const int remaining = ni - first;
        const int chunk = (remaining < npipe) ? remaining : npipe;

        <prefix>_set_xi(chunk, xi + first);
        <prefix>_run();
        <prefix>_get_force(chunk, a + first, p + first);
    }
}
</OLD_API_FOUND_EPS2_A>

/*
 *
 * local functions
 *
 */

/*
 * processor-unit emulator
 */
/*
 * For each i in [0, ni), call pipeline() once for every j in [0, Nbodies).
 * run_begin is 1 on the first j of each i and 0 on the rest. The other
 * pipeline() arguments are generated.
 * NOTE(review): j is an int compared against the UINT32 Nbodies; if UINT32
 * is an unsigned 32-bit type the comparison is done unsigned -- confirm
 * against the UINT32 definition.
 */
static void
processor(int ni)
{
    int i, j;
    for (i = 0; i < ni; i++) {
        int run_begin = 1;
        for (j = 0; j < Nbodies; j++) {
            pipeline(<PIPECALL>
                run_begin);
            run_begin = 0; // only the first call of each i sees run_begin == 1
        }
    }
}

/*
 * pipeline emulator
 */
/*
 * Pipeline emulator. Both the leading parameters and the whole body are
 * supplied by the code generator. The only fixed parameter is run_begin,
 * which the callers in this file set to 1 on the first call of a run and
 * to 0 on the following calls.
 */
static void
pipeline(<PIPEARGS>
    int run_begin)
{
<PIPELINE>
}

#ifdef MAINFUNC

/*
 * One selectable self-test: the routine to run and its one-line
 * description shown by showusage().
 */
typedef struct {
    void (*func)();
    char *usage;
} TestMode;

static void test_pipeline(void);
static void generate_testvector(void);

/*
 * Table of self-tests. The array index is the test_program_ID given as the
 * first command-line argument.
 */
static TestMode testmode[] = {
    { .func = test_pipeline,
      .usage = "test pipeline emulator." },
    { .func = generate_testvector,
      .usage = "generate .tbl file taking input test vector from stdin" },
};

/*
 * Print the command-line synopsis to stderr, followed by one numbered line
 * per entry of the testmode table.
 */
static void
showusage(char *programname)
{
    const int count = sizeof(testmode)/sizeof(testmode[0]);
    int id = 0;

    fprintf(stderr, "usage: %s <test_program_ID> [warning_level]\n", programname);
    while (id < count) {
        fprintf(stderr, "  %2d) %s\n", id, testmode[id].usage);
        id++;
    }
}

/*
 * Set the threshold used by the WARN macro: messages whose level is not
 * greater than `level` are printed.
 */
static void
set_warn_level(int level)
{
    warn_level = level;
}

/*
 * Self-test registered as entry 0 of the testmode table ("test pipeline
 * emulator."). The body is supplied by the code generator.
 */
static void
test_pipeline(void)
{
<PIPETEST>
}


/*
 * Return (nbit - 1) / 4 + 1 using C integer division. For nbit >= 1 this
 * is ceil(nbit / 4), the number of 4-bit groups needed to hold nbit bits.
 */
static int
ndigit(int nbit)
{
    const int full_groups_below = (nbit - 1) / 4;

    return full_groups_below + 1;
}

/*
 * Write n copies of the character 'X' to stdout; nothing is written when
 * n <= 0.
 */
static void
print_X(int n)
{
    int left = n;

    while (left > 0) {
        putchar('X');
        left--;
    }
}

#define NVEC (1024)
/*
 * Self-test registered as entry 1 of the testmode table: read input test
 * vectors from stdin, one line per vector, and print a table to stdout
 * made of a "# run ..." header line followed by one row per vector.
 *
 * Parsing, saving and printing of the individual signals are supplied by
 * generated code; presumably the generated parsing step also fills
 * run[nvec] -- TODO confirm against the generator. The fixed part only
 * drives the loop and derives run_begin from the run flag.
 *
 * Reading stops at end of input, on a read error, or as soon as
 * nvec + delay reaches NVEC.
 */
static void
generate_testvector(void)
{
    int i, nvec, run_begin, nchar;
    int delay = <DELAY>;
    static char buf[2046];
    static char fmt[2046];
    static int run[NVEC];
    <TESTVECVARS>

    nvec = 0;
    // Use the fgets() result as the loop condition. feof() only becomes true
    // after a read has already failed, so testing it before the read would
    // process the stale contents of buf one more time at end of input.
    // The capacity check comes first so no line is consumed once the
    // table is full.
    while (nvec + delay < NVEC && fgets(buf, sizeof(buf), stdin) != NULL) {
        <TESTVECSCANF>

        if (nvec == 0) { // do not set the output at the 1st clock.
            nvec++;
            continue;
        }

        if (run[nvec] == 1) { // run the pipeline.
            if (run[nvec - 1] == 0) { // rising edge of run.
                run_begin = 1;
            }
            else {
                run_begin = 0;
            }
            pipeline(<TESTVECCALL>run_begin);
        }
<TESTVECSAVE>
        nvec++;
    }

    printf("# run ");

<TESTVECHEAD>
    printf("\n");

    for (i = 0; i < nvec; i++) {
        printf("  %d   ", run[i]);

<TESTVECBODY>
    }
}

/*
 * Parse str as a complete base-10 integer that fits in an int.
 * Returns 0 and stores the result in *value on success; returns -1 and
 * leaves *value untouched when str is empty, has trailing characters, or
 * is out of range.
 */
static int
parse_int_arg(const char *str, int *value)
{
    char *end;
    long parsed;

    errno = 0;
    parsed = strtol(str, &end, 10);
    if (end == str || *end != '\0' || errno == ERANGE ||
        parsed < INT_MIN || parsed > INT_MAX) {
        return -1;
    }
    *value = (int)parsed;
    return 0;
}

/*
 * Test driver (built only when MAINFUNC is defined).
 *
 *   argv[1]  index into the testmode table (required)
 *   argv[2]  warning level (optional, defaults to 0)
 *
 * Prints the usage text and exits with status 1 when an argument is
 * missing, is not a valid integer, or selects a test that does not exist.
 * Otherwise runs the selected test and returns 0.
 */
int
main(int argc, char **argv)
{
    const int nmodes = (int)(sizeof(testmode)/sizeof(testmode[0]));
    int mode;
    int wlevel = 0;

    // Reject non-numeric input explicitly; atoi() would turn it into 0 and
    // silently run the first test.
    if (argc < 2 ||
        parse_int_arg(argv[1], &mode) != 0 ||
        mode < 0 || nmodes <= mode) {
        showusage(argv[0]);
        exit(1);
    }

    if (argc > 2 && parse_int_arg(argv[2], &wlevel) != 0) {
        showusage(argv[0]);
        exit(1);
    }
    set_warn_level(wlevel);

    testmode[mode].func();

    return 0;
}

#endif // MAINFUNC
