prefix f0_0
############################################################################
############################################################################
#############################################################################
# generated on 2010-01-15 09:53:46
# generated from gravperf_0.q
# VARI x_i_0_ x_i_1_ x_i_2_ x_id20543__0_ x_id20543__1_ x_id20543__2_ eps2
# VARJ x_j_0_ x_j_1_ x_j_2_ m_j_
# VARF a_i_0_ a_i_1_ a_i_2_ pot_i_ a_id20543__0_ a_id20543__1_ a_id20543__2_ pot_id20543_
#############################################################################
##########################################################
# Overview (reviewer notes):
#   The kernel handles two i-side coordinate sets (x_i_* and x_id20543__*)
#   against the same j-side data (x_j_*, m_j_) and accumulates eight result
#   words (a_*/pot_* for each set).
#   Judging by the variable names and the x^(-0.5) routine below, this looks
#   like a softened gravity kernel -- TODO confirm against gravperf_0.q.
#
# Inputs listed under VARI above: two coordinate triples plus eps2.
var vector long x_i_0_ hlt flt64to72
var vector long x_i_1_ hlt flt64to72
var vector long x_i_2_ hlt flt64to72
var vector long x_id20543__0_ hlt flt64to72
var vector long x_id20543__1_ hlt flt64to72
var vector long x_id20543__2_ hlt flt64to72
var vector long eps2 hlt flt64to72
# Inputs listed under VARJ above; these are declared with bvar rather than var.
bvar vector long x_j_0_ elt flt64to72
bvar vector long x_j_1_ elt flt64to72
bvar vector long x_j_2_ elt flt64to72
bvar vector long m_j_ elt flt64to72
# Outputs listed under VARF above. Each carries the fadd keyword
# (presumably the operator used when results are reduced -- TODO confirm).
var vector long a_i_0_ rrn flt72to64 fadd
var vector long a_i_1_ rrn flt72to64 fadd
var vector long a_i_2_ rrn flt72to64 fadd
var vector long pot_i_ rrn flt72to64 fadd
var vector long a_id20543__0_ rrn flt72to64 fadd
var vector long a_id20543__1_ rrn flt72to64 fadd
var vector long a_id20543__2_ rrn flt72to64 fadd
var vector long pot_id20543_ rrn flt72to64 fadd
# constants are loaded on BMEM
bvar vector long ccc_reg0 # 0.000000000000000e+00
##########################################################
############################################################################
# Constant slots used by the x^(-0.5) routine; the trailing comment on each
# line is the immediate that "loop initialization" stores into it.
# ccc_reg102, ccc_reg105, ccc_reg106 and ccc_reg107 are initialized below but
# are not read anywhere in the visible loop body.
bvar vector long ccc_reg101 # il"60"
bvar vector long ccc_reg102 # hl"7fe"
bvar vector long ccc_reg103 # hl"000fffffffffffffff"
bvar vector long ccc_reg104 # hl"3ff000000000000000"
bvar vector long ccc_reg105 # fl"0.45"
bvar vector long ccc_reg106 # fl"1.35"
bvar vector long ccc_reg107 # fl"2.0"
bvar vector long ccc_reg108 # il"61"
bvar vector long ccc_reg109 # hl"5fe"
bvar vector long ccc_reg110 # hl"1000000000000000"
bvar vector short ccc_reg111 # f"0.14823"
bvar vector short ccc_reg112 # f"-0.73758"
bvar vector short ccc_reg113 # f"1.58935"
bvar vector short ccc_reg114 # f"1.414213"
bvar vector short ccc_reg115 # f"0.5"
bvar vector short ccc_reg116 # f"1.5"
# negc is initialized below with hl"800000000000000000" and is XORed into the
# potential terms in the loop body (presumably a sign-bit mask -- TODO confirm).
bvar vector long negc
############################################################################
loop initialization
vlen 4
# x XOR x: clears $t.
uxor $t $t $t
# Build the constant table. Each upassa places an immediate in a register and
# the bm two lines later copies that register into the matching ccc_reg*/negc
# slot, so the upassa and bm columns below are staggered by two lines.
upassa il"60" $lr0v $lr0v ;
upassa hl"7fe" $lr8v $lr8v ;
upassa hl"000fffffffffffffff" $lr16v $lr16v; bm $lr0v ccc_reg101 0
upassa hl"3ff000000000000000" $lr24v $lr24v; bm $lr8v ccc_reg102 0
upassa fl"0.45" $lr32v $lr32v ; bm $lr16v ccc_reg103 0
upassa fl"1.35" $lr40v $lr40v ; bm $lr24v ccc_reg104 0
upassa fl"2.0" $lr48v $lr48v ; bm $lr32v ccc_reg105 0
upassa il"61" $lr56v $lr56v ; bm $lr40v ccc_reg106 0
upassa hl"5fe" $lr0v $lr0v ; bm $lr48v ccc_reg107 0
upassa hl"1000000000000000" $lr8v $lr8v ; bm $lr56v ccc_reg108 0
upassa f"0.14823" $r16v $r16v ; bm $lr0v ccc_reg109 0
upassa f"-0.73758" $r20v $r20v ; bm $lr8v ccc_reg110 0
upassa f"1.58935" $r24v $r24v ; bm $r16v ccc_reg111 0
upassa f"1.414213" $r28v $r28v ; bm $r20v ccc_reg112 0
upassa f"0.5" $r32v $r32v ; bm $r24v ccc_reg113 0
upassa f"1.5" $r36v $r36v ; bm $r28v ccc_reg114 0
upassa hl"800000000000000000" $lr0v $lr0v ; bm $r32v ccc_reg115 0
bm $r36v ccc_reg116 0
bm $lr0v negc 0
# constants
upassa fl" 0.0000000000000000" $lr0v $lr0v; nop
bm $lr0v ccc_reg0 0
# Copy $t into $lm56v..$lm112v, the eight words the loop body adds into.
# $t is presumably still the zero produced by the uxor above, since nothing
# in between names $t as a destination -- TODO confirm.
upassa $t $t $lm56v
upassa $t $t $lm64v
upassa $t $t $lm72v
upassa $t $t $lm80v
upassa $t $t $lm88v
upassa $t $t $lm96v
upassa $t $t $lm104v
upassa $t $t $lm112v
nop
nop
############################################################################
loop body
vlen 4
############################################################################
# NOTE(review): comments below assume the operand order "op src src dst [dst]"
# suggested by the constant loads above, and that $ti reads the value the
# previous instruction wrote to $t -- TODO confirm against the ISA manual.
#
# Fetch ccc_reg0 (declared as 0.0) and keep a copy in $lm120v; it is added to
# every force component further down.
bm ccc_reg0 $t
upassa $ti $lr0v $lm120v
nop
nop
# ---- first i-set --------------------------------------------------------
# Per axis: copy x_j_* into local memory ($lm168v/$lm176v/$lm184v), then
# subtract $lm0v/$lm8v/$lm16v (presumably x_i_0_..x_i_2_, the first three
# declared vars -- TODO confirm). Differences land in $lr32v/$lr40v/$lr48v.
bm x_j_0_ $lm168v
nop
nop
upassa $lm168v $lm168v $t
fsub $ti $lm0v $lr32v
bm x_j_1_ $lm176v
nop
nop
upassa $lm176v $lm176v $t
fsub $ti $lm8v $lr40v
bm x_j_2_ $lm184v
nop
nop
upassa $lm184v $lm184v $t
fsub $ti $lm16v $lr48v
# Square each difference and sum the squares. Each dmul0/dmul1/faddAB triple
# appears to form one double-precision product from two partial products.
dmul0 $lr32v $lr32v $t
dmul1 $lr32v $lr32v
faddAB $fb $t $lr8v
dmul0 $lr40v $lr40v $t
dmul1 $lr40v $lr40v
faddAB $fb $t $t
fadd $lr8v $ti $lr0v
dmul0 $lr48v $lr48v $t
dmul1 $lr48v $lr48v
faddAB $fb $t $t
fadd $lr0v $ti $t
# Add $lm48v (presumably eps2, the seventh declared var -- TODO confirm);
# the sum stays in $t as the argument of the routine below.
fadd $ti $lm48v $t
############################################################
#
# calculate x^(-0.5) : double precision version
#
# argument : $ti -> $lr0v
# results : $lr24v
#
# using 3 registers (0, 8, 16) and 1 LM (504)
############################################################
# il"61" ccc_reg108
# il"60" ccc_reg101
# hl"5fe" ccc_reg109
# hl"1000000000000000" ccc_reg110
# hl"000fffffffffffffff" ccc_reg103
# hl"3ff000000000000000" ccc_reg104
# f"0.14823" ccc_reg111
# f"-0.73758" ccc_reg112
# f"1.58935" ccc_reg113
# f"1.414213" ccc_reg114
# f"0.5" ccc_reg115
# f"1.5" ccc_reg116
#
# vh keeps a copy of the argument for the whole routine.
alias vh $lm504v
upassa $ti $ti $t $lr0v
upassa $ti $ti vh; bm ccc_reg109 $lr16v
#nop
#upassa $lr0v $lr0v vh; bm ccc_reg109 $lr16v
# Integer shift/subtract/mask work on the argument's bit pattern using the
# il"61", il"60", hl"5fe" and hl"1000000000000000" constants (presumably
# exponent handling for the initial guess -- TODO confirm).
bm ccc_reg108 $t
ulsr $lr0v $ti $t; bm ccc_reg101 $lr0v
usub $lr16v $ti $t;
ulsl $ti $lr0v $r8v; bm ccc_reg110 $t
moi 1
uand $ti vh; bm ccc_reg103 $t
moi 0
uand $ti vh $t; bm ccc_reg104 $lr0v
nop
uor $lr0v $t $t $lr0v
# Short-precision polynomial in the value just built, using the
# f"0.14823", f"-0.73758" and f"1.58935" coefficients, then scaled by $r8v.
bm ccc_reg111 $r12v
bm ccc_reg112 $r16v
fmul $t $r12v $t
fadd $ti $r16v $t; bm ccc_reg113 $r12v
fmul $ti $lr0v $t
fadd $ti $r12v $t
fmul $ti $r8v $t; bm ccc_reg114 $r16v
nop
# The multiply by f"1.414213" sits between "mi 1" and "mi 0"
# (presumably a masked, conditional multiply -- TODO confirm).
mi 1
fmul $t $r16v $t
mi 0
# $lr8v <- initial estimate; $lr0v <- vh * f"0.5" (half of the argument).
bm ccc_reg115 $lr0v
upassa $t $t $lr8v
dmul0 vh $lr0v $t
dmul1 vh $lr0v
faddAB $fb $t $lr0v
# dmul0 $lr8v f"0.5" $t
# dmul1 $lr8v f"0.5"
# faddAB $fb $t $lr8v
# Three repetitions of the same refinement step follow, separated by the
# hash rulers: y <- y * (1.5 - (0.5*x) * y*y), with y in $lr8v, 0.5*x in
# $lr0v and 1.5 in $lr16v (this matches a Newton-Raphson step for x^(-0.5)).
#################################
dmul0 $lr8v $lr8v $t
dmul1 $lr8v $lr8v
faddAB $fb $t $t; bm ccc_reg116 $lr16v
dmul0 $ti $lr0v $t
dmul1 $t $lr0v
faddAB $fb $t $t
fsub $lr16v $ti $t
dmul0 $lr8v $ti $t
dmul1 $lr8v $t
faddAB $fb $t $t $lr8v
#################################
dmul0 $ti $ti $t
dmul1 $lr8v $lr8v
faddAB $fb $t $t
dmul0 $ti $lr0v $t
dmul1 $t $lr0v
faddAB $fb $t $t
fsub $lr16v $ti $t
dmul0 $lr8v $ti $t
dmul1 $lr8v $t
faddAB $fb $t $t $lr8v
#################################
dmul0 $ti $ti $t
dmul1 $lr8v $lr8v
faddAB $fb $t $t
dmul0 $ti $lr0v $t
dmul1 $t $lr0v
faddAB $fb $t $t
fsub $lr16v $ti $t
dmul0 $lr8v $ti $t
dmul1 $lr8v $t
faddAB $fb $t $lr24v
######################################################## end powm12
# Fetch m_j_ into $lm152v; it is reused by the second i-set below.
bm m_j_ $lm152v
nop
nop
# $lr0v <- $lr24v * m_j_. Two further multiplies by $lr24v give $lr8v,
# i.e. m_j_ times the third power of the routine's result.
dmul0 $lr24v $lm152v $t
dmul1 $lr24v $lm152v
faddAB $fb $t $lr0v $t
dmul0 $ti $lr24v $t
dmul1 $lr0v $lr24v
faddAB $fb $t $t
dmul0 $ti $lr24v $t
dmul1 $t $lr24v
faddAB $fb $t $lr8v $t
# Scale each coordinate difference by $lr8v, add $lm120v, and park the three
# components in $lm128v/$lm136v/$lm144v until the final accumulation.
dmul0 $ti $lr32v $t
dmul1 $lr8v $lr32v
faddAB $fb $t $t
fadd $ti $lm120v $t
upassa $ti $ti $lm128v
dmul0 $lr8v $lr40v $t
dmul1 $lr8v $lr40v
faddAB $fb $t $t
fadd $ti $lm120v $t
upassa $ti $ti $lm136v
dmul0 $lr8v $lr48v $t
dmul1 $lr8v $lr48v
faddAB $fb $t $t
fadd $ti $lm120v $t
upassa $ti $ti $lm144v
# XOR negc into $lr0v (m_j_ times the routine's result) and park it in
# $lm160v (presumably the negated potential term -- TODO confirm).
bm negc $t
ixor $ti $lr0v $lm160v
nop
nop
# ---- second i-set -------------------------------------------------------
# Same sequence as above, reusing the x_j_* copies in $lm168v/$lm176v/$lm184v
# and subtracting $lm24v/$lm32v/$lm40v (presumably x_id20543__0_..__2_ --
# TODO confirm). Differences land in $lr40v/$lr48v/$lr56v.
upassa $lm168v $lm168v $t
fsub $ti $lm24v $lr40v
upassa $lm176v $lm176v $t
fsub $ti $lm32v $lr48v
upassa $lm184v $lm184v $t
fsub $ti $lm40v $lr56v
dmul0 $lr40v $lr40v $t
dmul1 $lr40v $lr40v
faddAB $fb $t $lr8v
dmul0 $lr48v $lr48v $t
dmul1 $lr48v $lr48v
faddAB $fb $t $t
fadd $lr8v $ti $lr0v
dmul0 $lr56v $lr56v $t
dmul1 $lr56v $lr56v
faddAB $fb $t $t
fadd $lr0v $ti $t
fadd $ti $lm48v $t
############################################################
#
# calculate x^(-0.5) : double precision version
#
# argument : $ti -> $lr0v
# results : $lr24v
#
# using 3 registers (0, 8, 16) and 1 LM (504)
############################################################
# il"61" ccc_reg108
# il"60" ccc_reg101
# hl"5fe" ccc_reg109
# hl"1000000000000000" ccc_reg110
# hl"000fffffffffffffff" ccc_reg103
# hl"3ff000000000000000" ccc_reg104
# f"0.14823" ccc_reg111
# f"-0.73758" ccc_reg112
# f"1.58935" ccc_reg113
# f"1.414213" ccc_reg114
# f"0.5" ccc_reg115
# f"1.5" ccc_reg116
#
# Second copy of the routine. Up to the last faddAB its instruction
# sequence matches the first copy; that faddAB additionally writes $t.
alias vh $lm504v
upassa $ti $ti $t $lr0v
upassa $ti $ti vh; bm ccc_reg109 $lr16v
#nop
#upassa $lr0v $lr0v vh; bm ccc_reg109 $lr16v
bm ccc_reg108 $t
ulsr $lr0v $ti $t; bm ccc_reg101 $lr0v
usub $lr16v $ti $t;
ulsl $ti $lr0v $r8v; bm ccc_reg110 $t
moi 1
uand $ti vh; bm ccc_reg103 $t
moi 0
uand $ti vh $t; bm ccc_reg104 $lr0v
nop
uor $lr0v $t $t $lr0v
bm ccc_reg111 $r12v
bm ccc_reg112 $r16v
fmul $t $r12v $t
fadd $ti $r16v $t; bm ccc_reg113 $r12v
fmul $ti $lr0v $t
fadd $ti $r12v $t
fmul $ti $r8v $t; bm ccc_reg114 $r16v
nop
mi 1
fmul $t $r16v $t
mi 0
bm ccc_reg115 $lr0v
upassa $t $t $lr8v
dmul0 vh $lr0v $t
dmul1 vh $lr0v
faddAB $fb $t $lr0v
# dmul0 $lr8v f"0.5" $t
# dmul1 $lr8v f"0.5"
# faddAB $fb $t $lr8v
#################################
dmul0 $lr8v $lr8v $t
dmul1 $lr8v $lr8v
faddAB $fb $t $t; bm ccc_reg116 $lr16v
dmul0 $ti $lr0v $t
dmul1 $t $lr0v
faddAB $fb $t $t
fsub $lr16v $ti $t
dmul0 $lr8v $ti $t
dmul1 $lr8v $t
faddAB $fb $t $t $lr8v
#################################
dmul0 $ti $ti $t
dmul1 $lr8v $lr8v
faddAB $fb $t $t
dmul0 $ti $lr0v $t
dmul1 $t $lr0v
faddAB $fb $t $t
fsub $lr16v $ti $t
dmul0 $lr8v $ti $t
dmul1 $lr8v $t
faddAB $fb $t $t $lr8v
#################################
dmul0 $ti $ti $t
dmul1 $lr8v $lr8v
faddAB $fb $t $t
dmul0 $ti $lr0v $t
dmul1 $t $lr0v
faddAB $fb $t $t
fsub $lr16v $ti $t
dmul0 $lr8v $ti $t
dmul1 $lr8v $t
faddAB $fb $t $lr24v $t
######################################################## end powm12
# $lr32v <- $lr24v * m_j_ (reusing $lm152v); two further multiplies by
# $lr24v give $lr16v, the scale factor for the second i-set.
dmul0 $ti $lm152v $t
dmul1 $lr24v $lm152v
faddAB $fb $t $lr32v $t
dmul0 $ti $lr24v $t
dmul1 $lr32v $lr24v
faddAB $fb $t $t
dmul0 $ti $lr24v $t
dmul1 $t $lr24v
faddAB $fb $t $lr16v $t
# Scaled differences plus $lm120v go to $lr0v/$lr8v/$lr16v. $lr16v is
# overwritten only by the last fadd, after its final use as the scale factor.
dmul0 $ti $lr40v $t
dmul1 $lr16v $lr40v
faddAB $fb $t $t
fadd $ti $lm120v $lr0v
dmul0 $lr16v $lr48v $t
dmul1 $lr16v $lr48v
faddAB $fb $t $t
fadd $ti $lm120v $lr8v
dmul0 $lr16v $lr56v $t
dmul1 $lr16v $lr56v
faddAB $fb $t $t
fadd $ti $lm120v $lr16v
# XOR negc into $lr32v; result kept in $lr24v.
bm negc $t
ixor $ti $lr32v $lr24v
# ---- accumulate ---------------------------------------------------------
# Add this iteration's eight results into the running sums:
#   first i-set : $lm128v/$lm136v/$lm144v/$lm160v -> $lm56v/$lm64v/$lm72v/$lm80v
#   second i-set: $lr0v/$lr8v/$lr16v/$lr24v       -> $lm88v/$lm96v/$lm104v/$lm112v
# (presumably these eight words are the VARF outputs in declaration order --
# TODO confirm.)
upassa $lm128v $lm128v $t
fadd $ti $lm56v $t
upassa $ti $ti $lm56v
nop
nop
upassa $lm136v $lm136v $t
fadd $ti $lm64v $t
upassa $ti $ti $lm64v
nop
nop
upassa $lm144v $lm144v $t
fadd $ti $lm72v $t
upassa $ti $ti $lm72v
nop
nop
upassa $lm160v $lm160v $t
fadd $ti $lm80v $t
upassa $ti $ti $lm80v
nop
nop
fadd $lr0v $lm88v $t
upassa $ti $ti $lm88v
nop
nop
fadd $lr8v $lm96v $t
upassa $ti $ti $lm96v
nop
nop
fadd $lr16v $lm104v $t
upassa $ti $ti $lm104v
nop
nop
fadd $lr24v $lm112v $t
upassa $ti $ti $lm112v
nop
nop
############################################################################
#loop finalization
#nop
#nop