--- /dev/null
+#define NV_MME_PRED_MODE_UUUU 0 // values for the 4-bit 'pm' field that MME_BITS places at bits 4:1; MME_INSN always passes UUUU
+#define NV_MME_PRED_MODE_TTTT 1 // NOTE(review): the four letters presumably give U/T/F (unconditional/true/false) per output slot - confirm against the hardware class header
+#define NV_MME_PRED_MODE_FFFF 2
+#define NV_MME_PRED_MODE_TTUU 3
+#define NV_MME_PRED_MODE_FFUU 4
+#define NV_MME_PRED_MODE_TFUU 5
+#define NV_MME_PRED_MODE_TUUU 6
+#define NV_MME_PRED_MODE_FUUU 7
+#define NV_MME_PRED_MODE_UUTT 8
+#define NV_MME_PRED_MODE_UUTF 9
+#define NV_MME_PRED_MODE_UUTU 10
+#define NV_MME_PRED_MODE_UUFT 11
+#define NV_MME_PRED_MODE_UUFF 12
+#define NV_MME_PRED_MODE_UUFU 13
+#define NV_MME_PRED_MODE_UUUT 14
+#define NV_MME_PRED_MODE_UUUF 15
+
+#define NV_MME_REG_R0 0 // operand selectors for the 5-bit d/a/b (and pr) fields of MME_BITS; R0..R23 are the numbered registers
+#define NV_MME_REG_R1 1
+#define NV_MME_REG_R2 2
+#define NV_MME_REG_R3 3
+#define NV_MME_REG_R4 4
+#define NV_MME_REG_R5 5
+#define NV_MME_REG_R6 6
+#define NV_MME_REG_R7 7
+#define NV_MME_REG_R8 8
+#define NV_MME_REG_R9 9
+#define NV_MME_REG_R10 10
+#define NV_MME_REG_R11 11
+#define NV_MME_REG_R12 12
+#define NV_MME_REG_R13 13
+#define NV_MME_REG_R14 14
+#define NV_MME_REG_R15 15
+#define NV_MME_REG_R16 16
+#define NV_MME_REG_R17 17
+#define NV_MME_REG_R18 18
+#define NV_MME_REG_R19 19
+#define NV_MME_REG_R20 20
+#define NV_MME_REG_R21 21
+#define NV_MME_REG_R22 22
+#define NV_MME_REG_R23 23
+#define NV_MME_REG_ZERO 24 // used in this file as the constant-0 source and as the "discard result" destination
+#define NV_MME_REG_IMMED 25 // used as a source to select the ALU's own 16-bit immediate (e.g. "r6 = 0x60")
+#define NV_MME_REG_IMMEDPAIR 26
+#define NV_MME_REG_IMMED32 27
+#define NV_MME_REG_LOAD0 28 // LOAD0/LOAD1 as a source correspond to load() in the pseudo-code (fetch the next macro parameter)
+#define NV_MME_REG_LOAD1 29
+
+#define NV_MME_ALU_ADD 0 // ALU opcodes for the 5-bit o0/o1 fields of MME_BITS; MME_INSN selects them by bare name (ADD, MERGE, ...)
+#define NV_MME_ALU_ADDC 1 // paired with ADD in the other ALU for the high word of 64-bit additions in this file
+#define NV_MME_ALU_SUB 2
+#define NV_MME_ALU_SUBB 3 // paired with SUB in the other ALU for the high word of 64-bit subtractions in this file
+#define NV_MME_ALU_MUL 4
+#define NV_MME_ALU_MULH 5
+#define NV_MME_ALU_MULU 6
+#define NV_MME_ALU_EXTENDED 7
+#define NV_MME_ALU_CLZ 8
+#define NV_MME_ALU_SLL 9
+#define NV_MME_ALU_SRL 10
+#define NV_MME_ALU_SRA 11
+#define NV_MME_ALU_AND 12
+#define NV_MME_ALU_NAND 13
+#define NV_MME_ALU_OR 14
+#define NV_MME_ALU_XOR 15
+#define NV_MME_ALU_MERGE 16 // bitfield insert as used here: immediate is (dst_pos<<10)|(width<<5)|src_pos, bits taken from b and inserted into a
+#define NV_MME_ALU_SLT 17
+#define NV_MME_ALU_SLTU 18
+#define NV_MME_ALU_SLE 19
+#define NV_MME_ALU_SLEU 20
+#define NV_MME_ALU_SEQ 21
+#define NV_MME_ALU_STATE 22 // shown as read() in the pseudo-code; used with a=IMMED and immediate = method address / 4
+#define NV_MME_ALU_LOOP 23 // a = iteration count; immediate in this file is always (body length + 1)
+#define NV_MME_ALU_JAL 24
+#define NV_MME_ALU_BLT 25 // branches: immediate is (flags<<14)|offset; NOTE(review): flags 2/1 appear to mean branch-if-true/branch-if-false - confirm
+#define NV_MME_ALU_BLTU 26
+#define NV_MME_ALU_BLE 27
+#define NV_MME_ALU_BLEU 28
+#define NV_MME_ALU_BEQ 29
+#define NV_MME_ALU_DREAD 30
+#define NV_MME_ALU_DWRITE 31
+
+#define NV_MME_OUT_NONE 0 // output selectors for the m0/e0/m1/e1 fields: m* corresponds to mthd(), e* to send() in the pseudo-code
+#define NV_MME_OUT_ALU0 1 // result of ALU0 / ALU1 in the same instruction
+#define NV_MME_OUT_ALU1 2
+#define NV_MME_OUT_LOAD0 3
+#define NV_MME_OUT_LOAD1 4
+#define NV_MME_OUT_IMMED0 5 // IMMED0 = ALU0's immediate (i0), IMMED1 = ALU1's immediate (i1)
+#define NV_MME_OUT_IMMED1 6
+#define NV_MME_OUT_RESERVED 7
+#define NV_MME_OUT_IMMEDHIGH0 8
+#define NV_MME_OUT_IMMEDHIGH1 9
+#define NV_MME_OUT_IMMED32_0 10 // as used here: emits (i0 << 16) | i1, e.g. i0=0x1000, i1=0x0000 for send(0x10000000)
+
+/*
+ * Pack one 96-bit MME instruction into three comma-separated 32-bit words,
+ * most-significant word first (bits 95:64, then 63:32, then 31:0), so the
+ * expansion can be dropped straight into a uint32_t array initializer.
+ *
+ *   en      bit  0       end-of-macro flag (set on the second-to-last
+ *                        instruction of every macro in this file)
+ *   pm      bits 4:1     predicate mode (NV_MME_PRED_MODE_*)
+ *   pr      bits 9:5     register operand that accompanies pm
+ *   o0/o1   bits 14:10 / 50:46   ALU0 / ALU1 opcode (NV_MME_ALU_*)
+ *   d0/d1   bits 19:15 / 55:51   destination register (NV_MME_REG_*)
+ *   a0/a1   bits 24:20 / 60:56   first source operand
+ *   b0/b1   bits 29:25 / 65:61   second source operand
+ *   i0/i1   bits 45:30 / 81:66   16-bit immediate
+ *   m0/e0   bits 84:82 / 88:85   method / emit output selectors (NV_MME_OUT_*)
+ *   m1/e1   bits 91:89 / 95:92   method / emit output selectors (NV_MME_OUT_*)
+ *
+ * i0 and b1 straddle a word boundary, hence the split shift/mask pairs.
+ *
+ * Every field is converted to uint32_t before it is shifted: (i0 & 3) << 30,
+ * (b1 & 7) << 29 and e1 << 28 can all reach bit 31, and left-shifting a
+ * signed int into or past the sign bit is undefined behaviour in C.  The
+ * resulting words are unchanged.
+ */
+#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) \
+ ((uint32_t)(e1) << (92 - 64) | (uint32_t)(m1) << (89 - 64) | \
+ (uint32_t)(e0) << (85 - 64) | (uint32_t)(m0) << (82 - 64) | \
+ (uint32_t)(i1) << (66 - 64) | (uint32_t)(b1) >> (64 - 61)), \
+ (((uint32_t)(b1) & 7) << (61 - 32) | (uint32_t)(a1) << (56 - 32) | \
+ (uint32_t)(d1) << (51 - 32) | (uint32_t)(o1) << (46 - 32) | \
+ (uint32_t)(i0) >> (32 - 30)), \
+ (((uint32_t)(i0) & 3) << 30 | (uint32_t)(b0) << 25 | (uint32_t)(a0) << 20 | \
+ (uint32_t)(d0) << 15 | (uint32_t)(o0) << 10 | \
+ (uint32_t)(pr) << 5 | (uint32_t)(pm) << 1 | (uint32_t)(en))
+
+#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) /* one instruction: en, then ALU0's (op,dst,a,b,imm,mthd,emit), then ALU1's */ \
+ MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO, \
+ NV_MME_ALU_##o0, NV_MME_REG_##d0, \
+ NV_MME_REG_##a0, NV_MME_REG_##b0, (i0), \
+ NV_MME_ALU_##o1, NV_MME_REG_##d1, \
+ NV_MME_REG_##a1, NV_MME_REG_##b1, (i1), \
+ NV_MME_OUT_##m0, NV_MME_OUT_##e0, \
+ NV_MME_OUT_##m1, NV_MME_OUT_##e1) // bare names are pasted onto NV_MME_ALU_/NV_MME_REG_/NV_MME_OUT_; predicate fields are fixed to UUUU/ZERO
+
+uint32_t mmec597_per_instance_bf[] = { // macro: send `count` one-bit values, taken LSB-first from `mask`, starting at VERTEX_ARRAY_PER_INSTANCE[0]
+// r1 = load();      // count
+// r3 = load();      // mask
+// mthd(0x1880, 1);  // VERTEX_ARRAY_PER_INSTANCE[0]; the mthd() second argument is the (n<<12) part of the immediate
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x1880/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r1) {   // LOOP immediate 3 = two body instructions + 1
+//    send(r3 & 1);
+//    r3 >>= 1;
+// }
+ MME_INSN(0, LOOP, ZERO, R1, ZERO, 0x0003, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, ZERO, R3, IMMED, 1, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, SRL, R3, R3, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, // en=1 on the second-to-last instruction, as in every macro in this file
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, // final no-op following the en=1 instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_vertex_array_select[] = { // macro: write the 64-bit start and limit values of the vertex array selected by the low 5 bits of the first parameter
+// r1 = load();            // array
+// r2 = load();            // limit hi
+// r3 = load();            // limit lo
+// r4 = load();            // start hi
+// r5 = load();            // start lo
+// r6 = (r1 & 0x1f) << 2;  // MERGE (2<<10)|(5<<5)|0: 5 bits from bit 0 of r1, placed at bit 2
+// r7 = (r1 & 0x1f) << 1;  // MERGE (1<<10)|(5<<5)|0: same bits, placed at bit 1
+// mthd(0x1c04 + r6, 1);   // VERTEX_ARRAY_START_HIGH[]; r6 is added to the already /4-scaled immediate
+// send(r4);
+// send(r5);
+// mthd(0x0600 + r7, 1);   // VERTEX_ARRAY_LIMIT_HIGH[]; r7 is added to the already /4-scaled immediate
+// send(r2);
+// send(r3);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE,
+ MERGE, R6, ZERO, R1, (2<<10)|(5<<5)|0, NONE, NONE),
+ MME_INSN(0, MERGE, R7, ZERO, R1, (1<<10)|(5<<5)|0, ALU1, NONE, // m0=ALU1: the method comes from ALU1's sum below
+ ADD, ZERO, R6, IMMED, (1<<12)|0x1c04/4, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R5, ZERO, 0, NONE, ALU1),
+ MME_INSN(1, ADD, ZERO, R7, IMMED, (1<<12)|0x0600/4, ALU0, ALU1, // en=1; the next instruction is the last one of the macro
+ ADD, ZERO, R2, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_blend_enables[] = { // macro: expand an 8-bit enable mask into eight one-bit writes starting at method 0x1360
+// r1 = load();         // enable mask
+// mthd(0x1360, 1);     // NVC0_3D_BLEND_ENABLE[]
+// send((r1 >> 0) & 1); // each send is a MERGE (0<<10)|(1<<5)|n: one bit from position n of r1, placed at bit 0
+// send((r1 >> 1) & 1);
+// send((r1 >> 2) & 1);
+// send((r1 >> 3) & 1);
+// send((r1 >> 4) & 1);
+// send((r1 >> 5) & 1);
+// send((r1 >> 6) & 1);
+// send((r1 >> 7) & 1);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, IMMED1, NONE, // m0=IMMED1: the method word is ALU1's immediate
+ ADD, ZERO, ZERO, ZERO, (1<<12)|0x1360/4, NONE, NONE),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|0, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|1, NONE, ALU1),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|2, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|3, NONE, ALU1),
+ MME_INSN(1, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|4, NONE, ALU0, // en=1; the next instruction is the last one of the macro
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|5, NONE, ALU1),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|6, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|7, NONE, ALU1),
+};
+
+uint32_t mmec597_poly_mode_front[] = { // macro: set POLYGON_MODE_FRONT, then rewrite method 0x02ec from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x0dac,0);      // POLYGON_MODE_FRONT
+// send(r1);
+// r2 = read(0x0db0);   // POLYGON_MODE_BACK
+// r3 = read(0x20c0);   // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100);   // SP_SELECT[4]
+// r6 = 0x60;           // value sent when neither test below fires
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0dac/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0db0/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ OR, R7, R1, R2, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in SP_SELECT[3] or SP_SELECT[4]; evaluated regardless of the first test
+ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; the send below is the last instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_poly_mode_back[] = { // macro: set POLYGON_MODE_BACK, then rewrite method 0x02ec from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x0db0,0);      // POLYGON_MODE_BACK
+// send(r1);
+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
+// r3 = read(0x20c0);   // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100);   // SP_SELECT[4]
+// r6 = 0x60;           // value sent when neither test below fires
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0db0/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ OR, R7, R1, R2, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in SP_SELECT[3] or SP_SELECT[4]; evaluated regardless of the first test
+ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; the send below is the last instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_gp_select[] = { // macro: set SP_SELECT[4], then rewrite method 0x02ec from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x2100,0);      // SP_SELECT[4]
+// send(r1);
+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
+// r3 = read(0x0db0);   // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x20c0);   // SP_SELECT[3]
+// r6 = 0x60;           // value sent when neither test below fires
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x2100/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, // NOTE(review): ALU1 ORs r3 in the same instruction in which ALU0 loads it - confirm the new value is visible
+ OR, R7, R2, R3, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r1 | r4;        // r1 is the SP_SELECT[4] value just written, r4 the current SP_SELECT[3]
+// r7 = r7 & 1;
+// if (r7 != 0)         // evaluated regardless of the first test
+ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; the send below is the last instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_tep_select[] = { // macro: set SP_SELECT[3], then rewrite method 0x02ec from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x20c0,0);      // SP_SELECT[3]
+// send(r1);
+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
+// r3 = read(0x0db0);   // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x2100);   // SP_SELECT[4]
+// r6 = 0x60;           // value sent when neither test below fires
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x20c0/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, // NOTE(review): ALU1 ORs r3 in the same instruction in which ALU0 loads it - confirm the new value is visible
+ OR, R7, R2, R3, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r1 | r4;        // r1 is the SP_SELECT[3] value just written, r4 the current SP_SELECT[4]
+// r7 = r7 & 1;
+// if (r7 != 0)         // evaluated regardless of the first test
+ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // r7 == 0: skip the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; the send below is the last instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect[] = { // macro: issue `numparams` non-indexed draws, each described by four loaded words, then restore VB_INSTANCE_BASE
+// r1 = load();         // mode
+// r5 = read(0x1438);   // VB_INSTANCE_BASE (saved, restored at the end)
+// r6 = load();         // start_drawid
+// r7 = load();         // numparams
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R6, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R5, IMMED, ZERO, 0x1438/4, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {   // LOOP immediate 0xc = eleven body instructions + 1
+//    r2 = load();      // count
+//    r3 = load();      // instance_count
+//    mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
+//    send(load());     // start
+//    r4 = load();      // start_instance
+//    if (r3) {         // BEQ +8 lands on "r6 = r6 + 1" when instance_count is 0
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000c, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//       mthd(0x238c, 1);  // CB_POS
+//       send(256 + 160);
+//       send(0);          // base_vertex
+//       send(r4);         // start_instance
+//       send(r6);         // draw id
+//       mthd(0x1438, 0);  // VB_INSTANCE_BASE
+//       send(r4);
+//       r1 = r1 & ~(1<<26);  // clear INSTANCE_NEXT (MERGE inserts bit 0 of ZERO at bit 26)
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 256 + 160, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+//       do {
+//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//          send(r1);         // mode
+//          mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
+//          send(r2);         // count
+//          mthd(0x1614, 0);  // VERTEX_END_GL
+//          send(0);
+//          r1 |= (1<<26);    // set INSTANCE_NEXT (r4 is reused as the constant 1 for the MERGE)
+//       } while(--r3);       // BEQ offset 0x3ffd branches back three instructions while r3 != 0
+//    }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R3, R3, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = r6 + 1;      // next draw id
+// }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1438, 0);  // restore VB_INSTANCE_BASE
+// send(r5);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, // en=1; the send below is the last instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect[] = { // macro: issue `numparams` indexed draws, each described by five loaded words, then restore the saved base state
+// r1 = load();         // mode
+// r8 = read(0x1434);   // VB_ELEMENT_BASE (saved, restored at the end)
+// r9 = read(0x1438);   // VB_INSTANCE_BASE (saved, restored at the end)
+// r6 = load();         // start_drawid
+// r7 = load();         // numparams
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE),
+ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ ADD, R6, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {   // LOOP immediate 0xd = twelve body instructions + 1
+//    r3 = load();      // count
+//    r2 = load();      // instance_count
+//    mthd(0x17dc, 0);  // INDEX_BATCH_FIRST
+//    send(load());     // start
+//    r4 = load();      // index_bias
+//    mthd(0x238c, 1);  // CB_POS
+//    send(256 + 160);
+//    send(r4);         // index_bias
+//    r5 = load();      // start_instance
+//    if (r2) {         // BEQ +8 lands on "r6 = r6 + 1" when instance_count is 0
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000d, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0x17dc/4, IMMED0, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, R4, ZERO, 256 + 160, NONE, ALU1),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, R5, LOAD0, ZERO, 0, NONE, NONE),
+//       send(r5);         // start_instance
+//       send(r6);         // draw_id
+//       mthd(0x1434, 1);  // VB_ELEMENT_BASE
+//       send(r4);         // index_bias
+//       send(r5);         // start_instance
+//       mthd(0x1118, 0);  // VERTEX_ID_BASE
+//       send(r4);         // index_bias
+//       r1 &= ~(1 << 26); // clear INSTANCE_NEXT (MERGE inserts bit 0 of ZERO at bit 26)
+ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R5, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1118/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+//       do {
+//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//          send(r1);         // mode
+//          mthd(0x17e0, 0);  // INDEX_BATCH_COUNT
+//          send(r3);         // count
+//          mthd(0x1614, 0);  // VERTEX_END_GL
+//          send(0);
+//          r1 |= (1 << 26);  // set INSTANCE_NEXT (r4 is reused as the constant 1 for the MERGE)
+//       } while (--r2);      // BEQ offset 0x3ffd branches back three instructions while r2 != 0
+//    }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R2, R2, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r6 = r6 + 1;      // next draw id
+// }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1434, 1);
+// send(r8);         // restore VB_ELEMENT_BASE
+// send(r9);         // restore VB_INSTANCE_BASE
+// mthd(0x1118, 0);
+// send(r8);         // restore VERTEX_ID_BASE
+ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, // en=1; the instruction below is the last one
+ ADD, ZERO, R9, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, 0x1118/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect_count[] = { // macro: like the non-count arrays variant, but draws at most (totaldraws - start_drawid) records and discards the rest
+// r1 = load();         // mode
+// r6 = load();         // start_drawid
+// r7 = load();         // numparams
+// r5 = load();         // totaldraws
+// r8 = read(0x1438);   // VB_INSTANCE_BASE (saved, restored at the end)
+// r5 = r5 - r6;        // remaining draws
+// if (r5 > r7)         // BLE skips the clamp when r5 <= r7
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R6, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R5, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ SUB, R5, R5, R6, 0, NONE, NONE),
+ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r5 = r7;
+ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r5 >= 0) {       // BLT +0xe lands on the discard loop when r5 < 0
+ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000e, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    while (HW_LOOP_COUNT < r5) {   // LOOP immediate 0xc = eleven body instructions + 1
+//       r2 = load();      // count
+//       r3 = load();      // instance_count
+//       mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
+//       send(load());     // start
+//       r4 = load();      // start_instance
+//       if (r3) {         // BEQ +8 lands on "r6 = r6 + 1" when instance_count is 0
+ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000c, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//          mthd(0x238c, 1);  // CB_POS
+//          send(256 + 160);
+//          send(0);          // base_vertex
+//          send(r4);         // start_instance
+//          send(r6);         // draw_id
+//          mthd(0x1438, 0);  // VB_INSTANCE_BASE
+//          send(r4);
+//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT (MERGE inserts bit 0 of ZERO at bit 26)
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 256+160, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+//          do {
+//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//             send(r1);         // mode
+//             mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
+//             send(r2);         // count
+//             mthd(0x1614, 0);  // VERTEX_END_GL
+//             send(0);
+//             r1 |= (1 << 26);  // set INSTANCE_NEXT (r4 is reused as the constant 1 for the MERGE)
+//          } while (--r3);      // BEQ offset 0x3ffd branches back three instructions while r3 != 0
+//       }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R3, R3, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//       r6 = r6 + 1;      // draw_id++
+//    }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r7 = r7 - r5;        // unneeded params
+// }
+ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {   // consume and discard the four words of each record that was not drawn
+//    load();
+//    load();
+//    load();
+//    load();
+// }
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0003, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+// exit mthd(0x1438, 0);  // restore VB_INSTANCE_BASE
+// send(r8);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect_count[] = { // macro: indexed counterpart of mmec597_draw_arrays_indirect_count; parameter meanings below are presumed from that macro - confirm
+// r8 = read(0x1434);   // VB_ELEMENT_BASE (saved, restored at the end)
+// r1 = load();         // presumably mode
+// r9 = read(0x1438);   // VB_INSTANCE_BASE (saved, restored at the end)
+// r6 = load();         // presumably start_drawid
+// r7 = load();         // presumably numparams
+// r5 = load();         // presumably totaldraws
+// r5 = r5 - r6;        // remaining draws
+// if (r5 > r7)         // BLE skips the clamp when r5 <= r7
+ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE,
+ ADD, R1, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ ADD, R6, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R5, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, SUB, R5, R5, R6, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r5 = r7;
+ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r5 >= 0) {       // BLT +0xf lands on the discard loop when r5 < 0
+ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000f, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    while (HW_LOOP_COUNT < r5) {   // LOOP immediate 0xd = twelve body instructions + 1
+//       r3 = load();      // count
+//       r2 = load();      // instance_count
+//       mthd(0x17dc, 0);  // INDEX_BATCH_FIRST
+//       send(load());     // start
+//       r4 = load();      // index_bias
+//       mthd(0x238c, 1);  // CB_POS
+//       send(256 + 160);
+//       send(r4);         // index_bias
+//       r10 = load();     // start_instance
+//       if (r2) {         // BEQ +8 lands on "r6 = r6 + 1" when instance_count is 0
+ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000d, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, (0<<12)|0x17dc/4, IMMED0, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, (1<<12)|0x238c/4, NONE, ALU0, // ALU0 emits the loaded start; ALU1's outputs then switch to CB_POS using ALU0's immediate
+ ADD, R4, LOAD1, ZERO, 256 + 160, IMMED0, IMMED1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, R10, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//          send(r10);        // start_instance
+//          send(r6);         // draw_id
+//          mthd(0x1434, 1);  // VB_ELEMENT_BASE
+//          send(r4);         // index_bias
+//          send(r10);        // start_instance
+//          mthd(0x1118, 0);  // VERTEX_ID_BASE
+//          send(r4);         // index_bias
+//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT (MERGE inserts bit 0 of ZERO at bit 26)
+ MME_INSN(0, ADD, ZERO, R10, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R10, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+//          do {
+//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//             send(r1);         // mode
+//             mthd(0x17e0, 0);  // INDEX_BATCH_COUNT
+//             send(r3);         // count
+//             mthd(0x1614, 0);  // VERTEX_END_GL
+//             send(0);
+//             r1 |= (1 << 26);  // set INSTANCE_NEXT (r4 is reused as the constant 1 for the MERGE)
+//          } while (--r2);      // BEQ offset 0x3ffd branches back three instructions while r2 != 0
+//       }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R2, R2, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//       r6 = r6 + 1;      // draw_id++
+//    }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    r7 = r7 - r5; // unneeded params
+// }
+ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {   // consume and discard the five words of each record that was not drawn
+//    load();           // the destination is ZERO, so the values are dropped
+//    load();
+//    load();
+//    load();
+//    load();
+// }
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1434, 1);
+// send(r8);              // restore VB_ELEMENT_BASE
+// send(r9);              // restore VB_INSTANCE_BASE
+// exit mthd(0x1118, 0);
+// send(r8);              // restore VERTEX_ID_BASE
+ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R9, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_query_buffer_write[] = { // macro: once the sequence check passes, write (end - start), optionally clamped, through method 0x1b00
+// r1 = load();   // clamp value
+// r2 = load();   // end value (lo)
+// r3 = load();   // end value (hi)
+// r4 = load();   // start value (lo)
+// r5 = load();   // start value (hi)
+// r8 = load();   // desired sequence
+// r9 = load();   // actual sequence
+// r7 = load();   // query address (hi)
+// r6 = load();   // query address (lo)
+// if (r9 >= r8) {   // BLT +0xe lands on the exit instruction when r9 < r8
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R8, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R9, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R7, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R6, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLT, ZERO, R9, R8, (2<<14)|0x000e, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    [r3,r2] = [r3,r2] - [r5,r4];   // SUB on the low words, SUBB on the high words
+//    if (r1) {
+ MME_INSN(0, SUB, R2, R2, R4, 0, NONE, NONE,
+ SUBB, R3, R3, R5, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R1, ZERO, (2<<14)|0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//       if (r3 != 0 || r1 < r2)   // the difference exceeds the 32-bit clamp value
+//          r2 = r1;
+//    }
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLTU, ZERO, R1, R2, (1<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, R1, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//    mthd(0x1b00, 1);
+//    send(r7);
+//    send(r6);
+//    send(r2);
+//    send(0x10000000);   // IMMED32_0 with i0 = 0x1000, i1 = 0x0000
+//    if (!r1) {          // no clamp value: the high word is written as well
+ MME_INSN(0, ADD, ZERO, R7, ZERO, (1<<12)|0x1b00/4, IMMED0, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R1, ZERO, (1<<14)|0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+//       [r7,r6] = [r7,r6] + 4;   // computed into the outputs only; r6/r7 are not written back (dst is ZERO)
+//       mthd(0x1b00, 1);
+//       send(r7);
+//       send(r6);
+//       send(r3);
+//       send(0x10000000);
+//    }
+ MME_INSN(0, ADD, ZERO, R6, IMMED, 4, IMMED1, ALU1, // emits ALU1 (high word + carry) first, then ALU0 (low word + 4)
+ ADDC, ZERO, R7, ZERO, (1<<12)|0x1b00/4, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+//    mthd(0x0110, 0);
+//    send(0);
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (0<<12)|0x0110/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// }
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, // en=1 no-op: common exit for both paths
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+// Macro: program the conservative-raster related state from one packed
+// parameter word.  Bits 3:0 and 7:4 of the parameter are repacked into
+// bits 3:0 and 11:8 of the value written sixteen times at 0x0a1c, and
+// bits 10:8 are sent shifted to bit 23 as the second word at 0x3400.
+uint32_t mmec597_conservative_raster_state[] = {
+// r1 = load();
+// mthd(0x3400, 1);
+// send(0);                        // e0=IMMED1: ALU1's immediate, which is 0
+// send(((r1 >> 8) & 7) << 23);    // MERGE (23<<10)|(3<<5)|8
+// send(0x03800000);               // IMMED32_0 with i0 = 0x0380, i1 = 0x0000
+// mthd(0x2310, 1);
+// send(0x00418800);               // IMMED32_0 with i0 = 0x0041, i1 = 0x8800
+// r2 = r1 & 0xf;
+// r3 = 16;
+// r2 = r2 | (((r1 >> 4) & 0xf) << 8);   // MERGE (8<<10)|(4<<5)|4
+// mthd(0x0a1c, 8);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x3400/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (23<<10)|(3<<5)|8, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0380, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x2310/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0041, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x8800, NONE, NONE),
+ MME_INSN(0, AND, R2, R1, IMMED, 0xf, NONE, NONE,
+ ADD, R3, ZERO, IMMED, 16, NONE, NONE),
+ MME_INSN(0, MERGE, R2, R2, R1, (8<<10)|(4<<5)|4, IMMED1, NONE,
+ ADD, ZERO, ZERO, ZERO, (8<<12)|0x0a1c/4, NONE, NONE),
+// while (HW_LOOP_COUNT < r3)
+//    send(r2);
+ MME_INSN(0, LOOP, ZERO, R3, ZERO, 0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1148, 0);
+// send(1);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x1148/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// The value 1 lives in ALU0's immediate, so it has to be emitted with
+// IMMED0.  IMMED1 would emit ALU1's immediate, which is 0 here (the same
+// selector is what produces send(0) in the first instruction above).
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 1, NONE, IMMED0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+// Macro: multiply together the factors that follow the first parameter
+// (which gives their count) and add the product to the 64-bit value held
+// at methods 0x3410 (low word) / 0x3414 (high word).
+uint32_t mmec597_compute_counter[] = {
+// r0 = load();   // number of factors that follow
+// r1 = 1;
+// r2 = 0;
+// while (HW_LOOP_COUNT < r0) {   // LOOP immediate 3 = two body instructions + 1
+ MME_INSN(0, ADD, R0, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R1, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, LOOP, ZERO, R0, ZERO, 0x0003, NONE, NONE,
+ ADD, R2, ZERO, ZERO, 0, NONE, NONE),
+//    r3 = load();
+//    [r2,r1] = r1 * r3;   // MULU writes r1; NOTE(review): MULH presumably captures
+//                         // the high word of the same multiply into r2 - confirm
+// }
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, MULU, R1, R1, R3, 0, NONE, NONE,
+ MULH, R2, ZERO, ZERO, 0, NONE, NONE),
+// r3 = read(0x3410);
+// r4 = read(0x3414);
+// [r4,r3] += [r2,r1];
+// mthd(0x3410, 1);
+// send(r3);
+// send(r4);
+//
+// The two reads must name r3/r4 as destination and IMMED as the address
+// source, like every other STATE in this file.  With ZERO/ZERO/ZERO the
+// immediate is never selected and the result is discarded, so the ADD/ADDC
+// below would accumulate onto the last loaded factor (r3) and an unset r4.
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(1, STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, R3, R1, (1<<12)|0x3410/4, IMMED0, ALU0,
+ ADDC, R4, R4, R2, 0, NONE, ALU1),
+};
+
+uint32_t mmec597_compute_counter_to_query[] = { // macro: add the 64-bit value at 0x3410/0x3414 to a loaded 64-bit value and write both result words through method 0x1b00
+// r1 = load();                    // low word of the value to add to
+// r3 = read(0x3410);
+// r2 = load();                    // high word of the value to add to
+// r4 = read(0x3414);
+// [r2,r1] = [r2,r1] + [r4,r3];    // ADD on the low words, ADDC on the high words
+// mthd(0x1b00, 1);
+// r3 = load();                    // sent first after mthd(0x1b00); treated as the high address word below
+// send(r3);
+// r4 = load();                    // sent second; treated as the low address word below
+// send(r4);
+// send(r1);
+// send(0x10000000);               // IMMED32_0 with i0 = 0x1000, i1 = 0x0000
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE),
+ MME_INSN(0, ADD, R1, R1, R3, (1<<12)|0x1b00/4, IMMED0, NONE,
+ ADDC, R2, R2, R4, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+// [r3,r4] = [r3,r4] + 4;          // computed into the outputs only; r3/r4 are not written back (dst is ZERO)
+// mthd(0x1b00, 1);
+// send(r3);
+// send(r4);
+// send(r2);
+// send(0x10000000);
+ MME_INSN(0, ADD, ZERO, R4, IMMED, 4, IMMED1, ALU1, // emits ALU1 (r3 + carry) first, then ALU0 (r4 + 4)
+ ADDC, ZERO, R3, ZERO, (1<<12)|0x1b00/4, NONE, ALU0),
+ MME_INSN(1, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, // en=1; the instruction below is the last one
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+};