From: Ben Skeggs Date: Sat, 6 Jun 2020 23:52:49 +0000 (+1000) Subject: nvc0: initial support for tu1xx X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=af3c2f3cfd81186b0041e5297db5225fc788b04e;p=mesa.git nvc0: initial support for tu1xx v2: - add proper method definitions Signed-off-by: Ben Skeggs Acked-by: Karol Herbst Part-of: --- diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h index 899d73d7398..31e7cf82233 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h @@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_2D_PATTERN_SELECT_BITMAP_1X64 0x00000002 #define NV50_2D_PATTERN_SELECT_COLOR 0x00000003 -#define NVC0_2D_UNK02B8(i0) (0x000002b8 + 0x4*(i0)) -#define NVC0_2D_UNK02B8__ESIZE 0x00000004 -#define NVC0_2D_UNK02B8__LEN 0x00000009 +#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8 #define NVC0_2D_UNK2DC 0x000002dc diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index 0c1337028f3..fac195d4846 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -196,6 +196,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define GP100_3D_CLASS 0x0000c097 #define GP102_3D_CLASS 0x0000c197 #define GV100_3D_CLASS 0x0000c397 +#define TU102_3D_CLASS 0x0000c597 #define NV50_2D_CLASS 0x0000502d #define NVC0_2D_CLASS 0x0000902d #define NV50_COMPUTE_CLASS 0x000050c0 @@ -209,6 +210,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define GP100_COMPUTE_CLASS 0x0000c0c0 #define GP104_COMPUTE_CLASS 0x0000c1c0 #define GV100_COMPUTE_CLASS 0x0000c3c0 +#define TU102_COMPUTE_CLASS 0x0000c5c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h new file mode 100644 index 00000000000..390741cbd04 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h @@ -0,0 +1,904 @@ +#define NV_MME_PRED_MODE_UUUU 0 +#define NV_MME_PRED_MODE_TTTT 1 +#define NV_MME_PRED_MODE_FFFF 2 +#define NV_MME_PRED_MODE_TTUU 3 +#define NV_MME_PRED_MODE_FFUU 4 +#define NV_MME_PRED_MODE_TFUU 5 +#define NV_MME_PRED_MODE_TUUU 6 +#define NV_MME_PRED_MODE_FUUU 7 +#define NV_MME_PRED_MODE_UUTT 8 +#define NV_MME_PRED_MODE_UUTF 9 +#define NV_MME_PRED_MODE_UUTU 10 +#define NV_MME_PRED_MODE_UUFT 11 +#define NV_MME_PRED_MODE_UUFF 12 +#define NV_MME_PRED_MODE_UUFU 13 +#define NV_MME_PRED_MODE_UUUT 14 +#define NV_MME_PRED_MODE_UUUF 15 + +#define NV_MME_REG_R0 0 +#define NV_MME_REG_R1 1 +#define NV_MME_REG_R2 2 +#define NV_MME_REG_R3 3 +#define NV_MME_REG_R4 4 +#define NV_MME_REG_R5 5 +#define NV_MME_REG_R6 6 +#define NV_MME_REG_R7 7 +#define NV_MME_REG_R8 8 +#define NV_MME_REG_R9 9 +#define NV_MME_REG_R10 10 +#define NV_MME_REG_R11 11 +#define NV_MME_REG_R12 12 +#define NV_MME_REG_R13 13 +#define NV_MME_REG_R14 14 +#define NV_MME_REG_R15 15 +#define NV_MME_REG_R16 16 +#define NV_MME_REG_R17 17 +#define NV_MME_REG_R18 18 +#define NV_MME_REG_R19 19 +#define NV_MME_REG_R20 20 +#define NV_MME_REG_R21 21 +#define NV_MME_REG_R22 22 +#define NV_MME_REG_R23 23 +#define NV_MME_REG_ZERO 24 +#define NV_MME_REG_IMMED 25 +#define NV_MME_REG_IMMEDPAIR 26 +#define NV_MME_REG_IMMED32 27 +#define NV_MME_REG_LOAD0 28 +#define NV_MME_REG_LOAD1 29 + +#define NV_MME_ALU_ADD 0 +#define NV_MME_ALU_ADDC 1 +#define NV_MME_ALU_SUB 2 +#define NV_MME_ALU_SUBB 3 +#define 
NV_MME_ALU_MUL 4 +#define NV_MME_ALU_MULH 5 +#define NV_MME_ALU_MULU 6 +#define NV_MME_ALU_EXTENDED 7 +#define NV_MME_ALU_CLZ 8 +#define NV_MME_ALU_SLL 9 +#define NV_MME_ALU_SRL 10 +#define NV_MME_ALU_SRA 11 +#define NV_MME_ALU_AND 12 +#define NV_MME_ALU_NAND 13 +#define NV_MME_ALU_OR 14 +#define NV_MME_ALU_XOR 15 +#define NV_MME_ALU_MERGE 16 +#define NV_MME_ALU_SLT 17 +#define NV_MME_ALU_SLTU 18 +#define NV_MME_ALU_SLE 19 +#define NV_MME_ALU_SLEU 20 +#define NV_MME_ALU_SEQ 21 +#define NV_MME_ALU_STATE 22 +#define NV_MME_ALU_LOOP 23 +#define NV_MME_ALU_JAL 24 +#define NV_MME_ALU_BLT 25 +#define NV_MME_ALU_BLTU 26 +#define NV_MME_ALU_BLE 27 +#define NV_MME_ALU_BLEU 28 +#define NV_MME_ALU_BEQ 29 +#define NV_MME_ALU_DREAD 30 +#define NV_MME_ALU_DWRITE 31 + +#define NV_MME_OUT_NONE 0 +#define NV_MME_OUT_ALU0 1 +#define NV_MME_OUT_ALU1 2 +#define NV_MME_OUT_LOAD0 3 +#define NV_MME_OUT_LOAD1 4 +#define NV_MME_OUT_IMMED0 5 +#define NV_MME_OUT_IMMED1 6 +#define NV_MME_OUT_RESERVED 7 +#define NV_MME_OUT_IMMEDHIGH0 8 +#define NV_MME_OUT_IMMEDHIGH1 9 +#define NV_MME_OUT_IMMED32_0 10 + +#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) \ + ((e1) << (92 - 64) | (m1) << (89 - 64) | \ + (e0) << (85 - 64) | (m0) << (82 - 64) | \ + (i1) << (66 - 64) | (b1) >> (64 - 61)), \ + (((b1) & 7) << (61 - 32) | (a1) << (56 - 32) | \ + (d1) << (51 - 32) | (o1) << (46 - 32) | \ + (i0) >> (32 - 30)), \ + (((i0) & 3) << 30 | (b0) << 25 | (a0) << 20 | (d0) << 15 | (o0) << 10 | \ + (pr) << 5 | (pm) << 1 | (en)) + +#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) \ + MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO, \ + NV_MME_ALU_##o0, NV_MME_REG_##d0, \ + NV_MME_REG_##a0, NV_MME_REG_##b0, (i0), \ + NV_MME_ALU_##o1, NV_MME_REG_##d1, \ + NV_MME_REG_##a1, NV_MME_REG_##b1, (i1), \ + NV_MME_OUT_##m0, NV_MME_OUT_##e0, \ + NV_MME_OUT_##m1, NV_MME_OUT_##e1) + +uint32_t mmec597_per_instance_bf[] = { +// r1 = load(); // count +// r3 = load(); // mask +// mthd(0x1880, 
1); // VERTEX_ARRAY_PER_INSTANCE[0] + MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x1880/4, IMMED0, NONE, + ADD, R3, LOAD1, ZERO, 0, NONE, NONE), +// while (HW_LOOP_COUNT < r1) { +// send(r3 & 1); +// r3 >>= 1; +// } + MME_INSN(0, LOOP, ZERO, R1, ZERO, 0x0003, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, AND, ZERO, R3, IMMED, 1, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, SRL, R3, R3, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_vertex_array_select[] = { +// r1 = load(); // array +// r2 = load(); // limit hi +// r3 = load(); // limit lo +// r4 = load(); // start hi +// r5 = load(); // start lo +// r6 = (r1 & 0x1f) << 2; +// r7 = (r1 & 0x1f) << 1; +// mthd(0x1c04 + r6, 1); // VERTEX_ARRAY_START_HIGH[] +// send(r4); +// send(r5); +// mthd(0x0600 + r7, 1); // VERTEX_ARRAY_LIMIT_HIGH[] +// send(r2); +// send(r3); + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, + ADD, R2, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, + ADD, R4, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE, + MERGE, R6, ZERO, R1, (2<<10)|(5<<5)|0, NONE, NONE), + MME_INSN(0, MERGE, R7, ZERO, R1, (1<<10)|(5<<5)|0, ALU1, NONE, + ADD, ZERO, R6, IMMED, (1<<12)|0x1c04/4, NONE, NONE), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, + ADD, ZERO, R5, ZERO, 0, NONE, ALU1), + MME_INSN(1, ADD, ZERO, R7, IMMED, (1<<12)|0x0600/4, ALU0, ALU1, + ADD, ZERO, R2, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_blend_enables[] = { +// r1 = load(); // enable mask +// mthd(0x1360, 1); // NVC0_3D_BLEND_ENABLE[] +// send((r1 >> 0) & 1); +// send((r1 >> 1) & 1); +// send((r1 >> 2) & 1); +// send((r1 >> 
3) & 1); +// send((r1 >> 4) & 1); +// send((r1 >> 5) & 1); +// send((r1 >> 6) & 1); +// send((r1 >> 7) & 1); + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, IMMED1, NONE, + ADD, ZERO, ZERO, ZERO, (1<<12)|0x1360/4, NONE, NONE), + MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|0, NONE, ALU0, + MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|1, NONE, ALU1), + MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|2, NONE, ALU0, + MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|3, NONE, ALU1), + MME_INSN(1, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|4, NONE, ALU0, + MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|5, NONE, ALU1), + MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|6, NONE, ALU0, + MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|7, NONE, ALU1), +}; + +uint32_t mmec597_poly_mode_front[] = { +// r1 = load(); +// mthd(0x0dac,0); // POLYGON_MODE_FRONT +// send(r1); +// r2 = read(0x0db0); // POLYGON_MODE_BACK +// r3 = read(0x20c0); // SP_SELECT[3] +// r7 = r1 | r2; +// r4 = read(0x2100); // SP_SELECT[4] +// r6 = 0x60; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0dac/4, IMMED0, ALU0, + STATE, R2, IMMED, ZERO, 0x0db0/4, NONE, NONE), + MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE, + OR, R7, R1, R2, 0, NONE, NONE), + MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, + ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0x200; + MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r7 = r3 | r4; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0; + MME_INSN(0, 
ADD, R6, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x02ec, 0); +// send(r6); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_poly_mode_back[] = { +// r1 = load(); +// mthd(0x0db0,0); // POLYGON_MODE_BACK +// send(r1); +// r2 = read(0x0dac); // POLYGON_MODE_FRONT +// r3 = read(0x20c0); // SP_SELECT[3] +// r7 = r1 | r2; +// r4 = read(0x2100); // SP_SELECT[4] +// r6 = 0x60; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0db0/4, IMMED0, ALU0, + STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), + MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE, + OR, R7, R1, R2, 0, NONE, NONE), + MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, + ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0x200; + MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r7 = r3 | r4; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0; + MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x02ec, 0); +// send(r6); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_gp_select[] = { +// r1 = load(); +// mthd(0x2100,0); // 
SP_SELECT[4] +// send(r1); +// r2 = read(0x0dac); // POLYGON_MODE_FRONT +// r3 = read(0x0db0); // POLYGON_MODE_BACK +// r7 = r2 | r3; +// r4 = read(0x20c0); // SP_SELECT[3] +// r6 = 0x60; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x2100/4, IMMED0, ALU0, + STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), + MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, + OR, R7, R2, R3, 0, NONE, NONE), + MME_INSN(0, STATE, R4, IMMED, ZERO, 0x20c0/4, NONE, NONE, + ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0x200; + MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r7 = r1 | r4; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0; + MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x02ec, 0); +// send(r6); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_tep_select[] = { +// r1 = load(); +// mthd(0x20c0,0); // SP_SELECT[3] +// send(r1); +// r2 = read(0x0dac); // POLYGON_MODE_FRONT +// r3 = read(0x0db0); // POLYGON_MODE_BACK +// r7 = r2 | r3; +// r4 = read(0x2100); // SP_SELECT[4] +// r6 = 0x60; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x20c0/4, IMMED0, ALU0, + STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), + MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, + OR, 
R7, R2, R3, 0, NONE, NONE), + MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, + ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0x200; + MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r7 = r1 | r4; +// r7 = r7 & 1; +// if (r7 != 0) + MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = 0; + MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x02ec, 0); +// send(r6); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_draw_arrays_indirect[] = { +// r1 = load(); // mode +// r5 = read(0x1438); // VB_INSTANCE_BASE +// r6 = load(); // start_drawid +// r7 = load(); // numparams + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, + ADD, R6, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, + STATE, R5, IMMED, ZERO, 0x1438/4, NONE, NONE), +// while (HW_LOOP_COUNT < r7) { +// r2 = load(); // count +// r3 = load(); // instance_count +// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST +// send(load()); // start +// r4 = load(); // start_instance +// if (r3) { + MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000c, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE, + ADD, R3, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, + ADD, R4, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, 
BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x238c, 1); // CB_POS +// send(256 + 160); +// send(0); // base_vertex +// send(r4); // start_instance +// send(r6); // draw id +// mthd(0x1438, 0); // VB_INSTANCE_BASE +// send(r4); +// r1 = r1 & ~(1<<26); // clear INSTANCE_NEXT + MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, + ADD, ZERO, ZERO, ZERO, 256 + 160, NONE, ALU0), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, + ADD, ZERO, R6, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0, + MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), +// do { +// mthd(0x1618, 0); // VERTEX_BEGIN_GL +// send(r1); // mode +// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT +// send(r2); // count +// mthd(0x1614, 0); // VERTEX_END_GL +// send(0); +// r1 |= (1<<26); // set INSTANCE_NEXT +// } while(--r3); +// } + MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, + ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, + ADD, R4, IMMED, ZERO, 1, NONE, NONE), + MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, + SUB, R3, R3, IMMED, 1, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = r6 + 1; +// }; + MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x1438, 0); // restore VB_INSTANCE_BASE +// send(r5); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_draw_elts_indirect[] = { +// r1 = load(); // mode +// r8 = read(0x1434); // VB_ELEMENT_BASE +// r9 = read(0x1438); // VB_INSTANCE_BASE +// r6 = load(); // start_drawid +// r7 = load(); // numparams + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, + STATE, R8, IMMED, ZERO, 0x1434/4, 
NONE, NONE), + MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE, + ADD, R6, LOAD0, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// while (HW_LOOP_COUNT < r7) { +// r3 = load(); // count +// r2 = load(); // instance_count +// mthd(0x17dc, 0); // INDEX_BATCH_FIRST +// send(load()); // start +// r4 = load(); // index_bias +// mthd(0x238c, 1); // CB_POS +// send(256 + 160); +// send(r4); // index_bias +// r5 = load(); // start_instance +// if (r2) { + MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000d, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R3, LOAD0, ZERO, 0x17dc/4, IMMED0, NONE, + ADD, R2, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, + ADD, R4, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, + ADD, ZERO, R4, ZERO, 256 + 160, NONE, ALU1), + MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, + ADD, R5, LOAD0, ZERO, 0, NONE, NONE), +// send(r5); // start_instance +// send(r6); // draw_id +// mthd(0x1434, 1); // VB_ELEMENT_BASE +// send(r4); // index_bias +// send(r5); // start_instance +// mthd(0x1118, 0); // VERTEX_ID_BASE +// send(r4); // index_bias +// r1 &= ~(1 << 26); // clear INSTANCE_NEXT + MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0, + ADD, ZERO, R6, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, + ADD, ZERO, R5, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1118/4, IMMED0, ALU0, + MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), +// do { +// mthd(0x1618, 0); // VERTEX_BEGIN_GL +// send(r1); // mode +// mthd(0x17e0, 0); // INDEX_BATCH_COUNT +// send(r3); // count +// mthd(0x1614, 0); // VERTEX_END_GL +// send(0); +// r1 |= (1 << 26); // set INSTANCE_NEXT +// } while (--r2); +// } + MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, + ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1), + MME_INSN(0, ADD, 
ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, + ADD, R4, IMMED, ZERO, 1, NONE, NONE), + MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, + SUB, R2, R2, IMMED, 1, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = r6 + 1; +// }; + MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x1434, 1); +// send(r8); // restore VB_ELEMENT_BASE +// send(r9); // restore VB_INSTANCE_BASE +// mthd(0x1118, 0); +// send(r8); // restore VERTEX_ID_BASE + MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, + ADD, ZERO, R9, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R8, ZERO, 0x1118/4, IMMED0, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_draw_arrays_indirect_count[] = { +// r1 = load(); // mode +// r6 = load(); // start_drawid +// r7 = load(); // numparams +// r5 = load(); // totaldraws +// r8 = read(0x1438); // VB_INSTANCE_BASE +// r5 = r5 - r6; // remaining draws +// if (r5 > r7) + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, + ADD, R6, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, + ADD, R5, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1438/4, NONE, NONE, + SUB, R5, R5, R6, 0, NONE, NONE), + MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r5 = r7; + MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// if (r5 >= 0) { + MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000e, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// while (HW_LOOP_COUNT < r5) { +// r2 = load(); // count +// r3 = load(); // instance_count +// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST +// send(load()); // start +// r4 = load(); // start_instance +// if (r3) { + MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000c, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R2, LOAD0, ZERO, 
0x0d74/4, IMMED0, NONE, + ADD, R3, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, + ADD, R4, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x238c, 1); // CB_POS +// send(256 + 160); +// send(0); // base_vertex +// send(r4); // start_instance +// send(r6); // draw_id +// mthd(0x1438, 0); // VB_INSTANCE_BASE +// send(r4); +// r1 &= ~(1 << 26); // clear INSTANCE_NEXT + MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, + ADD, ZERO, ZERO, ZERO, 256+160, NONE, ALU0), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, + ADD, ZERO, R6, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0, + MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), +// do { +// mthd(0x1618, 0); // VERTEX_BEGIN_GL +// send(r1); // mode +// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT +// send(r2); +// mthd(0x1614, 0); // VERTEX_END_GL +// send(0); +// r1 |= (1 << 26); // set INSTANCE_NEXT +// } while (--r3); +// } + MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, + ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, + ADD, R4, IMMED, ZERO, 1, NONE, NONE), + MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, + SUB, R3, R3, IMMED, 1, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = r6 + 1; // draw_id++ +// } + MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r7 = r7 - r5; // unneeded params +// } + MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// while (HW_LOOP_COUNT < r7) { +// load(); +// load(); +// load(); +// load(); +// } + MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0003, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, LOAD1, ZERO, 
0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), +// exit mthd(0x1438, 0); // VB_INSTANCE_BASE +// send(r8); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R8, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_draw_elts_indirect_count[] = { +// r8 = read(0x1434); +// r1 = load(); +// r9 = read(0x1438); +// r6 = load(); +// r7 = load(); +// r5 = load(); +// r5 = r5 - r6; +// if (r5 > r7) + MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE, + ADD, R1, LOAD0, ZERO, 0, NONE, NONE), + MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE, + ADD, R6, LOAD0, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, + ADD, R5, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, SUB, R5, R5, R6, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r5 = r7; + MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// if (r5 >= 0) { + MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000f, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// while (HW_LOOP_COUNT < r5) { +// r3 = load(); +// r2 = load(); +// mthd(0x17dc, 0); +// send(load()); +// r4 = load(); +// mthd(0x238c, 1); +// send(256 + 160); +// send(r4); +// r10 = load(); +// if (r2) { + MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000d, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R3, LOAD0, ZERO, (0<<12)|0x17dc/4, IMMED0, NONE, + ADD, R2, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, (1<<12)|0x238c/4, NONE, ALU0, + ADD, R4, LOAD1, ZERO, 256 + 160, IMMED0, IMMED1), + MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, + ADD, R10, LOAD0, ZERO, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, 
NONE), +// send(r10); +// send(r6); +// mthd(0x1434, 1); +// send(r4); +// send(r10); +// mthd(0x1118, 0); +// send(r4); +// r1 &= ~(1 << 26); + MME_INSN(0, ADD, ZERO, R10, ZERO, 0, NONE, ALU0, + ADD, ZERO, R6, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, + ADD, ZERO, R10, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R4, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0, + MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), +// do { +// mthd(0x1618, 0); +// send(r1); +// mthd(0x17e0, 0); +// send(r3); +// mthd(0x1614, 0); +// send(0); +// r1 |= (1 << 26); +// } while (--r2); +// } + MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, + ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, + ADD, R4, IMMED, ZERO, 1, NONE, NONE), + MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, + SUB, R2, R2, IMMED, 1, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r6 = r6 + 1; +// } + MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// r7 = r7 - r5; // unneeded params +// } + MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// while (HW_LOOP_COUNT < r7) { +// r2 = load(); +// r2 = load(); +// r2 = load(); +// r2 = load(); +// r2 = load(); +// } + MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0004, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x1434, 1); +// send(r8); +// send(r9); +// exit mthd(0x1118, 0); +// send(r8); + MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, + ADD, ZERO, R9, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, 
ZERO, R8, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_query_buffer_write[] = { +// r1 = load(); // clamp value +// r2 = load(); // end value (lo) +// r3 = load(); // end value (hi) +// r4 = load(); // start value (lo) +// r5 = load(); // start value (hi) +// r8 = load(); // desired sequence +// r9 = load(); // actual sequence +// r7 = load(); // query address (hi) +// r6 = load(); // query address (lo) +// if (r9 >= r8) { + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, + ADD, R2, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, + ADD, R4, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE, + ADD, R8, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R9, LOAD0, ZERO, 0, NONE, NONE, + ADD, R7, LOAD1, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R6, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BLT, ZERO, R9, R8, (2<<14)|0x000e, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// [r3,r2] = [r3,r2] - [r5,r4]; +// if (r1) { + MME_INSN(0, SUB, R2, R2, R4, 0, NONE, NONE, + SUBB, R3, R3, R5, 0, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R1, ZERO, (2<<14)|0x0004, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// if (r3 != 0 || r1 < r2) +// r2 = r1; +// } + MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, BLTU, ZERO, R1, R2, (1<<14)|0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R2, R1, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x1b00, 1); +// send(r7); +// send(r6); +// send(r2) +// send(0x10000000); +// if (!r1) { + MME_INSN(0, ADD, ZERO, R7, ZERO, (1<<12)|0x1b00/4, IMMED0, ALU0, + ADD, ZERO, R6, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, + ADD, ZERO, ZERO, 
ZERO, 0x0000, NONE, NONE), + MME_INSN(0, BEQ, ZERO, R1, ZERO, (1<<14)|0x0004, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// [r7,r6] = [r7,r6] + 4; +// mthd(0x1b00, 1); +// send(r7); +// send(r6); +// send(r3); +// send(0x10000000); +// } + MME_INSN(0, ADD, ZERO, R6, IMMED, 4, IMMED1, ALU1, + ADDC, ZERO, R7, ZERO, (1<<12)|0x1b00/4, NONE, ALU0), + MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, + ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), +// mthd(0x0110, 0); +// send(0); + MME_INSN(0, ADD, ZERO, ZERO, ZERO, (0<<12)|0x0110/4, IMMED0, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// } + MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_conservative_raster_state[] = { +// r1 = load(); +// mthd(0x3400, 1); +// send(0); +// send(((r1 >> 8) & 7) << 23); +// send(0x03800000); +// mthd(0x2310, 1); +// send(0x00418800); +// r2 = r1 & 0xf; +// r3 = 16; +// r2 = r2 | (((r1 >> 4) & 0xf) << 8); +// mthd(0x0a1c, 8); + MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x3400/4, IMMED0, IMMED1, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, MERGE, ZERO, ZERO, R1, (23<<10)|(3<<5)|8, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0380, NONE, IMMED32_0, + ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x2310/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0041, NONE, IMMED32_0, + ADD, ZERO, ZERO, ZERO, 0x8800, NONE, NONE), + MME_INSN(0, AND, R2, R1, IMMED, 0xf, NONE, NONE, + ADD, R3, ZERO, IMMED, 16, NONE, NONE), + MME_INSN(0, MERGE, R2, R2, R1, (8<<10)|(4<<5)|4, IMMED1, NONE, + ADD, ZERO, ZERO, ZERO, (8<<12)|0x0a1c/4, NONE, NONE), +// while (HW_LOOP_COUNT < r3) +// send(r2); 
+ MME_INSN(0, LOOP, ZERO, R3, ZERO, 0x0002, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +// mthd(0x1148, 0); +// send(1); + MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x1148/4, IMMED0, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 1, NONE, IMMED1, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), +}; + +uint32_t mmec597_compute_counter[] = { +// r0 = load(); +// r1 = 1; +// r2 = 0; +// while (HW_LOOP_COUNT < r2) { + MME_INSN(0, ADD, R0, LOAD0, ZERO, 0, NONE, NONE, + ADD, R1, IMMED, ZERO, 1, NONE, NONE), + MME_INSN(0, LOOP, ZERO, R0, ZERO, 0x0003, NONE, NONE, + ADD, R2, ZERO, ZERO, 0, NONE, NONE), +// r3 = load(); +// [r1,r0] *= r3; +// } + MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, MULU, R1, R1, R3, 0, NONE, NONE, + MULH, R2, ZERO, ZERO, 0, NONE, NONE), +// r3 = read(0x3410); +// r4 = read(0x3414); +// [r4,r3] += [r2,r1]; +// mthd(0x3410, 1); +// send(r3); +// send(r4); + MME_INSN(0, STATE, ZERO, ZERO, ZERO, 0x3410/4, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(1, STATE, ZERO, ZERO, ZERO, 0x3414/4, NONE, NONE, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, R3, R3, R1, (1<<12)|0x3410/4, IMMED0, ALU0, + ADDC, R4, R4, R2, 0, NONE, ALU1), +}; + +uint32_t mmec597_compute_counter_to_query[] = { +// r1 = load(); +// r3 = read(0x3410); +// r2 = load(); +// r4 = read(0x3414); +// [r2,r1] = [r2,r1] + [r4,r3]; +// mthd(0x1b00, 1); +// r3 = load(); +// send(r3); +// r4 = load(); +// send(r4); +// send(r1); +// send(0x10000000); + MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, + STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE), + MME_INSN(0, ADD, R2, LOAD0, ZERO, 0, NONE, NONE, + STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE), + MME_INSN(0, ADD, R1, R1, R3, (1<<12)|0x1b00/4, IMMED0, NONE, + ADDC, R2, R2, R4, 0, NONE, NONE), + MME_INSN(0, ADD, R3, LOAD0, ZERO, 
0, NONE, ALU0, + ADD, R4, LOAD1, ZERO, 0, NONE, ALU1), + MME_INSN(0, ADD, ZERO, R1, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, + ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), +// [r3,r4] = [r3,r4] + 4; +// mthd(0x1b00, 1); +// send(r3); +// send(r4); +// send(r2); +// send(0x10000000); + MME_INSN(0, ADD, ZERO, R4, IMMED, 4, IMMED1, ALU1, + ADDC, ZERO, R3, ZERO, (1<<12)|0x1b00/4, NONE, ALU0), + MME_INSN(1, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, + ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), + MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, + ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h index 1c5a8dc0b1f..539bdc75022 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h @@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_UNK0220__ESIZE 0x00000004 #define NVC0_3D_UNK0220__LEN 0x00000028 +#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH 0x00000238 + +#define TU102_3D_INDEX_ARRAY_LIMIT_LOW 0x0000023c + +#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8 + #define NVC0_3D_UNK02C0 0x000002c0 #define NVC0_3D_UNK02C4 0x000002c4 @@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_UNK0400__ESIZE 0x00000004 #define NVC0_3D_UNK0400__LEN 0x000000c0 +#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00000600 + 0x8*(i0)) +#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00000604 + 0x8*(i0)) + #define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0)) #define NVC0_3D_TFB_STREAM__ESIZE 0x00000010 #define NVC0_3D_TFB_STREAM__LEN 0x00000004 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 578335d7001..a095515e48d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -37,6 +37,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d) return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d); } +static uint32_t +tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) +{ /* Selects the memtype stored in bo_config.nvc0.memtype when dev->chipset >= 0x160; cursor and linear resources get 0. */ + uint32_t kind; + + if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) + return 0; + if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) + return 0; + + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + if (compressed) + kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC + else + kind = 0x01; // NV_MMU_PTE_KIND_Z16 + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (compressed) + kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC + else + kind = 0x05; // NV_MMU_PTE_KIND_Z24S8 + break; + case PIPE_FORMAT_X24S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (compressed) + kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC + else + kind = 0x03; // NV_MMU_PTE_KIND_S8Z24 + break; + case PIPE_FORMAT_X32_S8X24_UINT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + if (compressed) + kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC + else + kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8 + break; + case PIPE_FORMAT_Z32_FLOAT: + default: + kind = 
0x06; /* compressed is not consulted for Z32_FLOAT or any format not listed above */ + break; + } + + return kind; +} + static uint32_t nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) { @@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen, if (pt->bind & PIPE_BIND_LINEAR) pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; - bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); + if (dev->chipset < 0x160) + bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); + else + bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed); if (!nvc0_miptree_init_ms_mode(mt)) { FREE(mt); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index b9fff341f28..d2b2de47c8d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -737,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; int ret; - uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); + uint32_t size = prog->code_size; + + if (!is_cp) { + if (screen->eng3d->oclass < TU102_3D_CLASS) + size += GF100_SHADER_HEADER_SIZE; + else + size += TU102_SHADER_HEADER_SIZE; + } /* On Fermi, SP_START_ID must be aligned to 0x40. 
* On Kepler, the first instruction must be aligned to 0x80 because @@ -753,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) prog->code_base = prog->mem->start; if (!is_cp) { - if (screen->base.class_3d >= NVE4_3D_CLASS) { + if (screen->base.class_3d >= NVE4_3D_CLASS && + screen->base.class_3d < TU102_3D_CLASS) { switch (prog->mem->start & 0xff) { case 0x40: prog->code_base += 0x70; break; case 0x80: prog->code_base += 0x30; break; @@ -780,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) { struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; - uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); + uint32_t code_pos = prog->code_base; + uint32_t size_sph = 0; + + if (!is_cp) { + if (screen->eng3d->oclass < TU102_3D_CLASS) + size_sph = GF100_SHADER_HEADER_SIZE; + else + size_sph = TU102_SHADER_HEADER_SIZE; + } + code_pos += size_sph; if (prog->relocs) nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, @@ -806,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) if (!is_cp) nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, - NV_VRAM_DOMAIN(&screen->base), - NVC0_SHADER_HEADER_SIZE, prog->hdr); + NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr); nvc0->base.push_data(&nvc0->base, screen->text, code_pos, NV_VRAM_DOMAIN(&screen->base), prog->code_size, @@ -820,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog) struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; int ret; - uint32_t size = prog->code_size + (is_cp ? 
0 : NVC0_SHADER_HEADER_SIZE); + uint32_t size = prog->code_size; + + if (!is_cp) { + if (screen->eng3d->oclass < TU102_3D_CLASS) + size += GF100_SHADER_HEADER_SIZE; + else + size += TU102_SHADER_HEADER_SIZE; + } ret = nvc0_program_alloc_code(nvc0, prog); if (ret) { @@ -955,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) unsigned base = 0; unsigned i; if (prog->type != PIPE_SHADER_COMPUTE) - base = NVC0_SHADER_HEADER_SIZE; + base = GF100_SHADER_HEADER_SIZE; for (i = 0; i < prog->cp.num_syms; ++i) if (syms[i].label == label) return prog->code_base + base + syms[i].offset; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 6e965ae9d9e..2c465b342e9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state { }; -#define NVC0_SHADER_HEADER_SIZE (20 * 4) +#define GF100_SHADER_HEADER_SIZE (20 * 4) +#define TU102_SHADER_HEADER_SIZE (32 * 4) +#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE struct nvc0_program { struct pipe_shader_state pipe; @@ -30,7 +32,7 @@ struct nvc0_program { unsigned code_size; unsigned parm_size; /* size of non-bindable uniforms (c0[]) */ - uint32_t hdr[20]; + uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4]; uint32_t flags[2]; struct { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 2807b59a4fd..073b44dc79f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -37,6 +37,7 @@ #include "nvc0/mme/com9097.mme.h" #include "nvc0/mme/com90c0.mme.h" +#include "nvc0/mme/comc597.mme.h" #include "nv50/g80_texture.xml.h" @@ -728,6 +729,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, return pos + size; } +static int +tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, 
unsigned pos, + unsigned size, const uint32_t *data) +{ /* Pushes macro data for method m starting at pos; the result is fed back as pos for the next macro (see MK_MACRO). */ + struct nouveau_pushbuf *push = screen->base.pushbuf; + + size /= 4; /* size arrives in bytes (MK_MACRO passes sizeof(n)); from here on it counts uint32_t words */ + + assert((pos + size) <= 0x800); /* NOTE(review): adds a word count to pos, yet the return value advances pos by size / 3 - confirm the units of this bound */ + + BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); + PUSH_DATA (push, (m - 0x3800) / 8); + PUSH_DATA (push, pos); + BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); + PUSH_DATA (push, pos); + PUSH_DATAp(push, data, size); + + return pos + (size / 3); /* NOTE(review): nvc0_graph_set_macro returns pos + size; the / 3 presumably means pos counts 3-word instructions here - confirm */ +} + static void nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) { @@ -838,6 +859,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) case 0x120: case 0x130: case 0x140: + case 0x160: return nve4_screen_compute_setup(screen, screen->base.pushbuf); default: return -1; @@ -997,6 +1019,7 @@ nvc0_screen_create(struct nouveau_device *dev) case 0x120: case 0x130: case 0x140: + case 0x160: break; default: return NULL; @@ -1074,6 +1097,7 @@ nvc0_screen_create(struct nouveau_device *dev) } switch (dev->chipset & ~0xf) { + case 0x160: case 0x140: case 0x130: case 0x120: @@ -1128,6 +1152,9 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->fence.bo->offset + 16); switch (dev->chipset & ~0xf) { + case 0x160: + obj_class = TU102_3D_CLASS; + break; case 0x140: obj_class = GV100_3D_CLASS; break; @@ -1378,25 +1405,47 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 16384 << 16); } + if (screen->eng3d->oclass < TU102_3D_CLASS) { #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); - i = 0; - MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); - MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); - MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); - MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); - MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); - MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); - MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); - 
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); - MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); - MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); - MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); - MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); - MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); - MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); - MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); - MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); + i = 0; + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); + MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); + MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); + MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); + MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); + MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); + MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); + } else { +#undef MK_MACRO +#define MK_MACRO(m, n) i = 
tu102_graph_set_macro(screen, m, i, sizeof(n), n); + + i = 0; + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf); + MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables); + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select); + MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select); + MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count); + MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write); + MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query); + } BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 0bba3ada8da..731b0b5dbf8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -140,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst, PUSH_DATA (push, bo->offset + offset); } + if (dst) { + IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE), + util_format_is_depth_or_stencil(pformat)); + } + #if 0 if (dst) { BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); @@ -1233,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) } } + if (screen->eng3d->oclass >= TU102_3D_CLASS) { + 
IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), + util_format_is_depth_or_stencil(info->dst.format)); + } + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0); IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 | NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1); @@ -1293,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) PUSH_DATAh(push, vtxbuf); PUSH_DATA (push, vtxbuf); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + if (screen->eng3d->oclass < TU102_3D_CLASS) + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + else + BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2); PUSH_DATAh(push, vtxbuf + length - 1); PUSH_DATA (push, vtxbuf + length - 1); @@ -1370,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) /* restore viewport transform */ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); + if (screen->eng3d->oclass >= TU102_3D_CLASS) + IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0); } static void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 92bd7eb5b8e..8287d8431b1 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); } - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + + if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + else + BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2); PUSH_DATAh(push, res->address + limit); PUSH_DATA (push, res->address + limit); @@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); PUSH_DATAh(push, 
buf->address + offset); PUSH_DATA (push, buf->address + offset); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + + if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + else + BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2); PUSH_DATAh(push, buf->address + limit); PUSH_DATA (push, buf->address + limit); @@ -961,12 +969,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) assert(nouveau_resource_mapped_by_gpu(&buf->base)); PUSH_SPACE(push, 6); - BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); - PUSH_DATAh(push, buf->address); - PUSH_DATA (push, buf->address); - PUSH_DATAh(push, buf->address + buf->base.width0 - 1); - PUSH_DATA (push, buf->address + buf->base.width0 - 1); - PUSH_DATA (push, info->index_size >> 1); + if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); + PUSH_DATAh(push, buf->address); + PUSH_DATA (push, buf->address); + PUSH_DATAh(push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, info->index_size >> 1); + } else { + BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2); + PUSH_DATAh(push, buf->address); + PUSH_DATA (push, buf->address); + BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2); + PUSH_DATAh(push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, buf->address + buf->base.width0 - 1); + BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1); + PUSH_DATA (push, info->index_size >> 1); + } BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index 8aa7088dfec..d49a5dfd2cf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const 
unsigned count) BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2); PUSH_DATAh(push, va); PUSH_DATA (push, va); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + + if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + else + BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2); PUSH_DATAh(push, va + size - 1); PUSH_DATA (push, va + size - 1); @@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx, PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); PUSH_DATAh(push, va); PUSH_DATA (push, va); - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); + + if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); + else + BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2); PUSH_DATAh(push, va + info->count * index_size - 1); PUSH_DATA (push, va + info->count * index_size - 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 3a3f0a926de..d4687b652ba 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -52,6 +52,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, uint64_t address; switch (dev->chipset & ~0xf) { + case 0x160: + obj_class = TU102_COMPUTE_CLASS; + break; case 0x140: obj_class = GV100_COMPUTE_CLASS; break; diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index 2dbe7be0211..d123c8a1c17 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -105,6 +105,7 @@ nouveau_drm_screen_create(int fd) case 0x120: case 0x130: case 0x140: + case 0x160: init = nvc0_screen_create; break; default: