nvc0: initial support for tu1xx
author Ben Skeggs <bskeggs@redhat.com>
Sat, 6 Jun 2020 23:52:49 +0000 (09:52 +1000)
committer Marge Bot <eric+marge@anholt.net>
Wed, 10 Jun 2020 22:52:42 +0000 (22:52 +0000)
v2:
- add proper method definitions

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>

13 files changed:
src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
src/gallium/drivers/nouveau/nv_object.xml.h
src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h [new file with mode: 0644]
src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
src/gallium/drivers/nouveau/nvc0/nvc0_program.c
src/gallium/drivers/nouveau/nvc0/nvc0_program.h
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.c
src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c

index 899d73d7398f5d63422141091001e602e16eb3ea..31e7cf822334a942d868f18114e15b038d0f5af0 100644 (file)
@@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NV50_2D_PATTERN_SELECT_BITMAP_1X64                     0x00000002
 #define NV50_2D_PATTERN_SELECT_COLOR                           0x00000003
 
-#define NVC0_2D_UNK02B8(i0)                                   (0x000002b8 + 0x4*(i0))
-#define NVC0_2D_UNK02B8__ESIZE                                 0x00000004
-#define NVC0_2D_UNK02B8__LEN                                   0x00000009
+#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE  0x000002b8
 
 #define NVC0_2D_UNK2DC                                         0x000002dc
 
index 0c1337028f321325d6083c88ff84511fa8d30d37..fac195d484668f7f2b840b63b8eeff8b2168d55a 100644 (file)
@@ -196,6 +196,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define GP100_3D_CLASS                                         0x0000c097
 #define GP102_3D_CLASS                                         0x0000c197
 #define GV100_3D_CLASS                                         0x0000c397
+#define TU102_3D_CLASS                                         0x0000c597
 #define NV50_2D_CLASS                                          0x0000502d
 #define NVC0_2D_CLASS                                          0x0000902d
 #define NV50_COMPUTE_CLASS                                     0x000050c0
@@ -209,6 +210,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define GP100_COMPUTE_CLASS                                    0x0000c0c0
 #define GP104_COMPUTE_CLASS                                    0x0000c1c0
 #define GV100_COMPUTE_CLASS                                    0x0000c3c0
+#define TU102_COMPUTE_CLASS                                    0x0000c5c0
 #define NV84_CRYPT_CLASS                                       0x000074c1
 #define BLOB_NVC0_PCOPY1_CLASS                                 0x000090b8
 #define BLOB_NVC0_PCOPY0_CLASS                                 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
new file mode 100644 (file)
index 0000000..390741c
--- /dev/null
@@ -0,0 +1,904 @@
+#define NV_MME_PRED_MODE_UUUU                0 // predicate-mode codes: the 'pm' argument of MME_BITS (shifted to bit 1, below 'pr' at bit 5); MME_INSN always passes UUUU
+#define NV_MME_PRED_MODE_TTTT                1 // NOTE(review): the four letters presumably give a per-slot U/T/F predicate treatment - confirm against the MME ISA description
+#define NV_MME_PRED_MODE_FFFF                2
+#define NV_MME_PRED_MODE_TTUU                3
+#define NV_MME_PRED_MODE_FFUU                4
+#define NV_MME_PRED_MODE_TFUU                5
+#define NV_MME_PRED_MODE_TUUU                6
+#define NV_MME_PRED_MODE_FUUU                7
+#define NV_MME_PRED_MODE_UUTT                8
+#define NV_MME_PRED_MODE_UUTF                9
+#define NV_MME_PRED_MODE_UUTU                10
+#define NV_MME_PRED_MODE_UUFT                11
+#define NV_MME_PRED_MODE_UUFF                12
+#define NV_MME_PRED_MODE_UUFU                13
+#define NV_MME_PRED_MODE_UUUT                14
+#define NV_MME_PRED_MODE_UUUF                15 // largest code; fits the 4 bits between 'en' (bit 0) and 'pr' (bit 5)
+
+#define NV_MME_REG_R0                       0 // register selectors: the d/a/b operands (and 'pr') of MME_BITS, each placed in a 5-bit slot; MME_INSN pastes the short name onto NV_MME_REG_
+#define NV_MME_REG_R1                       1
+#define NV_MME_REG_R2                       2
+#define NV_MME_REG_R3                       3
+#define NV_MME_REG_R4                       4
+#define NV_MME_REG_R5                       5
+#define NV_MME_REG_R6                       6
+#define NV_MME_REG_R7                       7
+#define NV_MME_REG_R8                       8
+#define NV_MME_REG_R9                       9
+#define NV_MME_REG_R10                      10
+#define NV_MME_REG_R11                      11
+#define NV_MME_REG_R12                      12
+#define NV_MME_REG_R13                      13
+#define NV_MME_REG_R14                      14
+#define NV_MME_REG_R15                      15
+#define NV_MME_REG_R16                      16
+#define NV_MME_REG_R17                      17
+#define NV_MME_REG_R18                      18
+#define NV_MME_REG_R19                      19
+#define NV_MME_REG_R20                      20
+#define NV_MME_REG_R21                      21
+#define NV_MME_REG_R22                      22
+#define NV_MME_REG_R23                      23 // last numbered register; codes from 24 up are special selectors
+#define NV_MME_REG_ZERO                     24 // used in the tables as a discarded destination and as a zero source (a no-op is ADD ZERO, ZERO, ZERO)
+#define NV_MME_REG_IMMED                    25 // source taken from the same ALU's immediate field (e.g. "ADD R6, IMMED, ZERO, 0x60" for r6 = 0x60)
+#define NV_MME_REG_IMMEDPAIR                26
+#define NV_MME_REG_IMMED32                  27
+#define NV_MME_REG_LOAD0                    28 // source that consumes a macro parameter: "load()" in the pseudo-code comments
+#define NV_MME_REG_LOAD1                    29 // presumably the second parameter consumed by the same instruction - TODO confirm
+
+#define NV_MME_ALU_ADD                    0 // ALU opcodes: the o0/o1 arguments of MME_BITS; MME_INSN pastes the short name onto NV_MME_ALU_
+#define NV_MME_ALU_ADDC                   1
+#define NV_MME_ALU_SUB                    2
+#define NV_MME_ALU_SUBB                   3
+#define NV_MME_ALU_MUL                    4
+#define NV_MME_ALU_MULH                   5
+#define NV_MME_ALU_MULU                   6
+#define NV_MME_ALU_EXTENDED               7
+#define NV_MME_ALU_CLZ                    8
+#define NV_MME_ALU_SLL                    9
+#define NV_MME_ALU_SRL                    10
+#define NV_MME_ALU_SRA                    11
+#define NV_MME_ALU_AND                    12
+#define NV_MME_ALU_NAND                   13
+#define NV_MME_ALU_OR                     14
+#define NV_MME_ALU_XOR                    15
+#define NV_MME_ALU_MERGE                  16 // bitfield insert; the tables write its immediate as (dst_bit<<10)|(width<<5)|src_bit - layout inferred from their pseudo-code, TODO confirm
+#define NV_MME_ALU_SLT                    17
+#define NV_MME_ALU_SLTU                   18
+#define NV_MME_ALU_SLE                    19
+#define NV_MME_ALU_SLEU                   20
+#define NV_MME_ALU_SEQ                    21
+#define NV_MME_ALU_STATE                  22 // "read(addr)" in the pseudo-code: the tables pass addr/4 as the immediate with IMMED as operand a
+#define NV_MME_ALU_LOOP                   23 // hardware loop on operand a; in the tables the immediate equals the distance from the LOOP to the first instruction after the body
+#define NV_MME_ALU_JAL                    24
+#define NV_MME_ALU_BLT                    25
+#define NV_MME_ALU_BLTU                   26
+#define NV_MME_ALU_BLE                    27
+#define NV_MME_ALU_BLEU                   28
+#define NV_MME_ALU_BEQ                    29 // tables use (2<<14)|N to skip forward N instructions when a == b, and (1<<14)|0x3ffd (-3 in 14 bits) to loop back while a != b - bit meanings inferred, TODO confirm
+#define NV_MME_ALU_DREAD                  30
+#define NV_MME_ALU_DWRITE                 31 // largest opcode; opcodes occupy a 5-bit slot in MME_BITS
+
+#define NV_MME_OUT_NONE                 0 // output selectors: the m0/m1 and e0/e1 arguments of MME_BITS; MME_INSN pastes the short name onto NV_MME_OUT_
+#define NV_MME_OUT_ALU0                 1 // take the value from ALU0's result (used for "send(rN)" when it appears in an e* slot)
+#define NV_MME_OUT_ALU1                 2 // take the value from ALU1's result
+#define NV_MME_OUT_LOAD0                3
+#define NV_MME_OUT_LOAD1                4
+#define NV_MME_OUT_IMMED0               5 // take the value from ALU0's immediate (used for "mthd(addr, inc)" when it appears in an m* slot)
+#define NV_MME_OUT_IMMED1               6 // take the value from ALU1's immediate
+#define NV_MME_OUT_RESERVED             7
+#define NV_MME_OUT_IMMEDHIGH0           8
+#define NV_MME_OUT_IMMEDHIGH1           9
+#define NV_MME_OUT_IMMED32_0            10 // NOTE(review): codes above 7 fit only the 4-bit e0/e1 slots of MME_BITS; the m0/m1 slots are 3 bits wide
+
+#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) /* Packs one 96-bit instruction and expands to THREE comma-separated 32-bit initializer words: bits 95..64, then 63..32, then 31..0. b1 (bits 61..65) and i0 (bits 30..45) straddle word boundaries and are split with shift/mask pairs. NOTE(review): arguments are shifted as plain int, so a field reaching bit 31 (e.g. ((i0) & 3) << 30 with a value >= 2, or ((b1) & 7) << 29) overflows signed int; consider casting operands to uint32_t. */ \
+   ((e1) << (92 - 64) | (m1) << (89 - 64) |                                    \
+    (e0) << (85 - 64) | (m0) << (82 - 64) |                                    \
+    (i1) << (66 - 64) | (b1) >> (64 - 61)),                                    \
+   (((b1) & 7)  << (61 - 32) | (a1) << (56 - 32) |                             \
+    (d1) << (51 - 32) | (o1) << (46 - 32) |                                    \
+    (i0) >> (32 - 30)),                                                        \
+   (((i0) & 3) << 30 | (b0) << 25 | (a0) << 20 | (d0) << 15 | (o0) << 10 |     \
+    (pr) << 5 | (pm) << 1 | (en))
+
+#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) /* Mnemonic front end for MME_BITS: token-pastes the short names onto NV_MME_ALU_/NV_MME_REG_/NV_MME_OUT_ and fixes pm = UUUU, pr = ZERO. Arguments are grouped per ALU as (op, dst, a, b, immediate, m-output, e-output), first ALU0 then ALU1, which differs from the MME_BITS argument order. Immediates (i0/i1) and en are passed through as plain expressions. */ \
+   MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO,                      \
+            NV_MME_ALU_##o0, NV_MME_REG_##d0,                               \
+            NV_MME_REG_##a0, NV_MME_REG_##b0, (i0),                            \
+            NV_MME_ALU_##o1, NV_MME_REG_##d1,                               \
+            NV_MME_REG_##a1, NV_MME_REG_##b1, (i1),                            \
+            NV_MME_OUT_##m0, NV_MME_OUT_##e0,                                  \
+            NV_MME_OUT_##m1, NV_MME_OUT_##e1)
+
+uint32_t mmec597_per_instance_bf[] = { // macro: sends one bit of 'mask' per iteration, 'count' times, starting at method 0x1880. NOTE(review): this is a non-static definition inside a header - safe only if included from a single translation unit
+// r1 = load();      // count (loop trip count, i.e. how many mask bits are sent)
+// r3 = load();      // mask (bit 0 is sent first, then the mask is shifted right)
+// mthd(0x1880, 1);  // VERTEX_ARRAY_PER_INSTANCE[0]; encoded below as (1<<12)|0x1880/4
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (1<<12)|0x1880/4, IMMED0,   NONE,
+                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
+// while (HW_LOOP_COUNT < r1) {
+//    send(r3 & 1);
+//    r3 >>= 1;
+// }
+   MME_INSN(0,  LOOP, ZERO,    R1,  ZERO,            0x0003,   NONE,   NONE, // 0x0003: the two-instruction body ends 3 slots after this LOOP
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   AND, ZERO,    R3, IMMED,                 1,   NONE,   ALU0, // result is not stored (dst ZERO), only emitted via e0 = ALU0
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   SRL,   R3,    R3, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE, // en=1 sits on the second-to-last instruction of every table here; presumably "end after the next instruction" - TODO confirm
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE, // no-op filling the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_vertex_array_select[] = { // macro: programs the start and limit addresses (hi/lo pairs) of one vertex array chosen by the first parameter
+// r1 = load();            // array (only the low 5 bits are used below)
+// r2 = load();            // limit hi
+// r3 = load();            // limit lo
+// r4 = load();            // start hi
+// r5 = load();            // start lo
+// r6 = (r1 & 0x1f) << 2;  // MERGE immediate (2<<10)|(5<<5)|0
+// r7 = (r1 & 0x1f) << 1;  // MERGE immediate (1<<10)|(5<<5)|0
+// mthd(0x1c04 + r6, 1);   // VERTEX_ARRAY_START_HIGH[]; note the code adds r6 to 0x1c04/4, so r6 counts 32-bit methods, not bytes
+// send(r4);
+// send(r5);
+// mthd(0x0600 + r7, 1);   // VERTEX_ARRAY_LIMIT_HIGH[]; likewise r7 is added to 0x0600/4
+// send(r2);
+// send(r3);
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R5, LOAD0,  ZERO,                 0,   NONE,   NONE,
+               MERGE,   R6,  ZERO,    R1,  (2<<10)|(5<<5)|0,   NONE,   NONE),
+   MME_INSN(0, MERGE,   R7,  ZERO,    R1,  (1<<10)|(5<<5)|0,   ALU1,   NONE, // m0 = ALU1: the method word is ALU1's sum r6 + ((1<<12)|0x1c04/4)
+                 ADD, ZERO,    R6, IMMED,  (1<<12)|0x1c04/4,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0, // send(r4) then send(r5)
+                 ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(1,   ADD, ZERO,    R7, IMMED,  (1<<12)|0x0600/4,   ALU0,   ALU1, // method word is ALU0's sum; send(r2) comes from ALU1
+                 ADD, ZERO,    R2,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R3,  ZERO,                 0,   NONE,   ALU0, // send(r3): placed after the en=1 instruction, so that slot must still execute
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_blend_enables[] = { // macro: unpacks bits 0..7 of one parameter into eight successive sends starting at method 0x1360
+// r1 = load();         // enable mask (one bit per send, bit 0 first)
+// mthd(0x1360, 1);     // NVC0_3D_BLEND_ENABLE[]; encoded below as (1<<12)|0x1360/4 in ALU1's immediate
+// send((r1 >> 0) & 1);
+// send((r1 >> 1) & 1);
+// send((r1 >> 2) & 1);
+// send((r1 >> 3) & 1);
+// send((r1 >> 4) & 1);
+// send((r1 >> 5) & 1);
+// send((r1 >> 6) & 1);
+// send((r1 >> 7) & 1);
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0, IMMED1,   NONE, // m0 = IMMED1: ALU0's method output uses the immediate carried by ALU1
+                 ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x1360/4,   NONE,   NONE),
+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|0,   NONE,   ALU0, // each MERGE extracts one bit of r1 (last immediate term is the source bit) and emits it
+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|1,   NONE,   ALU1),
+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|2,   NONE,   ALU0,
+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|3,   NONE,   ALU1),
+   MME_INSN(1, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|4,   NONE,   ALU0,
+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|5,   NONE,   ALU1),
+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|6,   NONE,   ALU0, // bits 6 and 7 are sent from the slot after the en=1 instruction
+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|7,   NONE,   ALU1),
+};
+
+uint32_t mmec597_poly_mode_front[] = { // macro: writes POLYGON_MODE_FRONT, then sends method 0x02ec a value (0x60, 0x200 or 0) derived from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();         // new POLYGON_MODE_FRONT value
+// mthd(0x0dac,0);      // POLYGON_MODE_FRONT
+// send(r1);
+// r2 = read(0x0db0);   // POLYGON_MODE_BACK
+// r3 = read(0x20c0);   // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100);   // SP_SELECT[4]
+// r6 = 0x60;           // default value for method 0x02ec
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x0dac/4, IMMED0,   ALU0,
+               STATE,   R2, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE),
+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
+                  OR,   R7,    R1,    R2,                 0,   NONE,   NONE),
+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0x200;
+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in SP_SELECT[3] or SP_SELECT[4]; this check overrides the previous one
+   MME_INSN(0,    OR,   R7,    R3,    R4,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0;
+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0, // send(r6) from the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_poly_mode_back[] = { // macro: writes POLYGON_MODE_BACK, then sends method 0x02ec a value (0x60, 0x200 or 0) derived from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();         // new POLYGON_MODE_BACK value
+// mthd(0x0db0,0);      // POLYGON_MODE_BACK
+// send(r1);
+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
+// r3 = read(0x20c0);   // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100);   // SP_SELECT[4]
+// r6 = 0x60;           // default value for method 0x02ec
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x0db0/4, IMMED0,   ALU0,
+               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
+                  OR,   R7,    R1,    R2,                 0,   NONE,   NONE),
+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0x200;
+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in SP_SELECT[3] or SP_SELECT[4]; this check overrides the previous one
+   MME_INSN(0,    OR,   R7,    R3,    R4,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0;
+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0, // send(r6) from the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_gp_select[] = { // macro: writes SP_SELECT[4], then sends method 0x02ec a value (0x60, 0x200 or 0) derived from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();         // new SP_SELECT[4] value
+// mthd(0x2100,0);      // SP_SELECT[4]
+// send(r1);
+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
+// r3 = read(0x0db0);   // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x20c0);   // SP_SELECT[3]
+// r6 = 0x60;           // default value for method 0x02ec
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x2100/4, IMMED0,   ALU0,
+               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE,
+                  OR,   R7,    R2,    R3,                 0,   NONE,   NONE),
+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0x200;
+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// r7 = r1 | r4;        // new SP_SELECT[4] value combined with the SP_SELECT[3] read-back
+// r7 = r7 & 1;
+// if (r7 != 0)         // this check overrides the previous one
+   MME_INSN(0,    OR,   R7,    R1,    R4,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0;
+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0, // send(r6) from the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_tep_select[] = { // macro: writes SP_SELECT[3], then sends method 0x02ec a value (0x60, 0x200 or 0) derived from both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();         // new SP_SELECT[3] value
+// mthd(0x20c0,0);      // SP_SELECT[3]
+// send(r1);
+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
+// r3 = read(0x0db0);   // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x2100);   // SP_SELECT[4]
+// r6 = 0x60;           // default value for method 0x02ec
+// r7 = r7 & 1;
+// if (r7 != 0)         // bit 0 set in either polygon mode
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x20c0/4, IMMED0,   ALU0,
+               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE,
+                  OR,   R7,    R2,    R3,                 0,   NONE,   NONE),
+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0x200;
+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// r7 = r1 | r4;        // new SP_SELECT[3] value combined with the SP_SELECT[4] read-back
+// r7 = r7 & 1;
+// if (r7 != 0)         // this check overrides the previous one
+   MME_INSN(0,    OR,   R7,    R1,    R4,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE, // r7 == 0: skip the next instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = 0;
+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0, // send(r6) from the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect[] = { // macro: issues 'numparams' non-indexed draws, each read as (count, instance_count, start, start_instance) from the parameter stream, then restores VB_INSTANCE_BASE
+// r1 = load();         // mode
+// r5 = read(0x1438);   // VB_INSTANCE_BASE (saved here, restored at the end)
+// r6 = load();         // start_drawid
+// r7 = load();         // numparams (outer loop trip count)
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                0,   NONE,   NONE,
+                 ADD,   R6, LOAD1,  ZERO,                0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                0,   NONE,   NONE,
+               STATE,   R5, IMMED,  ZERO,         0x1438/4,   NONE,   NONE),
+// while (HW_LOOP_COUNT < r7) {
+//    r2 = load();      // count
+//    r3 = load();      // instance_count
+//    mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
+//    send(load());     // start
+//    r4 = load();      // start_instance
+//    if (r3) {
+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x000c,   NONE,   NONE, // 0x000c: the loop body is the next 11 instructions
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,          0x0d74/4, IMMED0,   NONE,
+                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (2<<14)|0x0008,   NONE,   NONE, // instance_count == 0: jump 8 ahead, straight to "r6 = r6 + 1"
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//       mthd(0x238c, 1);     // CB_POS
+//       send(256 + 160);
+//       send(0);             // base_vertex
+//       send(r4);            // start_instance
+//       send(r6);            // draw id
+//       mthd(0x1438, 0);     // VB_INSTANCE_BASE
+//       send(r4);
+//       r1 = r1 & ~(1<<26);  // clear INSTANCE_NEXT
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1, // e0 = IMMED1 sends 256 + 160; ALU1's e = ALU0 then sends ALU0's result, 0
+                 ADD, ZERO,  ZERO,  ZERO,         256 + 160,   NONE,   ALU0),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1438/4, IMMED0,   ALU0,
+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE), // inserts a zero bit at position 26 of r1
+//       do {
+//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//          send(r1);         // mode
+//          mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
+//          send(r2);         // count
+//          mthd(0x1614, 0);  // VERTEX_END_GL
+//          send(0);
+//          r1 |= (1<<26);    // set INSTANCE_NEXT
+//       } while(--r3);
+//    }
+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R2,  ZERO,          0x0d78/4, IMMED1,   ALU1),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE), // r4 is reused as the constant 1 for the MERGE below (start_instance is no longer needed)
+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
+                 SUB,   R3,    R3, IMMED,                 1,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE, // 0x3ffd is -3 in 14 bits: back to the VERTEX_BEGIN_GL instruction while r3 != 0
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = r6 + 1;      // next draw id
+// };
+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x1438, 0);  // restore VB_INSTANCE_BASE
+// send(r5);
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,          0x1438/4, IMMED0,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R5,  ZERO,                 0,   NONE,      ALU0, // send(r5) from the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect[] = { // macro: issues 'numparams' indexed draws, each read as (count, instance_count, start, index_bias, start_instance) from the parameter stream, then restores the saved base methods
+// r1 = load();         // mode
+// r8 = read(0x1434);   // VB_ELEMENT_BASE (saved here, restored at the end)
+// r9 = read(0x1438);   // VB_INSTANCE_BASE (saved here, restored at the end)
+// r6 = load();         // start_drawid
+// r7 = load();         // numparams (outer loop trip count)
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
+               STATE,   R8, IMMED,  ZERO,          0x1434/4,   NONE,   NONE),
+   MME_INSN(0, STATE,   R9, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
+                 ADD,   R6, LOAD0,  ZERO,                 0,   NONE,   NONE), // LOAD0 on ALU1: the only load in this instruction
+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// while (HW_LOOP_COUNT < r7) {
+//    r3 = load();      // count
+//    r2 = load();      // instance_count
+//    mthd(0x17dc, 0);  // INDEX_BATCH_FIRST
+//    send(load());     // start
+//    r4 = load();      // index_bias
+//    mthd(0x238c, 1);  // CB_POS
+//    send(256 + 160);
+//    send(r4);         // index_bias
+//    r5 = load();      // start_instance
+//    if (r2) {
+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x000d,   NONE,   NONE, // 0x000d: the loop body is the next 12 instructions
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,          0x17dc/4, IMMED0,   NONE,
+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
+                 ADD, ZERO,    R4,  ZERO,         256 + 160,   NONE,   ALU1),
+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (2<<14)|0x0008,   NONE,   NONE, // instance_count == 0: jump 8 ahead, straight to "r6 = r6 + 1"
+                 ADD,   R5, LOAD0,  ZERO,                 0,   NONE,   NONE), // start_instance is loaded by ALU1 of the branch instruction itself
+//       send(r5);         // start_instance
+//       send(r6);         // draw_id
+//       mthd(0x1434, 1);  // VB_ELEMENT_BASE
+//       send(r4);         // index_bias
+//       send(r5);         // start_instance
+//       mthd(0x1118, 0);  // VERTEX_ID_BASE
+//       send(r4);         // index_bias
+//       r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+   MME_INSN(0,   ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU0,
+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1118/4, IMMED0,   ALU0,
+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE), // inserts a zero bit at position 26 of r1
+//       do {
+//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//          send(r1);         // mode
+//          mthd(0x17e0, 0);  // INDEX_BATCH_COUNT
+//          send(r3);         // count
+//          mthd(0x1614, 0);  // VERTEX_END_GL
+//          send(0);
+//          r1 |= (1 << 26);  // set INSTANCE_NEXT
+//       } while (--r2);
+//    }
+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R3,  ZERO,          0x17e0/4, IMMED1,   ALU1),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE), // r4 is reused as the constant 1 for the MERGE below (index_bias was already sent)
+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
+                 SUB,   R2,    R2, IMMED,                 1,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE, // 0x3ffd is -3 in 14 bits: back to the VERTEX_BEGIN_GL instruction while r2 != 0
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r6 = r6 + 1;      // next draw id
+// };
+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x1434, 1);
+// send(r8);         // restore VB_ELEMENT_BASE
+// send(r9);         // restore VB_INSTANCE_BASE
+// mthd(0x1118, 0);
+// send(r8);         // restore VERTEX_ID_BASE (from the saved VB_ELEMENT_BASE value)
+   MME_INSN(1,   ADD, ZERO,    R8,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R9,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,          0x1118/4, IMMED0,   ALU0, // VERTEX_ID_BASE write sits in the slot after the en=1 instruction
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect_count[] = {
+// r1 = load();         // mode
+// r6 = load();         // start_drawid
+// r7 = load();         // numparams
+// r5 = load();         // totaldraws
+// r8 = read(0x1438);   // VB_INSTANCE_BASE (saved here, written back on exit)
+// r5 = r5 - r6;        // remaining draws
+// if (r5 > r7)         // never draw more than the numparams sets supplied
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD,   R6, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD,   R5, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0, STATE,   R8, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
+                 SUB,   R5,    R5,    R6,                 0,   NONE,   NONE),
+   MME_INSN(0,   BLE, ZERO,    R5,    R7,    (2<<14)|0x0002,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r5 = r7;
+   MME_INSN(0,   ADD,   R5,    R7,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// if (r5 >= 0) {       // a negative remaining-draw count skips straight to the discard loop
+   MME_INSN(0,   BLT, ZERO,    R5,  ZERO,    (2<<14)|0x000e,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    while (HW_LOOP_COUNT < r5) {
+//       r2 = load();      // count
+//       r3 = load();      // instance_count
+//       mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
+//       send(load());     // start
+//       r4 = load();      // start_instance
+//       if (r3) {
+   MME_INSN(0,  LOOP, ZERO,    R5,  ZERO,            0x000c,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,          0x0d74/4, IMMED0,   NONE,
+                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//          mthd(0x238c, 1);  // CB_POS
+//          send(256 + 160);
+//          send(0);          // base_vertex
+//          send(r4);         // start_instance
+//          send(r6);         // draw_id
+//          mthd(0x1438, 0);  // VB_INSTANCE_BASE
+//          send(r4);
+//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
+                 ADD, ZERO,  ZERO,  ZERO,           256+160,   NONE,   ALU0),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1438/4, IMMED0,   ALU0,
+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
+//          do {
+//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//             send(r1);         // mode
+//             mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
+//             send(r2);
+//             mthd(0x1614, 0);  // VERTEX_END_GL
+//             send(0);
+//             r1 |= (1 << 26);  // set INSTANCE_NEXT (r4 is reused as the constant 1 here)
+//          } while (--r3);
+//       }
+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R2,  ZERO,          0x0d78/4, IMMED1,   ALU1),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
+                 SUB,   R3,    R3, IMMED,                 1,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//       r6 = r6 + 1;   // draw_id++
+//    }
+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r7 = r7 - r5;  // unneeded params
+// }
+   MME_INSN(0,   SUB,   R7,    R7,    R5,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// while (HW_LOOP_COUNT < r7) {   // consume and drop the 4 words of each unneeded param set
+//    load();
+//    load();
+//    load();
+//    load();
+// }
+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x0003,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
+// exit mthd(0x1438, 0);   // VB_INSTANCE_BASE
+// send(r8);               // write back the value read at entry
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,          0x1438/4, IMMED0,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,                 0,   NONE,   ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect_count[] = {
+// r8 = read(0x1434);   // saved here, written back on exit
+// r1 = load();         // sent to 0x1618 below (labelled "mode" in mmec597_draw_arrays_indirect_count)
+// r9 = read(0x1438);   // VB_INSTANCE_BASE, saved here, written back on exit
+// r6 = load();         // draw id of the first draw (incremented once per draw)
+// r7 = load();         // number of parameter sets supplied
+// r5 = load();
+// r5 = r5 - r6;        // draws left after skipping the first r6
+// if (r5 > r7)         // never draw more than the r7 sets supplied
+   MME_INSN(0, STATE,   R8, IMMED,  ZERO,          0x1434/4,   NONE,   NONE,
+                 ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0, STATE,   R9, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
+                 ADD,   R6, LOAD0,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD,   R5, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   SUB,   R5,    R5,    R6,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BLE, ZERO,    R5,    R7,    (2<<14)|0x0002,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r5 = r7;
+   MME_INSN(0,   ADD,   R5,    R7,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// if (r5 >= 0) {       // a negative count skips straight to the discard loop
+   MME_INSN(0,   BLT, ZERO,    R5,  ZERO,    (2<<14)|0x000f,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    while (HW_LOOP_COUNT < r5) {
+//       r3 = load();      // sent to 0x17e0 for every instance
+//       r2 = load();      // instance loop counter
+//       mthd(0x17dc, 0);
+//       send(load());
+//       r4 = load();
+//       mthd(0x238c, 1);  // CB_POS
+//       send(256 + 160);
+//       send(r4);
+//       r10 = load();
+//       if (r2) {
+   MME_INSN(0,  LOOP, ZERO,    R5,  ZERO,            0x000d,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,  (0<<12)|0x17dc/4, IMMED0,   NONE,
+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,  (1<<12)|0x238c/4,   NONE,   ALU0,
+                 ADD,   R4, LOAD1,  ZERO,         256 + 160, IMMED0, IMMED1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
+                 ADD,  R10, LOAD0,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//          send(r10);
+//          send(r6);         // draw id
+//          mthd(0x1434, 1);
+//          send(r4);
+//          send(r10);        // lands on 0x1438 (VB_INSTANCE_BASE) via the method increment
+//          mthd(0x1118, 0);
+//          send(r4);
+//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+   MME_INSN(0,   ADD, ZERO,   R10,  ZERO,                 0,   NONE,   ALU0,
+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
+                 ADD, ZERO,   R10,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (0<<12)|0x1118/4, IMMED0,   ALU0,
+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
+//          do {
+//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
+//             send(r1);
+//             mthd(0x17e0, 0);
+//             send(r3);
+//             mthd(0x1614, 0);  // VERTEX_END_GL
+//             send(0);
+//             r1 |= (1 << 26);  // set INSTANCE_NEXT (r4 is reused as the constant 1 here)
+//          } while (--r2);
+//       }
+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R3,  ZERO,          0x17e0/4, IMMED1,   ALU1),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
+                 SUB,   R2,    R2, IMMED,                 1,   NONE,   NONE),
+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//       r6 = r6 + 1;   // next draw id
+//    }
+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+//    r7 = r7 - r5; // unneeded params
+// }
+   MME_INSN(0,   SUB,   R7,    R7,    R5,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// while (HW_LOOP_COUNT < r7) {   // consume and drop the 5 words of each unneeded param set
+//    load();
+//    load();
+//    load();
+//    load();
+//    load();
+// }
+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x0004,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+// mthd(0x1434, 1);        // write back the two values read at entry
+// send(r8);
+// send(r9);
+// exit mthd(0x1118, 0);
+// send(r8);
+   MME_INSN(1,   ADD, ZERO,    R8,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
+                 ADD, ZERO,    R9,  ZERO,                 0,   NONE,   ALU1),
+   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,  (0<<12)|0x1118/4, IMMED0,   ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
+};
+
+uint32_t mmec597_query_buffer_write[] = {
+// r1 = load();   // clamp value
+// r2 = load();   // end value (lo)
+// r3 = load();   // end value (hi)
+// r4 = load();   // start value (lo)
+// r5 = load();   // start value (hi)
+// r8 = load();   // desired sequence
+// r9 = load();   // actual sequence
+// r7 = load();   // query address (hi)
+// r6 = load();   // query address (lo)
+// if (r9 >= r8) {   // otherwise branch to the exit without writing anything
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R5, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD,   R8, LOAD1,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R9, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD,   R7, LOAD1,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R6, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   BLT, ZERO,    R9,    R8,    (2<<14)|0x000e,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+//    [r3,r2] = [r3,r2] - [r5,r4];   // end - start, SUB on the low word and SUBB on the high word
+//    if (r1) {
+   MME_INSN(0,   SUB,   R2,    R2,    R4,                 0,   NONE,      NONE,
+                SUBB,   R3,    R3,    R5,                 0,   NONE,      NONE),
+   MME_INSN(0,   BEQ, ZERO,    R1,  ZERO,    (2<<14)|0x0004,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+//       if (r3 != 0 || r1 < r2)   // unsigned compare (BLTU)
+//          r2 = r1;
+//    }
+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x0002,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,  BLTU, ZERO,    R1,    R2,    (1<<14)|0x0002,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R2,    R1,  ZERO,                 0,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+//    mthd(0x1b00, 1);
+//    send(r7);
+//    send(r6);
+//    send(r2);
+//    send(0x10000000);
+//    if (!r1) {   // no clamp value given: also write the high word
+   MME_INSN(0,   ADD, ZERO,    R7,  ZERO,  (1<<12)|0x1b00/4, IMMED0,      ALU0,
+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,      ALU1),
+   MME_INSN(0,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
+   MME_INSN(0,   BEQ, ZERO,    R1,  ZERO,    (1<<14)|0x0004,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+//       [r7,r6] = [r7,r6] + 4;   // sums are sent directly; r6/r7 themselves are not updated
+//       mthd(0x1b00, 1);
+//       send(r7);
+//       send(r6);
+//       send(r3);
+//       send(0x10000000);
+//    }
+   MME_INSN(0,   ADD, ZERO,    R6, IMMED,                 4, IMMED1,      ALU1,
+                ADDC, ZERO,    R7,  ZERO,  (1<<12)|0x1b00/4,   NONE,      ALU0),
+   MME_INSN(0,   ADD, ZERO,    R3,  ZERO,                 0,   NONE,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
+//    mthd(0x0110, 0);
+//    send(0);
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x0110/4, IMMED0,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+// }   // exit: the two instructions below do no work
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+};
+
+uint32_t mmec597_conservative_raster_state[] = {
+// r1 = load();   // packed argument: bits 0-3, 4-7 and 8-10 are extracted below
+// mthd(0x3400, 1);
+// send(0);
+// send(((r1 >> 8) & 7) << 23);
+// send(0x03800000);
+// mthd(0x2310, 1);
+// send(0x00418800);
+// r2 = r1 & 0xf;
+// r3 = 16;       // trip count of the send loop below
+// r2 = r2 | (((r1 >> 4) & 0xf) << 8);
+// mthd(0x0a1c, 8);
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (1<<12)|0x3400/4, IMMED0,    IMMED1,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1, (23<<10)|(3<<5)|8,   NONE,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x0380,   NONE, IMMED32_0,
+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x2310/4, IMMED0,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x0041,   NONE, IMMED32_0,
+                 ADD, ZERO,  ZERO,  ZERO,            0x8800,   NONE,      NONE),
+   MME_INSN(0,   AND,   R2,    R1, IMMED,               0xf,   NONE,      NONE,
+                 ADD,   R3,  ZERO, IMMED,                16,   NONE,      NONE),
+   MME_INSN(0, MERGE,   R2,    R2,    R1,  (8<<10)|(4<<5)|4, IMMED1,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,  (8<<12)|0x0a1c/4,   NONE,      NONE),
+// while (HW_LOOP_COUNT < r3)   // sends the same r2 word 16 times
+//    send(r2);
+   MME_INSN(0,  LOOP, ZERO,    R3,  ZERO,            0x0002,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+// mthd(0x1148, 0);   // exit flag is set on this instruction; the send below still follows it
+// send(1);
+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x1148/4, IMMED0,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 1,   NONE,    IMMED1,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+};
+
+uint32_t mmec597_compute_counter[] = {
+// r0 = load();   // trip count of the multiply loop
+// r1 = 1;
+// r2 = 0;
+// while (HW_LOOP_COUNT < r0) {
+   MME_INSN(0,   ADD,   R0, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD,   R1, IMMED,  ZERO,                 1,   NONE,      NONE),
+   MME_INSN(0,  LOOP, ZERO,    R0,  ZERO,            0x0003,   NONE,      NONE,
+                 ADD,   R2,  ZERO,  ZERO,                 0,   NONE,      NONE),
+//    r3 = load();
+//    [r2,r1] = r1 * r3;   // MULU result goes to r1, MULH result to r2
+// }
+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,  MULU,   R1,    R1,    R3,                 0,   NONE,      NONE,
+                MULH,   R2,  ZERO,  ZERO,                 0,   NONE,      NONE),
+// r3 = read(0x3410);   // STATE needs a destination register and an IMMED source,
+// r4 = read(0x3414);   // encoded like every other read() in this file
+// [r4,r3] += [r2,r1];
+// mthd(0x3410, 1);
+// send(r3);
+// send(r4);
+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x3410/4,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(1, STATE,   R4, IMMED,  ZERO,          0x3414/4,   NONE,      NONE,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R3,    R3,    R1,  (1<<12)|0x3410/4, IMMED0,      ALU0,
+                ADDC,   R4,    R4,    R2,                 0,   NONE,      ALU1),
+};
+
+uint32_t mmec597_compute_counter_to_query[] = {
+// r1 = load();         // low word of the value added to the counter
+// r3 = read(0x3410);   // counter (lo)
+// r2 = load();         // high word of the value added to the counter
+// r4 = read(0x3414);   // counter (hi)
+// [r2,r1] = [r2,r1] + [r4,r3];
+// mthd(0x1b00, 1);
+// r3 = load();         // presumably query address (hi), as in mmec597_query_buffer_write - confirm
+// send(r3);
+// r4 = load();         // presumably query address (lo) - confirm
+// send(r4);
+// send(r1);
+// send(0x10000000);
+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,      NONE,
+               STATE,   R3, IMMED,  ZERO,          0x3410/4,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,                 0,   NONE,      NONE,
+               STATE,   R4, IMMED,  ZERO,          0x3414/4,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R1,    R1,    R3,  (1<<12)|0x1b00/4, IMMED0,      NONE,
+                ADDC,   R2,    R2,    R4,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      ALU0,
+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,      ALU1),
+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,                 0,   NONE,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
+// [r3,r4] = [r3,r4] + 4;   // ADD on r4, ADDC on r3; sums are sent directly, registers unchanged
+// mthd(0x1b00, 1);
+// send(r3);
+// send(r4);
+// send(r2);
+// send(0x10000000);
+   MME_INSN(0,   ADD, ZERO,    R4, IMMED,                 4, IMMED1,      ALU1,
+                ADDC, ZERO,    R3,  ZERO,  (1<<12)|0x1b00/4,   NONE,      ALU0),
+   MME_INSN(1,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
+};
index 1c5a8dc0b1f4839cf0ada71a09a9829fd9260bd0..539bdc750226325e0a74cdfa95efd333fbd3a72f 100644 (file)
@@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVC0_3D_UNK0220__ESIZE                                 0x00000004
 #define NVC0_3D_UNK0220__LEN                                   0x00000028
 
+#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH                                0x00000238
+
+#define TU102_3D_INDEX_ARRAY_LIMIT_LOW                         0x0000023c
+
+#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE              0x000002b8
+
 #define NVC0_3D_UNK02C0                                        0x000002c0
 
 #define NVC0_3D_UNK02C4                                        0x000002c4
@@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVC0_3D_UNK0400__ESIZE                                 0x00000004
 #define NVC0_3D_UNK0400__LEN                                   0x000000c0
 
+#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0)                  (0x00000600 + 0x8*(i0))
+#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0)                   (0x00000604 + 0x8*(i0))
+
 #define NVC0_3D_TFB_STREAM(i0)                                (0x00000700 + 0x10*(i0))
 #define NVC0_3D_TFB_STREAM__ESIZE                              0x00000010
 #define NVC0_3D_TFB_STREAM__LEN                                0x00000004
index 578335d70014672550ac68d10952cd9e5a2b2ab8..a095515e48df98514771092d5af5e1f6e16f5072 100644 (file)
@@ -37,6 +37,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
    return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
 }
 
+static uint32_t
+tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
+{
+   uint32_t kind; // value returned to the caller, picked from mt's format and 'compressed'
+
+   if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
+      return 0; // cursor surfaces always get kind 0
+   if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
+      return 0; // linear resources always get kind 0
+
+   switch (mt->base.base.format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      if (compressed)
+         kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC
+      else
+         kind = 0x01; // NV_MMU_PTE_KIND_Z16
+      break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8X24_UINT:
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      if (compressed)
+         kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC
+      else
+         kind = 0x05; // NV_MMU_PTE_KIND_Z24S8
+      break;
+   case PIPE_FORMAT_X24S8_UINT:
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      if (compressed)
+         kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC
+      else
+         kind = 0x03; // NV_MMU_PTE_KIND_S8Z24
+      break;
+   case PIPE_FORMAT_X32_S8X24_UINT:
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      if (compressed)
+         kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC
+      else
+         kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+   default: // Z32_FLOAT and every format not listed above; 'compressed' is ignored here
+      kind = 0x06; // presumably NV_MMU_PTE_KIND_GENERIC_MEMORY - TODO confirm
+      break;
+   }
+
+   return kind;
+}
+
 static uint32_t
 nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
 {
@@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
    if (pt->bind & PIPE_BIND_LINEAR)
       pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
 
-   bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+   if (dev->chipset < 0x160)
+      bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+   else
+      bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed);
 
    if (!nvc0_miptree_init_ms_mode(mt)) {
       FREE(mt);
index b9fff341f28edadfa525c5f692f018da2f4d6549..d2b2de47c8dc66423d19470c707f116b6d912c20 100644 (file)
@@ -737,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
    struct nvc0_screen *screen = nvc0->screen;
    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
    int ret;
-   uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+   uint32_t size = prog->code_size;
+
+   if (!is_cp) {
+      if (screen->eng3d->oclass < TU102_3D_CLASS)
+         size += GF100_SHADER_HEADER_SIZE;
+      else
+         size += TU102_SHADER_HEADER_SIZE;
+   }
 
    /* On Fermi, SP_START_ID must be aligned to 0x40.
     * On Kepler, the first instruction must be aligned to 0x80 because
@@ -753,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
    prog->code_base = prog->mem->start;
 
    if (!is_cp) {
-      if (screen->base.class_3d >= NVE4_3D_CLASS) {
+      if (screen->base.class_3d >= NVE4_3D_CLASS &&
+          screen->base.class_3d < TU102_3D_CLASS) {
          switch (prog->mem->start & 0xff) {
          case 0x40: prog->code_base += 0x70; break;
          case 0x80: prog->code_base += 0x30; break;
@@ -780,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
 {
    struct nvc0_screen *screen = nvc0->screen;
    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
-   uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+   uint32_t code_pos = prog->code_base;
+   uint32_t size_sph = 0;
+
+   if (!is_cp) {
+      if (screen->eng3d->oclass < TU102_3D_CLASS)
+         size_sph = GF100_SHADER_HEADER_SIZE;
+      else
+         size_sph = TU102_SHADER_HEADER_SIZE;
+   }
+   code_pos += size_sph;
 
    if (prog->relocs)
       nv50_ir_relocate_code(prog->relocs, prog->code, code_pos,
@@ -806,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
 
    if (!is_cp)
       nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
-                           NV_VRAM_DOMAIN(&screen->base),
-                           NVC0_SHADER_HEADER_SIZE, prog->hdr);
+                           NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr);
 
    nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
                         NV_VRAM_DOMAIN(&screen->base), prog->code_size,
@@ -820,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
    struct nvc0_screen *screen = nvc0->screen;
    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
    int ret;
-   uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+   uint32_t size = prog->code_size;
+
+   if (!is_cp) {
+      if (screen->eng3d->oclass < TU102_3D_CLASS)
+         size += GF100_SHADER_HEADER_SIZE;
+      else
+         size += TU102_SHADER_HEADER_SIZE;
+   }
 
    ret = nvc0_program_alloc_code(nvc0, prog);
    if (ret) {
@@ -955,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
    unsigned base = 0;
    unsigned i;
    if (prog->type != PIPE_SHADER_COMPUTE)
-      base = NVC0_SHADER_HEADER_SIZE;
+      base = GF100_SHADER_HEADER_SIZE;
    for (i = 0; i < prog->cp.num_syms; ++i)
       if (syms[i].label == label)
          return prog->code_base + base + syms[i].offset;
index 6e965ae9d9ea6cae43ba17e001def69699f8340f..2c465b342e922212f303401a8f6d8d8bce4ebd45 100644 (file)
@@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state {
 };
 
 
-#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+#define GF100_SHADER_HEADER_SIZE (20 * 4)
+#define TU102_SHADER_HEADER_SIZE (32 * 4)
+#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE
 
 struct nvc0_program {
    struct pipe_shader_state pipe;
@@ -30,7 +32,7 @@ struct nvc0_program {
    unsigned code_size;
    unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
 
-   uint32_t hdr[20];
+   uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4];
    uint32_t flags[2];
 
    struct {
index 2807b59a4fdec9f5af4c5eb5b0a531c8e205ec8b..073b44dc79fd6bfb87e5dc715b5d61144ea36b4d 100644 (file)
@@ -37,6 +37,7 @@
 
 #include "nvc0/mme/com9097.mme.h"
 #include "nvc0/mme/com90c0.mme.h"
+#include "nvc0/mme/comc597.mme.h"
 
 #include "nv50/g80_texture.xml.h"
 
@@ -728,6 +729,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
    return pos + size;
 }
 
+static int
+tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+                     unsigned size, const uint32_t *data)
+{
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+   size /= 4; /* callers pass sizeof(array); from here on size counts 32-bit words */
+
+   assert((pos + size) <= 0x800); /* NOTE(review): pos advances by size / 3 below but size is in words - confirm the intended unit */
+
+   BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
+   PUSH_DATA (push, (m - 0x3800) / 8); /* macro index derived from the macro method offset m */
+   PUSH_DATA (push, pos); /* start position of this macro */
+   BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); /* upload position followed by size data words */
+   PUSH_DATA (push, pos);
+   PUSH_DATAp(push, data, size);
+
+   return pos + (size / 3); /* next free position; presumably one MME_INSN is 3 words - confirm */
+}
+
 static void
 nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
 {
@@ -838,6 +859,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
    case 0x120:
    case 0x130:
    case 0x140:
+   case 0x160:
       return nve4_screen_compute_setup(screen, screen->base.pushbuf);
    default:
       return -1;
@@ -997,6 +1019,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    case 0x120:
    case 0x130:
    case 0x140:
+   case 0x160:
       break;
    default:
       return NULL;
@@ -1074,6 +1097,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    }
 
    switch (dev->chipset & ~0xf) {
+   case 0x160:
    case 0x140:
    case 0x130:
    case 0x120:
@@ -1128,6 +1152,9 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATA (push, screen->fence.bo->offset + 16);
 
    switch (dev->chipset & ~0xf) {
+   case 0x160:
+      obj_class = TU102_3D_CLASS;
+      break;
    case 0x140:
       obj_class = GV100_3D_CLASS;
       break;
@@ -1378,25 +1405,47 @@ nvc0_screen_create(struct nouveau_device *dev)
       PUSH_DATA (push, 16384 << 16);
    }
 
+   if (screen->eng3d->oclass < TU102_3D_CLASS) {
 #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
 
-   i = 0;
-   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
-   MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
-   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
-   MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
-   MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
-   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
-   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
-   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
-   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
-   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
-   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
-   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
-   MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
-   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
-   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
-   MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
+      i = 0;
+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
+      MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
+      MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
+      MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+      MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
+      MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
+   } else {
+#undef MK_MACRO
+#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n);
+
+      i = 0;
+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf);
+      MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables);
+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select);
+      MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select);
+      MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select);
+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front);
+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count);
+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count);
+      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
+      MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state);
+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query);
+   }
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);
index 0bba3ada8da4dce99631824e5af6e54128bcac0c..731b0b5dbf857207035fde9d01018e46bc5fe26e 100644 (file)
@@ -140,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
       PUSH_DATA (push, bo->offset + offset);
    }
 
+   if (dst) {
+      IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE),
+                 util_format_is_depth_or_stencil(pformat));
+   }
+
 #if 0
    if (dst) {
       BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
@@ -1233,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
       }
    }
 
+   if (screen->eng3d->oclass >= TU102_3D_CLASS) {
+      IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE),
+                 util_format_is_depth_or_stencil(info->dst.format));
+   }
+
    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
    IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
               NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
@@ -1293,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
    PUSH_DATAh(push, vtxbuf);
    PUSH_DATA (push, vtxbuf);
    PUSH_DATA (push, 0);
-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+   if (screen->eng3d->oclass < TU102_3D_CLASS)
+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+   else
+      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
    PUSH_DATAh(push, vtxbuf + length - 1);
    PUSH_DATA (push, vtxbuf + length - 1);
 
@@ -1370,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
 
    /* restore viewport transform */
    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+   if (screen->eng3d->oclass >= TU102_3D_CLASS)
+      IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0);
 }
 
 static void
index 92bd7eb5b8ec03079ea90ff076a758f1d1ecc3ef..8287d8431b109f49d0a04257d9d0441c9734a8bf 100644 (file)
@@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
          PUSH_DATAh(push, res->address + offset);
          PUSH_DATA (push, res->address + offset);
       }
-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+
+      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+      else
+         BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
       PUSH_DATAh(push, res->address + limit);
       PUSH_DATA (push, res->address + limit);
 
@@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
       PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
       PUSH_DATAh(push, buf->address + offset);
       PUSH_DATA (push, buf->address + offset);
-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+
+      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+      else
+         BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
       PUSH_DATAh(push, buf->address + limit);
       PUSH_DATA (push, buf->address + limit);
 
@@ -961,12 +969,25 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       assert(nouveau_resource_mapped_by_gpu(&buf->base));
 
-      PUSH_SPACE(push, 6);
-      BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
-      PUSH_DATAh(push, buf->address);
-      PUSH_DATA (push, buf->address);
-      PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
-      PUSH_DATA (push, buf->address + buf->base.width0 - 1);
-      PUSH_DATA (push, info->index_size >> 1);
+      /* Reserve for the larger TU102+ sequence below: 3 method headers plus
+       * 5 data words (8 dwords); the pre-TU102 sequence only needs 6. */
+      PUSH_SPACE(push, 8);
+      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) {
+         BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
+         PUSH_DATAh(push, buf->address);
+         PUSH_DATA (push, buf->address);
+         PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+         PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+         PUSH_DATA (push, info->index_size >> 1);
+      } else {
+         BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2);
+         PUSH_DATAh(push, buf->address);
+         PUSH_DATA (push, buf->address);
+         BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2);
+         PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+         PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+         BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1);
+         PUSH_DATA (push, info->index_size >> 1);
+      }
 
       BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
    }
index 8aa7088dfec95270a5b48449edcfe468a1f17dba..d49a5dfd2cfd0284e5c4b190e554192d1aa57821 100644 (file)
@@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
    BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
    PUSH_DATAh(push, va);
    PUSH_DATA (push, va);
-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+
+   if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+   else
+      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
    PUSH_DATAh(push, va + size - 1);
    PUSH_DATA (push, va + size - 1);
 
@@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
    PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
    PUSH_DATAh(push, va);
    PUSH_DATA (push, va);
-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+
+   if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+   else
+      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
    PUSH_DATAh(push, va + info->count * index_size - 1);
    PUSH_DATA (push, va + info->count * index_size - 1);
 
index 3a3f0a926de2d4afadbdcc7e1e101ebbef287362..d4687b652ba41e868295033d7777b3f701b35246 100644 (file)
@@ -52,6 +52,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    uint64_t address;
 
    switch (dev->chipset & ~0xf) {
+   case 0x160:
+      obj_class = TU102_COMPUTE_CLASS;
+      break;
    case 0x140:
       obj_class = GV100_COMPUTE_CLASS;
       break;
index 2dbe7be021190290b29d7fda7c5a963d0e8f8cf5..d123c8a1c177e65ae14233181a9097f73ef815f1 100644 (file)
@@ -105,6 +105,7 @@ nouveau_drm_screen_create(int fd)
        case 0x120:
        case 0x130:
        case 0x140:
+       case 0x160:
                init = nvc0_screen_create;
                break;
        default: