turnip: Put VK_KHR_external_fence_fd stubs back
[mesa.git] / src / gallium / drivers / v3d / v3dx_simulator.c
index a1e72e0e1d9ba60d2b96612d8929a148828e3797..a9d3d8cd780261fd4fb5404c0bfc601a562fb6ca 100644 (file)
@@ -49,7 +49,7 @@
 #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
 
 static void
-v3d_flush_l3(struct v3d_hw *v3d)
+v3d_invalidate_l3(struct v3d_hw *v3d)
 {
         if (!v3d_hw_has_gca(v3d))
                 return;
@@ -62,10 +62,13 @@ v3d_flush_l3(struct v3d_hw *v3d)
 #endif
 }
 
-/* Invalidates the L2 cache.  This is a read-only cache. */
+/* Invalidates the L2C cache.  This is a read-only cache for uniforms and instructions. */
 static void
-v3d_flush_l2(struct v3d_hw *v3d)
+v3d_invalidate_l2c(struct v3d_hw *v3d)
 {
+        if (V3D_VERSION >= 33)
+                return;
+
         V3D_WRITE(V3D_CTL_0_L2CACTL,
                   V3D_CTL_0_L2CACTL_L2CCLR_SET |
                   V3D_CTL_0_L2CACTL_L2CENA_SET);
@@ -73,7 +76,7 @@ v3d_flush_l2(struct v3d_hw *v3d)
 
 /* Invalidates texture L2 cachelines */
 static void
-v3d_flush_l2t(struct v3d_hw *v3d)
+v3d_invalidate_l2t(struct v3d_hw *v3d)
 {
         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
@@ -82,22 +85,125 @@ v3d_flush_l2t(struct v3d_hw *v3d)
                   (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
 }
 
+/* Flushes dirty texture cachelines from the L1 write combiner */
+static void
+v3d_flush_l1td(struct v3d_hw *v3d)
+{
+        V3D_WRITE(V3D_CTL_0_L2TCACTL,
+                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);
+
+        assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
+}
+
+/* Flushes dirty texture L2 cachelines */
+static void
+v3d_flush_l2t(struct v3d_hw *v3d)
+{
+        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
+        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
+        V3D_WRITE(V3D_CTL_0_L2TCACTL,
+                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
+                  (2 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
+
+        assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
+}
+
 /* Invalidates the slice caches.  These are read-only caches. */
 static void
-v3d_flush_slices(struct v3d_hw *v3d)
+v3d_invalidate_slices(struct v3d_hw *v3d)
 {
         V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
 }
 
 static void
+v3d_invalidate_caches(struct v3d_hw *v3d)
+{
+        v3d_invalidate_l3(v3d);
+        v3d_invalidate_l2c(v3d);
+        v3d_invalidate_l2t(v3d);
+        v3d_invalidate_slices(v3d);
+}
+
+static uint32_t g_gmp_ofs;
+static void
+v3d_reload_gmp(struct v3d_hw *v3d)
+{
+        /* Completely reset the GMP. */
+        V3D_WRITE(V3D_GMP_0_CFG,
+                  V3D_GMP_0_CFG_PROTENABLE_SET);
+        V3D_WRITE(V3D_GMP_0_TABLE_ADDR, g_gmp_ofs);
+        V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0);
+        while (V3D_READ(V3D_GMP_0_STATUS) &
+               V3D_GMP_0_STATUS_CFG_BUSY_SET) {
+                ;
+        }
+}
+
+static UNUSED void
 v3d_flush_caches(struct v3d_hw *v3d)
 {
-        v3d_flush_l3(v3d);
-        v3d_flush_l2(v3d);
+        v3d_flush_l1td(v3d);
         v3d_flush_l2t(v3d);
-        v3d_flush_slices(v3d);
 }
 
+int
+v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
+                                 struct drm_v3d_submit_tfu *args)
+{
+        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;
+
+        V3D_WRITE(V3D_TFU_IIA, args->iia);
+        V3D_WRITE(V3D_TFU_IIS, args->iis);
+        V3D_WRITE(V3D_TFU_ICA, args->ica);
+        V3D_WRITE(V3D_TFU_IUA, args->iua);
+        V3D_WRITE(V3D_TFU_IOA, args->ioa);
+        V3D_WRITE(V3D_TFU_IOS, args->ios);
+        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
+        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
+        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
+        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);
+
+        V3D_WRITE(V3D_TFU_ICFG, args->icfg);
+
+        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
+                v3d_hw_tick(v3d);
+        }
+
+        return 0;
+}
+
+#if V3D_VERSION >= 41
+int
+v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
+                                 struct drm_v3d_submit_csd *args,
+                                 uint32_t gmp_ofs)
+{
+        g_gmp_ofs = gmp_ofs;
+        v3d_reload_gmp(v3d);
+
+        v3d_invalidate_caches(v3d);
+
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
+        /* CFG0 kicks off the job */
+        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
+
+        while (V3D_READ(V3D_CSD_0_STATUS) &
+               (V3D_CSD_0_STATUS_HAVE_CURRENT_DISPATCH_SET |
+                V3D_CSD_0_STATUS_HAVE_QUEUED_DISPATCH_SET)) {
+                v3d_hw_tick(v3d);
+        }
+
+        v3d_flush_caches(v3d);
+
+        return 0;
+}
+#endif
+
 int
 v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_get_param *args)
@@ -112,6 +218,18 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                 [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
         };
 
+        switch (args->param) {
+        case DRM_V3D_PARAM_SUPPORTS_TFU:
+                args->value = 1;
+                return 0;
+        case DRM_V3D_PARAM_SUPPORTS_CSD:
+                args->value = V3D_VERSION >= 41;
+                return 0;
+        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
+                args->value = 1;
+                return 0;
+        }
+
         if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                 args->value = V3D_READ(reg_map[args->param]);
                 return 0;
@@ -122,6 +240,44 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
         abort();
 }
 
+static struct v3d_hw *v3d_isr_hw;
+
+static void
+v3d_isr(uint32_t hub_status)
+{
+        struct v3d_hw *v3d = v3d_isr_hw;
+
+        /* Check the per-core bits */
+        if (hub_status & (1 << 0)) {
+                uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
+                V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);
+
+                if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
+                        uint32_t size = 256 * 1024;
+                        uint32_t offset = v3d_simulator_get_spill(size);
+
+                        v3d_reload_gmp(v3d);
+
+                        V3D_WRITE(V3D_PTB_0_BPOA, offset);
+                        V3D_WRITE(V3D_PTB_0_BPOS, size);
+                        return;
+                }
+
+                if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
+                        fprintf(stderr, "GMP violation at 0x%08x\n",
+                                V3D_READ(V3D_GMP_0_VIO_ADDR));
+                        abort();
+                } else {
+                        fprintf(stderr,
+                                "Unexpected ISR with core status 0x%08x\n",
+                                core_status);
+                }
+                abort();
+        }
+
+        return;
+}
+
 void
 v3dX(simulator_init_regs)(struct v3d_hw *v3d)
 {
@@ -136,23 +292,25 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
          */
         V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
 #endif
+
+        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
+                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
+        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
+        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
+
+        v3d_isr_hw = v3d;
+        v3d_hw_set_isr(v3d, v3d_isr);
 }
 
 void
-v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit,
-                      uint32_t gmp_ofs)
+v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
+                                struct drm_v3d_submit_cl *submit,
+                                uint32_t gmp_ofs)
 {
-        /* Completely reset the GMP. */
-        V3D_WRITE(V3D_GMP_0_CFG,
-                  V3D_GMP_0_CFG_PROTENABLE_SET);
-        V3D_WRITE(V3D_GMP_0_TABLE_ADDR, gmp_ofs);
-        V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0);
-        while (V3D_READ(V3D_GMP_0_STATUS) &
-               V3D_GMP_0_STATUS_CFG_BUSY_SET) {
-                ;
-        }
+        g_gmp_ofs = gmp_ofs;
+        v3d_reload_gmp(v3d);
 
-        v3d_flush_caches(v3d);
+        v3d_invalidate_caches(v3d);
 
         if (submit->qma) {
                 V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
@@ -168,14 +326,17 @@ v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit,
         V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
         V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
 
-        /* Wait for bin to complete before firing render, as it seems the
-         * simulator doesn't implement the semaphores.
+        /* Wait for bin to complete before firing render.  The kernel
+         * implements this by having the GPU scheduler block the render job
+         * until the bin fence completes.  (We don't use HW semaphores.)
          */
         while (V3D_READ(V3D_CLE_0_CT0CA) !=
                V3D_READ(V3D_CLE_0_CT0EA)) {
                 v3d_hw_tick(v3d);
         }
 
+        v3d_invalidate_caches(v3d);
+
         V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
         V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);