v3d: request the kernel to flush caches when TMU is dirty
author Iago Toral Quiroga <itoral@igalia.com>
Tue, 3 Sep 2019 08:31:42 +0000 (10:31 +0200)
committer Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Fri, 18 Oct 2019 12:08:52 +0000 (14:08 +0200)
This adapts the v3d driver to the new CL submit ioctl interface that
allows the driver to request a flush of the caches after the render
job has completed. This seems to eliminate the kernel write violation
errors reported during CTS and Piglit executions, fixing some CTS tests
and GPU resets along the way.

v2:
  - Adapt to changes in the kernel side.
  - Disable shader storage and shader images if the kernel doesn't
    implement cache flushing.

Fixes CTS tests:
KHR-GLES31.core.shader_image_size.basic-nonMS-fs-float
KHR-GLES31.core.shader_image_size.basic-nonMS-fs-int
KHR-GLES31.core.shader_image_size.basic-nonMS-fs-uint
KHR-GLES31.core.shader_image_size.advanced-nonMS-fs-float
KHR-GLES31.core.shader_image_size.advanced-nonMS-fs-int
KHR-GLES31.core.shader_image_size.advanced-nonMS-fs-uint
KHR-GLES31.core.shader_atomic_counters.advanced-usage-many-draw-calls2
KHR-GLES31.core.shader_atomic_counters.advanced-usage-draw-update-draw
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-int
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std140-matR
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std140-struct
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std430-matC-pad
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std430-vec

Reviewed-by: Eric Anholt <eric@anholt.net>
include/drm-uapi/v3d_drm.h
src/gallium/drivers/v3d/v3d_job.c
src/gallium/drivers/v3d/v3d_screen.c
src/gallium/drivers/v3d/v3d_screen.h
src/gallium/drivers/v3d/v3dx_rcl.c
src/gallium/drivers/v3d/v3dx_simulator.c

index 58fbe48c91e905af2af35a888ea170fa411e3b81..b8c6f9d909424f205d733a576b6c53127fda85d3 100644 (file)
@@ -48,6 +48,8 @@ extern "C" {
 #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 #define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
 
+#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
+
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
  * engine.
@@ -124,8 +126,7 @@ struct drm_v3d_submit_cl {
        /* Number of BO handles passed in (size is that times 4). */
        __u32 bo_handle_count;
 
-       /* Pad, must be zero-filled. */
-       __u32 pad;
+       __u32 flags;
 };
 
 /**
@@ -193,6 +194,7 @@ enum drm_v3d_param {
        DRM_V3D_PARAM_V3D_CORE0_IDENT2,
        DRM_V3D_PARAM_SUPPORTS_TFU,
        DRM_V3D_PARAM_SUPPORTS_CSD,
+       DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
 };
 
 struct drm_v3d_get_param {
index 70176045545e376a64f0ac64b99aaf0819d7b326..0acd8b033c2b9b2c5579c8bc61fab7b0bab4f885 100644 (file)
@@ -497,6 +497,10 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
 
+        job->submit.flags = 0;
+        if (job->tmu_dirty_rcl && screen->has_cache_flush)
+                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
+
         /* On V3D 4.1, the tile alloc/state setup moved to register writes
          * instead of binner packets.
          */
index 957420a5311b7886fc41ded015d53fcbe11f7061..2d63a43e2ddd3c152aa6c888eeca839f5e222373 100644 (file)
@@ -175,7 +175,10 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                 return 4;
 
         case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
-                return 4;
+                if (screen->has_cache_flush)
+                        return 4;
+                else
+                        return 0; /* Disables shader storage */
 
         case PIPE_CAP_GLSL_FEATURE_LEVEL:
                 return 330;
@@ -356,16 +359,24 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
                 return V3D_MAX_TEXTURE_SAMPLERS;
 
         case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
-                if (shader == PIPE_SHADER_VERTEX)
-                        return 0;
+                if (screen->has_cache_flush) {
+                        if (shader == PIPE_SHADER_VERTEX)
+                                return 0;
 
-                return PIPE_MAX_SHADER_BUFFERS;
+                        return PIPE_MAX_SHADER_BUFFERS;
+                 } else {
+                        return 0;
+                 }
 
         case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
-                if (screen->devinfo.ver < 41)
+                if (screen->has_cache_flush) {
+                        if (screen->devinfo.ver < 41)
+                                return 0;
+                        else
+                                return PIPE_MAX_SHADER_IMAGES;
+                } else {
                         return 0;
-                else
-                        return PIPE_MAX_SHADER_IMAGES;
+                }
 
         case PIPE_SHADER_CAP_PREFERRED_IR:
                 return PIPE_SHADER_IR_NIR;
@@ -670,6 +681,8 @@ v3d_screen_create(int fd, const struct pipe_screen_config *config,
         slab_create_parent(&screen->transfer_pool, sizeof(struct v3d_transfer), 16);
 
         screen->has_csd = v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_CSD);
+        screen->has_cache_flush =
+                v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH);
 
         v3d_fence_init(screen);
 
index 2f276ac9b0d82a3fa99eb8008c9c9a90996d39d5..db2138d20beb6d1a609604d83ca27a93aba40c38 100644 (file)
@@ -78,6 +78,7 @@ struct v3d_screen {
         uint32_t bo_count;
 
         bool has_csd;
+        bool has_cache_flush;
         bool nonmsaa_texture_size_limit;
 
         struct v3d_simulator_file *sim_file;
index e3ead90787fd4ae3bc753e028cc005fc1208783c..0434fe848361db1cbcce9e020d76dd387cdf93a0 100644 (file)
@@ -795,20 +795,5 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 }
         }
 
-        if (job->tmu_dirty_rcl) {
-           cl_emit(&job->rcl, L1_CACHE_FLUSH_CONTROL, flush) {
-              flush.tmu_config_cache_clear = 0xf;
-              flush.tmu_data_cache_clear = 0xf;
-              flush.uniforms_cache_clear = 0xf;
-              flush.instruction_cache_clear = 0xf;
-           }
-
-           cl_emit(&job->rcl, L2T_CACHE_FLUSH_CONTROL, flush) {
-              flush.l2t_flush_mode = L2T_FLUSH_MODE_CLEAN;
-              flush.l2t_flush_start = cl_address(NULL, 0);
-              flush.l2t_flush_end = cl_address(NULL, ~0);
-           }
-        }
-
         cl_emit(&job->rcl, END_OF_RENDERING, end);
 }
index 87bd2fd0e20599e006c685256616a04e5265844c..a9d3d8cd780261fd4fb5404c0bfc601a562fb6ca 100644 (file)
@@ -225,6 +225,9 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
         case DRM_V3D_PARAM_SUPPORTS_CSD:
                 args->value = V3D_VERSION >= 41;
                 return 0;
+        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
+                args->value = 1;
+                return 0;
         }
 
         if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {