vc4: Move job-submit skip cases to vc4_job_submit().
[mesa.git] / src / gallium / drivers / vc4 / vc4_job.c
index dcade15443a8cc21ba94fee5f4d07e9a7ec45352..e71f6f4cf9105ba7db8a7924e7776662f8c2c569 100644 (file)
@@ -44,8 +44,7 @@ void
 vc4_job_reset(struct vc4_context *vc4)
 {
         struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < (vc4->bo_handles.next -
-                             vc4->bo_handles.base) / 4; i++) {
+        for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
                 vc4_bo_unreference(&referenced_bos[i]);
         }
         vc4_reset_cl(&vc4->bcl);
@@ -56,7 +55,7 @@ vc4_job_reset(struct vc4_context *vc4)
         vc4->shader_rec_count = 0;
 
         vc4->needs_flush = false;
-        vc4->draw_call_queued = false;
+        vc4->draw_calls_queued = 0;
 
         /* We have no hardware context saved between our draw calls, so we
          * need to flag the next draw as needing all state emitted.  Emitting
@@ -90,31 +89,37 @@ vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
         submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
         submit_surf->offset = surf->offset;
 
-        if (is_depth) {
-                submit_surf->bits =
-                        VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
-                                      VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+        if (psurf->texture->nr_samples <= 1) {
+                if (is_depth) {
+                        submit_surf->bits =
+                                VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
+                                              VC4_LOADSTORE_TILE_BUFFER_BUFFER);
 
+                } else {
+                        submit_surf->bits =
+                                VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
+                                              VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+                                VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
+                                              VC4_LOADSTORE_TILE_BUFFER_BGR565 :
+                                              VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
+                                              VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+                }
+                submit_surf->bits |=
+                        VC4_SET_FIELD(surf->tiling,
+                                      VC4_LOADSTORE_TILE_BUFFER_TILING);
         } else {
-                submit_surf->bits =
-                        VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
-                                      VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
-                        VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
-                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
-                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
-                                      VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+                assert(!is_write);
+                submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
         }
-        submit_surf->bits |=
-                VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING);
 
         if (is_write)
                 rsc->writes++;
 }
 
 static void
-vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4,
-                                struct drm_vc4_submit_rcl_surface *submit_surf,
-                                struct pipe_surface *psurf)
+vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
+                                           struct drm_vc4_submit_rcl_surface *submit_surf,
+                                           struct pipe_surface *psurf)
 {
         struct vc4_surface *surf = vc4_surface(psurf);
 
@@ -127,58 +132,122 @@ vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4,
         submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
         submit_surf->offset = surf->offset;
 
-        submit_surf->bits =
-                VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
-                              VC4_RENDER_CONFIG_FORMAT_BGR565 :
-                              VC4_RENDER_CONFIG_FORMAT_RGBA8888,
-                              VC4_RENDER_CONFIG_FORMAT) |
-                VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
+        if (psurf->texture->nr_samples <= 1) {
+                submit_surf->bits =
+                        VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
+                                      VC4_RENDER_CONFIG_FORMAT_BGR565 :
+                                      VC4_RENDER_CONFIG_FORMAT_RGBA8888,
+                                      VC4_RENDER_CONFIG_FORMAT) |
+                        VC4_SET_FIELD(surf->tiling,
+                                      VC4_RENDER_CONFIG_MEMORY_FORMAT);
+        }
 
         rsc->writes++;
 }
 
+static void
+vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4,
+                                  struct drm_vc4_submit_rcl_surface *submit_surf,
+                                  struct pipe_surface *psurf)
+{
+        struct vc4_surface *surf = vc4_surface(psurf);
+
+        if (!surf) {
+                submit_surf->hindex = ~0;
+                return;
+        }
+
+        struct vc4_resource *rsc = vc4_resource(psurf->texture);
+        submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+        submit_surf->offset = surf->offset;
+        submit_surf->bits = 0;
+        rsc->writes++;
+}
+
 /**
  * Submits the job to the kernel and then reinitializes it.
  */
 void
 vc4_job_submit(struct vc4_context *vc4)
 {
+        if (!vc4->needs_flush)
+                return;
+
+        /* The RCL setup would choke if the draw bounds cause no drawing, so
+         * just drop the drawing if that's the case.
+         */
+        if (vc4->draw_max_x <= vc4->draw_min_x ||
+            vc4->draw_max_y <= vc4->draw_min_y) {
+                vc4_job_reset(vc4);
+                return;
+        }
+
         if (vc4_debug & VC4_DEBUG_CL) {
                 fprintf(stderr, "BCL:\n");
-                vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
+                vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
         }
 
+        if (cl_offset(&vc4->bcl) > 0) {
+                /* Increment the semaphore indicating that binning is done and
+                 * unblocking the render thread.  Note that this doesn't act
+                 * until the FLUSH completes.
+                 */
+                cl_ensure_space(&vc4->bcl, 8);
+                struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+                cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+                /* The FLUSH caps all of our bin lists with a
+                 * VC4_PACKET_RETURN.
+                 */
+                cl_u8(&bcl, VC4_PACKET_FLUSH);
+                cl_end(&vc4->bcl, bcl);
+        }
         struct drm_vc4_submit_cl submit;
         memset(&submit, 0, sizeof(submit));
 
-        cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t));
-        cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *));
+        cl_ensure_space(&vc4->bo_handles, 6 * sizeof(uint32_t));
+        cl_ensure_space(&vc4->bo_pointers, 6 * sizeof(struct vc4_bo *));
 
         vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
                                      vc4->color_read, false, false);
-        vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write,
-                                        vc4->color_write);
+        vc4_submit_setup_rcl_render_config_surface(vc4, &submit.color_write,
+                                                   vc4->color_write);
         vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
                                      vc4->zs_read, true, false);
         vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
                                      vc4->zs_write, true, true);
 
+        vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_color_write,
+                                          vc4->msaa_color_write);
+        vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_zs_write,
+                                          vc4->msaa_zs_write);
+
+        if (vc4->msaa) {
+                /* This bit controls how many pixels the general
+                 * (i.e. subsampled) loads/stores are iterating over
+                 * (multisample loads replicate out to the other samples).
+                 */
+                submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
+                /* Controls whether color_write's
+                 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
+                 */
+                submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
+        }
+
         submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
-        submit.bo_handle_count = (vc4->bo_handles.next -
-                                  vc4->bo_handles.base) / 4;
+        submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
         submit.bin_cl = (uintptr_t)vc4->bcl.base;
-        submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
+        submit.bin_cl_size = cl_offset(&vc4->bcl);
         submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
-        submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
+        submit.shader_rec_size = cl_offset(&vc4->shader_rec);
         submit.shader_rec_count = vc4->shader_rec_count;
         submit.uniforms = (uintptr_t)vc4->uniforms.base;
-        submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
+        submit.uniforms_size = cl_offset(&vc4->uniforms);
 
         assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
-        submit.min_x_tile = vc4->draw_min_x / 64;
-        submit.min_y_tile = vc4->draw_min_y / 64;
-        submit.max_x_tile = (vc4->draw_max_x - 1) / 64;
-        submit.max_y_tile = (vc4->draw_max_y - 1) / 64;
+        submit.min_x_tile = vc4->draw_min_x / vc4->tile_width;
+        submit.min_y_tile = vc4->draw_min_y / vc4->tile_height;
+        submit.max_x_tile = (vc4->draw_max_x - 1) / vc4->tile_width;
+        submit.max_y_tile = (vc4->draw_max_y - 1) / vc4->tile_height;
         submit.width = vc4->draw_width;
         submit.height = vc4->draw_height;
         if (vc4->cleared) {
@@ -197,17 +266,28 @@ vc4_job_submit(struct vc4_context *vc4)
 #else
                 ret = vc4_simulator_flush(vc4, &submit);
 #endif
-                if (ret) {
-                        fprintf(stderr, "VC4 submit failed\n");
-                        abort();
+                static bool warned = false;
+                if (ret && !warned) {
+                        fprintf(stderr, "Draw call returned %s.  "
+                                        "Expect corruption.\n", strerror(errno));
+                        warned = true;
+                } else if (!ret) {
+                        vc4->last_emit_seqno = submit.seqno;
                 }
         }
 
-        vc4->last_emit_seqno = submit.seqno;
+        if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
+                if (!vc4_wait_seqno(vc4->screen,
+                                    vc4->last_emit_seqno - 5,
+                                    PIPE_TIMEOUT_INFINITE,
+                                    "job throttling")) {
+                        fprintf(stderr, "Job throttling failed\n");
+                }
+        }
 
         if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
                 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
-                                    PIPE_TIMEOUT_INFINITE)) {
+                                    PIPE_TIMEOUT_INFINITE, "sync")) {
                         fprintf(stderr, "Wait failed.\n");
                         abort();
                 }