v3d: keep track of resources written by transform feedback
[mesa.git] / src / gallium / drivers / v3d / v3dx_rcl.c
index 5be29aca1fd42f4b34114c520c0f75990664377e..8706ac0fc3ff506376d424f990f17b1a4aa99f1e 100644 (file)
@@ -74,6 +74,7 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
                         load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                 else
                         load.input_image_format = surf->format;
+                load.r_b_swap = surf->swap_rb;
 
                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
                     surf->tiling == VC5_TILING_UIF_XOR) {
@@ -130,16 +131,14 @@ store_general(struct v3d_job *job,
                 store.address = cl_address(rsc->bo, surf->offset);
 
 #if V3D_VERSION >= 40
-                store.clear_buffer_being_stored =
-                        ((job->cleared & pipe_bit) &&
-                         (general_color_clear ||
-                          !(pipe_bit & PIPE_CLEAR_COLOR_BUFFERS)));
+                store.clear_buffer_being_stored = false;
 
                 if (separate_stencil)
                         store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                 else
                         store.output_image_format = surf->format;
 
+                store.r_b_swap = surf->swap_rb;
                 store.memory_format = surf->tiling;
 
                 if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
@@ -164,18 +163,18 @@ store_general(struct v3d_job *job,
                 assert(buffer != ZSTENCIL);
                 store.raw_mode = true;
                 if (!last_store) {
-                        store.disable_colour_buffers_clear_on_write = true;
+                        store.disable_color_buffers_clear_on_write = true;
                         store.disable_z_buffer_clear_on_write = true;
                         store.disable_stencil_buffer_clear_on_write = true;
                 } else {
-                        store.disable_colour_buffers_clear_on_write =
+                        store.disable_color_buffers_clear_on_write =
                                 !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) &&
                                    general_color_clear &&
-                                   (job->cleared & pipe_bit)));
+                                   (job->clear & pipe_bit)));
                         store.disable_z_buffer_clear_on_write =
-                                !(job->cleared & PIPE_CLEAR_DEPTH);
+                                !(job->clear & PIPE_CLEAR_DEPTH);
                         store.disable_stencil_buffer_clear_on_write =
-                                !(job->cleared & PIPE_CLEAR_STENCIL);
+                                !(job->clear & PIPE_CLEAR_STENCIL);
                 }
                 store.padded_height_of_output_image_in_uif_blocks =
                         surf->padded_height_of_output_image_in_uif_blocks;
@@ -206,9 +205,9 @@ zs_buffer_from_pipe_bits(int pipe_clear_bits)
 static void
 v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl)
 {
-        uint32_t loads_pending = job->resolve & ~job->cleared;
+        uint32_t loads_pending = job->load;
 
-        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                 if (!(loads_pending & bit))
                         continue;
@@ -249,8 +248,8 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl)
          * tile coordinates.
          */
         if (loads_pending) {
-                cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) {
-                        load.disable_colour_buffer_load =
+                cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) {
+                        load.disable_color_buffer_load =
                                 (~loads_pending &
                                  PIPE_CLEAR_COLOR_BUFFERS) >>
                                 PIPE_FIRST_COLOR_BUFFER_BIT;
@@ -269,9 +268,10 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl)
 static void
 v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
 {
-        MAYBE_UNUSED bool needs_color_clear = job->cleared & PIPE_CLEAR_COLOR_BUFFERS;
-        MAYBE_UNUSED bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH;
-        MAYBE_UNUSED bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL;
+#if V3D_VERSION < 40
+        MAYBE_UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS;
+        MAYBE_UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH;
+        MAYBE_UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL;
 
         /* For clearing color in a TLB general on V3D 3.3:
          *
@@ -288,10 +288,13 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
          * TLB color buffers.
          */
         bool general_color_clear = (needs_color_clear &&
-                                    (job->cleared & PIPE_CLEAR_COLOR_BUFFERS) ==
-                                    (job->resolve & PIPE_CLEAR_COLOR_BUFFERS));
+                                    (job->clear & PIPE_CLEAR_COLOR_BUFFERS) ==
+                                    (job->store & PIPE_CLEAR_COLOR_BUFFERS));
+#else
+        bool general_color_clear = false;
+#endif
 
-        uint32_t stores_pending = job->resolve;
+        uint32_t stores_pending = job->store;
 
         /* For V3D 4.1, use general stores for all TLB stores.
          *
@@ -302,9 +305,9 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
          * perspective.  Non-MSAA surfaces will use
          * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
          */
-        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
-                if (!(job->resolve & bit))
+                if (!(job->store & bit))
                         continue;
 
                 struct pipe_surface *psurf = job->cbufs[i];
@@ -317,18 +320,18 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
                               &stores_pending, general_color_clear);
         }
 
-        if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
+        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
             !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                 if (rsc->separate_stencil) {
-                        if (job->resolve & PIPE_CLEAR_DEPTH) {
+                        if (job->store & PIPE_CLEAR_DEPTH) {
                                 store_general(job, cl, job->zsbuf, Z,
                                               PIPE_CLEAR_DEPTH,
                                               &stores_pending,
                                               general_color_clear);
                         }
 
-                        if (job->resolve & PIPE_CLEAR_STENCIL) {
+                        if (job->store & PIPE_CLEAR_STENCIL) {
                                 store_general(job, cl, job->zsbuf, STENCIL,
                                               PIPE_CLEAR_STENCIL,
                                               &stores_pending,
@@ -336,14 +339,14 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
                         }
                 } else {
                         store_general(job, cl, job->zsbuf,
-                                      zs_buffer_from_pipe_bits(job->resolve),
-                                      job->resolve & PIPE_CLEAR_DEPTHSTENCIL,
+                                      zs_buffer_from_pipe_bits(job->store),
+                                      job->store & PIPE_CLEAR_DEPTHSTENCIL,
                                       &stores_pending, general_color_clear);
                 }
         }
 
-        if (stores_pending) {
 #if V3D_VERSION < 40
+        if (stores_pending) {
                 cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
 
                         store.disable_color_buffer_write =
@@ -355,30 +358,45 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl)
                         /* Note that when set this will clear all of the color
                          * buffers.
                          */
-                        store.disable_colour_buffers_clear_on_write =
+                        store.disable_color_buffers_clear_on_write =
                                 !needs_color_clear;
                         store.disable_z_buffer_clear_on_write =
                                 !needs_z_clear;
                         store.disable_stencil_buffer_clear_on_write =
                                 !needs_s_clear;
                 };
-#else /* V3D_VERSION >= 40 */
-                unreachable("All color buffers should have been stored.");
-#endif /* V3D_VERSION >= 40 */
         } else if (needs_color_clear && !general_color_clear) {
                 /* If we didn't do our color clears in the general packet,
                  * then emit a packet to clear all the TLB color buffers now.
                  */
-#if V3D_VERSION < 40
                 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                         store.buffer_to_store = NONE;
                 }
+        }
 #else /* V3D_VERSION >= 40 */
+        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
+         * we still need to emit some sort of store.
+         */
+        if (!job->store) {
+                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+                        store.buffer_to_store = NONE;
+                }
+        }
+
+        assert(!stores_pending);
+
+        /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
+         * buffer bit is broken for depth/stencil.  In addition, the
+         * clear packet's Z/S bit is broken, but the RTs bit ends up
+         * clearing Z/S.
+         */
+        if (job->clear) {
                 cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
+                        clear.clear_z_stencil_buffer = true;
                         clear.clear_all_render_targets = true;
                 }
-#endif /* V3D_VERSION >= 40 */
         }
+#endif /* V3D_VERSION >= 40 */
 }
 
 static void
@@ -410,8 +428,7 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int last_cbuf)
         /* The binner starts out writing tiles assuming that the initial mode
          * is triangles, so make sure that's the case.
          */
-        cl_emit(cl, PRIMITIVE_LIST_FORMAT, fmt) {
-                fmt.data_type = LIST_INDEXED;
+        cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
                 fmt.primitive_type = LIST_TRIANGLES;
         }
 
@@ -451,7 +468,7 @@ static void
 v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
                           struct v3d_resource *rsc, bool is_separate_stencil)
 {
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) {
                 zs.address = cl_address(rsc->bo, surf->offset);
 
                 if (!is_separate_stencil) {
@@ -468,9 +485,9 @@ v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
                 zs.memory_format = surf->tiling;
         }
 
-        if (job->resolve & (is_separate_stencil ?
-                            PIPE_CLEAR_STENCIL :
-                            PIPE_CLEAR_DEPTHSTENCIL)) {
+        if (job->store & (is_separate_stencil ?
+                          PIPE_CLEAR_STENCIL :
+                          PIPE_CLEAR_DEPTHSTENCIL)) {
                 rsc->writes++;
         }
 }
@@ -490,20 +507,19 @@ v3dX(emit_rcl)(struct v3d_job *job)
         v3d_job_add_bo(job, job->rcl.bo);
 
         int nr_cbufs = 0;
-        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 if (job->cbufs[i])
                         nr_cbufs = i + 1;
         }
 
-        /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION
+        /* Comon config must be the first TILE_RENDERING_MODE_CFG
          * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
          * optional updates to the previous HW state.
          */
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
-                config) {
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
 #if V3D_VERSION < 40
-                config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
-                config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
+                config.enable_z_store = job->store & PIPE_CLEAR_DEPTH;
+                config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL;
 #else /* V3D_VERSION >= 40 */
                 if (job->zsbuf) {
                         struct v3d_surface *surf = v3d_surface(job->zsbuf);
@@ -532,8 +548,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 config.image_width_pixels = job->draw_width;
                 config.image_height_pixels = job->draw_height;
 
-                config.number_of_render_targets_minus_1 =
-                        MAX2(nr_cbufs, 1) - 1;
+                config.number_of_render_targets = MAX2(nr_cbufs, 1);
 
                 config.multisample_mode_4x = job->msaa;
 
@@ -567,7 +582,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 }
 
 #if V3D_VERSION < 40
-                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                         rt.address = cl_address(rsc->bo, surf->offset);
                         rt.internal_type = surf->internal_type;
                         rt.output_image_format = surf->format;
@@ -576,12 +591,12 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         rt.render_target_number = i;
                         rt.pad = config_pad;
 
-                        if (job->resolve & PIPE_CLEAR_COLOR0 << i)
+                        if (job->store & PIPE_CLEAR_COLOR0 << i)
                                 rsc->writes++;
                 }
 #endif /* V3D_VERSION < 40 */
 
-                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
                         clear) {
                         clear.clear_color_low_32_bits = job->clear_color[i][0];
                         clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
@@ -589,7 +604,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 };
 
                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
-                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2,
+                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2,
                                 clear) {
                                 clear.clear_color_mid_low_32_bits =
                                         ((job->clear_color[i][1] >> 24) |
@@ -602,7 +617,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 }
 
                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
-                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3,
+                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3,
                                 clear) {
                                 clear.uif_padded_height_in_uif_blocks = clear_pad;
                                 clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
@@ -612,7 +627,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
         }
 
 #if V3D_VERSION >= 40
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                 v3d_setup_render_target(job, 0,
                                         &rt.render_target_0_internal_bpp,
                                         &rt.render_target_0_internal_type,
@@ -654,10 +669,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
 #endif /* V3D_VERSION < 40 */
 
         /* Ends rendering mode config. */
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
                 clear) {
                 clear.z_clear_value = job->clear_z;
-                clear.stencil_vg_mask_clear_value = job->clear_s;
+                clear.stencil_clear_value = job->clear_s;
         };
 
         /* Always set initial block size before the first branch, which needs
@@ -678,7 +693,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 list.address = cl_address(job->tile_alloc, 0);
         }
 
-        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) {
+        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
                 uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
                 const uint32_t max_supertiles = 256;
 
@@ -699,11 +714,12 @@ v3dX(emit_rcl)(struct v3d_job *job)
                                 supertile_h++;
                 }
 
+                config.number_of_bin_tile_lists = 1;
                 config.total_frame_width_in_tiles = job->draw_tiles_x;
                 config.total_frame_height_in_tiles = job->draw_tiles_y;
 
-                config.supertile_width_in_tiles_minus_1 = supertile_w - 1;
-                config.supertile_height_in_tiles_minus_1 = supertile_h - 1;
+                config.supertile_width_in_tiles = supertile_w;
+                config.supertile_height_in_tiles = supertile_h;
 
                 config.total_frame_width_in_supertiles = frame_w_in_supertiles;
                 config.total_frame_height_in_supertiles = frame_h_in_supertiles;
@@ -754,9 +770,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
 
         v3d_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1);
 
-        cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
-
-        /* XXX: Use Morton order */
+        /* XXX perf: We should expose GL_MESA_tile_raster_order to improve X11
+         * performance, but we should use Morton order otherwise to improve
+         * cache locality.
+         */
         uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
         uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
         uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
@@ -778,5 +795,20 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 }
         }
 
+        if (job->tmu_dirty_rcl) {
+           cl_emit(&job->rcl, L1_CACHE_FLUSH_CONTROL, flush) {
+              flush.tmu_config_cache_clear = 0xf;
+              flush.tmu_data_cache_clear = 0xf;
+              flush.uniforms_cache_clear = 0xf;
+              flush.instruction_cache_clear = 0xf;
+           }
+
+           cl_emit(&job->rcl, L2T_CACHE_FLUSH_CONTROL, flush) {
+              flush.l2t_flush_mode = L2T_FLUSH_MODE_CLEAN;
+              flush.l2t_flush_start = cl_address(NULL, 0);
+              flush.l2t_flush_end = cl_address(NULL, ~0);
+           }
+        }
+
         cl_emit(&job->rcl, END_OF_RENDERING, end);
 }