vc4: Simplify pack header usage
author Eric Anholt <eric@anholt.net>
Sun, 11 Dec 2016 22:59:17 +0000 (14:59 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 30 Jun 2017 19:25:45 +0000 (12:25 -0700)
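Make cl_emit() take the CL pointer itself (e.g. &job->bcl) rather than a
caller-managed struct vc4_cl_out cursor, which will be useful for enabling
relocs.  Each cl_emit() now does its own cl_start()/cl_end(), so callers no
longer need to bracket their packets with them.  However, our compiled code
grows a bit as a result: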

before:
   4449       0       0    4449    1161 src/gallium/drivers/vc4/.libs/vc4_draw.o
    988       0       0     988     3dc src/gallium/drivers/vc4/.libs/vc4_emit.o

after:
   4481       0       0    4481    1181 src/gallium/drivers/vc4/.libs/vc4_draw.o
   1020       0       0    1020     3fc src/gallium/drivers/vc4/.libs/vc4_emit.o

src/gallium/drivers/vc4/vc4_cl.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_emit.c
src/gallium/drivers/vc4/vc4_job.c

index bec177cd03bd2305e748f79230481091910f6980..966756f5038fecde559eeb8990e38945161934dc 100644 (file)
@@ -255,17 +255,20 @@ cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
  * Also, *dst is actually of the wrong type, it's the
  * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
  */
-#define cl_emit(cl_out, packet, name)                            \
+#define cl_emit(cl, packet, name)                                \
         for (struct cl_packet_struct(packet) name = {            \
                 cl_packet_header(packet)                         \
         },                                                       \
-        *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
-        __builtin_expect(_dst != NULL, 1);                       \
+        *_loop_terminate = &name;                                \
+        __builtin_expect(_loop_terminate != NULL, 1);            \
         ({                                                       \
-                cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name);  \
-                VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst,           \
+                struct vc4_cl_out *cl_out = cl_start(cl);        \
+                cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
+                VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out,         \
                                                  cl_packet_length(packet))); \
-                _dst = NULL;                                     \
+                cl_advance(&cl_out, cl_packet_length(packet));   \
+                cl_end(cl, cl_out);                              \
+                _loop_terminate = NULL;                          \
         }))                                                      \
 
 #endif /* VC4_CL_H */
index 4b3fa8ab8ff4a6d20041e1fe8bbcc9a3843f40f3..f7955ad3a843b2704ab5a0a692b059e94f52add2 100644 (file)
@@ -81,8 +81,7 @@ vc4_start_draw(struct vc4_context *vc4)
 
         vc4_get_draw_cl_space(job, 0);
 
-        struct vc4_cl_out *bcl = cl_start(&job->bcl);
-        cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
                 bin.width_in_tiles = job->draw_tiles_x;
                 bin.height_in_tiles = job->draw_tiles_y;
                 bin.multisample_mode_4x = job->msaa;
@@ -93,14 +92,14 @@ vc4_start_draw(struct vc4_context *vc4)
          * figure out what new state packets need to be written to that tile's
          * command list.
          */
-        cl_emit(&bcl, START_TILE_BINNING, start);
+        cl_emit(&job->bcl, START_TILE_BINNING, start);
 
         /* Reset the current compressed primitives format.  This gets modified
          * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
          * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
          * of every tile.
          */
-        cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
+        cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) {
                 list.data_type = _16_BIT_INDEX;
                 list.primitive_type = TRIANGLES_LIST;
         }
@@ -108,8 +107,6 @@ vc4_start_draw(struct vc4_context *vc4)
         job->needs_flush = true;
         job->draw_width = vc4->framebuffer.width;
         job->draw_height = vc4->framebuffer.height;
-
-        cl_end(&job->bcl, bcl);
 }
 
 static void
@@ -216,8 +213,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
         }
         cl_end(&job->shader_rec, shader_rec);
 
-        struct vc4_cl_out *bcl = cl_start(&job->bcl);
-        cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
+        cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
                 /* Note that number of attributes == 0 in the packet means 8
                  * attributes.  This field also contains the offset into
                  * shader_rec.
@@ -226,7 +222,6 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
                 shader_state.number_of_attribute_arrays =
                         num_elements_emit & 0x7;
         }
-        cl_end(&job->bcl, bcl);
 
         vc4_write_uniforms(vc4, vc4->prog.fs,
                            &vc4->constbuf[PIPE_SHADER_FRAGMENT],
@@ -336,7 +331,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         /* Note that the primitive type fields match with OpenGL/gallium
          * definitions, up to but not including QUADS.
          */
-        struct vc4_cl_out *bcl = cl_start(&job->bcl);
         if (info->index_size) {
                 uint32_t index_size = info->index_size;
                 uint32_t offset = info->start * index_size;
@@ -359,6 +353,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 }
                 struct vc4_resource *rsc = vc4_resource(prsc);
 
+                struct vc4_cl_out *bcl = cl_start(&job->bcl);
                 cl_start_reloc(&job->bcl, &bcl, 1);
                 cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                 cl_u8(&bcl,
@@ -369,6 +364,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 cl_u32(&bcl, info->count);
                 cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
                 cl_u32(&bcl, vc4->max_index);
+                cl_end(&job->bcl, bcl);
                 job->draw_calls_queued++;
 
                 if (info->index_size == 4 || info->has_user_indices)
@@ -395,10 +391,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                          * plus whatever remainder.
                          */
                         if (extra_index_bias) {
-                                cl_end(&job->bcl, bcl);
                                 vc4_emit_gl_shader_state(vc4, info,
                                                          extra_index_bias);
-                                bcl = cl_start(&job->bcl);
                         }
 
                         if (start + count > max_verts) {
@@ -434,7 +428,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                                 }
                         }
 
-                        cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) {
                                 array.primitive_mode = info->mode;
                                 array.length = this_count;
                                 array.index_of_first_vertex = start;
@@ -446,7 +440,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                         start = 0;
                 }
         }
-        cl_end(&job->bcl, bcl);
 
         /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
          * workaround.
index 9fc266e5baa95392feb58c269d77758a7108f205..8fb379df5e0f5ad2361f652f8e5e6967bd468ca4 100644 (file)
@@ -29,7 +29,6 @@ vc4_emit_state(struct pipe_context *pctx)
         struct vc4_context *vc4 = vc4_context(pctx);
         struct vc4_job *job = vc4->job;
 
-        struct vc4_cl_out *bcl = cl_start(&job->bcl);
         if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |
                           VC4_DIRTY_RASTERIZER)) {
                 float *vpscale = vc4->viewport.scale;
@@ -60,7 +59,7 @@ vc4_emit_state(struct pipe_context *pctx)
                         maxy = MIN2(vp_maxy, vc4->scissor.maxy);
                 }
 
-                cl_emit(&bcl, CLIP_WINDOW, clip) {
+                cl_emit(&job->bcl, CLIP_WINDOW, clip) {
                         clip.clip_window_left_pixel_coordinate = minx;
                         clip.clip_window_bottom_pixel_coordinate = miny;
                         clip.clip_window_height_in_pixels = maxy - miny;
@@ -79,6 +78,7 @@ vc4_emit_state(struct pipe_context *pctx)
                 uint8_t ez_enable_mask_out = ~0;
                 uint8_t rasosm_mask_out = ~0;
 
+                struct vc4_cl_out *bcl = cl_start(&job->bcl);
                 /* HW-2905: If the RCL ends up doing a full-res load when
                  * multisampling, then early Z tracking may end up with values
                  * from the previous tile due to a HW bug.  Disable it to
@@ -111,41 +111,42 @@ vc4_emit_state(struct pipe_context *pctx)
                 cl_u8(&bcl,
                       (vc4->rasterizer->config_bits[2] |
                        vc4->zsa->config_bits[2]) & ez_enable_mask_out);
+                cl_end(&job->bcl, bcl);
         }
 
         if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
-                cl_emit(&bcl, DEPTH_OFFSET, depth) {
+                cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
                         depth.depth_offset_units =
                                 vc4->rasterizer->offset_units;
                         depth.depth_offset_factor =
                                 vc4->rasterizer->offset_factor;
                 }
 
-                cl_emit(&bcl, POINT_SIZE, points) {
+                cl_emit(&job->bcl, POINT_SIZE, points) {
                         points.point_size = vc4->rasterizer->point_size;
                 }
 
-                cl_emit(&bcl, LINE_WIDTH, points) {
+                cl_emit(&job->bcl, LINE_WIDTH, points) {
                         points.line_width = vc4->rasterizer->base.line_width;
                 }
         }
 
         if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
-                cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
+                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
                         clip.viewport_half_width_in_1_16th_of_pixel =
                                 vc4->viewport.scale[0] * 16.0f;
                         clip.viewport_half_height_in_1_16th_of_pixel =
                                 vc4->viewport.scale[1] * 16.0f;
                 }
 
-                cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+                cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
                         clip.viewport_z_offset_zc_to_zs =
                                 vc4->viewport.translate[2];
                         clip.viewport_z_scale_zc_to_zs =
                                 vc4->viewport.scale[2];
                 }
 
-                cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
+                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
                         vp.viewport_centre_x_coordinate =
                                 16 * vc4->viewport.translate[0];
                         vp.viewport_centre_y_coordinate =
@@ -154,12 +155,10 @@ vc4_emit_state(struct pipe_context *pctx)
         }
 
         if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
-                cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+                cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
                         if (vc4->rasterizer->base.flatshade)
                                 flags.flat_shading_flags =
                                         vc4->prog.fs->color_inputs;
                 }
         }
-
-        cl_end(&job->bcl, bcl);
 }
index afdac8c991d4af99ffc62d90874800e0542211a6..ed6c86c3e6c904c9d4bfaa60c8cf40521f48bb87 100644 (file)
@@ -377,13 +377,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
                  * until the FLUSH completes.
                  */
                 cl_ensure_space(&job->bcl, 8);
-                struct vc4_cl_out *bcl = cl_start(&job->bcl);
-                cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
+                cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
                 /* The FLUSH caps all of our bin lists with a
                  * VC4_PACKET_RETURN.
                  */
-                cl_emit(&bcl, FLUSH, flush);
-                cl_end(&job->bcl, bcl);
+                cl_emit(&job->bcl, FLUSH, flush);
         }
         struct drm_vc4_submit_cl submit = {
                 .color_read.hindex = ~0,