From 8d36bd3d086f2a3ab76b06ca21f3b1b2d12f7277 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 11 Dec 2016 14:59:17 -0800 Subject: [PATCH] vc4: Simplify pack header usage Take the CL pointer in, which will be useful for enabling relocs. However, our code expands a bit more: before: 4449 0 0 4449 1161 src/gallium/drivers/vc4/.libs/vc4_draw.o 988 0 0 988 3dc src/gallium/drivers/vc4/.libs/vc4_emit.o after: 4481 0 0 4481 1181 src/gallium/drivers/vc4/.libs/vc4_draw.o 1020 0 0 1020 3fc src/gallium/drivers/vc4/.libs/vc4_emit.o --- src/gallium/drivers/vc4/vc4_cl.h | 15 +++++++++------ src/gallium/drivers/vc4/vc4_draw.c | 21 +++++++-------------- src/gallium/drivers/vc4/vc4_emit.c | 21 ++++++++++----------- src/gallium/drivers/vc4/vc4_job.c | 6 ++---- 4 files changed, 28 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index bec177cd03b..966756f5038 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -255,17 +255,20 @@ cl_get_emit_space(struct vc4_cl_out **cl, size_t size) * Also, *dst is actually of the wrong type, it's the * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet). */ -#define cl_emit(cl_out, packet, name) \ +#define cl_emit(cl, packet, name) \ for (struct cl_packet_struct(packet) name = { \ cl_packet_header(packet) \ }, \ - *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \ - __builtin_expect(_dst != NULL, 1); \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ ({ \ - cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, \ + struct vc4_cl_out *cl_out = cl_start(cl); \ + cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out, \ cl_packet_length(packet))); \ - _dst = NULL; \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ })) \ #endif /* VC4_CL_H */ diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 4b3fa8ab8ff..f7955ad3a84 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -81,8 +81,7 @@ vc4_start_draw(struct vc4_context *vc4) vc4_get_draw_cl_space(job, 0); - struct vc4_cl_out *bcl = cl_start(&job->bcl); - cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) { + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) { bin.width_in_tiles = job->draw_tiles_x; bin.height_in_tiles = job->draw_tiles_y; bin.multisample_mode_4x = job->msaa; @@ -93,14 +92,14 @@ vc4_start_draw(struct vc4_context *vc4) * figure out what new state packets need to be written to that tile's * command list. */ - cl_emit(&bcl, START_TILE_BINNING, start); + cl_emit(&job->bcl, START_TILE_BINNING, start); /* Reset the current compressed primitives format. This gets modified * by VC4_PACKET_GL_INDEXED_PRIMITIVE and * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start * of every tile. */ - cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) { + cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) { list.data_type = _16_BIT_INDEX; list.primitive_type = TRIANGLES_LIST; } @@ -108,8 +107,6 @@ vc4_start_draw(struct vc4_context *vc4) job->needs_flush = true; job->draw_width = vc4->framebuffer.width; job->draw_height = vc4->framebuffer.height; - - cl_end(&job->bcl, bcl); } static void @@ -216,8 +213,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, } cl_end(&job->shader_rec, shader_rec); - struct vc4_cl_out *bcl = cl_start(&job->bcl); - cl_emit(&bcl, GL_SHADER_STATE, shader_state) { + cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) { /* Note that number of attributes == 0 in the packet means 8 * attributes. This field also contains the offset into * shader_rec. @@ -226,7 +222,6 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, shader_state.number_of_attribute_arrays = num_elements_emit & 0x7; } - cl_end(&job->bcl, bcl); vc4_write_uniforms(vc4, vc4->prog.fs, &vc4->constbuf[PIPE_SHADER_FRAGMENT], @@ -336,7 +331,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. */ - struct vc4_cl_out *bcl = cl_start(&job->bcl); if (info->index_size) { uint32_t index_size = info->index_size; uint32_t offset = info->start * index_size; @@ -359,6 +353,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } struct vc4_resource *rsc = vc4_resource(prsc); + struct vc4_cl_out *bcl = cl_start(&job->bcl); cl_start_reloc(&job->bcl, &bcl, 1); cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); cl_u8(&bcl, @@ -369,6 +364,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u32(&bcl, info->count); cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset); cl_u32(&bcl, vc4->max_index); + cl_end(&job->bcl, bcl); job->draw_calls_queued++; if (info->index_size == 4 || info->has_user_indices) @@ -395,10 +391,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) * plus whatever remainder. */ if (extra_index_bias) { - cl_end(&job->bcl, bcl); vc4_emit_gl_shader_state(vc4, info, extra_index_bias); - bcl = cl_start(&job->bcl); } if (start + count > max_verts) { @@ -434,7 +428,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } } - cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) { + cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) { array.primitive_mode = info->mode; array.length = this_count; array.index_of_first_vertex = start; @@ -446,7 +440,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) start = 0; } } - cl_end(&job->bcl, bcl); /* We shouldn't have tripped the HW_2116 bug with the GFXH-515 * workaround. diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index 9fc266e5baa..8fb379df5e0 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -29,7 +29,6 @@ vc4_emit_state(struct pipe_context *pctx) struct vc4_context *vc4 = vc4_context(pctx); struct vc4_job *job = vc4->job; - struct vc4_cl_out *bcl = cl_start(&job->bcl); if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT | VC4_DIRTY_RASTERIZER)) { float *vpscale = vc4->viewport.scale; @@ -60,7 +59,7 @@ vc4_emit_state(struct pipe_context *pctx) maxy = MIN2(vp_maxy, vc4->scissor.maxy); } - cl_emit(&bcl, CLIP_WINDOW, clip) { + cl_emit(&job->bcl, CLIP_WINDOW, clip) { clip.clip_window_left_pixel_coordinate = minx; clip.clip_window_bottom_pixel_coordinate = miny; clip.clip_window_height_in_pixels = maxy - miny; @@ -79,6 +78,7 @@ vc4_emit_state(struct pipe_context *pctx) uint8_t ez_enable_mask_out = ~0; uint8_t rasosm_mask_out = ~0; + struct vc4_cl_out *bcl = cl_start(&job->bcl); /* HW-2905: If the RCL ends up doing a full-res load when * multisampling, then early Z tracking may end up with values * from the previous tile due to a HW bug. Disable it to @@ -111,41 +111,42 @@ vc4_emit_state(struct pipe_context *pctx) cl_u8(&bcl, (vc4->rasterizer->config_bits[2] | vc4->zsa->config_bits[2]) & ez_enable_mask_out); + cl_end(&job->bcl, bcl); } if (vc4->dirty & VC4_DIRTY_RASTERIZER) { - cl_emit(&bcl, DEPTH_OFFSET, depth) { + cl_emit(&job->bcl, DEPTH_OFFSET, depth) { depth.depth_offset_units = vc4->rasterizer->offset_units; depth.depth_offset_factor = vc4->rasterizer->offset_factor; } - cl_emit(&bcl, POINT_SIZE, points) { + cl_emit(&job->bcl, POINT_SIZE, points) { points.point_size = vc4->rasterizer->point_size; } - cl_emit(&bcl, LINE_WIDTH, points) { + cl_emit(&job->bcl, LINE_WIDTH, points) { points.line_width = vc4->rasterizer->base.line_width; } } if (vc4->dirty & VC4_DIRTY_VIEWPORT) { - cl_emit(&bcl, CLIPPER_XY_SCALING, clip) { + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { clip.viewport_half_width_in_1_16th_of_pixel = vc4->viewport.scale[0] * 16.0f; clip.viewport_half_height_in_1_16th_of_pixel = vc4->viewport.scale[1] * 16.0f; } - cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { clip.viewport_z_offset_zc_to_zs = vc4->viewport.translate[2]; clip.viewport_z_scale_zc_to_zs = vc4->viewport.scale[2]; } - cl_emit(&bcl, VIEWPORT_OFFSET, vp) { + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { vp.viewport_centre_x_coordinate = 16 * vc4->viewport.translate[0]; vp.viewport_centre_y_coordinate = @@ -154,12 +155,10 @@ vc4_emit_state(struct pipe_context *pctx) } if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) { - cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) { + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { if (vc4->rasterizer->base.flatshade) flags.flat_shading_flags = vc4->prog.fs->color_inputs; } } - - cl_end(&job->bcl, bcl); } diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index afdac8c991d..ed6c86c3e6c 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -377,13 +377,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) * until the FLUSH completes. */ cl_ensure_space(&job->bcl, 8); - struct vc4_cl_out *bcl = cl_start(&job->bcl); - cl_emit(&bcl, INCREMENT_SEMAPHORE, incr); + cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); /* The FLUSH caps all of our bin lists with a * VC4_PACKET_RETURN. */ - cl_emit(&bcl, FLUSH, flush); - cl_end(&job->bcl, bcl); + cl_emit(&job->bcl, FLUSH, flush); } struct drm_vc4_submit_cl submit = { .color_read.hindex = ~0, -- 2.30.2