Make cl_emit() take the CL pointer instead of the cl_out pointer, which will be useful for enabling relocs.
However, the generated code grows slightly as a result:
before:
4449 0 0 4449 1161 src/gallium/drivers/vc4/.libs/vc4_draw.o
988 0 0 988 3dc src/gallium/drivers/vc4/.libs/vc4_emit.o
after:
4481 0 0 4481 1181 src/gallium/drivers/vc4/.libs/vc4_draw.o
1020 0 0 1020 3fc src/gallium/drivers/vc4/.libs/vc4_emit.o
* Also, *dst is actually of the wrong type, it's the
* uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
*/
-#define cl_emit(cl_out, packet, name) \
+#define cl_emit(cl, packet, name) \
for (struct cl_packet_struct(packet) name = { \
cl_packet_header(packet) \
}, \
- *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
- __builtin_expect(_dst != NULL, 1); \
+ *_loop_terminate = &name; \
+ __builtin_expect(_loop_terminate != NULL, 1); \
({ \
- cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \
- VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, \
+ struct vc4_cl_out *cl_out = cl_start(cl); \
+ cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out, \
cl_packet_length(packet))); \
- _dst = NULL; \
+ cl_advance(&cl_out, cl_packet_length(packet)); \
+ cl_end(cl, cl_out); \
+ _loop_terminate = NULL; \
})) \
#endif /* VC4_CL_H */
vc4_get_draw_cl_space(job, 0);
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
bin.width_in_tiles = job->draw_tiles_x;
bin.height_in_tiles = job->draw_tiles_y;
bin.multisample_mode_4x = job->msaa;
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_emit(&bcl, START_TILE_BINNING, start);
+ cl_emit(&job->bcl, START_TILE_BINNING, start);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
+ cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) {
list.data_type = _16_BIT_INDEX;
list.primitive_type = TRIANGLES_LIST;
}
job->needs_flush = true;
job->draw_width = vc4->framebuffer.width;
job->draw_height = vc4->framebuffer.height;
-
- cl_end(&job->bcl, bcl);
}
static void
}
cl_end(&job->shader_rec, shader_rec);
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
+ cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into
* shader_rec.
shader_state.number_of_attribute_arrays =
num_elements_emit & 0x7;
}
- cl_end(&job->bcl, bcl);
vc4_write_uniforms(vc4, vc4->prog.fs,
&vc4->constbuf[PIPE_SHADER_FRAGMENT],
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
*/
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
if (info->index_size) {
uint32_t index_size = info->index_size;
uint32_t offset = info->start * index_size;
}
struct vc4_resource *rsc = vc4_resource(prsc);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
cl_start_reloc(&job->bcl, &bcl, 1);
cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
cl_u8(&bcl,
cl_u32(&bcl, info->count);
cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
cl_u32(&bcl, vc4->max_index);
+ cl_end(&job->bcl, bcl);
job->draw_calls_queued++;
if (info->index_size == 4 || info->has_user_indices)
* plus whatever remainder.
*/
if (extra_index_bias) {
- cl_end(&job->bcl, bcl);
vc4_emit_gl_shader_state(vc4, info,
extra_index_bias);
- bcl = cl_start(&job->bcl);
}
if (start + count > max_verts) {
}
}
- cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
+ cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) {
array.primitive_mode = info->mode;
array.length = this_count;
array.index_of_first_vertex = start;
start = 0;
}
}
- cl_end(&job->bcl, bcl);
/* We shouldn't have tripped the HW_2116 bug with the GFXH-515
* workaround.
struct vc4_context *vc4 = vc4_context(pctx);
struct vc4_job *job = vc4->job;
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |
VC4_DIRTY_RASTERIZER)) {
float *vpscale = vc4->viewport.scale;
maxy = MIN2(vp_maxy, vc4->scissor.maxy);
}
- cl_emit(&bcl, CLIP_WINDOW, clip) {
+ cl_emit(&job->bcl, CLIP_WINDOW, clip) {
clip.clip_window_left_pixel_coordinate = minx;
clip.clip_window_bottom_pixel_coordinate = miny;
clip.clip_window_height_in_pixels = maxy - miny;
uint8_t ez_enable_mask_out = ~0;
uint8_t rasosm_mask_out = ~0;
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
/* HW-2905: If the RCL ends up doing a full-res load when
* multisampling, then early Z tracking may end up with values
* from the previous tile due to a HW bug. Disable it to
cl_u8(&bcl,
(vc4->rasterizer->config_bits[2] |
vc4->zsa->config_bits[2]) & ez_enable_mask_out);
+ cl_end(&job->bcl, bcl);
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_emit(&bcl, DEPTH_OFFSET, depth) {
+ cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
depth.depth_offset_units =
vc4->rasterizer->offset_units;
depth.depth_offset_factor =
vc4->rasterizer->offset_factor;
}
- cl_emit(&bcl, POINT_SIZE, points) {
+ cl_emit(&job->bcl, POINT_SIZE, points) {
points.point_size = vc4->rasterizer->point_size;
}
- cl_emit(&bcl, LINE_WIDTH, points) {
+ cl_emit(&job->bcl, LINE_WIDTH, points) {
points.line_width = vc4->rasterizer->base.line_width;
}
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
clip.viewport_half_width_in_1_16th_of_pixel =
vc4->viewport.scale[0] * 16.0f;
clip.viewport_half_height_in_1_16th_of_pixel =
vc4->viewport.scale[1] * 16.0f;
}
- cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
clip.viewport_z_offset_zc_to_zs =
vc4->viewport.translate[2];
clip.viewport_z_scale_zc_to_zs =
vc4->viewport.scale[2];
}
- cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
+ cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
vp.viewport_centre_x_coordinate =
16 * vc4->viewport.translate[0];
vp.viewport_centre_y_coordinate =
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+ cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
if (vc4->rasterizer->base.flatshade)
flags.flat_shading_flags =
vc4->prog.fs->color_inputs;
}
}
-
- cl_end(&job->bcl, bcl);
}
* until the FLUSH completes.
*/
cl_ensure_space(&job->bcl, 8);
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
+ cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
/* The FLUSH caps all of our bin lists with a
* VC4_PACKET_RETURN.
*/
- cl_emit(&bcl, FLUSH, flush);
- cl_end(&job->bcl, bcl);
+ cl_emit(&job->bcl, FLUSH, flush);
}
struct drm_vc4_submit_cl submit = {
.color_read.hindex = ~0,