From 2449695e822421fdcaf1c66dffc12d7d705ea69d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 17 Dec 2011 23:12:45 +0100 Subject: [PATCH] gallium: improve the pipe_stream_output_info struct (v2) There are 3 changes: 1) stride is specified for each buffer, not just one, so that drivers don't have to derive it from the outputs 2) new per-output property dst_offset, which specifies the offset into the buffer in dwords where the output should be stored, so that drivers don't have to compute the offsets manually; this will also be useful for gl_SkipComponents from ARB_transform_feedback3 3) register_mask is removed, instead, there is start_component and num_components; register_mask with non-consecutive 1s doesn't make much sense (some hardware cannot do packing of components) Christoph Bumiller: fixed nvc0. v2: resolve merge conflicts in Draw and clean it up --- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 87 ++------------------ src/gallium/auxiliary/util/u_blitter.c | 4 +- src/gallium/auxiliary/util/u_dump_state.c | 6 +- src/gallium/drivers/llvmpipe/lp_state_so.c | 2 +- src/gallium/drivers/nvc0/nvc0_program.c | 11 +-- src/gallium/drivers/r600/r600.h | 4 +- src/gallium/drivers/r600/r600_hw_context.c | 8 +- src/gallium/drivers/r600/r600_pipe.h | 1 - src/gallium/drivers/r600/r600_shader.c | 46 +++-------- src/gallium/drivers/r600/r600_state_common.c | 2 +- src/gallium/drivers/trace/tr_dump_state.c | 6 +- src/gallium/include/pipe/p_state.h | 13 +-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 ++-- 13 files changed, 57 insertions(+), 151 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index 466f46abab3..ecf287f9128 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -79,72 +79,6 @@ void draw_pt_so_emit_prepare(struct pt_so_emit *emit) draw_do_flush( draw, DRAW_FLUSH_BACKEND ); } -static boolean 
-is_component_writable(unsigned mask, - unsigned compo) -{ - switch (mask) { - case TGSI_WRITEMASK_NONE: - return FALSE; - case TGSI_WRITEMASK_X: - return compo == 0; - case TGSI_WRITEMASK_Y: - return compo == 1; - case TGSI_WRITEMASK_XY: - return compo == 0 || compo == 1; - case TGSI_WRITEMASK_Z: - return compo == 2; - case TGSI_WRITEMASK_XZ: - return compo == 0 || compo == 2; - case TGSI_WRITEMASK_YZ: - return compo == 1 || compo == 2; - case TGSI_WRITEMASK_XYZ: - return compo == 0 || compo == 1 || compo == 2; - case TGSI_WRITEMASK_W: - return compo == 3; - case TGSI_WRITEMASK_XW: - return compo == 0 || compo == 3; - case TGSI_WRITEMASK_YW: - return compo == 1 || compo == 3; - case TGSI_WRITEMASK_XYW: - return compo == 0 || compo == 1 || compo == 3; - case TGSI_WRITEMASK_ZW: - return compo == 2 || compo == 3; - case TGSI_WRITEMASK_XZW: - return compo == 0 || compo == 1 || compo == 3; - case TGSI_WRITEMASK_YZW: - return compo == 1 || compo == 2 || compo == 4; - case TGSI_WRITEMASK_XYZW: - return compo < 4; - default: - debug_assert(!"Unknown writemask in stream out"); - return compo < 4; - } -} - -static INLINE int mask_num_comps(int register_mask) -{ - int comps = 0; - switch (register_mask) { - case TGSI_WRITEMASK_XYZW: - comps = 4; - break; - case TGSI_WRITEMASK_XYZ: - comps = 3; - break; - case TGSI_WRITEMASK_XY: - comps = 2; - break; - case TGSI_WRITEMASK_X: - comps = 1; - break; - default: - assert(0); - break; - } - return comps; -} - static void so_emit_prim(struct pt_so_emit *so, unsigned *indices, unsigned num_vertices) @@ -170,14 +104,14 @@ static void so_emit_prim(struct pt_so_emit *so, /* check have we space to emit prim first - if not don't do anything */ for (i = 0; i < num_vertices; ++i) { for (slot = 0; slot < state->num_outputs; ++slot) { - unsigned writemask = state->output[slot].register_mask; + unsigned num_comps = state->output[slot].num_components; int ob = state->output[slot].output_buffer; - if ((buffer_total_bytes[ob] + 
mask_num_comps(writemask) * sizeof(float)) > + if ((buffer_total_bytes[ob] + num_comps * sizeof(float)) > draw->so.targets[ob]->target.buffer_size) { return; } - buffer_total_bytes[ob] += mask_num_comps(writemask) * sizeof(float); + buffer_total_bytes[ob] += num_comps * sizeof(float); } } @@ -190,21 +124,16 @@ static void so_emit_prim(struct pt_so_emit *so, for (slot = 0; slot < state->num_outputs; ++slot) { unsigned idx = state->output[slot].register_index; - unsigned writemask = state->output[slot].register_mask; - unsigned written_compos = 0; - unsigned compo; + unsigned start_comp = state->output[slot].start_component; + unsigned num_comps = state->output[slot].num_components; int ob = state->output[slot].output_buffer; buffer = (float *)((char *)draw->so.targets[ob]->mapping + draw->so.targets[ob]->target.buffer_offset + draw->so.targets[ob]->internal_offset); - for (compo = 0; compo < 4; ++compo) { - if (is_component_writable(writemask, compo)) { - buffer[written_compos++] = input[idx][compo]; - } - } - draw->so.targets[ob]->internal_offset += written_compos * sizeof(float); - total_written_compos += written_compos; + memcpy(buffer, &input[idx][start_comp], num_comps * sizeof(float)); + draw->so.targets[ob]->internal_offset += num_comps * sizeof(float); + total_written_compos += num_comps; } } so->emitted_vertices += num_vertices; diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 59940d9cbe7..6a32de619ef 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -263,8 +263,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&so, 0, sizeof(so)); so.num_outputs = 1; - so.output[0].register_mask = TGSI_WRITEMASK_XYZW; - so.stride = 4; + so.output[0].num_components = 4; + so.stride[0] = 4; ctx->vs_pos_only = util_make_vertex_passthrough_shader_with_so(pipe, 1, semantic_names, diff --git a/src/gallium/auxiliary/util/u_dump_state.c 
b/src/gallium/auxiliary/util/u_dump_state.c index c728bc4021c..c346a0ba5ca 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -444,13 +444,15 @@ util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state) util_dump_member_begin(stream, "stream_output"); util_dump_struct_begin(stream, "pipe_stream_output_info"); util_dump_member(stream, uint, &state->stream_output, num_outputs); - util_dump_member(stream, uint, &state->stream_output, stride); + util_dump_array(stream, uint, state->stream_output.stride, + Elements(state->stream_output.stride)); util_dump_array_begin(stream); for(i = 0; i < state->stream_output.num_outputs; ++i) { util_dump_elem_begin(stream); util_dump_struct_begin(stream, ""); /* anonymous */ util_dump_member(stream, uint, &state->stream_output.output[i], register_index); - util_dump_member(stream, uint, &state->stream_output.output[i], register_mask); + util_dump_member(stream, uint, &state->stream_output.output[i], start_component); + util_dump_member(stream, uint, &state->stream_output.output[i], num_components); util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer); util_dump_struct_end(stream); util_dump_elem_end(stream); diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c index 108f3aa4f39..ed2272d05ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_so.c +++ b/src/gallium/drivers/llvmpipe/lp_state_so.c @@ -42,7 +42,7 @@ llvmpipe_create_stream_output_state(struct pipe_context *pipe, if (so) { so->base.num_outputs = templ->num_outputs; - so->base.stride = templ->stride; + memcpy(so->base.stride, templ->stride, sizeof(templ->stride)); memcpy(so->base.output, templ->output, templ->num_outputs * sizeof(templ->output[0])); } diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index cff76fe67f3..60abc224398 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ 
b/src/gallium/drivers/nvc0/nvc0_program.c @@ -503,20 +503,17 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, tfb->varying_count[b] = 0; for (i = 0; i < pso->num_outputs; ++i) { + unsigned startc = pso->output[i].start_component; if (pso->output[i].output_buffer != b) continue; - for (c = 0; c < 4; ++c) { - if (!(pso->output[i].register_mask & (1 << c))) - continue; + for (c = 0; c < pso->output[i].num_components; ++c) { tfb->varying_count[b]++; tfb->varying_index[n++] = - info->out[pso->output[i].register_index].slot[c]; + info->out[pso->output[i].register_index].slot[startc + c]; } } - tfb->stride[b] = tfb->varying_count[b] * 4; + tfb->stride[b] = pso->stride[b] * 4; } - if (pso->stride) - tfb->stride[0] = pso->stride; return tfb; } diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 4bfb5a980f1..baf09c1d8aa 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -196,7 +196,7 @@ struct r600_so_target { /* The buffer where BUFFER_FILLED_SIZE is stored. 
*/ struct r600_resource *filled_size; - unsigned stride; + unsigned stride_in_dw; unsigned so_index; }; @@ -248,7 +248,7 @@ struct r600_context { struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS]; boolean streamout_start; unsigned streamout_append_bitmask; - unsigned *vs_shader_so_strides; + unsigned *vs_so_stride_in_dw; }; struct r600_draw { diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index b0a28d98215..ac90011dbe7 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -2033,7 +2033,7 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_ void r600_context_streamout_begin(struct r600_context *ctx) { struct r600_so_target **t = ctx->so_targets; - unsigned *strides = ctx->vs_shader_so_strides; + unsigned *stride_in_dw = ctx->vs_so_stride_in_dw; unsigned buffer_en, i, update_flags = 0; buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | @@ -2064,7 +2064,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) for (i = 0; i < ctx->num_so_targets; i++) { if (t[i]) { - t[i]->stride = strides[i]; + t[i]->stride_in_dw = stride_in_dw[i]; t[i]->so_index = i; update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); @@ -2074,7 +2074,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) 16*i - R600_CONTEXT_REG_OFFSET) >> 2; ctx->pm4[ctx->pm4_cdwords++] = (t[i]->b.buffer_offset + t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */ - ctx->pm4[ctx->pm4_cdwords++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */ + ctx->pm4[ctx->pm4_cdwords++] = stride_in_dw[i]; /* VTX_STRIDE (in DW) */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* BUFFER_BASE */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); @@ -2186,7 +2186,7 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); ctx->pm4[ctx->pm4_cdwords++] = 
(R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2; - ctx->pm4[ctx->pm4_cdwords++] = t->stride >> 2; + ctx->pm4[ctx->pm4_cdwords++] = t->stride_in_dw; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index bd782438354..45bb4da8c2a 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -147,7 +147,6 @@ struct r600_pipe_shader { struct tgsi_token *tokens; unsigned sprite_coord_enable; struct pipe_stream_output_info so; - unsigned so_strides[4]; }; struct r600_pipe_sampler_state { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6121a43c763..6fd058069c1 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -124,12 +124,14 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s unsigned i; fprintf(stderr, "STREAMOUT\n"); for (i = 0; i < shader->so.num_outputs; i++) { + unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << + shader->so.output[i].start_component; fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, shader->so.output[i].output_buffer, shader->so.output[i].register_index, - shader->so.output[i].register_mask & 1 ? "x" : "_", - (shader->so.output[i].register_mask >> 1) & 1 ? "y" : "_", - (shader->so.output[i].register_mask >> 2) & 1 ? "z" : "_", - (shader->so.output[i].register_mask >> 3) & 1 ? "w" : "_"); + mask & 1 ? "x" : "_", + (mask >> 1) & 1 ? "y" : "_", + (mask >> 2) & 1 ? "z" : "_", + (mask >> 3) & 1 ? "w" : "_"); } } } @@ -863,11 +865,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi /* Add stream outputs. 
*/ if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { - unsigned buffer_offset[PIPE_MAX_SO_BUFFERS] = {0}; - for (i = 0; i < so.num_outputs; i++) { struct r600_bytecode_output output; - unsigned comps; if (so.output[i].output_buffer >= 4) { R600_ERR("exceeded the max number of stream output buffers, got: %d\n", @@ -875,36 +874,21 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi r = -EINVAL; goto out_err; } - - switch (so.output[i].register_mask) { - case TGSI_WRITEMASK_XYZW: - comps = 4; - break; - case TGSI_WRITEMASK_XYZ: - comps = 3; - break; - case TGSI_WRITEMASK_XY: - comps = 2; - break; - case TGSI_WRITEMASK_X: - comps = 1; - break; - default: - R600_ERR("streamout: invalid register_mask, got: %x\n", - so.output[i].register_mask); - r = -EINVAL; - goto out_err; + if (so.output[i].start_component) { + R600_ERR("stream_output - start_component cannot be non-zero\n"); + r = -EINVAL; + goto out_err; } memset(&output, 0, sizeof(struct r600_bytecode_output)); output.gpr = shader->output[so.output[i].register_index].gpr; output.elem_size = 0; - output.array_base = buffer_offset[so.output[i].output_buffer]; + output.array_base = so.output[i].dst_offset; output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; output.burst_count = 1; output.barrier = 1; output.array_size = 0; - output.comp_mask = so.output[i].register_mask; + output.comp_mask = (1 << so.output[i].num_components) - 1; if (ctx.bc->chip_class >= EVERGREEN) { switch (so.output[i].output_buffer) { case 0: @@ -939,12 +923,6 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi r = r600_bytecode_add_output(ctx.bc, &output); if (r) goto out_err; - - buffer_offset[so.output[i].output_buffer] += comps; - } - - for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - pipeshader->so_strides[i] = buffer_offset[i] * 4; } } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 
034a560a7ec..ac9bd27df24 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -649,7 +649,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) } } - rctx->ctx.vs_shader_so_strides = rctx->vs_shader->so_strides; + rctx->ctx.vs_so_stride_in_dw = rctx->vs_shader->so.stride; mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 8af084c2769..fb8683adba5 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -274,14 +274,16 @@ void trace_dump_shader_state(const struct pipe_shader_state *state) trace_dump_member_begin("stream_output"); trace_dump_struct_begin("pipe_stream_output_info"); trace_dump_member(uint, &state->stream_output, num_outputs); - trace_dump_member(uint, &state->stream_output, stride); + trace_dump_array(uint, state->stream_output.stride, PIPE_MAX_SO_BUFFERS); trace_dump_array_begin(); for(i = 0; i < state->stream_output.num_outputs; ++i) { trace_dump_elem_begin(); trace_dump_struct_begin(""); /* anonymous */ trace_dump_member(uint, &state->stream_output.output[i], register_index); - trace_dump_member(uint, &state->stream_output.output[i], register_mask); + trace_dump_member(uint, &state->stream_output.output[i], start_component); + trace_dump_member(uint, &state->stream_output.output[i], num_components); trace_dump_member(uint, &state->stream_output.output[i], output_buffer); + trace_dump_member(uint, &state->stream_output.output[i], dst_offset); trace_dump_struct_end(); trace_dump_elem_end(); } diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 024d544e3ef..f6486f01dce 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -190,16 +190,19 @@ struct pipe_clip_state struct pipe_stream_output_info { unsigned num_outputs; - 
/** stride for an entire vertex, only used if all output_buffers are 0 */ - unsigned stride; + /** stride for an entire vertex for each buffer in dwords */ + unsigned stride[PIPE_MAX_SO_BUFFERS]; + /** * Array of stream outputs, in the order they are to be written in. * Selected components are tightly packed into the output buffer. */ struct { - unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS */ - unsigned register_mask:4; /**< TGSI_WRITEMASK_x */ - unsigned output_buffer:4; /**< 0 to PIPE_MAX_SO_BUFFERS */ + unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS */ + unsigned start_component:2; /**< 0 to 3 */ + unsigned num_components:3; /**< 1 to 4 */ + unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */ + unsigned dst_offset:16; /**< offset into the buffer in dwords */ } output[PIPE_MAX_SHADER_OUTPUTS]; }; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 26047cfe078..dc841ff9779 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5097,25 +5097,21 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, const GLuint outputMapping[], struct pipe_stream_output_info *so) { - static unsigned comps_to_mask[] = { - 0, - TGSI_WRITEMASK_X, - TGSI_WRITEMASK_XY, - TGSI_WRITEMASK_XYZ, - TGSI_WRITEMASK_XYZW - }; unsigned i; struct gl_transform_feedback_info *info = &glsl_to_tgsi->shader_program->LinkedTransformFeedback; for (i = 0; i < info->NumOutputs; i++) { - assert(info->Outputs[i].NumComponents < Elements(comps_to_mask)); so->output[i].register_index = outputMapping[info->Outputs[i].OutputRegister]; - so->output[i].register_mask = - comps_to_mask[info->Outputs[i].NumComponents] - << info->Outputs[i].ComponentOffset; + so->output[i].start_component = info->Outputs[i].ComponentOffset; + so->output[i].num_components = info->Outputs[i].NumComponents; so->output[i].output_buffer = info->Outputs[i].OutputBuffer; +
so->output[i].dst_offset = info->Outputs[i].DstOffset; + } + + for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + so->stride[i] = info->BufferStride[i]; } so->num_outputs = info->NumOutputs; } -- 2.30.2