From: Chia-I Wu Date: Tue, 30 Sep 2014 02:32:53 +0000 (+0800) Subject: ilo: add a pass to finalize ilo_ve_state X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2d13b5ac81a8c2aa1f36be7e3350b12fbdbd65e8;p=mesa.git ilo: add a pass to finalize ilo_ve_state Add finalize_vertex_elements() to finalize ilo_ve_state. This fixes a potential issue with URB entry allocation for VS and moves the complexity of gen6_3DSTATE_VERTEX_ELEMENTS() to the new function. Signed-off-by: Chia-I Wu --- diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 579c9c7eca3..5ce8b530eae 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -28,7 +28,6 @@ #include "util/u_draw.h" #include "util/u_pack_color.h" -#include "ilo_builder_3d_top.h" /* for ve_init_cso_with_components() */ #include "ilo_draw.h" #include "ilo_state.h" #include "ilo_state_gen.h" @@ -41,24 +40,25 @@ static bool ilo_blitter_set_invariants(struct ilo_blitter *blitter) { - struct pipe_vertex_element velems[2]; + struct pipe_vertex_element velem; struct pipe_viewport_state vp; if (blitter->initialized) return true; /* only vertex X and Y */ - memset(&velems, 0, sizeof(velems)); - velems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - ilo_gpe_init_ve(blitter->ilo->dev, 2, velems, &blitter->ve); + memset(&velem, 0, sizeof(velem)); + velem.src_format = PIPE_FORMAT_R32G32_FLOAT; + ilo_gpe_init_ve(blitter->ilo->dev, 1, &velem, &blitter->ve); - /* override first VE to be VUE header */ - ve_init_cso_with_components(blitter->ilo->dev, + /* generate VUE header */ + ilo_gpe_init_ve_nosrc(blitter->ilo->dev, GEN6_VFCOMP_STORE_0, /* Reserved */ GEN6_VFCOMP_STORE_0, /* Render Target Array Index */ GEN6_VFCOMP_STORE_0, /* Viewport Index */ GEN6_VFCOMP_STORE_0, /* Point Width */ - &blitter->ve.cso[0]); + &blitter->ve.nosrc_cso); + blitter->ve.prepend_nosrc_cso = true; /* a rectangle has 3 vertices in a RECTLIST */ 
util_draw_init_info(&blitter->draw); diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h index 4d0750f8753..4bcab8c29ae 100644 --- a/src/gallium/drivers/ilo/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h @@ -438,77 +438,9 @@ gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0); } -static inline void -ve_init_cso_with_components(const struct ilo_dev_info *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_DEV_ASSERT(dev, 6, 7.5); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = GEN6_VE_STATE_DW0_VALID; - cso->payload[1] = - comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | - comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | - comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | - comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; -} - -static inline void -ve_set_cso_edgeflag(const struct ilo_dev_info *dev, - struct ilo_ve_cso *cso) -{ - int format; - - ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. - * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." 
- */ - - cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; - cso->payload[1] = - GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; - - /* - * Edge flags have format GEN6_FORMAT_R8_UINT when defined via - * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. - */ - format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; - if (format == GEN6_FORMAT_R32_FLOAT) { - STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); - cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); - } - else { - assert(format == GEN6_FORMAT_R8_UINT); - } -} - static inline void gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, - const struct ilo_ve_state *ve, - bool last_velement_edgeflag, - bool prepend_generated_ids) + const struct ilo_ve_state *ve) { uint8_t cmd_len; uint32_t *dw; @@ -517,66 +449,37 @@ gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, ILO_DEV_ASSERT(builder->dev, 6, 7.5); /* + * From the Sandy Bridge PRM, volume 2 part 1, page 92: + * + * "At least one VERTEX_ELEMENT_STATE structure must be included." + * * From the Sandy Bridge PRM, volume 2 part 1, page 93: * * "Up to 34 (DevSNB+) vertex elements are supported." 
*/ - assert(ve->count + prepend_generated_ids <= 34); + assert(ve->count + ve->prepend_nosrc_cso >= 1); + assert(ve->count + ve->prepend_nosrc_cso <= 34); STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); - if (!ve->count && !prepend_generated_ids) { - struct ilo_ve_cso dummy; - - ve_init_cso_with_components(builder->dev, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_1_FP, - &dummy); - - cmd_len = 3; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); - memcpy(&dw[1], dummy.payload, sizeof(dummy.payload)); - - return; - } - - cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; + cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); dw++; - if (prepend_generated_ids) { - struct ilo_ve_cso gen_ids; - - ve_init_cso_with_components(builder->dev, - GEN6_VFCOMP_STORE_VID, - GEN6_VFCOMP_STORE_IID, - GEN6_VFCOMP_NOSTORE, - GEN6_VFCOMP_NOSTORE, - &gen_ids); - - memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload)); + if (ve->prepend_nosrc_cso) { + memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload)); dw += 2; } - if (last_velement_edgeflag && ve->count) { - struct ilo_ve_cso edgeflag; - - for (i = 0; i < ve->count - 1; i++) - memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); - - edgeflag = ve->cso[i]; - ve_set_cso_edgeflag(builder->dev, &edgeflag); - memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload)); - } else { - for (i = 0; i < ve->count; i++) - memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); + for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) { + memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload)); + dw += 2; } + + if (ve->last_cso_edgeflag) + memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload)); } static inline void diff --git 
a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 2c66b973baa..389b596e6cb 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -332,8 +332,8 @@ gen6_draw_common_urb(struct ilo_render *r, * VS-generated output data, output URB availability isn't a * factor." */ - if (vs_entry_size < vec->ve->count) - vs_entry_size = vec->ve->count; + if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) + vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; gs_entry_size = (vec->gs) ? ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) : @@ -465,31 +465,8 @@ gen6_draw_vf(struct ilo_render *r, gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb); /* 3DSTATE_VERTEX_ELEMENTS */ - if (DIRTY(VE) || DIRTY(VS)) { - const struct ilo_ve_state *ve = vec->ve; - bool last_velement_edgeflag = false; - bool prepend_generate_ids = false; - - if (vec->vs) { - if (ilo_shader_get_kernel_param(vec->vs, - ILO_KERNEL_VS_INPUT_EDGEFLAG)) { - /* we rely on the state tracker here */ - assert(ilo_shader_get_kernel_param(vec->vs, - ILO_KERNEL_INPUT_COUNT) == ve->count); - - last_velement_edgeflag = true; - } - - if (ilo_shader_get_kernel_param(vec->vs, - ILO_KERNEL_VS_INPUT_INSTANCEID) || - ilo_shader_get_kernel_param(vec->vs, - ILO_KERNEL_VS_INPUT_VERTEXID)) - prepend_generate_ids = true; - } - - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, ve, - last_velement_edgeflag, prepend_generate_ids); - } + if (DIRTY(VE)) + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve); } void @@ -978,11 +955,12 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r, session->vb_start, session->vb_end, sizeof(blitter->vertices[0])); - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, - &blitter->ve, false, false); + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); + + gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0, + (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float), + 0); 
- gen6_3DSTATE_URB(r->builder, - r->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0); /* 3DSTATE_URB workaround */ if (r->state.gs.active) { ilo_render_emit_flush(r); diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 373f800e071..9aefc6fa446 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -245,8 +245,8 @@ gen7_draw_common_urb(struct ilo_render *r, * Allocation Size must be sized to the maximum of the vertex input * and output structures." */ - if (vs_entry_size < vec->ve->count) - vs_entry_size = vec->ve->count; + if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) + vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; vs_entry_size *= sizeof(float) * 4; vs_total_size = r->dev->urb_size - offset; @@ -716,7 +716,8 @@ gen7_rectlist_urb(struct ilo_render *r, (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 32768 : 16384; gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset, - blitter->ve.count * 4 * sizeof(float)); + (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * + 4 * sizeof(float)); gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0); gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0); @@ -839,8 +840,7 @@ ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r, session->vb_start, session->vb_end, sizeof(blitter->vertices[0])); - gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, - &blitter->ve, false, false); + gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); gen7_rectlist_pcb_alloc(r, blitter); diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index d9b81bb97f1..6177ac07a3d 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -186,6 +186,63 @@ finalize_index_buffer(struct ilo_context *ilo) pipe_resource_reference(&current_hw_res, NULL); } +static void +finalize_vertex_elements(struct ilo_context *ilo) +{ + struct ilo_state_vector 
*vec = &ilo->state_vector; + + if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS))) + return; + + vec->dirty |= ILO_DIRTY_VE; + + vec->ve->last_cso_edgeflag = false; + if (vec->ve->count && vec->vs && + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) { + vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1]; + ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso); + vec->ve->last_cso_edgeflag = true; + } + + vec->ve->prepend_nosrc_cso = false; + if (vec->vs && + (ilo_shader_get_kernel_param(vec->vs, + ILO_KERNEL_VS_INPUT_INSTANCEID) || + ilo_shader_get_kernel_param(vec->vs, + ILO_KERNEL_VS_INPUT_VERTEXID))) { + ilo_gpe_init_ve_nosrc(ilo->dev, + GEN6_VFCOMP_STORE_VID, + GEN6_VFCOMP_STORE_IID, + GEN6_VFCOMP_NOSTORE, + GEN6_VFCOMP_NOSTORE, + &vec->ve->nosrc_cso); + vec->ve->prepend_nosrc_cso = true; + } else if (!vec->vs) { + /* generate VUE header */ + ilo_gpe_init_ve_nosrc(ilo->dev, + GEN6_VFCOMP_STORE_0, /* Reserved */ + GEN6_VFCOMP_STORE_0, /* Render Target Array Index */ + GEN6_VFCOMP_STORE_0, /* Viewport Index */ + GEN6_VFCOMP_STORE_0, /* Point Width */ + &vec->ve->nosrc_cso); + vec->ve->prepend_nosrc_cso = true; + } else if (!vec->ve->count) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 92: + * + * "SW must ensure that at least one vertex element is defined prior + * to issuing a 3DPRIMTIVE command, or operation is UNDEFINED." + */ + ilo_gpe_init_ve_nosrc(ilo->dev, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_1_FP, + &vec->ve->nosrc_cso); + vec->ve->prepend_nosrc_cso = true; + } +} + /** * Finalize states. Some states depend on other states and are * incomplete/invalid until finalized. 
@@ -199,6 +256,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_shader_states(&ilo->state_vector); finalize_constant_buffers(ilo); finalize_index_buffer(ilo); + finalize_vertex_elements(ilo); u_upload_unmap(ilo->uploader); } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 7f68118d4b6..3f3c495b061 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -176,6 +176,13 @@ struct ilo_ve_state { unsigned instance_divisors[PIPE_MAX_ATTRIBS]; unsigned vb_mapping[PIPE_MAX_ATTRIBS]; unsigned vb_count; + + /* these are not valid until the state is finalized */ + struct ilo_ve_cso edgeflag_cso; + bool last_cso_edgeflag; + + struct ilo_ve_cso nosrc_cso; + bool prepend_nosrc_cso; }; struct ilo_so_state { @@ -385,7 +392,7 @@ struct ilo_state_vector { uint32_t dirty; struct ilo_vb_state vb; - const struct ilo_ve_state *ve; + struct ilo_ve_state *ve; struct ilo_ib_state ib; struct ilo_shader_state *vs; diff --git a/src/gallium/drivers/ilo/ilo_state_gen.h b/src/gallium/drivers/ilo/ilo_state_gen.h index a4faad5134b..9e2f7c4a54b 100644 --- a/src/gallium/drivers/ilo/ilo_state_gen.h +++ b/src/gallium/drivers/ilo/ilo_state_gen.h @@ -86,6 +86,15 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, const struct pipe_vertex_element *states, struct ilo_ve_state *ve); +void +ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso); + +void +ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso); + void ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, const struct pipe_viewport_state *state, diff --git a/src/gallium/drivers/ilo/ilo_state_gen6.c b/src/gallium/drivers/ilo/ilo_state_gen6.c index 6950f275908..2da2de25a88 100644 --- a/src/gallium/drivers/ilo/ilo_state_gen6.c +++ b/src/gallium/drivers/ilo/ilo_state_gen6.c @@ -327,6 +327,83 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, } } +void 
+ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso) +{ + int format; + + ILO_DEV_ASSERT(dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. + * + * - The Source Element Format must be set to the UINT format. + * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." + */ + cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; + + /* + * Edge flags have format GEN6_FORMAT_R8_UINT when defined via + * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. 
+ */ + format = GEN_EXTRACT(cso->payload[0], GEN6_VE_STATE_DW0_FORMAT); + cso->payload[0] &= ~GEN6_VE_STATE_DW0_FORMAT__MASK; + + switch (format) { + case GEN6_FORMAT_R32_FLOAT: + format = GEN6_FORMAT_R32_UINT; + break; + default: + assert(format == GEN6_FORMAT_R8_UINT); + break; + } + + cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_STATE_DW0_FORMAT); + + cso->payload[1] = + GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; +} + +void +ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) +{ + ILO_DEV_ASSERT(dev, 6, 7.5); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + + assert(comp0 != GEN6_VFCOMP_STORE_SRC && + comp1 != GEN6_VFCOMP_STORE_SRC && + comp2 != GEN6_VFCOMP_STORE_SRC && + comp3 != GEN6_VFCOMP_STORE_SRC); + + cso->payload[0] = GEN6_VE_STATE_DW0_VALID; + cso->payload[1] = + comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | + comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | + comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | + comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; +} + void ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, const struct ilo_shader_state *vs,