From: Chia-I Wu Date: Fri, 31 May 2013 18:00:55 +0000 (+0800) Subject: ilo: introduce vertex element CSO X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=70e78211d6c09b3076ac261d2cde9d0037540065;p=mesa.git ilo: introduce vertex element CSO Introduce ilo_ve_cso and initialize it in create_vertex_elements_state(). This commit goes a step further by setting up mappings from HW VB to PIPE VB, which we failed to do previously. That allows us to support instanced rendering. --- diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index a504acd586a..6c5125128c7 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -402,9 +402,9 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p, } /* 3DSTATE_VERTEX_BUFFERS */ - if (DIRTY(VERTEX_BUFFERS)) { + if (DIRTY(VERTEX_BUFFERS) || DIRTY(VERTEX_ELEMENTS)) { p->gen6_3DSTATE_VERTEX_BUFFERS(p->dev, - ilo->vb.states, NULL, ilo->vb.enabled_mask, p->cp); + ilo->vb.states, ilo->vb.enabled_mask, ilo->ve, p->cp); } /* 3DSTATE_VERTEX_ELEMENTS */ @@ -425,8 +425,7 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p, prepend_generate_ids = (info->has_instanceid || info->has_vertexid); } - p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, - ve->states, ve->count, + p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, ve, last_velement_edgeflag, prepend_generate_ids, p->cp); } } diff --git a/src/gallium/drivers/ilo/ilo_gpe.h b/src/gallium/drivers/ilo/ilo_gpe.h index dc4e80eb97e..ae813b1e839 100644 --- a/src/gallium/drivers/ilo/ilo_gpe.h +++ b/src/gallium/drivers/ilo/ilo_gpe.h @@ -62,9 +62,18 @@ struct ilo_ib_state { struct pipe_index_buffer state; }; +struct ilo_ve_cso { + /* VERTEX_ELEMENT_STATE */ + uint32_t payload[2]; +}; + struct ilo_ve_state { - struct pipe_vertex_element states[PIPE_MAX_ATTRIBS]; + struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS]; unsigned count; + + unsigned instance_divisors[PIPE_MAX_ATTRIBS]; + unsigned vb_mapping[PIPE_MAX_ATTRIBS]; + unsigned vb_count; }; struct ilo_so_state { @@ -190,6 +199,12 @@ struct ilo_global_binding { unsigned count; }; +void +ilo_gpe_init_ve(const struct ilo_dev_info *dev, + unsigned num_states, + const struct pipe_vertex_element *states, + struct ilo_ve_state *ve); + void ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, const struct pipe_viewport_state *state, diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index a585819961f..3167bd692ee 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -712,12 +712,13 @@ gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev, static void gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, const struct pipe_vertex_buffer *vbuffers, - const int *instance_divisors, - uint32_t vbuffer_mask, + uint64_t vbuffer_mask, + const struct ilo_ve_state *ve, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08); uint8_t cmd_len; + unsigned hw_idx; ILO_GPE_VALID_GEN(dev, 6, 7); @@ -725,27 +726,34 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, * From the Sandy Bridge PRM, volume 2 part 1, page 82: * * "From 1 to 33 VBs can be specified..." - * - * Because of the type of vbuffer_mask, this is always the case. */ assert(vbuffer_mask <= (1UL << 33)); if (!vbuffer_mask) return; - cmd_len = 4 * util_bitcount(vbuffer_mask) + 1; + cmd_len = 1; + + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + + if (vbuffer_mask & (1 << pipe_idx)) + cmd_len += 4; + } ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); - while (vbuffer_mask) { - const int index = u_bit_scan(&vbuffer_mask); - const struct pipe_vertex_buffer *vb = &vbuffers[index]; - const int instance_divisor = - (instance_divisors) ? instance_divisors[index] : 0; + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned instance_divisor = ve->instance_divisors[hw_idx]; + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx]; uint32_t dw; - dw = index << GEN6_VB0_INDEX_SHIFT; + if (!(vbuffer_mask & (1 << pipe_idx))) + continue; + + dw = hw_idx << GEN6_VB0_INDEX_SHIFT; if (instance_divisor) dw |= GEN6_VB0_ACCESS_INSTANCEDATA; @@ -781,17 +789,164 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, ilo_cp_end(cp); } +static void +ve_set_cso_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso) +{ + int format; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. + * + * - The Source Element Format must be set to the UINT format. + * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." + */ + + cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE; + cso->payload[1] = + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT; + + /* + * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via + * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case. + */ + format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff; + if (format == BRW_SURFACEFORMAT_R32_FLOAT) { + STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT == + BRW_SURFACEFORMAT_R32_FLOAT - 1); + + cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT); + } + else { + assert(format == BRW_SURFACEFORMAT_R8_UINT); + } +} + +static void +ve_init_cso_with_components(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) +{ + ILO_GPE_VALID_GEN(dev, 6, 7); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = GEN6_VE0_VALID; + cso->payload[1] = + comp0 << BRW_VE1_COMPONENT_0_SHIFT | + comp1 << BRW_VE1_COMPONENT_1_SHIFT | + comp2 << BRW_VE1_COMPONENT_2_SHIFT | + comp3 << BRW_VE1_COMPONENT_3_SHIFT; +} + +static void +ve_init_cso(const struct ilo_dev_info *dev, + const struct pipe_vertex_element *state, + unsigned vb_index, + struct ilo_ve_cso *cso) +{ + int comp[4] = { + BRW_VE1_COMPONENT_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, + BRW_VE1_COMPONENT_STORE_SRC, + }; + int format; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + switch (util_format_get_nr_components(state->src_format)) { + case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0; + case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0; + case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? + BRW_VE1_COMPONENT_STORE_1_INT : + BRW_VE1_COMPONENT_STORE_1_FLT; + } + + format = ilo_translate_vertex_format(state->src_format); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = + vb_index << GEN6_VE0_INDEX_SHIFT | + GEN6_VE0_VALID | + format << BRW_VE0_FORMAT_SHIFT | + state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT; + + cso->payload[1] = + comp[0] << BRW_VE1_COMPONENT_0_SHIFT | + comp[1] << BRW_VE1_COMPONENT_1_SHIFT | + comp[2] << BRW_VE1_COMPONENT_2_SHIFT | + comp[3] << BRW_VE1_COMPONENT_3_SHIFT; +} + +void +ilo_gpe_init_ve(const struct ilo_dev_info *dev, + unsigned num_states, + const struct pipe_vertex_element *states, + struct ilo_ve_state *ve) +{ + unsigned i; + + ILO_GPE_VALID_GEN(dev, 6, 7); + + ve->count = num_states; + ve->vb_count = 0; + + for (i = 0; i < num_states; i++) { + const unsigned pipe_idx = states[i].vertex_buffer_index; + const unsigned instance_divisor = states[i].instance_divisor; + unsigned hw_idx; + + /* + * map the pipe vb to the hardware vb, which has a fixed instance + * divisor + */ + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + if (ve->vb_mapping[hw_idx] == pipe_idx && + ve->instance_divisors[hw_idx] == instance_divisor) + break; + } + + /* create one if there is no matching hardware vb */ + if (hw_idx >= ve->vb_count) { + hw_idx = ve->vb_count++; + + ve->vb_mapping[hw_idx] = pipe_idx; + ve->instance_divisors[hw_idx] = instance_divisor; + } + + ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); + } +} + static void gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, - const struct pipe_vertex_element *velements, - int num_velements, + const struct ilo_ve_state *ve, bool last_velement_edgeflag, bool prepend_generated_ids, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09); uint8_t cmd_len; - int format, i; + unsigned i; ILO_GPE_VALID_GEN(dev, 6, 7); @@ -800,118 +955,58 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, * * "Up to 34 (DevSNB+) vertex elements are supported." */ - assert(num_velements + prepend_generated_ids <= 34); + assert(ve->count + prepend_generated_ids <= 34); - if (!num_velements && !prepend_generated_ids) { - cmd_len = 3; - format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + if (!ve->count && !prepend_generated_ids) { + struct ilo_ve_cso dummy; + + ve_init_cso_with_components(dev, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_0, + BRW_VE1_COMPONENT_STORE_1_FLT, + &dummy); + cmd_len = 3; ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, - 0 << GEN6_VE0_INDEX_SHIFT | - GEN6_VE0_VALID | - format << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - ilo_cp_write(cp, - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT); + ilo_cp_write_multi(cp, dummy.payload, 2); ilo_cp_end(cp); return; } - cmd_len = 2 * (num_velements + prepend_generated_ids) + 1; + cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); if (prepend_generated_ids) { - ilo_cp_write(cp, GEN6_VE0_VALID); - ilo_cp_write(cp, - BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT); - } - - for (i = 0; i < num_velements; i++) { - const struct pipe_vertex_element *ve = &velements[i]; - int comp[4] = { - BRW_VE1_COMPONENT_STORE_SRC, - BRW_VE1_COMPONENT_STORE_SRC, - BRW_VE1_COMPONENT_STORE_SRC, - BRW_VE1_COMPONENT_STORE_SRC, - }; - int edgeflag_enable; - - if (last_velement_edgeflag && i == num_velements - 1) { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "* This bit (Edge Flag Enable) must only be ENABLED on the - * last valid VERTEX_ELEMENT structure. - * - * * When set, Component 0 Control must be set to - * VFCOMP_STORE_SRC, and Component 1-3 Control must be set to - * VFCOMP_NOSTORE. - * - * * The Source Element Format must be set to the UINT format. - * - * * [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST - * primitives to some set of corresponding edge-flag-supported - * primitive types (e.g., POLYGONs) prior to submission to the - * 3D pipeline." - * - * Only a limitied set of primitive types could have Edge Flag Enable - * set. The caller should not set last_velement_edgeflag for such - * primitive types. - */ - comp[1] = BRW_VE1_COMPONENT_NOSTORE; - comp[2] = BRW_VE1_COMPONENT_NOSTORE; - comp[3] = BRW_VE1_COMPONENT_NOSTORE; + struct ilo_ve_cso gen_ids; - switch (ve->src_format) { - case PIPE_FORMAT_R32_FLOAT: - format = ilo_translate_vertex_format(PIPE_FORMAT_R32_UINT); - break; - default: - assert(ve->src_format == PIPE_FORMAT_R8_UINT); - format = ilo_translate_vertex_format(ve->src_format); - break; - } + ve_init_cso_with_components(dev, + BRW_VE1_COMPONENT_STORE_VID, + BRW_VE1_COMPONENT_STORE_IID, + BRW_VE1_COMPONENT_NOSTORE, + BRW_VE1_COMPONENT_NOSTORE, + &gen_ids); - edgeflag_enable = GEN6_VE0_EDGE_FLAG_ENABLE; - } - else { - switch (util_format_get_nr_components(ve->src_format)) { - case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0; - case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0; - case 3: comp[3] = (util_format_is_pure_integer(ve->src_format)) ? - BRW_VE1_COMPONENT_STORE_1_INT : - BRW_VE1_COMPONENT_STORE_1_FLT; - } + ilo_cp_write_multi(cp, gen_ids.payload, 2); + } - format = ilo_translate_vertex_format(ve->src_format); + if (last_velement_edgeflag) { + struct ilo_ve_cso edgeflag; - edgeflag_enable = 0; - } + for (i = 0; i < ve->count - 1; i++) + ilo_cp_write_multi(cp, ve->cso[i].payload, 2); - ilo_cp_write(cp, - ve->vertex_buffer_index << GEN6_VE0_INDEX_SHIFT | - GEN6_VE0_VALID | - format << BRW_VE0_FORMAT_SHIFT | - edgeflag_enable | - ve->src_offset << BRW_VE0_SRC_OFFSET_SHIFT); - - ilo_cp_write(cp, - comp[0] << BRW_VE1_COMPONENT_0_SHIFT | - comp[1] << BRW_VE1_COMPONENT_1_SHIFT | - comp[2] << BRW_VE1_COMPONENT_2_SHIFT | - comp[3] << BRW_VE1_COMPONENT_3_SHIFT); + edgeflag = ve->cso[i]; + ve_set_cso_edgeflag(dev, &edgeflag); + ilo_cp_write_multi(cp, edgeflag.payload, 2); + } + else { + for (i = 0; i < ve->count; i++) + ilo_cp_write_multi(cp, ve->cso[i].payload, 2); } ilo_cp_end(cp); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index c7cd7b3a53e..5c94e7def26 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -195,14 +195,13 @@ typedef void typedef void (*ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS)(const struct ilo_dev_info *dev, const struct pipe_vertex_buffer *vbuffers, - const int *instance_divisors, - uint32_t vbuffer_mask, + uint64_t vbuffer_mask, + const struct ilo_ve_state *ve, struct ilo_cp *cp); typedef void (*ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS)(const struct ilo_dev_info *dev, - const struct pipe_vertex_element *velements, - int num_elements, + const struct ilo_ve_state *ve, bool last_velement_edgeflag, bool prepend_generated_ids, struct ilo_cp *cp); diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index c4a5e9444b6..9e95bb947c3 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -347,7 +347,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) return false; case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - return false; /* TODO */ + return true; case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: return false; case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index c032e177673..502297e5591 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -438,13 +438,13 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe, unsigned num_elements, const struct pipe_vertex_element *elements) { + struct ilo_context *ilo = ilo_context(pipe); struct ilo_ve_state *ve; ve = MALLOC_STRUCT(ilo_ve_state); assert(ve); - memcpy(ve->states, elements, sizeof(*elements) * num_elements); - ve->count = num_elements; + ilo_gpe_init_ve(ilo->dev, num_elements, elements, ve); return ve; }