From: Chia-I Wu Date: Mon, 24 Jun 2013 06:13:33 +0000 (+0800) Subject: ilo: move SBE setup code to ilo_shader.c X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9b18df6e087a4db8ed5ed38bcc6b1db34fa95586;p=mesa.git ilo: move SBE setup code to ilo_shader.c Add ilo_shader_select_kernel_routing() to construct 3DSTATE_SBE. It is called in ilo_finalize_states(), rather than in create_fs_state(), as it depends on VS/GS and rasterizer states. With this change, ilo_shader_internal.h is no longer needed for ilo_gpe_gen6.c. --- diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index c0ed42dfe8c..97b566a89a4 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -30,7 +30,6 @@ #include "brw_defines.h" #include "intel_reg.h" -#include "shader/ilo_shader_internal.h" #include "ilo_context.h" #include "ilo_cp.h" #include "ilo_format.h" @@ -1814,178 +1813,52 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, void ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs_state, - const struct ilo_shader_state *last_sh_state, + const struct ilo_shader_state *fs, + const struct ilo_shader_state *last_sh, uint32_t *dw, int num_dwords) { - const struct ilo_shader *fs = fs_state->shader; - const struct ilo_shader *last_sh = last_sh_state->shader; - uint32_t point_sprite_enable, const_interp_enable; - uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS]; - int vue_offset, vue_len; - int dst, max_src, i; + int output_count, vue_offset, vue_len; + const struct ilo_kernel_routing *routing; ILO_GPE_VALID_GEN(dev, 6, 7); assert(num_dwords == 13); if (!fs) { + memset(dw, 0, sizeof(dw[0]) * num_dwords); + if (dev->gen >= ILO_GEN(7)) dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT; else dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT; - for (i = 1; i < num_dwords; i++) - dw[i] = 0; - return; } - if (last_sh) { - /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */ - assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE); - assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION); - vue_offset = 2; - vue_len = last_sh->out.count - vue_offset; - } - else { - vue_offset = 0; - vue_len = fs->in.count; - } - - point_sprite_enable = 0; - const_interp_enable = 0; - max_src = (last_sh) ? 0 : fs->in.count - 1; - - for (dst = 0; dst < fs->in.count; dst++) { - const int semantic = fs->in.semantic_names[dst]; - const int index = fs->in.semantic_indices[dst]; - const int interp = fs->in.interp[dst]; - int src; - uint16_t ctrl; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 268: - * - * "This field (Point Sprite Texture Coordinate Enable) must be - * programmed to 0 when non-point primitives are rendered." - * - * TODO We do not check that yet. - */ - if (semantic == TGSI_SEMANTIC_GENERIC && - (rasterizer->state.sprite_coord_enable & (1 << index))) - point_sprite_enable |= 1 << dst; - - if (interp == TGSI_INTERPOLATE_CONSTANT || - (interp == TGSI_INTERPOLATE_COLOR && rasterizer->state.flatshade)) - const_interp_enable |= 1 << dst; - - if (!last_sh) { - attr_ctrl[dst] = 0; - continue; - } - - /* find the matching VS/GS OUT for FS IN[i] */ - ctrl = 0; - for (src = 0; src < vue_len; src++) { - if (last_sh->out.semantic_names[vue_offset + src] != semantic || - last_sh->out.semantic_indices[vue_offset + src] != index) - continue; - - ctrl = src; - - if (semantic == TGSI_SEMANTIC_COLOR && - rasterizer->state.light_twoside && - src < vue_len - 1) { - const int next = src + 1; - - if (last_sh->out.semantic_names[vue_offset + next] == - TGSI_SEMANTIC_BCOLOR && - last_sh->out.semantic_indices[vue_offset + next] == index) { - ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << - ATTRIBUTE_SWIZZLE_SHIFT; - src++; - } - } - - break; - } - - /* if there is no COLOR, try BCOLOR */ - if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) { - for (src = 0; src < vue_len; src++) { - if (last_sh->out.semantic_names[vue_offset + src] != - TGSI_SEMANTIC_BCOLOR || - last_sh->out.semantic_indices[vue_offset + src] != index) - continue; - - ctrl = src; - break; - } - } - - if (src < vue_len) { - attr_ctrl[dst] = ctrl; - if (max_src < src) - max_src = src; - } - else { - /* - * The previous shader stage does not output this attribute. The - * value is supposed to be undefined for fs, unless the attribute - * goes through point sprite replacement or the attribute is - * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source - * attribute is picked. - * - * We should update the fs code and omit the output of - * TGSI_SEMANTIC_POSITION here. - */ - attr_ctrl[dst] = 0; - } - } - - for (; dst < Elements(attr_ctrl); dst++) - attr_ctrl[dst] = 0; + output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + assert(output_count <= 32); - /* only the first 16 attributes can be remapped */ - for (dst = 16; dst < Elements(attr_ctrl); dst++) - assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst); + routing = ilo_shader_get_kernel_routing(fs); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to - * 0 indicating no Vertex URB data to be read. - * - * This field should be set to the minimum length required to read the - * maximum source attribute. The maximum source attribute is indicated - * by the maximum value of the enabled Attribute # Source Attribute if - * Attribute Swizzle Enable is set, Number of Output Attributes-1 if - * enable is not set. - * - * read_length = ceiling((max_source_attr+1)/2) - * - * [errata] Corruption/Hang possible if length programmed larger than - * recommended" - */ - vue_len = max_src + 1; - - assert(fs->in.count <= 32); + vue_offset = routing->source_skip; assert(vue_offset % 2 == 0); + vue_offset /= 2; - if (dev->gen >= ILO_GEN(7)) { - dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT | - (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; + vue_len = (routing->source_len + 1) / 2; + if (!vue_len) + vue_len = 1; - if (last_sh) + if (dev->gen >= ILO_GEN(7)) { + dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT | + vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; + if (routing->swizzle_enable) dw[0] |= GEN7_SBE_SWIZZLE_ENABLE; } else { - dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT | - (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; - - if (last_sh) + dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT | + vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + if (routing->swizzle_enable) dw[0] |= GEN6_SF_SWIZZLE_ENABLE; } @@ -1998,11 +1871,20 @@ ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, break; } - for (i = 0; i < 8; i++) - dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i]; + STATIC_ASSERT(Elements(routing->swizzles) >= 16); + memcpy(&dw[1], routing->swizzles, 2 * 16); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 268: + * + * "This field (Point Sprite Texture Coordinate Enable) must be + * programmed to 0 when non-point primitives are rendered." + * + * TODO We do not check that yet. + */ + dw[9] = routing->point_sprite_enable; - dw[9] = point_sprite_enable; - dw[10] = const_interp_enable; + dw[10] = routing->const_interp_enable; /* WrapShortest enables */ dw[11] = 0; diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 2cff95bd994..5f95a19244f 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -27,6 +27,7 @@ #include "tgsi/tgsi_parse.h" #include "intel_winsys.h" +#include "brw_defines.h" /* for SBE setup */ #include "shader/ilo_shader_internal.h" #include "ilo_state.h" @@ -848,6 +849,157 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader, return (shader->shader != cur); } +static int +route_attr(const int *semantics, const int *indices, int len, + int semantic, int index) +{ + int i; + + for (i = 0; i < len; i++) { + if (semantics[i] == semantic && indices[i] == index) + return i; + } + + /* failed to match for COLOR, try BCOLOR */ + if (semantic == TGSI_SEMANTIC_COLOR) { + for (i = 0; i < len; i++) { + if (semantics[i] == TGSI_SEMANTIC_BCOLOR && indices[i] == index) + return i; + } + } + + return -1; +} + +/** + * Select a routing for the given source shader and rasterizer state. + * + * \return true if a different routing is selected + */ +bool +ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, + const struct ilo_shader_state *source, + const struct ilo_rasterizer_state *rasterizer) +{ + const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable; + const bool light_twoside = rasterizer->state.light_twoside; + struct ilo_shader *kernel = shader->shader; + struct ilo_kernel_routing *routing = &kernel->routing; + const int *src_semantics, *src_indices; + int src_len, max_src_slot; + int dst_len, dst_slot; + + /* we are constructing 3DSTATE_SBE here */ + assert(shader->info.dev->gen >= ILO_GEN(6) && + shader->info.dev->gen <= ILO_GEN(7)); + + assert(kernel); + + if (source) { + assert(source->shader); + src_semantics = source->shader->out.semantic_names; + src_indices = source->shader->out.semantic_indices; + src_len = source->shader->out.count; + + /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */ + assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE); + assert(src_semantics[1] == TGSI_SEMANTIC_POSITION); + routing->source_skip = 2; + routing->source_len = src_len - routing->source_skip; + src_semantics += routing->source_skip; + src_indices += routing->source_skip; + } + else { + src_semantics = kernel->in.semantic_names; + src_indices = kernel->in.semantic_indices; + src_len = kernel->in.count; + + routing->source_skip = 0; + routing->source_len = src_len; + } + + routing->const_interp_enable = kernel->in.const_interp_enable; + routing->point_sprite_enable = 0; + routing->swizzle_enable = false; + + assert(kernel->in.count <= Elements(routing->swizzles)); + dst_len = MIN2(kernel->in.count, Elements(routing->swizzles)); + max_src_slot = -1; + + for (dst_slot = 0; dst_slot < dst_len; dst_slot++) { + const int semantic = kernel->in.semantic_names[dst_slot]; + const int index = kernel->in.semantic_indices[dst_slot]; + int src_slot; + + if (semantic == TGSI_SEMANTIC_GENERIC && + (sprite_coord_enable & (1 << index))) + routing->point_sprite_enable |= 1 << dst_slot; + + if (source) { + src_slot = route_attr(src_semantics, src_indices, + routing->source_len, semantic, index); + + /* + * The source shader stage does not output this attribute. The value + * is supposed to be undefined, unless the attribute goes through + * point sprite replacement or the attribute is + * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source + * attribute is picked. + * + * We should update the kernel code and omit the output of + * TGSI_SEMANTIC_POSITION here. + */ + if (src_slot < 0) + src_slot = 0; + } + else { + src_slot = dst_slot; + } + + routing->swizzles[dst_slot] = src_slot; + + /* use the following slot for two-sided lighting */ + if (semantic == TGSI_SEMANTIC_COLOR && light_twoside && + src_slot + 1 < routing->source_len && + src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR && + src_indices[src_slot + 1] == index) { + routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << + ATTRIBUTE_SWIZZLE_SHIFT; + src_slot++; + } + + if (routing->swizzles[dst_slot] != dst_slot) + routing->swizzle_enable = true; + + if (max_src_slot < src_slot) + max_src_slot = src_slot; + } + + memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) - + sizeof(routing->swizzles[0]) * dst_slot); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to + * 0 indicating no Vertex URB data to be read. + * + * This field should be set to the minimum length required to read the + * maximum source attribute. The maximum source attribute is indicated + * by the maximum value of the enabled Attribute # Source Attribute if + * Attribute Swizzle Enable is set, Number of Output Attributes-1 if + * enable is not set. + * + * read_length = ceiling((max_source_attr+1)/2) + * + * [errata] Corruption/Hang possible if length programmed larger than + * recommended" + */ + routing->source_len = max_src_slot + 1; + + return true; +} + /** * Return the cache offset of the selected kernel. This must be called after * ilo_shader_select_kernel() and ilo_shader_cache_upload(). @@ -978,3 +1130,16 @@ ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader) return &kernel->so_info; } + +/** + * Return the routing info of the selected kernel. + */ +const struct ilo_kernel_routing * +ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader) +{ + const struct ilo_shader *kernel = shader->shader; + + assert(kernel); + + return &kernel->routing; +} diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index d326b9c01f4..6a2b8a397b3 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -58,8 +58,18 @@ enum ilo_kernel_param { ILO_KERNEL_PARAM_COUNT, }; +struct ilo_kernel_routing { + uint32_t const_interp_enable; + uint32_t point_sprite_enable; + unsigned source_skip, source_len; + + bool swizzle_enable; + uint16_t swizzles[16]; +}; + struct intel_bo; struct ilo_context; +struct ilo_rasterizer_state; struct ilo_shader_cache; struct ilo_shader_state; struct ilo_shader_cso; @@ -114,6 +124,11 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader, const struct ilo_context *ilo, uint32_t dirty); +bool +ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, + const struct ilo_shader_state *source, + const struct ilo_rasterizer_state *rasterizer); + uint32_t ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader); @@ -127,4 +142,7 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader); const struct pipe_stream_output_info * ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader); +const struct ilo_kernel_routing * +ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader); + #endif /* ILO_SHADER_H */ diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 58894f288ff..7046a69890c 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -72,6 +72,14 @@ finalize_shader_states(struct ilo_context *ilo) /* mark the state dirty if a new kernel is selected */ ilo->dirty |= state; } + + /* need to setup SBE for FS */ + if (type == PIPE_SHADER_FRAGMENT && ilo->dirty & + (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) { + if (ilo_shader_select_kernel_routing(shader, + (ilo->gs) ? ilo->gs : ilo->vs, ilo->rasterizer)) + ilo->dirty |= state; + } } } diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c index eca118aa2a1..bea2c097e14 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c +++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c @@ -1574,6 +1574,9 @@ fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi, } switch (tgsi->inputs[i].interp) { + case TGSI_INTERPOLATE_CONSTANT: + sh->in.const_interp_enable |= 1 << i; + break; case TGSI_INTERPOLATE_LINEAR: sh->in.has_linear_interp = true; @@ -1587,8 +1590,10 @@ fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi, } break; case TGSI_INTERPOLATE_COLOR: - if (flatshade) + if (flatshade) { + sh->in.const_interp_enable |= 1 << i; break; + } /* fall through */ case TGSI_INTERPOLATE_PERSPECTIVE: if (tgsi->inputs[i].centroid) { diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 3515e3f1d9f..d9ae2fa0a8f 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -30,6 +30,7 @@ #include "ilo_common.h" #include "ilo_context.h" +#include "ilo_shader.h" /* XXX The interface needs to be reworked */ @@ -88,6 +89,7 @@ struct ilo_shader { bool has_pos; bool has_linear_interp; int barycentric_interpolation_mode; + uint32_t const_interp_enable; bool discard_adj; } in; @@ -114,6 +116,8 @@ struct ilo_shader { void *kernel; int kernel_size; + struct ilo_kernel_routing routing; + /* what does the push constant buffer consist of? */ struct { int clip_state_size;