From 851202c319701c541d52f87ffa22505504c50d57 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 20 Jun 2013 17:42:21 +0800 Subject: [PATCH] ilo: use ilo_shader_cso for GS Add ilo_gpe_init_gs_cso() to construct 3DSTATE_GS once and early for geometry shaders. --- .../drivers/ilo/ilo_3d_pipeline_gen6.c | 11 +- src/gallium/drivers/ilo/ilo_gpe.h | 23 ++ src/gallium/drivers/ilo/ilo_gpe_gen6.c | 260 ++++++++++-------- src/gallium/drivers/ilo/ilo_gpe_gen6.h | 6 +- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 65 +++-- src/gallium/drivers/ilo/ilo_gpe_gen7.h | 2 +- src/gallium/drivers/ilo/ilo_shader.c | 3 + 7 files changed, 225 insertions(+), 145 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index e51d7942ab3..72e87d4efa7 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -501,16 +501,9 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p, /* 3DSTATE_GS */ if (DIRTY(GS) || DIRTY(VS) || session->prim_changed || session->kernel_bo_changed) { - const struct ilo_shader *gs = (ilo->gs)? ilo->gs->shader : NULL; - const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL; - const int num_vertices = u_vertices_per_prim(session->reduced_prim); - - if (gs) - assert(!gs->pcb.clip_state_size); + const int verts_per_prim = u_vertices_per_prim(session->reduced_prim); - p->gen6_3DSTATE_GS(p->dev, gs, vs, - (vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0, - p->cp); + p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp); } } diff --git a/src/gallium/drivers/ilo/ilo_gpe.h b/src/gallium/drivers/ilo/ilo_gpe.h index b5d0163f909..e9141f7c3af 100644 --- a/src/gallium/drivers/ilo/ilo_gpe.h +++ b/src/gallium/drivers/ilo/ilo_gpe.h @@ -438,4 +438,27 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, const struct ilo_shader_state *vs, struct ilo_shader_cso *cso); +void +ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso); + +void +ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso); + +static inline void +ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso) +{ + if (dev->gen >= ILO_GEN(7)) { + ilo_gpe_init_gs_cso_gen7(dev, gs, cso); + } + else { + ilo_gpe_init_gs_cso_gen6(dev, gs, cso); + } +} + #endif /* ILO_GPE_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index e57609bcfa4..f4918dccf4b 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -1246,135 +1246,167 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, ilo_cp_end(cp); } +void +ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso) +{ + int start_grf, vue_read_len, max_threads; + uint32_t dw2, dw4, dw5, dw6; + + ILO_GPE_VALID_GEN(dev, 6, 6); + + if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) { + start_grf = ilo_shader_get_kernel_param(gs, + ILO_KERNEL_URB_DATA_START_REG); + + vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); + } + else { + start_grf = ilo_shader_get_kernel_param(gs, + ILO_KERNEL_VS_GEN6_SO_START_REG); + + vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT); + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 153: + * + * "Specifies the amount of URB data read and passed in the thread + * payload for each Vertex URB entry, in 256-bit register increments. + * + * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to + * 0 indicating no Vertex URB data to be read and passed to the + * thread." + */ + vue_read_len = (vue_read_len + 1) / 2; + if (!vue_read_len) + vue_read_len = 1; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 154: + * + * "Maximum Number of Threads valid range is [0,27] when Rendering + * Enabled bit is set." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 173: + * + * "Programming Note: If the GS stage is enabled, software must always + * allocate at least one GS URB Entry. This is true even if the GS + * thread never needs to output vertices to the pipeline, e.g., when + * only performing stream output. This is an artifact of the need to + * pass the GS thread an initial destination URB handle." + * + * As such, we always enable rendering, and limit the number of threads. + */ + if (dev->gt == 2) { + /* maximum is 60, but limited to 28 */ + max_threads = 28; + } + else { + /* maximum is 24, but limited to 21 (see brwCreateContext()) */ + max_threads = 21; + } + + if (max_threads > 28) + max_threads = 28; + + dw2 = GEN6_GS_SPF_MODE; + + dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | + 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | + start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; + + dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_SO_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE; + + /* + * we cannot make use of GEN6_GS_REORDER because it will reorder + * triangle strips according to D3D rules (triangle 2N+1 uses vertices + * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices + * (2N+2, 2N+1, 2N+3)). + */ + dw6 = GEN6_GS_ENABLE; + + if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) + dw6 |= GEN6_GS_DISCARD_ADJACENCY; + + if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { + const uint32_t svbi_post_inc = + ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); + + dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE; + if (svbi_post_inc) { + dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE | + svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT; + } + } + + STATIC_ASSERT(Elements(cso->payload) >= 4); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; + cso->payload[3] = dw6; +} + static void gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, - const struct ilo_shader *gs, - const struct ilo_shader *vs, - uint32_t vs_offset, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); const uint8_t cmd_len = 7; uint32_t dw1, dw2, dw4, dw5, dw6; - int i; ILO_GPE_VALID_GEN(dev, 6, 6); - if (!gs && (!vs || !vs->stream_output)) { - dw1 = 0; - dw2 = 0; - dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT; - dw5 = GEN6_GS_STATISTICS_ENABLE; - dw6 = 0; - } - else { - int max_threads, vue_read_len; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 154: - * - * "Maximum Number of Threads valid range is [0,27] when Rendering - * Enabled bit is set." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 173: - * - * "Programming Note: If the GS stage is enabled, software must - * always allocate at least one GS URB Entry. This is true even if - * the GS thread never needs to output vertices to the pipeline, - * e.g., when only performing stream output. This is an artifact of - * the need to pass the GS thread an initial destination URB - * handle." - * - * As such, we always enable rendering, and limit the number of threads. - */ - if (dev->gt == 2) { - /* maximum is 60, but limited to 28 */ - max_threads = 28; - } - else { - /* maximum is 24, but limited to 21 (see brwCreateContext()) */ - max_threads = 21; - } + if (gs) { + const struct ilo_shader_cso *cso; - if (max_threads > 28) - max_threads = 28; + dw1 = ilo_shader_get_kernel_offset(gs); - dw2 = GEN6_GS_SPF_MODE; + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + dw6 = cso->payload[3]; + } + else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { + struct ilo_shader_cso cso; + enum ilo_kernel_param param; - dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_SO_STATISTICS_ENABLE | - GEN6_GS_RENDERING_ENABLE; + switch (verts_per_prim) { + case 1: + param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; + break; + case 2: + param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; + break; + default: + param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; + break; + } - /* - * we cannot make use of GEN6_GS_REORDER because it will reorder - * triangle strips according to D3D rules (triangle 2N+1 uses vertices - * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices - * (2N+2, 2N+1, 2N+3)). - */ - dw6 = GEN6_GS_ENABLE; - - if (gs) { - /* VS ouputs must match GS inputs */ - assert(gs->in.count == vs->out.count); - for (i = 0; i < gs->in.count; i++) { - assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]); - assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]); - } + dw1 = ilo_shader_get_kernel_offset(vs) + + ilo_shader_get_kernel_param(vs, param); - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 153: - * - * "It is UNDEFINED to set this field (Vertex URB Entry Read - * Length) to 0 indicating no Vertex URB data to be read and - * passed to the thread." - */ - vue_read_len = (gs->in.count + 1) / 2; - if (!vue_read_len) - vue_read_len = 1; - - dw1 = gs->cache_offset; - dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | - 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | - gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; - - if (gs->in.discard_adj) - dw6 |= GEN6_GS_DISCARD_ADJACENCY; - - if (gs->stream_output) { - dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE; - if (gs->svbi_post_inc) { - dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE | - gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT; - } - } - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 153: - * - * "It is UNDEFINED to set this field (Vertex URB Entry Read - * Length) to 0 indicating no Vertex URB data to be read and - * passed to the thread." - */ - vue_read_len = (vs->out.count + 1) / 2; - if (!vue_read_len) - vue_read_len = 1; - - dw1 = vs_offset; - dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | - 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | - vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; - - if (vs->in.discard_adj) - dw6 |= GEN6_GS_DISCARD_ADJACENCY; - - dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE; - if (vs->svbi_post_inc) { - dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE | - vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT; - } - } + /* cannot use VS's CSO */ + ilo_gpe_init_gs_cso_gen6(dev, vs, &cso); + dw2 = cso.payload[0]; + dw4 = cso.payload[1]; + dw5 = cso.payload[2]; + dw6 = cso.payload[3]; + } + else { + dw1 = 0; + dw2 = 0; + dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT; + dw5 = GEN6_GS_STATISTICS_ENABLE; + dw6 = 0; } ilo_cp_begin(cp, cmd_len); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index 07e6050500b..bf4ed750341 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -241,9 +241,9 @@ typedef void typedef void (*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev, - const struct ilo_shader *gs, - const struct ilo_shader *vs, - uint32_t vs_offset, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim, struct ilo_cp *cp); typedef void diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 74ba793dea1..f374473b291 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -84,19 +84,22 @@ gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp); } -static void -gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, - const struct ilo_shader *gs, - int num_samplers, - struct ilo_cp *cp) +void +ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + struct ilo_shader_cso *cso) { - const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); - const uint8_t cmd_len = 7; + int start_grf, vue_read_len, max_threads; uint32_t dw2, dw4, dw5; - int max_threads; ILO_GPE_VALID_GEN(dev, 7, 7); + start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); + vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); + + /* in pairs */ + vue_read_len = (vue_read_len + 1) / 2; + switch (dev->gen) { case ILO_GEN(7): max_threads = (dev->gt == 2) ? 128 : 36; @@ -106,6 +109,36 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, break; } + dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT; + + dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | + GEN7_GS_INCLUDE_VERTEX_HANDLES | + 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | + start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; + + dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_ENABLE; + + STATIC_ASSERT(Elements(cso->payload) >= 3); + cso->payload[0] = dw2; + cso->payload[1] = dw4; + cso->payload[2] = dw5; +} + +static void +gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, + const struct ilo_shader_state *gs, + int num_samplers, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); + const uint8_t cmd_len = 7; + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5; + + ILO_GPE_VALID_GEN(dev, 7, 7); + if (!gs) { ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -119,20 +152,16 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, return; } - dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT; + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; - dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT | - GEN7_GS_INCLUDE_VERTEX_HANDLES | - 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | - gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; - - dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_ENABLE; + dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT; ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); - ilo_cp_write(cp, gs->cache_offset); + ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs)); ilo_cp_write(cp, dw2); ilo_cp_write(cp, 0); /* scratch */ ilo_cp_write(cp, dw4); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h index 1f123eaa285..f8e8745fcb4 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -158,7 +158,7 @@ typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS; typedef void (*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev, - const struct ilo_shader *gs, + const struct ilo_shader_state *gs, int num_samplers, struct ilo_cp *cp); diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 983cfffc851..086134a2bc8 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -683,6 +683,9 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state, case PIPE_SHADER_VERTEX: ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso); break; + case PIPE_SHADER_GEOMETRY: + ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso); + break; default: break; } -- 2.30.2