From: Chia-I Wu Date: Sat, 4 Oct 2014 02:51:20 +0000 (+0800) Subject: ilo: let shaders determine sampler counts X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ca824e69403a32144328d1fb7987d0537e88ee04;p=mesa.git ilo: let shaders determine sampler counts When a shader needs N samplers, we should upload N samplers and not depend on how many are bound. Signed-off-by: Chia-I Wu --- diff --git a/src/gallium/drivers/ilo/ilo_blitter_pipe.c b/src/gallium/drivers/ilo/ilo_blitter_pipe.c index d9d50ead9c5..c4c02bd3e53 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_pipe.c +++ b/src/gallium/drivers/ilo/ilo_blitter_pipe.c @@ -69,12 +69,12 @@ ilo_blitter_pipe_begin(struct ilo_blitter *blitter, case ILO_BLITTER_PIPE_BLIT: case ILO_BLITTER_PIPE_COPY: /* - * we are about to call util_blitter_blit() or - * util_blitter_copy_texture() + * We are about to call util_blitter_blit() or + * util_blitter_copy_texture(). Note that util_blitter uses at most two + * textures. */ util_blitter_save_fragment_sampler_states(b, - vec->sampler[PIPE_SHADER_FRAGMENT].count, - (void **) vec->sampler[PIPE_SHADER_FRAGMENT].cso); + 2, (void **) vec->sampler[PIPE_SHADER_FRAGMENT].cso); util_blitter_save_fragment_sampler_views(b, vec->view[PIPE_SHADER_FRAGMENT].count, diff --git a/src/gallium/drivers/ilo/ilo_builder.h b/src/gallium/drivers/ilo/ilo_builder.h index 38b1c836840..102f11a1853 100644 --- a/src/gallium/drivers/ilo/ilo_builder.h +++ b/src/gallium/drivers/ilo/ilo_builder.h @@ -312,6 +312,23 @@ ilo_builder_dynamic_write(struct ilo_builder *builder, return offset; } +/** + * Reserve some space from the top (for prefetches). 
+ */ +static inline void +ilo_builder_dynamic_pad_top(struct ilo_builder *builder, unsigned len) +{ + const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; + const unsigned size = len << 2; + struct ilo_builder_writer *writer = &builder->writers[which]; + + if (writer->stolen < size) { + ilo_builder_writer_reserve_top(builder, which, + 1, size - writer->stolen); + writer->stolen = size; + } +} + static inline unsigned ilo_builder_dynamic_used(const struct ilo_builder *builder) { diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h index 73b5fae47ae..456a494d12b 100644 --- a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h @@ -273,7 +273,6 @@ gen7_3DSTATE_SBE(struct ilo_builder *builder, static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_shader_state *fs, - int num_samplers, const struct ilo_rasterizer_state *rasterizer, bool dual_blend, bool cc_may_kill, uint32_t hiz_op) @@ -311,8 +310,6 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, dw5 = fs_cso->payload[2]; dw6 = fs_cso->payload[3]; - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - /* * From the Sandy Bridge PRM, volume 2 part 1, page 248: * @@ -399,7 +396,7 @@ gen7_3DSTATE_WM(struct ilo_builder *builder, static inline void gen7_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_shader_state *fs, - int num_samplers, bool dual_blend) + bool dual_blend) { const uint8_t cmd_len = 8; const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); @@ -446,8 +443,6 @@ gen7_3DSTATE_PS(struct ilo_builder *builder, dw4 = cso->payload[1]; dw5 = cso->payload[2]; - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - if (dual_blend) dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h index 4bcab8c29ae..6d6fa0e65ac 100644 --- 
a/src/gallium/drivers/ilo/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h @@ -544,8 +544,7 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, static inline void gen6_3DSTATE_VS(struct ilo_builder *builder, - const struct ilo_shader_state *vs, - int num_samplers) + const struct ilo_shader_state *vs) { const uint8_t cmd_len = 6; const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); @@ -571,8 +570,6 @@ gen6_3DSTATE_VS(struct ilo_builder *builder, dw4 = cso->payload[1]; dw5 = cso->payload[2]; - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; dw[1] = ilo_shader_get_kernel_offset(vs); @@ -584,8 +581,7 @@ gen6_3DSTATE_VS(struct ilo_builder *builder, static inline void gen7_3DSTATE_HS(struct ilo_builder *builder, - const struct ilo_shader_state *hs, - int num_samplers) + const struct ilo_shader_state *hs) { const uint8_t cmd_len = 7; uint32_t *dw; @@ -623,8 +619,7 @@ gen7_3DSTATE_TE(struct ilo_builder *builder) static inline void gen7_3DSTATE_DS(struct ilo_builder *builder, - const struct ilo_shader_state *ds, - int num_samplers) + const struct ilo_shader_state *ds) { const uint8_t cmd_len = 6; uint32_t *dw; @@ -736,8 +731,7 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, static inline void gen7_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs, - int num_samplers) + const struct ilo_shader_state *gs) { const uint8_t cmd_len = 7; const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); @@ -763,8 +757,6 @@ gen7_3DSTATE_GS(struct ilo_builder *builder, dw4 = cso->payload[1]; dw5 = cso->payload[2]; - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; @@ -1423,6 +1415,17 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder, if (!num_samplers) return 0; + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 
132: + * + * "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in + * multiples of 4) the vertex shader 0 kernel uses. Used only for + * prefetching the associated sampler state entries. + * + * It also applies to other shader stages. + */ + ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4))); + state_offset = ilo_builder_dynamic_pointer(builder, ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); diff --git a/src/gallium/drivers/ilo/ilo_render_dynamic.c b/src/gallium/drivers/ilo/ilo_render_dynamic.c index dfd29c3c3ac..5c3687318b8 100644 --- a/src/gallium/drivers/ilo/ilo_render_dynamic.c +++ b/src/gallium/drivers/ilo/ilo_render_dynamic.c @@ -134,9 +134,8 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r, vec->sampler[shader_type].cso; const struct pipe_sampler_view * const *views = (const struct pipe_sampler_view **) vec->view[shader_type].states; - const int num_samplers = vec->sampler[shader_type].count; - const int num_views = vec->view[shader_type].count; uint32_t *sampler_state, *border_color_state; + int sampler_count; bool emit_border_color = false; bool skip = false; @@ -145,26 +144,32 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r, /* SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */ switch (shader_type) { case PIPE_SHADER_VERTEX: - if (DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) { + if (DIRTY(VS) || DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) { sampler_state = &r->state.vs.SAMPLER_STATE; border_color_state = r->state.vs.SAMPLER_BORDER_COLOR_STATE; - if (DIRTY(SAMPLER_VS)) + if (DIRTY(VS) || DIRTY(SAMPLER_VS)) emit_border_color = true; + sampler_count = (vec->vs) ? 
ilo_shader_get_kernel_param(vec->vs, + ILO_KERNEL_SAMPLER_COUNT) : 0; + session->sampler_vs_changed = true; } else { skip = true; } break; case PIPE_SHADER_FRAGMENT: - if (DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) { + if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) { sampler_state = &r->state.wm.SAMPLER_STATE; border_color_state = r->state.wm.SAMPLER_BORDER_COLOR_STATE; - if (DIRTY(SAMPLER_FS)) + if (DIRTY(FS) || DIRTY(SAMPLER_FS)) emit_border_color = true; + sampler_count = (vec->fs) ? ilo_shader_get_kernel_param(vec->fs, + ILO_KERNEL_SAMPLER_COUNT) : 0; + session->sampler_fs_changed = true; } else { skip = true; @@ -178,20 +183,20 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r, if (skip) return; + assert(sampler_count <= Elements(vec->view[shader_type].states) && + sampler_count <= Elements(vec->sampler[shader_type].cso)); + if (emit_border_color) { int i; - for (i = 0; i < num_samplers; i++) { + for (i = 0; i < sampler_count; i++) { border_color_state[i] = (samplers[i]) ? gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0; } } - /* should we take the minimum of num_samplers and num_views? 
*/ *sampler_state = gen6_SAMPLER_STATE(r->builder, - samplers, views, - border_color_state, - MIN2(num_samplers, num_views)); + samplers, views, border_color_state, sampler_count); } static void @@ -322,14 +327,13 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render, for (sh_type = 0; sh_type < PIPE_SHADER_TYPES; sh_type++) { const int alignment = 32 / 4; - int num_samplers, pcb_len; - - num_samplers = vec->sampler[sh_type].count; - pcb_len = 0; + int num_samplers = 0, pcb_len = 0; switch (sh_type) { case PIPE_SHADER_VERTEX: if (vec->vs) { + num_samplers = ilo_shader_get_kernel_param(vec->vs, + ILO_KERNEL_SAMPLER_COUNT); pcb_len = ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE); pcb_len += ilo_shader_get_kernel_param(vec->vs, @@ -340,6 +344,8 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render, break; case PIPE_SHADER_FRAGMENT: if (vec->fs) { + num_samplers = ilo_shader_get_kernel_param(vec->fs, + ILO_KERNEL_SAMPLER_COUNT); pcb_len = ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE); } @@ -350,6 +356,9 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render, /* SAMPLER_STATE array and SAMPLER_BORDER_COLORs */ if (num_samplers) { + /* prefetches are done in multiples of 4 */ + num_samplers = align(num_samplers, 4); + len += align(GEN6_SAMPLER_STATE__SIZE * num_samplers, alignment) + align(GEN6_SAMPLER_BORDER_COLOR__SIZE, alignment) * num_samplers; } diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 389b596e6cb..4221f694028 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -496,8 +496,7 @@ gen6_draw_vs(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) || - r->instruction_bo_changed); + const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed); 
const bool emit_3dstate_constant_vs = session->pcb_vs_changed; /* @@ -516,11 +515,8 @@ gen6_draw_vs(struct ilo_render *r, } /* 3DSTATE_VS */ - if (emit_3dstate_vs) { - const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count; - - gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers); - } + if (emit_3dstate_vs) + gen6_3DSTATE_VS(r->builder, vec->vs); if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6)) gen6_wa_post_3dstate_constant_vs(r); @@ -692,9 +688,8 @@ gen6_draw_wm(struct ilo_render *r, } /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) || + if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER) || r->instruction_bo_changed) { - const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count; const bool dual_blend = vec->blend->dual_blend; const bool cc_may_kill = (vec->dsa->dw_alpha || vec->blend->alpha_to_coverage); @@ -702,7 +697,7 @@ gen6_draw_wm(struct ilo_render *r, if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, vec->fs, num_samplers, + gen6_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, dual_blend, cc_may_kill, 0); } } @@ -849,7 +844,7 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, const struct ilo_blitter *blitter) { gen6_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_VS(r->builder, NULL, 0); + gen6_3DSTATE_VS(r->builder, NULL); gen6_wa_post_3dstate_constant_vs(r); @@ -884,7 +879,7 @@ gen6_rectlist_wm(struct ilo_render *r, gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, NULL, 0, NULL, false, false, hiz_op); + gen6_3DSTATE_WM(r->builder, NULL, NULL, false, false, hiz_op); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 9aefc6fa446..e0e6d06ba99 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ 
-338,8 +338,7 @@ gen7_draw_vs(struct ilo_render *r, session->sampler_vs_changed; /* see gen6_draw_vs() */ const bool emit_3dstate_constant_vs = session->pcb_vs_changed; - const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) || - r->instruction_bo_changed); + const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed); /* emit depth stall before any of the VS commands */ if (emit_3dstate_binding_table || emit_3dstate_sampler_state || @@ -367,11 +366,8 @@ gen7_draw_vs(struct ilo_render *r, } /* 3DSTATE_VS */ - if (emit_3dstate_vs) { - const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count; - - gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers); - } + if (emit_3dstate_vs) + gen6_3DSTATE_VS(r->builder, vec->vs); } static void @@ -382,7 +378,7 @@ gen7_draw_hs(struct ilo_render *r, /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */ if (r->hw_ctx_changed) { gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0); - gen7_3DSTATE_HS(r->builder, NULL, 0); + gen7_3DSTATE_HS(r->builder, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_HS */ @@ -408,7 +404,7 @@ gen7_draw_ds(struct ilo_render *r, /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */ if (r->hw_ctx_changed) { gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0); - gen7_3DSTATE_DS(r->builder, NULL, 0); + gen7_3DSTATE_DS(r->builder, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_DS */ @@ -425,7 +421,7 @@ gen7_draw_gs(struct ilo_render *r, /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */ if (r->hw_ctx_changed) { gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0); - gen7_3DSTATE_GS(r->builder, NULL, 0); + gen7_3DSTATE_GS(r->builder, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ @@ -541,9 +537,7 @@ gen7_draw_wm(struct ilo_render *r, } /* 3DSTATE_PS */ - if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || - r->instruction_bo_changed) { - const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count; + if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) { const bool dual_blend = vec->blend->dual_blend; if ((ilo_dev_gen(r->dev) == 
ILO_GEN(7) || @@ -551,7 +545,7 @@ gen7_draw_wm(struct ilo_render *r, r->hw_ctx_changed) gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, vec->fs, num_samplers, dual_blend); + gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ @@ -562,8 +556,7 @@ gen7_draw_wm(struct ilo_render *r, /* XXX what is the best way to know if this workaround is needed? */ { - const bool emit_3dstate_ps = - (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND)); + const bool emit_3dstate_ps = (DIRTY(FS) || DIRTY(BLEND)); const bool emit_3dstate_depth_buffer = (DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed); @@ -729,18 +722,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, const struct ilo_blitter *blitter) { gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_VS(r->builder, NULL, 0); + gen6_3DSTATE_VS(r->builder, NULL); gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_HS(r->builder, NULL, 0); + gen7_3DSTATE_HS(r->builder, NULL); gen7_3DSTATE_TE(r->builder); gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_DS(r->builder, NULL, 0); + gen7_3DSTATE_DS(r->builder, NULL); gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_GS(r->builder, NULL, 0); + gen7_3DSTATE_GS(r->builder, NULL); gen7_3DSTATE_STREAMOUT(r->builder, 0x0, 0, false); @@ -778,7 +771,7 @@ gen7_rectlist_wm(struct ilo_render *r, gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, NULL, 0, false); + gen7_3DSTATE_PS(r->builder, NULL, false); } static void diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index b43fce7ed79..fdbd2b88ea9 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -1000,6 +1000,9 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader, case ILO_KERNEL_OUTPUT_COUNT: val = kernel->out.count; break; + case ILO_KERNEL_SAMPLER_COUNT: 
+ val = shader->info.num_samplers; + break; case ILO_KERNEL_URB_DATA_START_REG: val = kernel->in.start_grf; break; diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 80e9c19a990..77deee95ef0 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -33,6 +33,7 @@ enum ilo_kernel_param { ILO_KERNEL_INPUT_COUNT, ILO_KERNEL_OUTPUT_COUNT, + ILO_KERNEL_SAMPLER_COUNT, ILO_KERNEL_URB_DATA_START_REG, ILO_KERNEL_SKIP_CBUF0_UPLOAD, ILO_KERNEL_PCB_CBUF0_SIZE, diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 6177ac07a3d..18c1566d93a 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -338,18 +338,6 @@ ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader, dst->cso[start + i] = NULL; } - if (dst->count <= start + count) { - if (samplers) - count += start; - else - count = start; - - while (count > 0 && !dst->cso[count - 1]) - count--; - - dst->count = count; - } - if (changed) { switch (shader) { case PIPE_SHADER_VERTEX: diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 3f3c495b061..c371716118c 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -292,7 +292,6 @@ struct ilo_sampler_cso { struct ilo_sampler_state { const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; - unsigned count; }; struct ilo_view_surface { diff --git a/src/gallium/drivers/ilo/ilo_state_gen6.c b/src/gallium/drivers/ilo/ilo_state_gen6.c index 0192c609672..6f0c92d18f3 100644 --- a/src/gallium/drivers/ilo/ilo_state_gen6.c +++ b/src/gallium/drivers/ilo/ilo_state_gen6.c @@ -411,13 +411,14 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, const struct ilo_shader_state *vs, struct ilo_shader_cso *cso) { - int start_grf, vue_read_len, max_threads; + int start_grf, vue_read_len, sampler_count, max_threads; uint32_t dw2, dw4, dw5; ILO_DEV_ASSERT(dev, 
6, 7.5); start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); + sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT); /* * From the Sandy Bridge PRM, volume 2 part 1, page 135: @@ -464,6 +465,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, } dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | @@ -933,13 +935,14 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, const struct ilo_shader_state *fs, struct ilo_shader_cso *cso) { - int start_grf, input_count, interps, max_threads; + int start_grf, input_count, sampler_count, interps, max_threads; uint32_t dw2, dw4, dw5, dw6; ILO_DEV_ASSERT(dev, 6, 6); start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); interps = ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); @@ -947,6 +950,7 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, max_threads = (dev->gt == 2) ? 80 : 40; dw2 = (true) ? 
0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | diff --git a/src/gallium/drivers/ilo/ilo_state_gen7.c b/src/gallium/drivers/ilo/ilo_state_gen7.c index 53c3aebc634..f91a088b059 100644 --- a/src/gallium/drivers/ilo/ilo_state_gen7.c +++ b/src/gallium/drivers/ilo/ilo_state_gen7.c @@ -39,13 +39,14 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, struct ilo_shader_cso *cso) { - int start_grf, vue_read_len, max_threads; + int start_grf, vue_read_len, sampler_count, max_threads; uint32_t dw2, dw4, dw5; ILO_DEV_ASSERT(dev, 7, 7.5); start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); + sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT); /* in pairs */ vue_read_len = (vue_read_len + 1) / 2; @@ -63,6 +64,7 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, } dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | @@ -131,15 +133,17 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, const struct ilo_shader_state *fs, struct ilo_shader_cso *cso) { - int start_grf, max_threads; + int start_grf, sampler_count, max_threads; uint32_t dw2, dw4, dw5; uint32_t wm_interps, wm_dw1; ILO_DEV_ASSERT(dev, 7, 7.5); start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); + sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; + dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; dw4 = GEN7_PS_DW4_POSOFFSET_NONE;