From c6e1e0157b9bd9ec416062a21bbd30ca9b69f363 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 18 Jul 2013 05:58:45 +0800 Subject: [PATCH] ilo: support push constant model in shaders Source constants from URB constant data when the constant data can fit in the PCB. --- src/gallium/drivers/ilo/ilo_shader.c | 22 +++++-- .../drivers/ilo/shader/ilo_shader_fs.c | 56 +++++++++++++++++- .../drivers/ilo/shader/ilo_shader_internal.h | 5 ++ .../drivers/ilo/shader/ilo_shader_vs.c | 58 +++++++++++++++++-- src/gallium/drivers/ilo/shader/toy_tgsi.c | 11 +++- src/gallium/drivers/ilo/shader/toy_tgsi.h | 3 + 6 files changed, 143 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index b1a11a1391b..0c785201d1a 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -273,6 +273,13 @@ ilo_shader_variant_init(struct ilo_shader_variant *variant, break; } + /* use PCB unless constant buffer 0 is not in user buffer */ + if ((ilo->cbuf[info->type].enabled_mask & 0x1) && + !ilo->cbuf[info->type].cso[0].user_buffer) + variant->use_pcb = false; + else + variant->use_pcb = true; + num_views = ilo->view[info->type].count; assert(info->num_samplers <= num_views); @@ -341,6 +348,8 @@ ilo_shader_variant_guess(struct ilo_shader_variant *variant, break; } + variant->use_pcb = true; + variant->num_sampler_views = info->num_samplers; for (i = 0; i < info->num_samplers; i++) { if (info->shadow_samplers & (1 << i)) { @@ -747,7 +756,8 @@ ilo_shader_create_vs(const struct ilo_dev_info *dev, /* states used in ilo_shader_variant_init() */ shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS | - ILO_DIRTY_RASTERIZER; + ILO_DIRTY_RASTERIZER | + ILO_DIRTY_CBUF; return shader; } @@ -764,7 +774,8 @@ ilo_shader_create_gs(const struct ilo_dev_info *dev, /* states used in ilo_shader_variant_init() */ shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS | ILO_DIRTY_VS | - ILO_DIRTY_RASTERIZER; + ILO_DIRTY_RASTERIZER | + ILO_DIRTY_CBUF; return shader; } @@ -781,7 +792,8 @@ ilo_shader_create_fs(const struct ilo_dev_info *dev, /* states used in ilo_shader_variant_init() */ shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS | ILO_DIRTY_RASTERIZER | - ILO_DIRTY_FB; + ILO_DIRTY_FB | + ILO_DIRTY_CBUF; return shader; } @@ -1061,10 +1073,10 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader, val = kernel->in.start_grf; break; case ILO_KERNEL_SKIP_CBUF0_UPLOAD: - val = false; + val = kernel->skip_cbuf0_upload; break; case ILO_KERNEL_PCB_CBUF0_SIZE: - val = 0; + val = kernel->pcb.cbuf0_size; break; case ILO_KERNEL_VS_INPUT_INSTANCEID: diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c index 36a308744c6..48d5721631c 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c +++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c @@ -306,6 +306,32 @@ fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc, } } +static bool +fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc, + struct toy_dst dst, int dim, + struct toy_src idx) +{ + const int grf = fcc->first_const_grf + idx.val32 / 2; + const int grf_subreg = (idx.val32 & 1) * 16; + struct toy_src src; + struct toy_dst real_dst[4]; + int i; + + if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM || + grf >= fcc->first_attr_grf) + return false; + + src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010); + + tdst_transpose(dst, real_dst); + for (i = 0; i < 4; i++) { + /* cast to type D to make sure these are raw moves */ + tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i))); + } + + return true; +} + static void fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc, struct toy_dst dst, int dim, struct toy_src idx) @@ -322,6 +348,9 @@ fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc, struct toy_dst tmp, real_dst[4]; int i; + if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx)) + return; + /* set message header */ inst = tc_MOV(tc, header, r0); inst->mask_ctrl = BRW_MASK_DISABLE; @@ -365,6 +394,9 @@ fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc, struct toy_dst tmp, real_dst[4]; int i; + if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx)) + return; + /* * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was * changed from OWord Block Read to ld to increase performance in the @@ -1743,8 +1775,28 @@ fs_setup(struct fs_compile_context *fcc, fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade); fs_setup_shader_out(fcc->shader, &fcc->tgsi); - /* we do not make use of push constant buffers yet */ - num_consts = 0; + if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) { + num_consts = (fcc->tgsi.const_count + 1) / 2; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 287: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to + * 64" + * + * Since we are usually under a high register pressure, do not allow + * for more than 8. + */ + if (num_consts > 8) + num_consts = 0; + } + else { + num_consts = 0; + } + + fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts); + fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); fcc->first_const_grf = fs_setup_payloads(fcc); fcc->first_attr_grf = fcc->first_const_grf + num_consts; diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 07e8ee2f683..8d4a6a18c6b 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -59,6 +59,8 @@ struct ilo_shader_variant { } fs; } u; + bool use_pcb; + int num_sampler_views; struct { unsigned r:3; @@ -102,6 +104,8 @@ struct ilo_shader { bool has_pos; } out; + bool skip_cbuf0_upload; + bool has_kill; bool dispatch_16; @@ -124,6 +128,7 @@ struct ilo_shader { /* what does the push constant buffer consist of? */ struct { + int cbuf0_size; int clip_state_size; } pcb; diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c index dc166d7cc48..b5b44b57796 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c +++ b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c @@ -47,6 +47,7 @@ struct vs_compile_context { int num_grf_per_vrf; int first_const_grf; + int first_ucp_grf; int first_vue_grf; int first_free_grf; int last_free_grf; @@ -79,6 +80,27 @@ vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc, } } +static bool +vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc, + struct toy_dst dst, int dim, + struct toy_src idx) +{ + const int i = idx.val32; + const int grf = vcc->first_const_grf + i / 2; + const int grf_subreg = (i & 1) * 16; + struct toy_src src; + + if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM || + grf >= vcc->first_ucp_grf) + return false; + + + src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041); + tc_MOV(&vcc->tc, dst, src); + + return true; +} + static void vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc, struct toy_dst dst, int dim, @@ -94,6 +116,9 @@ vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc, struct toy_inst *inst; struct toy_src desc; + if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx)) + return; + /* set message header */ inst = tc_MOV(tc, header, r0); inst->mask_ctrl = BRW_MASK_DISABLE; @@ -121,6 +146,9 @@ vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc, tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); struct toy_src desc; + if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx)) + return; + /* * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was * changed from OWord Dual Block Read to ld to increase performance in the @@ -835,7 +863,7 @@ vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs) } for (j = first_ucp; j <= last_ucp; j++) { - const int plane_grf = vcc->first_const_grf + j / 2; + const int plane_grf = vcc->first_ucp_grf + j / 2; const int plane_subreg = (j & 1) * 16; const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF, plane_grf, plane_subreg), TOY_RECT_041); @@ -1199,12 +1227,34 @@ vs_setup(struct vs_compile_context *vcc, vs_setup_shader_out(vcc->shader, &vcc->tgsi, (vcc->variant->u.vs.num_ucps > 0), vcc->output_map); - /* fit each pair of user clip planes into a register */ - num_consts = (vcc->variant->u.vs.num_ucps + 1) / 2; + if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) { + num_consts = (vcc->tgsi.const_count + 1) / 2; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 138: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to + * 32" + */ + if (num_consts > 32) + num_consts = 0; + } + else { + num_consts = 0; + } + + vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts); + vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); /* r0 is reserved for payload header */ vcc->first_const_grf = 1; - vcc->first_vue_grf = vcc->first_const_grf + num_consts; + vcc->first_ucp_grf = vcc->first_const_grf + num_consts; + + /* fit each pair of user clip planes into a register */ + vcc->first_vue_grf = vcc->first_ucp_grf + + (vcc->variant->u.vs.num_ucps + 1) / 2; + vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count; vcc->last_free_grf = 127; diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index d5a3f2fe5af..bf1e37ef584 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -2244,6 +2244,12 @@ parse_instruction(struct toy_tgsi *tgsi, break; } + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *s = &tgsi_inst->Src[i]; + if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect) + tgsi->const_indirect = true; + } + /* remember channels written */ for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; @@ -2398,8 +2404,11 @@ parse_declaration(struct toy_tgsi *tgsi, /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */ assert(!"unexpected immediate declaration"); break; - case TGSI_FILE_NULL: case TGSI_FILE_CONSTANT: + if (tgsi->const_count <= decl->Range.Last) + tgsi->const_count = decl->Range.Last + 1; + break; + case TGSI_FILE_NULL: case TGSI_FILE_TEMPORARY: case TGSI_FILE_SAMPLER: case TGSI_FILE_PREDICATE: diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.h b/src/gallium/drivers/ilo/shader/toy_tgsi.h index 95fc897b7e0..38be9f4f891 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.h +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.h @@ -91,6 +91,9 @@ struct toy_tgsi { } system_values[8]; int num_system_values; + int const_count; + bool const_indirect; + bool uses_kill; }; -- 2.30.2