From f6c4aace22aceb9089eb0e3fa297eab09dac4ce1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 13 Jan 2017 15:36:51 -0800 Subject: [PATCH] intel/fs: Extend thread payload layout to SIMD32 And handle 32-wide payload register reads in fetch_payload_reg(). v2 (Jason Ekstrand); - Fix some whitespace and brace placement Reviewed-by: Jason Ekstrand Reviewed-by: Matt Turner --- src/intel/compiler/brw_fs.cpp | 14 +++++------ src/intel/compiler/brw_fs.h | 42 +++++++++++++++++++++++--------- src/intel/compiler/brw_wm_iz.cpp | 11 ++++++--- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 18bcdc4f8b3..173fc8593d3 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4050,12 +4050,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, assert(length == 0 || length == 2); header_size = length; - if (payload.aa_dest_stencil_reg) { + if (payload.aa_dest_stencil_reg[0]) { assert(inst->group < 16); sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1)); bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") .MOV(sources[length], - fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); + fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg[0], 0))); length++; } @@ -6054,7 +6054,7 @@ fs_visitor::setup_fs_payload_gen6() */ for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { if (prog_data->barycentric_interp_modes & (1 << i)) { - payload.barycentric_coord_reg[i] = payload.num_regs; + payload.barycentric_coord_reg[i][0] = payload.num_regs; payload.num_regs += 2; if (dispatch_width == 16) { payload.num_regs += 2; @@ -6066,7 +6066,7 @@ fs_visitor::setup_fs_payload_gen6() prog_data->uses_src_depth = (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0; if (prog_data->uses_src_depth) { - payload.source_depth_reg = payload.num_regs; + payload.source_depth_reg[0] = payload.num_regs; payload.num_regs++; if (dispatch_width == 16) { /* R28: interpolated depth if not SIMD8. */ @@ -6078,7 +6078,7 @@ fs_visitor::setup_fs_payload_gen6() prog_data->uses_src_w = (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0; if (prog_data->uses_src_w) { - payload.source_w_reg = payload.num_regs; + payload.source_w_reg[0] = payload.num_regs; payload.num_regs++; if (dispatch_width == 16) { /* R30: interpolated W if not SIMD8. */ @@ -6099,7 +6099,7 @@ fs_visitor::setup_fs_payload_gen6() * persample dispatch, we hard-code it to 0.5. */ prog_data->uses_pos_offset = true; - payload.sample_pos_reg = payload.num_regs; + payload.sample_pos_reg[0] = payload.num_regs; payload.num_regs++; } @@ -6108,7 +6108,7 @@ fs_visitor::setup_fs_payload_gen6() (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0; if (prog_data->uses_sample_mask) { assert(devinfo->gen >= 7); - payload.sample_mask_in_reg = payload.num_regs; + payload.sample_mask_in_reg[0] = payload.num_regs; payload.num_regs++; if (dispatch_width == 16) { /* R33: input coverage mask if not SIMD8. */ diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f0b2d5e8ad3..51529e89795 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -338,14 +338,15 @@ public: /** Register numbers for thread payload fields. */ struct thread_payload { - uint8_t source_depth_reg; - uint8_t source_w_reg; - uint8_t aa_dest_stencil_reg; - uint8_t dest_depth_reg; - uint8_t sample_pos_reg; - uint8_t sample_mask_in_reg; - uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT]; - uint8_t local_invocation_id_reg; + uint8_t subspan_coord_reg[2]; + uint8_t source_depth_reg[2]; + uint8_t source_w_reg[2]; + uint8_t aa_dest_stencil_reg[2]; + uint8_t dest_depth_reg[2]; + uint8_t sample_pos_reg[2]; + uint8_t sample_mask_in_reg[2]; + uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2]; + uint8_t local_invocation_id_reg[2]; /** The number of thread payload registers the hardware will supply. */ uint8_t num_regs; @@ -499,13 +500,32 @@ private: namespace brw { inline fs_reg - fetch_payload_reg(const brw::fs_builder &bld, uint8_t reg, + fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2], brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1) { - if (!reg) { + if (!regs[0]) return fs_reg(); + + if (bld.dispatch_width() > 16) { + const fs_reg tmp = bld.vgrf(type, n); + const brw::fs_builder hbld = bld.exec_all().group(16, 0); + const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); + fs_reg *const components = new fs_reg[n * m]; + + for (unsigned c = 0; c < n; c++) { + for (unsigned g = 0; g < m; g++) { + components[c * m + g] = + offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c); + } + } + + hbld.LOAD_PAYLOAD(tmp, components, n * m, 0); + + delete[] components; + return tmp; + } else { - return fs_reg(retype(brw_vec8_grf(reg, 0), type)); + return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); } } } diff --git a/src/intel/compiler/brw_wm_iz.cpp b/src/intel/compiler/brw_wm_iz.cpp index fead16586b6..b9b7e70a0f3 100644 --- a/src/intel/compiler/brw_wm_iz.cpp +++ b/src/intel/compiler/brw_wm_iz.cpp @@ -122,9 +122,10 @@ static const struct { void fs_visitor::setup_fs_payload_gen4() { assert(stage == MESA_SHADER_FRAGMENT); + assert(dispatch_width <= 16); struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - GLuint reg = 2; + GLuint reg = 1; bool kill_stats_promoted_workaround = false; int lookup = key->iz_lookup; @@ -141,11 +142,13 @@ void fs_visitor::setup_fs_payload_gen4() kill_stats_promoted_workaround = true; } + payload.subspan_coord_reg[0] = reg++; + prog_data->uses_src_depth = (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0; if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth || kill_stats_promoted_workaround) { - payload.source_depth_reg = reg; + payload.source_depth_reg[0] = reg; reg += 2; } @@ -153,14 +156,14 @@ void fs_visitor::setup_fs_payload_gen4() source_depth_to_render_target = true; if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) { - payload.aa_dest_stencil_reg = reg; + payload.aa_dest_stencil_reg[0] = reg; runtime_check_aads_emit = !wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES; reg++; } if (wm_iz_table[lookup].dd_present) { - payload.dest_depth_reg = reg; + payload.dest_depth_reg[0] = reg; reg+=2; } -- 2.30.2