intel/fs: Extend thread payload layout to SIMD32
author Francisco Jerez <currojerez@riseup.net>
Fri, 13 Jan 2017 23:36:51 +0000 (15:36 -0800)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 28 Jun 2018 20:19:38 +0000 (13:19 -0700)
And handle 32-wide payload register reads in fetch_payload_reg().

v2 (Jason Ekstrand):
 - Fix some whitespace and brace placement

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_wm_iz.cpp

index 18bcdc4f8b381abfc45f44789a4c4c54db608ace..173fc8593d35789335121b7dddcd9c6a14515142 100644 (file)
@@ -4050,12 +4050,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    assert(length == 0 || length == 2);
    header_size = length;
 
-   if (payload.aa_dest_stencil_reg) {
+   if (payload.aa_dest_stencil_reg[0]) {
       assert(inst->group < 16);
       sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
       bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
          .MOV(sources[length],
-              fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
+              fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg[0], 0)));
       length++;
    }
 
@@ -6054,7 +6054,7 @@ fs_visitor::setup_fs_payload_gen6()
     */
    for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
       if (prog_data->barycentric_interp_modes & (1 << i)) {
-         payload.barycentric_coord_reg[i] = payload.num_regs;
+         payload.barycentric_coord_reg[i][0] = payload.num_regs;
          payload.num_regs += 2;
          if (dispatch_width == 16) {
             payload.num_regs += 2;
@@ -6066,7 +6066,7 @@ fs_visitor::setup_fs_payload_gen6()
    prog_data->uses_src_depth =
       (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
    if (prog_data->uses_src_depth) {
-      payload.source_depth_reg = payload.num_regs;
+      payload.source_depth_reg[0] = payload.num_regs;
       payload.num_regs++;
       if (dispatch_width == 16) {
          /* R28: interpolated depth if not SIMD8. */
@@ -6078,7 +6078,7 @@ fs_visitor::setup_fs_payload_gen6()
    prog_data->uses_src_w =
       (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
    if (prog_data->uses_src_w) {
-      payload.source_w_reg = payload.num_regs;
+      payload.source_w_reg[0] = payload.num_regs;
       payload.num_regs++;
       if (dispatch_width == 16) {
          /* R30: interpolated W if not SIMD8. */
@@ -6099,7 +6099,7 @@ fs_visitor::setup_fs_payload_gen6()
        * persample dispatch, we hard-code it to 0.5.
        */
       prog_data->uses_pos_offset = true;
-      payload.sample_pos_reg = payload.num_regs;
+      payload.sample_pos_reg[0] = payload.num_regs;
       payload.num_regs++;
    }
 
@@ -6108,7 +6108,7 @@ fs_visitor::setup_fs_payload_gen6()
       (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
    if (prog_data->uses_sample_mask) {
       assert(devinfo->gen >= 7);
-      payload.sample_mask_in_reg = payload.num_regs;
+      payload.sample_mask_in_reg[0] = payload.num_regs;
       payload.num_regs++;
       if (dispatch_width == 16) {
          /* R33: input coverage mask if not SIMD8. */
index f0b2d5e8ad35be4c7ab94b8d791b8e205fbe22e1..51529e89795e4ba9dca5f6761ce5c09636fbf10f 100644 (file)
@@ -338,14 +338,15 @@ public:
 
    /** Register numbers for thread payload fields. */
    struct thread_payload {
-      uint8_t source_depth_reg;
-      uint8_t source_w_reg;
-      uint8_t aa_dest_stencil_reg;
-      uint8_t dest_depth_reg;
-      uint8_t sample_pos_reg;
-      uint8_t sample_mask_in_reg;
-      uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
-      uint8_t local_invocation_id_reg;
+      uint8_t subspan_coord_reg[2];
+      uint8_t source_depth_reg[2];
+      uint8_t source_w_reg[2];
+      uint8_t aa_dest_stencil_reg[2];
+      uint8_t dest_depth_reg[2];
+      uint8_t sample_pos_reg[2];
+      uint8_t sample_mask_in_reg[2];
+      uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
+      uint8_t local_invocation_id_reg[2];
 
       /** The number of thread payload registers the hardware will supply. */
       uint8_t num_regs;
@@ -499,13 +500,32 @@ private:
 
 namespace brw {
    inline fs_reg
-   fetch_payload_reg(const brw::fs_builder &bld, uint8_t reg,
+   fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
                      brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1)
    {
-      if (!reg) {
+      if (!regs[0])
          return fs_reg();
+
+      if (bld.dispatch_width() > 16) {
+         const fs_reg tmp = bld.vgrf(type, n);
+         const brw::fs_builder hbld = bld.exec_all().group(16, 0);
+         const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
+         fs_reg *const components = new fs_reg[n * m];
+
+         for (unsigned c = 0; c < n; c++) {
+            for (unsigned g = 0; g < m; g++) {
+               components[c * m + g] =
+                  offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
+            }
+         }
+
+         hbld.LOAD_PAYLOAD(tmp, components, n * m, 0);
+
+         delete[] components;
+         return tmp;
+
       } else {
-         return fs_reg(retype(brw_vec8_grf(reg, 0), type));
+         return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
       }
    }
 }
index fead16586b68c1b14d319ea8d1c2bf661c36ba72..b9b7e70a0f3729ce5eb6448835a8a64f33cc7525 100644 (file)
@@ -122,9 +122,10 @@ static const struct {
 void fs_visitor::setup_fs_payload_gen4()
 {
    assert(stage == MESA_SHADER_FRAGMENT);
+   assert(dispatch_width <= 16);
    struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-   GLuint reg = 2;
+   GLuint reg = 1;
    bool kill_stats_promoted_workaround = false;
    int lookup = key->iz_lookup;
 
@@ -141,11 +142,13 @@ void fs_visitor::setup_fs_payload_gen4()
       kill_stats_promoted_workaround = true;
    }
 
+   payload.subspan_coord_reg[0] = reg++;
+
    prog_data->uses_src_depth =
       (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
    if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
        kill_stats_promoted_workaround) {
-      payload.source_depth_reg = reg;
+      payload.source_depth_reg[0] = reg;
       reg += 2;
    }
 
@@ -153,14 +156,14 @@ void fs_visitor::setup_fs_payload_gen4()
       source_depth_to_render_target = true;
 
    if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
-      payload.aa_dest_stencil_reg = reg;
+      payload.aa_dest_stencil_reg[0] = reg;
       runtime_check_aads_emit =
          !wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
       reg++;
    }
 
    if (wm_iz_table[lookup].dd_present) {
-      payload.dest_depth_reg = reg;
+      payload.dest_depth_reg[0] = reg;
       reg+=2;
    }