i965/fs: Add wrapper functions for fs_inst::regs_read and ::regs_written.
author: Francisco Jerez <currojerez@riseup.net>
Wed, 7 Sep 2016 23:59:35 +0000 (16:59 -0700)
committer: Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:53 +0000 (14:50 -0700)
This is in preparation for dropping fs_inst::regs_read and
::regs_written in favor of more accurate alternatives expressed in
byte units.  The main reason these wrappers are useful is that a
number of optimization passes implement dataflow analysis with
register granularity, so these helpers will come in handy once we've
switched register offsets and sizes to the byte representation.  The
wrapper functions will also make sure that GRF misalignment (currently
neglected by most of the back-end) is taken into account correctly in
the calculation of regs_read and regs_written.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_cse.cpp
src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
src/mesa/drivers/dri/i965/brw_fs_validate.cpp
src/mesa/drivers/dri/i965/brw_ir_fs.h
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index 04f04572d8324f23a355015502dcecfe85f4134a..802aa9f76f4794fc457a94598e0fdba52c5bac36 100644 (file)
@@ -1772,13 +1772,13 @@ fs_visitor::split_virtual_grfs()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->dst.file == VGRF) {
          int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
-         for (int j = 1; j < inst->regs_written; j++)
+         for (unsigned j = 1; j < regs_written(inst); j++)
             split_points[reg + j] = false;
       }
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
             int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
-            for (int j = 1; j < inst->regs_read(i); j++)
+            for (unsigned j = 1; j < regs_read(inst, i); j++)
                split_points[reg + j] = false;
          }
       }
@@ -2611,7 +2611,7 @@ fs_visitor::opt_register_renaming()
          if (remap[dst] == -1) {
             remap[dst] = dst;
          } else {
-            remap[dst] = alloc.allocate(inst->regs_written);
+            remap[dst] = alloc.allocate(regs_written(inst));
             inst->dst.nr = remap[dst];
             progress = true;
          }
@@ -2727,7 +2727,7 @@ fs_visitor::compute_to_mrf()
        * regs_left bitset keeps track of the registers we haven't yet found a
        * generating instruction for.
        */
-      unsigned regs_left = (1 << inst->regs_read(0)) - 1;
+      unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
@@ -2819,7 +2819,7 @@ fs_visitor::compute_to_mrf()
       /* Found all generating instructions of our MRF's source value, so it
        * should be safe to rewrite them to point to the MRF directly.
        */
-      regs_left = (1 << inst->regs_read(0)) - 1;
+      regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
@@ -3086,7 +3086,7 @@ void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
                                                         fs_inst *inst)
 {
-   int write_len = inst->regs_written;
+   int write_len = regs_written(inst);
    int first_write_grf = inst->dst.nr;
    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -3119,7 +3119,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
        * dependency has more latency than a MOV.
        */
       if (scan_inst->dst.file == VGRF) {
-         for (int i = 0; i < scan_inst->regs_written; i++) {
+         for (unsigned i = 0; i < regs_written(scan_inst); i++) {
             int reg = scan_inst->dst.nr + i;
 
             if (reg >= first_write_grf &&
@@ -3157,7 +3157,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
 void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
 {
-   int write_len = inst->regs_written;
+   int write_len = regs_written(inst);
    int first_write_grf = inst->dst.nr;
    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -3800,7 +3800,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       /* Send from the GRF */
       fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
       load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
-      payload.nr = bld.shader->alloc.allocate(load->regs_written);
+      payload.nr = bld.shader->alloc.allocate(regs_written(load));
       load->dst = payload;
 
       inst->src[0] = payload;
@@ -3821,7 +3821,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    inst->opcode = FS_OPCODE_FB_WRITE;
-   inst->mlen = load->regs_written;
+   inst->mlen = regs_written(load);
    inst->header_size = header_size;
 }
 
@@ -4069,7 +4069,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
                                 unsigned grad_components)
 {
    const gen_device_info *devinfo = bld.shader->devinfo;
-   int reg_width = bld.dispatch_width() / 8;
+   unsigned reg_width = bld.dispatch_width() / 8;
    unsigned header_size = 0, length = 0;
    fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
    for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
@@ -4097,9 +4097,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
        * and we have an explicit header, we need to set up the sampler
        * writemask.  It's reversed from normal: 1 means "don't write".
        */
-      if (!inst->eot && inst->regs_written != 4 * reg_width) {
-         assert((inst->regs_written % reg_width) == 0);
-         unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf;
+      if (!inst->eot && regs_written(inst) != 4 * reg_width) {
+         assert(regs_written(inst) % reg_width == 0);
+         unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
          inst->offset |= mask << 12;
       }
    }
index 0c65c5b94b71b524352fb0831b6e40cd97771342..4744142a4b63884912bb3c7546bb48a9e92b0cfc 100644 (file)
@@ -199,8 +199,8 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
 static void
 create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
 {
-   int written = inst->regs_written;
-   int dst_width =
+   unsigned written = regs_written(inst);
+   unsigned dst_width =
       DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
    fs_inst *copy;
 
@@ -234,7 +234,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
       copy->force_writemask_all = inst->force_writemask_all;
       copy->src[0].negate = negate;
    }
-   assert(copy->regs_written == written);
+   assert(regs_written(copy) == written);
 }
 
 bool
@@ -284,7 +284,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
             if (no_existing_temp && !entry->generator->dst.is_null()) {
                const fs_builder ibld = fs_builder(this, block, entry->generator)
                                        .at(block, entry->generator->next);
-               int written = entry->generator->regs_written;
+               int written = regs_written(entry->generator);
 
                entry->tmp = fs_reg(VGRF, alloc.allocate(written),
                                    entry->generator->dst.type);
index 45f5c5ebb9b42c3372c54e5309bfcecc865bde91..4558bd42a24a198dcd6e85e0f5a7a2c01db12fb5 100644 (file)
@@ -55,12 +55,12 @@ fs_visitor::dead_code_eliminate()
          if (inst->dst.file == VGRF && !inst->has_side_effects()) {
             bool result_live = false;
 
-            if (inst->regs_written == 1) {
+            if (regs_written(inst) == 1) {
                int var = live_intervals->var_from_reg(inst->dst);
                result_live = BITSET_TEST(live, var);
             } else {
                int var = live_intervals->var_from_reg(inst->dst);
-               for (int i = 0; i < inst->regs_written; i++) {
+               for (unsigned i = 0; i < regs_written(inst); i++) {
                   result_live = result_live || BITSET_TEST(live, var + i);
                }
             }
@@ -96,7 +96,7 @@ fs_visitor::dead_code_eliminate()
          if (inst->dst.file == VGRF) {
             if (!inst->is_partial_write()) {
                int var = live_intervals->var_from_reg(inst->dst);
-               for (int i = 0; i < inst->regs_written; i++) {
+               for (unsigned i = 0; i < regs_written(inst); i++) {
                   BITSET_CLEAR(live, var + i);
                }
             }
@@ -114,7 +114,7 @@ fs_visitor::dead_code_eliminate()
             if (inst->src[i].file == VGRF) {
                int var = live_intervals->var_from_reg(inst->src[i]);
 
-               for (int j = 0; j < inst->regs_read(i); j++) {
+               for (unsigned j = 0; j < regs_read(inst, i); j++) {
                   BITSET_SET(live, var + j);
                }
             }
index 02dc77744276cf61eeb4805990a405e61383443a..a6c98e33218d91140406b391d90dfad0a03e875d 100644 (file)
@@ -118,7 +118,7 @@ fs_live_variables::setup_def_use()
             if (reg.file != VGRF)
                continue;
 
-            for (int j = 0; j < inst->regs_read(i); j++) {
+            for (unsigned j = 0; j < regs_read(inst, i); j++) {
                setup_one_read(bd, inst, ip, reg);
                reg.offset += REG_SIZE;
             }
@@ -129,7 +129,7 @@ fs_live_variables::setup_def_use()
          /* Set def[] for this instruction */
          if (inst->dst.file == VGRF) {
             fs_reg reg = inst->dst;
-            for (int j = 0; j < inst->regs_written; j++) {
+            for (unsigned j = 0; j < regs_written(inst); j++) {
                setup_one_write(bd, inst, ip, reg);
                reg.offset += REG_SIZE;
             }
index 82adaa3516625748acfaf4406d459b326a5e19ff..572735a379ace9cdf032d2084fd4fe4326596862 100644 (file)
@@ -362,9 +362,9 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
             if (node_nr >= payload_node_count)
                continue;
 
-            for (int j = 0; j < inst->regs_read(i); j++) {
+            for (unsigned j = 0; j < regs_read(inst, i); j++) {
                payload_last_use_ip[node_nr + j] = use_ip;
-               assert(node_nr + j < payload_node_count);
+               assert(node_nr + j < unsigned(payload_node_count));
             }
          }
       }
@@ -903,10 +903,10 @@ fs_visitor::spill_reg(int spill_reg)
       for (unsigned int i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == VGRF &&
              inst->src[i].nr == spill_reg) {
-            int regs_read = inst->regs_read(i);
+            int count = regs_read(inst, i);
             int subset_spill_offset = spill_offset +
                ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
-            fs_reg unspill_dst(VGRF, alloc.allocate(regs_read));
+            fs_reg unspill_dst(VGRF, alloc.allocate(count));
 
             inst->src[i].nr = unspill_dst.nr;
             inst->src[i].offset %= REG_SIZE;
@@ -916,7 +916,7 @@ fs_visitor::spill_reg(int spill_reg)
              * hardware) up to the maximum supported block size.
              */
             const unsigned width =
-               MIN2(32, 1u << (ffs(MAX2(1, regs_read) * 8) - 1));
+               MIN2(32, 1u << (ffs(MAX2(1, count) * 8) - 1));
 
             /* Set exec_all() on unspill messages under the (rather
              * pessimistic) assumption that there is no one-to-one
@@ -926,7 +926,7 @@ fs_visitor::spill_reg(int spill_reg)
              * unspill destination is a block-local temporary.
              */
             emit_unspill(ibld.exec_all().group(width, 0),
-                         unspill_dst, subset_spill_offset, regs_read);
+                         unspill_dst, subset_spill_offset, count);
         }
       }
 
@@ -934,7 +934,7 @@ fs_visitor::spill_reg(int spill_reg)
           inst->dst.nr == spill_reg) {
          int subset_spill_offset = spill_offset +
             ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
-         fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written));
+         fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst)));
 
          inst->dst.nr = spill_src.nr;
          inst->dst.offset %= REG_SIZE;
@@ -971,19 +971,19 @@ fs_visitor::spill_reg(int spill_reg)
          const fs_builder ubld = ibld.exec_all(!per_channel).group(width, 0);
 
         /* If our write is going to affect just part of the
-          * inst->regs_written(), then we need to unspill the destination
-          * since we write back out all of the regs_written().  If the
-          * original instruction had force_writemask_all set and is not a
-          * partial write, there should be no need for the unspill since the
+          * regs_written(inst), then we need to unspill the destination since
+          * we write back out all of the regs_written().  If the original
+          * instruction had force_writemask_all set and is not a partial
+          * write, there should be no need for the unspill since the
           * instruction will be overwriting the whole destination in any case.
          */
          if (inst->is_partial_write() ||
              (!inst->force_writemask_all && !per_channel))
             emit_unspill(ubld, spill_src, subset_spill_offset,
-                         inst->regs_written);
+                         regs_written(inst));
 
          emit_spill(ubld.at(block, inst->next), spill_src,
-                    subset_spill_offset, inst->regs_written);
+                    subset_spill_offset, regs_written(inst));
       }
    }
 
index 651c136dfa7cd18ae421452b961663a3c8bbf4f2..3dd0fbfc1c1c2d79d536d4fc10eab9f75b78c641 100644 (file)
@@ -190,7 +190,7 @@ fs_visitor::register_coalesce()
             dst_reg_offset[i] = i;
          }
          mov[0] = inst;
-         channels_remaining -= inst->regs_written;
+         channels_remaining -= regs_written(inst);
       } else {
          const int offset = inst->src[0].offset / REG_SIZE;
          if (mov[offset]) {
@@ -207,7 +207,7 @@ fs_visitor::register_coalesce()
          if (inst->regs_written > 1)
             dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
          mov[offset] = inst;
-         channels_remaining -= inst->regs_written;
+         channels_remaining -= regs_written(inst);
       }
 
       if (channels_remaining)
index 10ad7c37b24b0b814bb7c144abc1e6ff13d3b88d..676942c19c00501180efe1f92f2139323a178f0d 100644 (file)
@@ -43,14 +43,14 @@ fs_visitor::validate()
 {
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
       if (inst->dst.file == VGRF) {
-         fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <=
+         fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
                     alloc.sizes[inst->dst.nr]);
       }
 
       for (unsigned i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
-            fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <=
-                       (int)alloc.sizes[inst->src[i].nr]);
+            fsv_assert(inst->src[i].offset / REG_SIZE + regs_read(inst, i) <=
+                       alloc.sizes[inst->src[i].nr]);
          }
       }
    }
index 19ef242d166841c43409c68fcf913a85b6630bdc..de08a691055f06fc3c423d9ab459d02715c7d176 100644 (file)
@@ -411,4 +411,30 @@ set_saturate(bool saturate, fs_inst *inst)
    return inst;
 }
 
+/**
+ * Return the number of dataflow registers written by 'inst' (either fully
+ * or partially) counted from 'floor(reg_offset(inst->dst) / register_size)'.
+ * The somewhat arbitrary register size unit is 4B for the UNIFORM and IMM
+ * files and 32B for all other files.  Currently forwards inst->regs_written.
+ */
+inline unsigned
+regs_written(const fs_inst *inst)
+{
+   /* XXX - Take into account register-misaligned offsets correctly. */
+   return inst->regs_written;
+}
+
+/**
+ * Return the number of dataflow registers read by source 'i' of 'inst'
+ * (fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
+ * register_size)'.  The (arbitrary) register size unit is 4B for the UNIFORM
+ * and IMM files and 32B for all others.  Currently forwards regs_read(i).
+ */
+inline unsigned
+regs_read(const fs_inst *inst, unsigned i)
+{
+   /* XXX - Take into account register-misaligned offsets correctly. */
+   return inst->regs_read(i);
+}
+
 #endif
index dde75547590143ec90a60f2cbdf4835423081772..0d3a07cad5b5b419655422164b1529c2e81f96e1 100644 (file)
@@ -620,7 +620,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
          if (inst->src[i].nr >= hw_reg_count)
             continue;
 
-         for (int j = 0; j < inst->regs_read(i); j++)
+         for (unsigned j = 0; j < regs_read(inst, i); j++)
             hw_reads_remaining[inst->src[i].nr + j]++;
       }
    }
@@ -702,7 +702,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
          reads_remaining[inst->src[i].nr]--;
       } else if (inst->src[i].file == FIXED_GRF &&
                  inst->src[i].nr < hw_reg_count) {
-         for (int off = 0; off < inst->regs_read(i); off++)
+         for (unsigned off = 0; off < regs_read(inst, i); off++)
             hw_reads_remaining[inst->src[i].nr + off]--;
       }
    }
@@ -731,7 +731,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
 
       if (inst->src[i].file == FIXED_GRF &&
           inst->src[i].nr < hw_reg_count) {
-         for (int off = 0; off < inst->regs_read(i); off++) {
+         for (unsigned off = 0; off < regs_read(inst, i); off++) {
             int reg = inst->src[i].nr + off;
             if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
                 hw_reads_remaining[reg] == 1) {
@@ -1004,17 +1004,17 @@ fs_instruction_scheduler::calculate_deps()
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(last_grf_write[inst->src[i].nr + r], n);
             } else {
-               for (int r = 0; r < inst->regs_read(i); r++) {
+               for (unsigned r = 0; r < regs_read(inst, i); r++) {
                   add_dep(last_grf_write[inst->src[i].nr * 16 +
                                          inst->src[i].offset / REG_SIZE + r], n);
                }
             }
          } else if (inst->src[i].file == FIXED_GRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(last_grf_write[inst->src[i].nr + r], n);
             } else {
                add_dep(last_fixed_grf_write, n);
@@ -1052,12 +1052,12 @@ fs_instruction_scheduler::calculate_deps()
       /* write-after-write deps. */
       if (inst->dst.file == VGRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++) {
+            for (unsigned r = 0; r < regs_written(inst); r++) {
                add_dep(last_grf_write[inst->dst.nr + r], n);
                last_grf_write[inst->dst.nr + r] = n;
             }
          } else {
-            for (int r = 0; r < inst->regs_written; r++) {
+            for (unsigned r = 0; r < regs_written(inst); r++) {
                add_dep(last_grf_write[inst->dst.nr * 16 +
                                       inst->dst.offset / REG_SIZE + r], n);
                last_grf_write[inst->dst.nr * 16 +
@@ -1079,7 +1079,7 @@ fs_instruction_scheduler::calculate_deps()
          }
       } else if (inst->dst.file == FIXED_GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++)
+            for (unsigned r = 0; r < regs_written(inst); r++)
                last_grf_write[inst->dst.nr + r] = n;
          } else {
             last_fixed_grf_write = n;
@@ -1130,17 +1130,17 @@ fs_instruction_scheduler::calculate_deps()
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
             } else {
-               for (int r = 0; r < inst->regs_read(i); r++) {
+               for (unsigned r = 0; r < regs_read(inst, i); r++) {
                   add_dep(n, last_grf_write[inst->src[i].nr * 16 +
                                             inst->src[i].offset / REG_SIZE + r], 0);
                }
             }
          } else if (inst->src[i].file == FIXED_GRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
             } else {
                add_dep(n, last_fixed_grf_write, 0);
@@ -1180,10 +1180,10 @@ fs_instruction_scheduler::calculate_deps()
        */
       if (inst->dst.file == VGRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++)
+            for (unsigned r = 0; r < regs_written(inst); r++)
                last_grf_write[inst->dst.nr + r] = n;
          } else {
-            for (int r = 0; r < inst->regs_written; r++) {
+            for (unsigned r = 0; r < regs_written(inst); r++) {
                last_grf_write[inst->dst.nr * 16 +
                               inst->dst.offset / REG_SIZE + r] = n;
             }
@@ -1203,7 +1203,7 @@ fs_instruction_scheduler::calculate_deps()
          }
       } else if (inst->dst.file == FIXED_GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++)
+            for (unsigned r = 0; r < regs_written(inst); r++)
                last_grf_write[inst->dst.nr + r] = n;
          } else {
             last_fixed_grf_write = n;