i965/fs: Replace fs_inst::regs_read with ::size_read using byte units.
author: Francisco Jerez <currojerez@riseup.net>
Thu, 8 Sep 2016 00:00:07 +0000 (17:00 -0700)
committer: Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:53 +0000 (14:50 -0700)
The previous regs_read value can be recovered by rewriting each
reference to regs_read() of the form 'x = i.regs_read(j)' as 'x =
DIV_ROUND_UP(i.size_read(j), reg_unit)', where reg_unit is 4 bytes for
UNIFORM and IMM sources and REG_SIZE otherwise.

For the same reason as in the previous patches, this doesn't attempt
to be particularly clever about simplifying the result in the interest
of keeping the rather lengthy patch as obvious as possible.  I'll come
back later to clean up any ugliness introduced here.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
src/mesa/drivers/dri/i965/brw_ir_fs.h

index 0244f593149bac3d7e7745a107f06e47a7277fee..8f1cd61d3b6beb212baced9742bb3e1d276c98de 100644 (file)
@@ -818,7 +818,7 @@ fs_inst::components_read(unsigned i) const
 }
 
 int
-fs_inst::regs_read(int arg) const
+fs_inst::size_read(int arg) const
 {
    switch (opcode) {
    case FS_OPCODE_FB_WRITE:
@@ -837,28 +837,28 @@ fs_inst::regs_read(int arg) const
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
       if (arg == 0)
-         return mlen;
+         return mlen * REG_SIZE;
       break;
 
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
       /* The payload is actually stored in src1 */
       if (arg == 1)
-         return mlen;
+         return mlen * REG_SIZE;
       break;
 
    case FS_OPCODE_LINTERP:
       if (arg == 1)
-         return 1;
+         return REG_SIZE;
       break;
 
    case SHADER_OPCODE_LOAD_PAYLOAD:
       if (arg < this->header_size)
-         return 1;
+         return REG_SIZE;
       break;
 
    case CS_OPCODE_CS_TERMINATE:
    case SHADER_OPCODE_BARRIER:
-      return 1;
+      return REG_SIZE;
 
    case SHADER_OPCODE_MOV_INDIRECT:
       if (arg == 0) {
@@ -867,7 +867,7 @@ fs_inst::regs_read(int arg) const
 
          if (src[0].file == UNIFORM) {
             assert(region_length % 4 == 0);
-            return region_length / 4;
+            return region_length;
          } else if (src[0].file == FIXED_GRF) {
             /* If the start of the region is not register aligned, then
              * there's some portion of the register that's technically
@@ -884,7 +884,7 @@ fs_inst::regs_read(int arg) const
             if (src[0].subnr)
                region_length += src[0].subnr;
 
-            return DIV_ROUND_UP(region_length, REG_SIZE);
+            return region_length;
          } else {
             assert(!"Invalid register file");
          }
@@ -893,22 +893,20 @@ fs_inst::regs_read(int arg) const
 
    default:
       if (is_tex() && arg == 0 && src[0].file == VGRF)
-         return mlen;
+         return mlen * REG_SIZE;
       break;
    }
 
    switch (src[arg].file) {
    case UNIFORM:
    case IMM:
-      return 1;
+      return 4;
    case BAD_FILE:
    case ARF:
    case FIXED_GRF:
    case VGRF:
    case ATTR:
-      return DIV_ROUND_UP(components_read(arg) *
-                          src[arg].component_size(exec_size),
-                          REG_SIZE);
+      return components_read(arg) * src[arg].component_size(exec_size);
    case MRF:
       unreachable("MRF registers are not allowed as sources");
    }
@@ -2547,7 +2545,7 @@ fs_visitor::opt_sampler_eot()
    for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) {
       if (i == FB_WRITE_LOGICAL_SRC_COLOR0) {
          if (!fb_write->src[i].equals(tex_inst->dst) ||
-             fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written)
+             fb_write->size_read(i) != tex_inst->size_written)
          return false;
       } else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) {
          if (fb_write->src[i].file != BAD_FILE)
@@ -2730,7 +2728,7 @@ fs_visitor::compute_to_mrf()
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (regions_overlap(scan_inst->dst, scan_inst->size_written,
-                             inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+                             inst->src[0], inst->size_read(0))) {
            /* Found the last thing to write our reg we want to turn
             * into a compute-to-MRF.
             */
@@ -2749,7 +2747,7 @@ fs_visitor::compute_to_mrf()
              */
             if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
                 scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
-                inst->src[0].offset / REG_SIZE + inst->regs_read(0))
+                inst->src[0].offset / REG_SIZE + DIV_ROUND_UP(inst->size_read(0), REG_SIZE))
                break;
 
            /* SEND instructions can't have MRF as a destination. */
@@ -2785,8 +2783,8 @@ fs_visitor::compute_to_mrf()
          */
         bool interfered = false;
         for (int i = 0; i < scan_inst->sources; i++) {
-            if (regions_overlap(scan_inst->src[i], scan_inst->regs_read(i) * REG_SIZE,
-                                inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+            if (regions_overlap(scan_inst->src[i], scan_inst->size_read(i),
+                                inst->src[0], inst->size_read(0))) {
               interfered = true;
            }
         }
@@ -2823,7 +2821,7 @@ fs_visitor::compute_to_mrf()
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (regions_overlap(scan_inst->dst, scan_inst->size_written,
-                             inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+                             inst->src[0], inst->size_read(0))) {
             /* Clear the bits for any registers this instruction overwrites. */
             regs_left &= ~mask_relative_to(
                inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
@@ -3027,7 +3025,7 @@ fs_visitor::remove_duplicate_mrf_writes()
          if (last_mrf_move[i] &&
              regions_overlap(inst->dst, inst->size_written,
                              last_mrf_move[i]->src[0],
-                             last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
+                             last_mrf_move[i]->size_read(0))) {
             last_mrf_move[i] = NULL;
          }
       }
@@ -4607,7 +4605,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
    unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
 
    for (unsigned i = 0; i < inst->sources; i++)
-      reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
+      reg_count = MAX2(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
 
    /* Calculate the maximum execution size of the instruction based on the
     * factor by which it goes over the hardware limit of 2 GRFs.
@@ -4632,7 +4630,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
    if (devinfo->gen < 8) {
       for (unsigned i = 0; i < inst->sources; i++) {
          if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
-             inst->regs_read(i) != 0 && inst->regs_read(i) != 2 &&
+             inst->size_read(i) != 0 && DIV_ROUND_UP(inst->size_read(i), REG_SIZE) != 2 &&
              !is_uniform(inst->src[i]) &&
              !(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
                type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
@@ -5114,7 +5112,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
        * the data read from the same source by other lowered instructions.
        */
       if (regions_overlap(inst->dst, inst->size_written,
-                          inst->src[i], inst->regs_read(i) * REG_SIZE) &&
+                          inst->src[i], inst->size_read(i)) &&
           !inst->dst.equals(inst->src[i]))
         return true;
    }
@@ -5371,7 +5369,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       switch (inst->src[i].file) {
       case VGRF:
          fprintf(file, "vgrf%d", inst->src[i].nr);
-         if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
+         if (alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i) ||
              inst->src[i].offset % REG_SIZE != 0)
             fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE,
                     inst->src[i].offset % REG_SIZE);
index 0e239d28d44dde869880a63d0335afe3b928d2fd..f8238aa6d77173b1fca20daa89515bb9263e1ccc 100644 (file)
@@ -44,7 +44,7 @@ struct acp_entry : public exec_node {
    fs_reg dst;
    fs_reg src;
    uint8_t size_written;
-   uint8_t regs_read;
+   uint8_t size_read;
    enum opcode opcode;
    bool saturate;
 };
@@ -367,7 +367,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    /* Bail if inst is reading a range that isn't contained in the range
     * that entry is writing.
     */
-   if (!region_contained_in(inst->src[arg], inst->regs_read(arg),
+   if (!region_contained_in(inst->src[arg], DIV_ROUND_UP(inst->size_read(arg),
+                                                         REG_SIZE),
                             entry->dst, DIV_ROUND_UP(entry->size_written,
                                                      REG_SIZE)))
       return false;
@@ -524,7 +525,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       /* Bail if inst is reading a range that isn't contained in the range
        * that entry is writing.
        */
-      if (!region_contained_in(inst->src[i], inst->regs_read(i),
+      if (!region_contained_in(inst->src[i], DIV_ROUND_UP(inst->size_read(i),
+                                                          REG_SIZE),
                                entry->dst, DIV_ROUND_UP(entry->size_written,
                                                         REG_SIZE)))
          continue;
@@ -785,7 +787,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                /* Make sure we kill the entry if this instruction overwrites
                 * _any_ of the registers that it reads
                 */
-               if (regions_overlap(entry->src, entry->regs_read * REG_SIZE,
+               if (regions_overlap(entry->src, entry->size_read,
                                    inst->dst, inst->size_written))
                   entry->remove();
             }
@@ -800,7 +802,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
          entry->dst = inst->dst;
          entry->src = inst->src[0];
          entry->size_written = inst->size_written;
-         entry->regs_read = inst->regs_read(0);
+         entry->size_read = inst->size_read(0);
          entry->opcode = inst->opcode;
          entry->saturate = inst->saturate;
          acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
@@ -818,7 +820,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                entry->dst.offset += offset * REG_SIZE;
                entry->src = inst->src[i];
                entry->size_written = size_written;
-               entry->regs_read = inst->regs_read(i);
+               entry->size_read = inst->size_read(i);
                entry->opcode = inst->opcode;
                if (!entry->dst.equals(inst->src[i])) {
                   acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
index cea81e4646a890c0345cb6c949f055b96a3cf2f5..2e5c8e54d221db017faff2dd8ad72a42787379c6 100644 (file)
@@ -324,7 +324,7 @@ public:
    bool is_partial_write() const;
    bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
    unsigned components_read(unsigned i) const;
-   int regs_read(int arg) const;
+   int size_read(int arg) const;
    bool can_do_source_mods(const struct gen_device_info *devinfo);
    bool can_change_types() const;
    bool has_side_effects() const;
@@ -435,7 +435,9 @@ inline unsigned
 regs_read(const fs_inst *inst, unsigned i)
 {
    /* XXX - Take into account register-misaligned offsets correctly. */
-   return inst->regs_read(i);
+   const unsigned reg_size =
+      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
+   return DIV_ROUND_UP(inst->size_read(i), reg_size);
 }
 
 #endif