i965/fs: Replace fs_inst::regs_written with ::size_written field in bytes.

author Francisco Jerez <currojerez@riseup.net>
Wed, 7 Sep 2016 20:38:20 +0000 (13:38 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:53 +0000 (14:50 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 802aa9f76f4794fc457a94598e0fdba52c5bac36..0244f593149bac3d7e7745a107f06e47a7277fee 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -76,11 +76,10 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
     case FIXED_GRF:
     case MRF:
     case ATTR:
-      this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
-                                        REG_SIZE);
+      this->size_written = dst.component_size(exec_size);
        break;
     case BAD_FILE:
-      this->regs_written = 0;
+      this->size_written = 0;
        break;
     case IMM:
     case UNIFORM:
@@ -192,7 +191,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
     fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
     fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
                              vec4_result, surf_index, vec4_offset);
-   inst->regs_written = 4 * bld.dispatch_width() / 8;
+   inst->size_written = 4 * bld.dispatch_width() / 8 * REG_SIZE;
  
     if (type_sz(dst.type) == 8) {
        shuffle_32bit_load_result_to_64bit_data(
@@ -244,7 +243,7 @@ fs_inst::equals(fs_inst *inst) const
  bool
  fs_inst::overwrites_reg(const fs_reg &reg) const
  {
-   return reg.in_range(dst, regs_written);
+   return reg.in_range(dst, DIV_ROUND_UP(size_written, REG_SIZE));
  }
  
  bool
@@ -357,7 +356,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
     if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0)
        return false;
  
-   if (grf_alloc.sizes[reg.nr] != this->regs_written)
+   if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
        return false;
  
     for (int i = 0; i < this->sources; i++) {
@@ -2548,7 +2547,7 @@ fs_visitor::opt_sampler_eot()
     for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) {
        if (i == FB_WRITE_LOGICAL_SRC_COLOR0) {
           if (!fb_write->src[i].equals(tex_inst->dst) ||
-             fb_write->regs_read(i) != tex_inst->regs_written)
+             fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written)
           return false;
        } else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) {
           if (fb_write->src[i].file != BAD_FILE)
@@ -2564,7 +2563,7 @@ fs_visitor::opt_sampler_eot()
     tex_inst->offset |= fb_write->target << 24;
     tex_inst->eot = true;
     tex_inst->dst = ibld.null_reg_ud();
-   tex_inst->regs_written = 0;
+   tex_inst->size_written = 0;
     fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
  
     /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT
@@ -2606,7 +2605,7 @@ fs_visitor::opt_register_renaming()
  
        if (depth == 0 &&
            inst->dst.file == VGRF &&
-          alloc.sizes[inst->dst.nr] == inst->regs_written &&
+          alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
            !inst->is_partial_write()) {
           if (remap[dst] == -1) {
              remap[dst] = dst;
@@ -2730,7 +2729,7 @@ fs_visitor::compute_to_mrf()
        unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
  
        foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                               inst->src[0], inst->regs_read(0) * REG_SIZE)) {
             /* Found the last thing to write our reg we want to turn
              * into a compute-to-MRF.
@@ -2749,7 +2748,7 @@ fs_visitor::compute_to_mrf()
               * a time.
               */
              if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
-                scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written >
+                scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
                  inst->src[0].offset / REG_SIZE + inst->regs_read(0))
                 break;
  
@@ -2768,7 +2767,8 @@ fs_visitor::compute_to_mrf()
  
              /* Clear the bits for any registers this instruction overwrites. */
              regs_left &= ~mask_relative_to(
-               inst->src[0], scan_inst->dst, scan_inst->regs_written);
+               inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+                                                          REG_SIZE));
              if (!regs_left)
                 break;
          }
@@ -2793,8 +2793,8 @@ fs_visitor::compute_to_mrf()
          if (interfered)
             break;
  
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
-                             inst->dst, inst->regs_written * REG_SIZE)) {
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+                             inst->dst, inst->size_written)) {
             /* If somebody else writes our MRF here, we can't
              * compute-to-MRF before that.
              */
@@ -2803,7 +2803,7 @@ fs_visitor::compute_to_mrf()
  
           if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
               regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
-                             inst->dst, inst->regs_written * REG_SIZE)) {
+                             inst->dst, inst->size_written)) {
             /* Found a SEND instruction, which means that there are
              * live values in MRFs from base_mrf to base_mrf +
              * scan_inst->mlen - 1.  Don't go pushing our MRF write up
@@ -2822,11 +2822,12 @@ fs_visitor::compute_to_mrf()
        regs_left = (1 << regs_read(inst, 0)) - 1;
  
        foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                               inst->src[0], inst->regs_read(0) * REG_SIZE)) {
              /* Clear the bits for any registers this instruction overwrites. */
              regs_left &= ~mask_relative_to(
-               inst->src[0], scan_inst->dst, scan_inst->regs_written);
+               inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+                                                          REG_SIZE));
  
              const unsigned rel_offset = (reg_offset(scan_inst->dst) -
                                           reg_offset(inst->src[0])) / REG_SIZE;
@@ -2841,7 +2842,7 @@ fs_visitor::compute_to_mrf()
                 /* Clear the COMPR4 bit if the generating instruction is not
                  * compressed.
                  */
-               if (scan_inst->regs_written < 2)
+               if (scan_inst->size_written < 2 * REG_SIZE)
                    scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
  
              } else {
@@ -3024,7 +3025,7 @@ fs_visitor::remove_duplicate_mrf_writes()
        /* Clear out any MRF move records whose sources got overwritten. */
        for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
           if (last_mrf_move[i] &&
-             regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+             regions_overlap(inst->dst, inst->size_written,
                               last_mrf_move[i]->src[0],
                               last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
              last_mrf_move[i] = NULL;
@@ -4603,7 +4604,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
      * which is the one that is going to limit the overall execution size of
      * the instruction due to this rule.
      */
-   unsigned reg_count = inst->regs_written;
+   unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
  
     for (unsigned i = 0; i < inst->sources; i++)
        reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
@@ -4630,13 +4631,14 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
      */
     if (devinfo->gen < 8) {
        for (unsigned i = 0; i < inst->sources; i++) {
-         if (inst->regs_written == 2 &&
+         if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
               inst->regs_read(i) != 0 && inst->regs_read(i) != 2 &&
               !is_uniform(inst->src[i]) &&
               !(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
-               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1))
-            max_width = MIN2(max_width, inst->exec_size /
-                             inst->regs_written);
+               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
+            const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
+            max_width = MIN2(max_width, inst->exec_size / reg_count);
+         }
        }
     }
  
@@ -4681,9 +4683,10 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
      * In this situation we calculate the maximum size of the split
      * instructions so they only ever write to a single register.
      */
-   if (devinfo->gen < 8 && inst->regs_written > 1 &&
+   if (devinfo->gen < 8 && inst->size_written > REG_SIZE &&
         !inst->force_writemask_all) {
-      const unsigned channels_per_grf = inst->exec_size / inst->regs_written;
+      const unsigned channels_per_grf = inst->exec_size /
+         DIV_ROUND_UP(inst->size_written, REG_SIZE);
        unsigned exec_type_size = 0;
        for (int i = 0; i < inst->sources; i++) {
           if (inst->src[i].file != BAD_FILE)
@@ -5087,8 +5090,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
      * the results of multiple lowered instructions in order to make sure that
      * they end up arranged correctly in the original destination region.
      */
-   if (inst->regs_written * REG_SIZE >
-       inst->dst.component_size(inst->exec_size))
+   if (inst->size_written > inst->dst.component_size(inst->exec_size))
        return true;
  
     /* If the lowered execution size is larger than the original the result of
@@ -5111,7 +5113,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
         * group which could cause one of the lowered instructions to overwrite
         * the data read from the same source by other lowered instructions.
         */
-      if (regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+      if (regions_overlap(inst->dst, inst->size_written,
                            inst->src[i], inst->regs_read(i) * REG_SIZE) &&
            !inst->dst.equals(inst->src[i]))
          return true;
@@ -5138,8 +5140,8 @@ emit_zip(const fs_builder &lbld, bblock_t *block, fs_inst *inst)
  
     /* Specified channel group from the destination region. */
     const fs_reg dst = horiz_offset(inst->dst, lbld.group());
-   const unsigned dst_size = inst->regs_written * REG_SIZE /
-            inst->dst.component_size(inst->exec_size);
+   const unsigned dst_size = inst->size_written /
+      inst->dst.component_size(inst->exec_size);
  
     if (needs_dst_copy(lbld, inst)) {
        const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size);
@@ -5191,7 +5193,7 @@ fs_visitor::lower_simd_width()
            * original or the lowered instruction, whichever is lower.
            */
           const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
-         const unsigned dst_size = inst->regs_written * REG_SIZE /
+         const unsigned dst_size = inst->size_written /
              inst->dst.component_size(inst->exec_size);
  
           assert(!inst->writes_accumulator && !inst->mlen);
@@ -5215,9 +5217,8 @@ fs_visitor::lower_simd_width()
                 split_inst.src[j] = emit_unzip(lbld, block, inst, j);
  
              split_inst.dst = emit_zip(lbld, block, inst);
-            split_inst.regs_written = DIV_ROUND_UP(
-               split_inst.dst.component_size(lower_width) * dst_size,
-               REG_SIZE);
+            split_inst.size_written =
+               split_inst.dst.component_size(lower_width) * dst_size;
  
              lbld.emit(split_inst);
           }
@@ -5314,7 +5315,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
     switch (inst->dst.file) {
     case VGRF:
        fprintf(file, "vgrf%d", inst->dst.nr);
-      if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
+      if (alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written ||
            inst->dst.offset % REG_SIZE)
           fprintf(file, "+%d.%d",
                   inst->dst.offset / REG_SIZE, inst->dst.offset % REG_SIZE);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h

index 483672fbd96f2ce1b52c53ab6aa5c7c6d824f370..bae151ca042869bc1d117d973bacc186ca6c3a02 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -569,11 +569,11 @@ namespace brw {
        {
           instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
           inst->header_size = header_size;
-         inst->regs_written = header_size;
+         inst->size_written = header_size * REG_SIZE;
           for (unsigned i = header_size; i < sources; i++) {
-            inst->regs_written +=
-               DIV_ROUND_UP(dispatch_width() * type_sz(src[i].type) *
-                            dst.stride, REG_SIZE);
+            inst->size_written +=
+               ALIGN(dispatch_width() * type_sz(src[i].type) * dst.stride,
+                     REG_SIZE);
           }
  
           return inst;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp

index 10f0a5b282003f068a944a3c3df694191da54833..0e239d28d44dde869880a63d0335afe3b928d2fd 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -43,7 +43,7 @@ namespace { /* avoid conflict with opt_copy_propagation_elements */
  struct acp_entry : public exec_node {
     fs_reg dst;
     fs_reg src;
-   uint8_t regs_written;
+   uint8_t size_written;
     uint8_t regs_read;
     enum opcode opcode;
     bool saturate;
@@ -368,7 +368,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
      * that entry is writing.
      */
     if (!region_contained_in(inst->src[arg], inst->regs_read(arg),
-                            entry->dst, entry->regs_written))
+                            entry->dst, DIV_ROUND_UP(entry->size_written,
+                                                     REG_SIZE)))
        return false;
  
     /* we can't generally copy-propagate UD negations because we
@@ -524,7 +525,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
         * that entry is writing.
         */
        if (!region_contained_in(inst->src[i], inst->regs_read(i),
-                               entry->dst, entry->regs_written))
+                               entry->dst, DIV_ROUND_UP(entry->size_written,
+                                                        REG_SIZE)))
           continue;
  
        /* If the type sizes don't match each channel of the instruction is
@@ -770,8 +772,8 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
        /* kill the destination from the ACP */
        if (inst->dst.file == VGRF) {
           foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
-            if (regions_overlap(entry->dst, entry->regs_written * REG_SIZE,
-                                inst->dst, inst->regs_written * REG_SIZE))
+            if (regions_overlap(entry->dst, entry->size_written,
+                                inst->dst, inst->size_written))
                 entry->remove();
           }
  
@@ -784,7 +786,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                  * _any_ of the registers that it reads
                  */
                 if (regions_overlap(entry->src, entry->regs_read * REG_SIZE,
-                                   inst->dst, inst->regs_written * REG_SIZE))
+                                   inst->dst, inst->size_written))
                    entry->remove();
              }
          }
@@ -797,7 +799,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
           acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
           entry->dst = inst->dst;
           entry->src = inst->src[0];
-         entry->regs_written = inst->regs_written;
+         entry->size_written = inst->size_written;
           entry->regs_read = inst->regs_read(0);
           entry->opcode = inst->opcode;
           entry->saturate = inst->saturate;
@@ -808,14 +810,14 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
           for (int i = 0; i < inst->sources; i++) {
              int effective_width = i < inst->header_size ? 8 : inst->exec_size;
              assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE == 0);
-            int regs_written = effective_width *
-               type_sz(inst->src[i].type) / REG_SIZE;
+            const unsigned size_written = effective_width *
+                                          type_sz(inst->src[i].type);
              if (inst->src[i].file == VGRF) {
                 acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
                 entry->dst = inst->dst;
                 entry->dst.offset += offset * REG_SIZE;
                 entry->src = inst->src[i];
-               entry->regs_written = regs_written;
+               entry->size_written = size_written;
                 entry->regs_read = inst->regs_read(i);
                 entry->opcode = inst->opcode;
                 if (!entry->dst.equals(inst->src[i])) {
@@ -824,7 +826,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                    ralloc_free(entry);
                 }
              }
-            offset += regs_written;
+            offset += DIV_ROUND_UP(size_written, REG_SIZE);
           }
        }
     }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp

index 4744142a4b63884912bb3c7546bb48a9e92b0cfc..2acbfea71f0b8fda9a6b3db93eee9b52cb47c2d3 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -185,7 +185,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
            a->dst.type == b->dst.type &&
            a->offset == b->offset &&
            a->mlen == b->mlen &&
-          a->regs_written == b->regs_written &&
+          a->size_written == b->size_written &&
            a->base_mrf == b->base_mrf &&
            a->eot == b->eot &&
            a->header_size == b->header_size &&
@@ -296,7 +296,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
  
              /* dest <- temp */
              if (!inst->dst.is_null()) {
-               assert(inst->regs_written == entry->generator->regs_written);
+               assert(inst->size_written == entry->generator->size_written);
                 assert(inst->dst.type == entry->tmp.type);
                 const fs_builder ibld(this, block, inst);
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 12ab7b3fe6639a8d28345fa6f15ff42543b28168..8a581c9f02c3b9f9c131b918b992c78a790e1ec0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -357,13 +357,14 @@ void
  fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst,
                                 struct brw_reg payload)
  {
+   assert(inst->size_written % REG_SIZE == 0);
     brw_wm_prog_data *prog_data =
        reinterpret_cast<brw_wm_prog_data *>(this->prog_data);
     const unsigned surf_index =
        prog_data->binding_table.render_target_start + inst->target;
  
     gen9_fb_READ(p, dst, payload, surf_index,
-                inst->header_size, inst->regs_written,
+                inst->header_size, inst->size_written / REG_SIZE,
                  prog_data->persample_dispatch);
  
     brw_mark_surface_used(&prog_data->base, surf_index);
@@ -452,6 +453,7 @@ fs_generator::generate_urb_read(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg header)
  {
+   assert(inst->size_written % REG_SIZE == 0);
     assert(header.file == BRW_GENERAL_REGISTER_FILE);
     assert(header.type == BRW_REGISTER_TYPE_UD);
  
@@ -467,7 +469,7 @@ fs_generator::generate_urb_read(fs_inst *inst,
        brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);
  
     brw_inst_set_mlen(p->devinfo, send, inst->mlen);
-   brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
+   brw_inst_set_rlen(p->devinfo, send, inst->size_written / REG_SIZE);
     brw_inst_set_header_present(p->devinfo, send, true);
     brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
  }
@@ -625,6 +627,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                             struct brw_reg surface_index,
                             struct brw_reg sampler_index)
  {
+   assert(inst->size_written % REG_SIZE == 0);
     int msg_type = -1;
     uint32_t simd_mode;
     uint32_t return_format;
@@ -895,7 +898,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                   surface + base_binding_table_index,
                   sampler % 16,
                   msg_type,
-                 inst->regs_written,
+                 inst->size_written / REG_SIZE,
                   inst->mlen,
                   inst->header_size != 0,
                   simd_mode,
@@ -932,7 +935,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                                0 /* surface */,
                                0 /* sampler */,
                                msg_type,
-                              inst->regs_written,
+                              inst->size_written / REG_SIZE,
                                inst->mlen /* mlen */,
                                inst->header_size != 0 /* header */,
                                simd_mode,
@@ -1263,7 +1266,7 @@ fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
         */
        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
        assert(inst->mlen == 3);
-      assert(inst->regs_written == 8);
+      assert(inst->size_written == 8 * REG_SIZE);
        rlen = 8;
        simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
     }
@@ -1399,6 +1402,7 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
                                                  struct brw_reg msg_data,
                                                  unsigned msg_type)
  {
+   assert(inst->size_written % REG_SIZE == 0);
     assert(msg_data.type == BRW_REGISTER_TYPE_UD);
  
     brw_pixel_interpolator_query(p,
@@ -1408,7 +1412,7 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
           msg_type,
           msg_data,
           inst->mlen,
-         inst->regs_written);
+         inst->size_written / REG_SIZE);
  }
  
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index cd4005c0e60fc82067039e53151662b4ae8be178..42ed131854e610a8f8247e66578af4e81bd8245d 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1661,7 +1661,7 @@ emit_pixel_interpolater_send(const fs_builder &bld,
     inst = bld.emit(opcode, dst, payload, desc);
     inst->mlen = mlen;
     /* 2 floats per slot returned */
-   inst->regs_written = 2 * bld.dispatch_width() / 8;
+   inst->size_written = 2 * bld.dispatch_width() / 8 * REG_SIZE;
     inst->pi_noperspective = interpolation == INTERP_MODE_NOPERSPECTIVE;
  
     wm_prog_data->pulls_bary = true;
@@ -2144,7 +2144,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
              unsigned read_components = num_components + first_component;
              fs_reg tmp = bld.vgrf(dst.type, read_components);
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle);
-            inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = read_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
              for (unsigned i = 0; i < num_components; i++) {
                 bld.MOV(offset(tmp_dst, bld, i),
                         offset(tmp, bld, i + first_component));
@@ -2152,7 +2152,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
           } else {
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp_dst,
                              icp_handle);
-            inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = num_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
           }
           inst->offset = base_offset + offset_const->u32[0];
           inst->mlen = 1;
@@ -2166,7 +2166,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
           if (first_component != 0) {
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
                              payload);
-            inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = read_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
              for (unsigned i = 0; i < num_components; i++) {
                 bld.MOV(offset(tmp_dst, bld, i),
                         offset(tmp, bld, i + first_component));
@@ -2174,7 +2174,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
           } else {
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp_dst,
                           payload);
-            inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = num_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
           }
           inst->offset = base_offset;
           inst->mlen = 2;
@@ -2204,7 +2204,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
        /* Read the whole VUE header (because of alignment) and read .w. */
        fs_reg tmp = bld.vgrf(dst.type, 4);
        inst->dst = tmp;
-      inst->regs_written = 4;
+      inst->size_written = 4 * REG_SIZE;
        bld.MOV(dst, offset(tmp, bld, 3));
     }
  }
@@ -2510,8 +2510,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
              inst->offset = imm_offset;
              inst->mlen = 2;
           }
-         inst->regs_written =
-            ((num_components + first_component) * type_sz(dst.type) / 4);
+         inst->size_written =
+            ((num_components + first_component) * type_sz(dst.type) / 4) * REG_SIZE;
  
           /* If we are reading 64-bit data using 32-bit read messages we need
            * build proper 64-bit data elements by shuffling the low and high
@@ -2535,7 +2535,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
           if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
              assert(type_sz(dst.type) < 8);
              inst->dst = bld.vgrf(dst.type, 4);
-            inst->regs_written = 4;
+            inst->size_written = 4 * REG_SIZE;
              bld.MOV(dst, offset(inst->dst, bld, 3));
           }
  
@@ -2576,7 +2576,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                 inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
                 inst->offset = 0;
                 inst->mlen = 1;
-               inst->regs_written = 4;
+               inst->size_written = 4 * REG_SIZE;
  
                 /* dst.xy = tmp.wz */
                 bld.MOV(dst,                 offset(tmp, bld, 3));
@@ -2584,11 +2584,11 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                 break;
              }
              case GL_TRIANGLES:
-               /* DWord 4; hardcode offset = 1 and regs_written = 1 */
+               /* DWord 4; hardcode offset = 1 and size_written = REG_SIZE */
                 inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
                 inst->offset = 1;
                 inst->mlen = 1;
-               inst->regs_written = 1;
+               inst->size_written = REG_SIZE;
                 break;
              case GL_ISOLINES:
                 /* All channels are undefined. */
@@ -2606,7 +2606,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
              inst->offset = 1;
              inst->mlen = 1;
-            inst->regs_written = 4;
+            inst->size_written = 4 * REG_SIZE;
  
              /* Reswizzle: WZYX */
              fs_reg srcs[4] = {
@@ -2641,7 +2641,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                 fs_reg tmp = bld.vgrf(dst.type, read_components);
                 inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
                                 patch_handle);
-               inst->regs_written = read_components;
+               inst->size_written = read_components * REG_SIZE;
                 for (unsigned i = 0; i < instr->num_components; i++) {
                    bld.MOV(offset(dst, bld, i),
                            offset(tmp, bld, i + first_component));
@@ -2649,7 +2649,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
              } else {
                 inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
                                 patch_handle);
-               inst->regs_written = instr->num_components;
+               inst->size_written = instr->num_components * REG_SIZE;
              }
              inst->offset = imm_offset;
              inst->mlen = 1;
@@ -2668,7 +2668,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
              fs_reg tmp = bld.vgrf(dst.type, read_components);
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
                              payload);
-            inst->regs_written = read_components;
+            inst->size_written = read_components * REG_SIZE;
              for (unsigned i = 0; i < instr->num_components; i++) {
                 bld.MOV(offset(dst, bld, i),
                         offset(tmp, bld, i + first_component));
@@ -2676,7 +2676,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
           } else {
              inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
                              payload);
-            inst->regs_written = instr->num_components;
+            inst->size_written = instr->num_components * REG_SIZE;
           }
           inst->offset = imm_offset;
           inst->mlen = 2;
@@ -2976,7 +2976,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
                 fs_reg tmp = bld.vgrf(dest.type, read_components);
                 inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
                                 patch_handle);
-               inst->regs_written = read_components;
+               inst->size_written = read_components * REG_SIZE;
                 for (unsigned i = 0; i < instr->num_components; i++) {
                    bld.MOV(offset(dest, bld, i),
                            offset(tmp, bld, i + first_component));
@@ -2984,7 +2984,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
              } else {
                 inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest,
                                 patch_handle);
-               inst->regs_written = instr->num_components;
+               inst->size_written = instr->num_components * REG_SIZE;
              }
              inst->mlen = 1;
              inst->offset = imm_offset;
@@ -3032,8 +3032,9 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
              }
              inst->mlen = 2;
              inst->offset = imm_offset;
-            inst->regs_written =
-               ((num_components + first_component) * type_sz(dest.type) / 4);
+            inst->size_written =
+               ((num_components + first_component) * type_sz(dest.type) / 4) *
+               REG_SIZE;
  
              /* If we are reading 64-bit data using 32-bit read messages we need
               * build proper 64-bit data elements by shuffling the low and high
@@ -3207,8 +3208,7 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
     STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS);
  
     fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
-   inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
-                        REG_SIZE;
+   inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
  
     return inst;
  }
@@ -3223,8 +3223,7 @@ emit_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, unsigned target)
     assert(bld.shader->devinfo->gen >= 9);
     fs_inst *inst = bld.emit(FS_OPCODE_FB_READ_LOGICAL, dst);
     inst->target = target;
-   inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
-                        REG_SIZE;
+   inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
  
     return inst;
  }
@@ -3903,7 +3902,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        const fs_builder ubld = bld.group(8, 0);
        const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
        ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
-         ->regs_written = 2;
+         ->size_written = 2 * REG_SIZE;
        break;
     }
  
@@ -4338,7 +4337,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                                  src_payload, brw_imm_ud(index));
        inst->header_size = 0;
        inst->mlen = 1;
-      inst->regs_written = 4;
+      inst->size_written = 4 * REG_SIZE;
  
        bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0));
        brw_mark_surface_used(prog_data, index);
@@ -4685,9 +4684,9 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
                              nir_ssa_def_components_read(&instr->dest.ssa):
                              (1 << dest_size) - 1;
        assert(write_mask != 0); /* dead code should have been eliminated */
-      inst->regs_written = util_last_bit(write_mask) * dispatch_width / 8;
+      inst->size_written = util_last_bit(write_mask) * dispatch_width / 8 * REG_SIZE;
     } else {
-      inst->regs_written = 4 * dispatch_width / 8;
+      inst->size_written = 4 * dispatch_width / 8 * REG_SIZE;
     }
  
     if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp

index 572735a379ace9cdf032d2084fd4fe4326596862..5c6f3d490f01185e94f1aafd9ce39c17c2e89708 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -826,7 +826,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
        }
  
        if (inst->dst.file == VGRF)
-         spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
+         spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE)
+                                      * loop_scale;
  
        switch (inst->opcode) {
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp

index 3dd0fbfc1c1c2d79d536d4fc10eab9f75b78c641..310e8019fcbe8ec68e99d9d458c318e2193569fe 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -204,7 +204,7 @@ fs_visitor::register_coalesce()
              continue;
           }
           dst_reg_offset[offset] = inst->dst.offset / REG_SIZE;
-         if (inst->regs_written > 1)
+         if (inst->size_written > REG_SIZE)
              dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
           mov[offset] = inst;
           channels_remaining -= regs_written(inst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp

index 5fa7c42d2ee2b0d12adec087e6608491ef1700e7..37e893bb89e4d3716ba0fe35c8dd8e7fb4fff1fd 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -50,7 +50,7 @@ namespace brw {
              const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
              fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
  
-            inst->regs_written = rsize * bld.dispatch_width() / 8;
+            inst->size_written = rsize * bld.dispatch_width() / 8 * REG_SIZE;
              inst->predicate = pred;
              return dst;
           }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index d0f504c1a2eae084efe2639d87844e978eff83ce..5aea62c4cbcfb1f9c6d7bd27e16933ab429ad671 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -97,7 +97,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
     /* We only care about one or two regs of response, but the sampler always
      * writes 4/8.
      */
-   inst->regs_written = 4 * dispatch_width / 8;
+   inst->size_written = 4 * dispatch_width / 8 * REG_SIZE;
  
     return dest;
  }
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h

index de08a691055f06fc3c423d9ab459d02715c7d176..cea81e4646a890c0345cb6c949f055b96a3cf2f5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -421,7 +421,8 @@ inline unsigned
  regs_written(const fs_inst *inst)
  {
     /* XXX - Take into account register-misaligned offsets correctly. */
-   return inst->regs_written;
+   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
+   return DIV_ROUND_UP(inst->size_written, REG_SIZE);
  }
  
  /**
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index c12bf09e8359ff748e120b18cf0e294f2c6de439..5e1e61683a22b5d49c13899b0629918f40628f8e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1494,11 +1494,11 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
                  * single-result send is probably actually reducing register
                  * pressure.
                  */
-               if (inst->regs_written <= inst->exec_size / 8 &&
-                   chosen_inst->regs_written > chosen_inst->exec_size / 8) {
+               if (inst->size_written <= inst->exec_size / 8 * REG_SIZE &&
+                   chosen_inst->size_written > chosen_inst->exec_size / 8 * REG_SIZE) {
                    chosen = n;
                    continue;
-               } else if (inst->regs_written > chosen_inst->regs_written) {
+               } else if (inst->size_written > chosen_inst->size_written) {
                    continue;
                 }
              }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h

index 66264b4ea7e5ddc41c0adf7c527d8984f2579408..2173f3226e13a4cb82af84e609b87e9ef240d399 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -138,6 +138,7 @@ struct backend_instruction {
     int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
     uint8_t target; /**< MRT target. */
     uint8_t regs_written; /**< Number of registers written by the instruction. */
+   unsigned size_written; /**< Data written to the destination register in bytes. */
  
     enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
     enum brw_conditional_mod conditional_mod; /**< BRW_CONDITIONAL_* */
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp

index 8ba7bc594816239767cb486512870a4bea638bd6..f71c6ee1e4203f35ea318a693d682bde38d92723 100644 (file)
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -281,7 +281,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
     fs_reg zero(brw_imm_f(0.0f));
     bld.ADD(offset(dest, bld, 2), src0, src1);
     bld.emit(SHADER_OPCODE_TEX, dest, src2)
-      ->regs_written = 4;
+      ->size_written = 4 * REG_SIZE;
     bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
  
     /* = Before =
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp

index fd623a5e00fd346c159631d9ec48d9656963fb57..680fe72dfd5c98f32a9d4ffa6170d0d9f55e1a8a 100644 (file)
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -525,7 +525,7 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
     fs_reg src2 = v->vgrf(glsl_type::vec2_type);
     bld.ADD(offset(dst0, bld, 2), src0, src1);
     bld.emit(SHADER_OPCODE_TEX, dst0, src2)
-      ->regs_written = 4;
+      ->size_written = 4 * REG_SIZE;
     set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));
  
     /* = Before =
author	Francisco Jerez <currojerez@riseup.net>
	Wed, 7 Sep 2016 20:38:20 +0000 (13:38 -0700)
committer	Francisco Jerez <currojerez@riseup.net>
	Wed, 14 Sep 2016 21:50:53 +0000 (14:50 -0700)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_builder.h		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_cse.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_generator.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_ir_fs.h		patch | blob | history
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_shader.h		patch | blob | history
src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp		patch | blob | history
src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp		patch | blob | history