i965/fs: Replace fs_inst::regs_written with ::size_written field in bytes.
author: Francisco Jerez <currojerez@riseup.net>
Wed, 7 Sep 2016 20:38:20 +0000 (13:38 -0700)
committer: Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:53 +0000 (14:50 -0700)
The previous regs_written field can be recovered by rewriting each
rvalue reference of regs_written like 'x = i.regs_written' to 'x =
DIV_ROUND_UP(i.size_written, reg_unit)', and each lvalue reference
like 'i.regs_written = x' to 'i.size_written = x * reg_unit'.

For the same reason as in the previous patches, this doesn't attempt
to be particularly clever about simplifying the result in the interest
of keeping the rather lengthy patch as obvious as possible.  I'll come
back later to clean up any ugliness introduced here.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
15 files changed:
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_builder.h
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
src/mesa/drivers/dri/i965/brw_fs_cse.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_ir_fs.h
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_shader.h
src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp

index 802aa9f76f4794fc457a94598e0fdba52c5bac36..0244f593149bac3d7e7745a107f06e47a7277fee 100644 (file)
@@ -76,11 +76,10 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
    case FIXED_GRF:
    case MRF:
    case ATTR:
-      this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
-                                        REG_SIZE);
+      this->size_written = dst.component_size(exec_size);
       break;
    case BAD_FILE:
-      this->regs_written = 0;
+      this->size_written = 0;
       break;
    case IMM:
    case UNIFORM:
@@ -192,7 +191,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
    fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
    fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
                             vec4_result, surf_index, vec4_offset);
-   inst->regs_written = 4 * bld.dispatch_width() / 8;
+   inst->size_written = 4 * bld.dispatch_width() / 8 * REG_SIZE;
 
    if (type_sz(dst.type) == 8) {
       shuffle_32bit_load_result_to_64bit_data(
@@ -244,7 +243,7 @@ fs_inst::equals(fs_inst *inst) const
 bool
 fs_inst::overwrites_reg(const fs_reg &reg) const
 {
-   return reg.in_range(dst, regs_written);
+   return reg.in_range(dst, DIV_ROUND_UP(size_written, REG_SIZE));
 }
 
 bool
@@ -357,7 +356,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
    if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0)
       return false;
 
-   if (grf_alloc.sizes[reg.nr] != this->regs_written)
+   if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
       return false;
 
    for (int i = 0; i < this->sources; i++) {
@@ -2548,7 +2547,7 @@ fs_visitor::opt_sampler_eot()
    for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) {
       if (i == FB_WRITE_LOGICAL_SRC_COLOR0) {
          if (!fb_write->src[i].equals(tex_inst->dst) ||
-             fb_write->regs_read(i) != tex_inst->regs_written)
+             fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written)
          return false;
       } else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) {
          if (fb_write->src[i].file != BAD_FILE)
@@ -2564,7 +2563,7 @@ fs_visitor::opt_sampler_eot()
    tex_inst->offset |= fb_write->target << 24;
    tex_inst->eot = true;
    tex_inst->dst = ibld.null_reg_ud();
-   tex_inst->regs_written = 0;
+   tex_inst->size_written = 0;
    fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
 
    /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT
@@ -2606,7 +2605,7 @@ fs_visitor::opt_register_renaming()
 
       if (depth == 0 &&
           inst->dst.file == VGRF &&
-          alloc.sizes[inst->dst.nr] == inst->regs_written &&
+          alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
             remap[dst] = dst;
@@ -2730,7 +2729,7 @@ fs_visitor::compute_to_mrf()
       unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                              inst->src[0], inst->regs_read(0) * REG_SIZE)) {
            /* Found the last thing to write our reg we want to turn
             * into a compute-to-MRF.
@@ -2749,7 +2748,7 @@ fs_visitor::compute_to_mrf()
              * a time.
              */
             if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
-                scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written >
+                scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
                 inst->src[0].offset / REG_SIZE + inst->regs_read(0))
                break;
 
@@ -2768,7 +2767,8 @@ fs_visitor::compute_to_mrf()
 
             /* Clear the bits for any registers this instruction overwrites. */
             regs_left &= ~mask_relative_to(
-               inst->src[0], scan_inst->dst, scan_inst->regs_written);
+               inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+                                                          REG_SIZE));
             if (!regs_left)
                break;
         }
@@ -2793,8 +2793,8 @@ fs_visitor::compute_to_mrf()
         if (interfered)
            break;
 
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
-                             inst->dst, inst->regs_written * REG_SIZE)) {
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+                             inst->dst, inst->size_written)) {
            /* If somebody else writes our MRF here, we can't
             * compute-to-MRF before that.
             */
@@ -2803,7 +2803,7 @@ fs_visitor::compute_to_mrf()
 
          if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
              regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
-                             inst->dst, inst->regs_written * REG_SIZE)) {
+                             inst->dst, inst->size_written)) {
            /* Found a SEND instruction, which means that there are
             * live values in MRFs from base_mrf to base_mrf +
             * scan_inst->mlen - 1.  Don't go pushing our MRF write up
@@ -2822,11 +2822,12 @@ fs_visitor::compute_to_mrf()
       regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                              inst->src[0], inst->regs_read(0) * REG_SIZE)) {
             /* Clear the bits for any registers this instruction overwrites. */
             regs_left &= ~mask_relative_to(
-               inst->src[0], scan_inst->dst, scan_inst->regs_written);
+               inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+                                                          REG_SIZE));
 
             const unsigned rel_offset = (reg_offset(scan_inst->dst) -
                                          reg_offset(inst->src[0])) / REG_SIZE;
@@ -2841,7 +2842,7 @@ fs_visitor::compute_to_mrf()
                /* Clear the COMPR4 bit if the generating instruction is not
                 * compressed.
                 */
-               if (scan_inst->regs_written < 2)
+               if (scan_inst->size_written < 2 * REG_SIZE)
                   scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
 
             } else {
@@ -3024,7 +3025,7 @@ fs_visitor::remove_duplicate_mrf_writes()
       /* Clear out any MRF move records whose sources got overwritten. */
       for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
          if (last_mrf_move[i] &&
-             regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+             regions_overlap(inst->dst, inst->size_written,
                              last_mrf_move[i]->src[0],
                              last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
             last_mrf_move[i] = NULL;
@@ -4603,7 +4604,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     * which is the one that is going to limit the overall execution size of
     * the instruction due to this rule.
     */
-   unsigned reg_count = inst->regs_written;
+   unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
 
    for (unsigned i = 0; i < inst->sources; i++)
       reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
@@ -4630,13 +4631,14 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     */
    if (devinfo->gen < 8) {
       for (unsigned i = 0; i < inst->sources; i++) {
-         if (inst->regs_written == 2 &&
+         if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
              inst->regs_read(i) != 0 && inst->regs_read(i) != 2 &&
              !is_uniform(inst->src[i]) &&
              !(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
-               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1))
-            max_width = MIN2(max_width, inst->exec_size /
-                             inst->regs_written);
+               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
+            const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
+            max_width = MIN2(max_width, inst->exec_size / reg_count);
+         }
       }
    }
 
@@ -4681,9 +4683,10 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     * In this situation we calculate the maximum size of the split
     * instructions so they only ever write to a single register.
     */
-   if (devinfo->gen < 8 && inst->regs_written > 1 &&
+   if (devinfo->gen < 8 && inst->size_written > REG_SIZE &&
        !inst->force_writemask_all) {
-      const unsigned channels_per_grf = inst->exec_size / inst->regs_written;
+      const unsigned channels_per_grf = inst->exec_size /
+         DIV_ROUND_UP(inst->size_written, REG_SIZE);
       unsigned exec_type_size = 0;
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file != BAD_FILE)
@@ -5087,8 +5090,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
     * the results of multiple lowered instructions in order to make sure that
     * they end up arranged correctly in the original destination region.
     */
-   if (inst->regs_written * REG_SIZE >
-       inst->dst.component_size(inst->exec_size))
+   if (inst->size_written > inst->dst.component_size(inst->exec_size))
       return true;
 
    /* If the lowered execution size is larger than the original the result of
@@ -5111,7 +5113,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
        * group which could cause one of the lowered instructions to overwrite
        * the data read from the same source by other lowered instructions.
        */
-      if (regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+      if (regions_overlap(inst->dst, inst->size_written,
                           inst->src[i], inst->regs_read(i) * REG_SIZE) &&
           !inst->dst.equals(inst->src[i]))
         return true;
@@ -5138,8 +5140,8 @@ emit_zip(const fs_builder &lbld, bblock_t *block, fs_inst *inst)
 
    /* Specified channel group from the destination region. */
    const fs_reg dst = horiz_offset(inst->dst, lbld.group());
-   const unsigned dst_size = inst->regs_written * REG_SIZE /
-            inst->dst.component_size(inst->exec_size);
+   const unsigned dst_size = inst->size_written /
+      inst->dst.component_size(inst->exec_size);
 
    if (needs_dst_copy(lbld, inst)) {
       const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size);
@@ -5191,7 +5193,7 @@ fs_visitor::lower_simd_width()
           * original or the lowered instruction, whichever is lower.
           */
          const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
-         const unsigned dst_size = inst->regs_written * REG_SIZE /
+         const unsigned dst_size = inst->size_written /
             inst->dst.component_size(inst->exec_size);
 
          assert(!inst->writes_accumulator && !inst->mlen);
@@ -5215,9 +5217,8 @@ fs_visitor::lower_simd_width()
                split_inst.src[j] = emit_unzip(lbld, block, inst, j);
 
             split_inst.dst = emit_zip(lbld, block, inst);
-            split_inst.regs_written = DIV_ROUND_UP(
-               split_inst.dst.component_size(lower_width) * dst_size,
-               REG_SIZE);
+            split_inst.size_written =
+               split_inst.dst.component_size(lower_width) * dst_size;
 
             lbld.emit(split_inst);
          }
@@ -5314,7 +5315,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    switch (inst->dst.file) {
    case VGRF:
       fprintf(file, "vgrf%d", inst->dst.nr);
-      if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
+      if (alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written ||
           inst->dst.offset % REG_SIZE)
          fprintf(file, "+%d.%d",
                  inst->dst.offset / REG_SIZE, inst->dst.offset % REG_SIZE);
index 483672fbd96f2ce1b52c53ab6aa5c7c6d824f370..bae151ca042869bc1d117d973bacc186ca6c3a02 100644 (file)
@@ -569,11 +569,11 @@ namespace brw {
       {
          instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
          inst->header_size = header_size;
-         inst->regs_written = header_size;
+         inst->size_written = header_size * REG_SIZE;
          for (unsigned i = header_size; i < sources; i++) {
-            inst->regs_written +=
-               DIV_ROUND_UP(dispatch_width() * type_sz(src[i].type) *
-                            dst.stride, REG_SIZE);
+            inst->size_written +=
+               ALIGN(dispatch_width() * type_sz(src[i].type) * dst.stride,
+                     REG_SIZE);
          }
 
          return inst;
index 10f0a5b282003f068a944a3c3df694191da54833..0e239d28d44dde869880a63d0335afe3b928d2fd 100644 (file)
@@ -43,7 +43,7 @@ namespace { /* avoid conflict with opt_copy_propagation_elements */
 struct acp_entry : public exec_node {
    fs_reg dst;
    fs_reg src;
-   uint8_t regs_written;
+   uint8_t size_written;
    uint8_t regs_read;
    enum opcode opcode;
    bool saturate;
@@ -368,7 +368,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
     * that entry is writing.
     */
    if (!region_contained_in(inst->src[arg], inst->regs_read(arg),
-                            entry->dst, entry->regs_written))
+                            entry->dst, DIV_ROUND_UP(entry->size_written,
+                                                     REG_SIZE)))
       return false;
 
    /* we can't generally copy-propagate UD negations because we
@@ -524,7 +525,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
        * that entry is writing.
        */
       if (!region_contained_in(inst->src[i], inst->regs_read(i),
-                               entry->dst, entry->regs_written))
+                               entry->dst, DIV_ROUND_UP(entry->size_written,
+                                                        REG_SIZE)))
          continue;
 
       /* If the type sizes don't match each channel of the instruction is
@@ -770,8 +772,8 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
       /* kill the destination from the ACP */
       if (inst->dst.file == VGRF) {
          foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
-            if (regions_overlap(entry->dst, entry->regs_written * REG_SIZE,
-                                inst->dst, inst->regs_written * REG_SIZE))
+            if (regions_overlap(entry->dst, entry->size_written,
+                                inst->dst, inst->size_written))
                entry->remove();
          }
 
@@ -784,7 +786,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                 * _any_ of the registers that it reads
                 */
                if (regions_overlap(entry->src, entry->regs_read * REG_SIZE,
-                                   inst->dst, inst->regs_written * REG_SIZE))
+                                   inst->dst, inst->size_written))
                   entry->remove();
             }
         }
@@ -797,7 +799,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
          acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
          entry->dst = inst->dst;
          entry->src = inst->src[0];
-         entry->regs_written = inst->regs_written;
+         entry->size_written = inst->size_written;
          entry->regs_read = inst->regs_read(0);
          entry->opcode = inst->opcode;
          entry->saturate = inst->saturate;
@@ -808,14 +810,14 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
          for (int i = 0; i < inst->sources; i++) {
             int effective_width = i < inst->header_size ? 8 : inst->exec_size;
             assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE == 0);
-            int regs_written = effective_width *
-               type_sz(inst->src[i].type) / REG_SIZE;
+            const unsigned size_written = effective_width *
+                                          type_sz(inst->src[i].type);
             if (inst->src[i].file == VGRF) {
                acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
                entry->dst = inst->dst;
                entry->dst.offset += offset * REG_SIZE;
                entry->src = inst->src[i];
-               entry->regs_written = regs_written;
+               entry->size_written = size_written;
                entry->regs_read = inst->regs_read(i);
                entry->opcode = inst->opcode;
                if (!entry->dst.equals(inst->src[i])) {
@@ -824,7 +826,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                   ralloc_free(entry);
                }
             }
-            offset += regs_written;
+            offset += DIV_ROUND_UP(size_written, REG_SIZE);
          }
       }
    }
index 4744142a4b63884912bb3c7546bb48a9e92b0cfc..2acbfea71f0b8fda9a6b3db93eee9b52cb47c2d3 100644 (file)
@@ -185,7 +185,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
           a->dst.type == b->dst.type &&
           a->offset == b->offset &&
           a->mlen == b->mlen &&
-          a->regs_written == b->regs_written &&
+          a->size_written == b->size_written &&
           a->base_mrf == b->base_mrf &&
           a->eot == b->eot &&
           a->header_size == b->header_size &&
@@ -296,7 +296,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
 
             /* dest <- temp */
             if (!inst->dst.is_null()) {
-               assert(inst->regs_written == entry->generator->regs_written);
+               assert(inst->size_written == entry->generator->size_written);
                assert(inst->dst.type == entry->tmp.type);
                const fs_builder ibld(this, block, inst);
 
index 12ab7b3fe6639a8d28345fa6f15ff42543b28168..8a581c9f02c3b9f9c131b918b992c78a790e1ec0 100644 (file)
@@ -357,13 +357,14 @@ void
 fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst,
                                struct brw_reg payload)
 {
+   assert(inst->size_written % REG_SIZE == 0);
    brw_wm_prog_data *prog_data =
       reinterpret_cast<brw_wm_prog_data *>(this->prog_data);
    const unsigned surf_index =
       prog_data->binding_table.render_target_start + inst->target;
 
    gen9_fb_READ(p, dst, payload, surf_index,
-                inst->header_size, inst->regs_written,
+                inst->header_size, inst->size_written / REG_SIZE,
                 prog_data->persample_dispatch);
 
    brw_mark_surface_used(&prog_data->base, surf_index);
@@ -452,6 +453,7 @@ fs_generator::generate_urb_read(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg header)
 {
+   assert(inst->size_written % REG_SIZE == 0);
    assert(header.file == BRW_GENERAL_REGISTER_FILE);
    assert(header.type == BRW_REGISTER_TYPE_UD);
 
@@ -467,7 +469,7 @@ fs_generator::generate_urb_read(fs_inst *inst,
       brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);
 
    brw_inst_set_mlen(p->devinfo, send, inst->mlen);
-   brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
+   brw_inst_set_rlen(p->devinfo, send, inst->size_written / REG_SIZE);
    brw_inst_set_header_present(p->devinfo, send, true);
    brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
 }
@@ -625,6 +627,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                            struct brw_reg surface_index,
                            struct brw_reg sampler_index)
 {
+   assert(inst->size_written % REG_SIZE == 0);
    int msg_type = -1;
    uint32_t simd_mode;
    uint32_t return_format;
@@ -895,7 +898,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                  surface + base_binding_table_index,
                  sampler % 16,
                  msg_type,
-                 inst->regs_written,
+                 inst->size_written / REG_SIZE,
                  inst->mlen,
                  inst->header_size != 0,
                  simd_mode,
@@ -932,7 +935,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
                               0 /* surface */,
                               0 /* sampler */,
                               msg_type,
-                              inst->regs_written,
+                              inst->size_written / REG_SIZE,
                               inst->mlen /* mlen */,
                               inst->header_size != 0 /* header */,
                               simd_mode,
@@ -1263,7 +1266,7 @@ fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
        */
       msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
       assert(inst->mlen == 3);
-      assert(inst->regs_written == 8);
+      assert(inst->size_written == 8 * REG_SIZE);
       rlen = 8;
       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
    }
@@ -1399,6 +1402,7 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
                                                 struct brw_reg msg_data,
                                                 unsigned msg_type)
 {
+   assert(inst->size_written % REG_SIZE == 0);
    assert(msg_data.type == BRW_REGISTER_TYPE_UD);
 
    brw_pixel_interpolator_query(p,
@@ -1408,7 +1412,7 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
          msg_type,
          msg_data,
          inst->mlen,
-         inst->regs_written);
+         inst->size_written / REG_SIZE);
 }
 
 
index cd4005c0e60fc82067039e53151662b4ae8be178..42ed131854e610a8f8247e66578af4e81bd8245d 100644 (file)
@@ -1661,7 +1661,7 @@ emit_pixel_interpolater_send(const fs_builder &bld,
    inst = bld.emit(opcode, dst, payload, desc);
    inst->mlen = mlen;
    /* 2 floats per slot returned */
-   inst->regs_written = 2 * bld.dispatch_width() / 8;
+   inst->size_written = 2 * bld.dispatch_width() / 8 * REG_SIZE;
    inst->pi_noperspective = interpolation == INTERP_MODE_NOPERSPECTIVE;
 
    wm_prog_data->pulls_bary = true;
@@ -2144,7 +2144,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
             unsigned read_components = num_components + first_component;
             fs_reg tmp = bld.vgrf(dst.type, read_components);
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle);
-            inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = read_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
             for (unsigned i = 0; i < num_components; i++) {
                bld.MOV(offset(tmp_dst, bld, i),
                        offset(tmp, bld, i + first_component));
@@ -2152,7 +2152,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
          } else {
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp_dst,
                             icp_handle);
-            inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = num_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
          }
          inst->offset = base_offset + offset_const->u32[0];
          inst->mlen = 1;
@@ -2166,7 +2166,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
          if (first_component != 0) {
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
                             payload);
-            inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = read_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
             for (unsigned i = 0; i < num_components; i++) {
                bld.MOV(offset(tmp_dst, bld, i),
                        offset(tmp, bld, i + first_component));
@@ -2174,7 +2174,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
          } else {
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp_dst,
                          payload);
-            inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+            inst->size_written = num_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
          }
          inst->offset = base_offset;
          inst->mlen = 2;
@@ -2204,7 +2204,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
       /* Read the whole VUE header (because of alignment) and read .w. */
       fs_reg tmp = bld.vgrf(dst.type, 4);
       inst->dst = tmp;
-      inst->regs_written = 4;
+      inst->size_written = 4 * REG_SIZE;
       bld.MOV(dst, offset(tmp, bld, 3));
    }
 }
@@ -2510,8 +2510,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
             inst->offset = imm_offset;
             inst->mlen = 2;
          }
-         inst->regs_written =
-            ((num_components + first_component) * type_sz(dst.type) / 4);
+         inst->size_written =
+            ((num_components + first_component) * type_sz(dst.type) / 4) * REG_SIZE;
 
          /* If we are reading 64-bit data using 32-bit read messages we need
           * build proper 64-bit data elements by shuffling the low and high
@@ -2535,7 +2535,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
          if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
             assert(type_sz(dst.type) < 8);
             inst->dst = bld.vgrf(dst.type, 4);
-            inst->regs_written = 4;
+            inst->size_written = 4 * REG_SIZE;
             bld.MOV(dst, offset(inst->dst, bld, 3));
          }
 
@@ -2576,7 +2576,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
                inst->offset = 0;
                inst->mlen = 1;
-               inst->regs_written = 4;
+               inst->size_written = 4 * REG_SIZE;
 
                /* dst.xy = tmp.wz */
                bld.MOV(dst,                 offset(tmp, bld, 3));
@@ -2584,11 +2584,11 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                break;
             }
             case GL_TRIANGLES:
-               /* DWord 4; hardcode offset = 1 and regs_written = 1 */
+               /* DWord 4; hardcode offset = 1 and size_written = REG_SIZE */
                inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
                inst->offset = 1;
                inst->mlen = 1;
-               inst->regs_written = 1;
+               inst->size_written = REG_SIZE;
                break;
             case GL_ISOLINES:
                /* All channels are undefined. */
@@ -2606,7 +2606,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
             inst->offset = 1;
             inst->mlen = 1;
-            inst->regs_written = 4;
+            inst->size_written = 4 * REG_SIZE;
 
             /* Reswizzle: WZYX */
             fs_reg srcs[4] = {
@@ -2641,7 +2641,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
                fs_reg tmp = bld.vgrf(dst.type, read_components);
                inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
                                patch_handle);
-               inst->regs_written = read_components;
+               inst->size_written = read_components * REG_SIZE;
                for (unsigned i = 0; i < instr->num_components; i++) {
                   bld.MOV(offset(dst, bld, i),
                           offset(tmp, bld, i + first_component));
@@ -2649,7 +2649,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
             } else {
                inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
                                patch_handle);
-               inst->regs_written = instr->num_components;
+               inst->size_written = instr->num_components * REG_SIZE;
             }
             inst->offset = imm_offset;
             inst->mlen = 1;
@@ -2668,7 +2668,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
             fs_reg tmp = bld.vgrf(dst.type, read_components);
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
                             payload);
-            inst->regs_written = read_components;
+            inst->size_written = read_components * REG_SIZE;
             for (unsigned i = 0; i < instr->num_components; i++) {
                bld.MOV(offset(dst, bld, i),
                        offset(tmp, bld, i + first_component));
@@ -2676,7 +2676,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
          } else {
             inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
                             payload);
-            inst->regs_written = instr->num_components;
+            inst->size_written = instr->num_components * REG_SIZE;
          }
          inst->offset = imm_offset;
          inst->mlen = 2;
@@ -2976,7 +2976,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
                fs_reg tmp = bld.vgrf(dest.type, read_components);
                inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
                                patch_handle);
-               inst->regs_written = read_components;
+               inst->size_written = read_components * REG_SIZE;
                for (unsigned i = 0; i < instr->num_components; i++) {
                   bld.MOV(offset(dest, bld, i),
                           offset(tmp, bld, i + first_component));
@@ -2984,7 +2984,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
             } else {
                inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest,
                                patch_handle);
-               inst->regs_written = instr->num_components;
+               inst->size_written = instr->num_components * REG_SIZE;
             }
             inst->mlen = 1;
             inst->offset = imm_offset;
@@ -3032,8 +3032,9 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
             }
             inst->mlen = 2;
             inst->offset = imm_offset;
-            inst->regs_written =
-               ((num_components + first_component) * type_sz(dest.type) / 4);
+            inst->size_written =
+               ((num_components + first_component) * type_sz(dest.type) / 4) *
+               REG_SIZE;
 
             /* If we are reading 64-bit data using 32-bit read messages we need
              * build proper 64-bit data elements by shuffling the low and high
@@ -3207,8 +3208,7 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
    STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS);
 
    fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
-   inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
-                        REG_SIZE;
+   inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
 
    return inst;
 }
@@ -3223,8 +3223,7 @@ emit_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, unsigned target)
    assert(bld.shader->devinfo->gen >= 9);
    fs_inst *inst = bld.emit(FS_OPCODE_FB_READ_LOGICAL, dst);
    inst->target = target;
-   inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
-                        REG_SIZE;
+   inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
 
    return inst;
 }
@@ -3903,7 +3902,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       const fs_builder ubld = bld.group(8, 0);
       const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
       ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
-         ->regs_written = 2;
+         ->size_written = 2 * REG_SIZE;
       break;
    }
 
@@ -4338,7 +4337,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                                 src_payload, brw_imm_ud(index));
       inst->header_size = 0;
       inst->mlen = 1;
-      inst->regs_written = 4;
+      inst->size_written = 4 * REG_SIZE;
 
       bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0));
       brw_mark_surface_used(prog_data, index);
@@ -4685,9 +4684,9 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
                             nir_ssa_def_components_read(&instr->dest.ssa):
                             (1 << dest_size) - 1;
       assert(write_mask != 0); /* dead code should have been eliminated */
-      inst->regs_written = util_last_bit(write_mask) * dispatch_width / 8;
+      inst->size_written = util_last_bit(write_mask) * dispatch_width / 8 * REG_SIZE;
    } else {
-      inst->regs_written = 4 * dispatch_width / 8;
+      inst->size_written = 4 * dispatch_width / 8 * REG_SIZE;
    }
 
    if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
index 572735a379ace9cdf032d2084fd4fe4326596862..5c6f3d490f01185e94f1aafd9ce39c17c2e89708 100644 (file)
@@ -826,7 +826,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
       }
 
       if (inst->dst.file == VGRF)
-         spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
+         spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE)
+                                      * loop_scale;
 
       switch (inst->opcode) {
 
index 3dd0fbfc1c1c2d79d536d4fc10eab9f75b78c641..310e8019fcbe8ec68e99d9d458c318e2193569fe 100644 (file)
@@ -204,7 +204,7 @@ fs_visitor::register_coalesce()
             continue;
          }
          dst_reg_offset[offset] = inst->dst.offset / REG_SIZE;
-         if (inst->regs_written > 1)
+         if (inst->size_written > REG_SIZE)
             dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
          mov[offset] = inst;
          channels_remaining -= regs_written(inst);
index 5fa7c42d2ee2b0d12adec087e6608491ef1700e7..37e893bb89e4d3716ba0fe35c8dd8e7fb4fff1fd 100644 (file)
@@ -50,7 +50,7 @@ namespace brw {
             const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
             fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
 
-            inst->regs_written = rsize * bld.dispatch_width() / 8;
+            inst->size_written = rsize * bld.dispatch_width() / 8 * REG_SIZE;
             inst->predicate = pred;
             return dst;
          }
index d0f504c1a2eae084efe2639d87844e978eff83ce..5aea62c4cbcfb1f9c6d7bd27e16933ab429ad671 100644 (file)
@@ -97,7 +97,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
    /* We only care about one or two regs of response, but the sampler always
     * writes 4/8.
     */
-   inst->regs_written = 4 * dispatch_width / 8;
+   inst->size_written = 4 * dispatch_width / 8 * REG_SIZE;
 
    return dest;
 }
index de08a691055f06fc3c423d9ab459d02715c7d176..cea81e4646a890c0345cb6c949f055b96a3cf2f5 100644 (file)
@@ -421,7 +421,8 @@ inline unsigned
 regs_written(const fs_inst *inst)
 {
    /* XXX - Take into account register-misaligned offsets correctly. */
-   return inst->regs_written;
+   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
+   return DIV_ROUND_UP(inst->size_written, REG_SIZE);
 }
 
 /**
index c12bf09e8359ff748e120b18cf0e294f2c6de439..5e1e61683a22b5d49c13899b0629918f40628f8e 100644 (file)
@@ -1494,11 +1494,11 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
                 * single-result send is probably actually reducing register
                 * pressure.
                 */
-               if (inst->regs_written <= inst->exec_size / 8 &&
-                   chosen_inst->regs_written > chosen_inst->exec_size / 8) {
+               if (inst->size_written <= inst->exec_size / 8 * REG_SIZE &&
+                   chosen_inst->size_written > chosen_inst->exec_size / 8 * REG_SIZE) {
                   chosen = n;
                   continue;
-               } else if (inst->regs_written > chosen_inst->regs_written) {
+               } else if (inst->size_written > chosen_inst->size_written) {
                   continue;
                }
             }
index 66264b4ea7e5ddc41c0adf7c527d8984f2579408..2173f3226e13a4cb82af84e609b87e9ef240d399 100644 (file)
@@ -138,6 +138,7 @@ struct backend_instruction {
    int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
    uint8_t target; /**< MRT target. */
    uint8_t regs_written; /**< Number of registers written by the instruction. */
+   unsigned size_written; /**< Data written to the destination register in bytes. */
 
    enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
    enum brw_conditional_mod conditional_mod; /**< BRW_CONDITIONAL_* */
index 8ba7bc594816239767cb486512870a4bea638bd6..f71c6ee1e4203f35ea318a693d682bde38d92723 100644 (file)
@@ -281,7 +281,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
    fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(offset(dest, bld, 2), src0, src1);
    bld.emit(SHADER_OPCODE_TEX, dest, src2)
-      ->regs_written = 4;
+      ->size_written = 4 * REG_SIZE;
    bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
index fd623a5e00fd346c159631d9ec48d9656963fb57..680fe72dfd5c98f32a9d4ffa6170d0d9f55e1a8a 100644 (file)
@@ -525,7 +525,7 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
    fs_reg src2 = v->vgrf(glsl_type::vec2_type);
    bld.ADD(offset(dst0, bld, 2), src0, src1);
    bld.emit(SHADER_OPCODE_TEX, dst0, src2)
-      ->regs_written = 4;
+      ->size_written = 4 * REG_SIZE;
    set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));
 
    /* = Before =