i965/fs: Expose arbitrary channel execution groups to the IR.

author Francisco Jerez <currojerez@riseup.net>

Fri, 20 May 2016 23:14:13 +0000 (16:14 -0700)

committer Francisco Jerez <currojerez@riseup.net>

Sat, 28 May 2016 06:29:04 +0000 (23:29 -0700)
author Francisco Jerez <currojerez@riseup.net>
Fri, 20 May 2016 23:14:13 +0000 (16:14 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Sat, 28 May 2016 06:29:04 +0000 (23:29 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 5c5354e34f494024d138db329ecf03e9f0e534ff..98940dae8be1d98a4f58489cc81e5c8303e98ffe 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3621,7 +3621,7 @@ fs_visitor::lower_integer_multiplication()
              mul->src[1].stride *= 2;
  
           } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
-                    inst->force_sechalf) {
+                    inst->group > 0) {
              /* Among other things the quarter control bits influence which
               * accumulator register is used by the hardware for instructions
               * that access the accumulator implicitly (e.g. MACH).  A
@@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication()
               * to get the result masked correctly according to the current
               * channel enables.
               */
-            mach->force_sechalf = false;
+            mach->group = 0;
              mach->force_writemask_all = true;
              mach->dst = ibld.vgrf(inst->dst.type);
              ibld.MOV(inst->dst, mach->dst);
@@ -3774,8 +3774,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
        sample_mask.stride *= 2;
  
        bld.exec_all().annotate("FB write oMask")
-         .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
-                   inst->force_sechalf),
+         .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
+                           inst->group),
                sample_mask);
        length++;
     }
@@ -5008,10 +5008,10 @@ fs_visitor::lower_simd_width()
            * execution size of the builder to the highest of both for now so
            * we're sure that both cases can be handled.
            */
+         const unsigned max_width = MAX2(inst->exec_size, lower_width);
           const fs_builder ibld = bld.at(block, inst)
                                      .exec_all(inst->force_writemask_all)
-                                    .group(MAX2(inst->exec_size, lower_width),
-                                           inst->force_sechalf);
+                                    .group(max_width, inst->group / max_width);
  
           /* Split the copies in chunks of the execution width of either the
            * original or the lowered instruction, whichever is lower.
@@ -5343,12 +5343,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
     if (inst->force_writemask_all)
        fprintf(file, "NoMask ");
  
-   if (dispatch_width == 16 && inst->exec_size == 8) {
-      if (inst->force_sechalf)
-         fprintf(file, "2ndhalf ");
-      else
-         fprintf(file, "1sthalf ");
-   }
+   if (inst->exec_size != dispatch_width)
+      fprintf(file, "group%d ", inst->group);
  
     fprintf(file, "\n");
  }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h

index b50dda421ae062521286cb4cc1ac7ee9db279541..c1d13a2b14a706e02197aa5d68d8b2890ff86e2e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -72,7 +72,7 @@ namespace brw {
        fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
           shader(shader), block(block), cursor(inst),
           _dispatch_width(inst->exec_size),
-         _group(inst->force_sechalf ? 8 : 0),
+         _group(inst->group),
           force_writemask_all(inst->force_writemask_all)
        {
           annotation.str = inst->annotation;
@@ -167,6 +167,15 @@ namespace brw {
           return _dispatch_width;
        }
  
+      /**
+       * Get the channel group in use (the value written to inst->group).
+       */
+      unsigned
+      group() const
+      {
+         return _group;
+      }
+
        /**
         * Allocate a virtual register of natural vector size (one for this IR)
         * and SIMD width.  \p n gives the amount of space to allocate in
@@ -353,9 +362,8 @@ namespace brw {
           assert(inst->exec_size <= 32);
           assert(inst->exec_size == dispatch_width() ||
                  force_writemask_all);
-         assert(_group == 0 || _group == 8);
  
-         inst->force_sechalf = (_group == 8);
+         inst->group = _group;
           inst->force_writemask_all = force_writemask_all;
           inst->annotation = annotation.str;
           inst->ir = annotation.ir;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp

index 9c39106bbc2cb680d656cd0bf7d51b5140aeabdb..159bf5dd6e09a038522aea9b7e7f5249318a6e69 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
     return a->opcode == b->opcode &&
            a->force_writemask_all == b->force_writemask_all &&
            a->exec_size == b->exec_size &&
-          a->force_sechalf == b->force_sechalf &&
+          a->group == b->group &&
            a->saturate == b->saturate &&
            a->predicate == b->predicate &&
            a->predicate_inverse == b->predicate_inverse &&
@@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
        copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
     } else {
        copy = bld.MOV(inst->dst, src);
-      copy->force_sechalf = inst->force_sechalf;
+      copy->group = inst->group;
        copy->force_writemask_all = inst->force_writemask_all;
        copy->src[0].negate = negate;
     }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 3b0717e17d4968690b34bd38955771a31b35c73b..4ca3dad866c86b20b94aaaa7dc28a7935a46c05c 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
     if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
        msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
     else if (prog_data->dual_src_blend) {
-      if (!inst->force_sechalf)
+      if (!inst->group)
           msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
        else
           msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
@@ -1076,7 +1076,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
     brw_set_default_compression(p, lower_size > 8);
  
     for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
-      brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size * i);
+      brw_set_default_group(p, inst->group + lower_size * i);
  
        brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
                retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
@@ -1620,7 +1620,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
        const bool compressed =
             inst->dst.component_size(inst->exec_size) > REG_SIZE;
        brw_set_default_compression(p, compressed);
-      brw_set_default_group(p, inst->force_sechalf ? 8 : 0);
+      brw_set_default_group(p, inst->group);
  
        for (unsigned int i = 0; i < inst->sources; i++) {
           src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
@@ -1648,6 +1648,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
        brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
  
        assert(inst->force_writemask_all || inst->exec_size >= 8);
+      assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
        assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
        assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp

index 8613725f6b9b233e1dac3f835289fda838027ba7..8cd897f72e04acfa440b99a757b7043f64fae716 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
@@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel()
           /* Check that the MOVs are the right form. */
           if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
               then_mov[i]->exec_size != else_mov[i]->exec_size ||
-             then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
+             then_mov[i]->group != else_mov[i]->group ||
               then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
               then_mov[i]->is_partial_write() ||
               else_mov[i]->is_partial_write() ||
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h

index 9c39a05f4ce13d68411cd985d10a77851856c6e6..13f4e151433f693c0d89e59355c6e38a3ef71b8d 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -291,21 +291,19 @@ public:
      */
     uint8_t exec_size;
  
+   /**
+    * Channel group from the hardware execution and predication mask that
+    * should be applied to the instruction.  The subset of channel enable
+    * signals (calculated from the EU control flow and predication state)
+    * given by [group, group + exec_size) will be used to mask GRF writes and
+    * any other side effects of the instruction.
+    */
+   uint8_t group;
+
     bool eot:1;
-   bool force_sechalf:1;
     bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
  };
  
-/**
- * Set second-half quarter control on \p inst.
- */
-static inline fs_inst *
-set_sechalf(fs_inst *inst)
-{
-   inst->force_sechalf = true;
-   return inst;
-}
-
  /**
   * Make the execution of \p inst dependent on the evaluation of a possibly
   * inverted predicate.
author	Francisco Jerez <currojerez@riseup.net>
	Fri, 20 May 2016 23:14:13 +0000 (16:14 -0700)
committer	Francisco Jerez <currojerez@riseup.net>
	Sat, 28 May 2016 06:29:04 +0000 (23:29 -0700)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_builder.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_cse.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_generator.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_ir_fs.h		patch \| blob \| history