intel/fs: Be more explicit about our placement of [un]zip

author    Jason Ekstrand <jason.ekstrand@intel.com>  Thu, 7 Sep 2017 01:31:11 +0000 (18:31 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>  Tue, 7 Nov 2017 18:37:52 +0000 (10:37 -0800)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp

index befe421d214964572ff189b1fbbfc362ebb31e2e..965eb86f65e778ac3bb523445725384fe421a945 100644 (file)
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5196,6 +5196,20 @@ fs_visitor::lower_simd_width()
  
           assert(!inst->writes_accumulator && !inst->mlen);
  
+         /* Inserting the zip, unzip, and duplicated instructions in all of
+          * the right spots is somewhat tricky.  All of the unzip
+          * instructions, and any zip instructions which unzip the
+          * destination prior to writing, need to happen before all of the
+          * per-group instructions, and the zip instructions need to happen
+          * after.  To sort this out, we insert the unzip instructions
+          * before \p inst, the per-group instructions after \p inst (i.e.
+          * before inst->next), and the zip instructions before the
+          * instruction that originally followed \p inst.  Inserting after
+          * \p inst makes inst->next a moving target, so we save it off here
+          * to keep the zip instructions in the right place.  It also leaves
+          * the per-group instructions in reverse order: group 0 ends up last.
+          */
+         exec_node *const after_inst = inst->next;
           for (unsigned i = 0; i < n; i++) {
              /* Emit a copy of the original instruction with the lowered width.
               * If the EOT flag was set throw it away except for the last
@@ -5203,7 +5217,7 @@ fs_visitor::lower_simd_width()
               */
              fs_inst split_inst = *inst;
              split_inst.exec_size = lower_width;
-            split_inst.eot = inst->eot && i == n - 1;
+            split_inst.eot = inst->eot && i == 0;
  
              /* Select the correct channel enables for the i-th group, then
               * transform the sources and destination and emit the lowered
@@ -5215,11 +5229,11 @@ fs_visitor::lower_simd_width()
                 split_inst.src[j] = emit_unzip(lbld.at(block, inst), inst, j);
  
              split_inst.dst = emit_zip(lbld.at(block, inst),
-                                      lbld.at(block, inst->next), inst);
+                                      lbld.at(block, after_inst), inst);
              split_inst.size_written =
                 split_inst.dst.component_size(lower_width) * dst_size;
  
-            lbld.emit(split_inst);
+            lbld.at(block, inst->next).emit(split_inst);
           }
  
           inst->remove(block);
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Thu, 7 Sep 2017 01:31:11 +0000 (18:31 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Tue, 7 Nov 2017 18:37:52 +0000 (10:37 -0800)