i965/fs: double regioning parameters and execsize for DF in IVB/BYT

author Juan A. Suarez Romero <jasuarez@igalia.com>

Mon, 18 Jul 2016 07:27:56 +0000 (07:27 +0000)

committer Francisco Jerez <currojerez@riseup.net>

Fri, 14 Apr 2017 21:56:07 +0000 (14:56 -0700)
author Juan A. Suarez Romero <jasuarez@igalia.com>
Mon, 18 Jul 2016 07:27:56 +0000 (07:27 +0000)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 14 Apr 2017 21:56:07 +0000 (14:56 -0700)
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp

index aeed6a119776751f98ca3897518b86642d275090..bcb6608d3aeb36dd4769aef0027fa4175667d816 100644 (file)
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -53,13 +53,14 @@ brw_file_from_reg(fs_reg *reg)
  }
  
  static struct brw_reg
-brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
+brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst,
+                    fs_reg *reg, bool compressed)
  {
     struct brw_reg brw_reg;
  
     switch (reg->file) {
     case MRF:
-      assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(gen));
+      assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
        /* Fallthrough */
     case VGRF:
        if (reg->stride == 0) {
@@ -92,6 +93,34 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
           const unsigned width = MIN2(reg_width, phys_width);
           brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
           brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
+         /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13):
+          *  "Each DF (Double Float) operand uses an element size of 4 rather
+          *   than 8 and all regioning parameters are twice what the values
+          *   would be based on the true element size: ExecSize, Width,
+          *   HorzStride, and VertStride. Each DF operand uses a pair of
+          *   channels and all masking and swizzing should be adjusted
+          *   appropriately."
+          *
+          * From the IvyBridge PRM (Special Requirements for Handling Double
+          * Precision Data Types, page 71):
+          *  "In Align1 mode, all regioning parameters like stride, execution
+          *   size, and width must use the syntax of a pair of packed
+          *   floats. The offsets for these data types must be 64-bit
+          *   aligned. The execution size and regioning parameters are in terms
+          *   of floats."
+          *
+          * Summarized: when handling DF-typed arguments, ExecSize,
+          * VertStride, and Width must be doubled.
+          *
+          * It applies to BayTrail too.
+          */
+         if (devinfo->gen == 7 && !devinfo->is_haswell &&
+             type_sz(reg->type) == 8) {
+            brw_reg.width++;
+            if (brw_reg.vstride > 0)
+               brw_reg.vstride++;
+            assert(brw_reg.hstride == BRW_HORIZONTAL_STRIDE_1);
+         }
        }
  
        brw_reg = retype(brw_reg, reg->type);
@@ -1585,9 +1614,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
        brw_set_default_group(p, inst->group);
  
        for (unsigned int i = 0; i < inst->sources; i++) {
-         src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
-                                      compressed);
-
+         src[i] = brw_reg_from_fs_reg(devinfo, inst,
+                                      &inst->src[i], compressed);
          /* The accumulator result appears to get used for the
           * conditional modifier generation.  When negating a UD
           * value, there is a 33rd bit generated for the sign in the
@@ -1598,7 +1626,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
                 inst->src[i].type != BRW_REGISTER_TYPE_UD ||
                 !inst->src[i].negate);
        }
-      dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen, compressed);
+      dst = brw_reg_from_fs_reg(devinfo, inst,
+                                &inst->dst, compressed);
  
        brw_set_default_access_mode(p, BRW_ALIGN_1);
        brw_set_default_predicate_control(p, inst->predicate);
@@ -1607,7 +1636,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
        brw_set_default_saturate(p, inst->saturate);
        brw_set_default_mask_control(p, inst->force_writemask_all);
        brw_set_default_acc_write_control(p, inst->writes_accumulator);
-      brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
+
+      unsigned exec_size = inst->exec_size;
+      if (devinfo->gen == 7 && !devinfo->is_haswell &&
+          (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8)) {
+         exec_size *= 2;
+      }
+
+      brw_set_default_exec_size(p, cvt(exec_size) - 1);
  
        assert(inst->force_writemask_all || inst->exec_size >= 4);
        assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
author	Juan A. Suarez Romero <jasuarez@igalia.com>
	Mon, 18 Jul 2016 07:27:56 +0000 (07:27 +0000)
committer	Francisco Jerez <currojerez@riseup.net>
	Fri, 14 Apr 2017 21:56:07 +0000 (14:56 -0700)