i965/fs: No need to set compression control at the top of generate_code().

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_generator.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index ad32b6c31482851be47344671753d68bc6e4a71f..3ac27f224b5dfde780f463c43e73ecab6cd910d0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -509,47 +509,6 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
     brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
  }
  
-void
-fs_generator::generate_stencil_ref_packing(fs_inst *inst,
-                                           struct brw_reg dst,
-                                           struct brw_reg src)
-{
-   assert(dispatch_width == 8);
-   assert(devinfo->gen >= 9);
-
-   /* Stencil value updates are provided in 8 slots of 1 byte per slot.
-    * Presumably, in order to save memory bandwidth, the stencil reference
-    * values written from the FS need to be packed into 2 dwords (this makes
-    * sense because the stencil values are limited to 1 byte each and a SIMD8
-    * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.)
-    *
-    * The spec is confusing here because in the payload definition of MDP_RTW_S8
-    * (Message Data Payload for Render Target Writes with Stencil 8b) the
-    * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
-    * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
-    * packed values specified above and diagrammed below:
-    *
-    *     31                             0
-    *     --------------------------------
-    * DW  |                              |
-    * 2-7 |            IGNORED           |
-    *     |                              |
-    *     --------------------------------
-    * DW1 | STC   | STC   | STC   | STC  |
-    *     | slot7 | slot6 | slot5 | slot4|
-    *     --------------------------------
-    * DW0 | STC   | STC   | STC   | STC  |
-    *     | slot3 | slot2 | slot1 | slot0|
-    *     --------------------------------
-    */
-
-   src.vstride = BRW_VERTICAL_STRIDE_4;
-   src.width = BRW_WIDTH_1;
-   src.hstride = BRW_HORIZONTAL_STRIDE_0;
-   assert(src.type == BRW_REGISTER_TYPE_UB);
-   brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src);
-}
-
  void
  fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
  {
@@ -1038,34 +997,6 @@ fs_generator::generate_ddy(enum opcode opcode,
                             struct brw_reg dst, struct brw_reg src)
  {
     if (opcode == FS_OPCODE_DDY_FINE) {
-      /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
-       * Region Restrictions):
-       *
-       *     In Align16 access mode, SIMD16 is not allowed for DW operations
-       *     and SIMD8 is not allowed for DF operations.
-       *
-       * In this context, "DW operations" means "operations acting on 32-bit
-       * values", so it includes operations on floats.
-       *
-       * Gen4 has a similar restriction.  From the i965 PRM, section 11.5.3
-       * (Instruction Compression -> Rules and Restrictions):
-       *
-       *     A compressed instruction must be in Align1 access mode. Align16
-       *     mode instructions cannot be compressed.
-       *
-       * Similar text exists in the g45 PRM.
-       *
-       * Empirically, compressed align16 instructions using odd register
-       * numbers don't appear to work on Sandybridge either.
-       *
-       * On these platforms, if we're building a SIMD16 shader, we need to
-       * manually unroll to a pair of SIMD8 instructions.
-       */
-      bool unroll_to_simd8 =
-         (dispatch_width == 16 &&
-          (devinfo->gen == 4 || devinfo->gen == 6 ||
-           (devinfo->gen == 7 && !devinfo->is_haswell)));
-
        /* produce accurate derivatives */
        struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                      src.negate, src.abs,
@@ -1083,15 +1014,7 @@ fs_generator::generate_ddy(enum opcode opcode,
                                      BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
        brw_push_insn_state(p);
        brw_set_default_access_mode(p, BRW_ALIGN_16);
-      if (unroll_to_simd8) {
-         brw_set_default_exec_size(p, BRW_EXECUTE_8);
-         brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
-         brw_ADD(p, firsthalf(dst), negate(firsthalf(src0)), firsthalf(src1));
-         brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
-         brw_ADD(p, sechalf(dst), negate(sechalf(src0)), sechalf(src1));
-      } else {
-         brw_ADD(p, dst, negate(src0), src1);
-      }
+      brw_ADD(p, dst, negate(src0), src1);
        brw_pop_insn_state(p);
     } else {
        /* replicate the derivative at the top-left pixel to other pixels */
@@ -1307,7 +1230,7 @@ fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
     gen6_resolve_implied_move(p, &header, inst->base_mrf);
  
     brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_inst_set_qtr_control(p->devinfo, send, BRW_COMPRESSION_NONE);
+   brw_inst_set_compression(devinfo, send, false);
     brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
     brw_set_src0(p, send, header);
     if (devinfo->gen < 6)
@@ -1618,8 +1541,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
        brw_NOP(p);
  
     this->dispatch_width = dispatch_width;
-   if (dispatch_width == 16)
-      brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  
     int start_offset = p->next_insn_offset;
     int spill_count = 0, fill_count = 0;
@@ -1841,44 +1762,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
  
        case BRW_OPCODE_BFI1:
           assert(devinfo->gen >= 7);
-         /* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
-          * should
-          *
-          *    "Force BFI instructions to be executed always in SIMD8."
-          */
-         if (dispatch_width == 16 && devinfo->is_haswell) {
-            brw_set_default_exec_size(p, BRW_EXECUTE_8);
-            brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
-            brw_BFI1(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]));
-            brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
-            brw_BFI1(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]));
-            brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-         } else {
-            brw_BFI1(p, dst, src[0], src[1]);
-         }
+         brw_BFI1(p, dst, src[0], src[1]);
           break;
        case BRW_OPCODE_BFI2:
           assert(devinfo->gen >= 7);
           brw_set_default_access_mode(p, BRW_ALIGN_16);
-         /* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
-          * should
-          *
-          *    "Force BFI instructions to be executed always in SIMD8."
-          *
-          * Otherwise we would be able to emit compressed instructions like we
-          * do for the other three-source instructions.
-          */
-         if (dispatch_width == 16 && devinfo->is_haswell) {
-            brw_set_default_exec_size(p, BRW_EXECUTE_8);
-            brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
-            brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
-            brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
-            brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
-            brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-         } else {
-            brw_BFI2(p, dst, src[0], src[1], src[2]);
-         }
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
+         brw_BFI2(p, dst, src[0], src[1], src[2]);
           break;
  
        case BRW_OPCODE_IF:
@@ -2118,28 +2007,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           brw_broadcast(p, dst, src[0], src[1]);
           break;
  
-      case SHADER_OPCODE_EXTRACT_BYTE: {
-         assert(src[0].type == BRW_REGISTER_TYPE_D ||
-                src[0].type == BRW_REGISTER_TYPE_UD);
-
-         enum brw_reg_type type =
-            src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_B
-                                               : BRW_REGISTER_TYPE_UB;
-         brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 4));
-         break;
-      }
-
-      case SHADER_OPCODE_EXTRACT_WORD: {
-         assert(src[0].type == BRW_REGISTER_TYPE_D ||
-                src[0].type == BRW_REGISTER_TYPE_UD);
-
-         enum brw_reg_type type =
-            src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_W
-                                               : BRW_REGISTER_TYPE_UW;
-         brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 2));
-         break;
-      }
-
        case FS_OPCODE_SET_SAMPLE_ID:
           generate_set_sample_id(inst, dst, src[0], src[1]);
           break;
@@ -2192,10 +2059,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          generate_barrier(inst, src[0]);
          break;
  
-      case FS_OPCODE_PACK_STENCIL_REF:
-         generate_stencil_ref_packing(inst, dst, src[0]);
-         break;
-
        default:
           unreachable("Unsupported opcode");