intel/eu: Switch to a logical state stack
author Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 29 May 2018 21:37:35 +0000 (14:37 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Mon, 4 Jun 2018 21:03:03 +0000 (14:03 -0700)
Instead of the state stack that's based on copying a dummy instruction
around, we start using a logical stack of brw_insn_states.  This uses a
bit less memory and is way less conceptually bogus.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_eu.c
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c

index 5375209d4fc8a890eb0ff316383896af2a44609e..d0e4ea2070426e66f8e63b34fd41374cb4471d1a 100644 (file)
@@ -129,91 +129,76 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
 unsigned
 brw_get_default_exec_size(struct brw_codegen *p)
 {
-   return brw_inst_exec_size(p->devinfo, p->current);
+   return p->current->exec_size;
 }
 
 unsigned
 brw_get_default_group(struct brw_codegen *p)
 {
-   if (p->devinfo->gen >= 6) {
-      unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8;
-      if (p->devinfo->gen >= 7)
-         group += brw_inst_nib_control(p->devinfo, p->current) * 4;
-      return group;
-   } else {
-      unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current);
-      if (qtr_control == BRW_COMPRESSION_COMPRESSED)
-         return 0;
-      else
-         return qtr_control * 8;
-   }
+   return p->current->group;
 }
 
 unsigned
 brw_get_default_access_mode(struct brw_codegen *p)
 {
-   return brw_inst_access_mode(p->devinfo, p->current);
+   return p->current->access_mode;
 }
 
 void
 brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
 {
-   brw_inst_set_exec_size(p->devinfo, p->current, value);
+   p->current->exec_size = value;
 }
 
 void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc )
 {
-   brw_inst_set_pred_control(p->devinfo, p->current, pc);
+   p->current->predicate = pc;
 }
 
 void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
 {
-   brw_inst_set_pred_inv(p->devinfo, p->current, predicate_inverse);
+   p->current->pred_inv = predicate_inverse;
 }
 
 void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
 {
-   if (p->devinfo->gen >= 7)
-      brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg);
-
-   brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg);
+   assert(subreg < 2);
+   p->current->flag_subreg = reg * 2 + subreg;
 }
 
 void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
 {
-   brw_inst_set_access_mode(p->devinfo, p->current, access_mode);
+   p->current->access_mode = access_mode;
 }
 
 void
 brw_set_default_compression_control(struct brw_codegen *p,
                            enum brw_compression compression_control)
 {
-   if (p->devinfo->gen >= 6) {
-      /* Since we don't use the SIMD32 support in gen6, we translate
-       * the pre-gen6 compression control here.
+   switch (compression_control) {
+   case BRW_COMPRESSION_NONE:
+      /* This is the "use the first set of bits of dmask/vmask/arf
+       * according to execsize" option.
        */
-      switch (compression_control) {
-      case BRW_COMPRESSION_NONE:
-        /* This is the "use the first set of bits of dmask/vmask/arf
-         * according to execsize" option.
-         */
-         brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1Q);
-        break;
-      case BRW_COMPRESSION_2NDHALF:
-        /* For SIMD8, this is "use the second set of 8 bits." */
-         brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_2Q);
-        break;
-      case BRW_COMPRESSION_COMPRESSED:
-        /* For SIMD16 instruction compression, use the first set of 16 bits
-         * since we don't do SIMD32 dispatch.
-         */
-         brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1H);
-        break;
-      default:
-         unreachable("not reached");
-      }
-   } else {
-      brw_inst_set_qtr_control(p->devinfo, p->current, compression_control);
+      p->current->group = 0;
+      break;
+   case BRW_COMPRESSION_2NDHALF:
+      /* For SIMD8, this is "use the second set of 8 bits." */
+      p->current->group = 8;
+      break;
+   case BRW_COMPRESSION_COMPRESSED:
+      /* For SIMD16 instruction compression, use the first set of 16 bits
+       * since we don't do SIMD32 dispatch.
+       */
+      p->current->group = 0;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   if (p->devinfo->gen <= 6) {
+      p->current->compressed =
+         (compression_control == BRW_COMPRESSION_COMPRESSED);
    }
 }
 
@@ -246,7 +231,7 @@ brw_inst_set_compression(const struct gen_device_info *devinfo,
 void
 brw_set_default_compression(struct brw_codegen *p, bool on)
 {
-   brw_inst_set_compression(p->devinfo, p->current, on);
+   p->current->compressed = on;
 }
 
 /**
@@ -283,23 +268,22 @@ brw_inst_set_group(const struct gen_device_info *devinfo,
 void
 brw_set_default_group(struct brw_codegen *p, unsigned group)
 {
-   brw_inst_set_group(p->devinfo, p->current, group);
+   p->current->group = group;
 }
 
 void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
 {
-   brw_inst_set_mask_control(p->devinfo, p->current, value);
+   p->current->mask_control = value;
 }
 
 void brw_set_default_saturate( struct brw_codegen *p, bool enable )
 {
-   brw_inst_set_saturate(p->devinfo, p->current, enable);
+   p->current->saturate = enable;
 }
 
 void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
 {
-   if (p->devinfo->gen >= 6)
-      brw_inst_set_acc_wr_control(p->devinfo, p->current, value);
+   p->current->acc_wr_control = value;
 }
 
 void brw_push_insn_state( struct brw_codegen *p )
index d709d12b671485764124c7e51b7392081f7e846f..0f07eeb3d6dc2933e6b768795b143a0d65056659 100644 (file)
@@ -46,6 +46,36 @@ extern "C" {
 
 #define BRW_EU_MAX_INSN_STACK 5
 
+struct brw_insn_state {
+   /* One of BRW_EXECUTE_* */
+   unsigned exec_size:3;
+
+   /* Group in units of channels */
+   unsigned group:5;
+
+   /* Compression control on gen4-5 */
+   bool compressed:1;
+
+   /* One of BRW_MASK_* */
+   unsigned mask_control:1;
+
+   bool saturate:1;
+
+   /* One of BRW_ALIGN_* */
+   unsigned access_mode:1;
+
+   /* One of BRW_PREDICATE_* */
+   enum brw_predicate predicate:4;
+
+   bool pred_inv:1;
+
+   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
+   unsigned flag_subreg:2;
+
+   bool acc_wr_control:1;
+};
+
+
 /* A helper for accessing the last instruction emitted.  This makes it easy
  * to set various bits on an instruction without having to create temporary
  * variable and assign the emitted instruction to those.
@@ -62,8 +92,8 @@ struct brw_codegen {
 
    /* Allow clients to push/pop instruction state:
     */
-   brw_inst stack[BRW_EU_MAX_INSN_STACK];
-   brw_inst *current;
+   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
+   struct brw_insn_state *current;
 
    /** Whether or not the user wants automatic exec sizes
     *
index 412a051bc93a7069d80b7439e1280ee14f42e4cd..d3246edde44026d2c15eecab6a92cf22f6141ae7 100644 (file)
@@ -621,72 +621,6 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
    brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
 }
 
-struct brw_insn_state {
-   /* One of BRW_EXECUTE_* */
-   unsigned exec_size:3;
-
-   /* Group in units of channels */
-   unsigned group:5;
-
-   /* Compression control on gen4-5 */
-   bool compressed:1;
-
-   /* One of BRW_MASK_* */
-   unsigned mask_control:1;
-
-   bool saturate:1;
-
-   /* One of BRW_ALIGN_* */
-   unsigned access_mode:1;
-
-   /* One of BRW_PREDICATE_* */
-   enum brw_predicate predicate:4;
-
-   bool pred_inv:1;
-
-   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
-   unsigned flag_subreg:2;
-
-   bool acc_wr_control:1;
-};
-
-static struct brw_insn_state
-brw_inst_get_state(const struct gen_device_info *devinfo,
-                   const brw_inst *insn)
-{
-   struct brw_insn_state state = { };
-
-   state.exec_size = brw_inst_exec_size(devinfo, insn);
-   if (devinfo->gen >= 6) {
-      state.group = brw_inst_qtr_control(devinfo, insn) * 8;
-      if (devinfo->gen >= 7)
-         state.group += brw_inst_nib_control(devinfo, insn) * 4;
-   } else {
-      unsigned qtr_control = brw_inst_qtr_control(devinfo, insn);
-      if (qtr_control == BRW_COMPRESSION_COMPRESSED) {
-         state.group = 0;
-         state.compressed = true;
-      } else {
-         state.group = qtr_control * 8;
-         state.compressed = false;
-      }
-   }
-   state.access_mode = brw_inst_access_mode(devinfo, insn);
-   state.mask_control = brw_inst_mask_control(devinfo, insn);
-   state.saturate = brw_inst_saturate(devinfo, insn);
-   state.predicate = brw_inst_pred_control(devinfo, insn);
-   state.pred_inv = brw_inst_pred_inv(devinfo, insn);
-
-   state.flag_subreg = brw_inst_flag_subreg_nr(devinfo, insn);
-   if (devinfo->gen >= 7)
-      state.flag_subreg += brw_inst_flag_reg_nr(devinfo, insn) * 2;
-
-   if (devinfo->gen >= 6)
-      state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn);
-
-   return state;
-}
-
 static void
 brw_inst_set_state(const struct gen_device_info *devinfo,
                    brw_inst *insn,
@@ -735,8 +669,7 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode)
    brw_inst_set_opcode(devinfo, insn, opcode);
 
    /* Apply the default instruction state */
-   struct brw_insn_state current = brw_inst_get_state(devinfo, p->current);
-   brw_inst_set_state(devinfo, insn, &current);
+   brw_inst_set_state(devinfo, insn, p->current);
 
    return insn;
 }
@@ -3504,9 +3437,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
           */
          inst = brw_FBL(p, vec1(dst), exec_mask);
       } else {
-         const struct brw_reg flag = brw_flag_reg(
-            brw_inst_flag_reg_nr(devinfo, p->current),
-            brw_inst_flag_subreg_nr(devinfo, p->current));
+         const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2,
+                                                  p->current->flag_subreg % 2);
 
          brw_set_default_exec_size(p, BRW_EXECUTE_1);
          brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));