i965: Add a new infrastructure for generating Broadwell shader assembly.
author: Kenneth Graunke <kenneth@whitecape.org>
        Fri, 7 Dec 2012 06:36:50 +0000 (22:36 -0800)
committer: Kenneth Graunke <kenneth@whitecape.org>
        Sun, 19 Jan 2014 05:55:54 +0000 (21:55 -0800)
This replaces the brw_eu_emit.c layer for Broadwell.  It will be
used by both the vector and scalar shader backends.

v2: Port to use the C-based instruction representation.

v3: Fix destination register type for CMP.

v4: Pass brw to gen8_instruction functions (required by rebase).

v5: Remove bogus assertion on math instructions (caught by Piglit).

v6: Remove more restrictions on math instructions (caught by Eric).
    Make ADDC and SUBB helpers set accumulator writes, like MAC and
    MACH (caught by Matt).

v7: Don't implicitly force ALU3 operations to SIMD8 (we've been able
    to do SIMD16 versions since Haswell, but didn't when I originally
    wrote this code).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/gen8_generator.cpp [new file with mode: 0644]
src/mesa/drivers/dri/i965/gen8_generator.h [new file with mode: 0644]

index 4c629ccda32f7a90f8f68dc2f48922df12897ee7..ade40eded5ab12ff7ab8d15762bf9ed97ddfb19b 100644 (file)
@@ -140,5 +140,6 @@ i965_FILES = \
        gen7_wm_state.c \
        gen7_wm_surface_state.c \
        gen8_disasm.c \
+       gen8_generator.cpp \
        gen8_instruction.c \
         $()
diff --git a/src/mesa/drivers/dri/i965/gen8_generator.cpp b/src/mesa/drivers/dri/i965/gen8_generator.cpp
new file mode 100644 (file)
index 0000000..ee5f792
--- /dev/null
@@ -0,0 +1,643 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file gen8_generator.cpp
+ *
+ * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
+ */
+
+extern "C" {
+#include "main/compiler.h"
+#include "main/macros.h"
+#include "brw_context.h"
+} /* extern "C" */
+
+#include "glsl/ralloc.h"
+#include "brw_eu.h"
+#include "brw_reg.h"
+#include "gen8_generator.h"
+
+gen8_generator::gen8_generator(struct brw_context *brw,
+                               struct gl_shader_program *shader_prog,
+                               struct gl_program *prog,
+                               void *mem_ctx)
+   : shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx)
+{
+   ctx = &brw->ctx;
+
+   memset(&default_state, 0, sizeof(default_state));
+   default_state.mask_control = BRW_MASK_ENABLE;
+
+   store_size = 1024;
+   store = rzalloc_array(mem_ctx, gen8_instruction, store_size);
+   nr_inst = 0;
+   next_inst_offset = 0;
+
+   /* Set up the control flow stacks. */
+   if_stack_depth = 0;
+   if_stack_array_size = 16;
+   if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);
+
+   loop_stack_depth = 0;
+   loop_stack_array_size = 16;
+   loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);
+}
+
+/**
+ * Destructor.  Does no work of its own; the store and the control flow
+ * stacks were allocated against mem_ctx in the constructor and are not
+ * freed here.
+ */
+gen8_generator::~gen8_generator() {}
+
+gen8_instruction *
+gen8_generator::next_inst(unsigned opcode)
+{
+   gen8_instruction *inst;
+
+   if (nr_inst + 1 > unsigned(store_size)) {
+      store_size <<= 1;
+      store = reralloc(mem_ctx, store, gen8_instruction, store_size);
+      assert(store);
+   }
+
+   next_inst_offset += 16;
+   inst = &store[nr_inst++];
+
+   memset(inst, 0, sizeof(gen8_instruction));
+
+   gen8_set_opcode(inst, opcode);
+   gen8_set_exec_size(inst, default_state.exec_size);
+   gen8_set_access_mode(inst, default_state.access_mode);
+   gen8_set_mask_control(inst, default_state.mask_control);
+   gen8_set_cond_modifier(inst, default_state.conditional_mod);
+   gen8_set_pred_control(inst, default_state.predicate);
+   gen8_set_pred_inv(inst, default_state.predicate_inverse);
+   gen8_set_saturate(inst, default_state.saturate);
+   gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr);
+   return inst;
+}
+
+/* Emitter-generating macros.
+ *
+ * Each macro below expands to the definition of a gen8_generator method
+ * named after the opcode (OP), which appends a BRW_OPCODE_<OP> instruction
+ * via next_inst() and encodes its operands.
+ */
+
+/* One-source instruction: destination plus src0. */
+#define ALU1(OP)                                           \
+gen8_instruction *                                         \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \
+{                                                          \
+   gen8_instruction *inst = next_inst(BRW_OPCODE_##OP);    \
+   gen8_set_dst(brw, inst, dst);                           \
+   gen8_set_src0(brw, inst, src);                          \
+   return inst;                                            \
+}
+
+/* Two-source instruction: destination plus src0 and src1. */
+#define ALU2(OP)                                                             \
+gen8_instruction *                                                           \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
+{                                                                            \
+   gen8_instruction *inst = next_inst(BRW_OPCODE_##OP);                      \
+   gen8_set_dst(brw, inst, dst);                                             \
+   gen8_set_src0(brw, inst, s0);                                             \
+   gen8_set_src1(brw, inst, s1);                                             \
+   return inst;                                                              \
+}
+
+/* Same as ALU2, but additionally turns on accumulator write control. */
+#define ALU2_ACCUMULATE(OP)                                                  \
+gen8_instruction *                                                           \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
+{                                                                            \
+   gen8_instruction *inst = next_inst(BRW_OPCODE_##OP);                      \
+   gen8_set_dst(brw, inst, dst);                                             \
+   gen8_set_src0(brw, inst, s0);                                             \
+   gen8_set_src1(brw, inst, s1);                                             \
+   gen8_set_acc_wr_control(inst, true);                                      \
+   return inst;                                                              \
+}
+
+/* Three-source instruction; all encoding is delegated to alu3(). */
+#define ALU3(OP)                                          \
+gen8_instruction *                                        \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
+                   struct brw_reg s1,  struct brw_reg s2) \
+{                                                         \
+   return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2);         \
+}
+
+/* Three-source instruction that asserts every operand is of float type
+ * before delegating to alu3().
+ */
+#define ALU3F(OP) \
+gen8_instruction *                                        \
+gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
+                   struct brw_reg s1,  struct brw_reg s2) \
+{                                                         \
+   assert(dst.type == BRW_REGISTER_TYPE_F);               \
+   assert(s0.type == BRW_REGISTER_TYPE_F);                \
+   assert(s1.type == BRW_REGISTER_TYPE_F);                \
+   assert(s2.type == BRW_REGISTER_TYPE_F);                \
+   return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2);         \
+}
+
+/* Instantiate the emitters.  ADDC, SUBB, MAC and MACH use the
+ * accumulator-writing variant; LRP and MAD use the float-only
+ * three-source variant.
+ */
+ALU2(ADD)
+ALU2(AND)
+ALU2(ASR)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(F32TO16)
+ALU1(F16TO32)
+ALU1(BFREV)
+ALU1(CBIT)
+ALU2_ACCUMULATE(ADDC)
+ALU2_ACCUMULATE(SUBB)
+ALU2(DP2)
+ALU2(DP3)
+ALU2(DP4)
+ALU2(DPH)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(FRC)
+ALU2(LINE)
+ALU3F(LRP)
+ALU3F(MAD)
+ALU2(MUL)
+ALU1(MOV)
+ALU1(NOT)
+ALU2(OR)
+ALU2(PLN)
+ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDZ)
+ALU2_ACCUMULATE(MAC)
+ALU2_ACCUMULATE(MACH)
+ALU2(SEL)
+ALU2(SHL)
+ALU2(SHR)
+ALU2(XOR)
+
+/**
+ * Emit a CMP of src0 against src1 using the given conditional modifier.
+ *
+ * The type of \p dst is ignored: the destination is always encoded with
+ * src0's type (see the comment in the body).
+ */
+gen8_instruction *
+gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
+                    struct brw_reg src0, struct brw_reg src1)
+{
+   gen8_instruction *cmp = next_inst(BRW_OPCODE_CMP);
+
+   gen8_set_cond_modifier(cmp, conditional);
+
+   /* The CMP instruction appears to behave erratically for floating point
+    * sources unless the destination type is also float.  Forcing the
+    * destination type to match src0 makes it work in all cases.
+    */
+   struct brw_reg typed_dst = dst;
+   typed_dst.type = src0.type;
+
+   gen8_set_dst(brw, cmp, typed_dst);
+   gen8_set_src0(brw, cmp, src0);
+   gen8_set_src1(brw, cmp, src1);
+
+   return cmp;
+}
+
+/**
+ * Compute the subregister number used by the three-source encoding.
+ *
+ * The base value is reg.subnr / 4.  For a register with a vertical stride
+ * of 0, the swizzle must select a single value, and the first swizzle
+ * channel is added on top of the base.
+ */
+static int
+get_3src_subreg_nr(struct brw_reg reg)
+{
+   const int base = reg.subnr / 4;
+
+   if (reg.vstride != BRW_VERTICAL_STRIDE_0)
+      return base;
+
+   assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
+   return base + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
+}
+
+/**
+ * Emit a three-source instruction.
+ *
+ * All four operands must be directly addressed general registers with a
+ * register number below 128 (a message-register destination is first
+ * remapped into the general register file).  The instruction must be in
+ * Align16 access mode, which comes from default_state via next_inst().
+ *
+ * The source and destination types are both derived from dst.type, which
+ * must be F, D or UD; the types of the source registers are not consulted.
+ */
+gen8_instruction *
+gen8_generator::alu3(unsigned opcode,
+                     struct brw_reg dst,
+                     struct brw_reg src0,
+                     struct brw_reg src1,
+                     struct brw_reg src2)
+{
+   /* MRFs haven't existed since Gen7, so we better not be using them. */
+   if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
+      dst.file = BRW_GENERAL_REGISTER_FILE;
+      dst.nr += GEN7_MRF_HACK_START;
+   }
+
+   gen8_instruction *inst = next_inst(opcode);
+   assert(gen8_access_mode(inst) == BRW_ALIGN_16);
+
+   /* Destination: register number, subregister (subnr / 16) and writemask. */
+   assert(dst.file == BRW_GENERAL_REGISTER_FILE);
+   assert(dst.nr < 128);
+   assert(dst.address_mode == BRW_ADDRESS_DIRECT);
+   assert(dst.type == BRW_REGISTER_TYPE_F ||
+          dst.type == BRW_REGISTER_TYPE_D ||
+          dst.type == BRW_REGISTER_TYPE_UD);
+   gen8_set_dst_3src_reg_nr(inst, dst.nr);
+   gen8_set_dst_3src_subreg_nr(inst, dst.subnr / 16);
+   gen8_set_dst_3src_writemask(inst, dst.dw1.bits.writemask);
+
+   /* Source 0.  Replicate control is set when the vertical stride is 0. */
+   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src0.nr < 128);
+   gen8_set_src0_3src_swizzle(inst, src0.dw1.bits.swizzle);
+   gen8_set_src0_3src_subreg_nr(inst, get_3src_subreg_nr(src0));
+   gen8_set_src0_3src_rep_ctrl(inst, src0.vstride == BRW_VERTICAL_STRIDE_0);
+   gen8_set_src0_3src_reg_nr(inst, src0.nr);
+   gen8_set_src0_3src_abs(inst, src0.abs);
+   gen8_set_src0_3src_negate(inst, src0.negate);
+
+   /* Source 1, encoded the same way as source 0. */
+   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src1.nr < 128);
+   gen8_set_src1_3src_swizzle(inst, src1.dw1.bits.swizzle);
+   gen8_set_src1_3src_subreg_nr(inst, get_3src_subreg_nr(src1));
+   gen8_set_src1_3src_rep_ctrl(inst, src1.vstride == BRW_VERTICAL_STRIDE_0);
+   gen8_set_src1_3src_reg_nr(inst, src1.nr);
+   gen8_set_src1_3src_abs(inst, src1.abs);
+   gen8_set_src1_3src_negate(inst, src1.negate);
+
+   /* Source 2, encoded the same way as source 0. */
+   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src2.nr < 128);
+   gen8_set_src2_3src_swizzle(inst, src2.dw1.bits.swizzle);
+   gen8_set_src2_3src_subreg_nr(inst, get_3src_subreg_nr(src2));
+   gen8_set_src2_3src_rep_ctrl(inst, src2.vstride == BRW_VERTICAL_STRIDE_0);
+   gen8_set_src2_3src_reg_nr(inst, src2.nr);
+   gen8_set_src2_3src_abs(inst, src2.abs);
+   gen8_set_src2_3src_negate(inst, src2.negate);
+
+   /* Set both the source and destination types based on dst.type, ignoring
+    * the source register types.  The MAD and LRP emitters both ensure that
+    * all register types are float.  The BFE and BFI2 emitters, however, may
+    * send us mixed D and UD source types and want us to ignore that.
+    *
+    * There is no default case: the assertion on dst.type above limits it
+    * to the three values handled here.
+    */
+   switch (dst.type) {
+   case BRW_REGISTER_TYPE_F:
+      gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_F);
+      gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_F);
+      break;
+   case BRW_REGISTER_TYPE_D:
+      gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_D);
+      gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_D);
+      break;
+   case BRW_REGISTER_TYPE_UD:
+      gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_UD);
+      gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_UD);
+      break;
+   }
+
+   return inst;
+}
+
+/**
+ * Shared helper behind the MATH emitters: appends a MATH instruction with
+ * the given function, destination and first source.
+ *
+ * The destination and src0 must have the same horizontal stride.
+ */
+gen8_instruction *
+gen8_generator::math(unsigned math_function,
+                     struct brw_reg dst,
+                     struct brw_reg src0)
+{
+   gen8_instruction *math_inst = next_inst(BRW_OPCODE_MATH);
+
+   assert(dst.hstride == src0.hstride);
+
+   gen8_set_math_function(math_inst, math_function);
+   gen8_set_dst(brw, math_inst, dst);
+   gen8_set_src0(brw, math_inst, src0);
+
+   return math_inst;
+}
+
+/**
+ * Emit a single-source MATH instruction.  The source must be of float type.
+ */
+gen8_instruction *
+gen8_generator::MATH(unsigned math_function,
+                     struct brw_reg dst,
+                     struct brw_reg src0)
+{
+   assert(src0.type == BRW_REGISTER_TYPE_F);
+
+   return math(math_function, dst, src0);
+}
+
+gen8_instruction *
+gen8_generator::MATH(unsigned math_function,
+                     struct brw_reg dst,
+                     struct brw_reg src0,
+                     struct brw_reg src1)
+{
+   bool int_math =
+      math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+      math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+      math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+
+   if (int_math) {
+      assert(src0.type != BRW_REGISTER_TYPE_F);
+      assert(src1.type != BRW_REGISTER_TYPE_F);
+   } else {
+      assert(src0.type == BRW_REGISTER_TYPE_F);
+   }
+
+   gen8_instruction *inst = math(math_function, dst, src0);
+   gen8_set_src1(brw, inst, src1);
+   return inst;
+}
+
+/**
+ * Emit a MOV whose destination and source are both retyped to UD and whose
+ * mask control is disabled.
+ */
+gen8_instruction *
+gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
+{
+   gen8_instruction *mov = next_inst(BRW_OPCODE_MOV);
+
+   const struct brw_reg raw_dst = retype(dst, BRW_REGISTER_TYPE_UD);
+   gen8_set_dst(brw, mov, raw_dst);
+
+   const struct brw_reg raw_src = retype(src0, BRW_REGISTER_TYPE_UD);
+   gen8_set_src0(brw, mov, raw_src);
+
+   gen8_set_mask_control(mov, BRW_MASK_DISABLE);
+   return mov;
+}
+
+
+/** Emit a NOP; nothing beyond the next_inst() defaults is encoded. */
+gen8_instruction *
+gen8_generator::NOP()
+{
+   gen8_instruction *nop = next_inst(BRW_OPCODE_NOP);
+   return nop;
+}
+
+/**
+ * Record an IF or ELSE instruction on the if-stack.
+ *
+ * The instruction's index within the store is saved rather than its
+ * address.  The stack is doubled as soon as it becomes full, so there is
+ * always a free slot for the next push.
+ */
+void
+gen8_generator::push_if_stack(gen8_instruction *inst)
+{
+   if_stack[if_stack_depth++] = inst - store;
+
+   if (if_stack_depth >= if_stack_array_size) {
+      if_stack_array_size *= 2;
+      if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size);
+   }
+}
+
+/**
+ * Remove the most recently pushed IF/ELSE entry from the if-stack and
+ * return a pointer to that instruction in the store.
+ *
+ * The pointer is computed from the saved index at the time of the call; it
+ * is only good until the store is next reallocated by next_inst().
+ */
+gen8_instruction *
+gen8_generator::pop_if_stack()
+{
+   /* Popping an empty stack would index if_stack[-1]. */
+   assert(if_stack_depth > 0);
+
+   --if_stack_depth;
+   return &store[if_stack[if_stack_depth]];
+}
+
+/**
+ * Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.)
+ */
+void
+gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst,
+                              gen8_instruction *else_inst,
+                              gen8_instruction *endif_inst)
+{
+   assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF);
+   assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE);
+   assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF);
+
+   gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst));
+
+   if (else_inst == NULL) {
+      /* Patch IF -> ENDIF */
+      gen8_set_jip(if_inst, 16 * (endif_inst - if_inst));
+      gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
+   } else {
+      gen8_set_exec_size(else_inst, gen8_exec_size(if_inst));
+
+      /* Patch IF -> ELSE and ELSE -> ENDIF:
+       *
+       * The IF's JIP should point at the instruction after the ELSE.
+       * The IF's UIP should point to the ENDIF.
+       *
+       * Both are expressed in bytes, hence the multiply by 16...128-bits.
+       */
+      gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1));
+      gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));
+
+      /* Patch ELSE -> ENDIF:
+       *
+       * Since we don't set branch_ctrl, both JIP and UIP point to ENDIF.
+       */
+      gen8_set_jip(else_inst, 16 * (endif_inst - else_inst));
+      gen8_set_uip(else_inst, 16 * (endif_inst - else_inst));
+   }
+   gen8_set_jip(endif_inst, 16);
+}
+
+/**
+ * Emit an IF with the given predicate control and push it on the if-stack
+ * so the matching ENDIF can patch its jump offsets.
+ */
+gen8_instruction *
+gen8_generator::IF(unsigned predicate)
+{
+   gen8_instruction *if_inst = next_inst(BRW_OPCODE_IF);
+
+   const struct brw_reg null_d =
+      vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   gen8_set_dst(brw, if_inst, null_d);
+
+   /* Applied after the destination is set, as in the original sequence. */
+   gen8_set_exec_size(if_inst, default_state.exec_size);
+   gen8_set_pred_control(if_inst, predicate);
+   gen8_set_mask_control(if_inst, BRW_MASK_ENABLE);
+
+   push_if_stack(if_inst);
+   return if_inst;
+}
+
+/**
+ * Emit an ELSE and push it on the if-stack, on top of its IF, so the
+ * matching ENDIF can patch both.
+ */
+gen8_instruction *
+gen8_generator::ELSE()
+{
+   gen8_instruction *else_inst = next_inst(BRW_OPCODE_ELSE);
+
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+   gen8_set_dst(brw, else_inst, null_d);
+   gen8_set_mask_control(else_inst, BRW_MASK_ENABLE);
+
+   push_if_stack(else_inst);
+   return else_inst;
+}
+
+gen8_instruction *
+gen8_generator::ENDIF()
+{
+   gen8_instruction *if_inst = NULL;
+   gen8_instruction *else_inst = NULL;
+
+   gen8_instruction *tmp = pop_if_stack();
+   if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) {
+      else_inst = tmp;
+      tmp = pop_if_stack();
+   }
+   assert(gen8_opcode(tmp) == BRW_OPCODE_IF);
+   if_inst = tmp;
+
+   gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
+   gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);
+   patch_IF_ELSE(if_inst, else_inst, endif_inst);
+
+   return endif_inst;
+}
+
+/**
+ * Return the byte offset of the instruction following the one at \p ip.
+ */
+unsigned
+gen8_generator::next_ip(unsigned ip) const
+{
+   /* Each instruction occupies 16 bytes. */
+   const unsigned inst_size = 16;
+   return ip + inst_size;
+}
+
+unsigned
+gen8_generator::find_next_block_end(unsigned start) const
+{
+   for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
+      gen8_instruction *inst = &store[ip / 16];
+
+      switch (gen8_opcode(inst)) {
+      case BRW_OPCODE_ENDIF:
+      case BRW_OPCODE_ELSE:
+      case BRW_OPCODE_WHILE:
+      case BRW_OPCODE_HALT:
+         return ip;
+      }
+   }
+
+   return 0;
+}
+
+/* There is no DO instruction on Gen6+, so to find the end of the loop
+ * we have to see if the loop is jumping back before our start
+ * instruction.
+ */
+unsigned
+gen8_generator::find_loop_end(unsigned start) const
+{
+   /* Always start after the instruction (such as a WHILE) we're trying to fix
+    * up.
+    */
+   for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
+      gen8_instruction *inst = &store[ip / 16];
+
+      if (gen8_opcode(inst) == BRW_OPCODE_WHILE) {
+         if (ip + gen8_jip(inst) <= start)
+            return ip;
+      }
+   }
+   assert(!"not reached");
+   return start;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK, CONT, and HALT instructions to their correct locations.
+ */
+void
+gen8_generator::patch_jump_targets()
+{
+   for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) {
+      gen8_instruction *inst = &store[ip / 16];
+
+      int block_end_ip = find_next_block_end(ip);
+      switch (gen8_opcode(inst)) {
+      case BRW_OPCODE_BREAK:
+         assert(block_end_ip != 0);
+         gen8_set_jip(inst, block_end_ip - ip);
+         gen8_set_uip(inst, find_loop_end(ip) - ip);
+         assert(gen8_uip(inst) != 0);
+         assert(gen8_jip(inst) != 0);
+         break;
+      case BRW_OPCODE_CONTINUE:
+         assert(block_end_ip != 0);
+         gen8_set_jip(inst, block_end_ip - ip);
+         gen8_set_uip(inst, find_loop_end(ip) - ip);
+         assert(gen8_uip(inst) != 0);
+         assert(gen8_jip(inst) != 0);
+         break;
+      case BRW_OPCODE_ENDIF:
+         if (block_end_ip == 0)
+            gen8_set_jip(inst, 16);
+         else
+            gen8_set_jip(inst, block_end_ip - ip);
+         break;
+      case BRW_OPCODE_HALT:
+         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
+          *
+          *    "In case of the halt instruction not inside any conditional
+          *     code block, the value of <JIP> and <UIP> should be the
+          *     same. In case of the halt instruction inside conditional code
+          *     block, the <UIP> should be the end of the program, and the
+          *     <JIP> should be end of the most inner conditional code block."
+          *
+          * The uip will have already been set by whoever set up the
+          * instruction.
+          */
+         if (block_end_ip == 0) {
+            gen8_set_jip(inst, gen8_uip(inst));
+         } else {
+            gen8_set_jip(inst, block_end_ip - ip);
+         }
+         assert(gen8_uip(inst) != 0);
+         assert(gen8_jip(inst) != 0);
+         break;
+      }
+   }
+}
+
+/**
+ * Mark the start of a loop.
+ *
+ * No instruction is emitted; the index that the next instruction will
+ * receive is pushed on the loop stack so WHILE() can compute its backward
+ * jump.
+ */
+void
+gen8_generator::DO()
+{
+   /* Grow when the stack is full, i.e. when depth == size.  Using '<' here
+    * would let the store below write one element past the end of the array.
+    */
+   if (loop_stack_array_size <= loop_stack_depth) {
+      loop_stack_array_size *= 2;
+      loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size);
+   }
+   loop_stack[loop_stack_depth++] = nr_inst;
+}
+
+/**
+ * Emit a BREAK.  Its jump offsets are left for patch_jump_targets() to
+ * fill in.
+ */
+gen8_instruction *
+gen8_generator::BREAK()
+{
+   gen8_instruction *brk = next_inst(BRW_OPCODE_BREAK);
+
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+   gen8_set_dst(brw, brk, null_d);
+   gen8_set_src0(brw, brk, null_d);
+   gen8_set_src1(brw, brk, brw_imm_d(0));
+
+   /* Applied after the operands are set, as in the original sequence. */
+   gen8_set_exec_size(brk, default_state.exec_size);
+   return brk;
+}
+
+/**
+ * Emit a CONTINUE, using the IP register as both destination and first
+ * source.  Its jump offsets are left for patch_jump_targets() to fill in.
+ */
+gen8_instruction *
+gen8_generator::CONTINUE()
+{
+   gen8_instruction *cont = next_inst(BRW_OPCODE_CONTINUE);
+
+   gen8_set_dst(brw, cont, brw_ip_reg());
+   gen8_set_src0(brw, cont, brw_ip_reg());
+   gen8_set_src1(brw, cont, brw_imm_d(0));
+
+   /* Applied after the operands are set, as in the original sequence. */
+   gen8_set_exec_size(cont, default_state.exec_size);
+   return cont;
+}
+
+/**
+ * Emit the WHILE that closes the innermost open loop and set its JIP to
+ * jump back to the first instruction of that loop.
+ *
+ * @return the newly emitted WHILE instruction
+ */
+gen8_instruction *
+gen8_generator::WHILE()
+{
+   /* A WHILE without a matching DO would index loop_stack[-1]. */
+   assert(loop_stack_depth > 0);
+
+   /* Work with store indices rather than pointers: next_inst() may
+    * reallocate the store, so a pointer to the loop start taken before the
+    * call could be left pointing into the old allocation.
+    */
+   const int do_index = loop_stack[--loop_stack_depth];
+   gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
+   const int while_index = while_inst - store;
+
+   gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   gen8_set_src0(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   gen8_set_src1(brw, while_inst, brw_imm_ud(0));
+
+   /* Backward jump in bytes, 16 bytes per instruction. */
+   gen8_set_jip(while_inst, 16 * (do_index - while_index));
+   gen8_set_exec_size(while_inst, default_state.exec_size);
+
+   return while_inst;
+}
+
+/**
+ * Emit a HALT with mask control disabled.  UIP is not set here;
+ * patch_jump_targets() expects the caller to have set it.
+ */
+gen8_instruction *
+gen8_generator::HALT()
+{
+   gen8_instruction *halt = next_inst(BRW_OPCODE_HALT);
+
+   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
+   gen8_set_dst(brw, halt, null_d);
+   gen8_set_src0(brw, halt, null_d);
+
+   /* Applied after the operands are set, as in the original sequence. */
+   gen8_set_exec_size(halt, default_state.exec_size);
+   gen8_set_mask_control(halt, BRW_MASK_DISABLE);
+   return halt;
+}
+
+void
+gen8_generator::disassemble(FILE *out, int start, int end)
+{
+   bool dump_hex = false;
+
+   for (int offset = start; offset < end; offset += 16) {
+      gen8_instruction *inst = &store[offset / 16];
+      printf("0x%08x: ", offset);
+
+      if (dump_hex) {
+         printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+                ((uint32_t *) inst)[3],
+                ((uint32_t *) inst)[2],
+                ((uint32_t *) inst)[1],
+                ((uint32_t *) inst)[0]);
+      }
+
+      gen8_disassemble(stdout, inst, brw->gen);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/gen8_generator.h b/src/mesa/drivers/dri/i965/gen8_generator.h
new file mode 100644 (file)
index 0000000..7d74267
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file gen8_generator.h
+ *
+ * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
+ */
+
+#pragma once
+
+extern "C" {
+#include "main/macros.h"
+} /* extern "C" */
+
+#include "gen8_instruction.h"
+
+/**
+ * Instruction emitter for Gen8+ hardware.
+ *
+ * Instructions are appended to a growable array (the "store") and returned
+ * to the caller as pointers into that array.  Control flow emitters keep
+ * stacks of store indices so jump offsets can be patched once the matching
+ * end instruction is emitted.
+ */
+class gen8_generator {
+public:
+   gen8_generator(struct brw_context *brw,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog,
+                  void *mem_ctx);
+   ~gen8_generator();
+
+   /**
+    * Instruction emitters.
+    * @{
+    */
+   /* Declaration helpers for one-, two- and three-source emitters.  They
+    * are undefined again right after the list below.
+    */
+   #define ALU1(OP) \
+   gen8_instruction *OP(struct brw_reg dst, struct brw_reg src);
+   #define ALU2(OP) \
+   gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg);
+   #define ALU3(OP) \
+   gen8_instruction *OP(struct brw_reg d, \
+                        struct brw_reg, struct brw_reg, struct brw_reg);
+   ALU2(ADD)
+   ALU2(AND)
+   ALU2(ASR)
+   ALU3(BFE)
+   ALU2(BFI1)
+   ALU3(BFI2)
+   ALU1(F32TO16)
+   ALU1(F16TO32)
+   ALU1(BFREV)
+   ALU1(CBIT)
+   ALU2(ADDC)
+   ALU2(SUBB)
+   ALU2(DP2)
+   ALU2(DP3)
+   ALU2(DP4)
+   ALU2(DPH)
+   ALU1(FBH)
+   ALU1(FBL)
+   ALU1(FRC)
+   ALU2(LINE)
+   ALU3(LRP)
+   ALU2(MAC)
+   ALU2(MACH)
+   ALU3(MAD)
+   ALU2(MUL)
+   ALU1(MOV)
+   ALU1(MOV_RAW)
+   ALU1(NOT)
+   ALU2(OR)
+   ALU2(PLN)
+   ALU1(RNDD)
+   ALU1(RNDE)
+   ALU1(RNDZ)
+   ALU2(SEL)
+   ALU2(SHL)
+   ALU2(SHR)
+   ALU2(XOR)
+   #undef ALU1
+   #undef ALU2
+   #undef ALU3
+
+   gen8_instruction *CMP(struct brw_reg dst, unsigned conditional,
+                         struct brw_reg src0, struct brw_reg src1);
+   gen8_instruction *IF(unsigned predicate);
+   gen8_instruction *ELSE();
+   gen8_instruction *ENDIF();
+   /* DO emits no instruction (hence void); it only records the loop start. */
+   void DO();
+   gen8_instruction *BREAK();
+   gen8_instruction *CONTINUE();
+   gen8_instruction *WHILE();
+
+   gen8_instruction *HALT();
+
+   /* Single-source and two-source forms of the MATH instruction. */
+   gen8_instruction *MATH(unsigned math_function,
+                          struct brw_reg dst,
+                          struct brw_reg src0);
+   gen8_instruction *MATH(unsigned math_function,
+                          struct brw_reg dst,
+                          struct brw_reg src0,
+                          struct brw_reg src1);
+   gen8_instruction *NOP();
+   /** @} */
+
+   /* Print the instructions in the byte range [start, end). */
+   void disassemble(FILE *out, int start, int end);
+
+protected:
+   /* Shared encoder behind every three-source emitter. */
+   gen8_instruction *alu3(unsigned opcode,
+                          struct brw_reg dst,
+                          struct brw_reg src0,
+                          struct brw_reg src1,
+                          struct brw_reg src2);
+
+   /* Shared encoder behind both MATH overloads. */
+   gen8_instruction *math(unsigned math_function,
+                          struct brw_reg dst,
+                          struct brw_reg src0);
+
+   /* Append a zeroed instruction with the given opcode and default state. */
+   gen8_instruction *next_inst(unsigned opcode);
+
+   struct gl_shader_program *shader_prog;
+   /* NOTE(review): gen8_generator.cpp never assigns this member in the
+    * constructor - presumably a subclass sets it; confirm before use.
+    */
+   struct gl_shader *shader;
+   struct gl_program *prog;
+
+   struct brw_context *brw;
+   /* NOTE(review): gen8_generator.cpp never assigns or reads this member. */
+   struct intel_context *intel;
+   struct gl_context *ctx;
+
+   /* Instruction storage: the array, its capacity in instructions, the
+    * number of instructions emitted, and the byte offset just past the last
+    * emitted instruction (16 bytes each).
+    */
+   gen8_instruction *store;
+   unsigned store_size;
+   unsigned nr_inst;
+   unsigned next_inst_offset;
+
+   /**
+    * Control flow stacks:
+    *
+    * if_stack contains IF and ELSE instructions which must be patched with
+    * the final jump offsets (and popped) once the matching ENDIF is encountered.
+    *
+    * We actually store an array index into the store, rather than pointers
+    * to the instructions.  This is necessary since we may realloc the store.
+    *
+    *  @{
+    */
+   int *if_stack;
+   int if_stack_depth;
+   int if_stack_array_size;
+
+   /* loop_stack holds, per open loop, the store index of the loop's first
+    * instruction as recorded by DO().
+    */
+   int *loop_stack;
+   int loop_stack_depth;
+   int loop_stack_array_size;
+
+   /* NOTE(review): gen8_generator.cpp never assigns or reads this member. */
+   int if_depth_in_loop;
+
+   void push_if_stack(gen8_instruction *inst);
+   gen8_instruction *pop_if_stack();
+   /** @} */
+
+   void patch_IF_ELSE(gen8_instruction *if_inst,
+                      gen8_instruction *else_inst,
+                      gen8_instruction *endif_inst);
+
+   /* Helpers that walk the store by byte offset. */
+   unsigned next_ip(unsigned ip) const;
+   unsigned find_next_block_end(unsigned start_ip) const;
+   unsigned find_loop_end(unsigned start) const;
+
+   /* Fix up JIP/UIP of jump instructions once the whole program is emitted. */
+   void patch_jump_targets();
+
+   /**
+    * Default state for new instructions.
+    */
+   struct {
+      unsigned exec_size;
+      unsigned access_mode;
+      unsigned mask_control;
+      unsigned flag_subreg_nr;
+      unsigned conditional_mod;
+      unsigned predicate;
+      bool predicate_inverse;
+      bool saturate;
+   } default_state;
+
+   /* ralloc context passed to the constructor; used for every allocation. */
+   void *mem_ctx;
+};