i965/vs: Add a function for how many MRFs get written as part of a SEND.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
index 7372f947f6f09c74c927d349e4293bac44ff4f91..f9f8d49a0d0b4f78b4430ef238e19069966990a3 100644 (file)
@@ -34,7 +34,7 @@
 #include "brw_defines.h"
 #include "brw_eu.h"
 
-#include "../glsl/ralloc.h"
+#include "glsl/ralloc.h"
 
 /***********************************************************************
  * Internal helper for constructing instructions
@@ -58,7 +58,7 @@ static void guess_execution_size(struct brw_compile *p,
  * On Sandybridge, this is no longer the case.  This function performs the
  * explicit move; it should be called before emitting a SEND instruction.
  */
-static void
+void
 gen6_resolve_implied_move(struct brw_compile *p,
                          struct brw_reg *src,
                          GLuint msg_reg_nr)
@@ -89,12 +89,10 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
 }
 
 
-static void brw_set_dest(struct brw_compile *p,
-                        struct brw_instruction *insn,
-                        struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+            struct brw_reg dest)
 {
-   struct intel_context *intel = &p->brw->intel;
-
    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.file != BRW_MESSAGE_REGISTER_FILE)
       assert(dest.nr < 128);
@@ -223,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
    /* 10. Check destination issues. */
 }
 
-static void brw_set_src0(struct brw_compile *p,
-                        struct brw_instruction *insn,
-                        struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+            struct brw_reg reg)
 {
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
@@ -458,8 +456,21 @@ static void brw_set_urb_message( struct brw_compile *p,
     struct intel_context *intel = &brw->intel;
     brw_set_src1(p, insn, brw_imm_d(0));
 
-    if (intel->gen >= 5) {
-        insn->bits3.urb_gen5.opcode = 0;       /* ? */
+    if (intel->gen == 7) {
+        insn->bits3.urb_gen7.opcode = 0;       /* URB_WRITE_HWORD */
+        insn->bits3.urb_gen7.offset = offset;
+        assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
+        insn->bits3.urb_gen7.swizzle_control = swizzle_control;
+        /* per_slot_offset = 0 makes it ignore offsets in message header */
+        insn->bits3.urb_gen7.per_slot_offset = 0;
+        insn->bits3.urb_gen7.complete = complete;
+        insn->bits3.urb_gen7.header_present = 1;
+        insn->bits3.urb_gen7.response_length = response_length;
+        insn->bits3.urb_gen7.msg_length = msg_length;
+        insn->bits3.urb_gen7.end_of_thread = end_of_thread;
+       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+    } else if (intel->gen >= 5) {
+        insn->bits3.urb_gen5.opcode = 0;       /* URB_WRITE */
         insn->bits3.urb_gen5.offset = offset;
         insn->bits3.urb_gen5.swizzle_control = swizzle_control;
         insn->bits3.urb_gen5.allocate = allocate;
@@ -493,23 +504,36 @@ static void brw_set_urb_message( struct brw_compile *p,
     }
 }
 
-static void brw_set_dp_write_message( struct brw_compile *p,
-                                     struct brw_instruction *insn,
-                                     GLuint binding_table_index,
-                                     GLuint msg_control,
-                                     GLuint msg_type,
-                                     GLuint msg_length,
-                                     GLboolean header_present,
-                                     GLuint pixel_scoreboard_clear,
-                                     GLuint response_length,
-                                     GLuint end_of_thread,
-                                     GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+                        struct brw_instruction *insn,
+                        GLuint binding_table_index,
+                        GLuint msg_control,
+                        GLuint msg_type,
+                        GLuint msg_length,
+                        GLboolean header_present,
+                        GLuint pixel_scoreboard_clear,
+                        GLuint response_length,
+                        GLuint end_of_thread,
+                        GLuint send_commit_msg)
 {
    struct brw_context *brw = p->brw;
    struct intel_context *intel = &brw->intel;
    brw_set_src1(p, insn, brw_imm_ud(0));
 
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+       insn->bits3.gen7_dp.msg_control = msg_control;
+       insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
+       insn->bits3.gen7_dp.msg_type = msg_type;
+       insn->bits3.gen7_dp.header_present = header_present;
+       insn->bits3.gen7_dp.response_length = response_length;
+       insn->bits3.gen7_dp.msg_length = msg_length;
+       insn->bits3.gen7_dp.end_of_thread = end_of_thread;
+
+       /* We always use the render cache for write messages */
+       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
+   } else if (intel->gen == 6) {
        insn->bits3.gen6_dp.binding_table_index = binding_table_index;
        insn->bits3.gen6_dp.msg_control = msg_control;
        insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
@@ -547,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p,
    }
 }
 
-static void
+void
 brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
@@ -561,7 +585,17 @@ brw_set_dp_read_message(struct brw_compile *p,
    struct intel_context *intel = &brw->intel;
    brw_set_src1(p, insn, brw_imm_d(0));
 
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+       insn->bits3.gen7_dp.msg_control = msg_control;
+       insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
+       insn->bits3.gen7_dp.msg_type = msg_type;
+       insn->bits3.gen7_dp.header_present = 1;
+       insn->bits3.gen7_dp.response_length = response_length;
+       insn->bits3.gen7_dp.msg_length = msg_length;
+       insn->bits3.gen7_dp.end_of_thread = 0;
+       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
+   } else if (intel->gen == 6) {
        uint32_t target_function;
 
        if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
@@ -630,7 +664,17 @@ static void brw_set_sampler_message(struct brw_compile *p,
    assert(eot == 0);
    brw_set_src1(p, insn, brw_imm_d(0));
 
-   if (intel->gen >= 5) {
+   if (intel->gen >= 7) {
+      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gen7.sampler = sampler;
+      insn->bits3.sampler_gen7.msg_type = msg_type;
+      insn->bits3.sampler_gen7.simd_mode = simd_mode;
+      insn->bits3.sampler_gen7.header_present = header_present;
+      insn->bits3.sampler_gen7.response_length = response_length;
+      insn->bits3.sampler_gen7.msg_length = msg_length;
+      insn->bits3.sampler_gen7.end_of_thread = eot;
+      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
+   } else if (intel->gen >= 5) {
       insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
       insn->bits3.sampler_gen5.sampler = sampler;
       insn->bits3.sampler_gen5.msg_type = msg_type;
@@ -666,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p,
 }
 
 
-
-static struct brw_instruction *next_insn( struct brw_compile *p, 
-                                         GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
 {
    struct brw_instruction *insn;
 
@@ -689,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
    return insn;
 }
 
-
 static struct brw_instruction *brw_alu1( struct brw_compile *p,
                                         GLuint opcode,
                                         struct brw_reg dest,
@@ -739,6 +782,8 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,     \
  * stores a rounded value (possibly the wrong way) in the dest register, but
  * also sets a per-channel "increment bit" in the flag register.  A predicated
  * add of 1.0 fixes dest to contain the desired result.
+ *
+ * Sandybridge and later appear to round correctly without an ADD.
  */
 #define ROUND(OP)                                                            \
 void brw_##OP(struct brw_compile *p,                                         \
@@ -749,10 +794,13 @@ void brw_##OP(struct brw_compile *p,                                            \
    rnd = next_insn(p, BRW_OPCODE_##OP);                                              \
    brw_set_dest(p, rnd, dest);                                               \
    brw_set_src0(p, rnd, src);                                                \
-   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */  \
                                                                              \
-   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                            \
-   add->header.predicate_control = BRW_PREDICATE_NORMAL;                     \
+   if (p->brw->intel.gen < 6) {                                                      \
+      /* turn on round-increments */                                         \
+      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;               \
+      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                         \
+      add->header.predicate_control = BRW_PREDICATE_NORMAL;                  \
+   }                                                                         \
 }
 
 
@@ -915,11 +963,17 @@ brw_IF(struct brw_compile *p, GLuint execute_size)
       brw_set_dest(p, insn, brw_ip_reg());
       brw_set_src0(p, insn, brw_ip_reg());
       brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else {
+   } else if (intel->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
       insn->bits1.branch_gen6.jump_count = 0;
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   } else {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, insn, brw_imm_ud(0));
+      insn->bits3.break_cont.jip = 0;
+      insn->bits3.break_cont.uip = 0;
    }
 
    insn->header.execution_size = execute_size;
@@ -935,6 +989,9 @@ brw_IF(struct brw_compile *p, GLuint execute_size)
    return insn;
 }
 
+/* This function is only used for gen6-style IF instructions with an
+ * embedded comparison (conditional modifier).  It is not used on gen7.
+ */
 struct brw_instruction *
 gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
@@ -944,7 +1001,11 @@ gen6_IF(struct brw_compile *p, uint32_t conditional,
    insn = next_insn(p, BRW_OPCODE_IF);
 
    brw_set_dest(p, insn, brw_imm_w(0));
-   insn->header.execution_size = BRW_EXECUTE_8;
+   if (p->compressed) {
+      insn->header.execution_size = BRW_EXECUTE_16;
+   } else {
+      insn->header.execution_size = BRW_EXECUTE_8;
+   }
    insn->bits1.branch_gen6.jump_count = 0;
    brw_set_src0(p, insn, src0);
    brw_set_src1(p, insn, src1);
@@ -1036,9 +1097,12 @@ patch_IF_ELSE(struct brw_compile *p,
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
-      } else {
+      } else if (intel->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
+      } else {
+        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+        if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
       }
    } else {
       else_inst->header.execution_size = if_inst->header.execution_size;
@@ -1060,9 +1124,15 @@ patch_IF_ELSE(struct brw_compile *p,
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
-      } else {
+      } else if (intel->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
+      } else {
+        /* The IF instruction's JIP should point just past the ELSE */
+        if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
+        /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
+        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+        else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
       }
    }
 }
@@ -1079,11 +1149,17 @@ brw_ELSE(struct brw_compile *p)
       brw_set_dest(p, insn, brw_ip_reg());
       brw_set_src0(p, insn, brw_ip_reg());
       brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else {
+   } else if (intel->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
       insn->bits1.branch_gen6.jump_count = 0;
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   } else {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, insn, brw_imm_ud(0));
+      insn->bits3.break_cont.jip = 0;
+      insn->bits3.break_cont.uip = 0;
    }
 
    insn->header.compression_control = BRW_COMPRESSION_NONE;
@@ -1122,10 +1198,14 @@ brw_ENDIF(struct brw_compile *p)
       brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
       brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
       brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else {
+   } else if (intel->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   } else {
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, insn, brw_imm_ud(0));
    }
 
    insn->header.compression_control = BRW_COMPRESSION_NONE;
@@ -1137,8 +1217,10 @@ brw_ENDIF(struct brw_compile *p)
       insn->bits3.if_else.jump_count = 0;
       insn->bits3.if_else.pop_count = 1;
       insn->bits3.if_else.pad0 = 0;
-   } else {
+   } else if (intel->gen == 6) {
       insn->bits1.branch_gen6.jump_count = 2;
+   } else {
+      insn->bits3.break_cont.jip = 2;
    }
    patch_IF_ELSE(p, if_inst, else_inst, insn);
 }
@@ -1170,7 +1252,6 @@ struct brw_instruction *gen6_CONT(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
 {
    struct brw_instruction *insn;
-   int br = 2;
 
    insn = next_insn(p, BRW_OPCODE_CONTINUE);
    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -1179,8 +1260,6 @@ struct brw_instruction *gen6_CONT(struct brw_compile *p,
    brw_set_src0(p, insn, brw_ip_reg());
    brw_set_src1(p, insn, brw_imm_d(0x0));
 
-   insn->bits3.break_cont.uip = br * (do_insn - insn);
-
    insn->header.compression_control = BRW_COMPRESSION_NONE;
    insn->header.execution_size = BRW_EXECUTE_8;
    return insn;
@@ -1254,7 +1333,16 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
    if (intel->gen >= 5)
       br = 2;
 
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+      insn = next_insn(p, BRW_OPCODE_WHILE);
+
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, insn, brw_imm_ud(0));
+      insn->bits3.break_cont.jip = br * (do_insn - insn);
+
+      insn->header.execution_size = BRW_EXECUTE_8;
+   } else if (intel->gen == 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
 
       brw_set_dest(p, insn, brw_imm_w(0));
@@ -1262,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else {
       if (p->single_program_flow) {
         insn = next_insn(p, BRW_OPCODE_ADD);
@@ -1924,7 +2011,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src);
 
-   if (intel->gen == 6)
+   if (intel->gen >= 6)
       msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else if (intel->gen == 5 || intel->is_g4x)
       msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
@@ -2155,6 +2242,17 @@ void brw_urb_WRITE(struct brw_compile *p,
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
+   if (intel->gen == 7) {
+      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
+                      BRW_REGISTER_TYPE_UD),
+               retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
+               brw_imm_ud(0xff00));
+      brw_pop_insn_state(p);
+   }
+
    insn = next_insn(p, BRW_OPCODE_SEND);
 
    assert(msg_length < BRW_MAX_MRF);
@@ -2204,6 +2302,7 @@ brw_find_next_block_end(struct brw_compile *p, int start)
 static int
 brw_find_loop_end(struct brw_compile *p, int start)
 {
+   struct intel_context *intel = &p->brw->intel;
    int ip;
    int br = 2;
 
@@ -2211,7 +2310,9 @@ brw_find_loop_end(struct brw_compile *p, int start)
       struct brw_instruction *insn = &p->store[ip];
 
       if (insn->header.opcode == BRW_OPCODE_WHILE) {
-        if (ip + insn->bits1.branch_gen6.jump_count / br < start)
+        int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
+                                  : insn->bits3.break_cont.jip;
+        if (ip + jip / br <= start)
            return ip;
       }
    }
@@ -2238,13 +2339,14 @@ brw_set_uip_jip(struct brw_compile *p)
       switch (insn->header.opcode) {
       case BRW_OPCODE_BREAK:
         insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
-        insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
+        /* Gen7 UIP points to WHILE; Gen6 points just after it */
+        insn->bits3.break_cont.uip =
+           br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
         break;
       case BRW_OPCODE_CONTINUE:
-        /* JIP is set at CONTINUE emit time, since that's when we
-         * know where the start of the loop is.
-         */
         insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+        insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
+
         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;