i965: Add _CACHE_ in brw_cache_id enum names.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
index f1534d996eaef40d0f930db4f1282ce32c1d5cf0..7117890197a2bb250d129132c3c9188338c506a4 100644 (file)
  * The idea of instruction compaction is that most instructions use a tiny
  * subset of the GPU functionality, so we can encode what would be a 16 byte
  * instruction in 8 bytes using some lookup tables for various fields.
+ *
+ *
+ * Instruction compaction capabilities vary subtly by generation.
+ *
+ * G45's support for instruction compaction is very limited. Jump counts on
+ * this generation are in units of 16-byte uncompacted instructions. As such,
+ * all jump targets must be 16-byte aligned. Also, all instructions must be
+ * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
+ * A G45-only instruction, NENOP, must be used to provide padding to align
+ * uncompacted instructions.
+ *
+ * Gen5 removes these restrictions and changes jump counts to be in units of
+ * 8-byte compacted instructions, allowing jump targets to be only 8-byte
+ * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
+ *
+ * Gen6 adds the ability to compact instructions with a limited range of
+ * immediate values. Compactable immediates have 12 unrestricted bits, and a
+ * 13th bit that's replicated through the high 20 bits, to create the 32-bit
+ * value of DW3 in the uncompacted instruction word.
+ *
+ * On Gen7 we can compact some control flow instructions with a small positive
+ * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
+ * control flow instructions with UIP cannot be compacted, because of the
+ * replicated 13th bit. No control flow instructions can be compacted on Gen6
+ * since the jump count field is not in DW3.
+ *
+ * The jump fields carried by each control flow instruction are:
+ *
+ *    break    JIP/UIP
+ *    cont     JIP/UIP
+ *    halt     JIP/UIP
+ *    if       JIP/UIP
+ *    else     JIP (plus UIP on BDW+)
+ *    endif    JIP
+ *    while    JIP (must be negative)
+ *
+ * Gen8 adds support for compacting 3-src instructions.
  */
 
 #include "brw_context.h"
@@ -760,7 +795,7 @@ set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst
       (brw_inst_bits(src, 34, 32) << 21) | /*  3b */
       (brw_inst_bits(src, 28,  8));        /* 21b */
 
-   if (brw->is_cherryview)
+   if (brw->gen >= 9 || brw->is_cherryview)
       uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
 
    for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
@@ -785,7 +820,7 @@ set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *
       (brw_inst_bits(src,  72,  65) << 19) | /*  8b */
       (brw_inst_bits(src,  55,  37));        /* 19b */
 
-   if (brw->is_cherryview) {
+   if (brw->gen >= 9 || brw->is_cherryview) {
       uncompacted |=
          (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
          (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
@@ -879,19 +914,6 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
 
    assert(brw_inst_cmpt_control(brw, src) == 0);
 
-   if (brw_inst_opcode(brw, src) == BRW_OPCODE_IF ||
-       brw_inst_opcode(brw, src) == BRW_OPCODE_IFF ||
-       brw_inst_opcode(brw, src) == BRW_OPCODE_ELSE ||
-       brw_inst_opcode(brw, src) == BRW_OPCODE_ENDIF ||
-       brw_inst_opcode(brw, src) == BRW_OPCODE_HALT ||
-       brw_inst_opcode(brw, src) == BRW_OPCODE_DO ||
-       brw_inst_opcode(brw, src) == BRW_OPCODE_WHILE) {
-      /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
-       * to be able to handle compacted flow control instructions..
-       */
-      return false;
-   }
-
    if (is_3src(brw_inst_opcode(brw, src))) {
       if (brw->gen >= 8) {
          memset(&temp, 0, sizeof(temp));
@@ -1035,7 +1057,7 @@ set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst,
    brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
    brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
 
-   if (brw->is_cherryview)
+   if (brw->gen >= 9 || brw->is_cherryview)
       brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
 }
 
@@ -1054,7 +1076,7 @@ set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst,
    brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
    brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
 
-   if (brw->is_cherryview) {
+   if (brw->gen >= 9 || brw->is_cherryview) {
       brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
       brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
       brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
@@ -1179,28 +1201,24 @@ update_uip_jip(struct brw_context *brw, brw_inst *insn,
     *    - bytes on Gen8+; and
     *    - compacted instructions on Gen6+.
     */
-   int32_t jip = brw_inst_jip(brw, insn);
-   int32_t jip_compacted = jip / (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1);
-   int32_t jip_uncompacted = jip / (brw->gen >= 8 ? sizeof(brw_inst) : 2);
+   int shift = brw->gen >= 8 ? 3 : 0;
+
+   int32_t jip_compacted = brw_inst_jip(brw, insn) >> shift;
    jip_compacted -= compacted_between(this_old_ip,
-                                      this_old_ip + jip_uncompacted,
+                                      this_old_ip + (jip_compacted / 2),
                                       compacted_counts);
-   brw_inst_set_jip(brw, insn,
-                    jip_compacted * (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1));
+   brw_inst_set_jip(brw, insn, jip_compacted << shift);
 
    if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
        brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE ||
        (brw_inst_opcode(brw, insn) == BRW_OPCODE_ELSE && brw->gen <= 7))
       return;
 
-   int32_t uip = brw_inst_uip(brw, insn);
-   int32_t uip_compacted = uip / (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1);
-   int32_t uip_uncompacted = uip / (brw->gen >= 8 ? sizeof(brw_inst) : 2);
+   int32_t uip_compacted = brw_inst_uip(brw, insn) >> shift;
    uip_compacted -= compacted_between(this_old_ip,
-                                      this_old_ip + uip_uncompacted,
+                                      this_old_ip + (uip_compacted / 2),
                                       compacted_counts);
-   brw_inst_set_uip(brw, insn,
-                    uip_compacted * (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1));
+   brw_inst_set_uip(brw, insn, uip_compacted << shift);
 }
 
 static void
@@ -1213,18 +1231,17 @@ update_gen4_jump_count(struct brw_context *brw, brw_inst *insn,
     *    - uncompacted instructions on G45; and
     *    - compacted instructions on Gen5.
     */
-   int jump_count = brw_inst_gen4_jump_count(brw, insn);
-   int jump_count_compacted = jump_count * (brw->is_g4x ? 2 : 1);
-   int jump_count_uncompacted = jump_count / (brw->is_g4x ? 1 : 2);
+   int shift = brw->is_g4x ? 1 : 0;
 
-   int target_old_ip = this_old_ip + jump_count_uncompacted;
+   int jump_count_compacted = brw_inst_gen4_jump_count(brw, insn) << shift;
+
+   int target_old_ip = this_old_ip + (jump_count_compacted / 2);
 
    int this_compacted_count = compacted_counts[this_old_ip];
    int target_compacted_count = compacted_counts[target_old_ip];
 
    jump_count_compacted -= (target_compacted_count - this_compacted_count);
-   brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted /
-                                           (brw->is_g4x ? 2 : 1));
+   brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted >> shift);
 }
 
 void
@@ -1248,6 +1265,7 @@ brw_init_compaction_tables(struct brw_context *brw)
    assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
 
    switch (brw->gen) {
+   case 9:
    case 8:
       control_index_table = gen8_control_index_table;
       datatype_table = gen8_datatype_table;
@@ -1361,7 +1379,6 @@ brw_compact_instructions(struct brw_compile *p, int start_offset,
       brw_inst *insn = store + offset;
       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
       int this_compacted_count = compacted_counts[this_old_ip];
-      int target_old_ip, target_compacted_count;
 
       switch (brw_inst_opcode(brw, insn)) {
       case BRW_OPCODE_BREAK:
@@ -1380,14 +1397,28 @@ brw_compact_instructions(struct brw_compile *p, int start_offset,
       case BRW_OPCODE_ENDIF:
       case BRW_OPCODE_WHILE:
          if (brw->gen >= 7) {
-            update_uip_jip(brw, insn, this_old_ip, compacted_counts);
+            if (brw_inst_cmpt_control(brw, insn)) {
+               brw_inst uncompacted;
+               brw_uncompact_instruction(brw, &uncompacted,
+                                         (brw_compact_inst *)insn);
+
+               update_uip_jip(brw, &uncompacted, this_old_ip, compacted_counts);
+
+               bool ret = brw_try_compact_instruction(brw,
+                                                      (brw_compact_inst *)insn,
+                                                      &uncompacted);
+               assert(ret); (void)ret;
+            } else {
+               update_uip_jip(brw, insn, this_old_ip, compacted_counts);
+            }
          } else if (brw->gen == 6) {
+            assert(!brw_inst_cmpt_control(brw, insn));
+
             /* Jump Count is in units of compacted instructions on Gen6. */
             int jump_count_compacted = brw_inst_gen6_jump_count(brw, insn);
-            int jump_count_uncompacted = jump_count_compacted / 2;
 
-            target_old_ip = this_old_ip + jump_count_uncompacted;
-            target_compacted_count = compacted_counts[target_old_ip];
+            int target_old_ip = this_old_ip + (jump_count_compacted / 2);
+            int target_compacted_count = compacted_counts[target_old_ip];
             jump_count_compacted -= (target_compacted_count - this_compacted_count);
             brw_inst_set_gen6_jump_count(brw, insn, jump_count_compacted);
          } else {
@@ -1407,15 +1438,13 @@ brw_compact_instructions(struct brw_compile *p, int start_offset,
              brw_inst_dst_da_reg_nr(brw, insn) == BRW_ARF_IP) {
             assert(brw_inst_src1_reg_file(brw, insn) == BRW_IMMEDIATE_VALUE);
 
-            int jump = brw_inst_imm_d(brw, insn);
-            int jump_compacted = jump / sizeof(brw_compact_inst);
-            int jump_uncompacted = jump / sizeof(brw_inst);
+            int shift = 3;
+            int jump_compacted = brw_inst_imm_d(brw, insn) >> shift;
 
-            target_old_ip = this_old_ip + jump_uncompacted;
-            target_compacted_count = compacted_counts[target_old_ip];
+            int target_old_ip = this_old_ip + (jump_compacted / 2);
+            int target_compacted_count = compacted_counts[target_old_ip];
             jump_compacted -= (target_compacted_count - this_compacted_count);
-            brw_inst_set_imm_ud(brw, insn, jump_compacted *
-                                           sizeof(brw_compact_inst));
+            brw_inst_set_imm_ud(brw, insn, jump_compacted << shift);
          }
          break;
       }