#include "brw_eu.h"
#include "brw_shader.h"
-#include "intel_asm_annotation.h"
-#include "common/gen_debug.h"
+#include "brw_disasm_info.h"
+#include "dev/gen_debug.h"
static const uint32_t g45_control_index_table[32] = {
0b00000000000000000,
0b00110000000000110,
0b00000000000001010,
0b01010000000101000,
- 0b01010000000100100
+ 0b01010000000100100,
};
static const uint32_t g45_datatype_table[32] = {
0b101000001000101001,
0b001011010110001100,
0b001000000110100001,
- 0b001010010100001000
+ 0b001010010100001000,
};
static const uint16_t g45_subreg_table[32] = {
0b000110010000100,
0b001100000000110,
0b000000010000110,
- 0b000001000110000
+ 0b000001000110000,
};
static const uint16_t g45_src_index_table[32] = {
0b010000000100,
0b010000111000,
0b000101100000,
- 0b111101110100
+ 0b111101110100,
};
static const uint32_t gen6_control_index_table[32] = {
0b00110000000000011,
0b00110000000000100,
0b00110000100001000,
- 0b00100000000001001
+ 0b00100000000001001,
};
static const uint32_t gen6_datatype_table[32] = {
0b0011000000000000000,
0b0011000000100000000,
0b0101000000000000000,
- 0b0101000000100000000
+ 0b0101000000100000000,
};
static const uint32_t gen7_datatype_table[32] = {
0b001111111110111101,
0b001011110110101100,
0b001010010100101000,
- 0b001010110100101000
+ 0b001010110100101000,
};
static const uint16_t gen7_subreg_table[32] = {
0b101000000000000,
0b110000000000000,
0b111000000000000,
- 0b111000000011100
+ 0b111000000011100,
};
static const uint16_t gen7_src_index_table[32] = {
0b010001101000,
0b010001101001,
0b010001101010,
- 0b010110001000
+ 0b010110001000,
};
static const uint32_t gen8_control_index_table[32] = {
0b0011000000000000000,
0b0011000000100000000,
0b0101000000000000000,
- 0b0101000000100000000
+ 0b0101000000100000000,
};
static const uint32_t gen8_datatype_table[32] = {
0b001011111011101011101,
0b001001111001101001100,
0b001001001001001001000,
- 0b001001011001001001000
+ 0b001001011001001001000,
};
static const uint16_t gen8_subreg_table[32] = {
0b101000000000000,
0b110000000000000,
0b111000000000000,
- 0b111000000011100
+ 0b111000000011100,
};
static const uint16_t gen8_src_index_table[32] = {
0b010001101000,
0b010001101001,
0b010001101010,
- 0b010110001000
+ 0b010110001000,
+};
+
+/* Datatype index table selected when devinfo->gen == 11; the control index,
+ * subreg and src index tables used for that generation are the gen8 ones.
+ * Holds 32 entries, each written as a 21-digit binary literal.
+ */
+static const uint32_t gen11_datatype_table[32] = {
+ 0b001000000000000000001,
+ 0b001000000000001000000,
+ 0b001000000000001000001,
+ 0b001000000000011000001,
+ 0b001000000000101100101,
+ 0b001000000101111100101,
+ 0b001000000100101000001,
+ 0b001000000100101000101,
+ 0b001000000100101100101,
+ 0b001000001000001000001,
+ 0b001000011000001000000,
+ 0b001000011000001000001,
+ 0b001000101000101000101,
+ 0b001000111000101000100,
+ 0b001000111000101000101,
+ 0b001100100100101100101,
+ 0b001100101100100100101,
+ 0b001100101100101100100,
+ 0b001100101100101100101,
+ 0b001100111100101100100,
+ 0b000000000010000001100,
+ 0b001000000000001100101,
+ 0b001000000000101000101,
+ 0b001000001000001000000,
+ 0b001000101000101000100,
+ 0b001000111000100000100,
+ 0b001001001001000001001,
+ 0b001101111100101100101,
+ 0b001100111100101100101,
+ 0b001001111001101001100,
+ 0b001001001001001001000,
+ 0b001001011001001001000,
};
/* This is actually the control index table for Cherryview (26 bits), but the
0b00100000000110000000000001,
0b00000000000110000000000001,
0b00000000001000000000000001,
- 0b00000000001000000000100001
+ 0b00000000001000000000100001,
};
/* This is actually the control index table for Cherryview (49 bits), but the
0b0000001110010011100100111001000001111000000000000,
0b0000001110010011100100111001000001111000000000010,
0b0000001110010011100100111001000001111000000001000,
- 0b0000001110010011100100111001000001111000000100000
+ 0b0000001110010011100100111001000001111000000100000,
};
static const uint32_t *control_index_table;
static bool
set_control_index(const struct gen_device_info *devinfo,
- brw_compact_inst *dst, brw_inst *src)
+ brw_compact_inst *dst, const brw_inst *src)
{
uint32_t uncompacted = devinfo->gen >= 8 /* 17b/G45; 19b/IVB+ */
? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
static bool
set_datatype_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
- brw_inst *src)
+ const brw_inst *src)
{
uint32_t uncompacted = devinfo->gen >= 8 /* 18b/G45+; 21b/BDW+ */
? (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
static bool
set_subreg_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
- brw_inst *src, bool is_immediate)
+ const brw_inst *src, bool is_immediate)
{
uint16_t uncompacted = /* 15b */
(brw_inst_bits(src, 52, 48) << 0) | /* 5b */
static bool
set_src0_index(const struct gen_device_info *devinfo,
- brw_compact_inst *dst, brw_inst *src)
+ brw_compact_inst *dst, const brw_inst *src)
{
uint16_t compacted;
uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
static bool
set_src1_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
- brw_inst *src, bool is_immediate)
+ const brw_inst *src, bool is_immediate)
{
uint16_t compacted;
static bool
set_3src_control_index(const struct gen_device_info *devinfo,
- brw_compact_inst *dst, brw_inst *src)
+ brw_compact_inst *dst, const brw_inst *src)
{
assert(devinfo->gen >= 8);
static bool
set_3src_source_index(const struct gen_device_info *devinfo,
- brw_compact_inst *dst, brw_inst *src)
+ brw_compact_inst *dst, const brw_inst *src)
{
assert(devinfo->gen >= 8);
}
static bool
-has_unmapped_bits(const struct gen_device_info *devinfo, brw_inst *src)
+has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
{
/* EOT can only be mapped on a send if the src1 is an immediate */
if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
}
static bool
-has_3src_unmapped_bits(const struct gen_device_info *devinfo, brw_inst *src)
+has_3src_unmapped_bits(const struct gen_device_info *devinfo,
+ const brw_inst *src)
{
/* Check for three-source instruction bits that don't map to any of the
* fields of the compacted instruction. All of them seem to be reserved
assert(!brw_inst_bits(src, 127, 126) &&
!brw_inst_bits(src, 105, 105) &&
!brw_inst_bits(src, 84, 84) &&
- !brw_inst_bits(src, 36, 35) &&
!brw_inst_bits(src, 7, 7));
+
+ /* Src1Type and Src2Type, used for mixed-precision floating point */
+ if (brw_inst_bits(src, 36, 35))
+ return true;
}
return false;
static bool
brw_try_compact_3src_instruction(const struct gen_device_info *devinfo,
- brw_compact_inst *dst, brw_inst *src)
+ brw_compact_inst *dst, const brw_inst *src)
{
assert(devinfo->gen >= 8);
#define compact(field) \
brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
+#define compact_a16(field) \
+ brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
- compact(opcode);
+ compact(hw_opcode);
if (!set_3src_control_index(devinfo, dst, src))
return false;
return false;
compact(dst_reg_nr);
- compact(src0_rep_ctrl);
+ compact_a16(src0_rep_ctrl);
brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
compact(debug_control);
compact(saturate);
- compact(src1_rep_ctrl);
- compact(src2_rep_ctrl);
+ compact_a16(src1_rep_ctrl);
+ compact_a16(src2_rep_ctrl);
compact(src0_reg_nr);
compact(src1_reg_nr);
compact(src2_reg_nr);
- compact(src0_subreg_nr);
- compact(src1_subreg_nr);
- compact(src2_subreg_nr);
+ compact_a16(src0_subreg_nr);
+ compact_a16(src1_subreg_nr);
+ compact_a16(src2_subreg_nr);
#undef compact
+#undef compact_a16
return true;
}
return imm == 0 || imm == 0xfffff000;
}
+/**
+ * Applies some small changes to instruction types to increase chances of
+ * compaction.
+ *
+ * The instruction is taken by value and the (possibly modified) copy is
+ * returned; nothing is written through a pointer owned by the caller.
+ *
+ * An instruction whose src0 register file is not BRW_IMMEDIATE_VALUE is
+ * returned unchanged.  Otherwise up to three independent rewrites are
+ * applied, each guarded by its own condition:
+ *
+ *  - src1's type is set to UD (gen6+, except DIM on Haswell and except
+ *    DF/UQ/Q src0 types on gen8+);
+ *  - a zero immediate with F-typed src0 and dst and a dst horizontal stride
+ *    of 1 gets its src0 type changed to VF;
+ *  - an immediate accepted by is_compactable_immediate(), with D-typed src0
+ *    and dst and no conditional modifier, gets UD types on src0 and dst.
+ *
+ * \param devinfo  device description; gen and is_haswell are consulted.
+ * \param inst     instruction to adjust (local copy).
+ * \return the adjusted instruction.
+ */
+static brw_inst
+precompact(const struct gen_device_info *devinfo, brw_inst inst)
+{
+ /* Every rewrite below concerns an immediate src0; bail out otherwise. */
+ if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
+ return inst;
+
+ /* The Bspec's section titled "Non-present Operands" claims that if src0
+ * is an immediate then src1's type must be the same as that of src0.
+ *
+ * The SNB+ DataTypeIndex instruction compaction tables contain mappings
+ * that do not follow this rule. E.g., from the IVB/HSW table:
+ *
+ * DataTypeIndex 18-Bit Mapping Mapped Meaning
+ * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
+ *
+ * And from the SNB table:
+ *
+ * DataTypeIndex 18-Bit Mapping Mapped Meaning
+ * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
+ *
+ * Neither of these causes warnings from the simulator when used,
+ * compacted or otherwise. In fact, all compaction mappings that have an
+ * immediate in src0 use a:ud for src1.
+ *
+ * The GM45 instruction compaction tables do not contain mapped meanings
+ * so it's not clear whether it has the restriction. We'll assume it was
+ * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
+ *
+ * Don't do any of this for 64-bit immediates, since the src1 fields
+ * overlap with the immediate and setting them would overwrite the
+ * immediate we set.
+ */
+ if (devinfo->gen >= 6 &&
+ !(devinfo->is_haswell &&
+ brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
+ !(devinfo->gen >= 8 &&
+ (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
+ brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
+ brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
+ /* Keep src1's register file as it is; only the type becomes UD. */
+ enum brw_reg_file file = brw_inst_src1_reg_file(devinfo, &inst);
+ brw_inst_set_src1_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_UD);
+ }
+
+ /* Compacted instructions only have 12 bits (plus 1 for the other 20)
+ * for immediate values. Presumably the hardware engineers realized
+ * that the only useful floating-point value that could be represented
+ * in this format is 0.0, which can also be represented as a VF-typed
+ * immediate, so they gave us the previously mentioned mapping on IVB+.
+ *
+ * Strangely, we do have a mapping for imm:f in src1, so we don't need
+ * to do this there.
+ *
+ * If we see a 0.0:F, change the type to VF so that it can be compacted.
+ */
+ if (brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
+ brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
+ brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
+ brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
+ /* Keep src0's register file as it is; only the type becomes VF. */
+ enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
+ brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
+ }
+
+ /* There are no mappings for dst:d | i:d, so if the immediate is suitable
+ * set the types to :UD so the instruction can be compacted.
+ */
+ if (is_compactable_immediate(brw_inst_imm_ud(devinfo, &inst)) &&
+ brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
+ brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
+ brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
+ enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
+ enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
+
+ brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
+ brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
+ }
+
+ return inst;
+}
+
/**
* Tries to compact instruction src into dst.
*
*/
bool
brw_try_compact_instruction(const struct gen_device_info *devinfo,
- brw_compact_inst *dst, brw_inst *src)
+ brw_compact_inst *dst, const brw_inst *src)
{
brw_compact_inst temp;
#define compact(field) \
brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
- compact(opcode);
+ compact(hw_opcode);
compact(debug_control);
if (!set_control_index(devinfo, &temp, src))
#define uncompact(field) \
brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
+#define uncompact_a16(field) \
+ brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
- uncompact(opcode);
+ uncompact(hw_opcode);
set_uncompacted_3src_control_index(devinfo, dst, src);
set_uncompacted_3src_source_index(devinfo, dst, src);
uncompact(dst_reg_nr);
- uncompact(src0_rep_ctrl);
+ uncompact_a16(src0_rep_ctrl);
brw_inst_set_3src_cmpt_control(devinfo, dst, false);
uncompact(debug_control);
uncompact(saturate);
- uncompact(src1_rep_ctrl);
- uncompact(src2_rep_ctrl);
+ uncompact_a16(src1_rep_ctrl);
+ uncompact_a16(src2_rep_ctrl);
uncompact(src0_reg_nr);
uncompact(src1_reg_nr);
uncompact(src2_reg_nr);
- uncompact(src0_subreg_nr);
- uncompact(src1_subreg_nr);
- uncompact(src2_subreg_nr);
+ uncompact_a16(src0_subreg_nr);
+ uncompact_a16(src1_subreg_nr);
+ uncompact_a16(src2_subreg_nr);
#undef uncompact
+#undef uncompact_a16
}
void
memset(dst, 0, sizeof(*dst));
if (devinfo->gen >= 8 &&
- is_3src(devinfo, brw_compact_inst_3src_opcode(devinfo, src))) {
+ is_3src(devinfo, brw_opcode_decode(
+ devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
brw_uncompact_3src_instruction(devinfo, dst, src);
return;
}
#define uncompact(field) \
brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
- uncompact(opcode);
+ uncompact(hw_opcode);
uncompact(debug_control);
set_uncompacted_control(devinfo, dst, src);
assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
+ assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
switch (devinfo->gen) {
+ case 12:
+ control_index_table = NULL;
+ datatype_table = NULL;
+ subreg_table = NULL;
+ src_index_table = NULL;
+ break;
+ case 11:
+ control_index_table = gen8_control_index_table;
+ datatype_table = gen11_datatype_table;
+ subreg_table = gen8_subreg_table;
+ src_index_table = gen8_src_index_table;
+ break;
case 10:
case 9:
case 8:
void
brw_compact_instructions(struct brw_codegen *p, int start_offset,
- int num_annotations, struct annotation *annotation)
+ struct disasm_info *disasm)
{
- if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION) || p->devinfo->gen > 11)
return;
const struct gen_device_info *devinfo = p->devinfo;
/* For an instruction at byte offset 8*i after compaction, this was its IP
* (in 16-byte units) before compaction.
*/
- int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
+ int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
if (devinfo->gen == 4 && !devinfo->is_g4x)
return;
old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
- brw_inst saved = *src;
+ brw_inst inst = precompact(devinfo, *src);
+ brw_inst saved = inst;
- if (brw_try_compact_instruction(devinfo, dst, src)) {
+ if (brw_try_compact_instruction(devinfo, dst, &inst)) {
compacted_count++;
if (INTEL_DEBUG) {
if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
brw_compact_inst *align = store + offset;
memset(align, 0, sizeof(*align));
- brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP);
+ brw_compact_inst_set_hw_opcode(
+ devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
brw_compact_inst_set_cmpt_control(devinfo, align, true);
offset += sizeof(brw_compact_inst);
compacted_count--;
}
}
+ /* Add an entry for the ending offset of the program. This greatly
+ * simplifies the linked list walk at the end of the function.
+ */
+ old_ip[offset / sizeof(brw_compact_inst)] =
+ (p->next_insn_offset - start_offset) / sizeof(brw_inst);
+
/* Fix up control flow offsets. */
p->next_insn_offset = start_offset + offset;
for (offset = 0; offset < p->next_insn_offset - start_offset;
brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
}
break;
+
+ default:
+ break;
}
}
if (p->next_insn_offset & sizeof(brw_compact_inst)) {
brw_compact_inst *align = store + offset;
memset(align, 0, sizeof(*align));
- brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP);
+ brw_compact_inst_set_hw_opcode(
+ devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
brw_compact_inst_set_cmpt_control(devinfo, align, true);
p->next_insn_offset += sizeof(brw_compact_inst);
}
p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
- /* Update the instruction offsets for each annotation. */
- if (annotation) {
- for (int offset = 0, i = 0; i < num_annotations; i++) {
+ /* Update the instruction offsets for each group. */
+ if (disasm) {
+ int offset = 0;
+
+ foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
- sizeof(brw_inst) != annotation[i].offset) {
+ sizeof(brw_inst) != group->offset) {
assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
- sizeof(brw_inst) < annotation[i].offset);
+ sizeof(brw_inst) < group->offset);
offset = next_offset(devinfo, store, offset);
}
- annotation[i].offset = start_offset + offset;
+ group->offset = start_offset + offset;
offset = next_offset(devinfo, store, offset);
}
-
- annotation[num_annotations].offset = p->next_insn_offset;
}
}