From: Francisco Jerez Date: Mon, 11 Jun 2018 19:54:17 +0000 (-0700) Subject: intel/eu: Rework opcode description tables to allow efficient look-up by either HW... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=25dd67099df301f09ce40b8f9c5a3bbc857e367c;p=mesa.git intel/eu: Rework opcode description tables to allow efficient look-up by either HW or IR opcode. This rewrites the current opcode description tables as a more compact flat data structure. The purpose is to allow efficient constant-time look-up by either HW or IR opcode, which will allow us to drop the hard-coded correspondence between HW and IR opcodes -- See the next commits for the rationale. brw_eu.c is now built as C++ source so we can take advantage of pointers to member in order to make the look-up function work regardless of the opcode_desc member used as look-up key. v2: Optimize devinfo struct comparison (Caio) Reviewed-by: Caio Marcelo de Oliveira Filho Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 115403983cd..a57c0d1bada 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -43,7 +43,7 @@ COMPILER_FILES = \ compiler/brw_disasm.c \ compiler/brw_disasm_info.c \ compiler/brw_disasm_info.h \ - compiler/brw_eu.c \ + compiler/brw_eu.cpp \ compiler/brw_eu_compact.c \ compiler/brw_eu_defines.h \ compiler/brw_eu_emit.c \ diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c deleted file mode 100644 index fbc899d765e..00000000000 --- a/src/intel/compiler/brw_eu.c +++ /dev/null @@ -1,771 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include <sys/stat.h> -#include <fcntl.h> - -#include "brw_eu_defines.h" -#include "brw_eu.h" -#include "brw_shader.h" -#include "dev/gen_debug.h" - -#include "util/ralloc.h" - -/* Returns a conditional modifier that negates the condition. 
*/ -enum brw_conditional_mod -brw_negate_cmod(enum brw_conditional_mod cmod) -{ - switch (cmod) { - case BRW_CONDITIONAL_Z: - return BRW_CONDITIONAL_NZ; - case BRW_CONDITIONAL_NZ: - return BRW_CONDITIONAL_Z; - case BRW_CONDITIONAL_G: - return BRW_CONDITIONAL_LE; - case BRW_CONDITIONAL_GE: - return BRW_CONDITIONAL_L; - case BRW_CONDITIONAL_L: - return BRW_CONDITIONAL_GE; - case BRW_CONDITIONAL_LE: - return BRW_CONDITIONAL_G; - default: - unreachable("Can't negate this cmod"); - } -} - -/* Returns the corresponding conditional mod for swapping src0 and - * src1 in e.g. CMP. - */ -enum brw_conditional_mod -brw_swap_cmod(enum brw_conditional_mod cmod) -{ - switch (cmod) { - case BRW_CONDITIONAL_Z: - case BRW_CONDITIONAL_NZ: - return cmod; - case BRW_CONDITIONAL_G: - return BRW_CONDITIONAL_L; - case BRW_CONDITIONAL_GE: - return BRW_CONDITIONAL_LE; - case BRW_CONDITIONAL_L: - return BRW_CONDITIONAL_G; - case BRW_CONDITIONAL_LE: - return BRW_CONDITIONAL_GE; - default: - return BRW_CONDITIONAL_NONE; - } -} - -/** - * Get the least significant bit offset of the i+1-th component of immediate - * type \p type. For \p i equal to the two's complement of j, return the - * offset of the j-th component starting from the end of the vector. For - * scalar register types return zero. - */ -static unsigned -imm_shift(enum brw_reg_type type, unsigned i) -{ - assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V && - "Not implemented."); - - if (type == BRW_REGISTER_TYPE_VF) - return 8 * (i & 3); - else - return 0; -} - -/** - * Swizzle an arbitrary immediate \p x of the given type according to the - * permutation specified as \p swz. 
- */ -uint32_t -brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz) -{ - if (imm_shift(type, 1)) { - const unsigned n = 32 / imm_shift(type, 1); - uint32_t y = 0; - - for (unsigned i = 0; i < n; i++) { - /* Shift the specified component all the way to the right and left to - * discard any undesired L/MSBs, then shift it right into component i. - */ - y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3)) - << imm_shift(type, ~0u) - >> imm_shift(type, ~0u - i); - } - - return y; - } else { - return x; - } -} - -unsigned -brw_get_default_exec_size(struct brw_codegen *p) -{ - return p->current->exec_size; -} - -unsigned -brw_get_default_group(struct brw_codegen *p) -{ - return p->current->group; -} - -unsigned -brw_get_default_access_mode(struct brw_codegen *p) -{ - return p->current->access_mode; -} - -void -brw_set_default_exec_size(struct brw_codegen *p, unsigned value) -{ - p->current->exec_size = value; -} - -void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc) -{ - p->current->predicate = pc; -} - -void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse) -{ - p->current->pred_inv = predicate_inverse; -} - -void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg) -{ - assert(subreg < 2); - p->current->flag_subreg = reg * 2 + subreg; -} - -void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ) -{ - p->current->access_mode = access_mode; -} - -void -brw_set_default_compression_control(struct brw_codegen *p, - enum brw_compression compression_control) -{ - switch (compression_control) { - case BRW_COMPRESSION_NONE: - /* This is the "use the first set of bits of dmask/vmask/arf - * according to execsize" option. - */ - p->current->group = 0; - break; - case BRW_COMPRESSION_2NDHALF: - /* For SIMD8, this is "use the second set of 8 bits." 
*/ - p->current->group = 8; - break; - case BRW_COMPRESSION_COMPRESSED: - /* For SIMD16 instruction compression, use the first set of 16 bits - * since we don't do SIMD32 dispatch. - */ - p->current->group = 0; - break; - default: - unreachable("not reached"); - } - - if (p->devinfo->gen <= 6) { - p->current->compressed = - (compression_control == BRW_COMPRESSION_COMPRESSED); - } -} - -/** - * Enable or disable instruction compression on the given instruction leaving - * the currently selected channel enable group untouched. - */ -void -brw_inst_set_compression(const struct gen_device_info *devinfo, - brw_inst *inst, bool on) -{ - if (devinfo->gen >= 6) { - /* No-op, the EU will figure out for us whether the instruction needs to - * be compressed. - */ - } else { - /* The channel group and compression controls are non-orthogonal, there - * are two possible representations for uncompressed instructions and we - * may need to preserve the current one to avoid changing the selected - * channel group inadvertently. - */ - if (on) - brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_COMPRESSED); - else if (brw_inst_qtr_control(devinfo, inst) - == BRW_COMPRESSION_COMPRESSED) - brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); - } -} - -void -brw_set_default_compression(struct brw_codegen *p, bool on) -{ - p->current->compressed = on; -} - -/** - * Apply the range of channel enable signals given by - * [group, group + exec_size) to the instruction passed as argument. 
- */ -void -brw_inst_set_group(const struct gen_device_info *devinfo, - brw_inst *inst, unsigned group) -{ - if (devinfo->gen >= 7) { - assert(group % 4 == 0 && group < 32); - brw_inst_set_qtr_control(devinfo, inst, group / 8); - brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2); - - } else if (devinfo->gen == 6) { - assert(group % 8 == 0 && group < 32); - brw_inst_set_qtr_control(devinfo, inst, group / 8); - - } else { - assert(group % 8 == 0 && group < 16); - /* The channel group and compression controls are non-orthogonal, there - * are two possible representations for group zero and we may need to - * preserve the current one to avoid changing the selected compression - * enable inadvertently. - */ - if (group == 8) - brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_2NDHALF); - else if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_2NDHALF) - brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); - } -} - -void -brw_set_default_group(struct brw_codegen *p, unsigned group) -{ - p->current->group = group; -} - -void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ) -{ - p->current->mask_control = value; -} - -void brw_set_default_saturate( struct brw_codegen *p, bool enable ) -{ - p->current->saturate = enable; -} - -void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value) -{ - p->current->acc_wr_control = value; -} - -void brw_push_insn_state( struct brw_codegen *p ) -{ - assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); - *(p->current + 1) = *p->current; - p->current++; -} - -void brw_pop_insn_state( struct brw_codegen *p ) -{ - assert(p->current != p->stack); - p->current--; -} - - -/*********************************************************************** - */ -void -brw_init_codegen(const struct gen_device_info *devinfo, - struct brw_codegen *p, void *mem_ctx) -{ - memset(p, 0, sizeof(*p)); - - p->devinfo = devinfo; - p->automatic_exec_sizes = true; - /* - * Set the initial 
instruction store array size to 1024, if found that - * isn't enough, then it will double the store size at brw_next_insn() - * until out of memory. - */ - p->store_size = 1024; - p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size); - p->nr_insn = 0; - p->current = p->stack; - memset(p->current, 0, sizeof(p->current[0])); - - p->mem_ctx = mem_ctx; - - /* Some defaults? - */ - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ - brw_set_default_saturate(p, 0); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - - /* Set up control flow stack */ - p->if_stack_depth = 0; - p->if_stack_array_size = 16; - p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size); - - p->loop_stack_depth = 0; - p->loop_stack_array_size = 16; - p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); - p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); -} - - -const unsigned *brw_get_program( struct brw_codegen *p, - unsigned *sz ) -{ - *sz = p->next_insn_offset; - return (const unsigned *)p->store; -} - -bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, - const char *identifier) -{ - const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH"); - if (!read_path) { - return false; - } - - char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier); - - int fd = open(name, O_RDONLY); - ralloc_free(name); - - if (fd == -1) { - return false; - } - - struct stat sb; - if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) { - close(fd); - return false; - } - - p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst); - p->nr_insn += sb.st_size / sizeof(brw_inst); - - p->next_insn_offset = start_offset + sb.st_size; - p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst); - p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset); - assert(p->store); - - read(fd, p->store + 
start_offset, sb.st_size); - close(fd); - - bool valid = brw_validate_instructions(p->devinfo, p->store, - start_offset, p->next_insn_offset, - 0); - assert(valid); - - return true; -} - -void -brw_disassemble(const struct gen_device_info *devinfo, - const void *assembly, int start, int end, FILE *out) -{ - bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0; - - for (int offset = start; offset < end;) { - const brw_inst *insn = (const brw_inst *)((char *)assembly + offset); - brw_inst uncompacted; - bool compacted = brw_inst_cmpt_control(devinfo, insn); - if (0) - fprintf(out, "0x%08x: ", offset); - - if (compacted) { - brw_compact_inst *compacted = (brw_compact_inst *)insn; - if (dump_hex) { - unsigned char * insn_ptr = ((unsigned char *)&insn[0]); - const unsigned int blank_spaces = 24; - for (int i = 0 ; i < 8; i = i + 4) { - fprintf(out, "%02x %02x %02x %02x ", - insn_ptr[i], - insn_ptr[i + 1], - insn_ptr[i + 2], - insn_ptr[i + 3]); - } - /* Make compacted instructions hex value output vertically aligned - * with uncompacted instructions hex value - */ - fprintf(out, "%*c", blank_spaces, ' '); - } - - brw_uncompact_instruction(devinfo, &uncompacted, compacted); - insn = &uncompacted; - offset += 8; - } else { - if (dump_hex) { - unsigned char * insn_ptr = ((unsigned char *)&insn[0]); - for (int i = 0 ; i < 16; i = i + 4) { - fprintf(out, "%02x %02x %02x %02x ", - insn_ptr[i], - insn_ptr[i + 1], - insn_ptr[i + 2], - insn_ptr[i + 3]); - } - } - offset += 16; - } - - brw_disassemble_inst(out, devinfo, insn, compacted); - } -} - -enum gen { - GEN4 = (1 << 0), - GEN45 = (1 << 1), - GEN5 = (1 << 2), - GEN6 = (1 << 3), - GEN7 = (1 << 4), - GEN75 = (1 << 5), - GEN8 = (1 << 6), - GEN9 = (1 << 7), - GEN10 = (1 << 8), - GEN11 = (1 << 9), - GEN_ALL = ~0 -}; - -#define GEN_LT(gen) ((gen) - 1) -#define GEN_GE(gen) (~GEN_LT(gen)) -#define GEN_LE(gen) (GEN_LT(gen) | (gen)) - -static const struct opcode_desc opcode_10_descs[] = { - { .name = "dim", .nsrc = 1, .ndst = 1, .gens = 
GEN75 }, - { .name = "smov", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN8) }, -}; - -static const struct opcode_desc opcode_35_descs[] = { - { .name = "iff", .nsrc = 0, .ndst = 0, .gens = GEN_LE(GEN5) }, - { .name = "brc", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN7) }, -}; - -static const struct opcode_desc opcode_38_descs[] = { - { .name = "do", .nsrc = 0, .ndst = 0, .gens = GEN_LE(GEN5) }, - { .name = "case", .nsrc = 0, .ndst = 0, .gens = GEN6 }, -}; - -static const struct opcode_desc opcode_44_descs[] = { - { .name = "msave", .nsrc = 0, .ndst = 0, .gens = GEN_LE(GEN5) }, - { .name = "call", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN6) }, -}; - -static const struct opcode_desc opcode_45_descs[] = { - { .name = "mrest", .nsrc = 0, .ndst = 0, .gens = GEN_LE(GEN5) }, - { .name = "ret", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN6) }, -}; - -static const struct opcode_desc opcode_46_descs[] = { - { .name = "push", .nsrc = 0, .ndst = 0, .gens = GEN_LE(GEN5) }, - { .name = "fork", .nsrc = 0, .ndst = 0, .gens = GEN6 }, - { .name = "goto", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN8) }, -}; - -static const struct opcode_desc opcode_descs[128] = { - [BRW_OPCODE_ILLEGAL] = { - .name = "illegal", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [BRW_OPCODE_MOV] = { - .name = "mov", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_SEL] = { - .name = "sel", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_MOVI] = { - .name = "movi", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN45), - }, - [BRW_OPCODE_NOT] = { - .name = "not", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_AND] = { - .name = "and", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_OR] = { - .name = "or", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_XOR] = { - .name = "xor", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_SHR] = { - .name = "shr", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_SHL] = { - .name = "shl", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - 
}, - [10] = { - .table = opcode_10_descs, .size = ARRAY_SIZE(opcode_10_descs), - }, - /* Reserved - 11 */ - [BRW_OPCODE_ASR] = { - .name = "asr", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - /* Reserved - 13 */ - [BRW_OPCODE_ROR] = { - .name = "ror", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN11), - }, - [BRW_OPCODE_ROL] = { - .name = "rol", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN11), - }, - [BRW_OPCODE_CMP] = { - .name = "cmp", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_CMPN] = { - .name = "cmpn", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_CSEL] = { - .name = "csel", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN8), - }, - [BRW_OPCODE_F32TO16] = { - .name = "f32to16", .nsrc = 1, .ndst = 1, .gens = GEN7 | GEN75, - }, - [BRW_OPCODE_F16TO32] = { - .name = "f16to32", .nsrc = 1, .ndst = 1, .gens = GEN7 | GEN75, - }, - /* Reserved - 21-22 */ - [BRW_OPCODE_BFREV] = { - .name = "bfrev", .nsrc = 1, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_BFE] = { - .name = "bfe", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_BFI1] = { - .name = "bfi1", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_BFI2] = { - .name = "bfi2", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN7), - }, - /* Reserved - 27-31 */ - [BRW_OPCODE_JMPI] = { - .name = "jmpi", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [33] = { - .name = "brd", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_IF] = { - .name = "if", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [35] = { - .table = opcode_35_descs, .size = ARRAY_SIZE(opcode_35_descs), - }, - [BRW_OPCODE_ELSE] = { - .name = "else", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [BRW_OPCODE_ENDIF] = { - .name = "endif", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [38] = { - .table = opcode_38_descs, .size = ARRAY_SIZE(opcode_38_descs), - }, - [BRW_OPCODE_WHILE] = { - .name = "while", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [BRW_OPCODE_BREAK] = { - .name = "break", .nsrc = 0, .ndst = 0, 
.gens = GEN_ALL, - }, - [BRW_OPCODE_CONTINUE] = { - .name = "cont", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [BRW_OPCODE_HALT] = { - .name = "halt", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, - [43] = { - .name = "calla", .nsrc = 0, .ndst = 0, .gens = GEN_GE(GEN75), - }, - [44] = { - .table = opcode_44_descs, .size = ARRAY_SIZE(opcode_44_descs), - }, - [45] = { - .table = opcode_45_descs, .size = ARRAY_SIZE(opcode_45_descs), - }, - [46] = { - .table = opcode_46_descs, .size = ARRAY_SIZE(opcode_46_descs), - }, - [47] = { - .name = "pop", .nsrc = 2, .ndst = 0, .gens = GEN_LE(GEN5), - }, - [BRW_OPCODE_WAIT] = { - .name = "wait", .nsrc = 1, .ndst = 0, .gens = GEN_ALL, - }, - [BRW_OPCODE_SEND] = { - .name = "send", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_SENDC] = { - .name = "sendc", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_SENDS] = { - .name = "sends", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN9), - }, - [BRW_OPCODE_SENDSC] = { - .name = "sendsc", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN9), - }, - /* Reserved 53-55 */ - [BRW_OPCODE_MATH] = { - .name = "math", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN6), - }, - /* Reserved 57-63 */ - [BRW_OPCODE_ADD] = { - .name = "add", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_MUL] = { - .name = "mul", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_AVG] = { - .name = "avg", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_FRC] = { - .name = "frc", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_RNDU] = { - .name = "rndu", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_RNDD] = { - .name = "rndd", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_RNDE] = { - .name = "rnde", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_RNDZ] = { - .name = "rndz", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_MAC] = { - .name = "mac", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_MACH] = { - .name = "mach", .nsrc = 2, 
.ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_LZD] = { - .name = "lzd", .nsrc = 1, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_FBH] = { - .name = "fbh", .nsrc = 1, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_FBL] = { - .name = "fbl", .nsrc = 1, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_CBIT] = { - .name = "cbit", .nsrc = 1, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_ADDC] = { - .name = "addc", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_SUBB] = { - .name = "subb", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN7), - }, - [BRW_OPCODE_SAD2] = { - .name = "sad2", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_SADA2] = { - .name = "sada2", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - /* Reserved 82-83 */ - [BRW_OPCODE_DP4] = { - .name = "dp4", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_DPH] = { - .name = "dph", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_DP3] = { - .name = "dp3", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - [BRW_OPCODE_DP2] = { - .name = "dp2", .nsrc = 2, .ndst = 1, .gens = GEN_ALL, - }, - /* Reserved 88 */ - [BRW_OPCODE_LINE] = { - .name = "line", .nsrc = 2, .ndst = 1, .gens = GEN_LE(GEN10), - }, - [BRW_OPCODE_PLN] = { - .name = "pln", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN45) & GEN_LE(GEN10), - }, - [BRW_OPCODE_MAD] = { - .name = "mad", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN6), - }, - [BRW_OPCODE_LRP] = { - .name = "lrp", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN6) & GEN_LE(GEN10), - }, - [93] = { - .name = "madm", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN8), - }, - /* Reserved 94-124 */ - [BRW_OPCODE_NENOP] = { - .name = "nenop", .nsrc = 0, .ndst = 0, .gens = GEN45, - }, - [BRW_OPCODE_NOP] = { - .name = "nop", .nsrc = 0, .ndst = 0, .gens = GEN_ALL, - }, -}; - -static enum gen -gen_from_devinfo(const struct gen_device_info *devinfo) -{ - switch (devinfo->gen) { - case 4: return devinfo->is_g4x ? 
GEN45 : GEN4; - case 5: return GEN5; - case 6: return GEN6; - case 7: return devinfo->is_haswell ? GEN75 : GEN7; - case 8: return GEN8; - case 9: return GEN9; - case 10: return GEN10; - case 11: return GEN11; - default: - unreachable("not reached"); - } -} - -/* Return the matching opcode_desc for the specified opcode number and - * hardware generation, or NULL if the opcode is not supported by the device. - */ -const struct opcode_desc * -brw_opcode_desc(const struct gen_device_info *devinfo, enum opcode opcode) -{ - if (opcode >= ARRAY_SIZE(opcode_descs)) - return NULL; - - enum gen gen = gen_from_devinfo(devinfo); - if (opcode_descs[opcode].gens != 0) { - if ((opcode_descs[opcode].gens & gen) != 0) { - return &opcode_descs[opcode]; - } - } else if (opcode_descs[opcode].table != NULL) { - const struct opcode_desc *table = opcode_descs[opcode].table; - for (unsigned i = 0; i < opcode_descs[opcode].size; i++) { - if ((table[i].gens & gen) != 0) { - return &table[i]; - } - } - } - return NULL; -} diff --git a/src/intel/compiler/brw_eu.cpp b/src/intel/compiler/brw_eu.cpp new file mode 100644 index 00000000000..cf707f7303e --- /dev/null +++ b/src/intel/compiler/brw_eu.cpp @@ -0,0 +1,630 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include <sys/stat.h> +#include <fcntl.h> + +#include "brw_eu_defines.h" +#include "brw_eu.h" +#include "brw_shader.h" +#include "dev/gen_debug.h" + +#include "util/ralloc.h" + +/* Returns a conditional modifier that negates the condition. */ +enum brw_conditional_mod +brw_negate_cmod(enum brw_conditional_mod cmod) +{ + switch (cmod) { + case BRW_CONDITIONAL_Z: + return BRW_CONDITIONAL_NZ; + case BRW_CONDITIONAL_NZ: + return BRW_CONDITIONAL_Z; + case BRW_CONDITIONAL_G: + return BRW_CONDITIONAL_LE; + case BRW_CONDITIONAL_GE: + return BRW_CONDITIONAL_L; + case BRW_CONDITIONAL_L: + return BRW_CONDITIONAL_GE; + case BRW_CONDITIONAL_LE: + return BRW_CONDITIONAL_G; + default: + unreachable("Can't negate this cmod"); + } +} + +/* Returns the corresponding conditional mod for swapping src0 and + * src1 in e.g. CMP. + */ +enum brw_conditional_mod +brw_swap_cmod(enum brw_conditional_mod cmod) +{ + switch (cmod) { + case BRW_CONDITIONAL_Z: + case BRW_CONDITIONAL_NZ: + return cmod; + case BRW_CONDITIONAL_G: + return BRW_CONDITIONAL_L; + case BRW_CONDITIONAL_GE: + return BRW_CONDITIONAL_LE; + case BRW_CONDITIONAL_L: + return BRW_CONDITIONAL_G; + case BRW_CONDITIONAL_LE: + return BRW_CONDITIONAL_GE; + default: + return BRW_CONDITIONAL_NONE; + } +} + +/** + * Get the least significant bit offset of the i+1-th component of immediate + * type \p type. 
For \p i equal to the two's complement of j, return the + * offset of the j-th component starting from the end of the vector. For + * scalar register types return zero. + */ +static unsigned +imm_shift(enum brw_reg_type type, unsigned i) +{ + assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V && + "Not implemented."); + + if (type == BRW_REGISTER_TYPE_VF) + return 8 * (i & 3); + else + return 0; +} + +/** + * Swizzle an arbitrary immediate \p x of the given type according to the + * permutation specified as \p swz. + */ +uint32_t +brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz) +{ + if (imm_shift(type, 1)) { + const unsigned n = 32 / imm_shift(type, 1); + uint32_t y = 0; + + for (unsigned i = 0; i < n; i++) { + /* Shift the specified component all the way to the right and left to + * discard any undesired L/MSBs, then shift it right into component i. + */ + y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3)) + << imm_shift(type, ~0u) + >> imm_shift(type, ~0u - i); + } + + return y; + } else { + return x; + } +} + +unsigned +brw_get_default_exec_size(struct brw_codegen *p) +{ + return p->current->exec_size; +} + +unsigned +brw_get_default_group(struct brw_codegen *p) +{ + return p->current->group; +} + +unsigned +brw_get_default_access_mode(struct brw_codegen *p) +{ + return p->current->access_mode; +} + +void +brw_set_default_exec_size(struct brw_codegen *p, unsigned value) +{ + p->current->exec_size = value; +} + +void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc) +{ + p->current->predicate = pc; +} + +void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse) +{ + p->current->pred_inv = predicate_inverse; +} + +void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg) +{ + assert(subreg < 2); + p->current->flag_subreg = reg * 2 + subreg; +} + +void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ) +{ + 
p->current->access_mode = access_mode; +} + +void +brw_set_default_compression_control(struct brw_codegen *p, + enum brw_compression compression_control) +{ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->group = 0; + break; + case BRW_COMPRESSION_2NDHALF: + /* For SIMD8, this is "use the second set of 8 bits." */ + p->current->group = 8; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For SIMD16 instruction compression, use the first set of 16 bits + * since we don't do SIMD32 dispatch. + */ + p->current->group = 0; + break; + default: + unreachable("not reached"); + } + + if (p->devinfo->gen <= 6) { + p->current->compressed = + (compression_control == BRW_COMPRESSION_COMPRESSED); + } +} + +/** + * Enable or disable instruction compression on the given instruction leaving + * the currently selected channel enable group untouched. + */ +void +brw_inst_set_compression(const struct gen_device_info *devinfo, + brw_inst *inst, bool on) +{ + if (devinfo->gen >= 6) { + /* No-op, the EU will figure out for us whether the instruction needs to + * be compressed. + */ + } else { + /* The channel group and compression controls are non-orthogonal, there + * are two possible representations for uncompressed instructions and we + * may need to preserve the current one to avoid changing the selected + * channel group inadvertently. + */ + if (on) + brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_COMPRESSED); + else if (brw_inst_qtr_control(devinfo, inst) + == BRW_COMPRESSION_COMPRESSED) + brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); + } +} + +void +brw_set_default_compression(struct brw_codegen *p, bool on) +{ + p->current->compressed = on; +} + +/** + * Apply the range of channel enable signals given by + * [group, group + exec_size) to the instruction passed as argument. 
+ */
+void
+brw_inst_set_group(const struct gen_device_info *devinfo,
+                   brw_inst *inst, unsigned group)
+{
+   if (devinfo->gen >= 7) {
+      assert(group % 4 == 0 && group < 32);
+      brw_inst_set_qtr_control(devinfo, inst, group / 8);
+      brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
+
+   } else if (devinfo->gen == 6) {
+      assert(group % 8 == 0 && group < 32);
+      brw_inst_set_qtr_control(devinfo, inst, group / 8);
+
+   } else {
+      assert(group % 8 == 0 && group < 16);
+      /* The channel group and compression controls are non-orthogonal, there
+       * are two possible representations for group zero and we may need to
+       * preserve the current one to avoid changing the selected compression
+       * enable inadvertently.
+       */
+      if (group == 8)
+         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_2NDHALF);
+      else if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_2NDHALF)
+         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
+   }
+}
+
+/**
+ * Record \p group as the channel group of the current default instruction
+ * state.
+ */
+void
+brw_set_default_group(struct brw_codegen *p, unsigned group)
+{
+   p->current->group = group;
+}
+
+/**
+ * Record \p value as the mask control of the current default instruction
+ * state.
+ */
+void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
+{
+   p->current->mask_control = value;
+}
+
+/**
+ * Record \p enable as the saturate flag of the current default instruction
+ * state.
+ */
+void brw_set_default_saturate( struct brw_codegen *p, bool enable )
+{
+   p->current->saturate = enable;
+}
+
+/**
+ * Record \p value as the accumulator write control of the current default
+ * instruction state.
+ */
+void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
+{
+   p->current->acc_wr_control = value;
+}
+
+/**
+ * Copy the current default instruction state into the next stack slot and
+ * make that copy current.  Asserts that the stack is not already full.
+ */
+void brw_push_insn_state( struct brw_codegen *p )
+{
+   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+   *(p->current + 1) = *p->current;
+   p->current++;
+}
+
+/**
+ * Drop the current default instruction state and make the previous stack
+ * entry current again.  Asserts that the stack is not at its bottom entry.
+ */
+void brw_pop_insn_state( struct brw_codegen *p )
+{
+   assert(p->current != p->stack);
+   p->current--;
+}
+
+
+/***********************************************************************
+ */
+void
+brw_init_codegen(const struct gen_device_info *devinfo,
+                 struct brw_codegen *p, void *mem_ctx)
+{
+   memset(p, 0, sizeof(*p));
+
+   p->devinfo = devinfo;
+   p->automatic_exec_sizes = true;
+   /*
+    * Set the initial instruction store array size to 1024, if found that
+    * isn't enough, then it will double the store size at brw_next_insn()
+    * until out of memory.
+    */
+   p->store_size = 1024;
+   p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
+   p->nr_insn = 0;
+   p->current = p->stack;
+   memset(p->current, 0, sizeof(p->current[0]));
+
+   p->mem_ctx = mem_ctx;
+
+   /* Some defaults?
+    */
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
+   brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+   brw_set_default_saturate(p, 0);
+   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+
+   /* Set up control flow stack */
+   p->if_stack_depth = 0;
+   p->if_stack_array_size = 16;
+   p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
+
+   p->loop_stack_depth = 0;
+   p->loop_stack_array_size = 16;
+   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
+   p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
+}
+
+
+/**
+ * Return a pointer to the instruction store of \p p and write the current
+ * value of p->next_insn_offset (a byte offset) to \p sz.
+ */
+const unsigned *brw_get_program( struct brw_codegen *p,
+                                 unsigned *sz )
+{
+   *sz = p->next_insn_offset;
+   return (const unsigned *)p->store;
+}
+
+/**
+ * Replace the instructions emitted from byte offset \p start_offset onwards
+ * with the contents of "$INTEL_SHADER_ASM_READ_PATH/<identifier>.bin".
+ *
+ * Returns false, leaving \p p untouched, if the environment variable is
+ * unset, the file cannot be opened, is not a regular file, or cannot be read
+ * completely.  Returns true once the store has been resized and filled with
+ * the file contents; the replacement is then run through
+ * brw_validate_instructions() and its result is asserted.
+ */
+bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
+                               const char *identifier)
+{
+   const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
+   if (!read_path) {
+      return false;
+   }
+
+   char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);
+
+   int fd = open(name, O_RDONLY);
+   ralloc_free(name);
+
+   if (fd == -1) {
+      return false;
+   }
+
+   struct stat sb;
+   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
+      close(fd);
+      return false;
+   }
+
+   /* Read the whole replacement into a scratch buffer before touching the
+    * codegen state, so that a failed or short read cannot leave the store
+    * resized and only partially overwritten.
+    */
+   const size_t size = sb.st_size;
+   void *data = ralloc_size(NULL, size);
+   const ssize_t ret = data ? read(fd, data, size) : -1;
+   close(fd);
+
+   if (ret < 0 || (size_t)ret != size) {
+      ralloc_free(data);
+      return false;
+   }
+
+   p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
+   p->nr_insn += size / sizeof(brw_inst);
+
+   p->next_insn_offset = start_offset + size;
+   p->store_size = p->next_insn_offset / sizeof(brw_inst);
+   p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
+   assert(p->store);
+
+   /* start_offset is a byte offset: the destination has to be computed on a
+    * byte pointer, since arithmetic on a brw_inst pointer would scale the
+    * offset by sizeof(brw_inst) and write outside of the store.
+    */
+   memcpy((char *)p->store + start_offset, data, size);
+   ralloc_free(data);
+
+   bool valid = brw_validate_instructions(p->devinfo, p->store,
+                                          start_offset, p->next_insn_offset,
+                                          0);
+   assert(valid);
+   (void)valid; /* Only consumed by the assert above. */
+
+   return true;
+}
+
+/**
+ * Disassemble the instructions found in the byte range [\p start, \p end) of
+ * \p assembly to \p out.
+ *
+ * Compacted instructions advance the offset by 8 bytes and are uncompacted
+ * before being printed, all other instructions advance it by 16 bytes.  If
+ * DEBUG_HEX is set in INTEL_DEBUG the raw bytes of each instruction are
+ * printed in front of its disassembly.
+ */
+void
+brw_disassemble(const struct gen_device_info *devinfo,
+                const void *assembly, int start, int end, FILE *out)
+{
+   bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
+
+   for (int offset = start; offset < end;) {
+      const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
+      brw_inst uncompacted;
+      bool compacted = brw_inst_cmpt_control(devinfo, insn);
+      if (0)
+         fprintf(out, "0x%08x: ", offset);
+
+      if (compacted) {
+         brw_compact_inst *compacted = (brw_compact_inst *)insn;
+         if (dump_hex) {
+            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
+            /* The '*' field width of fprintf() takes an int argument. */
+            const int blank_spaces = 24;
+            for (int i = 0 ; i < 8; i = i + 4) {
+               fprintf(out, "%02x %02x %02x %02x ",
+                       insn_ptr[i],
+                       insn_ptr[i + 1],
+                       insn_ptr[i + 2],
+                       insn_ptr[i + 3]);
+            }
+            /* Make compacted instructions hex value output vertically aligned
+             * with uncompacted instructions hex value
+             */
+            fprintf(out, "%*c", blank_spaces, ' ');
+         }
+
+         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
+         insn = &uncompacted;
+         offset += 8;
+      } else {
+         if (dump_hex) {
+            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
+            for (int i = 0 ; i < 16; i = i + 4) {
+               fprintf(out, "%02x %02x %02x %02x ",
+                       insn_ptr[i],
+                       insn_ptr[i + 1],
+                       insn_ptr[i + 2],
+                       insn_ptr[i + 3]);
+            }
+         }
+         offset += 16;
+      }
+
+      brw_disassemble_inst(out, devinfo, insn, compacted);
+   }
+}
+
+/**
+ * One bit per hardware generation, in ascending order, so that a set of
+ * generations can be stored as a bitmask in opcode_desc::gens.
+ */
+enum gen {
+   GEN4  = (1 << 0),
+   GEN45 = (1 << 1),
+   GEN5  = (1 << 2),
+   GEN6  = (1 << 3),
+   GEN7  = (1 << 4),
+   GEN75 = (1 << 5),
+   GEN8  = (1 << 6),
+   GEN9  = (1 << 7),
+   GEN10 = (1 << 8),
+   GEN11 = (1 << 9),
+   GEN_ALL = ~0
+};
+
+/* Generation set helpers.  The argument is expected to be a single enum gen
+ * bit: subtracting one yields the mask of all lower bits, i.e. of all the
+ * generations listed before it.
+ */
+#define GEN_LT(gen) ((gen) - 1)
+#define GEN_GE(gen) (~GEN_LT(gen))
+#define GEN_LE(gen) (GEN_LT(gen) | (gen))
+
+/* Flat table of opcode descriptions.  The same IR or HW opcode may show up
+ * in several entries as long as their generation masks don't overlap, which
+ * is asserted by lookup_opcode_desc() while building its index.
+ */
+static const struct opcode_desc opcode_descs[] = {
+   /* IR,                 HW,  name,      nsrc, ndst, gens */
+   { BRW_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GEN_ALL },
+   { BRW_OPCODE_MOV,      1,   "mov",     1,    1,    GEN_ALL },
+   { BRW_OPCODE_SEL,      2,   "sel",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_MOVI,     3,   "movi",    2,    1,    GEN_GE(GEN45) },
+   { BRW_OPCODE_NOT,      4,   "not",     1,    1,    GEN_ALL },
+   { BRW_OPCODE_AND,      5,   "and",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_OR,       6,   "or",      2,    1,    GEN_ALL },
+   { BRW_OPCODE_XOR,      7,   "xor",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_SHR,      8,   "shr",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_SHL,      9,   "shl",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_DIM,      10,  "dim",     1,    1,    GEN75 },
+   { BRW_OPCODE_SMOV,     10,  "smov",    0,    0,    GEN_GE(GEN8) },
+   { BRW_OPCODE_ASR,      12,  "asr",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_ROR,      14,  "ror",     2,    1,    GEN_GE(GEN11) },
+   { BRW_OPCODE_ROL,      15,  "rol",     2,    1,    GEN_GE(GEN11) },
+   { BRW_OPCODE_CMP,      16,  "cmp",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GEN_ALL },
+   { BRW_OPCODE_CSEL,     18,  "csel",    3,    1,    GEN_GE(GEN8) },
+   { BRW_OPCODE_F32TO16,  19,  "f32to16", 1,    1,    GEN7 | GEN75 },
+   { BRW_OPCODE_F16TO32,  20,  "f16to32", 1,    1,    GEN7 | GEN75 },
+   { BRW_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_BFE,      24,  "bfe",     3,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GEN_ALL },
+   { BRW_OPCODE_BRD,      33,  "brd",     0,    0,    GEN_GE(GEN7) },
+   { BRW_OPCODE_IF,       34,  "if",      0,    0,    GEN_ALL },
+   { BRW_OPCODE_IFF,      35,  "iff",     0,    0,    GEN_LE(GEN5) },
+   { BRW_OPCODE_BRC,      35,  "brc",     0,    0,    GEN_GE(GEN7) },
+   { BRW_OPCODE_ELSE,     36,  "else",    0,    0,    GEN_ALL },
+   { BRW_OPCODE_ENDIF,    37,  "endif",   0,    0,    GEN_ALL },
+   { BRW_OPCODE_DO,       38,  "do",      0,    0,    GEN_LE(GEN5) },
+   { BRW_OPCODE_CASE,     38,  "case",    0,    0,    GEN6 },
+   { BRW_OPCODE_WHILE,    39,  "while",   0,    0,    GEN_ALL },
+   { BRW_OPCODE_BREAK,    40,  "break",   0,    0,    GEN_ALL },
+   { BRW_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GEN_ALL },
+   { BRW_OPCODE_HALT,     42,  "halt",    0,    0,    GEN_ALL },
+   { BRW_OPCODE_CALLA,    43,  "calla",   0,    0,    GEN_GE(GEN75) },
+   { BRW_OPCODE_MSAVE,    44,  "msave",   0,    0,    GEN_LE(GEN5) },
+   { BRW_OPCODE_CALL,     44,  "call",    0,    0,    GEN_GE(GEN6) },
+   { BRW_OPCODE_MREST,    45,  "mrest",   0,    0,    GEN_LE(GEN5) },
+   { BRW_OPCODE_RET,      45,  "ret",     0,    0,    GEN_GE(GEN6) },
+   { BRW_OPCODE_PUSH,     46,  "push",    0,    0,    GEN_LE(GEN5) },
+   { BRW_OPCODE_FORK,     46,  "fork",    0,    0,    GEN6 },
+   { BRW_OPCODE_GOTO,     46,  "goto",    0,    0,    GEN_GE(GEN8) },
+   { BRW_OPCODE_POP,      47,  "pop",     2,    0,    GEN_LE(GEN5) },
+   { BRW_OPCODE_WAIT,     48,  "wait",    1,    0,    GEN_ALL },
+   { BRW_OPCODE_SEND,     49,  "send",    1,    1,    GEN_ALL },
+   { BRW_OPCODE_SENDC,    50,  "sendc",   1,    1,    GEN_ALL },
+   { BRW_OPCODE_SENDS,    51,  "sends",   2,    1,    GEN_GE(GEN9) },
+   { BRW_OPCODE_SENDSC,   52,  "sendsc",  2,    1,    GEN_GE(GEN9) },
+   { BRW_OPCODE_MATH,     56,  "math",    2,    1,    GEN_GE(GEN6) },
+   { BRW_OPCODE_ADD,      64,  "add",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_MUL,      65,  "mul",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_AVG,      66,  "avg",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_FRC,      67,  "frc",     1,    1,    GEN_ALL },
+   { BRW_OPCODE_RNDU,     68,  "rndu",    1,    1,    GEN_ALL },
+   { BRW_OPCODE_RNDD,     69,  "rndd",    1,    1,    GEN_ALL },
+   { BRW_OPCODE_RNDE,     70,  "rnde",    1,    1,    GEN_ALL },
+   { BRW_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GEN_ALL },
+   { BRW_OPCODE_MAC,      72,  "mac",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_MACH,     73,  "mach",    2,    1,    GEN_ALL },
+   { BRW_OPCODE_LZD,      74,  "lzd",     1,    1,    GEN_ALL },
+   { BRW_OPCODE_FBH,      75,  "fbh",     1,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_FBL,      76,  "fbl",     1,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_CBIT,     77,  "cbit",    1,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_ADDC,     78,  "addc",    2,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_SUBB,     79,  "subb",    2,    1,    GEN_GE(GEN7) },
+   { BRW_OPCODE_SAD2,     80,  "sad2",    2,    1,    GEN_ALL },
+   { BRW_OPCODE_SADA2,    81,  "sada2",   2,    1,    GEN_ALL },
+   { BRW_OPCODE_DP4,      84,  "dp4",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_DPH,      85,  "dph",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_DP3,      86,  "dp3",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_DP2,      87,  "dp2",     2,    1,    GEN_ALL },
+   { BRW_OPCODE_LINE,     89,  "line",    2,    1,    GEN_LE(GEN10) },
+   { BRW_OPCODE_PLN,      90,  "pln",     2,    1,    GEN_GE(GEN45) & GEN_LE(GEN10) },
+   { BRW_OPCODE_MAD,      91,  "mad",     3,    1,    GEN_GE(GEN6) },
+   { BRW_OPCODE_LRP,      92,  "lrp",     3,    1,    GEN_GE(GEN6) & GEN_LE(GEN10) },
+   { BRW_OPCODE_MADM,     93,  "madm",    3,    1,    GEN_GE(GEN8) },
+   { BRW_OPCODE_NENOP,    125, "nenop",   0,    0,    GEN45 },
+   { BRW_OPCODE_NOP,      126, "nop",     0,    0,    GEN_ALL },
+};
+
+/**
+ * Map the generation described by \p devinfo to its enum gen bit, telling
+ * G4X apart from Gen4 and Haswell apart from Gen7.
+ */
+static enum gen
+gen_from_devinfo(const struct gen_device_info *devinfo)
+{
+   switch (devinfo->gen) {
+   case 4: return devinfo->is_g4x ? GEN45 : GEN4;
+   case 5: return GEN5;
+   case 6: return GEN6;
+   case 7: return devinfo->is_haswell ? GEN75 : GEN7;
+   case 8: return GEN8;
+   case 9: return GEN9;
+   case 10: return GEN10;
+   case 11: return GEN11;
+   default:
+      unreachable("not reached");
+   }
+}
+
+/**
+ * Look up the opcode_descs[] entry with \p key member matching \p k which is
+ * supported by the device specified by \p devinfo, or NULL if there is no
+ * matching entry.
+ *
+ * This is implemented by using an index data structure (storage for which is
+ * provided by the caller as \p index_gen and \p index_descs) in order to
+ * provide efficient constant-time look-up.
+ */
+static const opcode_desc *
+lookup_opcode_desc(gen *index_gen,
+                   const opcode_desc **index_descs,
+                   unsigned index_size,
+                   unsigned opcode_desc::*key,
+                   const gen_device_info *devinfo,
+                   unsigned k)
+{
+   /* The index is only valid for the generation it was built for, rebuild
+    * it whenever a different generation is requested.
+    */
+   if (*index_gen != gen_from_devinfo(devinfo)) {
+      *index_gen = gen_from_devinfo(devinfo);
+
+      for (unsigned l = 0; l < index_size; l++)
+         index_descs[l] = NULL;
+
+      for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
+         if (opcode_descs[i].gens & *index_gen) {
+            const unsigned l = opcode_descs[i].*key;
+            assert(l < index_size && !index_descs[l]);
+            index_descs[l] = &opcode_descs[i];
+         }
+      }
+   }
+
+   if (k < index_size)
+      return index_descs[k];
+   else
+      return NULL;
+}
+
+/**
+ * Return the matching opcode_desc for the specified IR opcode and hardware
+ * generation, or NULL if the opcode is not supported by the device.
+ */
+const struct opcode_desc *
+brw_opcode_desc(const struct gen_device_info *devinfo, enum opcode opcode)
+{
+   static __thread gen index_gen = {};
+   static __thread const opcode_desc *index_descs[NUM_BRW_OPCODES];
+   return lookup_opcode_desc(&index_gen, index_descs, ARRAY_SIZE(index_descs),
+                             &opcode_desc::ir, devinfo, opcode);
+}
+
+/**
+ * Return the matching opcode_desc for the specified HW opcode and hardware
+ * generation, or NULL if the opcode is not supported by the device.
+ */
+const struct opcode_desc *
+brw_opcode_desc_from_hw(const struct gen_device_info *devinfo, unsigned hw)
+{
+   static __thread gen index_gen = {};
+   static __thread const opcode_desc *index_descs[128];
+   return lookup_opcode_desc(&index_gen, index_descs, ARRAY_SIZE(index_descs),
+                             &opcode_desc::hw, devinfo, hw);
+}
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index ef2cf889cd1..282ebd7d563 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -1221,32 +1221,33 @@ next_offset(const struct gen_device_info *devinfo, void *store, int offset)
 }
 
 struct opcode_desc {
-   /* The union is an implementation detail used by brw_opcode_desc() to handle
-    * opcodes that have been reused for different instructions across hardware
-    * generations.
-    *
-    * The gens field acts as a tag. If it is non-zero, name points to a string
-    * containing the instruction mnemonic. If it is zero, the table field is
-    * valid and either points to a secondary opcode_desc table with 'size'
-    * elements or is NULL and no such instruction exists for the opcode.
-    */
-   union {
-      struct {
-         char *name;
-         int nsrc;
-      };
-      struct {
-         const struct opcode_desc *table;
-         unsigned size;
-      };
-   };
-   int ndst;
-   int gens;
+   unsigned ir;
+   unsigned hw;
+   const char *name;
+   int nsrc;
+   int ndst;
+   int gens;
 };
 
 const struct opcode_desc *
 brw_opcode_desc(const struct gen_device_info *devinfo, enum opcode opcode);
 
+const struct opcode_desc *
+brw_opcode_desc_from_hw(const struct gen_device_info *devinfo, unsigned hw);
+
+static inline unsigned
+brw_opcode_encode(const struct gen_device_info *devinfo, enum opcode opcode)
+{
+   return brw_opcode_desc(devinfo, opcode)->hw;
+}
+
+static inline enum opcode
+brw_opcode_decode(const struct gen_device_info *devinfo, unsigned hw)
+{
+   const struct opcode_desc *desc = brw_opcode_desc_from_hw(devinfo, hw);
+   return desc ? (enum opcode)desc->ir : BRW_OPCODE_ILLEGAL;
+}
+
 static inline bool
 is_3src(const struct gen_device_info *devinfo, enum opcode opcode)
 {
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index 7024f010f9f..707c37c34fd 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -288,10 +288,12 @@ enum opcode {
    BRW_OPCODE_NOP = 126,
    /* Reserved 127 */
 
+   NUM_BRW_OPCODES = 128,
+
    /* These are compiler backend opcodes that get translated into other
     * instructions.
     */
-   FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_FB_WRITE = NUM_BRW_OPCODES,
 
    /**
     * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 798163ecd2d..d74caa0632b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -37,7 +37,7 @@ libintel_compiler_files = files(
   'brw_disasm.c',
   'brw_disasm_info.c',
   'brw_disasm_info.h',
-  'brw_eu.c',
+  'brw_eu.cpp',
   'brw_eu_compact.c',
   'brw_eu_defines.h',
   'brw_eu_emit.c',