From 6a9525bf6729a8d2bb9c6a7b10ebdc9925c55463 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 May 2018 14:37:35 -0700 Subject: [PATCH] intel/eu: Switch to a logical state stack Instead of the state stack that's based on copying a dummy instruction around, we start using a logical stack of brw_insn_states. This uses a bit less memory and is way less conceptually bogus. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_eu.c | 90 +++++++++++++------------------- src/intel/compiler/brw_eu.h | 34 +++++++++++- src/intel/compiler/brw_eu_emit.c | 74 ++------------------------ 3 files changed, 72 insertions(+), 126 deletions(-) diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c index 5375209d4fc..d0e4ea20704 100644 --- a/src/intel/compiler/brw_eu.c +++ b/src/intel/compiler/brw_eu.c @@ -129,91 +129,76 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz) unsigned brw_get_default_exec_size(struct brw_codegen *p) { - return brw_inst_exec_size(p->devinfo, p->current); + return p->current->exec_size; } unsigned brw_get_default_group(struct brw_codegen *p) { - if (p->devinfo->gen >= 6) { - unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8; - if (p->devinfo->gen >= 7) - group += brw_inst_nib_control(p->devinfo, p->current) * 4; - return group; - } else { - unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current); - if (qtr_control == BRW_COMPRESSION_COMPRESSED) - return 0; - else - return qtr_control * 8; - } + return p->current->group; } unsigned brw_get_default_access_mode(struct brw_codegen *p) { - return brw_inst_access_mode(p->devinfo, p->current); + return p->current->access_mode; } void brw_set_default_exec_size(struct brw_codegen *p, unsigned value) { - brw_inst_set_exec_size(p->devinfo, p->current, value); + p->current->exec_size = value; } void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc ) { - brw_inst_set_pred_control(p->devinfo, p->current, pc); + 
p->current->predicate = pc; } void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse) { - brw_inst_set_pred_inv(p->devinfo, p->current, predicate_inverse); + p->current->pred_inv = predicate_inverse; } void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg) { - if (p->devinfo->gen >= 7) - brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg); - - brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg); + assert(subreg < 2); + p->current->flag_subreg = reg * 2 + subreg; } void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ) { - brw_inst_set_access_mode(p->devinfo, p->current, access_mode); + p->current->access_mode = access_mode; } void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression compression_control) { - if (p->devinfo->gen >= 6) { - /* Since we don't use the SIMD32 support in gen6, we translate - * the pre-gen6 compression control here. + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. */ - switch (compression_control) { - case BRW_COMPRESSION_NONE: - /* This is the "use the first set of bits of dmask/vmask/arf - * according to execsize" option. - */ - brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1Q); - break; - case BRW_COMPRESSION_2NDHALF: - /* For SIMD8, this is "use the second set of 8 bits." */ - brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_2Q); - break; - case BRW_COMPRESSION_COMPRESSED: - /* For SIMD16 instruction compression, use the first set of 16 bits - * since we don't do SIMD32 dispatch. 
- */ - brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1H); - break; - default: - unreachable("not reached"); - } - } else { - brw_inst_set_qtr_control(p->devinfo, p->current, compression_control); + p->current->group = 0; + break; + case BRW_COMPRESSION_2NDHALF: + /* For SIMD8, this is "use the second set of 8 bits." */ + p->current->group = 8; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For SIMD16 instruction compression, use the first set of 16 bits + * since we don't do SIMD32 dispatch. + */ + p->current->group = 0; + break; + default: + unreachable("not reached"); + } + + if (p->devinfo->gen <= 6) { + p->current->compressed = + (compression_control == BRW_COMPRESSION_COMPRESSED); } } @@ -246,7 +231,7 @@ brw_inst_set_compression(const struct gen_device_info *devinfo, void brw_set_default_compression(struct brw_codegen *p, bool on) { - brw_inst_set_compression(p->devinfo, p->current, on); + p->current->compressed = on; } /** @@ -283,23 +268,22 @@ brw_inst_set_group(const struct gen_device_info *devinfo, void brw_set_default_group(struct brw_codegen *p, unsigned group) { - brw_inst_set_group(p->devinfo, p->current, group); + p->current->group = group; } void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ) { - brw_inst_set_mask_control(p->devinfo, p->current, value); + p->current->mask_control = value; } void brw_set_default_saturate( struct brw_codegen *p, bool enable ) { - brw_inst_set_saturate(p->devinfo, p->current, enable); + p->current->saturate = enable; } void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value) { - if (p->devinfo->gen >= 6) - brw_inst_set_acc_wr_control(p->devinfo, p->current, value); + p->current->acc_wr_control = value; } void brw_push_insn_state( struct brw_codegen *p ) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index d709d12b671..0f07eeb3d6d 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -46,6 +46,36 @@ extern 
"C" { #define BRW_EU_MAX_INSN_STACK 5 +struct brw_insn_state { + /* One of BRW_EXECUTE_* */ + unsigned exec_size:3; + + /* Group in units of channels */ + unsigned group:5; + + /* Compression control on gen4-5 */ + bool compressed:1; + + /* One of BRW_MASK_* */ + unsigned mask_control:1; + + bool saturate:1; + + /* One of BRW_ALIGN_* */ + unsigned access_mode:1; + + /* One of BRW_PREDICATE_* */ + enum brw_predicate predicate:4; + + bool pred_inv:1; + + /* Flag subreg. Bottom bit is subreg, top bit is reg */ + unsigned flag_subreg:2; + + bool acc_wr_control:1; +}; + + /* A helper for accessing the last instruction emitted. This makes it easy * to set various bits on an instruction without having to create temporary * variable and assign the emitted instruction to those. @@ -62,8 +92,8 @@ struct brw_codegen { /* Allow clients to push/pop instruction state: */ - brw_inst stack[BRW_EU_MAX_INSN_STACK]; - brw_inst *current; + struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK]; + struct brw_insn_state *current; /** Whether or not the user wants automatic exec sizes * diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 412a051bc93..d3246edde44 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -621,72 +621,6 @@ gen7_set_dp_scratch_message(struct brw_codegen *p, brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset); } -struct brw_insn_state { - /* One of BRW_EXECUTE_* */ - unsigned exec_size:3; - - /* Group in units of channels */ - unsigned group:5; - - /* Compression control on gen4-5 */ - bool compressed:1; - - /* One of BRW_MASK_* */ - unsigned mask_control:1; - - bool saturate:1; - - /* One of BRW_ALIGN_* */ - unsigned access_mode:1; - - /* One of BRW_PREDICATE_* */ - enum brw_predicate predicate:4; - - bool pred_inv:1; - - /* Flag subreg. 
Bottom bit is subreg, top bit is reg */ - unsigned flag_subreg:2; - - bool acc_wr_control:1; -}; - -static struct brw_insn_state -brw_inst_get_state(const struct gen_device_info *devinfo, - const brw_inst *insn) -{ - struct brw_insn_state state = { }; - - state.exec_size = brw_inst_exec_size(devinfo, insn); - if (devinfo->gen >= 6) { - state.group = brw_inst_qtr_control(devinfo, insn) * 8; - if (devinfo->gen >= 7) - state.group += brw_inst_nib_control(devinfo, insn) * 4; - } else { - unsigned qtr_control = brw_inst_qtr_control(devinfo, insn); - if (qtr_control == BRW_COMPRESSION_COMPRESSED) { - state.group = 0; - state.compressed = true; - } else { - state.group = qtr_control * 8; - state.compressed = false; - } - } - state.access_mode = brw_inst_access_mode(devinfo, insn); - state.mask_control = brw_inst_mask_control(devinfo, insn); - state.saturate = brw_inst_saturate(devinfo, insn); - state.predicate = brw_inst_pred_control(devinfo, insn); - state.pred_inv = brw_inst_pred_inv(devinfo, insn); - - state.flag_subreg = brw_inst_flag_subreg_nr(devinfo, insn); - if (devinfo->gen >= 7) - state.flag_subreg += brw_inst_flag_reg_nr(devinfo, insn) * 2; - - if (devinfo->gen >= 6) - state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn); - - return state; -} - static void brw_inst_set_state(const struct gen_device_info *devinfo, brw_inst *insn, @@ -735,8 +669,7 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode) brw_inst_set_opcode(devinfo, insn, opcode); /* Apply the default instruction state */ - struct brw_insn_state current = brw_inst_get_state(devinfo, p->current); - brw_inst_set_state(devinfo, insn, &current); + brw_inst_set_state(devinfo, insn, p->current); return insn; } @@ -3504,9 +3437,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, */ inst = brw_FBL(p, vec1(dst), exec_mask); } else { - const struct brw_reg flag = brw_flag_reg( - brw_inst_flag_reg_nr(devinfo, p->current), - brw_inst_flag_subreg_nr(devinfo, p->current)); + const struct
brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2, + p->current->flag_subreg % 2); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); -- 2.30.2