From c458eeb94620fbce0a37474fc292545002d67f76 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 7 Sep 2016 16:59:35 -0700 Subject: [PATCH] i965/fs: Add wrapper functions for fs_inst::regs_read and ::regs_written. This is in preparation for dropping fs_inst::regs_read and ::regs_written in favor of more accurate alternatives expressed in byte units. The main reason these wrappers are useful is that a number of optimization passes implement dataflow analysis with register granularity, so these helpers will come in handy once we've switched register offsets and sizes to the byte representation. The wrapper functions will also make sure that GRF misalignment (currently neglected by most of the back-end) is taken into account correctly in the calculation of regs_read and regs_written. Reviewed-by: Iago Toral Quiroga --- src/mesa/drivers/dri/i965/brw_fs.cpp | 28 ++++++++--------- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 8 ++--- .../dri/i965/brw_fs_dead_code_eliminate.cpp | 8 ++--- .../dri/i965/brw_fs_live_variables.cpp | 4 +-- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 26 ++++++++-------- .../dri/i965/brw_fs_register_coalesce.cpp | 4 +-- src/mesa/drivers/dri/i965/brw_fs_validate.cpp | 6 ++-- src/mesa/drivers/dri/i965/brw_ir_fs.h | 26 ++++++++++++++++ .../dri/i965/brw_schedule_instructions.cpp | 30 +++++++++---------- 9 files changed, 83 insertions(+), 57 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 04f04572d83..802aa9f76f4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1772,13 +1772,13 @@ fs_visitor::split_virtual_grfs() foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; - for (int j = 1; j < inst->regs_written; j++) + for (unsigned j = 1; j < regs_written(inst); j++) split_points[reg + j] = false; } for (int i = 0; i < inst->sources; i++) { if 
(inst->src[i].file == VGRF) { int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; - for (int j = 1; j < inst->regs_read(i); j++) + for (unsigned j = 1; j < regs_read(inst, i); j++) split_points[reg + j] = false; } } @@ -2611,7 +2611,7 @@ fs_visitor::opt_register_renaming() if (remap[dst] == -1) { remap[dst] = dst; } else { - remap[dst] = alloc.allocate(inst->regs_written); + remap[dst] = alloc.allocate(regs_written(inst)); inst->dst.nr = remap[dst]; progress = true; } @@ -2727,7 +2727,7 @@ fs_visitor::compute_to_mrf() * regs_left bitset keeps track of the registers we haven't yet found a * generating instruction for. */ - unsigned regs_left = (1 << inst->regs_read(0)) - 1; + unsigned regs_left = (1 << regs_read(inst, 0)) - 1; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE, @@ -2819,7 +2819,7 @@ fs_visitor::compute_to_mrf() /* Found all generating instructions of our MRF's source value, so it * should be safe to rewrite them to point to the MRF directly. */ - regs_left = (1 << inst->regs_read(0)) - 1; + regs_left = (1 << regs_read(inst, 0)) - 1; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE, @@ -3086,7 +3086,7 @@ void fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { - int write_len = inst->regs_written; + int write_len = regs_written(inst); int first_write_grf = inst->dst.nr; bool needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -3119,7 +3119,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, * dependency has more latency than a MOV. 
*/ if (scan_inst->dst.file == VGRF) { - for (int i = 0; i < scan_inst->regs_written; i++) { + for (unsigned i = 0; i < regs_written(scan_inst); i++) { int reg = scan_inst->dst.nr + i; if (reg >= first_write_grf && @@ -3157,7 +3157,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, void fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { - int write_len = inst->regs_written; + int write_len = regs_written(inst); int first_write_grf = inst->dst.nr; bool needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -3800,7 +3800,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, /* Send from the GRF */ fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F); load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); - payload.nr = bld.shader->alloc.allocate(load->regs_written); + payload.nr = bld.shader->alloc.allocate(regs_written(load)); load->dst = payload; inst->src[0] = payload; @@ -3821,7 +3821,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, } inst->opcode = FS_OPCODE_FB_WRITE; - inst->mlen = load->regs_written; + inst->mlen = regs_written(load); inst->header_size = header_size; } @@ -4069,7 +4069,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, unsigned grad_components) { const gen_device_info *devinfo = bld.shader->devinfo; - int reg_width = bld.dispatch_width() / 8; + unsigned reg_width = bld.dispatch_width() / 8; unsigned header_size = 0, length = 0; fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE]; for (unsigned i = 0; i < ARRAY_SIZE(sources); i++) @@ -4097,9 +4097,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, * and we have an explicit header, we need to set up the sampler * writemask. It's reversed from normal: 1 means "don't write". 
*/ - if (!inst->eot && inst->regs_written != 4 * reg_width) { - assert((inst->regs_written % reg_width) == 0); - unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf; + if (!inst->eot && regs_written(inst) != 4 * reg_width) { + assert(regs_written(inst) % reg_width == 0); + unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf; inst->offset |= mask << 12; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 0c65c5b94b7..4744142a4b6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -199,8 +199,8 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) static void create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { - int written = inst->regs_written; - int dst_width = + unsigned written = regs_written(inst); + unsigned dst_width = DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); fs_inst *copy; @@ -234,7 +234,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } - assert(copy->regs_written == written); + assert(regs_written(copy) == written); } bool @@ -284,7 +284,7 @@ fs_visitor::opt_cse_local(bblock_t *block) if (no_existing_temp && !entry->generator->dst.is_null()) { const fs_builder ibld = fs_builder(this, block, entry->generator) .at(block, entry->generator->next); - int written = entry->generator->regs_written; + int written = regs_written(entry->generator); entry->tmp = fs_reg(VGRF, alloc.allocate(written), entry->generator->dst.type); diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 45f5c5ebb9b..4558bd42a24 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -55,12 +55,12 @@ 
fs_visitor::dead_code_eliminate() if (inst->dst.file == VGRF && !inst->has_side_effects()) { bool result_live = false; - if (inst->regs_written == 1) { + if (regs_written(inst) == 1) { int var = live_intervals->var_from_reg(inst->dst); result_live = BITSET_TEST(live, var); } else { int var = live_intervals->var_from_reg(inst->dst); - for (int i = 0; i < inst->regs_written; i++) { + for (unsigned i = 0; i < regs_written(inst); i++) { result_live = result_live || BITSET_TEST(live, var + i); } } @@ -96,7 +96,7 @@ fs_visitor::dead_code_eliminate() if (inst->dst.file == VGRF) { if (!inst->is_partial_write()) { int var = live_intervals->var_from_reg(inst->dst); - for (int i = 0; i < inst->regs_written; i++) { + for (unsigned i = 0; i < regs_written(inst); i++) { BITSET_CLEAR(live, var + i); } } @@ -114,7 +114,7 @@ fs_visitor::dead_code_eliminate() if (inst->src[i].file == VGRF) { int var = live_intervals->var_from_reg(inst->src[i]); - for (int j = 0; j < inst->regs_read(i); j++) { + for (unsigned j = 0; j < regs_read(inst, i); j++) { BITSET_SET(live, var + j); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 02dc7774427..a6c98e33218 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -118,7 +118,7 @@ fs_live_variables::setup_def_use() if (reg.file != VGRF) continue; - for (int j = 0; j < inst->regs_read(i); j++) { + for (unsigned j = 0; j < regs_read(inst, i); j++) { setup_one_read(bd, inst, ip, reg); reg.offset += REG_SIZE; } @@ -129,7 +129,7 @@ fs_live_variables::setup_def_use() /* Set def[] for this instruction */ if (inst->dst.file == VGRF) { fs_reg reg = inst->dst; - for (int j = 0; j < inst->regs_written; j++) { + for (unsigned j = 0; j < regs_written(inst); j++) { setup_one_write(bd, inst, ip, reg); reg.offset += REG_SIZE; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 82adaa35166..572735a379a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -362,9 +362,9 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count, if (node_nr >= payload_node_count) continue; - for (int j = 0; j < inst->regs_read(i); j++) { + for (unsigned j = 0; j < regs_read(inst, i); j++) { payload_last_use_ip[node_nr + j] = use_ip; - assert(node_nr + j < payload_node_count); + assert(node_nr + j < unsigned(payload_node_count)); } } } @@ -903,10 +903,10 @@ fs_visitor::spill_reg(int spill_reg) for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg) { - int regs_read = inst->regs_read(i); + int count = regs_read(inst, i); int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE); - fs_reg unspill_dst(VGRF, alloc.allocate(regs_read)); + fs_reg unspill_dst(VGRF, alloc.allocate(count)); inst->src[i].nr = unspill_dst.nr; inst->src[i].offset %= REG_SIZE; @@ -916,7 +916,7 @@ fs_visitor::spill_reg(int spill_reg) * hardware) up to the maximum supported block size. */ const unsigned width = - MIN2(32, 1u << (ffs(MAX2(1, regs_read) * 8) - 1)); + MIN2(32, 1u << (ffs(MAX2(1, count) * 8) - 1)); /* Set exec_all() on unspill messages under the (rather * pessimistic) assumption that there is no one-to-one @@ -926,7 +926,7 @@ fs_visitor::spill_reg(int spill_reg) * unspill destination is a block-local temporary. 
*/ emit_unspill(ibld.exec_all().group(width, 0), - unspill_dst, subset_spill_offset, regs_read); + unspill_dst, subset_spill_offset, count); } } @@ -934,7 +934,7 @@ fs_visitor::spill_reg(int spill_reg) inst->dst.nr == spill_reg) { int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->dst.offset, REG_SIZE); - fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written)); + fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst))); inst->dst.nr = spill_src.nr; inst->dst.offset %= REG_SIZE; @@ -971,19 +971,19 @@ fs_visitor::spill_reg(int spill_reg) const fs_builder ubld = ibld.exec_all(!per_channel).group(width, 0); /* If our write is going to affect just part of the - * inst->regs_written(), then we need to unspill the destination - * since we write back out all of the regs_written(). If the - * original instruction had force_writemask_all set and is not a - * partial write, there should be no need for the unspill since the + * regs_written(inst), then we need to unspill the destination since + * we write back out all of regs_written(inst). If the original + * instruction had force_writemask_all set and is not a partial + * write, there should be no need for the unspill since the * instruction will be overwriting the whole destination in any case.
*/ if (inst->is_partial_write() || (!inst->force_writemask_all && !per_channel)) emit_unspill(ubld, spill_src, subset_spill_offset, - inst->regs_written); + regs_written(inst)); emit_spill(ubld.at(block, inst->next), spill_src, - subset_spill_offset, inst->regs_written); + subset_spill_offset, regs_written(inst)); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 651c136dfa7..3dd0fbfc1c1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -190,7 +190,7 @@ fs_visitor::register_coalesce() dst_reg_offset[i] = i; } mov[0] = inst; - channels_remaining -= inst->regs_written; + channels_remaining -= regs_written(inst); } else { const int offset = inst->src[0].offset / REG_SIZE; if (mov[offset]) { @@ -207,7 +207,7 @@ fs_visitor::register_coalesce() if (inst->regs_written > 1) dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1; mov[offset] = inst; - channels_remaining -= inst->regs_written; + channels_remaining -= regs_written(inst); } if (channels_remaining) diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp index 10ad7c37b24..676942c19c0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp @@ -43,14 +43,14 @@ fs_visitor::validate() { foreach_block_and_inst (block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { - fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <= + fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <= alloc.sizes[inst->dst.nr]); } for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <= - (int)alloc.sizes[inst->src[i].nr]); + fsv_assert(inst->src[i].offset / REG_SIZE + regs_read(inst, i) <= + alloc.sizes[inst->src[i].nr]); } } } diff --git 
a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 19ef242d166..de08a691055 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -411,4 +411,30 @@ set_saturate(bool saturate, fs_inst *inst) return inst; } +/** + * Return the number of dataflow registers written by the instruction (either + * fully or partially) counted from 'floor(reg_offset(inst->dst) / + * register_size)'. The somewhat arbitrary register size unit is 4B for the + * UNIFORM and IMM files and 32B for all other files. + */ +inline unsigned +regs_written(const fs_inst *inst) +{ + /* XXX - Take into account register-misaligned offsets correctly. */ + return inst->regs_written; +} + +/** + * Return the number of dataflow registers read by the instruction (either + * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / + * register_size)'. The somewhat arbitrary register size unit is 4B for the + * UNIFORM and IMM files and 32B for all other files. + */ +inline unsigned +regs_read(const fs_inst *inst, unsigned i) +{ + /* XXX - Take into account register-misaligned offsets correctly. 
*/ + return inst->regs_read(i); +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index dde75547590..0d3a07cad5b 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -620,7 +620,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be) if (inst->src[i].nr >= hw_reg_count) continue; - for (int j = 0; j < inst->regs_read(i); j++) + for (unsigned j = 0; j < regs_read(inst, i); j++) hw_reads_remaining[inst->src[i].nr + j]++; } } @@ -702,7 +702,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be) reads_remaining[inst->src[i].nr]--; } else if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { - for (int off = 0; off < inst->regs_read(i); off++) + for (unsigned off = 0; off < regs_read(inst, i); off++) hw_reads_remaining[inst->src[i].nr + off]--; } } @@ -731,7 +731,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { - for (int off = 0; off < inst->regs_read(i); off++) { + for (unsigned off = 0; off < regs_read(inst, i); off++) { int reg = inst->src[i].nr + off; if (!BITSET_TEST(hw_liveout[block_idx], reg) && hw_reads_remaining[reg] == 1) { @@ -1004,17 +1004,17 @@ fs_instruction_scheduler::calculate_deps() for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(last_grf_write[inst->src[i].nr + r], n); } else { - for (int r = 0; r < inst->regs_read(i); r++) { + for (unsigned r = 0; r < regs_read(inst, i); r++) { add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].offset / REG_SIZE + r], n); } } } else if (inst->src[i].file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < 
inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(last_grf_write[inst->src[i].nr + r], n); } else { add_dep(last_fixed_grf_write, n); @@ -1052,12 +1052,12 @@ fs_instruction_scheduler::calculate_deps() /* write-after-write deps. */ if (inst->dst.file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) { + for (unsigned r = 0; r < regs_written(inst); r++) { add_dep(last_grf_write[inst->dst.nr + r], n); last_grf_write[inst->dst.nr + r] = n; } } else { - for (int r = 0; r < inst->regs_written; r++) { + for (unsigned r = 0; r < regs_written(inst); r++) { add_dep(last_grf_write[inst->dst.nr * 16 + inst->dst.offset / REG_SIZE + r], n); last_grf_write[inst->dst.nr * 16 + @@ -1079,7 +1079,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->dst.file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) + for (unsigned r = 0; r < regs_written(inst); r++) last_grf_write[inst->dst.nr + r] = n; } else { last_fixed_grf_write = n; @@ -1130,17 +1130,17 @@ fs_instruction_scheduler::calculate_deps() for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(n, last_grf_write[inst->src[i].nr + r], 0); } else { - for (int r = 0; r < inst->regs_read(i); r++) { + for (unsigned r = 0; r < regs_read(inst, i); r++) { add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].offset / REG_SIZE + r], 0); } } } else if (inst->src[i].file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(n, last_grf_write[inst->src[i].nr + r], 0); } else { add_dep(n, last_fixed_grf_write, 0); @@ -1180,10 +1180,10 @@ fs_instruction_scheduler::calculate_deps() */ if (inst->dst.file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) + for 
(unsigned r = 0; r < regs_written(inst); r++) last_grf_write[inst->dst.nr + r] = n; } else { - for (int r = 0; r < inst->regs_written; r++) { + for (unsigned r = 0; r < regs_written(inst); r++) { last_grf_write[inst->dst.nr * 16 + inst->dst.offset / REG_SIZE + r] = n; } @@ -1203,7 +1203,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->dst.file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) + for (unsigned r = 0; r < regs_written(inst); r++) last_grf_write[inst->dst.nr + r] = n; } else { last_fixed_grf_write = n; -- 2.30.2