From 20a849b4aa63c7fce96b04de674a4c70f054ed9c Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sat, 12 Jul 2014 21:18:39 -0700 Subject: [PATCH] i965: Use basic-block aware insertion/removal functions. This avoids invalidating and recreating the control flow graph. Also stop invalidating the CFG in places where we didn't add or remove an instruction. cfg calculations: 202951 -> 80307 (-60.43%) Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 45 +++++++++++-------- .../dri/i965/brw_fs_copy_propagation.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 8 ++-- .../dri/i965/brw_fs_dead_code_eliminate.cpp | 6 +-- .../dri/i965/brw_fs_register_coalesce.cpp | 7 +-- .../dri/i965/brw_fs_saturate_propagation.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 10 ++--- .../dri/i965/brw_vec4_copy_propagation.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 8 ++-- 9 files changed, 50 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5c70f500896..f52b7305b11 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -46,6 +46,7 @@ extern "C" { #include "brw_wm.h" } #include "brw_fs.h" +#include "brw_cfg.h" #include "brw_dead_control_flow.h" #include "main/uniforms.h" #include "brw_fs_live_variables.h" @@ -1702,7 +1703,7 @@ fs_visitor::split_virtual_grfs() } } } - invalidate_live_intervals(); + invalidate_live_intervals(false); } /** @@ -1740,7 +1741,7 @@ fs_visitor::compact_virtual_grfs() if (remap_table[i] != -1) { remap_table[i] = new_index; virtual_grf_sizes[new_index] = virtual_grf_sizes[i]; - invalidate_live_intervals(); + invalidate_live_intervals(false); ++new_index; } } @@ -1923,7 +1924,9 @@ fs_visitor::assign_constant_locations() void fs_visitor::demote_pull_constants() { - foreach_in_list(fs_inst, inst, &instructions) { + calculate_cfg(); + + foreach_block_and_inst (block, fs_inst, inst, cfg) { for (int i = 0; i < inst->sources; i++) { 
if (inst->src[i].file != UNIFORM) continue; @@ -1946,14 +1949,14 @@ fs_visitor::demote_pull_constants() surf_index, *inst->src[i].reladdr, pull_index); - inst->insert_before(&list); + inst->insert_before(block, &list); inst->src[i].reladdr = NULL; } else { fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, dst, surf_index, offset); - inst->insert_before(pull); + inst->insert_before(block, pull); inst->src[i].set_smear(pull_index & 3); } @@ -1963,7 +1966,7 @@ fs_visitor::demote_pull_constants() inst->src[i].reg_offset = 0; } } - invalidate_live_intervals(); + invalidate_live_intervals(false); } bool @@ -2298,7 +2301,7 @@ fs_visitor::compute_to_mrf() } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } @@ -2413,7 +2416,9 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_in_list_safe(fs_inst, inst, &instructions) { + calculate_cfg(); + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { if (inst->is_control_flow()) { memset(last_mrf_move, 0, sizeof(last_mrf_move)); } @@ -2422,7 +2427,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF) { fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { - inst->remove(); + inst->remove(block); progress = true; continue; } @@ -2696,7 +2701,9 @@ fs_visitor::insert_gen4_send_dependency_workarounds() void fs_visitor::lower_uniform_pull_constant_loads() { - foreach_in_list(fs_inst, inst, &instructions) { + calculate_cfg(); + + foreach_block_and_inst (block, fs_inst, inst, cfg) { if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD) continue; @@ -2721,7 +2728,7 @@ fs_visitor::lower_uniform_pull_constant_loads() setup->ir = inst->ir; setup->annotation = inst->annotation; - inst->insert_before(setup); + inst->insert_before(block, setup); /* Similarly, this will only populate the first 4 
channels of the * result register (since we only use smear values from 0-3), but we @@ -2730,7 +2737,7 @@ fs_visitor::lower_uniform_pull_constant_loads() inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; inst->src[1] = payload; - invalidate_live_intervals(); + invalidate_live_intervals(false); } else { /* Before register allocation, we didn't tell the scheduler about the * MRF we use. We know it's safe to use this MRF because nothing @@ -2748,28 +2755,30 @@ fs_visitor::lower_load_payload() { bool progress = false; - foreach_in_list_safe(fs_inst, inst, &instructions) { + calculate_cfg(); + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { fs_reg dst = inst->dst; /* src[0] represents the (optional) message header. */ if (inst->src[0].file != BAD_FILE) { - inst->insert_before(MOV(dst, inst->src[0])); + inst->insert_before(block, MOV(dst, inst->src[0])); } dst.reg_offset++; for (int i = 1; i < inst->sources; i++) { - inst->insert_before(MOV(dst, inst->src[i])); + inst->insert_before(block, MOV(dst, inst->src[i])); dst.reg_offset++; } - inst->remove(); + inst->remove(block); progress = true; } } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } @@ -3111,7 +3120,7 @@ fs_visitor::assign_binding_table_offsets() void fs_visitor::calculate_register_pressure() { - invalidate_live_intervals(); + invalidate_live_intervals(false); calculate_live_intervals(); unsigned num_instructions = instructions.length(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index e841c04b546..e0655fcf2e8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -633,7 +633,7 @@ fs_visitor::opt_copy_propagate() ralloc_free(copy_prop_ctx); if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 639097f1dd0..9db6865f589 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -222,7 +222,7 @@ fs_visitor::opt_cse_local(bblock_t *block) copy->force_writemask_all = entry->generator->force_writemask_all; } - entry->generator->insert_after(copy); + entry->generator->insert_after(block, copy); } /* dest <- temp */ @@ -244,7 +244,7 @@ fs_visitor::opt_cse_local(bblock_t *block) copy = MOV(dst, tmp); copy->force_writemask_all = inst->force_writemask_all; } - inst->insert_before(copy); + inst->insert_before(block, copy); } /* Set our iterator so that next time through the loop inst->next @@ -253,7 +253,7 @@ fs_visitor::opt_cse_local(bblock_t *block) */ fs_inst *prev = (fs_inst *)inst->prev; - inst->remove(); + inst->remove(block); /* Appending an instruction may have changed our bblock end. */ if (inst == block->end) { @@ -321,7 +321,7 @@ fs_visitor::opt_cse() } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 2506b4681c6..697b44ada0f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -100,13 +100,13 @@ fs_visitor::dead_code_eliminate() ralloc_free(live); if (progress) { - foreach_in_list_safe(fs_inst, inst, &instructions) { + foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) { if (inst->opcode == BRW_OPCODE_NOP) { - inst->remove(); + inst->remove(block); } } - invalidate_live_intervals(); + invalidate_live_intervals(false); } return progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 3e3aeca1f8e..c1b785b1ed2 100644 --- 
a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -41,6 +41,7 @@ */ #include "brw_fs.h" +#include "brw_cfg.h" #include "brw_fs_live_variables.h" static bool @@ -273,13 +274,13 @@ fs_visitor::register_coalesce() } if (progress) { - foreach_in_list_safe(fs_inst, inst, &instructions) { + foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) { if (inst->opcode == BRW_OPCODE_NOP) { - inst->remove(); + inst->remove(block); } } - invalidate_live_intervals(); + invalidate_live_intervals(false); } return progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index d65b2f14e13..3412ce0a284 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -100,7 +100,7 @@ fs_visitor::opt_saturate_propagation() } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index ee61202c358..acf0b6390bc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -403,7 +403,7 @@ vec4_visitor::opt_reduce_swizzle() } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } @@ -1029,7 +1029,7 @@ vec4_visitor::opt_register_coalesce() calculate_live_intervals(); - foreach_in_list_safe(vec4_instruction, inst, &instructions) { + foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) { int ip = next_ip; next_ip++; @@ -1199,13 +1199,13 @@ vec4_visitor::opt_register_coalesce() } scan_inst = (vec4_instruction *)scan_inst->next; } - inst->remove(); + inst->remove(block); progress = true; } } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } @@ -1284,7 +1284,7 @@ 
vec4_visitor::split_virtual_grfs() } } } - invalidate_live_intervals(); + invalidate_live_intervals(false); } void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 37ca661d561..c4ea7eca322 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -407,7 +407,7 @@ vec4_visitor::opt_copy_propagation() } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 1f24af648fd..a0f6e77f577 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -165,7 +165,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) entry->tmp.swizzle = BRW_SWIZZLE_XYZW; vec4_instruction *copy = MOV(entry->generator->dst, entry->tmp); - entry->generator->insert_after(copy); + entry->generator->insert_after(block, copy); entry->generator->dst = dst_reg(entry->tmp); } @@ -174,7 +174,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) assert(inst->dst.type == entry->tmp.type); vec4_instruction *copy = MOV(inst->dst, entry->tmp); copy->force_writemask_all = inst->force_writemask_all; - inst->insert_before(copy); + inst->insert_before(block, copy); } /* Set our iterator so that next time through the loop inst->next @@ -183,7 +183,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) */ vec4_instruction *prev = (vec4_instruction *)inst->prev; - inst->remove(); + inst->remove(block); /* Appending an instruction may have changed our bblock end. */ if (inst == block->end) { @@ -256,7 +256,7 @@ vec4_visitor::opt_cse() } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } -- 2.30.2