X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4.cpp;h=5f8f39971b9c00fa3db93c7cde4b7b7a153f679d;hb=2c50212b14da27de4e3da62488ae4e35c069d84e;hp=e333c6b6ed1d3d89fd22b588a04d185fb842ab12;hpb=6bb2cf2107c4461ea9dd100edaf110b839311b90;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e333c6b6ed1..5f8f39971b9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -24,6 +24,7 @@ #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_vs.h" +#include "brw_dead_control_flow.h" extern "C" { #include "main/macros.h" @@ -75,7 +76,7 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type) if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) this->swizzle = swizzle_for_size(type->vector_elements); else - this->swizzle = SWIZZLE_XYZW; + this->swizzle = BRW_SWIZZLE_XYZW; } /** Generic unset register constructor. */ @@ -90,7 +91,7 @@ src_reg::src_reg(float f) this->file = IMM; this->type = BRW_REGISTER_TYPE_F; - this->imm.f = f; + this->fixed_hw_reg.dw1.f = f; } src_reg::src_reg(uint32_t u) @@ -99,7 +100,7 @@ src_reg::src_reg(uint32_t u) this->file = IMM; this->type = BRW_REGISTER_TYPE_UD; - this->imm.u = u; + this->fixed_hw_reg.dw1.ud = u; } src_reg::src_reg(int32_t i) @@ -108,7 +109,16 @@ src_reg::src_reg(int32_t i) this->file = IMM; this->type = BRW_REGISTER_TYPE_D; - this->imm.i = i; + this->fixed_hw_reg.dw1.d = i; +} + +src_reg::src_reg(struct brw_reg reg) +{ + init(); + + this->file = HW_REG; + this->fixed_hw_reg = reg; + this->type = reg.type; } src_reg::src_reg(dst_reg reg) @@ -179,6 +189,7 @@ dst_reg::dst_reg(struct brw_reg reg) this->file = HW_REG; this->fixed_hw_reg = reg; + this->type = reg.type; } dst_reg::dst_reg(src_reg reg) @@ -214,15 +225,15 @@ vec4_instruction::is_send_from_grf() } bool -vec4_visitor::can_do_source_mods(vec4_instruction *inst) +vec4_instruction::can_do_source_mods(struct brw_context *brw) { - if (brw->gen == 6 && inst->is_math()) + if (brw->gen == 6 && is_math()) return false; - if (inst->is_send_from_grf()) + if (is_send_from_grf()) return false; - if (!inst->can_do_source_mods()) + if (!backend_instruction::can_do_source_mods()) return false; return true; @@ -258,9 +269,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) return 1; case VS_OPCODE_PULL_CONSTANT_LOAD: return 2; - case VS_OPCODE_SCRATCH_READ: + case SHADER_OPCODE_GEN4_SCRATCH_READ: return 2; - case VS_OPCODE_SCRATCH_WRITE: + case SHADER_OPCODE_GEN4_SCRATCH_WRITE: return 3; case GS_OPCODE_URB_WRITE: case GS_OPCODE_THREAD_END: @@ -271,35 +282,77 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_MS: + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: return inst->header_present ? 1 : 0; + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + return 0; default: - assert(!"not reached"); - return inst->mlen; + unreachable("not reached"); } } bool -src_reg::equals(src_reg *r) +src_reg::equals(const src_reg &r) const { - return (file == r->file && - reg == r->reg && - reg_offset == r->reg_offset && - type == r->type && - negate == r->negate && - abs == r->abs && - swizzle == r->swizzle && - !reladdr && !r->reladdr && - memcmp(&fixed_hw_reg, &r->fixed_hw_reg, - sizeof(fixed_hw_reg)) == 0 && - imm.u == r->imm.u); + return (file == r.file && + reg == r.reg && + reg_offset == r.reg_offset && + type == r.type && + negate == r.negate && + abs == r.abs && + swizzle == r.swizzle && + !reladdr && !r.reladdr && + memcmp(&fixed_hw_reg, &r.fixed_hw_reg, + sizeof(fixed_hw_reg)) == 0); +} + +static bool +try_eliminate_instruction(vec4_instruction *inst, int new_writemask, + const struct brw_context *brw) +{ + if (inst->has_side_effects()) + return false; + + if (new_writemask == 0) { + /* Don't dead code eliminate instructions that write to the + * accumulator as a side-effect. Instead just set the destination + * to the null register to free it. + */ + if (inst->writes_accumulator || inst->writes_flag()) { + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); + } else { + inst->remove(); + } + + return true; + } else if (inst->dst.writemask != new_writemask) { + switch (inst->opcode) { + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_GEN4_SCRATCH_READ: + case VS_OPCODE_PULL_CONSTANT_LOAD: + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + break; + default: + /* Do not set a writemask on Gen6 for math instructions, those are + * executed using align1 mode that does not support a destination mask. + */ + if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) { + inst->dst.writemask = new_writemask; + return true; + } + } + } + + return false; } /** - * Must be called after calculate_live_intervales() to remove unused + * Must be called after calculate_live_intervals() to remove unused * writes to registers -- register allocation will fail otherwise * because something deffed but not used won't be considered to * interfere with other regs. @@ -308,39 +361,103 @@ bool vec4_visitor::dead_code_eliminate() { bool progress = false; - int pc = 0; + int pc = -1; calculate_live_intervals(); - foreach_list_safe(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; + foreach_in_list_safe(vec4_instruction, inst, &instructions) { + pc++; + + bool inst_writes_flag = false; + if (inst->dst.file != GRF) { + if (inst->dst.is_null() && inst->writes_flag()) { + inst_writes_flag = true; + } else { + continue; + } + } if (inst->dst.file == GRF) { - assert(this->virtual_grf_end[inst->dst.reg] >= pc); - if (this->virtual_grf_end[inst->dst.reg] == pc) { - /* Don't dead code eliminate instructions that write to the - * accumulator as a side-effect. Instead just set the destination - * to the null register to free it. - */ - switch (inst->opcode) { - case BRW_OPCODE_ADDC: - case BRW_OPCODE_SUBB: - case BRW_OPCODE_MACH: - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); - break; - default: - inst->remove(); - break; + int write_mask = inst->dst.writemask; + + for (int c = 0; c < 4; c++) { + if (write_mask & (1 << c)) { + assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc); + if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) { + write_mask &= ~(1 << c); + } } - progress = true; } + + progress = try_eliminate_instruction(inst, write_mask, brw) || + progress; } - pc++; + if (inst->predicate || inst->prev == NULL) + continue; + + int dead_channels; + if (inst_writes_flag) { +/* Arbitrarily chosen, other than not being an xyzw writemask. */ +#define FLAG_WRITEMASK (1 << 5) + dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK; + } else { + dead_channels = inst->dst.writemask; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != GRF || + inst->src[i].reg != inst->dst.reg) + continue; + + for (int j = 0; j < 4; j++) { + int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j); + dead_channels &= ~(1 << swiz); + } + } + } + + for (exec_node *node = inst->prev, *prev = node->prev; + prev != NULL && dead_channels != 0; + node = prev, prev = prev->prev) { + vec4_instruction *scan_inst = (vec4_instruction *)node; + + if (scan_inst->is_control_flow()) + break; + + if (inst_writes_flag) { + if (scan_inst->dst.is_null() && scan_inst->writes_flag()) { + scan_inst->remove(); + progress = true; + continue; + } else if (scan_inst->reads_flag()) { + break; + } + } + + if (inst->dst.file == scan_inst->dst.file && + inst->dst.reg == scan_inst->dst.reg && + inst->dst.reg_offset == scan_inst->dst.reg_offset) { + int new_writemask = scan_inst->dst.writemask & ~dead_channels; + + progress = try_eliminate_instruction(scan_inst, new_writemask, brw) || + progress; + } + + for (int i = 0; i < 3; i++) { + if (scan_inst->src[i].file != inst->dst.file || + scan_inst->src[i].reg != inst->dst.reg) + continue; + + for (int j = 0; j < 4; j++) { + int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j); + dead_channels &= ~(1 << swiz); + } + } + } } if (progress) - live_intervals_valid = false; + invalidate_live_intervals(); return progress; } @@ -355,9 +472,7 @@ vec4_visitor::split_uniform_registers() * vector. The goal is to make elimination of unused uniform * components easier later. */ - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { for (int i = 0 ; i < 3; i++) { if (inst->src[i].file != UNIFORM) continue; @@ -390,9 +505,7 @@ vec4_visitor::pack_uniform_registers() * expect unused vector elements when we've moved array access out * to pull constants, and from some GLSL code generators like wine. */ - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { for (int i = 0 ; i < 3; i++) { if (inst->src[i].file != UNIFORM) continue; @@ -407,6 +520,7 @@ vec4_visitor::pack_uniform_registers() * push constants. */ for (int src = 0; src < uniforms; src++) { + assert(src < uniform_array_size); int size = this->uniform_vector_size[src]; if (!uniform_used[src]) { @@ -430,8 +544,8 @@ vec4_visitor::pack_uniform_registers() /* Move the references to the data */ for (int j = 0; j < size; j++) { - prog_data->param[dst * 4 + new_chan[src] + j] = - prog_data->param[src * 4 + j]; + stage_prog_data->param[dst * 4 + new_chan[src] + j] = + stage_prog_data->param[src * 4 + j]; } this->uniform_vector_size[dst] += size; @@ -444,9 +558,7 @@ vec4_visitor::pack_uniform_registers() this->uniforms = new_uniform_count; /* Now, update the instructions for our repacked uniforms. */ - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { for (int i = 0 ; i < 3; i++) { int src = inst->src[i].reg; @@ -464,32 +576,6 @@ vec4_visitor::pack_uniform_registers() } } -bool -src_reg::is_zero() const -{ - if (file != IMM) - return false; - - if (type == BRW_REGISTER_TYPE_F) { - return imm.f == 0.0; - } else { - return imm.i == 0; - } -} - -bool -src_reg::is_one() const -{ - if (file != IMM) - return false; - - if (type == BRW_REGISTER_TYPE_F) { - return imm.f == 1.0; - } else { - return imm.i == 1; - } -} - /** * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a). * @@ -507,9 +593,7 @@ vec4_visitor::opt_algebraic() { bool progress = false; - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { switch (inst->opcode) { case BRW_OPCODE_ADD: if (inst->src[1].is_zero()) { @@ -533,9 +617,7 @@ vec4_visitor::opt_algebraic() inst->src[0] = src_reg(0u); break; default: - assert(!"not reached"); - inst->src[0] = src_reg(0.0f); - break; + unreachable("not reached"); } inst->src[1] = src_reg(); progress = true; @@ -551,7 +633,7 @@ vec4_visitor::opt_algebraic() } if (progress) - this->live_intervals_valid = false; + invalidate_live_intervals(); return progress; } @@ -568,6 +650,9 @@ vec4_visitor::move_push_constants_to_pull_constants() /* Only allow 32 registers (256 uniform components) as push constants, * which is the limit on gen6. + * + * If changing this value, note the limitation about total_regs in + * brw_curbe.c. */ int max_uniform_components = 32 * 8; if (this->uniforms * 4 <= max_uniform_components) @@ -582,16 +667,16 @@ vec4_visitor::move_push_constants_to_pull_constants() pull_constant_loc[i / 4] = -1; if (i >= max_uniform_components) { - const float **values = &prog_data->param[i]; + const gl_constant_value **values = &stage_prog_data->param[i]; /* Try to find an existing copy of this uniform in the pull * constants if it was part of an array access already. */ - for (unsigned int j = 0; j < prog_data->nr_pull_params; j += 4) { + for (unsigned int j = 0; j < stage_prog_data->nr_pull_params; j += 4) { int matches; for (matches = 0; matches < 4; matches++) { - if (prog_data->pull_param[j + matches] != values[matches]) + if (stage_prog_data->pull_param[j + matches] != values[matches]) break; } @@ -602,11 +687,12 @@ vec4_visitor::move_push_constants_to_pull_constants() } if (pull_constant_loc[i / 4] == -1) { - assert(prog_data->nr_pull_params % 4 == 0); - pull_constant_loc[i / 4] = prog_data->nr_pull_params / 4; + assert(stage_prog_data->nr_pull_params % 4 == 0); + pull_constant_loc[i / 4] = stage_prog_data->nr_pull_params / 4; for (int j = 0; j < 4; j++) { - prog_data->pull_param[prog_data->nr_pull_params++] = values[j]; + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] = + values[j]; } } } @@ -615,9 +701,7 @@ vec4_visitor::move_push_constants_to_pull_constants() /* Now actually rewrite usage of the things we've moved to pull * constants. */ - foreach_list_safe(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list_safe(vec4_instruction, inst, &instructions) { for (int i = 0 ; i < 3; i++) { if (inst->src[i].file != UNIFORM || pull_constant_loc[inst->src[i].reg] == -1) @@ -664,21 +748,18 @@ vec4_visitor::opt_set_dependency_control() vec4_instruction *last_mrf_write[BRW_MAX_GRF]; uint8_t mrf_channels_written[BRW_MAX_GRF]; - cfg_t cfg(this); + calculate_cfg(); assert(prog_data->total_grf || !"Must be called after register allocation"); - for (int i = 0; i < cfg.num_blocks; i++) { - bblock_t *bblock = cfg.blocks[i]; - vec4_instruction *inst; + for (int i = 0; i < cfg->num_blocks; i++) { + bblock_t *bblock = cfg->blocks[i]; memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); - for (inst = (vec4_instruction *)bblock->start; - inst != (vec4_instruction *)bblock->end->next; - inst = (vec4_instruction *)inst->next) { + foreach_inst_in_block (vec4_instruction, inst, bblock) { /* If we read from a register that we were doing dependency control * on, don't do dependency control across the read. */ @@ -712,6 +793,14 @@ vec4_visitor::opt_set_dependency_control() continue; } + /* Dependency control does not work well over math instructions. + */ + if (inst->is_math()) { + memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_mrf_write, 0, sizeof(last_mrf_write)); + continue; + } + /* Now, see if we can do dependency control for this instruction * against a previous one writing to its destination. */ @@ -839,9 +928,7 @@ vec4_visitor::opt_register_coalesce() calculate_live_intervals(); - foreach_list_safe(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list_safe(vec4_instruction, inst, &instructions) { int ip = next_ip; next_ip++; @@ -858,7 +945,10 @@ vec4_visitor::opt_register_coalesce() /* Can't coalesce this GRF if someone else was going to * read it later. */ - if (this->virtual_grf_end[inst->src[0].reg] > ip) + if (this->virtual_grf_end[inst->src[0].reg * 4 + 0] > ip || + this->virtual_grf_end[inst->src[0].reg * 4 + 1] > ip || + this->virtual_grf_end[inst->src[0].reg * 4 + 2] > ip || + this->virtual_grf_end[inst->src[0].reg * 4 + 3] > ip) continue; /* We need to check interference with the final destination between this @@ -1014,7 +1104,7 @@ vec4_visitor::opt_register_coalesce() } if (progress) - live_intervals_valid = false; + invalidate_live_intervals(); return progress; } @@ -1048,9 +1138,7 @@ vec4_visitor::split_virtual_grfs() /* Check that the instructions are compatible with the registers we're trying * to split. */ - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { /* If there's a SEND message loading from a GRF on gen7+, it needs to be * contiguous. */ @@ -1079,9 +1167,7 @@ vec4_visitor::split_virtual_grfs() this->virtual_grf_sizes[i] = 1; } - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { if (inst->dst.file == GRF && split_grf[inst->dst.reg] && inst->dst.reg_offset != 0) { inst->dst.reg = (new_virtual_grf[inst->dst.reg] + @@ -1097,92 +1183,183 @@ vec4_visitor::split_virtual_grfs() } } } - this->live_intervals_valid = false; + invalidate_live_intervals(); } void vec4_visitor::dump_instruction(backend_instruction *be_inst) +{ + dump_instruction(be_inst, stderr); +} + +void +vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) { vec4_instruction *inst = (vec4_instruction *)be_inst; - printf("%s ", brw_instruction_name(inst->opcode)); + if (inst->predicate) { + fprintf(file, "(%cf0) ", + inst->predicate_inverse ? '-' : '+'); + } + + fprintf(file, "%s", brw_instruction_name(inst->opcode)); + if (inst->conditional_mod) { + fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); + } + fprintf(file, " "); switch (inst->dst.file) { case GRF: - printf("vgrf%d.%d", inst->dst.reg, inst->dst.reg_offset); + fprintf(file, "vgrf%d.%d", inst->dst.reg, inst->dst.reg_offset); break; case MRF: - printf("m%d", inst->dst.reg); + fprintf(file, "m%d", inst->dst.reg); + break; + case HW_REG: + if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (inst->dst.fixed_hw_reg.nr) { + case BRW_ARF_NULL: + fprintf(file, "null"); + break; + case BRW_ARF_ADDRESS: + fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr); + break; + case BRW_ARF_ACCUMULATOR: + fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr); + break; + case BRW_ARF_FLAG: + fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf, + inst->dst.fixed_hw_reg.subnr); + break; + default: + fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf, + inst->dst.fixed_hw_reg.subnr); + break; + } + } else { + fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr); + } + if (inst->dst.fixed_hw_reg.subnr) + fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr); break; case BAD_FILE: - printf("(null)"); + fprintf(file, "(null)"); break; default: - printf("???"); + fprintf(file, "???"); break; } if (inst->dst.writemask != WRITEMASK_XYZW) { - printf("."); + fprintf(file, "."); if (inst->dst.writemask & 1) - printf("x"); + fprintf(file, "x"); if (inst->dst.writemask & 2) - printf("y"); + fprintf(file, "y"); if (inst->dst.writemask & 4) - printf("z"); + fprintf(file, "z"); if (inst->dst.writemask & 8) - printf("w"); + fprintf(file, "w"); } - printf(", "); + fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type)); - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 3 && inst->src[i].file != BAD_FILE; i++) { + if (inst->src[i].negate) + fprintf(file, "-"); + if (inst->src[i].abs) + fprintf(file, "|"); switch (inst->src[i].file) { case GRF: - printf("vgrf%d", inst->src[i].reg); + fprintf(file, "vgrf%d", inst->src[i].reg); break; case ATTR: - printf("attr%d", inst->src[i].reg); + fprintf(file, "attr%d", inst->src[i].reg); break; case UNIFORM: - printf("u%d", inst->src[i].reg); + fprintf(file, "u%d", inst->src[i].reg); break; case IMM: switch (inst->src[i].type) { case BRW_REGISTER_TYPE_F: - printf("%fF", inst->src[i].imm.f); + fprintf(file, "%fF", inst->src[i].fixed_hw_reg.dw1.f); break; case BRW_REGISTER_TYPE_D: - printf("%dD", inst->src[i].imm.i); + fprintf(file, "%dD", inst->src[i].fixed_hw_reg.dw1.d); break; case BRW_REGISTER_TYPE_UD: - printf("%uU", inst->src[i].imm.u); + fprintf(file, "%uU", inst->src[i].fixed_hw_reg.dw1.ud); break; default: - printf("???"); + fprintf(file, "???"); break; } break; + case HW_REG: + if (inst->src[i].fixed_hw_reg.negate) + fprintf(file, "-"); + if (inst->src[i].fixed_hw_reg.abs) + fprintf(file, "|"); + if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (inst->src[i].fixed_hw_reg.nr) { + case BRW_ARF_NULL: + fprintf(file, "null"); + break; + case BRW_ARF_ADDRESS: + fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr); + break; + case BRW_ARF_ACCUMULATOR: + fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr); + break; + case BRW_ARF_FLAG: + fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf, + inst->src[i].fixed_hw_reg.subnr); + break; + default: + fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf, + inst->src[i].fixed_hw_reg.subnr); + break; + } + } else { + fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr); + } + if (inst->src[i].fixed_hw_reg.subnr) + fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr); + if (inst->src[i].fixed_hw_reg.abs) + fprintf(file, "|"); + break; case BAD_FILE: - printf("(null)"); + fprintf(file, "(null)"); break; default: - printf("???"); + fprintf(file, "???"); break; } - if (inst->src[i].reg_offset) - printf(".%d", inst->src[i].reg_offset); + /* Don't print .0; and only VGRFs have reg_offsets and sizes */ + if (inst->src[i].reg_offset != 0 && + inst->src[i].file == GRF && + virtual_grf_sizes[inst->src[i].reg] != 1) + fprintf(file, ".%d", inst->src[i].reg_offset); + + if (inst->src[i].file != IMM) { + static const char *chans[4] = {"x", "y", "z", "w"}; + fprintf(file, "."); + for (int c = 0; c < 4; c++) { + fprintf(file, "%s", chans[BRW_GET_SWZ(inst->src[i].swizzle, c)]); + } + } - static const char *chans[4] = {"x", "y", "z", "w"}; - printf("."); - for (int c = 0; c < 4; c++) { - printf("%s", chans[BRW_GET_SWZ(inst->src[i].swizzle, c)]); + if (inst->src[i].abs) + fprintf(file, "|"); + + if (inst->src[i].file != IMM) { + fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type)); } - if (i < 3) - printf(", "); + if (i < 2 && inst->src[i + 1].file != BAD_FILE) + fprintf(file, ", "); } - printf("\n"); + fprintf(file, "\n"); } @@ -1212,9 +1389,7 @@ void vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map, bool interleaved) { - foreach_list(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - + foreach_in_list(vec4_instruction, inst, &instructions) { /* We have to support ATTR as a destination for GL_FIXED fixup. */ if (inst->dst.file == ATTR) { int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset]; @@ -1305,19 +1480,21 @@ vec4_vs_visitor::setup_attributes(int payload_reg) int vec4_visitor::setup_uniforms(int reg) { - prog_data->dispatch_grf_start_reg = reg; + prog_data->base.dispatch_grf_start_reg = reg; /* The pre-gen6 VS requires that some push constants get loaded no * matter what, or the GPU would hang. */ if (brw->gen < 6 && this->uniforms == 0) { + assert(this->uniforms < this->uniform_array_size); this->uniform_vector_size[this->uniforms] = 1; - prog_data->param = reralloc(NULL, prog_data->param, const float *, 4); + stage_prog_data->param = + reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); for (unsigned int i = 0; i < 4; i++) { unsigned int slot = this->uniforms * 4 + i; - static float zero = 0.0; - prog_data->param[slot] = &zero; + static gl_constant_value zero = { .f = 0.0 }; + stage_prog_data->param[slot] = &zero; } this->uniforms++; @@ -1326,9 +1503,9 @@ vec4_visitor::setup_uniforms(int reg) reg += ALIGN(uniforms, 2) / 2; } - prog_data->nr_params = this->uniforms * 4; + stage_prog_data->nr_params = this->uniforms * 4; - prog_data->curb_read_length = reg - prog_data->dispatch_grf_start_reg; + prog_data->curb_read_length = reg - prog_data->base.dispatch_grf_start_reg; return reg; } @@ -1412,10 +1589,10 @@ vec4_visitor::emit_shader_time_end() */ emit(ADD(diff, src_reg(diff), src_reg(-2u))); - emit_shader_time_write(ST_VS, src_reg(diff)); - emit_shader_time_write(ST_VS_WRITTEN, src_reg(1u)); + emit_shader_time_write(st_base, src_reg(diff)); + emit_shader_time_write(st_written, src_reg(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(ST_VS_RESET, src_reg(1u)); + emit_shader_time_write(st_reset, src_reg(1u)); emit(BRW_OPCODE_ENDIF); } @@ -1458,7 +1635,7 @@ vec4_visitor::run() * functions called "main"). */ if (shader) { - visit_instructions(shader->ir); + visit_instructions(shader->base.ir); } else { emit_program_code(); } @@ -1491,13 +1668,45 @@ vec4_visitor::run() move_push_constants_to_pull_constants(); split_virtual_grfs(); + const char *stage_name = stage == MESA_SHADER_GEOMETRY ? "gs" : "vs"; + +#define OPT(pass, args...) do { \ + pass_num++; \ + bool this_progress = pass(args); \ + \ + if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ + char filename[64]; \ + snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ + stage_name, shader_prog->Name, iteration, pass_num); \ + \ + backend_visitor::dump_instructions(filename); \ + } \ + \ + progress = progress || this_progress; \ + } while (false) + + + if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { + char filename[64]; + snprintf(filename, 64, "%s-%04d-00-start", + stage_name, shader_prog->Name); + + backend_visitor::dump_instructions(filename); + } + bool progress; + int iteration = 0; do { progress = false; - progress = dead_code_eliminate() || progress; - progress = opt_copy_propagation() || progress; - progress = opt_algebraic() || progress; - progress = opt_register_coalesce() || progress; + iteration++; + int pass_num = 0; + + OPT(dead_code_eliminate); + OPT(dead_control_flow_eliminate, this); + OPT(opt_copy_propagation); + OPT(opt_algebraic); + OPT(opt_cse); + OPT(opt_register_coalesce); } while (progress); @@ -1556,7 +1765,7 @@ brw_vs_emit(struct brw_context *brw, unsigned *final_assembly_size) { bool start_busy = false; - float start_time = 0; + double start_time = 0; if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && @@ -1568,19 +1777,10 @@ brw_vs_emit(struct brw_context *brw, if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { - if (prog) { - printf("GLSL IR for native vertex shader %d:\n", prog->Name); - _mesa_print_ir(shader->ir, NULL); - printf("\n\n"); - } else { - printf("ARB_vertex_program %d for native vertex shader\n", - c->vp->program.Base.Id); - _mesa_print_program(&c->vp->program.Base); - } - } + if (unlikely(INTEL_DEBUG & DEBUG_VS)) + brw_dump_ir(brw, "vertex", prog, &shader->base, &c->vp->program.Base); - vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx); + vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx); if (!v.run()) { if (prog) { prog->LinkStatus = false; @@ -1593,10 +1793,10 @@ brw_vs_emit(struct brw_context *brw, return NULL; } - vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, mem_ctx, - INTEL_DEBUG & DEBUG_VS); - const unsigned *generated =g.generate_assembly(&v.instructions, - final_assembly_size); + const unsigned *assembly = NULL; + vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base, + mem_ctx, INTEL_DEBUG & DEBUG_VS); + assembly = g.generate_assembly(&v.instructions, final_assembly_size); if (unlikely(brw->perf_debug) && shader) { if (shader->compiled_once) { @@ -1609,7 +1809,7 @@ brw_vs_emit(struct brw_context *brw, shader->compiled_once = true; } - return generated; + return assembly; } @@ -1634,31 +1834,4 @@ brw_vec4_setup_prog_key_for_precompile(struct gl_context *ctx, } } - -bool -brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a, - const struct brw_vec4_prog_data *b) -{ - /* Compare all the struct (including the base) up to the pointers. */ - if (memcmp(a, b, offsetof(struct brw_vec4_prog_data, param))) - return false; - - if (memcmp(a->param, b->param, a->nr_params * sizeof(void *))) - return false; - - if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *))) - return false; - - return true; -} - - -void -brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data) -{ - ralloc_free((void *)prog_data->param); - ralloc_free((void *)prog_data->pull_param); -} - - } /* extern "C" */