X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4.cpp;h=37170e7fc577eb2dfecb7f9690303a0b0ddea479;hb=58fa9d47b536403c4e3ca5d6a2495691338388fd;hp=f18915a8e3806a12fcb6138de39ee81e3da823d7;hpb=34d162260f513a7eaec12611e3859bb34230cf33;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f18915a8e38..37170e7fc57 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -61,6 +61,8 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type) this->swizzle = brw_swizzle_for_size(type->vector_elements); else this->swizzle = BRW_SWIZZLE_XYZW; + if (type) + this->type = brw_type_for_base_type(type); } /** Generic unset register constructor. */ @@ -75,7 +77,7 @@ src_reg::src_reg(float f) this->file = IMM; this->type = BRW_REGISTER_TYPE_F; - this->fixed_hw_reg.dw1.f = f; + this->f = f; } src_reg::src_reg(uint32_t u) @@ -84,7 +86,7 @@ src_reg::src_reg(uint32_t u) this->file = IMM; this->type = BRW_REGISTER_TYPE_UD; - this->fixed_hw_reg.dw1.ud = u; + this->ud = u; } src_reg::src_reg(int32_t i) @@ -93,7 +95,7 @@ src_reg::src_reg(int32_t i) this->file = IMM; this->type = BRW_REGISTER_TYPE_D; - this->fixed_hw_reg.dw1.d = i; + this->d = i; } src_reg::src_reg(uint8_t vf[4]) @@ -102,7 +104,7 @@ src_reg::src_reg(uint8_t vf[4]) this->file = IMM; this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned)); + memcpy(&this->ud, vf, sizeof(unsigned)); } src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) @@ -111,31 +113,28 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) this->file = IMM; this->type = BRW_REGISTER_TYPE_VF; - this->fixed_hw_reg.dw1.ud = (vf0 << 0) | + this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); } -src_reg::src_reg(struct brw_reg reg) +src_reg::src_reg(struct brw_reg reg) : + backend_reg(reg) { - init(); - this->file = HW_REG; - this->fixed_hw_reg = reg; - this->type = reg.type; + this->reg = 0; + this->reg_offset = 0; + this->reladdr = NULL; } -src_reg::src_reg(const dst_reg ®) +src_reg::src_reg(const dst_reg ®) : + backend_reg(static_cast(reg)) { - init(); - this->file = reg.file; this->reg = reg.reg; this->reg_offset = reg.reg_offset; - this->type = reg.type; this->reladdr = reg.reladdr; - this->fixed_hw_reg = reg.fixed_hw_reg; this->swizzle = brw_swizzle_for_mask(reg.writemask); } @@ -182,26 +181,23 @@ dst_reg::dst_reg(register_file file, int reg, brw_reg_type type, this->writemask = writemask; } -dst_reg::dst_reg(struct brw_reg reg) +dst_reg::dst_reg(struct brw_reg reg) : + backend_reg(reg) { - init(); - this->file = HW_REG; - this->fixed_hw_reg = reg; - this->type = reg.type; + this->reg = 0; + this->reg_offset = 0; + this->reladdr = NULL; } -dst_reg::dst_reg(const src_reg ®) +dst_reg::dst_reg(const src_reg ®) : + backend_reg(static_cast(reg)) { - init(); - this->file = reg.file; this->reg = reg.reg; this->reg_offset = reg.reg_offset; - this->type = reg.type; this->writemask = brw_mask_for_swizzle(reg.swizzle); this->reladdr = reg.reladdr; - this->fixed_hw_reg = reg.fixed_hw_reg; } bool @@ -216,8 +212,8 @@ dst_reg::equals(const dst_reg &r) const writemask == r.writemask && (reladdr == r.reladdr || (reladdr && r.reladdr && reladdr->equals(*r.reladdr))) && - memcmp(&fixed_hw_reg, &r.fixed_hw_reg, - sizeof(fixed_hw_reg)) == 0); + (file != HW_REG || + memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0)); } bool @@ -277,6 +273,18 @@ vec4_instruction::can_do_source_mods(const struct brw_device_info *devinfo) return true; } +bool +vec4_instruction::can_change_types() const +{ + return dst.type == src[0].type && + !src[0].abs && !src[0].negate && !saturate && + (opcode == BRW_OPCODE_MOV || + (opcode == BRW_OPCODE_SEL && + dst.type == src[1].type && + predicate != BRW_PREDICATE_NONE && + !src[1].abs && !src[1].negate)); +} + /** * Returns how many MRFs an opcode will write over. * @@ -324,10 +332,13 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_SAMPLEINFO: + case VS_OPCODE_GET_BUFFER_SIZE: return inst->header_size; default: unreachable("not reached"); @@ -345,8 +356,9 @@ src_reg::equals(const src_reg &r) const abs == r.abs && swizzle == r.swizzle && !reladdr && !r.reladdr && - memcmp(&fixed_hw_reg, &r.fixed_hw_reg, - sizeof(fixed_hw_reg)) == 0); + (file != HW_REG || + memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0) && + (file != IMM || d == r.d)); } bool @@ -379,7 +391,7 @@ vec4_visitor::opt_vector_float() inst->src[0].file != IMM) continue; - int vf = brw_float_to_vf(inst->src[0].fixed_hw_reg.dw1.f); + int vf = brw_float_to_vf(inst->src[0].f); if (vf == -1) continue; @@ -513,11 +525,11 @@ vec4_visitor::split_uniform_registers() void vec4_visitor::pack_uniform_registers() { - bool uniform_used[this->uniforms]; + uint8_t chans_used[this->uniforms]; int new_loc[this->uniforms]; int new_chan[this->uniforms]; - memset(uniform_used, 0, sizeof(uniform_used)); + memset(chans_used, 0, sizeof(chans_used)); memset(new_loc, 0, sizeof(new_loc)); memset(new_chan, 0, sizeof(new_chan)); @@ -526,11 +538,36 @@ vec4_visitor::pack_uniform_registers() * to pull constants, and from some GLSL code generators like wine. */ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + unsigned readmask; + switch (inst->opcode) { + case VEC4_OPCODE_PACK_BYTES: + case BRW_OPCODE_DP4: + case BRW_OPCODE_DPH: + readmask = 0xf; + break; + case BRW_OPCODE_DP3: + readmask = 0x7; + break; + case BRW_OPCODE_DP2: + readmask = 0x3; + break; + default: + readmask = inst->dst.writemask; + break; + } + for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM) - continue; + if (inst->src[i].file != UNIFORM) + continue; + + int reg = inst->src[i].reg; + for (int c = 0; c < 4; c++) { + if (!(readmask & (1 << c))) + continue; - uniform_used[inst->src[i].reg] = true; + chans_used[reg] = MAX2(chans_used[reg], + BRW_GET_SWZ(inst->src[i].swizzle, c) + 1); + } } } @@ -541,17 +578,15 @@ vec4_visitor::pack_uniform_registers() */ for (int src = 0; src < uniforms; src++) { assert(src < uniform_array_size); - int size = this->uniform_vector_size[src]; + int size = chans_used[src]; - if (!uniform_used[src]) { - this->uniform_vector_size[src] = 0; - continue; - } + if (size == 0) + continue; int dst; /* Find the lowest place we can slot this uniform in. */ for (dst = 0; dst < src; dst++) { - if (this->uniform_vector_size[dst] + size <= 4) + if (chans_used[dst] + size <= 4) break; } @@ -560,7 +595,7 @@ vec4_visitor::pack_uniform_registers() new_chan[src] = 0; } else { new_loc[src] = dst; - new_chan[src] = this->uniform_vector_size[dst]; + new_chan[src] = chans_used[dst]; /* Move the references to the data */ for (int j = 0; j < size; j++) { @@ -568,8 +603,8 @@ vec4_visitor::pack_uniform_registers() stage_prog_data->param[src * 4 + j]; } - this->uniform_vector_size[dst] += size; - this->uniform_vector_size[src] = 0; + chans_used[dst] += size; + chans_used[src] = 0; } new_uniform_count = MAX2(new_uniform_count, dst + 1); @@ -619,8 +654,7 @@ vec4_visitor::opt_algebraic() if (inst->dst.type != inst->src[0].type) assert(!"unimplemented: saturate mixed types"); - if (brw_saturate_immediate(inst->dst.type, - &inst->src[0].fixed_hw_reg)) { + if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) { inst->saturate = false; progress = true; } @@ -927,9 +961,9 @@ vec4_visitor::opt_set_dependency_control() last_mrf_write[reg] = inst; mrf_channels_written[reg] |= inst->dst.writemask; } else if (inst->dst.reg == HW_REG) { - if (inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) + if (inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) memset(last_grf_write, 0, sizeof(last_grf_write)); - if (inst->dst.fixed_hw_reg.file == BRW_MESSAGE_REGISTER_FILE) + if (inst->dst.brw_reg::file == BRW_MESSAGE_REGISTER_FILE) memset(last_mrf_write, 0, sizeof(last_mrf_write)); } } @@ -937,10 +971,18 @@ vec4_visitor::opt_set_dependency_control() } bool -vec4_instruction::can_reswizzle(int dst_writemask, +vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo, + int dst_writemask, int swizzle, int swizzle_mask) { + /* Gen6 MATH instructions can not execute in align16 mode, so swizzles + * or writemasking are not allowed. + */ + if (devinfo->gen == 6 && is_math() && + (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW)) + return false; + /* If this instruction sets anything not referenced by swizzle, then we'd * totally break it when we reswizzle. */ @@ -950,6 +992,14 @@ vec4_instruction::can_reswizzle(int dst_writemask, if (mlen > 0) return false; + /* We can't use swizzles on the accumulator and that's really the only + * HW_REG we would care to reswizzle so just disallow them all. + */ + for (int i = 0; i < 3; i++) { + if (src[i].file == HW_REG) + return false; + } + return true; } @@ -1009,6 +1059,28 @@ vec4_visitor::opt_register_coalesce() inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr) continue; + /* Remove no-op MOVs */ + if (inst->dst.file == inst->src[0].file && + inst->dst.reg == inst->src[0].reg && + inst->dst.reg_offset == inst->src[0].reg_offset) { + bool is_nop_mov = true; + + for (unsigned c = 0; c < 4; c++) { + if ((inst->dst.writemask & (1 << c)) == 0) + continue; + + if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) { + is_nop_mov = false; + break; + } + } + + if (is_nop_mov) { + inst->remove(block); + continue; + } + } + bool to_mrf = (inst->dst.file == MRF); /* Can't coalesce this GRF if someone else was going to @@ -1033,7 +1105,7 @@ vec4_visitor::opt_register_coalesce() */ vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev; foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, - inst, block) { + inst) { _scan_inst = scan_inst; if (inst->src[0].in_range(scan_inst->dst, scan_inst->regs_written)) { @@ -1053,8 +1125,19 @@ vec4_visitor::opt_register_coalesce() } } + /* This doesn't handle saturation on the instruction we + * want to coalesce away if the register types do not match. + * But if scan_inst is a non type-converting 'mov', we can fix + * the types later. + */ + if (inst->saturate && + inst->dst.type != scan_inst->dst.type && + !(scan_inst->opcode == BRW_OPCODE_MOV && + scan_inst->dst.type == scan_inst->src[0].type)) + break; + /* If we can't handle the swizzle, bail. */ - if (!scan_inst->can_reswizzle(inst->dst.writemask, + if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask, inst->src[0].swizzle, chans_needed)) { break; @@ -1086,11 +1169,13 @@ vec4_visitor::opt_register_coalesce() if (interfered) break; - /* If somebody else writes our destination here, we can't coalesce - * before that. + /* If somebody else writes the same channels of our destination here, + * we can't coalesce before that. */ - if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written)) - break; + if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) && + (inst->dst.writemask & scan_inst->dst.writemask) != 0) { + break; + } /* Check for reads of the register we're trying to coalesce into. We * can't go rewriting instructions above that to put some other value @@ -1128,6 +1213,16 @@ vec4_visitor::opt_register_coalesce() scan_inst->dst.file = inst->dst.file; scan_inst->dst.reg = inst->dst.reg; scan_inst->dst.reg_offset = inst->dst.reg_offset; + if (inst->saturate && + inst->dst.type != scan_inst->dst.type) { + /* If we have reached this point, scan_inst is a non + * type-converting 'mov' and we can modify its register types + * to match the ones in inst. Otherwise, we could have an + * incorrect saturation result. + */ + scan_inst->dst.type = inst->dst.type; + scan_inst->src[0].type = inst->src[0].type; + } scan_inst->saturate |= inst->saturate; } scan_inst = (vec4_instruction *)scan_inst->next; @@ -1269,9 +1364,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) vec4_instruction *inst = (vec4_instruction *)be_inst; if (inst->predicate) { - fprintf(file, "(%cf0.%d) ", + fprintf(file, "(%cf0.%d%s) ", inst->predicate_inverse ? '-' : '+', - inst->flag_subreg); + inst->flag_subreg, + pred_ctrl_align16[inst->predicate]); } fprintf(file, "%s", brw_instruction_name(inst->opcode)); @@ -1296,38 +1392,39 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "m%d", inst->dst.reg); break; case HW_REG: - if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) { - switch (inst->dst.fixed_hw_reg.nr) { + if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (inst->dst.nr) { case BRW_ARF_NULL: fprintf(file, "null"); break; case BRW_ARF_ADDRESS: - fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr); + fprintf(file, "a0.%d", inst->dst.subnr); break; case BRW_ARF_ACCUMULATOR: - fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr); + fprintf(file, "acc%d", inst->dst.subnr); break; case BRW_ARF_FLAG: - fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf, - inst->dst.fixed_hw_reg.subnr); + fprintf(file, "f%d.%d", inst->dst.nr & 0xf, + inst->dst.subnr); break; default: - fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf, - inst->dst.fixed_hw_reg.subnr); + fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, + inst->dst.subnr); break; } } else { - fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr); + fprintf(file, "hw_reg%d", inst->dst.nr); } - if (inst->dst.fixed_hw_reg.subnr) - fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr); + if (inst->dst.subnr) + fprintf(file, "+%d", inst->dst.subnr); break; case BAD_FILE: fprintf(file, "(null)"); break; - default: - fprintf(file, "???"); - break; + case IMM: + case ATTR: + case UNIFORM: + unreachable("not reached"); } if (inst->dst.writemask != WRITEMASK_XYZW) { fprintf(file, "."); @@ -1363,20 +1460,20 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case IMM: switch (inst->src[i].type) { case BRW_REGISTER_TYPE_F: - fprintf(file, "%fF", inst->src[i].fixed_hw_reg.dw1.f); + fprintf(file, "%fF", inst->src[i].f); break; case BRW_REGISTER_TYPE_D: - fprintf(file, "%dD", inst->src[i].fixed_hw_reg.dw1.d); + fprintf(file, "%dD", inst->src[i].d); break; case BRW_REGISTER_TYPE_UD: - fprintf(file, "%uU", inst->src[i].fixed_hw_reg.dw1.ud); + fprintf(file, "%uU", inst->src[i].ud); break; case BRW_REGISTER_TYPE_VF: fprintf(file, "[%-gF, %-gF, %-gF, %-gF]", - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 0) & 0xff), - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 8) & 0xff), - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff), - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff)); + brw_vf_to_float((inst->src[i].ud >> 0) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 8) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); break; default: fprintf(file, "???"); @@ -1384,44 +1481,37 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) } break; case HW_REG: - if (inst->src[i].fixed_hw_reg.negate) - fprintf(file, "-"); - if (inst->src[i].fixed_hw_reg.abs) - fprintf(file, "|"); - if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) { - switch (inst->src[i].fixed_hw_reg.nr) { + if (inst->src[i].brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (inst->src[i].nr) { case BRW_ARF_NULL: fprintf(file, "null"); break; case BRW_ARF_ADDRESS: - fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr); + fprintf(file, "a0.%d", inst->src[i].subnr); break; case BRW_ARF_ACCUMULATOR: - fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr); + fprintf(file, "acc%d", inst->src[i].subnr); break; case BRW_ARF_FLAG: - fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf, - inst->src[i].fixed_hw_reg.subnr); + fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, + inst->src[i].subnr); break; default: - fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf, - inst->src[i].fixed_hw_reg.subnr); + fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, + inst->src[i].subnr); break; } } else { - fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr); + fprintf(file, "hw_reg%d", inst->src[i].nr); } - if (inst->src[i].fixed_hw_reg.subnr) - fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr); - if (inst->src[i].fixed_hw_reg.abs) - fprintf(file, "|"); + if (inst->src[i].subnr) + fprintf(file, "+%d", inst->src[i].subnr); break; case BAD_FILE: fprintf(file, "(null)"); break; - default: - fprintf(file, "???"); - break; + case MRF: + unreachable("not reached"); } /* Don't print .0; and only VGRFs have reg_offsets and sizes */ @@ -1449,6 +1539,9 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, ", "); } + if (inst->force_writemask_all) + fprintf(file, " NoMask"); + fprintf(file, "\n"); } @@ -1491,10 +1584,9 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map, struct brw_reg reg = attribute_to_hw_reg(grf, interleaved); reg.type = inst->dst.type; - reg.dw1.bits.writemask = inst->dst.writemask; + reg.writemask = inst->dst.writemask; - inst->dst.file = HW_REG; - inst->dst.fixed_hw_reg = reg; + inst->dst = reg; } for (int i = 0; i < 3; i++) { @@ -1509,15 +1601,14 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map, assert(grf != 0); struct brw_reg reg = attribute_to_hw_reg(grf, interleaved); - reg.dw1.bits.swizzle = inst->src[i].swizzle; + reg.swizzle = inst->src[i].swizzle; reg.type = inst->src[i].type; if (inst->src[i].abs) reg = brw_abs(reg); if (inst->src[i].negate) reg = negate(reg); - inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = reg; + inst->src[i] = reg; } } } @@ -1543,28 +1634,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) */ if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; - nr_attributes++; } lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); - /* The BSpec says we always have to read at least one thing from - * the VF, and it appears that the hardware wedges otherwise. - */ - if (nr_attributes == 0) - nr_attributes = 1; - - prog_data->urb_read_length = (nr_attributes + 1) / 2; - - unsigned vue_entries = - MAX2(nr_attributes, prog_data->vue_map.num_slots); - - if (devinfo->gen == 6) - prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8; - else - prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4; - - return payload_reg + nr_attributes; + return payload_reg + vs_prog_data->nr_attributes; } int @@ -1577,7 +1651,6 @@ vec4_visitor::setup_uniforms(int reg) */ if (devinfo->gen < 6 && this->uniforms == 0) { assert(this->uniforms < this->uniform_array_size); - this->uniform_vector_size[this->uniforms] = 1; stage_prog_data->param = reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); @@ -1619,12 +1692,6 @@ vec4_vs_visitor::setup_payload(void) this->first_non_payload_grf = reg; } -void -vec4_visitor::assign_binding_table_offsets() -{ - assign_common_binding_table_offsets(0); -} - src_reg vec4_visitor::get_timestamp() { @@ -1710,46 +1777,111 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; - emit(MOV(time, src_reg(value))); + emit(MOV(time, value)); vec4_instruction *inst = emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst)); inst->mlen = 2; } -bool -vec4_visitor::run(gl_clip_plane *clip_planes) +void +vec4_visitor::convert_to_hw_regs() { - bool use_vec4_nir = - compiler->glsl_compiler_options[stage].NirOptions != NULL; + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + struct src_reg &src = inst->src[i]; + struct brw_reg reg; + switch (src.file) { + case GRF: + reg = brw_vec8_grf(src.reg + src.reg_offset, 0); + reg.type = src.type; + reg.swizzle = src.swizzle; + reg.abs = src.abs; + reg.negate = src.negate; + break; + + case IMM: + reg = brw_imm_reg(src.type); + reg.ud = src.ud; + break; + + case UNIFORM: + reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + + (src.reg + src.reg_offset) / 2, + ((src.reg + src.reg_offset) % 2) * 4), + 0, 4, 1); + reg.type = src.type; + reg.swizzle = src.swizzle; + reg.abs = src.abs; + reg.negate = src.negate; + + /* This should have been moved to pull constants. */ + assert(!src.reladdr); + break; + + case HW_REG: + continue; + + case BAD_FILE: + /* Probably unused. */ + reg = brw_null_reg(); + break; + + case MRF: + case ATTR: + unreachable("not reached"); + } + src = reg; + } + + dst_reg &dst = inst->dst; + struct brw_reg reg; + + switch (inst->dst.file) { + case GRF: + reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + reg.type = dst.type; + reg.writemask = dst.writemask; + break; + + case MRF: + assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen)); + reg = brw_message_reg(dst.reg + dst.reg_offset); + reg.type = dst.type; + reg.writemask = dst.writemask; + break; - sanity_param_count = prog->Parameters->NumParameters; + case HW_REG: + reg = dst; + break; + + case BAD_FILE: + reg = brw_null_reg(); + break; + case IMM: + case ATTR: + case UNIFORM: + unreachable("not reached"); + } + + dst = reg; + } +} + +bool +vec4_visitor::run() +{ if (shader_time_index >= 0) emit_shader_time_begin(); - assign_binding_table_offsets(); - emit_prolog(); - if (use_vec4_nir) { - assert(prog->nir != NULL); - emit_nir_code(); - if (failed) - return false; - } else if (shader) { - /* Generate VS IR for main(). (the visitor only descends into - * functions called "main"). - */ - visit_instructions(shader->base.ir); - } else { - emit_program_code(); - } + emit_nir_code(); + if (failed) + return false; base_ir = NULL; - if (key->userclip_active && !prog->UsesClipDistanceOut) - setup_uniform_clipplane_values(clip_planes); - emit_thread_end(); calculate_cfg(); @@ -1760,18 +1892,9 @@ vec4_visitor::run(gl_clip_plane *clip_planes) * that we have reladdr computations available for CSE, since we'll * often do repeated subexpressions for those. */ - if (shader || use_vec4_nir) { - move_grf_array_access_to_scratch(); - move_uniform_array_access_to_pull_constants(); - } else { - /* The ARB_vertex_program frontend emits pull constant loads directly - * rather than using reladdr, so we don't need to walk through all the - * instructions looking for things to move. There isn't anything. - * - * We do still need to split things to vec4 size. - */ - split_uniform_registers(); - } + move_grf_array_access_to_scratch(); + move_uniform_array_access_to_pull_constants(); + pack_uniform_registers(); move_push_constants_to_pull_constants(); split_virtual_grfs(); @@ -1782,8 +1905,8 @@ vec4_visitor::run(gl_clip_plane *clip_planes) \ if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ char filename[64]; \ - snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ - stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ + snprintf(filename, 64, "%s-%s-%02d-%02d-" #pass, \ + stage_abbrev, nir->info.name, iteration, pass_num); \ \ backend_shader::dump_instructions(filename); \ } \ @@ -1795,8 +1918,8 @@ vec4_visitor::run(gl_clip_plane *clip_planes) if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s-%04d-00-start", - stage_abbrev, shader_prog ? shader_prog->Name : 0); + snprintf(filename, 64, "%s-%s-00-start", + stage_abbrev, nir->info.name); backend_shader::dump_instructions(filename); } @@ -1809,10 +1932,12 @@ vec4_visitor::run(gl_clip_plane *clip_planes) pass_num = 0; iteration++; + OPT(opt_predicated_break, this); OPT(opt_reduce_swizzle); OPT(dead_code_eliminate); OPT(dead_control_flow_eliminate, this); OPT(opt_copy_propagation); + OPT(opt_cmod_propagation); OPT(opt_cse); OPT(opt_algebraic); OPT(opt_register_coalesce); @@ -1833,7 +1958,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes) setup_payload(); - if (false) { + if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) { /* Debug of register spilling: Go spill everything. */ const int grf_count = alloc.count; float spill_costs[alloc.count]; @@ -1865,18 +1990,13 @@ vec4_visitor::run(gl_clip_plane *clip_planes) opt_set_dependency_control(); + convert_to_hw_regs(); + if (last_scratch > 0) { prog_data->base.total_scratch = brw_get_scratch_size(last_scratch * REG_SIZE); } - /* If any state parameters were appended, then ParameterValues could have - * been realloced, in which case the driver uniform storage set up by - * _mesa_associate_uniform_storage() would point to freed memory. Make - * sure that didn't happen. - */ - assert(sanity_param_count == prog->Parameters->NumParameters); - return !failed; } @@ -1890,83 +2010,76 @@ extern "C" { * Returns the final assembly and the program's size. */ const unsigned * -brw_vs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_vertex_program *vp, - struct gl_shader_program *prog, - unsigned *final_assembly_size) +brw_compile_vs(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_vs_prog_key *key, + struct brw_vs_prog_data *prog_data, + const nir_shader *shader, + gl_clip_plane *clip_planes, + bool use_legacy_snorm_formula, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str) { - bool start_busy = false; - double start_time = 0; const unsigned *assembly = NULL; - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - drm_intel_bo_busy(brw->batch.last_bo)); - start_time = get_time(); + unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read); + + /* gl_VertexID and gl_InstanceID are system values, but arrive via an + * incoming vertex attribute. So, add an extra slot. + */ + if (shader->info.system_values_read & + (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | + BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) { + nr_attributes++; } - struct brw_shader *shader = NULL; - if (prog) - shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry + * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in + * vec4 mode, the hardware appears to wedge unless we read something. + */ + if (compiler->scalar_vs) + prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2); + else + prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2); - int st_index = -1; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS); + prog_data->nr_attributes = nr_attributes; - if (unlikely(INTEL_DEBUG & DEBUG_VS)) - brw_dump_ir("vertex", prog, &shader->base, &vp->Base); + /* Since vertex shaders reuse the same VUE entry for inputs and outputs + * (overwriting the original contents), we need to make sure the size is + * the larger of the two. + */ + const unsigned vue_entries = + MAX2(nr_attributes, (unsigned)prog_data->base.vue_map.num_slots); - if (!vp->Base.nir && - (brw->intelScreen->compiler->scalar_vs || - brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) { - /* Normally we generate NIR in LinkShader() or - * ProgramStringNotify(), but Mesa's fixed-function vertex program - * handling doesn't notify the driver at all. Just do it here, at - * the last minute, even though it's lame. - */ - assert(vp->Base.Id == 0 && prog == NULL); - vp->Base.nir = - brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX, - brw->intelScreen->compiler->scalar_vs); - } + if (compiler->devinfo->gen == 6) + prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8); + else + prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); - if (brw->intelScreen->compiler->scalar_vs) { + if (compiler->scalar_vs) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; - fs_visitor v(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_VERTEX, key, - &prog_data->base.base, prog, &vp->Base, - 8, st_index); - if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { - if (prog) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - } - - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", - v.fail_msg); + fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, + NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ + shader, 8, shader_time_index); + if (!v.run_vs(clip_planes)) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } - fs_generator g(brw->intelScreen->compiler, brw, - mem_ctx, (void *) key, &prog_data->base.base, - &vp->Base, v.promoted_constants, + fs_generator g(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, v.promoted_constants, v.runtime_check_aads_emit, "VS"); if (INTEL_DEBUG & DEBUG_VS) { - char *name; - if (prog) { - name = ralloc_asprintf(mem_ctx, "%s vertex shader %d", - prog->Label ? prog->Label : "unnamed", - prog->Name); - } else { - name = ralloc_asprintf(mem_ctx, "vertex program %d", - vp->Base.Id); - } - g.enable_debug(name); + const char *debug_name = + ralloc_asprintf(mem_ctx, "%s vertex shader %s", + shader->info.label ? shader->info.label : "unnamed", + shader->info.name); + + g.enable_debug(debug_name); } g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); @@ -1975,51 +2088,22 @@ brw_vs_emit(struct brw_context *brw, if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data, - vp, prog, mem_ctx, st_index, - !_mesa_is_gles3(&brw->ctx)); - if (!v.run(brw_select_clip_planes(&brw->ctx))) { - if (prog) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - } - - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", - v.fail_msg); + vec4_vs_visitor v(compiler, log_data, key, prog_data, + shader, clip_planes, mem_ctx, + shader_time_index, use_legacy_snorm_formula); + if (!v.run()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } - vec4_generator g(brw->intelScreen->compiler, brw, - prog, &vp->Base, &prog_data->base, - mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); - assembly = g.generate_assembly(v.cfg, final_assembly_size); - } - - if (unlikely(brw->perf_debug) && shader) { - if (shader->compiled_once) { - brw_vs_debug_recompile(brw, prog, key); - } - if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { - perf_debug("VS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - shader->compiled_once = true; + assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, + shader, &prog_data->base, v.cfg, + final_assembly_size); } return assembly; } - -void -brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx, - struct brw_vue_prog_key *key, - GLuint id, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - key->program_string_id = id; - - brw_setup_tex_for_precompile(brw, &key->tex, prog); -} - } /* extern "C" */