X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4.cpp;h=9459d61af6c6174d81245bc7fbbe6f5861e3e659;hb=0c8395e15d295d328f68574e71afaf04596e2a82;hp=b443effca9adaacfeba535212f19ea80e47f97ed;hpb=f9e31a26d4cf075e236e92aea63bb69eb9fad533;p=mesa.git diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index b443effca9a..9459d61af6c 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -42,8 +42,8 @@ void src_reg::init() { memset(this, 0, sizeof(*this)); - this->file = BAD_FILE; + this->type = BRW_REGISTER_TYPE_UD; } src_reg::src_reg(enum brw_reg_file file, int nr, const glsl_type *type) @@ -85,6 +85,7 @@ dst_reg::init() { memset(this, 0, sizeof(*this)); this->file = BAD_FILE; + this->type = BRW_REGISTER_TYPE_UD; this->writemask = WRITEMASK_XYZW; } @@ -360,7 +361,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - case VS_OPCODE_GET_BUFFER_SIZE: + case SHADER_OPCODE_GET_BUFFER_SIZE: return inst->header_size; default: unreachable("not reached"); @@ -374,6 +375,13 @@ src_reg::equals(const src_reg &r) const !reladdr && !r.reladdr); } +bool +src_reg::negative_equals(const src_reg &r) const +{ + return this->backend_reg::negative_equals(r) && + !reladdr && !r.reladdr; +} + bool vec4_visitor::opt_vector_float() { @@ -687,8 +695,11 @@ vec4_visitor::pack_uniform_registers() * the next part of our packing algorithm. */ int reg = inst->src[0].nr; - for (unsigned i = 0; i < vec4s_read; i++) + int channel_size = type_sz(inst->src[0].type) / 4; + for (unsigned i = 0; i < vec4s_read; i++) { chans_used[reg + i] = 4; + channel_sizes[reg + i] = MAX2(channel_sizes[reg + i], channel_size); + } } } @@ -697,10 +708,9 @@ vec4_visitor::pack_uniform_registers() /* As the uniforms are going to be reordered, take the data from a temporary * copy of the original param[]. */ - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - stage_prog_data->nr_params); + uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params); memcpy(param, stage_prog_data->param, - sizeof(gl_constant_value*) * stage_prog_data->nr_params); + sizeof(uint32_t) * stage_prog_data->nr_params); /* Now, figure out a packing of the live uniform vectors into our * push constants. Start with dvec{3,4} because they are aligned to @@ -906,7 +916,7 @@ vec4_visitor::move_push_constants_to_pull_constants() pull_constant_loc[i / 4] = -1; if (i >= max_uniform_components) { - const gl_constant_value **values = &stage_prog_data->param[i]; + uint32_t *values = &stage_prog_data->param[i]; /* Try to find an existing copy of this uniform in the pull * constants if it was part of an array access already. @@ -985,7 +995,7 @@ vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst) * affected, at least by the 64b restriction, since DepCtrl with double * precision instructions seems to produce GPU hangs in some cases. */ - if (devinfo->gen == 8 || devinfo->is_broxton) { + if (devinfo->gen == 8 || gen_device_info_is_9lp(devinfo)) { if (inst->opcode == BRW_OPCODE_MUL && IS_DWORD(inst->src[0]) && IS_DWORD(inst->src[1])) @@ -1542,9 +1552,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) vec4_instruction *inst = (vec4_instruction *)be_inst; if (inst->predicate) { - fprintf(file, "(%cf0.%d%s) ", + fprintf(file, "(%cf%d.%d%s) ", inst->predicate_inverse ? 
'-' : '+', - inst->flag_subreg, + inst->flag_subreg / 2, + inst->flag_subreg % 2, pred_ctrl_align16[inst->predicate]); } @@ -1556,9 +1567,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); if (!inst->predicate && (devinfo->gen < 5 || (inst->opcode != BRW_OPCODE_SEL && + inst->opcode != BRW_OPCODE_CSEL && inst->opcode != BRW_OPCODE_IF && inst->opcode != BRW_OPCODE_WHILE))) { - fprintf(file, ".f0.%d", inst->flag_subreg); + fprintf(file, ".f%d.%d", inst->flag_subreg / 2, inst->flag_subreg % 2); } } fprintf(file, " "); @@ -1618,7 +1630,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->dst.writemask & 8) fprintf(file, "w"); } - fprintf(file, ":%s", brw_reg_type_letters(inst->dst.type)); + fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type)); if (inst->src[0].file != BAD_FILE) fprintf(file, ", "); @@ -1713,7 +1725,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "|"); if (inst->src[i].file != IMM) { - fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type)); + fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type)); } if (i < 2 && inst->src[i + 1].file != BAD_FILE) @@ -1762,12 +1774,10 @@ vec4_visitor::setup_uniforms(int reg) * matter what, or the GPU would hang. */ if (devinfo->gen < 6 && this->uniforms == 0) { - stage_prog_data->param = - reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); + brw_stage_prog_data_add_params(stage_prog_data, 4); for (unsigned int i = 0; i < 4; i++) { unsigned int slot = this->uniforms * 4 + i; - static gl_constant_value zero = { 0.0 }; - stage_prog_data->param[slot] = &zero; + stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO; } this->uniforms++; @@ -1776,6 +1786,9 @@ vec4_visitor::setup_uniforms(int reg) reg += ALIGN(uniforms, 2) / 2; } + for (int i = 0; i < 4; i++) + reg += stage_prog_data->ubo_ranges[i].length; + stage_prog_data->nr_params = this->uniforms * 4; prog_data->base.curb_read_length = @@ -1942,12 +1955,36 @@ is_align1_df(vec4_instruction *inst) } } +/** + * Three source instruction must have a GRF/MRF destination register. + * ARF NULL is not allowed. Fix that up by allocating a temporary GRF. 
+ */ +void +vec4_visitor::fixup_3src_null_dest() +{ + bool progress = false; + + foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) { + if (inst->is_3src(devinfo) && inst->dst.is_null()) { + const unsigned size_written = type_sz(inst->dst.type); + const unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE); + + inst->dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), + inst->dst.type); + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); +} + void vec4_visitor::convert_to_hw_regs() { foreach_block_and_inst(block, vec4_instruction, inst, cfg) { for (int i = 0; i < 3; i++) { - struct src_reg &src = inst->src[i]; + class src_reg &src = inst->src[i]; struct brw_reg reg; switch (src.file) { case VGRF: { @@ -2237,7 +2274,12 @@ vec4_visitor::lower_simd_width() if (linst->src[i].file == BAD_FILE) continue; - if (!is_uniform(linst->src[i])) + bool is_interleaved_attr = + linst->src[i].file == ATTR && + stage_uses_interleaved_attributes(stage, + prog_data->dispatch_mode); + + if (!is_uniform(linst->src[i]) && !is_interleaved_attr) linst->src[i] = horiz_offset(linst->src[i], channel_offset); } @@ -2688,6 +2730,8 @@ vec4_visitor::run() OPT(scalarize_df); } + fixup_3src_null_dest(); + bool allocated_without_spills = reg_allocate(); if (!allocated_without_spills) { @@ -2738,10 +2782,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, const nir_shader *src_shader, - gl_clip_plane *clip_planes, - bool use_legacy_snorm_formula, int shader_time_index, - unsigned *final_assembly_size, char **error_str) { const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX]; @@ -2766,10 +2807,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, } prog_data->inputs_read = shader->info.inputs_read; - prog_data->double_inputs_read = shader->info.double_inputs_read; + prog_data->double_inputs_read = shader->info.vs.double_inputs; - brw_nir_lower_vs_inputs(shader, use_legacy_snorm_formula, - key->gl_attrib_wa_flags); + brw_nir_lower_vs_inputs(shader, key->gl_attrib_wa_flags); brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler, is_scalar); @@ -2796,6 +2836,10 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) prog_data->uses_basevertex = true; + if (shader->info.system_values_read & + BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX)) + prog_data->uses_firstvertex = true; + if (shader->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) prog_data->uses_baseinstance = true; @@ -2815,9 +2859,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, nr_attribute_slots++; } - unsigned nr_attributes = nr_attribute_slots - - DIV_ROUND_UP(_mesa_bitcount_64(shader->info.double_inputs_read), 2); - /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in * vec4 mode, the hardware appears to wedge unless we read something. 
@@ -2829,7 +2870,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attribute_slots, 1), 2); - prog_data->nr_attributes = nr_attributes; prog_data->nr_attribute_slots = nr_attribute_slots; /* Since vertex shaders reuse the same VUE entry for inputs and outputs @@ -2862,7 +2902,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ shader, 8, shader_time_index); - if (!v.run_vs(clip_planes)) { + if (!v.run_vs()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); @@ -2884,15 +2924,14 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, g.enable_debug(debug_name); } g.generate_code(v.cfg, 8); - assembly = g.get_assembly(final_assembly_size); + assembly = g.get_assembly(); } if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_vs_visitor v(compiler, log_data, key, prog_data, - shader, clip_planes, mem_ctx, - shader_time_index, use_legacy_snorm_formula); + shader, mem_ctx, shader_time_index); if (!v.run()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); @@ -2901,8 +2940,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, } assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, - shader, &prog_data->base, v.cfg, - final_assembly_size); + shader, &prog_data->base, v.cfg); } return assembly;
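
For reference, the dump_instruction() hunks above stop hard-coding "f0" and
instead derive both the flag register and the subregister from the combined
flag_subreg index, printing "f%d.%d" with flag_subreg / 2 and flag_subreg % 2.
A minimal standalone sketch of that mapping, assuming the same even/odd split
used in the diff (print_flag_name is a hypothetical helper, not part of
brw_vec4.cpp):

/* Map a combined flag_subreg index onto the "fR.S" name printed by
 * dump_instruction(): two flag registers with two subregisters each,
 * so indices 0..3 come out as f0.0, f0.1, f1.0 and f1.1.
 */
#include <cstdio>

static void print_flag_name(unsigned flag_subreg)
{
   std::printf("f%u.%u\n", flag_subreg / 2, flag_subreg % 2);
}

int main()
{
   for (unsigned i = 0; i < 4; i++)
      print_flag_name(i);
   return 0;
}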