X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4.cpp;h=44cb89c6e51cf0fbe06b112b9b8d05b416795929;hb=5b6e91dd35a54297d28e086d1791a97be68c4c9e;hp=3de7d931dde65aed099624ca10f65ffd4f5ebfd0;hpb=030abc61091a38596427aa04022066719be9add5;p=mesa.git

diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 3de7d931dde..44cb89c6e51 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -42,8 +42,8 @@ void
 src_reg::init()
 {
    memset(this, 0, sizeof(*this));
-
    this->file = BAD_FILE;
+   this->type = BRW_REGISTER_TYPE_UD;
 }
 
 src_reg::src_reg(enum brw_reg_file file, int nr, const glsl_type *type)
@@ -85,6 +85,7 @@ dst_reg::init()
 {
    memset(this, 0, sizeof(*this));
    this->file = BAD_FILE;
+   this->type = BRW_REGISTER_TYPE_UD;
    this->writemask = WRITEMASK_XYZW;
 }
 
@@ -360,7 +361,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
    case SHADER_OPCODE_SAMPLEINFO:
-   case VS_OPCODE_GET_BUFFER_SIZE:
+   case SHADER_OPCODE_GET_BUFFER_SIZE:
       return inst->header_size;
    default:
       unreachable("not reached");
@@ -374,6 +375,13 @@ src_reg::equals(const src_reg &r) const
            !reladdr && !r.reladdr);
 }
 
+bool
+src_reg::negative_equals(const src_reg &r) const
+{
+   return this->backend_reg::negative_equals(r) &&
+          !reladdr && !r.reladdr;
+}
+
 bool
 vec4_visitor::opt_vector_float()
 {
@@ -687,8 +695,11 @@ vec4_visitor::pack_uniform_registers()
           * the next part of our packing algorithm.
           */
          int reg = inst->src[0].nr;
-         for (unsigned i = 0; i < vec4s_read; i++)
+         int channel_size = type_sz(inst->src[0].type) / 4;
+         for (unsigned i = 0; i < vec4s_read; i++) {
            chans_used[reg + i] = 4;
+            channel_sizes[reg + i] = MAX2(channel_sizes[reg + i], channel_size);
+         }
       }
    }
 
@@ -697,10 +708,9 @@ vec4_visitor::pack_uniform_registers()
    /* As the uniforms are going to be reordered, take the data from a temporary
     * copy of the original param[].
     */
-   gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
-                                            stage_prog_data->nr_params);
+   uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params);
    memcpy(param, stage_prog_data->param,
-          sizeof(gl_constant_value*) * stage_prog_data->nr_params);
+          sizeof(uint32_t) * stage_prog_data->nr_params);
 
    /* Now, figure out a packing of the live uniform vectors into our
     * push constants. Start with dvec{3,4} because they are aligned to
@@ -800,6 +810,14 @@ vec4_visitor::opt_algebraic()
          }
          break;
 
+      case BRW_OPCODE_OR:
+         if (inst->src[1].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();
+            progress = true;
+         }
+         break;
+
       case VEC4_OPCODE_UNPACK_UNIFORM:
          if (inst->src[0].file != UNIFORM) {
             inst->opcode = BRW_OPCODE_MOV;
@@ -906,7 +924,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
       pull_constant_loc[i / 4] = -1;
 
       if (i >= max_uniform_components) {
-         const gl_constant_value **values = &stage_prog_data->param[i];
+         uint32_t *values = &stage_prog_data->param[i];
 
          /* Try to find an existing copy of this uniform in the pull
          * constants if it was part of an array access already.
@@ -1275,6 +1293,15 @@ vec4_visitor::opt_register_coalesce()
          }
       }
 
+      /* VS_OPCODE_UNPACK_FLAGS_SIMD4X2 generates a bunch of mov(1)
+       * instructions, and this optimization pass is not capable of
+       * handling that. Bail on these instructions and hope that some
+       * later optimization pass can do the right thing after they are
+       * expanded.
+       */
+      if (scan_inst->opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
+         break;
+
       /* This doesn't handle saturation on the instruction we
        * want to coalesce away if the register types do not match.
       * But if scan_inst is a non type-converting 'mov', we can fix
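
A note on the BRW_OPCODE_OR case added to opt_algebraic above: it relies on
the identity (a | 0) == a, so an OR whose second source is known to be zero
degenerates to a copy and can be rewritten as a MOV with that source cleared.
A minimal standalone C++ sketch of the identity; or_with_zero is a
hypothetical helper invented for illustration, not the mesa IR:

   #include <cassert>
   #include <cstdint>

   /* Mirrors the rewrite: when the second operand is zero (the
    * inst->src[1].is_zero() test above), OR is just a move of the first. */
   static uint32_t or_with_zero(uint32_t a, uint32_t b)
   {
      if (b == 0)
         return a;      /* the MOV the optimizer emits instead */
      return a | b;
   }

   int main()
   {
      assert(or_with_zero(0xdeadbeefu, 0u) == 0xdeadbeefu);
      assert(or_with_zero(0xf0u, 0x0fu) == 0xffu);
      return 0;
   }
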
@@ -1542,9 +1569,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    vec4_instruction *inst = (vec4_instruction *)be_inst;
 
    if (inst->predicate) {
-      fprintf(file, "(%cf0.%d%s) ",
+      fprintf(file, "(%cf%d.%d%s) ",
              inst->predicate_inverse ? '-' : '+',
-             inst->flag_subreg,
+             inst->flag_subreg / 2,
+             inst->flag_subreg % 2,
              pred_ctrl_align16[inst->predicate]);
    }
 
@@ -1556,9 +1584,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
       if (!inst->predicate &&
           (devinfo->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
+                                inst->opcode != BRW_OPCODE_CSEL &&
                                 inst->opcode != BRW_OPCODE_IF &&
                                 inst->opcode != BRW_OPCODE_WHILE))) {
-         fprintf(file, ".f0.%d", inst->flag_subreg);
+         fprintf(file, ".f%d.%d", inst->flag_subreg / 2, inst->flag_subreg % 2);
      }
    }
    fprintf(file, " ");
@@ -1618,7 +1647,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       if (inst->dst.writemask & 8)
          fprintf(file, "w");
    }
-   fprintf(file, ":%s", brw_reg_type_letters(inst->dst.type));
+   fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));
 
    if (inst->src[0].file != BAD_FILE)
       fprintf(file, ", ");
@@ -1713,7 +1742,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          fprintf(file, "|");
 
       if (inst->src[i].file != IMM) {
-         fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type));
+         fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
       }
 
       if (i < 2 && inst->src[i + 1].file != BAD_FILE)
@@ -1762,12 +1791,10 @@ vec4_visitor::setup_uniforms(int reg)
     * matter what, or the GPU would hang.
     */
    if (devinfo->gen < 6 && this->uniforms == 0) {
-      stage_prog_data->param =
-         reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
+      brw_stage_prog_data_add_params(stage_prog_data, 4);
       for (unsigned int i = 0; i < 4; i++) {
          unsigned int slot = this->uniforms * 4 + i;
-         static gl_constant_value zero = { 0.0 };
-         stage_prog_data->param[slot] = &zero;
+         stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
       }
 
       this->uniforms++;
@@ -1776,6 +1803,9 @@ vec4_visitor::setup_uniforms(int reg)
      reg += ALIGN(uniforms, 2) / 2;
    }
 
+   for (int i = 0; i < 4; i++)
+      reg += stage_prog_data->ubo_ranges[i].length;
+
    stage_prog_data->nr_params = this->uniforms * 4;
 
    prog_data->base.curb_read_length =
@@ -1942,12 +1972,36 @@ is_align1_df(vec4_instruction *inst)
    }
 }
 
+/**
+ * Three-source instructions must have a GRF/MRF destination register.
+ * ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
+ */
+void
+vec4_visitor::fixup_3src_null_dest()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
+      if (inst->is_3src(devinfo) && inst->dst.is_null()) {
+         const unsigned size_written = type_sz(inst->dst.type);
+         const unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
+
+         inst->dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
+                            inst->dst.type);
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+}
+
 void
 vec4_visitor::convert_to_hw_regs()
 {
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (int i = 0; i < 3; i++) {
-         struct src_reg &src = inst->src[i];
+         class src_reg &src = inst->src[i];
          struct brw_reg reg;
          switch (src.file) {
          case VGRF: {
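
The fixup_3src_null_dest() pass added above can be pictured with a standalone
C++ sketch. The types below (Reg, Inst, Allocator) are simplified stand-ins
invented for illustration, not the mesa classes: any three-source instruction
whose destination is the null register gets a freshly allocated temporary,
since the hardware requires a real GRF/MRF destination for 3-src instructions.

   #include <vector>

   struct Reg { bool is_null; int vgrf; };    /* stand-in for dst_reg */
   struct Inst { int num_srcs; Reg dst; };    /* stand-in for vec4_instruction */

   struct Allocator {
      int next = 0;
      int allocate(int regs) { int r = next; next += regs; return r; }
   };

   /* Same shape as the pass: rewrite every 3-src instruction that
    * targets null so it writes a real, newly allocated register. */
   static bool fixup_3src_null_dest(std::vector<Inst> &insts, Allocator &alloc)
   {
      bool progress = false;
      for (Inst &inst : insts) {
         if (inst.num_srcs == 3 && inst.dst.is_null) {
            inst.dst = Reg{false, alloc.allocate(1)};
            progress = true;
         }
      }
      return progress;   /* the real pass invalidates liveness on progress */
   }

   int main()
   {
      Allocator alloc;
      std::vector<Inst> insts = { {3, {true, -1}}, {2, {true, -1}} };
      fixup_3src_null_dest(insts, alloc);
      return insts[0].dst.is_null ? 1 : 0;   /* 0: destination was fixed up */
   }
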
@@ -2237,7 +2291,12 @@ vec4_visitor::lower_simd_width()
             if (linst->src[i].file == BAD_FILE)
                continue;
 
-            if (!is_uniform(linst->src[i]))
+            bool is_interleaved_attr =
+               linst->src[i].file == ATTR &&
+               stage_uses_interleaved_attributes(stage,
+                                                 prog_data->dispatch_mode);
+
+            if (!is_uniform(linst->src[i]) && !is_interleaved_attr)
                linst->src[i] = horiz_offset(linst->src[i], channel_offset);
          }
 
@@ -2688,6 +2747,8 @@ vec4_visitor::run()
       OPT(scalarize_df);
    }
 
+   fixup_3src_null_dest();
+
    bool allocated_without_spills = reg_allocate();
 
    if (!allocated_without_spills) {
@@ -2738,10 +2799,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
                const struct brw_vs_prog_key *key,
                struct brw_vs_prog_data *prog_data,
                const nir_shader *src_shader,
-               gl_clip_plane *clip_planes,
-               bool use_legacy_snorm_formula,
                int shader_time_index,
-               unsigned *final_assembly_size,
                char **error_str)
 {
    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
@@ -2766,10 +2824,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
    }
 
    prog_data->inputs_read = shader->info.inputs_read;
-   prog_data->double_inputs_read = shader->info.double_inputs_read;
+   prog_data->double_inputs_read = shader->info.vs.double_inputs;
 
-   brw_nir_lower_vs_inputs(shader, use_legacy_snorm_formula,
-                           key->gl_attrib_wa_flags);
+   brw_nir_lower_vs_inputs(shader, key->gl_attrib_wa_flags);
    brw_nir_lower_vue_outputs(shader, is_scalar);
    shader = brw_postprocess_nir(shader, compiler, is_scalar);
 
@@ -2785,16 +2842,27 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
    * incoming vertex attribute. So, add an extra slot.
    */
   if (shader->info.system_values_read &
-      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
       BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
       BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
       BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
      nr_attribute_slots++;
   }
 
+   /* gl_DrawID and IsIndexedDraw share their very own vec4 */
+   if (shader->info.system_values_read &
+       (BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID) |
+        BITFIELD64_BIT(SYSTEM_VALUE_IS_INDEXED_DRAW))) {
+      nr_attribute_slots++;
+   }
+
    if (shader->info.system_values_read &
-       BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
-      prog_data->uses_basevertex = true;
+       BITFIELD64_BIT(SYSTEM_VALUE_IS_INDEXED_DRAW))
+      prog_data->uses_is_indexed_draw = true;
+
+   if (shader->info.system_values_read &
+       BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX))
+      prog_data->uses_firstvertex = true;
 
    if (shader->info.system_values_read &
       BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE))
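
The slot accounting in the hunk above groups system values by the vec4 they
share: gl_FirstVertex/BaseInstance/VertexID/InstanceID together occupy one
extra attribute slot, and gl_DrawID/IsIndexedDraw occupy another. A
standalone C++ sketch of that counting, with bit indices invented for
illustration (the real SYSTEM_VALUE_* numbering differs):

   #include <cassert>
   #include <cstdint>

   enum { SV_FIRST_VERTEX, SV_BASE_INSTANCE, SV_VERTEX_ID_ZERO_BASE,
          SV_INSTANCE_ID, SV_DRAW_ID, SV_IS_INDEXED_DRAW };

   static unsigned count_sgv_slots(uint64_t system_values_read)
   {
      unsigned slots = 0;
      /* first group shares one vec4: reading any member costs one slot */
      if (system_values_read & ((1ull << SV_FIRST_VERTEX) |
                                (1ull << SV_BASE_INSTANCE) |
                                (1ull << SV_VERTEX_ID_ZERO_BASE) |
                                (1ull << SV_INSTANCE_ID)))
         slots++;
      /* gl_DrawID and IsIndexedDraw share their own vec4 */
      if (system_values_read & ((1ull << SV_DRAW_ID) |
                                (1ull << SV_IS_INDEXED_DRAW)))
         slots++;
      return slots;
   }

   int main()
   {
      assert(count_sgv_slots(0) == 0);
      assert(count_sgv_slots(1ull << SV_INSTANCE_ID) == 1);
      assert(count_sgv_slots((1ull << SV_INSTANCE_ID) |
                             (1ull << SV_DRAW_ID)) == 2);
      return 0;
   }
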
@@ -2808,15 +2876,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
       BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
      prog_data->uses_instanceid = true;
 
-   /* gl_DrawID has its very own vec4 */
    if (shader->info.system_values_read &
-       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
-      prog_data->uses_drawid = true;
-      nr_attribute_slots++;
-   }
-
-   unsigned nr_attributes = nr_attribute_slots -
-      DIV_ROUND_UP(_mesa_bitcount_64(shader->info.double_inputs_read), 2);
+       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
+      prog_data->uses_drawid = true;
 
    /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
     * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
@@ -2829,7 +2891,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
    prog_data->base.urb_read_length =
       DIV_ROUND_UP(MAX2(nr_attribute_slots, 1), 2);
 
-   prog_data->nr_attributes = nr_attributes;
    prog_data->nr_attribute_slots = nr_attribute_slots;
 
    /* Since vertex shaders reuse the same VUE entry for inputs and outputs
@@ -2862,7 +2923,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
       fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base,
                    NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
                    shader, 8, shader_time_index);
-      if (!v.run_vs(clip_planes)) {
+      if (!v.run_vs()) {
         if (error_str)
            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
 
@@ -2871,7 +2932,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
 
      prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
 
-      fs_generator g(compiler, log_data, mem_ctx, (void *) key,
+      fs_generator g(compiler, log_data, mem_ctx,
                     &prog_data->base.base, v.promoted_constants,
                     v.runtime_check_aads_emit, MESA_SHADER_VERTEX);
      if (INTEL_DEBUG & DEBUG_VS) {
@@ -2884,15 +2945,14 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
         g.enable_debug(debug_name);
      }
      g.generate_code(v.cfg, 8);
-      assembly = g.get_assembly(final_assembly_size);
+      assembly = g.get_assembly();
    }
 
    if (!assembly) {
      prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 
      vec4_vs_visitor v(compiler, log_data, key, prog_data,
-                        shader, clip_planes, mem_ctx,
-                        shader_time_index, use_legacy_snorm_formula);
+                        shader, mem_ctx, shader_time_index);
      if (!v.run()) {
         if (error_str)
            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
@@ -2901,8 +2961,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
      }
 
      assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
-                                            shader, &prog_data->base, v.cfg,
-                                            final_assembly_size);
+                                            shader, &prog_data->base, v.cfg);
    }
 
    return assembly;
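
After these changes, brw_compile_vs() keeps its two-backend shape: when the
compiler marks the vertex stage scalar, the SIMD8 fs backend compiles the
shader; otherwise the vec4 backend runs in 4x2 dual-object mode. A standalone
C++ sketch of that control flow, with hypothetical stand-in functions rather
than the mesa API:

   #include <string>

   /* Stand-ins for the fs_visitor/fs_generator path and the vec4 path. */
   static bool run_scalar_vs(std::string &asm_out) { asm_out = "SIMD8"; return true; }
   static bool run_vec4_vs(std::string &asm_out) { asm_out = "4x2 dual object"; return true; }

   static bool compile_vs(bool is_scalar, std::string &assembly)
   {
      if (is_scalar)
         return run_scalar_vs(assembly);  /* failure reports an error, no silent fallback */
      return run_vec4_vs(assembly);       /* DISPATCH_MODE_4X2_DUAL_OBJECT path */
   }

   int main()
   {
      std::string assembly;
      return compile_vs(true, assembly) ? 0 : 1;
   }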