X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs.cpp;h=41a3f81b9d82c5f1fb2f82fffc00b1ade1e77677;hb=24f984f64ae58c274f79eaf9148aea37df67131c;hp=84b5920d4f5bfb8e854c7dc2fef4b64bf2081829;hpb=d2f089ba17c6b17823fc3d244e15c0a18108d5ce;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 84b5920d4f5..41a3f81b9d8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -28,28 +28,17 @@ * from the LIR. */ -#include - -#include "util/hash_table.h" #include "main/macros.h" -#include "main/shaderobj.h" -#include "main/fbobject.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "util/register_allocate.h" -#include "program/hash_table.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_wm.h" #include "brw_fs.h" #include "brw_cs.h" +#include "brw_nir.h" #include "brw_vec4_gs_visitor.h" #include "brw_cfg.h" +#include "brw_program.h" #include "brw_dead_control_flow.h" -#include "main/uniforms.h" -#include "brw_fs_live_variables.h" -#include "glsl/nir/glsl_types.h" -#include "program/sampler.h" +#include "compiler/glsl_types.h" using namespace brw; @@ -186,7 +175,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * the redundant ones. */ fs_reg vec4_offset = vgrf(glsl_type::int_type); - bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3)); + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); int scale = 1; if (devinfo->gen == 4 && bld.dispatch_width() == 8) { @@ -218,7 +207,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, inst->mlen = 1 + bld.dispatch_width() / 8; } - bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale)); + bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale)); } /** @@ -299,6 +288,71 @@ fs_inst::is_send_from_grf() const } } +/** + * Returns true if this instruction's sources and destinations cannot + * safely be the same register. + * + * In most cases, a register can be written over safely by the same + * instruction that is its last use. For a single instruction, the + * sources are dereferenced before writing of the destination starts + * (naturally). + * + * However, there are a few cases where this can be problematic: + * + * - Virtual opcodes that translate to multiple instructions in the + * code generator: if src == dst and one instruction writes the + * destination before a later instruction reads the source, then + * src will have been clobbered. + * + * - SIMD16 compressed instructions with certain regioning (see below). + * + * The register allocator uses this information to set up conflicts between + * GRF sources and the destination. + */ +bool +fs_inst::has_source_and_destination_hazard() const +{ + switch (opcode) { + case FS_OPCODE_PACK_HALF_2x16_SPLIT: + /* Multiple partial writes to the destination */ + return true; + default: + /* The SIMD16 compressed instruction + * + * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F + * + * is actually decoded in hardware as: + * + * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F + * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F + * + * Which is safe. However, if we have uniform accesses + * happening, we get into trouble: + * + * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F + * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F + * + * Now our destination for the first instruction overwrote the + * second instruction's src0, and we get garbage for those 8 + * pixels. There's a similar issue for the pre-gen6 + * pixel_x/pixel_y, which are registers of 16-bit values and thus + * would get stomped by the first decode as well. + */ + if (exec_size == 16) { + for (int i = 0; i < sources; i++) { + if (src[i].file == VGRF && (src[i].stride == 0 || + src[i].type == BRW_REGISTER_TYPE_UW || + src[i].type == BRW_REGISTER_TYPE_W || + src[i].type == BRW_REGISTER_TYPE_UB || + src[i].type == BRW_REGISTER_TYPE_B)) { + return true; + } + } + } + return false; + } +} + bool fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const { @@ -374,55 +428,7 @@ fs_reg::fs_reg() this->file = BAD_FILE; } -/** Immediate value constructor. */ -fs_reg::fs_reg(float f) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_F; - this->stride = 0; - this->f = f; -} - -/** Immediate value constructor. */ -fs_reg::fs_reg(int32_t i) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_D; - this->stride = 0; - this->d = i; -} - -/** Immediate value constructor. */ -fs_reg::fs_reg(uint32_t u) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_UD; - this->stride = 0; - this->ud = u; -} - -/** Vector float immediate value constructor. */ -fs_reg::fs_reg(uint8_t vf[4]) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->ud, vf, sizeof(unsigned)); -} - -/** Vector float immediate value constructor. */ -fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); -} - -fs_reg::fs_reg(struct brw_reg reg) : +fs_reg::fs_reg(struct ::brw_reg reg) : backend_reg(reg) { this->reg_offset = 0; @@ -440,8 +446,7 @@ fs_reg::fs_reg(struct brw_reg reg) : bool fs_reg::equals(const fs_reg &r) const { - return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 && - reg_offset == r.reg_offset && + return (this->backend_reg::equals(r) && subreg_offset == r.subreg_offset && !reladdr && !r.reladdr && stride == r.stride); @@ -590,7 +595,7 @@ fs_visitor::emit_shader_time_end() fs_reg reset = shader_end_time; reset.set_smear(2); set_condmod(BRW_CONDITIONAL_Z, - ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u))); + ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u))); ibld.IF(BRW_PREDICATE_NORMAL); fs_reg start = shader_start_time; @@ -605,11 +610,11 @@ fs_visitor::emit_shader_time_end() * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. */ - cbld.ADD(diff, diff, fs_reg(-2u)); + cbld.ADD(diff, diff, brw_imm_ud(-2u)); SHADER_TIME_ADD(cbld, 0, diff); - SHADER_TIME_ADD(cbld, 1, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(cbld, 2, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u)); ibld.emit(BRW_OPCODE_ENDIF); } @@ -619,7 +624,7 @@ fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, fs_reg value) { int index = shader_time_index * 3 + shader_time_subindex; - fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE); + struct brw_reg offset = brw_imm_d(index * SHADER_TIME_STRIDE); fs_reg payload; if (dispatch_width == 8) @@ -1032,7 +1037,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, if (pixel_center_integer) { bld.MOV(wpos, this->pixel_x); } else { - bld.ADD(wpos, this->pixel_x, fs_reg(0.5f)); + bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f)); } wpos = offset(wpos, bld, 1); @@ -1048,7 +1053,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, offset += key->drawable_height - 1.0f; } - bld.ADD(wpos, pixel_y, fs_reg(offset)); + bld.ADD(wpos, pixel_y, brw_imm_f(offset)); } wpos = offset(wpos, bld, 1); @@ -1103,33 +1108,19 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp, } void -fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, +fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, const glsl_type *type, glsl_interp_qualifier interpolation_mode, - int location, bool mod_centroid, + int *location, bool mod_centroid, bool mod_sample) { - attr.type = brw_type_for_base_type(type->get_scalar_type()); - assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - unsigned int array_elements; - - if (type->is_array()) { - array_elements = type->arrays_of_arrays_size(); - if (array_elements == 0) { - fail("dereferenced array '%s' has length 0\n", name); - } - type = type->without_array(); - } else { - array_elements = 1; - } - if (interpolation_mode == INTERP_QUALIFIER_NONE) { bool is_gl_Color = - location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; + *location == VARYING_SLOT_COL0 || *location == VARYING_SLOT_COL1; if (key->flat_shade && is_gl_Color) { interpolation_mode = INTERP_QUALIFIER_FLAT; } else { @@ -1137,71 +1128,86 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, } } - for (unsigned int i = 0; i < array_elements; i++) { - for (unsigned int j = 0; j < type->matrix_columns; j++) { - if (prog_data->urb_setup[location] == -1) { - /* If there's no incoming setup data for this slot, don't - * emit interpolation for it. - */ - attr = offset(attr, bld, type->vector_elements); - location++; - continue; - } + if (type->is_array() || type->is_matrix()) { + const glsl_type *elem_type = glsl_get_array_element(type); + const unsigned length = glsl_get_length(type); - if (interpolation_mode == INTERP_QUALIFIER_FLAT) { - /* Constant interpolation (flat shading) case. The SF has - * handed us defined values in only the constant offset - * field of the setup reg. - */ - for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - interp = suboffset(interp, 3); - interp.type = attr.type; - bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); - attr = offset(attr, bld, 1); - } - } else { - /* Smooth/noperspective interpolation case. */ - for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - if (devinfo->needs_unlit_centroid_workaround && mod_centroid) { - /* Get the pixel/sample mask into f0 so that we know - * which pixels are lit. Then, for each channel that is - * unlit, replace the centroid data with non-centroid - * data. - */ - bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); - - fs_inst *inst; - inst = emit_linterp(attr, fs_reg(interp), interpolation_mode, - false, false); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - if (devinfo->has_pln) - inst->no_dd_clear = true; - - inst = emit_linterp(attr, fs_reg(interp), interpolation_mode, - mod_centroid && !key->persample_shading, - mod_sample || key->persample_shading); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = false; - if (devinfo->has_pln) - inst->no_dd_check = true; + for (unsigned i = 0; i < length; i++) { + emit_general_interpolation(attr, name, elem_type, interpolation_mode, + location, mod_centroid, mod_sample); + } + } else if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + emit_general_interpolation(attr, name, field_type, interpolation_mode, + location, mod_centroid, mod_sample); + } + } else { + assert(type->is_scalar() || type->is_vector()); - } else { - emit_linterp(attr, fs_reg(interp), interpolation_mode, - mod_centroid && !key->persample_shading, - mod_sample || key->persample_shading); - } - if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { - bld.MUL(attr, attr, this->pixel_w); - } - attr = offset(attr, bld, 1); - } + if (prog_data->urb_setup[*location] == -1) { + /* If there's no incoming setup data for this slot, don't + * emit interpolation for it. + */ + *attr = offset(*attr, bld, type->vector_elements); + (*location)++; + return; + } - } - location++; + attr->type = brw_type_for_base_type(type->get_scalar_type()); + + if (interpolation_mode == INTERP_QUALIFIER_FLAT) { + /* Constant interpolation (flat shading) case. The SF has + * handed us defined values in only the constant offset + * field of the setup reg. + */ + for (unsigned int i = 0; i < type->vector_elements; i++) { + struct brw_reg interp = interp_reg(*location, i); + interp = suboffset(interp, 3); + interp.type = attr->type; + bld.emit(FS_OPCODE_CINTERP, *attr, fs_reg(interp)); + *attr = offset(*attr, bld, 1); + } + } else { + /* Smooth/noperspective interpolation case. */ + for (unsigned int i = 0; i < type->vector_elements; i++) { + struct brw_reg interp = interp_reg(*location, i); + if (devinfo->needs_unlit_centroid_workaround && mod_centroid) { + /* Get the pixel/sample mask into f0 so that we know + * which pixels are lit. Then, for each channel that is + * unlit, replace the centroid data with non-centroid + * data. + */ + bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + + fs_inst *inst; + inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode, + false, false); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; + if (devinfo->has_pln) + inst->no_dd_clear = true; + + inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode, + mod_centroid && !key->persample_shading, + mod_sample || key->persample_shading); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = false; + if (devinfo->has_pln) + inst->no_dd_check = true; + + } else { + emit_linterp(*attr, fs_reg(interp), interpolation_mode, + mod_centroid && !key->persample_shading, + mod_sample || key->persample_shading); + } + if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { + bld.MUL(*attr, *attr, this->pixel_w); + } + *attr = offset(*attr, bld, 1); + } } + (*location)++; } } @@ -1225,7 +1231,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); g0.negate = true; - bld.ASR(*reg, g0, fs_reg(15)); + bld.ASR(*reg, g0, brw_imm_d(15)); } else { /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create * a boolean result from this (1/true or 0/false). @@ -1240,7 +1246,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); g1_6.negate = true; - bld.ASR(*reg, g1_6, fs_reg(31)); + bld.ASR(*reg, g1_6, brw_imm_d(31)); } return reg; @@ -1257,7 +1263,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) /* Convert int_sample_pos to floating point */ bld.MOV(dst, int_sample_pos); /* Scale to the range [0, 1] */ - bld.MUL(dst, dst, fs_reg(1 / 16.0f)); + bld.MUL(dst, dst, brw_imm_f(1 / 16.0f)); } else { /* From ARB_sample_shading specification: @@ -1265,7 +1271,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) * rasterization is disabled, gl_SamplePosition will always be * (0.5, 0.5). */ - bld.MOV(dst, fs_reg(0.5f)); + bld.MOV(dst, brw_imm_f(0.5f)); } } @@ -1360,8 +1366,8 @@ fs_visitor::emit_sampleid_setup() abld.exec_all().group(1, 0) .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), - fs_reg(sspi_mask)); - abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5)); + brw_imm_ud(sspi_mask)); + abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5)); /* This works for both SIMD8 and SIMD16 */ abld.exec_all().group(4, 0) @@ -1376,7 +1382,7 @@ fs_visitor::emit_sampleid_setup() * "When rendering to a non-multisample buffer, or if multisample * rasterization is disabled, gl_SampleID will always be zero." */ - abld.MOV(*reg, fs_reg(0)); + abld.MOV(*reg, brw_imm_d(0)); } return reg; @@ -1664,9 +1670,6 @@ fs_visitor::assign_vs_urb_setup() brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; assert(stage == MESA_SHADER_VERTEX); - int count = _mesa_bitcount_64(vs_prog_data->inputs_read); - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) - count++; /* Each attribute is 4 regs. */ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; @@ -1679,6 +1682,21 @@ fs_visitor::assign_vs_urb_setup() } } +void +fs_visitor::assign_tes_urb_setup() +{ + assert(stage == MESA_SHADER_TESS_EVAL); + + brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data; + + first_non_payload_grf += 8 * vue_prog_data->urb_read_length; + + /* Rewrite all ATTR file references to HW_REGs. */ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + convert_attr_sources_to_hw_regs(inst); + } +} + void fs_visitor::assign_gs_urb_setup() { @@ -1689,24 +1707,7 @@ fs_visitor::assign_gs_urb_setup() first_non_payload_grf += 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in; - const unsigned first_icp_handle = payload.num_regs - - (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0); - foreach_block_and_inst(block, fs_inst, inst, cfg) { - /* Lower URB_READ_SIMD8 opcodes into real messages. */ - if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) { - assert(inst->src[0].file == IMM); - inst->src[0] = retype(brw_vec8_grf(first_icp_handle + - inst->src[0].ud, - 0), BRW_REGISTER_TYPE_UD); - /* for now, assume constant - we can do per-slot offsets later */ - assert(inst->src[1].file == IMM); - inst->offset = inst->src[1].ud; - inst->src[1] = fs_reg(); - inst->mlen = 1; - inst->base_mrf = -1; - } - /* Rewrite all ATTR file references to GRFs. */ convert_attr_sources_to_hw_regs(inst); } @@ -2064,16 +2065,16 @@ fs_visitor::demote_pull_constants() /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { VARYING_PULL_CONSTANT_LOAD(ibld, dst, - fs_reg(index), + brw_imm_ud(index), *inst->src[i].reladdr, - pull_index); + pull_index * 4); inst->src[i].reladdr = NULL; inst->src[i].stride = 1; } else { const fs_builder ubld = ibld.exec_all().group(8, 0); - fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, fs_reg(index), offset); + dst, brw_imm_ud(index), offset); inst->src[i].set_smear(pull_index & 3); } brw_mark_surface_used(prog_data, index); @@ -2102,7 +2103,8 @@ fs_visitor::opt_algebraic() if (inst->dst.type != inst->src[0].type) assert(!"unimplemented: saturate mixed types"); - if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) { + if (brw_saturate_immediate(inst->dst.type, + &inst->src[0].as_brw_reg())) { inst->saturate = false; progress = true; } @@ -2765,7 +2767,7 @@ fs_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = fs_reg(0u); + inst->src[0] = brw_imm_ud(0u); inst->sources = 1; inst->force_writemask_all = true; progress = true; @@ -3102,13 +3104,11 @@ fs_visitor::lower_uniform_pull_constant_loads() continue; if (devinfo->gen >= 7) { - /* The offset arg before was a vec4-aligned byte offset. We need to - * turn it into a dword offset. - */ + /* The offset arg is a vec4-aligned immediate byte offset. */ fs_reg const_offset_reg = inst->src[1]; assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); - const_offset_reg.ud /= 4; + assert(const_offset_reg.ud % 16 == 0); fs_reg payload, offset; if (devinfo->gen >= 9) { @@ -3433,8 +3433,7 @@ fs_visitor::lower_integer_multiplication() */ assert(mul->src[1].type == BRW_REGISTER_TYPE_D || mul->src[1].type == BRW_REGISTER_TYPE_UD); - mul->src[1].type = (type_is_signed(mul->src[1].type) ? - BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW); + mul->src[1].type = BRW_REGISTER_TYPE_UW; mul->src[1].stride *= 2; } else if (devinfo->gen == 7 && !devinfo->is_haswell && @@ -3603,6 +3602,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, assert(devinfo->gen >= 9); assert(bld.dispatch_width() != 16); + /* XXX: src_stencil is only available on gen9+. dst_depth is never + * available on gen9+. As such it's impossible to have both enabled at the + * same time and therefore length cannot overrun the array. + */ + assert(length < 15); + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.exec_all().annotate("FB write OS") .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], @@ -3671,7 +3676,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, (has_lod || shadow_c.file != BAD_FILE || (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) { for (unsigned i = coord_components; i < 3; i++) - bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f)); + bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f)); msg_end = offset(msg_end, bld, 3 - coord_components); } @@ -3728,7 +3733,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ - bld.MOV(msg_end, fs_reg(0.0f)); + bld.MOV(msg_end, brw_imm_f(0.0f)); msg_end = offset(msg_end, bld, 1); } @@ -3822,7 +3827,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, case SHADER_OPCODE_TXF_CMS: msg_lod = offset(msg_coords, bld, 3); /* lod */ - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); /* sample index */ bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index); msg_end = offset(msg_lod, bld, 2); @@ -3902,7 +3907,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, if (bld.shader->stage != MESA_SHADER_FRAGMENT && op == SHADER_OPCODE_TEX) { op = SHADER_OPCODE_TXL; - lod = fs_reg(0.0f); + lod = brw_imm_f(0.0f); } /* Set up the LOD info */ @@ -4113,7 +4118,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask) { fs_builder ubld = bld.exec_all().group(8, 0); const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(dst, fs_reg(0)); + ubld.MOV(dst, brw_imm_d(0)); ubld.MOV(component(dst, 7), sample_mask); return dst; } @@ -4255,7 +4260,7 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: lower_surface_logical_send(ibld, inst, SHADER_OPCODE_TYPED_SURFACE_READ, - fs_reg(0xffff)); + brw_imm_d(0xffff)); break; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: @@ -4680,6 +4685,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case IMM: unreachable("not reached"); } + if (inst->dst.stride != 1) + fprintf(file, "<%u>", inst->dst.stride); fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type)); for (int i = 0; i < inst->sources; i++) { @@ -4767,6 +4774,16 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "|"); if (inst->src[i].file != IMM) { + unsigned stride; + if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) { + unsigned hstride = inst->src[i].hstride; + stride = (hstride == 0 ? 0 : (1 << (hstride - 1))); + } else { + stride = inst->src[i].stride; + } + if (stride != 1) + fprintf(file, "<%u>", stride); + fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type)); } @@ -5225,6 +5242,40 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) return !failed; } +bool +fs_visitor::run_tes() +{ + assert(stage == MESA_SHADER_TESS_EVAL); + + /* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */ + payload.num_regs = 5; + + if (shader_time_index >= 0) + emit_shader_time_begin(); + + emit_nir_code(); + + if (failed) + return false; + + emit_urb_writes(); + + if (shader_time_index >= 0) + emit_shader_time_end(); + + calculate_cfg(); + + optimize(); + + assign_curb_setup(); + assign_tes_urb_setup(); + + fixup_3src_null_dest(); + allocate_registers(); + + return !failed; +} + bool fs_visitor::run_gs() { @@ -5244,7 +5295,7 @@ fs_visitor::run_gs() */ if (gs_compile->control_data_header_size_bits <= 32) { const fs_builder abld = bld.annotate("initialize control data bits"); - abld.MOV(this->control_data_bits, fs_reg(0u)); + abld.MOV(this->control_data_bits, brw_imm_ud(0u)); } } @@ -5477,13 +5528,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, bool use_rep_send, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + true); + shader = brw_postprocess_nir(shader, compiler->devinfo, true); + /* key->alpha_test_func means simulating alpha testing via discards, * so the shader definitely kills pixels. */ @@ -5541,7 +5597,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base, - v.promoted_constants, v.runtime_check_aads_emit, "FS"); + v.promoted_constants, v.runtime_check_aads_emit, + MESA_SHADER_FRAGMENT); if (unlikely(INTEL_DEBUG & DEBUG_WM)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s", @@ -5558,42 +5615,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, return g.get_assembly(final_assembly_size); } -void -brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data, - void *buffer, uint32_t threads, uint32_t stride) -{ - if (prog_data->local_invocation_id_regs == 0) - return; - - /* 'stride' should be an integer number of registers, that is, a multiple - * of 32 bytes. - */ - assert(stride % 32 == 0); - - unsigned x = 0, y = 0, z = 0; - for (unsigned t = 0; t < threads; t++) { - uint32_t *param = (uint32_t *) buffer + stride * t / 4; - - for (unsigned i = 0; i < prog_data->simd_size; i++) { - param[0 * prog_data->simd_size + i] = x; - param[1 * prog_data->simd_size + i] = y; - param[2 * prog_data->simd_size + i] = z; - - x++; - if (x == prog_data->local_size[0]) { - x = 0; - y++; - if (y == prog_data->local_size[1]) { - y = 0; - z++; - if (z == prog_data->local_size[2]) - z = 0; - } - } - } - } -} - fs_reg * fs_visitor::emit_cs_local_invocation_id_setup() { @@ -5636,11 +5657,16 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_cs_prog_key *key, struct brw_cs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, int shader_time_index, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + true); + shader = brw_postprocess_nir(shader, compiler->devinfo, true); + prog_data->local_size[0] = shader->info.cs.local_size[0]; prog_data->local_size[1] = shader->info.cs.local_size[1]; prog_data->local_size[2] = shader->info.cs.local_size[2]; @@ -5697,7 +5723,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, } fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base, - v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); + v8.promoted_constants, v8.runtime_check_aads_emit, + MESA_SHADER_COMPUTE); if (INTEL_DEBUG & DEBUG_CS) { char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s", shader->info.label ? shader->info.label : @@ -5710,3 +5737,39 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, return g.get_assembly(final_assembly_size); } + +void +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data, + void *buffer, uint32_t threads, uint32_t stride) +{ + if (prog_data->local_invocation_id_regs == 0) + return; + + /* 'stride' should be an integer number of registers, that is, a multiple + * of 32 bytes. + */ + assert(stride % 32 == 0); + + unsigned x = 0, y = 0, z = 0; + for (unsigned t = 0; t < threads; t++) { + uint32_t *param = (uint32_t *) buffer + stride * t / 4; + + for (unsigned i = 0; i < prog_data->simd_size; i++) { + param[0 * prog_data->simd_size + i] = x; + param[1 * prog_data->simd_size + i] = y; + param[2 * prog_data->simd_size + i] = z; + + x++; + if (x == prog_data->local_size[0]) { + x = 0; + y++; + if (y == prog_data->local_size[1]) { + y = 0; + z++; + if (z == prog_data->local_size[2]) + z = 0; + } + } + } + } +}