From da361acd1c899d533caec6cae5a336f6ab35e076 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Fri, 17 Jul 2015 14:40:03 +0100 Subject: [PATCH] i965/fs: Handle non-const sample number in interpolateAtSample If a non-const sample number is given to interpolateAtSample it will now generate an indirect send message with the sample ID similar to how non-const sampler array indexing works. Previously non-const values were ignored and instead it ended up using a constant 0 value. The generator will try to determine if the sample ID is dynamically uniform via nir_src_is_dynamically_uniform. If not it will query the pixel interpolator in a loop, once for each different live sample number. The next live sample number is found using emit_uniformize. If multiple live channels have the same sample number then they will be handled in a single iteration of the loop. The loop is necessary because the indirect send message doesn't seem to have a way to specify a different value for each fragment. This fixes the following two Piglit tests: arb_gpu_shader5-interpolateAtSample-nonconst arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform v2: Handle dynamically non-uniform sample ids. v3: Remove the BREAK instruction and predicate the WHILE directly. Make the tokens arrays const. (Matt Turner) v4: Iterate over the live channels instead of each possible sample number. v5: Don't special case immediate values in brw_pixel_interpolator_query. Make a better wrapper for the function to set up the PI send instruction. Ensure that the SHL instructions are scalar. (Francisco Jerez). Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_eu.h | 2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 27 ++-- .../drivers/dri/i965/brw_fs_generator.cpp | 5 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 139 ++++++++++++++---- 4 files changed, 130 insertions(+), 43 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 761aa0ec5fa..0ac1ad9378b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index dc699bb6321..bf2fee9ed48 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length) { const struct brw_device_info *devinfo = p->devinfo; - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, mrf); - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, - msg_length, response_length, - false /* header is never present for PI */, - false); + struct brw_inst *insn; + const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current); - brw_inst_set_pi_simd_mode( - devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); + /* brw_send_indirect_message will automatically use a direct send message + * if data is actually immediate. + */ + insn = brw_send_indirect_message(p, + GEN7_SFID_PIXEL_INTERPOLATOR, + dest, + mrf, + vec1(data)); + brw_inst_set_mlen(devinfo, insn, msg_length); + brw_inst_set_rlen(devinfo, insn, response_length); + + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */ brw_inst_set_pi_nopersp(devinfo, insn, noperspective); brw_inst_set_pi_message_type(devinfo, insn, mode); - brw_inst_set_pi_message_data(devinfo, insn, data); } void diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 6f8b75e339f..17e19cf807a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1377,15 +1377,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg msg_data, unsigned msg_type) { - assert(msg_data.file == BRW_IMMEDIATE_VALUE && - msg_data.type == BRW_REGISTER_TYPE_UD); + assert(msg_data.type == BRW_REGISTER_TYPE_UD); brw_pixel_interpolator_query(p, retype(dst, BRW_REGISTER_TYPE_UW), src, inst->pi_noperspective, msg_type, - msg_data.dw1.ud, + msg_data, inst->mlen, inst->regs_written); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 03fe6804701..bc0df6850c4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1180,6 +1180,36 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type) } } +static fs_inst * +emit_pixel_interpolater_send(const fs_builder &bld, + enum opcode opcode, + const fs_reg &dst, + const fs_reg &src, + const fs_reg &desc, + glsl_interp_qualifier interpolation) +{ + fs_inst *inst; + fs_reg payload; + int mlen; + + if (src.file == BAD_FILE) { + /* Dummy payload */ + payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1); + mlen = 1; + } else { + payload = src; + mlen = 2 * bld.dispatch_width() / 8; + } + + inst = bld.emit(opcode, dst, payload, desc); + inst->mlen = mlen; + /* 2 floats per slot returned */ + inst->regs_written = 2 * bld.dispatch_width() / 8; + inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE; + + return inst; +} + void fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -1583,28 +1613,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true; fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - - /* For most messages, we need one reg of ignored data; the hardware - * requires mlen==1 even when there is no payload. in the per-slot - * offset case, we'll replace this with the proper source data. - */ - fs_reg src = vgrf(glsl_type::float_type); - int mlen = 1; /* one reg unless overriden */ - fs_inst *inst; + const glsl_interp_qualifier interpolation = + (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation; switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, - dst_xy, src, fs_reg(0u)); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_CENTROID, + dst_xy, + fs_reg(), /* src */ + fs_reg(0u), + interpolation); break; case nir_intrinsic_interp_var_at_sample: { - /* XXX: We should probably handle non-constant sample id's */ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - assert(const_sample); - unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, - fs_reg(msg_data)); + + if (const_sample) { + unsigned msg_data = const_sample->i[0] << 4; + + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + fs_reg(msg_data), + interpolation); + } else { + const fs_reg sample_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + + if (nir_src_is_dynamically_uniform(instr->src[0])) { + const fs_reg sample_id = bld.emit_uniformize(sample_src); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + msg_data, + interpolation); + } else { + /* Make a loop that sends a message to the pixel interpolater + * for the sample number in each live channel. If there are + * multiple channels with the same sample number then these + * will be handled simultaneously with a single interation of + * the loop. + */ + bld.emit(BRW_OPCODE_DO); + + /* Get the next live sample number into sample_id_reg */ + const fs_reg sample_id = bld.emit_uniformize(sample_src); + + /* Set the flag register so that we can perform the send + * message on all channels that have the same sample number + */ + bld.CMP(bld.null_reg_ud(), + sample_src, sample_id, + BRW_CONDITIONAL_EQ); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + fs_inst *inst = + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + msg_data, + interpolation); + set_predicate(BRW_PREDICATE_NORMAL, inst); + + /* Continue the loop if there are any live channels left */ + set_predicate_inv(BRW_PREDICATE_NORMAL, + true, /* inverse */ + bld.emit(BRW_OPCODE_WHILE)); + } + } + break; } @@ -1615,10 +1698,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, - fs_reg(off_x | (off_y << 4))); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, + dst_xy, + fs_reg(), /* src */ + fs_reg(off_x | (off_y << 4)), + interpolation); } else { - src = vgrf(glsl_type::ivec2_type); + fs_reg src = vgrf(glsl_type::ivec2_type); fs_reg offset_src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { @@ -1646,9 +1733,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); } - mlen = 2 * dispatch_width / 8; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, - fs_reg(0u)); + const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; + emit_pixel_interpolater_send(bld, + opcode, + dst_xy, + src, + fs_reg(0u), + interpolation); } break; } @@ -1657,12 +1748,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unreachable("Invalid intrinsic"); } - inst->mlen = mlen; - /* 2 floats per slot returned */ - inst->regs_written = 2 * dispatch_width / 8; - inst->pi_noperspective = instr->variables[0]->var->data.interpolation == - INTERP_QUALIFIER_NOPERSPECTIVE; - for (unsigned j = 0; j < instr->num_components; j++) { fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; -- 2.30.2