return reg;
}
-fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_c,
- fs_reg lod, fs_reg dPdy, int grad_components,
- uint32_t sampler)
-{
- int mlen;
- int base_mrf = 1;
- bool simd16 = false;
- fs_reg orig_dst;
-
- /* g0 header. */
- mlen = 1;
-
- if (shadow_c.file != BAD_FILE) {
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
-
- /* gen4's SIMD8 sampler always has the slots for u,v,r present.
- * the unused slots must be zeroed.
- */
- for (int i = coord_components; i < 3; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
- }
- mlen += 3;
-
- if (op == ir_tex) {
- /* There's no plain shadow compare message, so we use shadow
- * compare with a bias of 0.0.
- */
- bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
- mlen++;
- } else if (op == ir_txb || op == ir_txl) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen), lod);
- mlen++;
- } else {
- unreachable("Should not get here.");
- }
-
- bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c);
- mlen++;
- } else if (op == ir_tex) {
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
- /* zero the others. */
- for (int i = coord_components; i<3; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
- }
- /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
- mlen += 3;
- } else if (op == ir_txd) {
- fs_reg &dPdx = lod;
-
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
- /* the slots for u and v are always present, but r is optional */
- mlen += MAX2(coord_components, 2);
-
- /* P = u, v, r
- * dPdx = dudx, dvdx, drdx
- * dPdy = dudy, dvdy, drdy
- *
- * 1-arg: Does not exist.
- *
- * 2-arg: dudx dvdx dudy dvdy
- * dPdx.x dPdx.y dPdy.x dPdy.y
- * m4 m5 m6 m7
- *
- * 3-arg: dudx dvdx drdx dudy dvdy drdy
- * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
- * m5 m6 m7 m8 m9 m10
- */
- for (int i = 0; i < grad_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx);
- dPdx = offset(dPdx, bld, 1);
- }
- mlen += MAX2(grad_components, 2);
-
- for (int i = 0; i < grad_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy);
- dPdy = offset(dPdy, bld, 1);
- }
- mlen += MAX2(grad_components, 2);
- } else if (op == ir_txs) {
- /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
- simd16 = true;
- bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
- mlen += 2;
- } else {
- /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
- * instructions. We'll need to do SIMD16 here.
- */
- simd16 = true;
- assert(op == ir_txb || op == ir_txl || op == ir_txf);
-
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
- coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
-
- /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to
- * be necessary for TXF (ld), but seems wise to do for all messages.
- */
- for (int i = coord_components; i < 3; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
- }
-
- /* lod/bias appears after u/v/r. */
- mlen += 6;
-
- bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod);
- mlen++;
-
- /* The unused upper half. */
- mlen++;
- }
-
- if (simd16) {
- /* Now, since we're doing simd16, the return is 2 interleaved
- * vec4s where the odd-indexed ones are junk. We'll need to move
- * this weirdness around to the expected layout.
- */
- orig_dst = dst;
- dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type);
- }
-
- enum opcode opcode;
- switch (op) {
- case ir_tex: opcode = SHADER_OPCODE_TEX; break;
- case ir_txb: opcode = FS_OPCODE_TXB; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- default:
- unreachable("not reached");
- }
-
- fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->header_size = 1;
- inst->regs_written = simd16 ? 8 : 4;
-
- if (simd16) {
- for (int i = 0; i < 4; i++) {
- bld.MOV(orig_dst, dst);
- orig_dst = offset(orig_dst, bld, 1);
- dst = offset(dst, bld, 2);
- }
- }
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int vector_elements,
- fs_reg shadow_c, fs_reg lod,
- uint32_t sampler)
-{
- fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F);
- bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf || op == ir_txs;
-
- if (has_lod && shadow_c.file != BAD_FILE)
- no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
-
- if (op == ir_txd)
- no16("textureGrad unsupported in SIMD16.");
-
- /* Copy the coordinates. */
- for (int i = 0; i < vector_elements; i++) {
- bld.MOV(retype(offset(message, bld, i), coordinate.type), coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
-
- fs_reg msg_end = offset(message, bld, vector_elements);
-
- /* Messages other than sample and ld require all three components */
- if (vector_elements > 0 && (has_lod || shadow_c.file != BAD_FILE)) {
- for (int i = vector_elements; i < 3; i++) {
- bld.MOV(offset(message, bld, i), fs_reg(0.0f));
- }
- msg_end = offset(message, bld, 3);
- }
-
- if (has_lod) {
- fs_reg msg_lod = retype(msg_end, op == ir_txf ?
- BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, bld, 1);
- }
-
- if (shadow_c.file != BAD_FILE) {
- fs_reg msg_ref = offset(message, bld, 3 + has_lod);
- bld.MOV(msg_ref, shadow_c);
- msg_end = offset(msg_ref, bld, 1);
- }
-
- enum opcode opcode;
- switch (op) {
- case ir_tex: opcode = SHADER_OPCODE_TEX; break;
- case ir_txb: opcode = FS_OPCODE_TXB; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- default: unreachable("not reached");
- }
-
- fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
- inst->base_mrf = message.reg - 1;
- inst->mlen = msg_end.reg - inst->base_mrf;
- inst->header_size = 1;
- inst->regs_written = 8;
-
- return inst;
-}
-
-/* gen5's sampler has slots for u, v, r, array index, then optional
- * parameters like shadow comparitor or LOD bias. If optional
- * parameters aren't present, those base slots are optional and don't
- * need to be included in the message.
- *
- * We don't fill in the unnecessary slots regardless, which may look
- * surprising in the disassembly.
- */
-fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int vector_elements,
- fs_reg shadow_c,
- fs_reg lod, fs_reg lod2, int grad_components,
- fs_reg sample_index, uint32_t sampler,
- bool has_offset)
-{
- int reg_width = dispatch_width / 8;
- unsigned header_size = 0;
-
- fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F);
- fs_reg msg_coords = message;
-
- if (has_offset) {
- /* The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
- */
- header_size = 1;
- message.reg--;
- }
-
- for (int i = 0; i < vector_elements; i++) {
- bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type), coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
- fs_reg msg_end = offset(msg_coords, bld, vector_elements);
- fs_reg msg_lod = offset(msg_coords, bld, 4);
-
- if (shadow_c.file != BAD_FILE) {
- fs_reg msg_shadow = msg_lod;
- bld.MOV(msg_shadow, shadow_c);
- msg_lod = offset(msg_shadow, bld, 1);
- msg_end = msg_lod;
- }
-
- enum opcode opcode;
- switch (op) {
- case ir_tex:
- opcode = SHADER_OPCODE_TEX;
- break;
- case ir_txb:
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, bld, 1);
-
- opcode = FS_OPCODE_TXB;
- break;
- case ir_txl:
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, bld, 1);
-
- opcode = SHADER_OPCODE_TXL;
- break;
- case ir_txd: {
- /**
- * P = u, v, r
- * dPdx = dudx, dvdx, drdx
- * dPdy = dudy, dvdy, drdy
- *
- * Load up these values:
- * - dudx dudy dvdx dvdy drdx drdy
- * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
- */
- msg_end = msg_lod;
- for (int i = 0; i < grad_components; i++) {
- bld.MOV(msg_end, lod);
- lod = offset(lod, bld, 1);
- msg_end = offset(msg_end, bld, 1);
-
- bld.MOV(msg_end, lod2);
- lod2 = offset(lod2, bld, 1);
- msg_end = offset(msg_end, bld, 1);
- }
-
- opcode = SHADER_OPCODE_TXD;
- break;
- }
- case ir_txs:
- msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, bld, 1);
-
- opcode = SHADER_OPCODE_TXS;
- break;
- case ir_query_levels:
- msg_lod = msg_end;
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
- msg_end = offset(msg_lod, bld, 1);
-
- opcode = SHADER_OPCODE_TXS;
- break;
- case ir_txf:
- msg_lod = offset(msg_coords, bld, 3);
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
- msg_end = offset(msg_lod, bld, 1);
-
- opcode = SHADER_OPCODE_TXF;
- break;
- case ir_txf_ms:
- msg_lod = offset(msg_coords, bld, 3);
- /* lod */
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
- /* sample index */
- bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
- msg_end = offset(msg_lod, bld, 2);
-
- opcode = SHADER_OPCODE_TXF_CMS;
- break;
- case ir_lod:
- opcode = SHADER_OPCODE_LOD;
- break;
- case ir_tg4:
- opcode = SHADER_OPCODE_TG4;
- break;
- default:
- unreachable("not reached");
- }
-
- fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
- inst->base_mrf = message.reg;
- inst->mlen = msg_end.reg - message.reg;
- inst->header_size = header_size;
- inst->regs_written = 4 * reg_width;
-
- if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
- fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
- " disallowed by hardware\n");
- }
-
- return inst;
-}
-
-static bool
-is_high_sampler(const struct brw_device_info *devinfo, fs_reg sampler)
-{
- if (devinfo->gen < 8 && !devinfo->is_haswell)
- return false;
-
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_c,
- fs_reg lod, fs_reg lod2, int grad_components,
- fs_reg sample_index, fs_reg mcs, fs_reg sampler,
- fs_reg offset_value)
-{
- int reg_width = dispatch_width / 8;
- unsigned header_size = 0;
-
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
- for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
- sources[i] = vgrf(glsl_type::float_type);
- }
- int length = 0;
-
- if (op == ir_tg4 || offset_value.file != BAD_FILE ||
- is_high_sampler(devinfo, sampler)) {
- /* For general texture offsets (no txf workaround), we need a header to
- * put them in. Note that we're only reserving space for it in the
- * message payload as it will be initialized implicitly by the
- * generator.
- *
- * * ir4_tg4 needs to place its channel select in the header,
- * for interaction with ARB_texture_swizzle
- *
- * The sampler index is only 4-bits, so for larger sampler numbers we
- * need to offset the Sampler State Pointer in the header.
- */
- header_size = 1;
- sources[0] = fs_reg();
- length++;
- }
-
- if (shadow_c.file != BAD_FILE) {
- bld.MOV(sources[length], shadow_c);
- length++;
- }
-
- bool has_nonconstant_offset =
- offset_value.file != BAD_FILE && offset_value.file != IMM;
- bool coordinate_done = false;
-
- /* The sampler can only meaningfully compute LOD for fragment shader
- * messages. For all other stages, we change the opcode to ir_txl and
- * hardcode the LOD to 0.
- */
- if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) {
- op = ir_txl;
- lod = fs_reg(0.0f);
- }
-
- /* Set up the LOD info */
- switch (op) {
- case ir_tex:
- case ir_lod:
- break;
- case ir_txb:
- bld.MOV(sources[length], lod);
- length++;
- break;
- case ir_txl:
- bld.MOV(sources[length], lod);
- length++;
- break;
- case ir_txd: {
- no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
-
- /* Load dPdx and the coordinate together:
- * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
- */
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
-
- /* For cube map array, the coordinate is (u,v,r,ai) but there are
- * only derivatives for (u, v, r).
- */
- if (i < grad_components) {
- bld.MOV(sources[length], lod);
- lod = offset(lod, bld, 1);
- length++;
-
- bld.MOV(sources[length], lod2);
- lod2 = offset(lod2, bld, 1);
- length++;
- }
- }
-
- coordinate_done = true;
- break;
- }
- case ir_txs:
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
- length++;
- break;
- case ir_query_levels:
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u));
- length++;
- break;
- case ir_txf:
- /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
- * On Gen9 they are u, v, lod, r
- */
-
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
-
- if (devinfo->gen >= 9) {
- if (coord_components >= 2) {
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, bld, 1);
- }
- length++;
- }
-
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
- length++;
-
- for (int i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
- }
-
- coordinate_done = true;
- break;
- case ir_txf_ms:
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
- length++;
-
- /* data from the multisample control surface */
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
- length++;
-
- /* there is no offsetting for this message; just copy in the integer
- * texture coordinates
- */
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
- }
-
- coordinate_done = true;
- break;
- case ir_tg4:
- if (has_nonconstant_offset) {
- if (shadow_c.file != BAD_FILE)
- no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
-
- /* More crazy intermixing */
- for (int i = 0; i < 2; i++) { /* u, v */
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
- }
-
- for (int i = 0; i < 2; i++) { /* offu, offv */
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value);
- offset_value = offset(offset_value, bld, 1);
- length++;
- }
-
- if (coord_components == 3) { /* r if present */
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
- }
-
- coordinate_done = true;
- }
- break;
- }
-
- /* Set up the coordinate (except for cases where it was done above) */
- if (!coordinate_done) {
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, bld, 1);
- length++;
- }
- }
-
- int mlen;
- if (reg_width == 2)
- mlen = length * reg_width - header_size;
- else
- mlen = length * reg_width;
-
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_F);
- bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
-
- /* Generate the SEND */
- enum opcode opcode;
- switch (op) {
- case ir_tex: opcode = SHADER_OPCODE_TEX; break;
- case ir_txb: opcode = FS_OPCODE_TXB; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
- case ir_lod: opcode = SHADER_OPCODE_LOD; break;
- case ir_tg4:
- if (has_nonconstant_offset)
- opcode = SHADER_OPCODE_TG4_OFFSET;
- else
- opcode = SHADER_OPCODE_TG4;
- break;
- default:
- unreachable("not reached");
- }
- fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler);
- inst->base_mrf = -1;
- inst->mlen = mlen;
- inst->header_size = header_size;
- inst->regs_written = 4 * reg_width;
-
- if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
- fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
- " disallowed by hardware\n");
- }
-
- return inst;
-}
-
fs_reg
fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
bool is_rect, uint32_t sampler, int texunit)