uint16_t mir_bytemask(midgard_instruction *ins);
uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits);
void mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask);
-unsigned mir_upper_override(midgard_instruction *ins);
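+/* inst_size is the execution size in bits; returns -1 when no override is
+ * needed, else the number of components to shift by */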
+signed mir_upper_override(midgard_instruction *ins, unsigned inst_size);
/* MIR printing */
.alu = {
.op = midgard_alu_op_imov,
.reg_mode = midgard_reg_mode_32,
- .dest_override = midgard_dest_override_none,
.outmod = midgard_outmod_int_wrap
},
};
assert(src_bitsize == dst_bitsize); \
break;
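+ /* Comparisons may mix 16-bit and 32-bit operands now that the override
+  * is deduced at pack time; sext appears unused here but is kept so the
+  * call sites stay unchanged */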
+#define ALU_CHECK_CMP(sext) \
+ assert(src_bitsize == 16 || src_bitsize == 32); \
+ assert(dst_bitsize == 16 || dst_bitsize == 32); \
+
#define ALU_CASE_BCAST(nir, _op, count) \
case nir_op_##nir: \
op = midgard_alu_op_##_op; \
broadcast_swizzle = count; \
- assert(src_bitsize == dst_bitsize); \
+ ALU_CHECK_CMP(true); \
break;
-#define ALU_CHECK_CMP(sext) \
- if (src_bitsize == 16 && dst_bitsize == 32) { \
- /* inferred */ \
- } else { \
- assert(src_bitsize == dst_bitsize); \
- } \
-
#define ALU_CASE_CMP(nir, _op, sext) \
case nir_op_##nir: \
op = midgard_alu_op_##_op; \
midgard_reg_mode reg_mode =
reg_mode_for_nir(instr);
- /* Do we need a destination override? Used for inline
- * type conversion */
-
- midgard_dest_override dest_override =
- midgard_dest_override_none;
-
/* Should we swap arguments? */
bool flip_src12 = false;
ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2);
ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3);
- ALU_CASE(b32all_fequal4, fball_eq);
+ ALU_CASE_CMP(b32all_fequal4, fball_eq, true);
ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2);
ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3);
- ALU_CASE(b32any_fnequal4, fbany_neq);
+ ALU_CASE_CMP(b32any_fnequal4, fbany_neq, true);
ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2);
ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3);
- ALU_CASE(b32all_iequal4, iball_eq);
+ ALU_CASE_CMP(b32all_iequal4, iball_eq, true);
ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2);
ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3);
- ALU_CASE(b32any_inequal4, ibany_neq);
+ ALU_CASE_CMP(b32any_inequal4, ibany_neq, true);
/* Source mods will be shoved in later */
ALU_CASE(fabs, fmov);
else
op = midgard_alu_op_imov;
- if (dst_bitsize == (src_bitsize * 2)) {
- /* inferred */
- } else if (src_bitsize == (dst_bitsize * 2)) {
- /* Converting down */
- dest_override = midgard_dest_override_lower;
- }
-
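+ /* The override is no longer chosen here; widening/narrowing is derived
+  * from the source and destination types when the instruction is packed */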
break;
}
midgard_vector_alu alu = {
.op = op,
.reg_mode = reg_mode,
- .dest_override = dest_override,
.outmod = outmod,
};
* override to the lower or upper half, shifting the effective mask in
* the latter, so AAAA.... becomes AAAA */
- unsigned upper_shift = mir_upper_override(ins);
+ unsigned inst_size = 8 << ins->alu.reg_mode;
+ signed upper_shift = mir_upper_override(ins, inst_size);
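+ /* upper_shift < 0 means no override; 0 means the small type occupies the
+  * lower half; positive means the upper half, so shift the mask down */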
- if (upper_shift) {
+ if (upper_shift >= 0) {
effective >>= upper_shift;
- ins->alu.dest_override = midgard_dest_override_upper;
+ ins->alu.dest_override = upper_shift ?
+ midgard_dest_override_upper :
+ midgard_dest_override_lower;
+ } else {
+ ins->alu.dest_override = midgard_dest_override_none;
}
if (ins->alu.reg_mode == midgard_reg_mode_32)
ins->texture.type = bundle->tag;
ins->texture.next_type = next_tag;
- ins->texture.mask = ins->mask;
+
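+ /* Textures are packed with a 32-bit execution size, so a 16-bit
+  * destination yields an override and the mask must be shifted to match */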
+ signed override = mir_upper_override(ins, 32);
+
+ ins->texture.mask = override > 0 ?
+ ins->mask >> override :
+ ins->mask;
+
mir_pack_swizzle_tex(ins);
unsigned osz = nir_alu_type_get_type_size(ins->dest_type);
assert(isz == 32 || isz == 16);
ins->texture.out_full = (osz == 32);
+ ins->texture.out_upper = override > 0;
ins->texture.in_reg_full = (isz == 32);
ins->texture.sampler_type = midgard_sampler_type(ins->dest_type);
struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_size[3]);
/* First, install the texture coordinate */
- ins->texture.in_reg_full = 1;
- ins->texture.in_reg_upper = 0;
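+ /* These fields are now filled in at pack time from the actual
+  * source/destination sizes rather than assumed full here */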
ins->texture.in_reg_select = coord.reg & 1;
offset_swizzle(ins->swizzle[1], coord.offset, coord.size, dest.size, 0);
/* Next, install the destination */
- ins->texture.out_full = 1;
- ins->texture.out_upper = 0;
ins->texture.out_reg_select = dest.reg & 1;
- offset_swizzle(ins->swizzle[0], 0, 4, dest.size, dest.offset);
+ offset_swizzle(ins->swizzle[0], 0, 4, dest.size,
+ dest_size == 2 ? dest.offset % 8 :
+ dest.offset);
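+ /* A 16-bit destination writes within one 8-byte half (selected by
+  * out_upper), so only the offset within that half matters here */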
mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
/* If there is a register LOD/bias, use it */
/* Only 16/32-bit can run on a scalar unit */
could_scalar &= ains->alu.reg_mode != midgard_reg_mode_8;
could_scalar &= ains->alu.reg_mode != midgard_reg_mode_64;
- could_scalar &= ains->alu.dest_override == midgard_dest_override_none;
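+ /* dest_override is not populated until pack time, so it can no longer
+  * gate the scalar unit here */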
if (ains->src[0] != ~0)
could_scalar &= (sz0 == 16) || (sz0 == 32);
-/* Checks if we should use an upper destination override, rather than the lower
- * one in the IR. Returns zero if no, returns the bytes to shift otherwise */
+/* Checks if we should use an upper destination override, rather than the
+ * lower one in the IR. Returns -1 if no override is needed, otherwise the
+ * number of components to shift the mask by (0 selects the lower half) */
-unsigned
-mir_upper_override(midgard_instruction *ins)
+signed
+mir_upper_override(midgard_instruction *ins, unsigned inst_size)
{
- /* If there is no override, there is no upper override, tautology */
- if (ins->alu.dest_override == midgard_dest_override_none)
- return 0;
+ unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
- /* Make sure we didn't already lower somehow */
- assert(ins->alu.dest_override == midgard_dest_override_lower);
+ /* If the sizes are the same, there's nothing to override */
+ if (type_size == inst_size)
+ return -1;
/* There are 16 bytes per vector, so there are (16/bytes)
* components per vector. So the magic half is half of
* (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits
* */
- unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
unsigned threshold = 64 / type_size;
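+ /* For example, a 16-bit destination gives threshold = 64/16 = 4, so a
+  * write to components 4-7 (mask 0xf0) is shifted down by 4 and packed
+  * with an upper override */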
/* How many components did we shift over? */