case BI_LOAD_ATTR: return "load_attr";
case BI_LOAD_VAR: return "load_var";
case BI_LOAD_VAR_ADDRESS: return "load_var_address";
- case BI_MAKE_VEC: return "make_vec";
case BI_MINMAX: return "minmax";
case BI_MOV: return "mov";
case BI_SHIFT: return "shift";
fprintf(fp, "%s", bi_cond_name(branch->cond));
}
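+/* Prints the destination writemask as a .xyzw suffix, reading the
+ * per-byte mask in units of the destination type's size (so writemask
+ * 0xFFFF on a 32-bit destination prints as ".xyzw") */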
+static void
+bi_print_writemask(bi_instruction *ins, FILE *fp)
+{
+ unsigned bytes_per_comp = nir_alu_type_get_type_size(ins->dest_type) / 8;
+ unsigned comps = 16 / bytes_per_comp;
+ unsigned smask = (1 << bytes_per_comp) - 1;
+ fprintf(fp, ".");
+
+ for (unsigned i = 0; i < comps; ++i) {
+ unsigned masked = (ins->writemask >> (i * bytes_per_comp)) & smask;
+ if (!masked)
+ continue;
+
+ assert(masked == smask);
+ assert(i < 4);
+ fputc("xyzw"[i], fp);
+ }
+}
+
void
bi_print_instruction(bi_instruction *ins, FILE *fp)
{
fprintf(fp, " ");
bi_print_index(fp, ins, ins->dest);
+
+ if (ins->dest)
+ bi_print_writemask(ins, fp);
+
fprintf(fp, ", ");
bi_foreach_src(ins, s) {
bi_block_add_successor(ctx->current_block, branch->branch.target);
}
+/* Gets a bytemask for a complete vecN write */
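+/* (e.g. bi_mask_for_channels_32(4) = 0xFFFF, covering all 16 bytes of
+ * a 32-bit vec4; the load emitters below use this to mark every
+ * channel of the destination as written) */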
+static unsigned
+bi_mask_for_channels_32(unsigned i)
+{
+ return (1 << (4 * i)) - 1;
+}
+
static void
bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
},
.dest = bir_dest_index(&instr->dest),
.dest_type = nir_type_float | nir_dest_bit_size(instr->dest),
+ .writemask = bi_mask_for_channels_32(instr->num_components)
};
nir_src *offset = nir_get_io_offset_src(instr);
.type = BI_LOAD_ATTR,
.load = bi_direct_load_for_instr(instr),
.dest = bir_dest_index(&instr->dest),
- .dest_type = nir_intrinsic_type(instr)
+ .dest_type = nir_intrinsic_type(instr),
+ .writemask = bi_mask_for_channels_32(instr->num_components)
};
bi_emit(ctx, load);
.type = BI_LOAD_VAR_ADDRESS,
.load = bi_direct_load_for_instr(instr),
.dest_type = nir_intrinsic_type(instr),
- .dest = bi_make_temp(ctx)
+ .dest = bi_make_temp(ctx),
+ .writemask = bi_mask_for_channels_32(instr->num_components)
};
bi_instruction st = {
.load = bi_direct_load_for_instr(instr),
.dest = bir_dest_index(&instr->dest),
.dest_type = nir_intrinsic_type(instr),
+ .writemask = bi_mask_for_channels_32(instr->num_components),
.src = {
BIR_INDEX_ZERO /* TODO: UBOs */
}
.type = BI_MOV,
.dest = bir_ssa_index(&instr->def),
.dest_type = instr->def.bit_size | nir_type_uint,
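+ /* A constant writes all bytes of a single channel: 0xF for
+ * 32-bit, 0x3 for 16-bit, etc. */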
+ .writemask = (1 << (instr->def.bit_size / 8)) - 1,
.src = {
BIR_INDEX_CONSTANT
},
bi_print_shader(ctx, stdout);
bi_schedule(ctx);
- bi_print_shader(ctx, stdout);
ralloc_free(ctx);
}
BI_LOAD_ATTR,
BI_LOAD_VAR,
BI_LOAD_VAR_ADDRESS,
- BI_MAKE_VEC,
BI_MINMAX,
BI_MOV,
BI_SHIFT,
/* Round mode (requires BI_ROUNDMODE) */
enum bifrost_roundmode roundmode;
+ /* Writemask (one bit for each affected byte). This is quite
+ * restricted -- ALU ops can only write to a single channel (exception:
+ * <32-bit types, where you can write to 32/N contiguous aligned
+ * channels). Load/store can only write to all channels at once, in a
+ * sense. But it's still better to use this generic form than to have
+ * synthetic ops flying about, since we're essentially vector for RA
+ * purposes. */
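+ /* e.g. a full 32-bit vec4 write is 0xFFFF (all 16 bytes); a lone
+ * 32-bit write to .y is 0x00F0 (bytes 4-7); a 16-bit write to the
+ * second halfword is 0x000C (bytes 2-3). */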
+ uint16_t writemask;
+
/* Destination type. Usually the type of the instruction
* itself, but if sources and destination have different
* types, the type of the destination wins (so f2i would be