# between geometry stages - perhaps it's explicit access to the vertex cache.
# src[] = { value, offset }.
-store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
+store("shared_ir3", 2, [BASE, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { offset }.
load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
# src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
-# const_index[] = { write_mask, align_mul, align_offset }
+# const_index[] = { access, align_mul, align_offset }
-intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+intrinsic("store_global_ir3", [0, 2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# src[] = { address(vec2 of hi+lo uint32_t), offset }.
# const_index[] = { access, align_mul, align_offset }
intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
ir3_split_dest(b, dst, load, 0, intr->num_components);
}
-/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+/* src[] = { value, offset }. const_index[] = { base } */
static void
emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *store, *offset;
struct ir3_instruction * const *value;
- unsigned base, wrmask;
value = ir3_get_src(ctx, &intr->src[0]);
offset = ir3_get_src(ctx, &intr->src[1])[0];
- base = nir_intrinsic_base(intr);
- wrmask = nir_intrinsic_write_mask(intr);
-
- /* Combine groups of consecutive enabled channels in one write
- * message. We use ffs to find the first enabled channel and then ffs on
- * the bit-inverse, down-shifted writemask to determine the length of
- * the block of enabled bits.
- *
- * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
- */
- while (wrmask) {
- unsigned first_component = ffs(wrmask) - 1;
- unsigned length = ffs(~(wrmask >> first_component)) - 1;
-
- store = ir3_STLW(b, offset, 0,
- ir3_create_collect(ctx, &value[first_component], length), 0,
- create_immed(b, length), 0);
-
- store->cat6.dst_offset = first_component + base;
- store->cat6.type = utype_src(intr->src[0]);
- store->barrier_class = IR3_BARRIER_SHARED_W;
- store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
+ store = ir3_STLW(b, offset, 0,
+ ir3_create_collect(ctx, value, intr->num_components), 0,
+ create_immed(b, intr->num_components), 0);
- array_insert(b, b->keeps, store);
+ store->cat6.dst_offset = nir_intrinsic_base(intr);
+ store->cat6.type = utype_src(intr->src[0]);
+ store->barrier_class = IR3_BARRIER_SHARED_W;
+ store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
- /* Clear the bits in the writemask that we just wrote, then try
- * again to see if more channels are left.
- */
- wrmask &= (15 << (first_component + length));
- }
+ array_insert(b, b->keeps, store);
}
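Why dropping the splitting loop is safe: once the write mask is guaranteed to be the full, contiguous mask (see the asserts added in the lowering hunks below), the ffs() trick above always finds a single run starting at component 0 and covering every component, so the loop only ever executed once. A standalone illustration of that claim (not driver code, plain libc only):

#include <assert.h>
#include <strings.h> /* ffs() */

int
main(void)
{
   for (unsigned n = 1; n <= 4; n++) {
      unsigned wrmask = (1u << n) - 1;            /* full mask, e.g. n=3 -> 0b111 */
      unsigned first_component = ffs(wrmask) - 1; /* always 0 */
      unsigned length = ffs(~(wrmask >> first_component)) - 1; /* always n */

      assert(first_component == 0 && length == n);
   }
   return 0;
}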
case nir_intrinsic_store_output: {
// src[] = { value, offset }.
+ /* nir_lower_io_to_temporaries replaces all access to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here.
+ */
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
b->cursor = nir_instr_remove(&intr->instr);
nir_ssa_def *vertex_id = build_vertex_id(b, state);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
- nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
store->src[1] = nir_src_for_ssa(offset);
-
store->num_components = intr->num_components;
nir_builder_instr_insert(b, &store->instr);
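What the new assert accepts: wrmask + 1 is a nonzero power of two exactly when the mask covers components 0..n-1 with no holes, which is the shape nir_lower_io_to_temporaries guarantees for these stores. A standalone illustration (not driver code; the local helper performs an equivalent check to util_is_power_of_two_nonzero()):

#include <assert.h>
#include <stdbool.h>

static bool
full_contiguous_wrmask(unsigned wrmask)
{
   unsigned v = wrmask + 1;
   return v != 0 && (v & (v - 1)) == 0;
}

int
main(void)
{
   assert(full_contiguous_wrmask(0x1));  /* .x    */
   assert(full_contiguous_wrmask(0x3));  /* .xy   */
   assert(full_contiguous_wrmask(0xf));  /* .xyzw */
   assert(!full_contiguous_wrmask(0x5)); /* .xz - hole, the old loop would have run twice */
   assert(!full_contiguous_wrmask(0x2)); /* .y  - does not start at component 0 */
   return 0;
}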
b->cursor = nir_before_instr(&intr->instr);
+ /* nir_lower_io_to_temporaries replaces all access to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here.
+ */
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
nir_ssa_def *value = intr->src[0].ssa;
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
nir_ssa_def *offset = build_per_vertex_offset(b, state,
intr->src[1].ssa, intr->src[2].ssa, var);
- nir_intrinsic_instr *store =
- replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
- nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
-
- nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+ nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
break;
}
debug_assert(nir_intrinsic_component(intr) == 0);
- nir_intrinsic_instr *store =
- replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
- intr->src[0].ssa, address, offset);
+ /* nir_lower_io_to_temporaries replaces all access to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here.
+ */
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
- nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+ intr->src[0].ssa, address, offset);
}
break;
}
store->src[2] = nir_src_for_ssa(offset);
nir_builder_instr_insert(b, &store->instr);
store->num_components = levels[0]->num_components;
- nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
if (levels[1]) {
store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
store->src[2] = nir_src_for_ssa(offset);
nir_builder_instr_insert(b, &store->instr);
store->num_components = levels[1]->num_components;
- nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
}
/* Finally, Insert endpatch instruction: