/*
* The first index is the address to load from, and the second index is the
- * number of array elements to load. For UBO's (and SSBO's), the first index
- * is the UBO buffer index (TODO nonconstant UBO buffer index) and the second
- * and third indices play the role of the first and second indices in the other
- * loads. Indirect loads have an additional register input, which is added
- * to the constant address to compute the final address to load from.
+ * number of array elements to load. Indirect loads have an additional
+ * register input, which is added to the constant address to compute the
+ * final address to load from. For UBOs (and SSBOs), the first source is
+ * the (possibly constant) UBO buffer index and the indirect (if it exists)
+ * is the second source.
*
* For vector backends, the address is in terms of one vec4, and so each array
* element is +4 scalar components from the previous array element. For scalar
 * backends, the address is in terms of scalar components, so array elements
 * begin immediately after the previous array element.
*/
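Illustration (not part of the patch): a minimal sketch of the new source/index
layout for an indirect UBO load. The shader, block_index, and indirect_offset
names are hypothetical; the helpers are the stock ones from nir.h.

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_ubo_indirect);
   load->num_components = 4;
   load->src[0] = nir_src_for_ssa(block_index);      /* UBO buffer index */
   load->src[1] = nir_src_for_ssa(indirect_offset);  /* added to the constant offset */
   load->const_index[0] = 16;   /* constant base offset */
   load->const_index[1] = 1;    /* number of array elements */
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL);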
-#define LOAD(name, num_indices, flags) \
- INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
- INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+#define LOAD(name, extra_srcs, flags) \
+ INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
+ true, 0, 0, 2, flags)
-LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
-LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
-LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
-/* LOAD(ssbo, 2, 0) */
+LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* LOAD(ssbo, 1, 0) */
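For reference (not part of the patch), assuming the usual INTRINSIC(name,
num_srcs, src_components, has_dest, dest_components, num_variables,
num_indices, flags) signature, the new LOAD(ubo, 1, ...) line expands to:

   INTRINSIC(load_ubo, 1, ARR(1), true, 0, 0, 2,
             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
   INTRINSIC(load_ubo_indirect, 2, ARR(1, 1), true, 0, 0, 2,
             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)

i.e. load_ubo takes a single source (the buffer index) and load_ubo_indirect
takes two (the buffer index, then the indirect offset), each with two constant
indices.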
/*
* Stores work the same way as loads, except now the first register input is
break;
}
+ case nir_intrinsic_load_ubo_indirect:
+ has_indirect = true;
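+      /* fallthrough */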
case nir_intrinsic_load_ubo: {
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
- (unsigned) instr->const_index[0]);
- fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
- packed_consts.type = dest.type;
-
- fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
- emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts, surf_index, const_offset_reg));
+ nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
+ fs_reg surf_index;
- for (unsigned i = 0; i < instr->num_components; i++) {
- packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
+ if (const_index) {
+ surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
+ const_index->u[0]);
+ } else {
+ /* The block index is not a constant. Evaluate the index expression
+ * per-channel and add the base UBO index; the generator will select
+ * a value from any live channel.
+ */
+ surf_index = fs_reg(this, glsl_type::uint_type);
+ emit(ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start)))
+ ->force_writemask_all = true;
- /* The std140 packing rules don't allow vectors to cross 16-byte
- * boundaries, and a reg is 32 bytes.
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
*/
- assert(packed_consts.subreg_offset < 32);
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumUniformBlocks - 1);
+ }
- fs_inst *inst = MOV(dest, packed_consts);
- if (instr->has_predicate)
- inst->predicate = BRW_PREDICATE_NORMAL;
- emit(inst);
+ if (has_indirect) {
+ /* Turn the byte offset into a dword offset. */
+ fs_reg base_offset = fs_reg(this, glsl_type::int_type);
+ emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
+ BRW_REGISTER_TYPE_D),
+ fs_reg(2)));
- dest.reg_offset++;
- }
- break;
- }
+ unsigned vec4_offset = instr->const_index[0] / 4;
+ for (int i = 0; i < instr->num_components; i++) {
+ exec_list list = VARYING_PULL_CONSTANT_LOAD(offset(dest, i),
+ surf_index, base_offset,
+ vec4_offset + i);
- case nir_intrinsic_load_ubo_indirect: {
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
- instr->const_index[0]);
- /* Turn the byte offset into a dword offset. */
- unsigned base_offset = instr->const_index[1] / 4;
- fs_reg offset = fs_reg(this, glsl_type::int_type);
- emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
- fs_reg(2)));
+ fs_inst *last_inst = (fs_inst *) list.get_tail();
+ if (instr->has_predicate)
+ last_inst->predicate = BRW_PREDICATE_NORMAL;
+ emit(list);
+ }
+ } else {
+ fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
+ packed_consts.type = dest.type;
- for (unsigned i = 0; i < instr->num_components; i++) {
- exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
- offset, base_offset + i);
- fs_inst *last_inst = (fs_inst *) list.get_tail();
- if (instr->has_predicate)
- last_inst->predicate = BRW_PREDICATE_NORMAL;
- emit(list);
+ fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
+ emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
+ surf_index, const_offset_reg);
- dest.reg_offset++;
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
+
+ /* The std140 packing rules don't allow vectors to cross 16-byte
+ * boundaries, and a reg is 32 bytes.
+ */
+ assert(packed_consts.subreg_offset < 32);
+
+ fs_inst *inst = MOV(dest, packed_consts);
+ if (instr->has_predicate)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ emit(inst);
+
+ dest.reg_offset++;
+ }
}
break;
}
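For context (not part of the patch): nir_src_as_const_value() only reports a
constant when the source is an SSA value produced by a load_const instruction,
so a dynamically uniform but non-literal block index (e.g. one read from a
uniform) still takes the ADD + brw_mark_surface_used path above. Roughly, as a
sketch of the assumed semantics (see nir.h for the real helper):

   static inline nir_const_value *
   src_as_const_value_sketch(nir_src src)
   {
      if (!src.is_ssa ||
          src.ssa->parent_instr->type != nir_instr_type_load_const)
         return NULL;
      return &nir_instr_as_load_const(src.ssa->parent_instr)->value;
   }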