[HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] =
"DC 4x2 atomic counter op",
[HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
+ [GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
+ "DC untyped atomic float op",
};
static const char *const aop[16] = {
[BRW_AOP_PREDEC] = "predec",
};
+static const char *const aop_float[4] = {
+ [BRW_AOP_FMAX] = "fmax",
+ [BRW_AOP_FMIN] = "fmin",
+ [BRW_AOP_FCMPWR] = "fcmpwr",
+};
+
static const char * const pixel_interpolator_msg_types[4] = {
[GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset",
[GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position",
simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf);
break;
}
+ case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+ format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
+ control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
+ &space);
+ break;
default:
format(file, "0x%x", msg_ctrl);
}
bool response_expected,
bool header_present);
+void
+brw_untyped_atomic_float(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg payload,
+ struct brw_reg surface,
+ unsigned atomic_op,
+ unsigned msg_length,
+ bool response_expected,
+ bool header_present);
+
void
brw_untyped_surface_read(struct brw_codegen *p,
struct brw_reg dst,
*/
SHADER_OPCODE_UNTYPED_ATOMIC,
SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
+ SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
+ SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
+#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
/* GEN9 */
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
#define GEN8_BTI_STATELESS_IA_COHERENT 255
#define GEN8_BTI_STATELESS_NON_COHERENT 253
-/* dataport atomic operations. */
+/* Dataport atomic operations for Untyped Atomic Integer Operation message
+ * (and others).
+ */
#define BRW_AOP_AND 1
#define BRW_AOP_OR 2
#define BRW_AOP_XOR 3
#define BRW_AOP_CMPWR 14
#define BRW_AOP_PREDEC 15
+/* Dataport atomic operations for Untyped Atomic Float Operation message. */
+#define BRW_AOP_FMAX 1
+#define BRW_AOP_FMIN 2
+#define BRW_AOP_FCMPWR 3
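+/* Note: the BRW_AOP_F* encodings above overlap the integer BRW_AOP_* values;
+ * they are only interpreted as float operations by the untyped atomic float
+ * message type.
+ */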
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
payload, surface, desc);
}
+static uint32_t
+brw_dp_untyped_atomic_float_desc(struct brw_codegen *p,
+ unsigned atomic_op,
+ bool response_expected)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;
+ unsigned msg_control =
+ atomic_op | /* Atomic Operation Type: BRW_AOP_F* */
+ (response_expected ? 1 << 5 : 0); /* Return data expected */
+
+ assert(devinfo->gen >= 9);
+ assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
+
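+   /* Anything other than a SIMD16 execution size is encoded as SIMD8. */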
+ if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
+ msg_control |= 1 << 4; /* SIMD8 mode */
+
+ return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+void
+brw_untyped_atomic_float(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg payload,
+ struct brw_reg surface,
+ unsigned atomic_op,
+ unsigned msg_length,
+ bool response_expected,
+ bool header_present)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+
+ assert(devinfo->gen >= 9);
+ assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
+
+ const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
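+   /* Atomics return at most a single channel (the previous value) per lane. */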
+ const unsigned response_length = brw_surface_payload_size(
+ p, response_expected, true, true);
+ const unsigned desc =
+ brw_message_desc(devinfo, msg_length, response_length, header_present) |
+ brw_dp_untyped_atomic_float_desc(p, atomic_op, response_expected);
+
+ brw_send_indirect_surface_message(p, sfid,
+ brw_writemask(dst, WRITEMASK_XYZW),
+ payload, surface, desc);
+}
+
static uint32_t
brw_dp_untyped_surface_read_desc(struct brw_codegen *p,
unsigned num_channels)
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return (i == 0 ? 2 : 1);
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: {
+ assert(src[3].file == IMM &&
+ src[4].file == IMM);
+ const unsigned op = src[4].ud;
+ /* Surface coordinates. */
+ if (i == 0)
+ return src[3].ud;
+      /* Surface operation source(s): FCMPWR reads both a comparison value
+       * and a swap value; the other float atomics read a single operand.
+       */
+      else if (i == 1 && op == BRW_AOP_FCMPWR)
+         return 2;
+      else
+         return 1;
+ }
+
default:
return 1;
}
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
ibld.sample_mask_reg());
break;
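+   /* Float atomics use the same surface-message lowering as the integer
+    * untyped atomics; only the opcode differs.
+    */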
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
+ ibld.sample_mask_reg());
+ break;
+
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_READ,
return 8;
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
nir_intrinsic_instr *instr);
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
int op, nir_intrinsic_instr *instr);
+ void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
+ int op, nir_intrinsic_instr *instr);
void nir_emit_shared_atomic(const brw::fs_builder &bld,
int op, nir_intrinsic_instr *instr);
+ void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
+ int op, nir_intrinsic_instr *instr);
void nir_emit_texture(const brw::fs_builder &bld,
nir_tex_instr *instr);
void nir_emit_jump(const brw::fs_builder &bld,
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
switch (inst->opcode) {
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
return true;
inst->header_size);
break;
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud,
+ inst->mlen, !inst->dst.is_null(),
+ inst->header_size);
+ break;
+
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
case nir_intrinsic_shared_atomic_comp_swap:
nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
break;
+ case nir_intrinsic_shared_atomic_fmin:
+ nir_emit_shared_atomic_float(bld, BRW_AOP_FMIN, instr);
+ break;
+ case nir_intrinsic_shared_atomic_fmax:
+ nir_emit_shared_atomic_float(bld, BRW_AOP_FMAX, instr);
+ break;
+ case nir_intrinsic_shared_atomic_fcomp_swap:
+ nir_emit_shared_atomic_float(bld, BRW_AOP_FCMPWR, instr);
+ break;
case nir_intrinsic_load_shared: {
assert(devinfo->gen >= 7);
case nir_intrinsic_ssbo_atomic_comp_swap:
nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
break;
+ case nir_intrinsic_ssbo_atomic_fmin:
+ nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_fmax:
+ nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_fcomp_swap:
+ nir_emit_ssbo_atomic_float(bld, BRW_AOP_FCMPWR, instr);
+ break;
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
bld.MOV(dest, atomic_result);
}
+void
+fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
+ int op, nir_intrinsic_instr *instr)
+{
+ if (stage == MESA_SHADER_FRAGMENT)
+ brw_wm_prog_data(prog_data)->has_side_effects = true;
+
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ fs_reg surface;
+ nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+ if (const_surface) {
+ unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
+ const_surface->u32[0];
+ surface = brw_imm_ud(surf_index);
+ brw_mark_surface_used(prog_data, surf_index);
+ } else {
+ surface = vgrf(glsl_type::uint_type);
+ bld.ADD(surface, get_nir_src(instr->src[0]),
+ brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
+
+      /* Assume this may touch any SSBO. This is the same as we do for other
+       * UBO/SSBO accesses with a non-constant surface index.
+       */
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ssbo_start +
+ nir->info.num_ssbos - 1);
+ }
+
+ fs_reg offset = get_nir_src(instr->src[1]);
+ fs_reg data1 = get_nir_src(instr->src[2]);
+ fs_reg data2;
+ if (op == BRW_AOP_FCMPWR)
+ data2 = get_nir_src(instr->src[3]);
+
+ /* Emit the actual atomic operation */
+
+ fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
+ dest.type = atomic_result.type;
+ bld.MOV(dest, atomic_result);
+}
+
void
fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
bld.MOV(dest, atomic_result);
}
+void
+fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
+ int op, nir_intrinsic_instr *instr)
+{
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
+ fs_reg offset;
+ fs_reg data1 = get_nir_src(instr->src[1]);
+ fs_reg data2;
+ if (op == BRW_AOP_FCMPWR)
+ data2 = get_nir_src(instr->src[2]);
+
+ /* Get the offset */
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset = vgrf(glsl_type::uint_type);
+ bld.ADD(offset,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
+   /* Emit the actual atomic operation */
+
+ fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
+ dest.type = atomic_result.type;
+ bld.MOV(dest, atomic_result);
+}
+
void
fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
{
addr, tmp, surface, dims, op, rsize, pred);
}
+ /**
+ * Emit an untyped surface atomic float opcode. \p dims determines the
+ * number of components of the address and \p rsize the number of
+ * components of the returned value (either zero or one).
+ */
+ fs_reg
+ emit_untyped_atomic_float(const fs_builder &bld,
+ const fs_reg &surface, const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred)
+ {
+ /* FINISHME: Factor out this frequently recurring pattern into a
+ * helper function.
+ */
+ const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ const fs_reg srcs[] = { src0, src1 };
+ const fs_reg tmp = bld.vgrf(src0.type, n);
+ bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
+
+ return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+ addr, tmp, surface, dims, op, rsize, pred);
+ }
+
/**
* Emit a typed surface read opcode. \p dims determines the number of
* components of the address and \p size the number of components of the
unsigned dims, unsigned rsize, unsigned op,
brw_predicate pred = BRW_PREDICATE_NONE);
+ fs_reg
+ emit_untyped_atomic_float(const fs_builder &bld,
+ const fs_reg &surface, const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred);
+
fs_reg
emit_typed_read(const fs_builder &bld, const fs_reg &surface,
const fs_reg &addr, unsigned dims, unsigned size);
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
case SHADER_OPCODE_TYPED_ATOMIC:
/* Test code:
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
return "untyped_atomic";
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
return "untyped_atomic_logical";
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+ return "untyped_atomic_float";
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ return "untyped_atomic_float_logical";
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
return "untyped_surface_read";
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
switch (opcode) {
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: