From e644ed468f986b44310b1cc5d5695ed4d473223d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 26 Nov 2018 15:15:04 -0600 Subject: [PATCH] intel/fs: Implement nir_intrinsic_global_atomic_* Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_disasm.c | 5 + src/intel/compiler/brw_eu.h | 40 ++++++++ src/intel/compiler/brw_eu_defines.h | 4 + src/intel/compiler/brw_fs.cpp | 47 ++++++++++ src/intel/compiler/brw_fs.h | 4 + src/intel/compiler/brw_fs_nir.cpp | 94 +++++++++++++++++++ .../compiler/brw_schedule_instructions.cpp | 2 + src/intel/compiler/brw_shader.cpp | 6 ++ 8 files changed, 202 insertions(+) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 5624bedd2e7..efca3e2ce7d 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -439,10 +439,13 @@ static const char *const dp_dc1_msg_type_hsw[32] = { [HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write", [GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ] = "DC A64 scattered read", [GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ] = "DC A64 untyped surface read", + [GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP] = "DC A64 untyped atomic op", [GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE] = "DC A64 untyped surface write", [GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE] = "DC A64 scattered write", [GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] = "DC untyped atomic float op", + [GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] = + "DC A64 untyped atomic float op", }; static const char *const aop[16] = { @@ -1940,6 +1943,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2: case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2: + case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP: control(file, "atomic op", aop, msg_ctrl & 0xf, &space); break; case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: @@ -1954,6 +1958,7 
@@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, break; } case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: + case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP: format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16); control(file, "atomic float op", aop_float, msg_ctrl & 0xf, &space); diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index ec3aafb8363..104cbece9b3 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -749,6 +749,46 @@ brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info *devinfo, return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control); } +static inline uint32_t +brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo, + unsigned exec_size, /**< 0 for SIMD4x2 */ + unsigned bit_size, + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size == 8); + assert(devinfo->gen >= 8); + assert(bit_size == 32 || bit_size == 64); + + const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP; + + const unsigned msg_control = + SET_BITS(atomic_op, 3, 0) | + SET_BITS(bit_size == 64, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size == 8); + assert(devinfo->gen >= 9); + + assert(exec_size > 0); + const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP; + + const unsigned msg_control = + SET_BITS(atomic_op, 1, 0) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control); +} + static inline uint32_t brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo, unsigned exec_size, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 
6c2a2f8ef7c..c0fee90fe5f 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -424,6 +424,8 @@ enum opcode { SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL, SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL, SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL, + SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, + SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL, SHADER_OPCODE_TYPED_ATOMIC, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, @@ -1185,9 +1187,11 @@ enum brw_message_target { #define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13 #define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11 +#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19 #define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a #define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b +#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d /* GEN9 */ #define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12 diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 28a6b44ea49..5a18ba86a96 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -797,6 +797,35 @@ fs_inst::components_read(unsigned i) const assert(src[2].file == IMM); return i == 1 ? src[2].ud : 1; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + assert(src[2].file == IMM); + if (i == 1) { + /* Data source */ + const unsigned op = src[2].ud; + switch (op) { + case BRW_AOP_INC: + case BRW_AOP_DEC: + case BRW_AOP_PREDEC: + return 0; + case BRW_AOP_CMPWR: + return 2; + default: + return 1; + } + } else { + return 1; + } + + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + assert(src[2].file == IMM); + if (i == 1) { + /* Data source */ + const unsigned op = src[2].ud; + return op == BRW_AOP_FCMPWR ? 
2 : 1; + } else { + return 1; + } + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: /* Scattered logical opcodes use the following params: * src[0] Surface coordinates @@ -5292,6 +5321,18 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst) true /* write */); break; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 32, + arg, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size, + arg, /* atomic_op */ + !inst->dst.is_null()); + break; + default: unreachable("Unknown A64 logical instruction"); } @@ -5492,6 +5533,8 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: lower_a64_logical_send(ibld, inst); break; @@ -5998,6 +6041,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: return devinfo->gen <= 8 ? 
8 : MIN2(16, inst->exec_size); + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + return 8; + case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8: diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 5361b768003..73aebbcfb22 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -233,6 +233,10 @@ public: int op, nir_intrinsic_instr *instr); void nir_emit_shared_atomic_float(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_global_atomic(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); + void nir_emit_global_atomic_float(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_texture(const brw::fs_builder &bld, nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 06d45d77002..1041296b903 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4029,6 +4029,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } break; + case nir_intrinsic_global_atomic_add: + nir_emit_global_atomic(bld, get_op_for_atomic_add(instr, 1), instr); + break; + case nir_intrinsic_global_atomic_imin: + nir_emit_global_atomic(bld, BRW_AOP_IMIN, instr); + break; + case nir_intrinsic_global_atomic_umin: + nir_emit_global_atomic(bld, BRW_AOP_UMIN, instr); + break; + case nir_intrinsic_global_atomic_imax: + nir_emit_global_atomic(bld, BRW_AOP_IMAX, instr); + break; + case nir_intrinsic_global_atomic_umax: + nir_emit_global_atomic(bld, BRW_AOP_UMAX, instr); + break; + case nir_intrinsic_global_atomic_and: + nir_emit_global_atomic(bld, BRW_AOP_AND, instr); + break; + case nir_intrinsic_global_atomic_or: + nir_emit_global_atomic(bld, BRW_AOP_OR, instr); + break; + case 
nir_intrinsic_global_atomic_xor: + nir_emit_global_atomic(bld, BRW_AOP_XOR, instr); + break; + case nir_intrinsic_global_atomic_exchange: + nir_emit_global_atomic(bld, BRW_AOP_MOV, instr); + break; + case nir_intrinsic_global_atomic_comp_swap: + nir_emit_global_atomic(bld, BRW_AOP_CMPWR, instr); + break; + case nir_intrinsic_global_atomic_fmin: + nir_emit_global_atomic_float(bld, BRW_AOP_FMIN, instr); + break; + case nir_intrinsic_global_atomic_fmax: + nir_emit_global_atomic_float(bld, BRW_AOP_FMAX, instr); + break; + case nir_intrinsic_global_atomic_fcomp_swap: + nir_emit_global_atomic_float(bld, BRW_AOP_FCMPWR, instr); + break; + case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); @@ -4702,6 +4742,60 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, bld.MOV(dest, atomic_result); } +void +fs_visitor::nir_emit_global_atomic(const fs_builder &bld, + int op, nir_intrinsic_instr *instr) +{ + if (stage == MESA_SHADER_FRAGMENT) + brw_wm_prog_data(prog_data)->has_side_effects = true; + + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + fs_reg addr = get_nir_src(instr->src[0]); + + fs_reg data; + if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) + data = get_nir_src(instr->src[1]); + + if (op == BRW_AOP_CMPWR) { + fs_reg tmp = bld.vgrf(data.type, 2); + fs_reg sources[2] = { data, get_nir_src(instr->src[2]) }; + bld.LOAD_PAYLOAD(tmp, sources, 2, 0); + data = tmp; + } + + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, + dest, addr, data, brw_imm_ud(op)); +} + +void +fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld, + int op, nir_intrinsic_instr *instr) +{ + if (stage == MESA_SHADER_FRAGMENT) + brw_wm_prog_data(prog_data)->has_side_effects = true; + + assert(nir_intrinsic_infos[instr->intrinsic].has_dest); + fs_reg dest = get_nir_dest(instr->dest); + + fs_reg addr = get_nir_src(instr->src[0]); + + assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != 
BRW_AOP_PREDEC); + fs_reg data = get_nir_src(instr->src[1]); + + if (op == BRW_AOP_FCMPWR) { + fs_reg tmp = bld.vgrf(data.type, 2); + fs_reg sources[2] = { data, get_nir_src(instr->src[2]) }; + bld.LOAD_PAYLOAD(tmp, sources, 2, 0); + data = tmp; + } + + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL, + dest, addr, data, brw_imm_ud(op)); /* lowered via the float desc */ +} + void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 692398da496..861b2abfff2 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -499,6 +499,8 @@ schedule_node::set_latency_gen7(bool is_haswell) case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP: case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: + case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP: + case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP: /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ latency = 14000; break; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index c2751557af8..569e68e02af 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -302,6 +302,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "a64_byte_scattered_read_logical"; case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: return "a64_byte_scattered_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + return "a64_untyped_atomic_float_logical"; case SHADER_OPCODE_TYPED_ATOMIC: return "typed_atomic"; case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: @@ -1020,6 +1024,8 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: case 
SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC: -- 2.30.2