From cfaaa9bbb7a6ab5819f4fa9e38352b72d6293cff Mon Sep 17 00:00:00 2001
From: Francisco Jerez
Date: Wed, 11 Sep 2013 14:01:50 -0700
Subject: [PATCH] i965/gen7: Implement code generation for untyped atomic instructions.

Reviewed-by: Paul Berry
---
 src/mesa/drivers/dri/i965/brw_defines.h       |  2 +
 src/mesa/drivers/dri/i965/brw_eu.h            |  9 +++
 src/mesa/drivers/dri/i965/brw_eu_emit.c       | 62 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.cpp          |  2 +
 src/mesa/drivers/dri/i965/brw_fs.h            |  5 ++
 .../drivers/dri/i965/brw_fs_generator.cpp     | 21 +++++++
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |  2 +
 src/mesa/drivers/dri/i965/brw_vec4.h          |  5 ++
 .../drivers/dri/i965/brw_vec4_generator.cpp   | 22 +++++++
 9 files changed, 130 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 29ad9d02c91..1cadf6c2c3e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -775,6 +775,8 @@ enum opcode {
 
    SHADER_OPCODE_SHADER_TIME_ADD,
 
+   SHADER_OPCODE_UNTYPED_ATOMIC,
+
    FS_OPCODE_DDX,
    FS_OPCODE_DDY,
    FS_OPCODE_PIXEL_X,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 072310d5544..66b7ba7720b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -424,6 +424,15 @@ void brw_CMP(struct brw_compile *p,
              struct brw_reg src0,
              struct brw_reg src1);
 
+void
+brw_untyped_atomic(struct brw_compile *p,
+                   struct brw_reg dest,
+                   struct brw_reg mrf,
+                   GLuint atomic_op,
+                   GLuint bind_table_index,
+                   GLuint msg_length,
+                   GLuint response_length);
+
 /***********************************************************************
  * brw_eu_util.c:
  */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 8efd6796a97..d1d40f7bee3 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2485,6 +2485,68 @@ brw_svb_write(struct brw_compile *p,
                  send_commit_msg); /* send_commit_msg */
 }
 
+static void
+brw_set_dp_untyped_atomic_message(struct brw_compile *p,
+                                  struct brw_instruction *insn,
+                                  GLuint atomic_op,
+                                  GLuint bind_table_index,
+                                  GLuint msg_length,
+                                  GLuint response_length,
+                                  bool header_present)
+{ /* Fills in insn's message descriptor for an untyped atomic operation. */
+   if (p->brw->is_haswell) {
+      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      /* Pick the message type from the instruction's access mode. */
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         if (insn->header.execution_size != BRW_EXECUTE_16)
+            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
+
+         insn->bits3.gen7_dp.msg_type =
+            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
+      } else {
+         insn->bits3.gen7_dp.msg_type =
+            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
+      }
+
+   } else {
+      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                                 msg_length, response_length,
+                                 header_present, false);
+
+      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
+
+      if (insn->header.execution_size != BRW_EXECUTE_16)
+         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
+   }
+
+   if (response_length)
+      insn->bits3.ud |= 1 << 13; /* Return data expected */
+
+   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
+   insn->bits3.ud |= atomic_op << 8; /* Atomic operation */
+}
+
+void
+brw_untyped_atomic(struct brw_compile *p,
+                   struct brw_reg dest,
+                   struct brw_reg mrf,
+                   GLuint atomic_op,
+                   GLuint bind_table_index,
+                   GLuint msg_length,
+                   GLuint response_length) { /* Emits a SEND carrying an untyped atomic message. */
+   struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, insn, brw_imm_d(0));
+   brw_set_dp_untyped_atomic_message(
+      p, insn, atomic_op, bind_table_index, msg_length, response_length,
+      insn->header.access_mode == BRW_ALIGN_1); /* header_present */
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 6118cfad1f0..f3ceaad97b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -772,6 +772,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return inst->mlen;
    case FS_OPCODE_SPILL:
       return 2;
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+      return 0;
    default:
       assert(!"not reached");
       return inst->mlen;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b5aed23951b..bc67637f315 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -554,6 +554,11 @@ private:
                                  struct brw_reg offset,
                                  struct brw_reg value);
 
+   void generate_untyped_atomic(fs_inst *inst,
+                                struct brw_reg dst,
+                                struct brw_reg atomic_op,
+                                struct brw_reg surf_index);
+
    void mark_surface_used(unsigned surf_index);
 
    void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 853120dc9bf..f639d7e185b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1189,6 +1189,23 @@ fs_generator::generate_shader_time_add(fs_inst *inst,
    mark_surface_used(c->prog_data.base.binding_table.shader_time_start);
 }
 
+void
+fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst,
+                                      struct brw_reg atomic_op,
+                                      struct brw_reg surf_index)
+{ /* Emits an untyped atomic message; atomic_op and surf_index must be UD immediates. */
+   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
+          atomic_op.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE &&
+          surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
+                      atomic_op.dw1.ud, surf_index.dw1.ud,
+                      inst->mlen, dispatch_width / 8); /* response_length */
+
+   mark_surface_used(surf_index.dw1.ud);
+}
+
 void
 fs_generator::generate_code(exec_list *instructions)
 {
@@ -1588,6 +1605,10 @@ fs_generator::generate_code(exec_list *instructions)
          generate_shader_time_add(inst, src[0], src[1], src[2]);
          break;
 
+      case SHADER_OPCODE_UNTYPED_ATOMIC:
+         generate_untyped_atomic(inst, dst, src[0], src[1]);
+         break;
+
       case FS_OPCODE_SET_SIMD4X2_OFFSET:
          generate_set_simd4x2_offset(inst, dst, src[0]);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e333c6b6ed1..dbdf442bbf3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -276,6 +276,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
       return inst->header_present ? 1 : 0;
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+      return 0;
    default:
       assert(!"not reached");
       return inst->mlen;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6ac7c4ce8d9..5ba32b64530 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -617,6 +617,11 @@ private:
    void generate_unpack_flags(vec4_instruction *inst,
                               struct brw_reg dst);
 
+   void generate_untyped_atomic(vec4_instruction *inst,
+                                struct brw_reg dst,
+                                struct brw_reg atomic_op,
+                                struct brw_reg surf_index);
+
    void mark_surface_used(unsigned surf_index);
 
    struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index ffc68223983..e8e9f072d14 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -854,6 +854,24 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
    mark_surface_used(surf_index.dw1.ud);
 }
 
+void
+vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
+                                        struct brw_reg dst,
+                                        struct brw_reg atomic_op,
+                                        struct brw_reg surf_index)
+{ /* Emits an untyped atomic message; atomic_op and surf_index must be UD immediates. */
+   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
+          atomic_op.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE &&
+          surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
+                      atomic_op.dw1.ud, surf_index.dw1.ud,
+                      inst->mlen, 1); /* response_length */
+
+   mark_surface_used(surf_index.dw1.ud);
+}
+
 /**
  * Generate assembly for a Vec4 IR instruction.
  *
@@ -1166,6 +1184,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
       mark_surface_used(prog_data->base.binding_table.shader_time_start);
       break;
 
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+      generate_untyped_atomic(inst, dst, src[0], src[1]);
+      break;
+
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
       generate_unpack_flags(inst, dst);
       break;
-- 
2.30.2