From: Francisco Jerez
Date: Thu, 23 Apr 2015 11:30:28 +0000 (+0300)
Subject: i965: Add memory fence opcode.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f1d1d17db6bdeac0519652aa7432048507154a28;p=mesa.git

i965: Add memory fence opcode.

Acked-by: Kenneth Graunke
Reviewed-by: Topi Pohjolainen
---

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index edb113b9a97..634885b0d47 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -910,6 +910,8 @@ enum opcode {
    SHADER_OPCODE_TYPED_SURFACE_READ,
    SHADER_OPCODE_TYPED_SURFACE_WRITE,
 
+   SHADER_OPCODE_MEMORY_FENCE,
+
    SHADER_OPCODE_GEN4_SCRATCH_READ,
    SHADER_OPCODE_GEN4_SCRATCH_WRITE,
    SHADER_OPCODE_GEN7_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 3ee5de75856..84c1e57b536 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -447,6 +447,10 @@ brw_typed_surface_write(struct brw_codegen *p,
                         unsigned msg_length,
                         unsigned num_channels);
 
+void
+brw_memory_fence(struct brw_codegen *p,
+                 struct brw_reg dst);
+
 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg dest,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 92d97dec824..30396623612 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3114,6 +3114,76 @@ brw_typed_surface_write(struct brw_codegen *p,
       p, insn, num_channels);
 }
 
+static void
+brw_set_memory_fence_message(struct brw_codegen *p,
+                             struct brw_inst *insn,
+                             enum brw_message_target sfid,
+                             bool commit_enable)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   /* One register in (header present); one back only if commit_enable. */
+   brw_set_message_descriptor(p, insn, sfid,
+                              1 /* message length */,
+                              (commit_enable ? 1 : 0) /* response length */,
+                              true /* header present */,
+                              false /* NOTE(review): end of thread? */);
+   /* Select the fence message type that belongs to the target data port. */
+   switch (sfid) {
+   case GEN6_SFID_DATAPORT_RENDER_CACHE:
+      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE);
+      break;
+   case GEN7_SFID_DATAPORT_DATA_CACHE:
+      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE);
+      break;
+   default:
+      unreachable("Not reached");
+   }
+   /* NOTE(review): 1 << 5 in msg_control presumably is the commit bit. */
+   if (commit_enable)
+      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
+}
+/* Emit a data-cache fence; gen7 non-Haswell also fences the render cache. */
+void
+brw_memory_fence(struct brw_codegen *p,
+                 struct brw_reg dst)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
+   struct brw_inst *insn;
+
+   /* Set dst as destination for dependency tracking; the MEMORY_FENCE
+    * message returns no data (a response is requested only for commit).
+    */
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, dst);
+   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                                commit_enable);
+
+   if (devinfo->gen == 7 && !devinfo->is_haswell) {
+      /* IVB does typed surface access through the render cache, so we need to
+       * flush it too.  Use a different register so both flushes can be
+       * pipelined by the hardware.  NOTE(review): dst must cover 2 registers.
+       */
+      insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_set_dest(p, insn, offset(dst, 1));
+      brw_set_src0(p, insn, offset(dst, 1));
+      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
+                                   commit_enable);
+
+      /* Now write the response of the second message into the response of the
+       * first to trigger a pipeline stall -- This way future render and data
+       * cache messages will be properly ordered with respect to past data and
+       * render cache messages.
+       */
+      brw_push_insn_state(p);
+      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p, dst, offset(dst, 1));
+      brw_pop_insn_state(p);
+   }
+}
+
 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg dest,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 5b9f95df098..b81978c8775 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2053,6 +2053,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].dw1.ud);
          break;
 
+      case SHADER_OPCODE_MEMORY_FENCE:
+         brw_memory_fence(p, dst);
+         break;
+
       case FS_OPCODE_SET_SIMD4X2_OFFSET:
          generate_set_simd4x2_offset(inst, dst, src[0]);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a2cb39de585..20588e9a0fe 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -502,6 +502,8 @@ brw_instruction_name(enum opcode op)
       return "typed_surface_read";
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
       return "typed_surface_write";
+   case SHADER_OPCODE_MEMORY_FENCE:
+      return "memory_fence";
 
    case SHADER_OPCODE_LOAD_PAYLOAD:
       return "load_payload";
@@ -1049,6 +1051,7 @@ backend_instruction::has_side_effects() const
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
    case SHADER_OPCODE_TYPED_ATOMIC:
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_MEMORY_FENCE:
    case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_FB_WRITE:
       return true;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 324f79552b3..c15fa165ec8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1508,6 +1508,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
                                  src[2].dw1.ud);
          break;
 
+      case SHADER_OPCODE_MEMORY_FENCE:
+         brw_memory_fence(p, dst);
+         break;
+
       case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
          generate_unpack_flags(dst);
          break;