From ba885c30c74f9efc94743d4582d30a0e70924b97 Mon Sep 17 00:00:00 2001
From: Francisco Jerez
Date: Wed, 25 Sep 2013 16:31:35 -0700
Subject: [PATCH] i965/gen7: Handle atomic instructions from the VEC4 back-end.

This can deal with all 15 of the 32-bit untyped atomic operations the
hardware supports, but only INC and PREDEC are going to be exposed
through the API for now.

v2: Represent atomics as GLSL intrinsics.  Add support for variably
    indexed atomic counter arrays.

v3: Add comment on why we don't need to assign uniform storage for
    atomic counters.

Reviewed-by: Paul Berry
---
 src/mesa/drivers/dri/i965/brw_vec4.h       |   9 ++
 .../drivers/dri/i965/brw_vec4_visitor.cpp  | 103 +++++++++++++++++-
 2 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index a479646c69b..1f29e576caf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -489,6 +489,13 @@ public:
    void emit_shader_time_write(enum shader_time_shader_type type,
                                src_reg value);
 
+   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+                            dst_reg dst, src_reg offset, src_reg src0,
+                            src_reg src1);
+
+   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+                                  src_reg offset);
+
    src_reg get_scratch_offset(vec4_instruction *inst,
                               src_reg *reladdr, int reg_offset);
    src_reg get_pull_constant_offset(vec4_instruction *inst,
@@ -514,6 +521,8 @@ public:
 
    void dump_instruction(backend_instruction *inst);
 
+   void visit_atomic_counter_intrinsic(ir_call *ir);
+
 protected:
    void emit_vertex();
    void lower_attributes_to_hw_regs(const int *attribute_map,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ff6d69ceaea..a036e2dbb06 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -974,8 +974,11 @@ vec4_visitor::visit(ir_variable *ir)
       /* Thanks to the lower_ubo_reference pass, we will see only
        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
        * variables, so no need for them to be in variable_ht.
+       *
+       * Atomic counters take no uniform storage, no need to do
+       * anything here.
        */
-      if (ir->is_in_uniform_block())
+      if (ir->is_in_uniform_block() || ir->type->contains_atomic())
          return;
 
       /* Track how big the whole uniform variable is, in case we need to put a
@@ -2160,10 +2163,57 @@ vec4_visitor::visit(ir_constant *ir)
    emit_constant_values(&dst, ir);
 }
 
+void
+vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+   ir_dereference *deref = static_cast<ir_dereference *>(
+      ir->actual_parameters.get_head());
+   ir_variable *location = deref->variable_referenced();
+   unsigned surf_index = (prog_data->base.binding_table.abo_start +
+                          location->atomic.buffer_index);
+
+   /* Calculate the surface offset */
+   src_reg offset(this, glsl_type::uint_type);
+   ir_dereference_array *deref_array = deref->as_dereference_array();
+   if (deref_array) {
+      deref_array->array_index->accept(this);
+
+      src_reg tmp(this, glsl_type::uint_type);
+      emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
+      emit(ADD(dst_reg(offset), tmp, location->atomic.offset));
+   } else {
+      offset = location->atomic.offset;
+   }
+
+   /* Emit the appropriate machine instruction */
+   const char *callee = ir->callee->function_name();
+   dst_reg dst = get_assignment_lhs(ir->return_deref, this);
+
+   if (!strcmp("__intrinsic_atomic_read", callee)) {
+      emit_untyped_surface_read(surf_index, dst, offset);
+
+   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+      emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
+                          src_reg(), src_reg());
+
+   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+      emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
+                          src_reg(), src_reg());
+   }
+}
+
 void
 vec4_visitor::visit(ir_call *ir)
 {
-   assert(!"not reached");
+   const char *callee = ir->callee->function_name();
+
+   if (!strcmp("__intrinsic_atomic_read", callee) ||
+       !strcmp("__intrinsic_atomic_increment", callee) ||
+       !strcmp("__intrinsic_atomic_predecrement", callee)) {
+      visit_atomic_counter_intrinsic(ir);
+   } else {
+      assert(!"Unsupported intrinsic.");
+   }
 }
 
 void
@@ -2557,6 +2607,55 @@ vec4_visitor::visit(ir_end_primitive *)
    assert(!"not reached");
 }
 
+void
+vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+                                  dst_reg dst, src_reg offset,
+                                  src_reg src0, src_reg src1)
+{
+   unsigned mlen = 0;
+
+   /* Set the atomic operation offset. */
+   emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
+   mlen++;
+
+   /* Set the atomic operation arguments. */
+   if (src0.file != BAD_FILE) {
+      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
+      mlen++;
+   }
+
+   if (src1.file != BAD_FILE) {
+      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
+      mlen++;
+   }
+
+   /* Emit the instruction.  Note that this maps to the normal SIMD8
+    * untyped atomic message on Ivy Bridge, but that's OK because
+    * unused channels will be masked out.
+    */
+   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
+                                 src_reg(atomic_op), src_reg(surf_index));
+   inst->base_mrf = 0;
+   inst->mlen = mlen;
+}
+
+void
+vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+                                        src_reg offset)
+{
+   /* Set the surface read offset. */
+   emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+
+   /* Emit the instruction.  Note that this maps to the normal SIMD8
+    * untyped surface read message, but that's OK because unused
+    * channels will be masked out.
+    */
+   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ,
+                                 dst, src_reg(surf_index));
+   inst->base_mrf = 0;
+   inst->mlen = 1;
+}
+
 void
 vec4_visitor::emit_ndc_computation()
 {
-- 
2.30.2
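
A usage sketch, not part of the patch: assuming a build that exposes
GL_ARB_shader_atomic_counters to vertex shaders (i.e. with a non-zero
GL_MAX_VERTEX_ATOMIC_COUNTERS), a shader like the following exercises
all three intrinsics handled above; the names, bindings and offsets
are illustrative only.

   #version 140
   #extension GL_ARB_shader_atomic_counters : require

   // Illustrative counters; the linker assigns each one the
   // buffer_index and byte offset read back via location->atomic.
   layout(binding = 0, offset = 0) uniform atomic_uint total;
   layout(binding = 0, offset = 4) uniform atomic_uint bins[4];

   in vec4 position;
   flat out uint slot;

   void main()
   {
      // __intrinsic_atomic_increment -> BRW_AOP_INC, constant offset.
      slot = atomicCounterIncrement(total);

      // Variable index: takes the deref_array path, so the surface
      // offset is computed at run time with the MUL/ADD pair.
      // __intrinsic_atomic_predecrement -> BRW_AOP_PREDEC.
      atomicCounterDecrement(bins[gl_VertexID & 3]);

      // __intrinsic_atomic_read -> untyped surface read.
      if (atomicCounter(total) == 0u)
         slot = 0u;

      gl_Position = position;
   }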
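
For the dynamically indexed access in that sketch, the byte offset
within the counter buffer works out to
(gl_VertexID & 3) * ATOMIC_COUNTER_SIZE + 4 (4 bytes per counter,
since these are 32-bit), which is exactly the MUL/ADD sequence
visit_atomic_counter_intrinsic() emits; a constant index folds to the
declared offset instead.  On the message side, INC and PREDEC carry no
data arguments, so emit_untyped_atomic() sends only the offset
(mlen = 1); a two-argument operation such as BRW_AOP_CMPXCHG, not
exposed through the API here, would bring mlen to 3.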