From: Ilia Mirkin Date: Sat, 20 Sep 2014 00:59:17 +0000 (-0400) Subject: st/mesa: add atomic counter support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9d6f9ccf6b7ca7c9d1ac40354d6ae47c130e2024;p=mesa.git st/mesa: add atomic counter support Signed-off-by: Ilia Mirkin Reviewed-by: Marek Olšák --- diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 7af8becd607..c5379e87310 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -393,6 +393,7 @@ VBO_FILES = \ STATETRACKER_FILES = \ state_tracker/st_atom_array.c \ + state_tracker/st_atom_atomicbuf.c \ state_tracker/st_atom_blend.c \ state_tracker/st_atom.c \ state_tracker/st_atom_clip.c \ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index c9c30449734..f89ca891ac7 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2295,6 +2295,10 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, (void) row_major; + /* atomics don't get real storage */ + if (type->contains_atomic()) + return; + if (type->is_vector() || type->is_scalar()) { size = type->vector_elements; if (type->is_double()) diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 03097225bb2..0676b087eea 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -75,6 +75,11 @@ static const struct st_tracked_state *atoms[] = &st_bind_tes_ubos, &st_bind_fs_ubos, &st_bind_gs_ubos, + &st_bind_vs_atomics, + &st_bind_tcs_atomics, + &st_bind_tes_atomics, + &st_bind_fs_atomics, + &st_bind_gs_atomics, &st_update_pixel_transfer, &st_update_tess, diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index a24842baa4f..7cbd52e1335 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -78,6 +78,11 @@ extern const struct st_tracked_state st_bind_vs_ubos; extern const struct st_tracked_state st_bind_gs_ubos; extern const struct st_tracked_state st_bind_tcs_ubos; extern const struct st_tracked_state st_bind_tes_ubos; +extern const struct st_tracked_state st_bind_fs_atomics; +extern const struct st_tracked_state st_bind_vs_atomics; +extern const struct st_tracked_state st_bind_gs_atomics; +extern const struct st_tracked_state st_bind_tcs_atomics; +extern const struct st_tracked_state st_bind_tes_atomics; extern const struct st_tracked_state st_update_pixel_transfer; extern const struct st_tracked_state st_update_tess; diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c new file mode 100644 index 00000000000..1c30d1fb701 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_atomicbuf.c @@ -0,0 +1,158 @@ +/************************************************************************** + * + * Copyright 2014 Ilia Mirkin. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/imports.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "compiler/glsl/ir_uniform.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" + +#include "st_debug.h" +#include "st_cb_bufferobjects.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_program.h" + +static void +st_bind_atomics(struct st_context *st, + struct gl_shader_program *prog, + unsigned shader_type) +{ + unsigned i; + + if (!prog || !st->pipe->set_shader_buffers) + return; + + for (i = 0; i < prog->NumAtomicBuffers; i++) { + struct gl_active_atomic_buffer *atomic = &prog->AtomicBuffers[i]; + struct gl_atomic_buffer_binding *binding = + &st->ctx->AtomicBufferBindings[atomic->Binding]; + struct st_buffer_object *st_obj = + st_buffer_object(binding->BufferObject); + struct pipe_shader_buffer sb = { 0 }; + + sb.buffer = st_obj->buffer; + sb.buffer_offset = binding->Offset; + sb.buffer_size = st_obj->buffer->width0 - binding->Offset; + + st->pipe->set_shader_buffers(st->pipe, shader_type, + atomic->Binding, 1, &sb); + } +} + +static void +bind_vs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + + st_bind_atomics(st, prog, PIPE_SHADER_VERTEX); +} + +const struct st_tracked_state st_bind_vs_atomics = { + "st_bind_vs_atomics", + { + 0, + ST_NEW_VERTEX_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_vs_atomics +}; + +static void +bind_fs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + + st_bind_atomics(st, prog, PIPE_SHADER_FRAGMENT); +} + +const struct st_tracked_state st_bind_fs_atomics = { + "st_bind_fs_atomics", + { + 0, + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_fs_atomics +}; + +static void +bind_gs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + + st_bind_atomics(st, prog, PIPE_SHADER_GEOMETRY); +} + +const struct st_tracked_state st_bind_gs_atomics = { + "st_bind_gs_atomics", + { + 0, + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_gs_atomics +}; + +static void +bind_tcs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + + st_bind_atomics(st, prog, PIPE_SHADER_TESS_CTRL); +} + +const struct st_tracked_state st_bind_tcs_atomics = { + "st_bind_tcs_atomics", + { + 0, + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_tcs_atomics +}; + +static void +bind_tes_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + st_bind_atomics(st, prog, PIPE_SHADER_TESS_EVAL); +} + +const struct st_tracked_state st_bind_tes_atomics = { + "st_bind_tes_atomics", + { + 0, + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_tes_atomics +}; diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 68be8ba64ac..0c4c98911b1 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -237,6 +237,9 @@ st_bufferobj_data(struct gl_context *ctx, case GL_PARAMETER_BUFFER_ARB: bind = PIPE_BIND_COMMAND_ARGS_BUFFER; break; + case GL_ATOMIC_COUNTER_BUFFER: + bind = PIPE_BIND_SHADER_BUFFER; + break; default: bind = 0; } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index ce1e97aacb5..40779f962ee 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -350,6 +350,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f) f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER; f->NewDefaultTessLevels = ST_NEW_TESS_STATE; f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS; + f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER; } struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 9db5f11beb5..ee27533912c 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -62,6 +62,7 @@ struct u_upload_mgr; #define ST_NEW_TESSCTRL_PROGRAM (1 << 9) #define ST_NEW_TESSEVAL_PROGRAM (1 << 10) #define ST_NEW_SAMPLER_VIEWS (1 << 11) +#define ST_NEW_ATOMIC_BUFFER (1 << 12) struct st_state_flags { diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 53ea6767395..016d07d3411 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -218,6 +218,10 @@ void st_init_limits(struct pipe_screen *screen, c->MaxUniformBlockSize / 4 * pc->MaxUniformBlocks); + pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; + pc->MaxAtomicBuffers = screen->get_shader_param( + screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); + /* Gallium doesn't really care about local vs. env parameters so use the * same limits. */ @@ -333,6 +337,19 @@ void st_init_limits(struct pipe_screen *screen, screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL); c->GLSLFrontFacingIsSysVal = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL); + + c->MaxAtomicBufferBindings = + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + c->MaxCombinedAtomicBuffers = + c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers + + c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers + + c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers + + c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers + + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS); + + if (c->MaxCombinedAtomicBuffers > 0) + extensions->ARB_shader_atomic_counters = GL_TRUE; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 06207d06aa5..f7e0bb81604 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -266,6 +266,8 @@ public: unsigned tex_offset_num_offset; int dead_mask; /**< Used in dead code elimination */ + st_src_reg buffer; /**< buffer register */ + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ const struct tgsi_opcode_info *info; }; @@ -390,6 +392,7 @@ public: int samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ + int buffers_used; bool indirect_addr_consts; int wpos_transform_const; @@ -443,6 +446,8 @@ public: virtual void visit(ir_barrier *); /*@}*/ + void visit_atomic_counter_intrinsic(ir_call *); + st_src_reg result; /** List of variable_storage */ @@ -556,6 +561,27 @@ swizzle_for_size(int size) return size_swizzles[size - 1]; } +static bool +is_resource_instruction(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + return true; + default: + return false; + } +} + static unsigned num_inst_dst_regs(const glsl_to_tgsi_instruction *op) { @@ -565,7 +591,8 @@ num_inst_dst_regs(const glsl_to_tgsi_instruction *op) static unsigned num_inst_src_regs(const glsl_to_tgsi_instruction *op) { - return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src; + return op->info->is_tex || is_resource_instruction(op->op) ? + op->info->num_src - 1 : op->info->num_src; } glsl_to_tgsi_instruction * @@ -3072,14 +3099,73 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) return entry; } +void +glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + ir_dereference *deref = static_cast( + ir->actual_parameters.get_head()); + ir_variable *location = deref->variable_referenced(); + + st_src_reg buffer( + PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); + + /* Calculate the surface offset */ + st_src_reg offset; + ir_dereference_array *deref_array = deref->as_dereference_array(); + + if (deref_array) { + offset = get_temp(glsl_type::uint_type); + + deref_array->array_index->accept(this); + + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), + this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), + offset, st_src_reg_for_int(location->data.offset)); + } else { + offset = st_src_reg_for_int(location->data.offset); + } + + ir->return_deref->accept(this); + st_dst_reg dst(this->result); + dst.writemask = WRITEMASK_X; + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_atomic_read", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_atomic_increment", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, + st_src_reg_for_int(1)); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, + st_src_reg_for_int(-1)); + inst->buffer = buffer; + emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); + } +} + void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; ir_function_signature *sig = ir->callee; - function_entry *entry = get_function_signature(sig); + const char *callee = sig->function_name(); + function_entry *entry; int i; + /* Filter out intrinsics */ + if (!strcmp("__intrinsic_atomic_read", callee) || + !strcmp("__intrinsic_atomic_increment", callee) || + !strcmp("__intrinsic_atomic_predecrement", callee)) { + visit_atomic_counter_intrinsic(ir); + return; + } + + entry = get_function_signature(sig); /* Process in parameters. */ foreach_two_lists(formal_node, &sig->parameters, actual_node, &ir->actual_parameters) { @@ -3585,6 +3671,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() current_function = NULL; num_address_regs = 0; samplers_used = 0; + buffers_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; glsl_version = 0; @@ -3619,6 +3706,7 @@ static void count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; + v->buffers_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (inst->info->is_tex) { @@ -3636,6 +3724,12 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) } } } + if (inst->buffer.file != PROGRAM_UNDEFINED && ( + is_resource_instruction(inst->op) || + inst->op == TGSI_OPCODE_STORE)) { + if (inst->buffer.file == PROGRAM_BUFFER) + v->buffers_used |= 1 << inst->buffer.index; + } } prog->SamplersUsed = v->samplers_used; @@ -4231,7 +4325,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { if (!inst->dead_mask || !inst->dst[0].writemask) continue; - else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { + /* No amount of dead masks should remove memory stores */ + if (inst->info->is_store) + continue; + + if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { inst->remove(); delete inst; removed++; @@ -4409,6 +4507,7 @@ struct st_translate { struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; @@ -4816,13 +4915,13 @@ compile_tgsi_instruction(struct st_translate *t, const glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; - GLuint i; + int i; struct ureg_dst dst[2]; struct ureg_src src[4]; struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; - unsigned num_dst; - unsigned num_src; + int num_dst; + int num_src; unsigned tex_target; num_dst = num_inst_dst_regs(inst); @@ -4870,7 +4969,7 @@ compile_tgsi_instruction(struct st_translate *t, src[num_src] = ureg_src_indirect(src[num_src], ureg_src(t->address[2])); num_src++; - for (i = 0; i < inst->tex_offset_num_offset; i++) { + for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i); } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); @@ -4883,6 +4982,26 @@ compile_tgsi_instruction(struct st_translate *t, src, num_src); return; + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + for (i = num_src - 1; i >= 0; i--) + src[i + 1] = src[i]; + num_src++; + src[0] = t->buffers[inst->buffer.index]; + if (inst->buffer.reladdr) + src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2])); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); + break; + case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); @@ -5172,6 +5291,8 @@ st_translate_program( { struct st_translate *t; unsigned i; + struct gl_program_constants *frag_const = + &ctx->Const.Program[MESA_SHADER_FRAGMENT]; enum pipe_error ret = PIPE_OK; assert(numInputs <= ARRAY_SIZE(t->inputs)); @@ -5487,7 +5608,7 @@ st_translate_program( assert(i == program->num_immediates); /* texture samplers */ - for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { + for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { unsigned type; @@ -5512,6 +5633,12 @@ st_translate_program( } } + for (i = 0; i < frag_const->MaxAtomicBuffers; i++) { + if (program->buffers_used & (1 << i)) { + t->buffers[i] = ureg_DECL_buffer(ureg, i, true); + } + } + /* Emit each instruction in turn: */ foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {