+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ ir_constant *const_block = block->as_constant();
+
+ st_src_reg buffer(
+ PROGRAM_BUFFER,
+ ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+ (const_block ? const_block->value.u[0] : 0),
+ GLSL_TYPE_UINT);
+
+ if (!const_block) {
+ block->accept(this);
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ *buffer.reladdr = this->result;
+ emit_arl(ir, sampler_reladdr, this->result);
+ }
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_load_ssbo", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+ if (dst.type == GLSL_TYPE_BOOL)
+ emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
+ } else if (!strcmp("__intrinsic_store_ssbo", callee)) {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+ dst.writemask = write_mask->value.u[0];
+
+ dst.type = this->result.type;
+ inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+ } else {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ st_src_reg data = this->result, data2 = undef_src;
+ unsigned opcode;
+ if (!strcmp("__intrinsic_atomic_add_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_atomic_min_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_atomic_max_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_atomic_and_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_atomic_or_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ opcode = TGSI_OPCODE_ATOMCAS;
+ param = param->get_next();
+ val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+ data2 = this->result;
+ } else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, off, data, data2);
+ }
+
+ param = param->get_next();
+ ir_constant *access = NULL;
+ if (!param->is_tail_sentinel()) {
+ access = ((ir_instruction *)param)->as_constant();
+ assert(access);
+ }
+
+ /* The emit_asm() might have actually split the op into pieces, e.g. for
+ * double stores. We have to go back and fix up all the generated ops.
+ */
+ unsigned op = inst->op;
+ do {
+ inst->buffer = buffer;
+ if (access)
+ inst->buffer_access = access->value.u[0];
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ if (inst->op == TGSI_OPCODE_UADD)
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ } while (inst && inst->op == op && inst->buffer.file == PROGRAM_UNDEFINED);
+}
+
+void
+glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+
+ if (!strcmp("__intrinsic_memory_barrier", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE |
+ TGSI_MEMBAR_SHARED));
+ else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
+ else if (!strcmp("__intrinsic_memory_barrier_buffer", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
+ else if (!strcmp("__intrinsic_memory_barrier_image", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
+ else if (!strcmp("__intrinsic_memory_barrier_shared", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHARED));
+ else if (!strcmp("__intrinsic_group_memory_barrier", callee))
+ emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+ st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE |
+ TGSI_MEMBAR_SHARED |
+ TGSI_MEMBAR_THREAD_GROUP));
+ else
+ assert(!"Unexpected memory barrier intrinsic");
+}
+
+void
+glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_load_shared", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+ inst->buffer = buffer;
+ } else if (!strcmp("__intrinsic_store_shared", callee)) {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+ dst.writemask = write_mask->value.u[0];
+
+ dst.type = this->result.type;
+ inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+ inst->buffer = buffer;
+ } else {
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ st_src_reg data = this->result, data2 = undef_src;
+ unsigned opcode;
+ if (!strcmp("__intrinsic_atomic_add_shared", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_atomic_min_shared", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_atomic_max_shared", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_atomic_and_shared", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_atomic_or_shared", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_atomic_xor_shared", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_atomic_exchange_shared", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
+ opcode = TGSI_OPCODE_ATOMCAS;
+ param = param->get_next();
+ val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+ data2 = this->result;
+ } else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, off, data, data2);
+ inst->buffer = buffer;
+ }
+}
+
+void
+glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_dereference *img = (ir_dereference *)param;
+ const ir_variable *imgvar = img->variable_referenced();
+ const glsl_type *type = imgvar->type->without_array();
+ unsigned sampler_array_size = 1, sampler_base = 0;
+
+ st_src_reg reladdr;
+ st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
+
+ get_deref_offsets(img, &sampler_array_size, &sampler_base,
+ (unsigned int *)&image.index, &reladdr);
+ if (reladdr.file != PROGRAM_UNDEFINED) {
+ image.reladdr = ralloc(mem_ctx, st_src_reg);
+ *image.reladdr = reladdr;
+ emit_arl(ir, sampler_reladdr, reladdr);
+ }
+
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_image_size", callee)) {
+ dst.writemask = WRITEMASK_XYZ;
+ inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
+ } else if (!strcmp("__intrinsic_image_samples", callee)) {
+ st_src_reg res = get_temp(glsl_type::ivec4_type);
+ st_dst_reg dstres = st_dst_reg(res);
+ dstres.writemask = WRITEMASK_W;
+ inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
+ res.swizzle = SWIZZLE_WWWW;
+ emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
+ } else {
+ st_src_reg arg1 = undef_src, arg2 = undef_src;
+ st_src_reg coord;
+ st_dst_reg coord_dst;
+ coord = get_temp(glsl_type::ivec4_type);
+ coord_dst = st_dst_reg(coord);
+ coord_dst.writemask = (1 << type->coordinate_components()) - 1;
+ param = param->get_next();
+ ((ir_dereference *)param)->accept(this);
+ emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ coord.swizzle = SWIZZLE_XXXX;
+ switch (type->coordinate_components()) {
+ case 4: assert(!"unexpected coord count");
+ /* fallthrough */
+ case 3: coord.swizzle |= SWIZZLE_Z << 6;
+ /* fallthrough */
+ case 2: coord.swizzle |= SWIZZLE_Y << 3;
+ }
+
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ param = param->get_next();
+ ((ir_dereference *)param)->accept(this);
+ st_src_reg sample = this->result;
+ sample.swizzle = SWIZZLE_XXXX;
+ coord_dst.writemask = WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
+ coord.swizzle |= SWIZZLE_W << 9;
+ }
+
+ param = param->get_next();
+ if (!param->is_tail_sentinel()) {
+ ((ir_dereference *)param)->accept(this);
+ arg1 = this->result;
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ ((ir_dereference *)param)->accept(this);
+ arg2 = this->result;
+ param = param->get_next();
+ }
+
+ assert(param->is_tail_sentinel());
+
+ unsigned opcode;
+ if (!strcmp("__intrinsic_image_load", callee))
+ opcode = TGSI_OPCODE_LOAD;
+ else if (!strcmp("__intrinsic_image_store", callee))
+ opcode = TGSI_OPCODE_STORE;
+ else if (!strcmp("__intrinsic_image_atomic_add", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_image_atomic_min", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_image_atomic_max", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_image_atomic_and", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_image_atomic_or", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_image_atomic_xor", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_image_atomic_exchange", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee))
+ opcode = TGSI_OPCODE_ATOMCAS;
+ else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
+ if (opcode == TGSI_OPCODE_STORE)
+ inst->dst[0].writemask = WRITEMASK_XYZW;
+ }
+
+ inst->buffer = image;
+ inst->sampler_array_size = sampler_array_size;
+ inst->sampler_base = sampler_base;
+
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ inst->tex_target = TEXTURE_3D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_RECT:
+ inst->tex_target = TEXTURE_RECT_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_BUF:
+ inst->tex_target = TEXTURE_BUFFER_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ inst->tex_target = TEXTURE_EXTERNAL_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_MS:
+ inst->tex_target = (type->sampler_array)
+ ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ break;
+ default:
+ assert(!"Should not get here.");
+ }
+
+ inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
+ _mesa_get_shader_image_format(imgvar->data.image_format));
+
+ if (imgvar->data.image_coherent)
+ inst->buffer_access |= TGSI_MEMORY_COHERENT;
+ if (imgvar->data.image_restrict)
+ inst->buffer_access |= TGSI_MEMORY_RESTRICT;
+ if (imgvar->data.image_volatile)
+ inst->buffer_access |= TGSI_MEMORY_VOLATILE;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_call *ir)
+{
+ glsl_to_tgsi_instruction *call_inst;
+ ir_function_signature *sig = ir->callee;
+ const char *callee = sig->function_name();
+ function_entry *entry;
+ int i;
+
+ /* Filter out intrinsics */
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee) ||
+ !strcmp("__intrinsic_atomic_add", callee) ||
+ !strcmp("__intrinsic_atomic_sub", callee) ||
+ !strcmp("__intrinsic_atomic_min", callee) ||
+ !strcmp("__intrinsic_atomic_max", callee) ||
+ !strcmp("__intrinsic_atomic_and", callee) ||
+ !strcmp("__intrinsic_atomic_or", callee) ||
+ !strcmp("__intrinsic_atomic_xor", callee) ||
+ !strcmp("__intrinsic_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_load_ssbo", callee) ||
+ !strcmp("__intrinsic_store_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_add_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_min_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_max_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_and_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_or_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_xor_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ visit_ssbo_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_memory_barrier", callee) ||
+ !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
+ !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
+ !strcmp("__intrinsic_memory_barrier_image", callee) ||
+ !strcmp("__intrinsic_memory_barrier_shared", callee) ||
+ !strcmp("__intrinsic_group_memory_barrier", callee)) {
+ visit_membar_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_load_shared", callee) ||
+ !strcmp("__intrinsic_store_shared", callee) ||
+ !strcmp("__intrinsic_atomic_add_shared", callee) ||
+ !strcmp("__intrinsic_atomic_min_shared", callee) ||
+ !strcmp("__intrinsic_atomic_max_shared", callee) ||
+ !strcmp("__intrinsic_atomic_and_shared", callee) ||
+ !strcmp("__intrinsic_atomic_or_shared", callee) ||
+ !strcmp("__intrinsic_atomic_xor_shared", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_shared", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) {
+ visit_shared_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_image_load", callee) ||
+ !strcmp("__intrinsic_image_store", callee) ||
+ !strcmp("__intrinsic_image_atomic_add", callee) ||
+ !strcmp("__intrinsic_image_atomic_min", callee) ||
+ !strcmp("__intrinsic_image_atomic_max", callee) ||
+ !strcmp("__intrinsic_image_atomic_and", callee) ||
+ !strcmp("__intrinsic_image_atomic_or", callee) ||
+ !strcmp("__intrinsic_image_atomic_xor", callee) ||
+ !strcmp("__intrinsic_image_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_image_atomic_comp_swap", callee) ||
+ !strcmp("__intrinsic_image_size", callee) ||
+ !strcmp("__intrinsic_image_samples", callee)) {
+ visit_image_intrinsic(ir);
+ return;
+ }
+
+ entry = get_function_signature(sig);
+ /* Process in parameters. */
+ foreach_two_lists(formal_node, &sig->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+ ir_variable *param = (ir_variable *) formal_node;
+
+ if (param->data.mode == ir_var_function_in ||
+ param->data.mode == ir_var_function_inout) {
+ variable_storage *storage = find_variable_storage(param);
+ assert(storage);
+
+ param_rval->accept(this);
+ st_src_reg r = this->result;
+
+ st_dst_reg l;
+ l.file = storage->file;
+ l.index = storage->index;
+ l.reladdr = NULL;
+ l.writemask = WRITEMASK_XYZW;
+
+ for (i = 0; i < type_size(param->type); i++) {
+ emit_asm(ir, TGSI_OPCODE_MOV, l, r);
+ l.index++;
+ r.index++;
+ }
+ }
+ }
+
+ /* Emit call instruction */
+ call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
+ call_inst->function = entry;
+
+ /* Process out parameters. */
+ foreach_two_lists(formal_node, &sig->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+ ir_variable *param = (ir_variable *) formal_node;
+
+ if (param->data.mode == ir_var_function_out ||
+ param->data.mode == ir_var_function_inout) {
+ variable_storage *storage = find_variable_storage(param);
+ assert(storage);
+
+ st_src_reg r;
+ r.file = storage->file;