From 4e7f6437b5359fe41a48fbba510f46ac69db8653 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 3 Nov 2017 10:15:38 +1000 Subject: [PATCH] r600: add ARB_shader_storage_buffer_object support (v3) This just builds on the image support. Evergreen only has ssbo for fragment and compute no other stages. v2: handle images and ssbo in the same shader properly (Ilia) v3: fix RESQ on buffers, fix missing atom emit fix first element offset use R32 format write separate buffer rat store path. (from running deqp gles3.1 tests) Signed-off-by: Dave Airlie --- docs/features.txt | 4 +- docs/relnotes/17.4.0.html | 1 + src/gallium/drivers/r600/evergreen_state.c | 143 +++++++++++++- src/gallium/drivers/r600/r600_hw_context.c | 4 +- src/gallium/drivers/r600/r600_pipe.c | 8 +- src/gallium/drivers/r600/r600_pipe.h | 4 +- src/gallium/drivers/r600/r600_shader.c | 187 ++++++++++++++++++- src/gallium/drivers/r600/r600_state_common.c | 45 ++++- 8 files changed, 372 insertions(+), 24 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 01cd133ef01..5d65d4fdf02 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -179,7 +179,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, radeonsi GL_ARB_program_interface_query DONE (all drivers) GL_ARB_robust_buffer_access_behavior DONE (i965) GL_ARB_shader_image_size DONE (freedreno/a5xx, i965, r600, softpipe) - GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965, softpipe) + GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965, r600, softpipe) GL_ARB_stencil_texturing DONE (freedreno, i965/hsw+, nv50, r600, llvmpipe, softpipe, swr) GL_ARB_texture_buffer_range DONE (freedreno, nv50, i965, r600, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) @@ -249,7 +249,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi GL_ARB_shader_atomic_counters DONE (freedreno/a5xx, i965/gen7+, r600, softpipe) GL_ARB_shader_image_load_store DONE (freedreno/a5xx, i965/gen7+, r600, 
softpipe) GL_ARB_shader_image_size DONE (freedreno/a5xx, i965/gen7+, r600, softpipe) - GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965/gen7+, softpipe) + GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965/gen7+, r600, softpipe) GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_stencil_texturing DONE (freedreno, nv50, r600, llvmpipe, softpipe, swr) diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html index ec2386b3305..b5f4476ce19 100644 --- a/docs/relnotes/17.4.0.html +++ b/docs/relnotes/17.4.0.html @@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
  • Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE environment variable is set to "0" or "false"
  • GL_ARB_shader_atomic_counters and GL_ARB_shader_atomic_counter_ops on r600/evergreen+
  • GL_ARB_shader_image_load_store and GL_ARB_shader_image_size on r600/evergreen+
+ • GL_ARB_shader_storage_buffer_object on r600/evergreen+
  • GL_ARB_cull_distance on r600/evergreen+
  • OpenGL 4.2 on r600/evergreen with hw fp64 support
  • diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index a9982b59155..4a5c1aa6aee 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -614,6 +614,7 @@ struct eg_buf_res_params { unsigned size; unsigned char swizzle[4]; bool uncached; + bool force_swizzle; }; static void evergreen_fill_buffer_resource_words(struct r600_context *rctx, @@ -635,7 +636,10 @@ static void evergreen_fill_buffer_resource_words(struct r600_context *rctx, desc = util_format_description(params->pipe_format); - swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE); + if (params->force_swizzle) + swizzle_res = r600_get_swizzle_combined(params->swizzle, NULL, TRUE); + else + swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE); va = tmp->resource.gpu_address + params->offset; *skip_mip_address_reloc = true; @@ -1029,7 +1033,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx, { unsigned format, swap, ntype, endian; const struct util_format_description *desc; - unsigned block_size = align(util_format_get_blocksize(res->b.b.format), 4); + unsigned block_size = util_format_get_blocksize(res->b.b.format); unsigned pitch_alignment = MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size); unsigned pitch = align(res->b.b.width0, pitch_alignment); @@ -1082,7 +1086,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx, color->dim = width_elements - 1; color->slice = 0; /* (width_elements / 64) - 1;*/ color->view = 0; - color->offset = res->gpu_address >> 8; + color->offset = (res->gpu_address + first_element) >> 8; color->fmask = color->offset; color->fmask_slice = 0; @@ -1679,7 +1683,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, } static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_atom *atom, - int immed_id_base, int 
res_id_base) + int immed_id_base, int res_id_base, int offset) { struct r600_image_state *state = (struct r600_image_state *)atom; struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state; @@ -1692,7 +1696,7 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at for (i = 0; i < R600_MAX_IMAGES; i++) { struct r600_image_view *image = &state->views[i]; unsigned reloc, immed_reloc; - int idx = i; + int idx = i + offset; idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0); if (!image->base.resource) @@ -1749,14 +1753,14 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at radeon_emit(cs, immed_reloc); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); - radeon_emit(cs, (immed_id_base + i) * 8); + radeon_emit(cs, (immed_id_base + i + offset) * 8); radeon_emit_array(cs, image->immed_resource_words, 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, immed_reloc); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); - radeon_emit(cs, (res_id_base + i) * 8); + radeon_emit(cs, (res_id_base + i + offset) * 8); radeon_emit_array(cs, image->resource_words, 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); @@ -1773,7 +1777,15 @@ static void evergreen_emit_fragment_image_state(struct r600_context *rctx, struc { evergreen_emit_image_state(rctx, atom, R600_IMAGE_IMMED_RESOURCE_OFFSET, - R600_IMAGE_REAL_RESOURCE_OFFSET); + R600_IMAGE_REAL_RESOURCE_OFFSET, 0); +} + +static void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom) +{ + int offset = util_bitcount(rctx->fragment_images.enabled_mask); + evergreen_emit_image_state(rctx, atom, + R600_IMAGE_IMMED_RESOURCE_OFFSET, + R600_IMAGE_REAL_RESOURCE_OFFSET, offset); } static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) @@ -1852,6 +1864,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r i++; } i += 
util_bitcount(rctx->fragment_images.enabled_mask); + i += util_bitcount(rctx->fragment_buffers.enabled_mask); for (; i < 8 ; i++) radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); for (; i < 12; i++) @@ -1966,7 +1979,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_ struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1; unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1; - unsigned rat_colormask = ((1ULL << ((unsigned)a->nr_image_rats * 4)) - 1) << (a->nr_cbufs * 4); + unsigned rat_colormask = ((1ULL << ((unsigned)(a->nr_image_rats + a->nr_buffer_rats) * 4)) - 1) << (a->nr_cbufs * 4); radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */ /* This must match the used export instructions exactly. @@ -3871,6 +3884,116 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, } } +static void evergreen_set_shader_buffers(struct pipe_context *ctx, + enum pipe_shader_type shader, unsigned start_slot, + unsigned count, + const struct pipe_shader_buffer *buffers) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_screen *rscreen = (struct r600_screen *)ctx->screen; + struct r600_image_state *istate = NULL; + struct r600_image_view *rview; + struct r600_tex_color_info color; + struct eg_buf_res_params buf_params; + struct r600_resource *resource; + int i, idx; + unsigned old_mask; + bool skip_reloc = false; + + if (shader != PIPE_SHADER_FRAGMENT && count == 0) + return; + + assert(shader == PIPE_SHADER_FRAGMENT); + istate = &rctx->fragment_buffers; + + old_mask = istate->enabled_mask; + for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { + const struct pipe_shader_buffer *buf; + unsigned res_type; + + rview = &istate->views[i]; + + if (!buffers || 
!buffers[idx].buffer) { + pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL); + istate->enabled_mask &= ~(1 << i); + continue; + } + + buf = &buffers[idx]; + pipe_resource_reference((struct pipe_resource **)&rview->base.resource, buf->buffer); + + resource = (struct r600_resource *)rview->base.resource; + if (!resource->immed_buffer) { + int immed_size = (rscreen->b.info.max_se * 256 * 64) * util_format_get_blocksize(resource->b.b.format); + + eg_resource_alloc_immed(&rscreen->b, resource, immed_size); + } + + color.offset = 0; + color.view = 0; + evergreen_set_color_surface_buffer(rctx, resource, + PIPE_FORMAT_R32_FLOAT, + buf->buffer_offset, + buf->buffer_offset + buf->buffer_size, + &color); + + res_type = V_028C70_BUFFER; + + rview->cb_color_base = color.offset; + rview->cb_color_dim = color.dim; + rview->cb_color_info = color.info | + S_028C70_RAT(1) | + S_028C70_RESOURCE_TYPE(res_type); + rview->cb_color_pitch = color.pitch; + rview->cb_color_slice = color.slice; + rview->cb_color_view = color.view; + rview->cb_color_attrib = color.attrib; + rview->cb_color_fmask = color.fmask; + rview->cb_color_fmask_slice = color.fmask_slice; + + memset(&buf_params, 0, sizeof(buf_params)); + buf_params.pipe_format = resource->b.b.format; + buf_params.size = resource->immed_buffer->b.b.width0; + buf_params.swizzle[0] = PIPE_SWIZZLE_X; + buf_params.swizzle[1] = PIPE_SWIZZLE_Y; + buf_params.swizzle[2] = PIPE_SWIZZLE_Z; + buf_params.swizzle[3] = PIPE_SWIZZLE_W; + buf_params.uncached = 1; + evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b, + &buf_params, &skip_reloc, + rview->immed_resource_words); + + memset(&buf_params, 0, sizeof(buf_params)); + buf_params.pipe_format = PIPE_FORMAT_R32_FLOAT; + buf_params.offset = buf->buffer_offset; + buf_params.size = buf->buffer_size; + buf_params.swizzle[0] = PIPE_SWIZZLE_X; + buf_params.swizzle[1] = PIPE_SWIZZLE_Y; + buf_params.swizzle[2] = PIPE_SWIZZLE_Z; + buf_params.swizzle[3] = 
PIPE_SWIZZLE_W; + buf_params.force_swizzle = true; + buf_params.uncached = 1; + evergreen_fill_buffer_resource_words(rctx, &resource->b.b, + &buf_params, + &rview->skip_mip_address_reloc, + rview->resource_words); + + istate->enabled_mask |= (1 << i); + } + + istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46; + + if (old_mask != istate->enabled_mask) + r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); + + if (rctx->cb_misc_state.nr_buffer_rats != util_bitcount(istate->enabled_mask)) { + rctx->cb_misc_state.nr_buffer_rats = util_bitcount(istate->enabled_mask); + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); + } + + r600_mark_atom_dirty(rctx, &istate->atom); +} + static void evergreen_set_shader_images(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start_slot, unsigned count, @@ -4079,6 +4202,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) } r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0); r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0); + r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0); /* shader const */ r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0); r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0); @@ -4148,6 +4272,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) rctx->b.b.set_tess_state = evergreen_set_tess_state; rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers; rctx->b.b.set_shader_images = evergreen_set_shader_images; + rctx->b.b.set_shader_buffers = evergreen_set_shader_buffers; if (rctx->b.chip_class == EVERGREEN) rctx->b.b.get_sample_position = evergreen_get_sample_position; else diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 
8ffd02b5ba7..4218d719207 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -348,8 +348,10 @@ void r600_begin_new_cs(struct r600_context *ctx) r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom); r600_mark_atom_dirty(ctx, &ctx->db_state.atom); r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom); - if (ctx->b.chip_class >= EVERGREEN) + if (ctx->b.chip_class >= EVERGREEN) { r600_mark_atom_dirty(ctx, &ctx->fragment_images.atom); + r600_mark_atom_dirty(ctx, &ctx->fragment_buffers.atom); + } r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom); r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom); r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9cdef10e480..01f9bf620f6 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -382,7 +382,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: - case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: case PIPE_CAP_QUERY_BUFFER_OBJECT: @@ -424,6 +423,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_CULL_DISTANCE: return 1; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + if (family >= CHIP_CEDAR) + return 256; + return 0; + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: if (family >= CHIP_CEDAR) return 30; @@ -609,10 +613,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: return 0; + case 
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: if (rscreen->b.family >= CHIP_CEDAR && (shader == PIPE_SHADER_FRAGMENT)) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index f651d66e070..e54fada9a65 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,7 +38,7 @@ #include "tgsi/tgsi_scan.h" -#define R600_NUM_ATOMS 53 +#define R600_NUM_ATOMS 54 #define R600_MAX_IMAGES 8 /* @@ -145,6 +145,7 @@ struct r600_cb_misc_state { unsigned nr_cbufs; unsigned nr_ps_color_outputs; unsigned nr_image_rats; + unsigned nr_buffer_rats; bool multiwrite; bool dual_src_blend; }; @@ -521,6 +522,7 @@ struct r600_context { struct r600_atomic_buffer_state atomic_buffer_state; /* only have images on fragment shader */ struct r600_image_state fragment_images; + struct r600_image_state fragment_buffers; /* Shaders and shader resources. */ struct r600_cso_state vertex_fetch_shader; struct r600_shader_state hw_shader_stages[EG_NUM_HW_STAGES]; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ae8326fdd14..5d78e4f8ade 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -968,6 +968,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SAMPLER: case TGSI_FILE_SAMPLER_VIEW: case TGSI_FILE_ADDRESS: + case TGSI_FILE_BUFFER: case TGSI_FILE_IMAGE: break; @@ -3064,7 +3065,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC]; shader->nsys_inputs = 0; - shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0; + shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 || + ctx.info.file_count[TGSI_FILE_BUFFER] > 0; indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); tgsi_parse_init(&ctx.parse, tokens); ctx.type = ctx.info.processor; @@ -7902,6 
+7904,79 @@ static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_g return 0; } +static int tgsi_load_buffer(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + /* have to work out the offset into the RAT immediate return buffer */ + struct r600_bytecode_vtx vtx; + struct r600_bytecode_cf *cf; + int r; + int temp_reg = r600_get_temp(ctx); + unsigned rat_index_mode; + unsigned base; + + rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE + base = R600_IMAGE_REAL_RESOURCE_OFFSET + ctx->info.file_count[TGSI_FILE_IMAGE]; + + if (inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE) { + int value = (ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]); + r = single_alu_op2(ctx, ALU_OP1_MOV, + temp_reg, 0, + V_SQ_ALU_SRC_LITERAL, value >> 2, + 0, 0); + if (r) + return r; + } else { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_LSHR_INT; + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 2; + alu.dst.sel = temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->bc->cf_last->barrier = 1; + memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); + vtx.op = FETCH_OP_VFETCH; + vtx.buffer_id = inst->Src[0].Register.Index + base; + vtx.buffer_index_mode = rat_index_mode; + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; + vtx.src_gpr = temp_reg; + vtx.src_sel_x = 0; + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; /* SEL_W */ + vtx.num_format_all = 1; + vtx.format_comp_all = 1; + vtx.srf_mode_all = 0; + + if (inst->Dst[0].Register.WriteMask == 0xf) { + vtx.data_format = FMT_32_32_32_32; + vtx.use_const_fields = 0; + } else if (inst->Dst[0].Register.WriteMask == 0x7) { + vtx.data_format = FMT_32_32_32; + vtx.use_const_fields = 0; + } else if (inst->Dst[0].Register.WriteMask == 0x3) { + vtx.data_format = FMT_32_32; + vtx.use_const_fields = 0; + } else + vtx.use_const_fields = 1; + + r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx); + if (r) + return r; + cf = ctx->bc->cf_last; + cf->barrier = 1; + return 0; +} + static int tgsi_load_rat(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -7982,6 +8057,95 @@ static int tgsi_load(struct r600_shader_ctx *ctx) return tgsi_load_rat(ctx); if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) return tgsi_load_gds(ctx); + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) + return tgsi_load_buffer(ctx); + return 0; +} + +static int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_cf *cf; + int r, i; + unsigned rat_index_mode; + int lasti; + int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx); + + if (inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) { + int value = (ctx->literals[4 * inst->Src[0].Register.Index + inst->Src[0].Register.SwizzleX]); + r = single_alu_op2(ctx, ALU_OP1_MOV, + treg2, 0, + V_SQ_ALU_SRC_LITERAL, value >> 2, + 0, 0); + if (r) + return r; + } else { + r = single_alu_op2(ctx, ALU_OP2_LSHR_INT, + treg2, 0, + ctx->src[0].sel, ctx->src[0].swizzle[0], + V_SQ_ALU_SRC_LITERAL, 2); + if (r) + return r; + } + + rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE + if (rat_index_mode) + egcm_load_index_reg(ctx->bc, 1, false); + + for (i = 0; i <= 3; i++) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.dst.sel = temp_reg; + alu.dst.chan = i; + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.last = (i == 3); + alu.dst.write = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + for (i = 0; i <= lasti; i++) { + struct r600_bytecode_alu alu; + if (!((1 << i) & inst->Dst[0].Register.WriteMask)) + continue; + + r = single_alu_op2(ctx, ALU_OP2_ADD_INT, + temp_reg, 0, + treg2, 0, + V_SQ_ALU_SRC_LITERAL, i); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + alu.last = 1; + alu.dst.write = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); + cf = ctx->bc->cf_last; + + cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index + ctx->info.file_count[TGSI_FILE_IMAGE]; + cf->rat.inst = V_RAT_INST_STORE_TYPED; + cf->rat.index_mode = rat_index_mode; + cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; + cf->output.gpr = ctx->temp_reg; + cf->output.index_gpr = temp_reg; + cf->output.comp_mask = 1; + cf->output.burst_count = 1; + cf->vpm = 1; + cf->barrier = 1; + cf->output.elem_size = 0; + } return 0; } @@ -8044,7 +8208,11 @@ static int tgsi_store_rat(struct r600_shader_ctx *ctx) static int tgsi_store(struct r600_shader_ctx *ctx) { - return tgsi_store_rat(ctx); + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) + return tgsi_store_buffer_rat(ctx); + else + return tgsi_store_rat(ctx); } static int tgsi_atomic_op_rat(struct 
r600_shader_ctx *ctx) @@ -8060,10 +8228,16 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx) const struct util_format_description *desc; unsigned rat_index_mode; unsigned immed_base; + unsigned rat_base; immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET; + rat_base = ctx->shader->rat_base; + + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + immed_base += ctx->info.file_count[TGSI_FILE_IMAGE]; + rat_base += ctx->info.file_count[TGSI_FILE_IMAGE]; + } - assert (inst->Src[0].Register.File == TGSI_FILE_IMAGE); rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE r = load_index_src(ctx, 1, &idx_gpr); @@ -8113,7 +8287,7 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx) r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT); cf = ctx->bc->cf_last; - cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index; + cf->rat.id = rat_base + inst->Src[0].Register.Index; cf->rat.inst = ctx->inst_info->op; cf->rat.index_mode = rat_index_mode; cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND; @@ -8264,6 +8438,8 @@ static int tgsi_atomic_op(struct r600_shader_ctx *ctx) return tgsi_atomic_op_rat(ctx); if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) return tgsi_atomic_op_gds(ctx); + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) + return tgsi_atomic_op_rat(ctx); return 0; } @@ -8275,7 +8451,8 @@ static int tgsi_resq(struct r600_shader_ctx *ctx) int r; boolean has_txq_cube_array_z = false; - if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { ctx->shader->uses_tex_buffers = true; return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d9b15929852..fee7a21d27d 100644 --- a/src/gallium/drivers/r600/r600_state_common.c 
+++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1333,26 +1333,35 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type { struct r600_textures_info *samplers = &rctx->samplers[shader_type]; struct r600_image_state *images = NULL; - int bits, sview_bits; + struct r600_image_state *buffers = NULL; + int bits, sview_bits, img_bits; uint32_t array_size; int i; uint32_t *constants; uint32_t base_offset; - if (shader_type == PIPE_SHADER_FRAGMENT) + if (shader_type == PIPE_SHADER_FRAGMENT) { images = &rctx->fragment_images; + buffers = &rctx->fragment_buffers; + } if (!samplers->views.dirty_buffer_constants && - (images && !images->dirty_buffer_constants)) + (images && !images->dirty_buffer_constants) && + (buffers && !buffers->dirty_buffer_constants)) return; if (images) images->dirty_buffer_constants = FALSE; + if (buffers) + buffers->dirty_buffer_constants = FALSE; samplers->views.dirty_buffer_constants = FALSE; bits = sview_bits = util_last_bit(samplers->views.enabled_mask); if (images) bits += util_last_bit(images->enabled_mask); + img_bits = bits; + if (buffers) + bits += util_last_bit(buffers->enabled_mask); array_size = bits * 2 * sizeof(uint32_t) * 4; constants = r600_alloc_buf_consts(rctx, shader_type, array_size, @@ -1366,7 +1375,7 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type } } if (images) { - for (i = sview_bits; i < bits; i++) { + for (i = sview_bits; i < img_bits; i++) { int idx = i - sview_bits; if (images->enabled_mask & (1 << idx)) { uint32_t offset = (base_offset / 4) + i * 2; @@ -1375,6 +1384,16 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type } } } + if (buffers) { + for (i = img_bits; i < bits; i++) { + int idx = i - img_bits; /* slot within fragment_buffers; i is the combined constant-array slot and must not index views[] */ + if (buffers->enabled_mask & (1 << idx)) { + uint32_t offset = (base_offset / 4) + i * 2; + constants[offset] = buffers->views[idx].base.resource->width0 / util_format_get_blocksize(buffers->views[idx].base.format); + 
constants[offset + 1] = 0; + } + } + } } /* set sample xy locations as array of fragment shader constants */ @@ -3027,6 +3046,24 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc r600_sampler_views_dirty(rctx, state); } } + + /* SSBOs */ + struct r600_image_state *istate = &rctx->fragment_buffers; + { + uint32_t mask = istate->enabled_mask; + bool found = false; + while (mask) { + unsigned i = u_bit_scan(&mask); + if (istate->views[i].base.resource == &rbuffer->b.b) { + found = true; + istate->dirty_mask |= 1 << i; + } + } + if (found) { + r600_mark_atom_dirty(rctx, &istate->atom); + } + } + } static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable) -- 2.30.2