From 033968a94eab4bc40fbe642f23e0ec996a3fa7fa Mon Sep 17 00:00:00 2001
From: Gert Wollny
Date: Mon, 18 May 2020 20:36:16 +0200
Subject: [PATCH] r600/sfn: Add lowering pass for shared IO

Lower shared load and store to use the r600 specific intrinsics.

Signed-off-by: Gert Wollny
Part-of:
---
 src/gallium/drivers/r600/sfn/sfn_nir.cpp | 90 ++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index 8ae92fd09eb..0a4246fb067 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -512,6 +512,94 @@ r600_get_natural_size_align_bytes(const struct glsl_type *type,
    }
 }
 
+static bool // Replaces every load_shared/store_shared in one function with the r600 local-shared intrinsics; returns true if anything was replaced.
+r600_lower_shared_io_impl(nir_function *func) // func->impl is dereferenced unconditionally; the caller added by this patch checks it first
+{
+   nir_builder b;
+   nir_builder_init(&b, func->impl);
+
+   bool progress = false;
+   nir_foreach_block(block, func->impl) {
+      nir_foreach_instr_safe(instr, block) { // presumably the _safe iterator is used because instr is removed inside the loop - confirm
+
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+         if (op->intrinsic != nir_intrinsic_load_shared && // only shared loads and shared stores are rewritten
+             op->intrinsic != nir_intrinsic_store_shared)
+            continue;
+
+         b.cursor = nir_before_instr(instr); // replacement instructions are built in front of the original intrinsic
+
+         if (op->intrinsic == nir_intrinsic_load_shared) {
+            nir_ssa_def *addr = op->src[0].ssa; // load address; used unchanged when the component count matches no case below (e.g. 1)
+
+            switch (nir_dest_num_components(op->dest)) { // expand addr into one address per loaded component, 4 apart
+            case 2: {
+               auto addr2 = nir_iadd_imm(&b, addr, 4);
+               addr = nir_vec2(&b, addr, addr2); // (addr, addr + 4)
+               break;
+            }
+            case 3: {
+               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8)); // NOTE(review): assumes addr is scalar and is broadcast against the ivec2 - confirm
+               addr = nir_vec3(&b, addr, // (addr, addr + 4, addr + 8) under that assumption
+                               nir_channel(&b, addr2, 0),
+                               nir_channel(&b, addr2, 1));
+               break;
+            }
+            case 4: {
+               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12)); // (addr, addr + 4, addr + 8, addr + 12), same broadcast assumption
+               break;
+            }
+            }
+
+            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
+            load->num_components = nir_dest_num_components(op->dest); // same component count as the original load
+            load->src[0] = nir_src_for_ssa(addr);
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              load->num_components, 32, NULL); // destination bit size is hard-coded to 32
+            nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa)); // users of the old result are redirected to the new load
+            nir_builder_instr_insert(&b, &load->instr);
+         } else {
+            nir_ssa_def *addr = op->src[1].ssa; // the store address is taken from src[1]
+            for (int i = 0; i < 2; ++i) { // one iteration per component pair: i = 0 tests mask bits 0-1, i = 1 tests bits 2-3
+               unsigned test_mask = (0x3 << 2 * i);
+               if (!(nir_intrinsic_write_mask(op) & test_mask)) // nothing is written in this pair, so no store is emitted for it
+                  continue;
+
+               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
+               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask; // original write mask restricted to this pair
+               nir_intrinsic_set_write_mask(store, writemask);
+               store->src[0] = nir_src_for_ssa(op->src[0].ssa); // src[0] (presumably the stored value) is forwarded whole, not split per pair
+               store->num_components = store->src[0].ssa->num_components;
+               bool start_even = (writemask & (1u << (2 * i))); // true when the first component of the pair is written
+
+               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4))); // pair offset is 8 * i, plus 4 when only the second component of the pair is written
+               store->src[1] = nir_src_for_ssa(addr2);
+
+               nir_builder_instr_insert(&b, &store->instr);
+            }
+         }
+         nir_instr_remove(instr); // the original load_shared/store_shared is removed after its replacement was emitted
+         progress = true;
+      }
+   }
+   return progress;
+}
+
+static bool // Runs r600_lower_shared_io_impl on each function of the shader; returns true if any of them reported a change.
+r600_lower_shared_io(nir_shader *nir)
+{
+   bool progress=false;
+   nir_foreach_function(function, nir) {
+      if (function->impl && // functions without an impl are skipped
+          r600_lower_shared_io_impl(function))
+         progress = true;
+   }
+   return progress;
+}
+
 static bool
 optimize_once(nir_shader *shader)
 {
@@ -575,6 +663,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
    NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
    NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
 
+   NIR_PASS_V(sel->nir, r600_lower_shared_io); // rewrite shared load/store into the r600-specific intrinsics
+
    static const struct nir_lower_tex_options lower_tex_options = {
       .lower_txp = ~0u,
    };
-- 
2.30.2