From: Jason Ekstrand Date: Fri, 25 Mar 2016 18:19:53 +0000 (-0700) Subject: i965/fs: Move the code for load/store_shared to emit_cs_intrinsic X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5ea3647f89abccea5496824815b5b729f38f7a23;p=mesa.git i965/fs: Move the code for load/store_shared to emit_cs_intrinsic They are compute-shader only and that's where the code for doing atomics on shared variables lives so it seemes to make sense. Reviewed-by: Jordan Justen --- diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index b804f3c953f..90b878913b3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2368,6 +2368,82 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); break; + case nir_intrinsic_load_shared: { + assert(devinfo->gen >= 7); + + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Get the offset to read from */ + fs_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + + /* Read the vector */ + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (int i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + + break; + } + + case nir_intrinsic_store_shared: { + assert(devinfo->gen >= 7); + + /* Block index */ + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Value */ + fs_reg val_reg = get_nir_src(instr->src[0]); + + /* Writemask */ + unsigned writemask = instr->const_index[1]; + + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. + */ + while (writemask) { + unsigned first_component = ffs(writemask) - 1; + unsigned length = ffs(~(writemask >> first_component)) - 1; + fs_reg offset_reg; + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] + + 4 * first_component); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0] + 4 * first_component)); + } + + emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, first_component), + 1 /* dims */, length, + BRW_PREDICATE_NONE); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. + */ + writemask &= (15 << (first_component + length)); + } + + break; + } + default: nir_emit_intrinsic(bld, instr); break; @@ -2691,82 +2767,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_shared: { - assert(devinfo->gen >= 7); - - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Get the offset to read from */ - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0])); - } - - /* Read the vector */ - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, - instr->num_components, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - for (int i = 0; i < instr->num_components; i++) - bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); - - break; - } - - case nir_intrinsic_store_shared: { - assert(devinfo->gen >= 7); - - /* Block index */ - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Value */ - fs_reg val_reg = get_nir_src(instr->src[0]); - - /* Writemask */ - unsigned writemask = instr->const_index[1]; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned length = ffs(~(writemask >> first_component)) - 1; - fs_reg offset_reg; - - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] + - 4 * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0] + 4 * first_component)); - } - - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, first_component), - 1 /* dims */, length, - BRW_PREDICATE_NONE); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + length)); - } - - break; - } - case nir_intrinsic_load_input: { fs_reg src; if (stage == MESA_SHADER_VERTEX) {