From: Eric Anholt
Date: Fri, 24 Jan 2020 00:33:24 +0000 (-0800)
Subject: v3d: Ask the state tracker to lower image accesses off of derefs.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=12cf484d025e3ed980dbbd8d65f2f9b95c6388db;p=mesa.git

v3d: Ask the state tracker to lower image accesses off of derefs.

This saves a bunch of hassle in handling derefs in the backend, and
would be needed for reasonable handling of dynamic indexing of image
arrays.

Reviewed-by: Kenneth Graunke
Part-of:
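As a sketch of what the lowered form gives the backend (illustrative,
not part of the diff below): each index-form image intrinsic carries
the image unit as its first source and the format/dimensionality/
arrayness as intrinsic indices, so they can be read without walking a
deref chain:

    unsigned unit = nir_src_as_uint(instr->src[0]);         /* image unit */
    enum pipe_format format = nir_intrinsic_format(instr);  /* declared format */
    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
    bool is_array = nir_intrinsic_image_array(instr);

nir_src_as_uint() requires src[0] to be constant; a non-constant index
is exactly the dynamic-indexing case the message refers to.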
---

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 1479962fac8..6219b381d4e 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1760,11 +1760,8 @@ ntq_emit_ssa_undef(struct v3d_compile *c, nir_ssa_undef_instr *instr)
 static void
 ntq_emit_image_size(struct v3d_compile *c, nir_intrinsic_instr *instr)
 {
-        assert(instr->intrinsic == nir_intrinsic_image_deref_size);
-        nir_variable *var = nir_intrinsic_get_var(instr, 0);
-        unsigned image_index = var->data.driver_location;
-        const struct glsl_type *sampler_type = glsl_without_array(var->type);
-        bool is_array = glsl_sampler_type_is_array(sampler_type);
+        unsigned image_index = nir_src_as_uint(instr->src[0]);
+        bool is_array = nir_intrinsic_image_array(instr);
 
         ntq_store_dest(c, &instr->dest, 0,
                        vir_uniform(c, QUNIFORM_IMAGE_WIDTH, image_index));
@@ -2104,18 +2101,18 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 ntq_emit_tmu_general(c, instr, true);
                 break;
 
-        case nir_intrinsic_image_deref_load:
-        case nir_intrinsic_image_deref_store:
-        case nir_intrinsic_image_deref_atomic_add:
-        case nir_intrinsic_image_deref_atomic_imin:
-        case nir_intrinsic_image_deref_atomic_umin:
-        case nir_intrinsic_image_deref_atomic_imax:
-        case nir_intrinsic_image_deref_atomic_umax:
-        case nir_intrinsic_image_deref_atomic_and:
-        case nir_intrinsic_image_deref_atomic_or:
-        case nir_intrinsic_image_deref_atomic_xor:
-        case nir_intrinsic_image_deref_atomic_exchange:
-        case nir_intrinsic_image_deref_atomic_comp_swap:
+        case nir_intrinsic_image_load:
+        case nir_intrinsic_image_store:
+        case nir_intrinsic_image_atomic_add:
+        case nir_intrinsic_image_atomic_imin:
+        case nir_intrinsic_image_atomic_umin:
+        case nir_intrinsic_image_atomic_imax:
+        case nir_intrinsic_image_atomic_umax:
+        case nir_intrinsic_image_atomic_and:
+        case nir_intrinsic_image_atomic_or:
+        case nir_intrinsic_image_atomic_xor:
+        case nir_intrinsic_image_atomic_exchange:
+        case nir_intrinsic_image_atomic_comp_swap:
                 v3d40_vir_emit_image_load_store(c, instr);
                 break;
 
@@ -2205,7 +2202,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 ntq_emit_store_output(c, instr);
                 break;
 
-        case nir_intrinsic_image_deref_size:
+        case nir_intrinsic_image_size:
                 ntq_emit_image_size(c, instr);
                 break;
 
@@ -2553,10 +2550,6 @@ static void
 ntq_emit_instr(struct v3d_compile *c, nir_instr *instr)
 {
         switch (instr->type) {
-        case nir_instr_type_deref:
-                /* ignored, will be walked by the intrinsic using it. */
-                break;
-
        case nir_instr_type_alu:
                 ntq_emit_alu(c, nir_instr_as_alu(instr));
                 break;
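A note on the nir_to_vir.c hunks above: nir_src_as_uint() asserts that
the image index is a compile-time constant, which is all this backend
handles today. A hedged sketch of making that assumption explicit (the
guard is an illustration, not code from this patch):

    /* Dynamic indexing of an image array would reach the backend as a
     * non-constant source in the index form. */
    assert(nir_src_is_const(instr->src[0]));
    unsigned image_index = nir_src_as_uint(instr->src[0]);

The index form at least surfaces that case as an ordinary SSA source
instead of a deref chain that must be flattened to a constant offset.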
diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c
index 379f90c146a..1ba0ee7dccc 100644
--- a/src/broadcom/compiler/v3d40_tex.c
+++ b/src/broadcom/compiler/v3d40_tex.c
@@ -22,7 +22,6 @@
  */
 
 #include "v3d_compiler.h"
-#include "compiler/nir/nir_deref.h"
 
 /* We don't do any address packing. */
 #define __gen_user_data void
@@ -236,39 +235,32 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
         }
 }
 
-static void
-type_size_align_1(const struct glsl_type *type, unsigned *size, unsigned *align)
-{
-        *size = 1;
-        *align = 1;
-}
-
 static uint32_t
 v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr)
 {
         switch (instr->intrinsic) {
-        case nir_intrinsic_image_deref_load:
-        case nir_intrinsic_image_deref_store:
+        case nir_intrinsic_image_load:
+        case nir_intrinsic_image_store:
                 return V3D_TMU_OP_REGULAR;
-        case nir_intrinsic_image_deref_atomic_add:
+        case nir_intrinsic_image_atomic_add:
                 return v3d_get_op_for_atomic_add(instr, 3);
-        case nir_intrinsic_image_deref_atomic_imin:
+        case nir_intrinsic_image_atomic_imin:
                 return V3D_TMU_OP_WRITE_SMIN;
-        case nir_intrinsic_image_deref_atomic_umin:
+        case nir_intrinsic_image_atomic_umin:
                 return V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR;
-        case nir_intrinsic_image_deref_atomic_imax:
+        case nir_intrinsic_image_atomic_imax:
                 return V3D_TMU_OP_WRITE_SMAX;
-        case nir_intrinsic_image_deref_atomic_umax:
+        case nir_intrinsic_image_atomic_umax:
                 return V3D_TMU_OP_WRITE_UMAX;
-        case nir_intrinsic_image_deref_atomic_and:
+        case nir_intrinsic_image_atomic_and:
                 return V3D_TMU_OP_WRITE_AND_READ_INC;
-        case nir_intrinsic_image_deref_atomic_or:
+        case nir_intrinsic_image_atomic_or:
                 return V3D_TMU_OP_WRITE_OR_READ_DEC;
-        case nir_intrinsic_image_deref_atomic_xor:
+        case nir_intrinsic_image_atomic_xor:
                 return V3D_TMU_OP_WRITE_XOR_READ_NOT;
-        case nir_intrinsic_image_deref_atomic_exchange:
+        case nir_intrinsic_image_atomic_exchange:
                 return V3D_TMU_OP_WRITE_XCHG_READ_FLUSH;
-        case nir_intrinsic_image_deref_atomic_comp_swap:
+        case nir_intrinsic_image_atomic_comp_swap:
                 return V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH;
         default:
                 unreachable("unknown image intrinsic");
@@ -279,11 +271,8 @@ void
 v3d40_vir_emit_image_load_store(struct v3d_compile *c,
                                 nir_intrinsic_instr *instr)
 {
-        nir_variable *var = nir_intrinsic_get_var(instr, 0);
-        const struct glsl_type *sampler_type = glsl_without_array(var->type);
-        unsigned unit = (var->data.driver_location +
-                         nir_deref_instr_get_const_offset(nir_src_as_deref(instr->src[0]),
-                                                          type_size_align_1));
+        unsigned format = nir_intrinsic_format(instr);
+        unsigned unit = nir_src_as_uint(instr->src[0]);
         int tmu_writes = 0;
 
         struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
@@ -291,7 +280,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
 
         struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
                 .per_pixel_mask_enable = true,
-                .output_type_32_bit = v3d_gl_format_is_return_32(var->data.image.format),
+                .output_type_32_bit = v3d_gl_format_is_return_32(format),
         };
 
         struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 };
@@ -302,12 +291,12 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
          * need/can to do things slightly different, like not loading the
          * amount to add/sub, as that is implicit.
          */
-        bool atomic_add_replaced = (instr->intrinsic == nir_intrinsic_image_deref_atomic_add &&
-                                    (p2_unpacked.op == V3D_TMU_OP_WRITE_AND_READ_INC ||
-                                     p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC));
+        bool atomic_add_replaced = (instr->intrinsic == nir_intrinsic_image_atomic_add &&
+                                    (p2_unpacked.op == V3D_TMU_OP_WRITE_AND_READ_INC ||
+                                     p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC));
 
         bool is_1d = false;
-        switch (glsl_get_sampler_dim(sampler_type)) {
+        switch (nir_intrinsic_image_dim(instr)) {
         case GLSL_SAMPLER_DIM_1D:
                 is_1d = true;
                 break;
@@ -329,7 +318,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
                 unreachable("bad image sampler dim");
         }
 
-        if (glsl_sampler_type_is_array(sampler_type)) {
+        if (nir_intrinsic_image_array(instr)) {
                 vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUI,
                               ntq_get_src(c, instr->src[1],
                                           is_1d ? 1 : 2), &tmu_writes);
@@ -373,7 +362,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
         vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);
 
         /* Emit the data writes for atomics or image store. */
-        if (instr->intrinsic != nir_intrinsic_image_deref_load &&
+        if (instr->intrinsic != nir_intrinsic_image_load &&
             !atomic_add_replaced) {
                 /* Vector for stores, or first atomic argument */
                 struct qreg src[4];
@@ -385,7 +374,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
 
                 /* Second atomic argument */
                 if (instr->intrinsic ==
-                    nir_intrinsic_image_deref_atomic_comp_swap) {
+                    nir_intrinsic_image_atomic_comp_swap) {
                         vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUD,
                                       ntq_get_src(c, instr->src[4], 0),
                                       &tmu_writes);
@@ -393,7 +382,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
         }
 
         if (vir_in_nonuniform_control_flow(c) &&
-            instr->intrinsic != nir_intrinsic_image_deref_load) {
+            instr->intrinsic != nir_intrinsic_image_load) {
                 vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
                            V3D_QPU_PF_PUSHZ);
         }
@@ -402,7 +391,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
                       &tmu_writes);
 
         if (vir_in_nonuniform_control_flow(c) &&
-            instr->intrinsic != nir_intrinsic_image_deref_load) {
+            instr->intrinsic != nir_intrinsic_image_load) {
                 struct qinst *last_inst= (struct qinst *)c->cur_block->instructions.prev;
                 vir_set_cond(last_inst, V3D_QPU_COND_IFA);
         }
@@ -423,6 +412,6 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
         if (nir_intrinsic_dest_components(instr) == 0)
                 vir_TMUWT(c);
 
-        if (instr->intrinsic != nir_intrinsic_image_deref_load)
+        if (instr->intrinsic != nir_intrinsic_image_load)
                 c->tmu_dirty_rcl = true;
 }
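Why the type_size_align_1() helper could be deleted above: it existed
only so nir_deref_instr_get_const_offset() would count one image unit
per array element while flattening the deref chain. With the index
form, the state tracker has already folded the variable's
driver_location and any constant array offset into src[0]. A
side-by-side sketch mirroring the hunk (old_unit/new_unit are
illustrative names, not from the patch):

    /* Old: flatten the deref chain into a constant element offset. */
    unsigned old_unit = var->data.driver_location +
                        nir_deref_instr_get_const_offset(nir_src_as_deref(instr->src[0]),
                                                         type_size_align_1);

    /* New: the offset arrives precomputed as a constant source. */
    unsigned new_unit = nir_src_as_uint(instr->src[0]);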
diff --git a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
index e4db541f1b4..7b6b0b15cf8 100644
--- a/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
+++ b/src/broadcom/compiler/v3d_nir_lower_image_load_store.c
@@ -87,8 +87,7 @@ pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
 static void
 v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
 {
-        nir_variable *var = nir_intrinsic_get_var(instr, 0);
-        enum pipe_format format = var->data.image.format;
+        enum pipe_format format = nir_intrinsic_format(instr);
         const struct util_format_description *desc =
                 util_format_description(format);
         const struct util_format_channel_description *r_chan = &desc->channel[0];
@@ -164,19 +163,20 @@ static void
 v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
 {
         static const unsigned bits16[] = {16, 16, 16, 16};
-        nir_variable *var = nir_intrinsic_get_var(instr, 0);
-        const struct glsl_type *sampler_type = glsl_without_array(var->type);
-        enum glsl_base_type base_type =
-                glsl_get_sampler_result_type(sampler_type);
+        enum pipe_format format = nir_intrinsic_format(instr);
 
-        if (v3d_gl_format_is_return_32(var->data.image.format))
+        if (v3d_gl_format_is_return_32(format))
                 return;
 
         b->cursor = nir_after_instr(&instr->instr);
 
         assert(instr->dest.is_ssa);
         nir_ssa_def *result = &instr->dest.ssa;
-        if (base_type == GLSL_TYPE_FLOAT) {
+        if (util_format_is_pure_uint(format)) {
+                result = nir_format_unpack_uint(b, result, bits16, 4);
+        } else if (util_format_is_pure_sint(format)) {
+                result = nir_format_unpack_sint(b, result, bits16, 4);
+        } else {
                 nir_ssa_def *rg = nir_channel(b, result, 0);
                 nir_ssa_def *ba = nir_channel(b, result, 1);
                 result = nir_vec4(b,
@@ -184,11 +184,6 @@ v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
                           nir_unpack_half_2x16_split_y(b, rg),
                           nir_unpack_half_2x16_split_x(b, ba),
                           nir_unpack_half_2x16_split_y(b, ba));
-        } else if (base_type == GLSL_TYPE_INT) {
-                result = nir_format_unpack_sint(b, result, bits16, 4);
-        } else {
-                assert(base_type == GLSL_TYPE_UINT);
-                result = nir_format_unpack_uint(b, result, bits16, 4);
         }
 
         nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, nir_src_for_ssa(result),
@@ -214,10 +209,10 @@ v3d_nir_lower_image_load_store(nir_shader *s)
                                 nir_instr_as_intrinsic(instr);
 
                         switch (intr->intrinsic) {
-                        case nir_intrinsic_image_deref_load:
+                        case nir_intrinsic_image_load:
                                 v3d_nir_lower_image_load(&b, intr);
                                 break;
-                        case nir_intrinsic_image_deref_store:
+                        case nir_intrinsic_image_store:
                                 v3d_nir_lower_image_store(&b, intr);
                                 break;
                         default:
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 1b0c219cb58..a449e458035 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -146,6 +146,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                  */
                 return 0;
 
+        case PIPE_CAP_NIR_IMAGES_AS_DEREF:
+                return 0;
+
         case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
                 /* XXX perf: we don't want to emit these extra blits for
                  * glReadPixels(), since we still have to do an uncached read
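For context on the new cap (a sketch of the consumer side; the actual
call site lives in the GL state tracker, not in this patch): gallium
frontends query it through pipe_screen::get_param() and keep image
intrinsics in deref form only when the driver asks for that:

    /* v3d returns 0 here, asking the state tracker to lower image
     * derefs to index-form intrinsics before handing the NIR over. */
    bool images_as_deref =
            pscreen->get_param(pscreen, PIPE_CAP_NIR_IMAGES_AS_DEREF);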