nir: Make image load/store intrinsics variable-width
author: Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 14 Aug 2018 19:03:05 +0000 (14:03 -0500)
committer: Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 29 Aug 2018 19:04:02 +0000 (14:04 -0500)
Instead of requiring 4 components, this allows them to potentially use
fewer.  Both the SPIR-V and GLSL paths still generate vec4 intrinsics so
drivers which assume 4 components should be safe.  However, we want to
be able to shrink them for i965.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/compiler/glsl/glsl_to_nir.cpp
src/compiler/nir/nir_intrinsics.py
src/compiler/spirv/spirv_to_nir.c

index efbb2317ac6e34f920290877aaea1d782d1334a5..22419abc571c82f2c205239d68e2bd6eee5626e9 100644 (file)
@@ -904,12 +904,17 @@ nir_visitor::visit(ir_call *ir)
          /* Set the intrinsic destination. */
          if (ir->return_deref) {
             unsigned num_components = ir->return_deref->type->vector_elements;
-            if (instr->intrinsic == nir_intrinsic_image_deref_size)
-               instr->num_components = num_components;
             nir_ssa_dest_init(&instr->instr, &instr->dest,
                               num_components, 32, NULL);
          }
 
+         if (op == nir_intrinsic_image_deref_size) {
+            instr->num_components = instr->dest.ssa.num_components;
+         } else if (op == nir_intrinsic_image_deref_load ||
+                    op == nir_intrinsic_image_deref_store) {
+            instr->num_components = 4;
+         }
+
          if (op == nir_intrinsic_image_deref_size ||
              op == nir_intrinsic_image_deref_samples) {
             nir_builder_instr_insert(&b, &instr->instr);
index 67d336fd536bc1bed9577bd7a6ff03183a2b9dff..17212c4862f1da775b7277199b76983c338873f0 100644 (file)
@@ -297,9 +297,9 @@ atomic3("atomic_counter_comp_swap")
 # argument with the value to be written, and image atomic operations take
 # either one or two additional scalar arguments with the same meaning as in
 # the ARB_shader_image_load_store specification.
-intrinsic("image_deref_load", src_comp=[1, 4, 1], dest_comp=4,
+intrinsic("image_deref_load", src_comp=[1, 4, 1], dest_comp=0,
           flags=[CAN_ELIMINATE])
-intrinsic("image_deref_store", src_comp=[1, 4, 1, 4])
+intrinsic("image_deref_store", src_comp=[1, 4, 1, 0])
 intrinsic("image_deref_atomic_add",  src_comp=[1, 4, 1, 1], dest_comp=1)
 intrinsic("image_deref_atomic_min",  src_comp=[1, 4, 1, 1], dest_comp=1)
 intrinsic("image_deref_atomic_max",  src_comp=[1, 4, 1, 1], dest_comp=1)
index b5ec2de7bf9cf9ccc332f0302fa31d0d7b4f9b55..962243540576664b81b2fe708a5ef3d2b226c064 100644 (file)
@@ -2467,6 +2467,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
       const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3];
       nir_ssa_def *value = vtn_ssa_value(b, value_id)->def;
       /* nir_intrinsic_image_deref_store always takes a vec4 value */
+      assert(op == nir_intrinsic_image_deref_store);
+      intrin->num_components = 4;
       intrin->src[3] = nir_src_for_ssa(expand_to_vec4(&b->nb, value));
       break;
    }