intel: Use TXS for image_size when we have a typed surface
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 16 Aug 2018 16:01:24 +0000 (11:01 -0500)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 29 Aug 2018 19:04:03 +0000 (14:04 -0500)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_nir_lower_image_load_store.c
src/intel/compiler/brw_shader.cpp

index 883616d6bab04ba4045a500dafbca8672b9efa04..52957882b10fa4f9cf5bc02831e91560c6ab2e30 100644 (file)
@@ -354,6 +354,8 @@ enum opcode {
    SHADER_OPCODE_SAMPLEINFO,
    SHADER_OPCODE_SAMPLEINFO_LOGICAL,
 
+   SHADER_OPCODE_IMAGE_SIZE,
+
    /**
     * Combines multiple sources of size 1 into a larger virtual GRF.
     * For example, parameters for a send-from-GRF message.  Or, updating
index d40ce2ce0d7592ff029af5e270a1cf796911aae8..cb402cd4e75a3cc4b53f325196307478f4d29f8c 100644 (file)
@@ -958,6 +958,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
          }
          break;
       case SHADER_OPCODE_TXS:
+      case SHADER_OPCODE_IMAGE_SIZE:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
       case SHADER_OPCODE_TXD:
@@ -1126,10 +1127,19 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
       }
    }
 
-   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
-         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
-         ? prog_data->binding_table.gather_texture_start
-         : prog_data->binding_table.texture_start;
+   uint32_t base_binding_table_index;
+   switch (inst->opcode) {
+   case SHADER_OPCODE_TG4:
+   case SHADER_OPCODE_TG4_OFFSET:
+      base_binding_table_index = prog_data->binding_table.gather_texture_start;
+      break;
+   case SHADER_OPCODE_IMAGE_SIZE:
+      base_binding_table_index = prog_data->binding_table.image_start;
+      break;
+   default:
+      base_binding_table_index = prog_data->binding_table.texture_start;
+      break;
+   }
 
    if (surface_index.file == BRW_IMMEDIATE_VALUE &&
        sampler_index.file == BRW_IMMEDIATE_VALUE) {
@@ -2114,6 +2124,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_SAMPLEINFO:
         generate_tex(inst, dst, src[0], src[1], src[2]);
         break;
+
+      case SHADER_OPCODE_IMAGE_SIZE:
+         generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
+         break;
+
       case FS_OPCODE_DDX_COARSE:
       case FS_OPCODE_DDX_FINE:
          generate_ddx(inst, dst, src[0]);
index aaba0e2a693ddfd3318bf7e72478b45baebfd490..2fef050f81a233491126a8e6d41da297d639beec 100644 (file)
@@ -3918,6 +3918,41 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_image_size: {
+      /* Unlike the [un]typed load and store opcodes, the TXS that this turns
+       * into will handle the binding table index for us in the generator.
+       */
+      fs_reg image = retype(get_nir_src_imm(instr->src[0]),
+                            BRW_REGISTER_TYPE_UD);
+      image = bld.emit_uniformize(image);
+
+      /* Since the image size is always uniform, we can just emit a SIMD8
+       * query instruction and splat the result out.
+       */
+      const fs_builder ubld = bld.exec_all().group(8, 0);
+
+      /* The LOD also serves as the message payload */
+      fs_reg lod = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+      ubld.MOV(lod, brw_imm_ud(0));
+
+      fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+      fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE, tmp, lod, image);
+      inst->mlen = 1;
+      inst->size_written = 4 * REG_SIZE;
+
+      for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
+         if (c == 2 && nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_CUBE) {
+            bld.emit(SHADER_OPCODE_INT_QUOTIENT,
+                     offset(retype(dest, tmp.type), bld, c),
+                     component(offset(tmp, ubld, c), 0), brw_imm_ud(6));
+         } else {
+            bld.MOV(offset(retype(dest, tmp.type), bld, c),
+                    component(offset(tmp, ubld, c), 0));
+         }
+      }
+      break;
+   }
+
    case nir_intrinsic_image_load_raw_intel: {
       const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg addr = retype(get_nir_src(instr->src[1]),
index 5eba9ddabd3e8bfeef99519cdf71c9a1eaae7a60..e8e00e1aa1918a8603a4db32fb8ea8f6db4f5f77 100644 (file)
@@ -725,6 +725,21 @@ lower_image_size_instr(nir_builder *b,
                        nir_intrinsic_instr *intrin)
 {
    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   /* For write-only images, we have an actual image surface so we fall back
+    * and let the back-end emit a TXS for this.
+    */
+   if (var->data.image.access & ACCESS_NON_READABLE)
+      return false;
+
+   /* If we have a matching typed format, then we have an actual image surface
+    * so we fall back and let the back-end emit a TXS for this.
+    */
+   const enum isl_format image_fmt =
+      isl_format_for_gl_format(var->data.image.format);
+   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
+      return false;
 
    b->cursor = nir_instr_remove(&intrin->instr);
 
index 27de7c497767a52b4b4e37415ca68eabab6c0555..798c799a8286862ecc007813ca98cd767e949c0a 100644 (file)
@@ -267,6 +267,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
    case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
       return "sampleinfo_logical";
 
+   case SHADER_OPCODE_IMAGE_SIZE:
+      return "image_size";
+
    case SHADER_OPCODE_SHADER_TIME_ADD:
       return "shader_time_add";