Merge branch 'master' of ../mesa into vulkan
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index da8d47f1c5ec7d5599ddc8898bc5d2770c650b5d..61d0c896b8ea50e9b77076fdf7140ff713106568 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 #include "brw_fs.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_nir.h"
+#include "brw_fs_surface_builder.h"
 
 using namespace brw;
+using namespace brw::surface_access;
 
 void
 fs_visitor::emit_nir_code()
@@ -338,6 +340,20 @@ emit_system_values_block(nir_block *block, void *void_visitor)
                                  BRW_REGISTER_TYPE_D));
          break;
 
+      case nir_intrinsic_load_local_invocation_id:
+         assert(v->stage == MESA_SHADER_COMPUTE);
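+         /* nir_system_values[] entries start out as BAD_FILE; the payload
+          * setup for each compute system value is emitted lazily, the first
+          * time the corresponding intrinsic is seen.
+          */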
+         reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_cs_local_invocation_id_setup();
+         break;
+
+      case nir_intrinsic_load_work_group_id:
+         assert(v->stage == MESA_SHADER_COMPUTE);
+         reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_cs_work_group_id_setup();
+         break;
+
       default:
          break;
       }
@@ -1437,6 +1453,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_image_samples:
+      /* The driver does not support multi-sampled images. */
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+      break;
+
    case nir_intrinsic_load_front_face:
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
               *emit_frontfacing_interpolation());
@@ -1445,35 +1466,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-   case nir_intrinsic_load_vertex_id_zero_base: {
-      fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
-      assert(vertex_id.file != BAD_FILE);
-      dest.type = vertex_id.type;
-      bld.MOV(dest, vertex_id);
-      break;
-   }
-
-   case nir_intrinsic_load_base_vertex: {
-      fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
-      assert(base_vertex.file != BAD_FILE);
-      dest.type = base_vertex.type;
-      bld.MOV(dest, base_vertex);
-      break;
-   }
-
-   case nir_intrinsic_load_instance_id: {
-      fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
-      assert(instance_id.file != BAD_FILE);
-      dest.type = instance_id.type;
-      bld.MOV(dest, instance_id);
-      break;
-   }
-
-   case nir_intrinsic_load_sample_mask_in: {
-      fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
-      assert(sample_mask_in.file != BAD_FILE);
-      dest.type = sample_mask_in.type;
-      bld.MOV(dest, sample_mask_in);
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      bld.MOV(dest, val);
       break;
    }
 
@@ -1486,14 +1488,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_sample_id: {
-      fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
-      assert(sample_id.file != BAD_FILE);
-      dest.type = sample_id.type;
-      bld.MOV(dest, sample_id);
-      break;
-   }
-
    case nir_intrinsic_load_uniform_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -1546,7 +1540,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       if (has_indirect) {
@@ -1583,6 +1577,68 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+      assert(devinfo->gen >= 7);
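+      /* SSBO loads are implemented with the data port's untyped surface
+       * read messages, which only exist on gen7+.
+       */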
+
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[0]);
+
+      fs_reg surf_index;
+      if (const_uniform_block) {
+         unsigned index = stage_prog_data->binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
+      } else {
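+         /* The non-constant surface index is divergent (one value per
+          * channel), but the message descriptor wants a single scalar
+          * index, hence the emit_uniformize() below.
+          */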
+         surf_index = vgrf(glsl_type::uint_type);
+         bld.ADD(surf_index, get_nir_src(instr->src[0]),
+                 fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
+
+         /* Assume this may touch any UBO. It would be nice to provide
+          * a tighter bound, but the array information is already lowered away.
+          */
+         brw_mark_surface_used(prog_data,
+                               stage_prog_data->binding_table.ubo_start +
+                               shader_prog->NumBufferInterfaceBlocks - 1);
+      }
+
+      /* Get the offset to read from */
+      fs_reg offset_reg = vgrf(glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+      }
+
+      /* Read the vector */
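+      /* Each untyped surface read returns one dword per SIMD channel, so
+       * the vector is fetched one component at a time.
+       */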
+      for (int i = 0; i < instr->num_components; i++) {
+         fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                                1 /* dims */, 1 /* size */,
+                                                BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+
+         /* Vector components are stored contiguously in memory; step the
+          * offset to the next dword after every component but the last.
+          */
+         if (i < instr->num_components - 1) {
+            if (!has_indirect) {
+               const_offset_bytes += 4;
+               bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+            } else {
+               bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+            }
+         }
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_input_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -1717,6 +1773,75 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_store_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      fs_reg surf_index;
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[1]);
+      if (const_uniform_block) {
+         unsigned index = stage_prog_data->binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
+      } else {
+         surf_index = vgrf(glsl_type::uint_type);
+         bld.ADD(surf_index, get_nir_src(instr->src[1]),
+                 fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
+
+         brw_mark_surface_used(prog_data,
+                               stage_prog_data->binding_table.ubo_start +
+                               shader_prog->NumBufferInterfaceBlocks - 1);
+      }
+
+      /* Offset */
+      fs_reg offset_reg = vgrf(glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+      }
+
+      /* Value */
+      fs_reg val_reg = get_nir_src(instr->src[0]);
+
+      /* Writemask */
+      unsigned writemask = instr->const_index[1];
+
+      /* Write each component present in the writemask */
+      unsigned skipped_channels = 0;
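+      /* skipped_channels counts how many components the offset register
+       * lags behind the current channel; before an enabled channel is
+       * written, the offset is advanced by 4 bytes for each outstanding
+       * component, since components are stored as contiguous dwords.
+       */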
+      for (int i = 0; i < instr->num_components; i++) {
+         int component_mask = 1 << i;
+         if (writemask & component_mask) {
+            if (skipped_channels) {
+               if (!has_indirect) {
+                  const_offset_bytes += 4 * skipped_channels;
+                  bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+               } else {
+                  bld.ADD(offset_reg, offset_reg,
+                          brw_imm_ud(4 * skipped_channels));
+               }
+               skipped_channels = 0;
+            }
+
+            emit_untyped_write(bld, surf_index, offset_reg,
+                               offset(val_reg, bld, i),
+                               1 /* dims */, 1 /* size */,
+                               BRW_PREDICATE_NONE);
+         }
+
+         skipped_channels++;
+      }
+      break;
+   }
+
    case nir_intrinsic_store_output_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -1737,13 +1862,155 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_barrier:
       emit_barrier();
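+      /* Record the barrier so it can be enabled in the hardware's compute
+       * interface descriptor when the CS state is uploaded.
+       */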
+      if (stage == MESA_SHADER_COMPUTE)
+         ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
+      break;
+
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_work_group_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      for (unsigned i = 0; i < 3; i++)
+         bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+      break;
+   }
+
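+   /* All SSBO atomics funnel through nir_emit_ssbo_atomic(); only the
+    * data-port atomic opcode differs. For min/max, signedness is taken
+    * from the destination type.
+    */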
+   case nir_intrinsic_ssbo_atomic_add:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_min:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+      else
+         nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_max:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+      else
+         nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_and:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_or:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_xor:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
       break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+      break;
+
+   case nir_intrinsic_get_buffer_size: {
+      nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+      unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+      int reg_width = dispatch_width / 8;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
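+      /* The size query is sent as a resinfo-style sampler message: the
+       * payload carries LOD 0, and the reply returns the size of the bound
+       * buffer surface.
+       */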
+      /* Set LOD = 0 */
+      fs_reg source = fs_reg(0);
+
+      int mlen = 1 * reg_width;
+      fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+                                  BRW_REGISTER_TYPE_UD);
+      bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+
+      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+      fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+                               src_payload, surf_index);
+      inst->header_size = 0;
+      inst->mlen = mlen;
+      break;
+   }
+
+   case nir_intrinsic_load_num_work_groups: {
+      assert(devinfo->gen >= 7);
+      assert(stage == MESA_SHADER_COMPUTE);
+
+      struct brw_cs_prog_data *cs_prog_data =
+         (struct brw_cs_prog_data *) prog_data;
+      const unsigned surface =
+         cs_prog_data->binding_table.work_groups_start;
+
+      cs_prog_data->uses_num_work_groups = true;
+
+      fs_reg surf_index = fs_reg(surface);
+      brw_mark_surface_used(prog_data, surface);
+
+      /* Read the 3 GLuint components of gl_NumWorkGroups */
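+      /* Each component is one dword, so component i is read from byte
+       * offset i * 4 (i << 2) in the surface.
+       */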
+      for (unsigned i = 0; i < 3; i++) {
+         fs_reg read_result =
+            emit_untyped_read(bld, surf_index,
+                              fs_reg(i << 2),
+                              1 /* dims */, 1 /* size */,
+                              BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+      }
+      break;
+   }
 
    default:
       unreachable("unknown intrinsic");
    }
 }
 
+void
+fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+                                 int op, nir_intrinsic_instr *instr)
+{
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   fs_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+      unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+                            const_surface->u[0];
+      surface = fs_reg(surf_index);
+      brw_mark_surface_used(prog_data, surf_index);
+   } else {
+      surface = vgrf(glsl_type::uint_type);
+      bld.ADD(surface, get_nir_src(instr->src[0]),
+              fs_reg(stage_prog_data->binding_table.ubo_start));
+
+      /* Assume this may touch any UBO. This is the same as we do for other
+       * UBO/SSBO accesses with a non-constant surface index.
+       */
+      brw_mark_surface_used(prog_data,
+                            stage_prog_data->binding_table.ubo_start +
+                            shader_prog->NumBufferInterfaceBlocks - 1);
+   }
+
+   fs_reg offset = get_nir_src(instr->src[1]);
+   fs_reg data1 = get_nir_src(instr->src[2]);
+   fs_reg data2;
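+   /* Only CMPWR (compare-and-write, used for comp_swap) takes a second
+    * data operand.
+    */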
+   if (op == BRW_AOP_CMPWR)
+      data2 = get_nir_src(instr->src[3]);
+
+   /* Emit the actual atomic operation */
+
+   fs_reg atomic_result =
+      surface_access::emit_untyped_atomic(bld, surface, offset,
+                                          data1, data2,
+                                          1 /* dims */, 1 /* rsize */,
+                                          op,
+                                          BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
 void
 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
@@ -1885,6 +2152,16 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
    case nir_texop_txf_ms: op = ir_txf_ms; break;
    case nir_texop_txl: op = ir_txl; break;
    case nir_texop_txs: op = ir_txs; break;
+   case nir_texop_texture_samples: {
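+      /* textureSamples() doesn't go through the shared texture-emission
+       * path below; emit a SAMPLEINFO message, which returns the sample
+       * count of the bound surface, and return early.
+       */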
+      fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+      fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+                               bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+                               sampler_reg);
+      inst->mlen = 1;
+      inst->header_size = 1;
+      inst->base_mrf = -1;
+      return;
+   }
    default:
       unreachable("unknown texture opcode");
    }