nir: Add a new memory_barrier_tcs_patch intrinsic

[mesa.git] / src / compiler / nir / nir_intrinsics.py
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py

index 55050a9e0583d17597eb1405b2611bd9e7cc01cd..04f58b0172bdc6856107605e6fe35f0aef11c32f 100644 (file)
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -255,6 +255,9 @@ barrier("memory_barrier_shared")
  barrier("begin_invocation_interlock")
  barrier("end_invocation_interlock")
  
+# Memory barrier for synchronizing TCS patch outputs
+barrier("memory_barrier_tcs_patch")
+
  # A conditional discard/demote, with a single boolean source.
  intrinsic("discard_if", src_comp=[1])
  intrinsic("demote_if", src_comp=[1])
@@ -379,8 +382,8 @@ def image(name, src_comp=[], **kwargs):
      intrinsic("bindless_image_" + name, src_comp=[1] + src_comp,
                indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS], **kwargs)
  
-image("load", src_comp=[4, 1], dest_comp=0, flags=[CAN_ELIMINATE])
-image("store", src_comp=[4, 1, 0])
+image("load", src_comp=[4, 1, 1], dest_comp=0, flags=[CAN_ELIMINATE])
+image("store", src_comp=[4, 1, 0, 1])
  image("atomic_add",  src_comp=[4, 1, 1], dest_comp=1)
  image("atomic_imin",  src_comp=[4, 1, 1], dest_comp=1)
  image("atomic_umin",  src_comp=[4, 1, 1], dest_comp=1)
@@ -798,6 +801,14 @@ system_value("tess_factor_base_ir3", 2)
  system_value("tess_param_base_ir3", 2)
  system_value("tcs_header_ir3", 1)
  
+# IR3-specific intrinsics for tessellation control shaders.  cond_end_ir3 ends
+# the shader when src0 is false and is used to narrow down the TCS shader to
+# just thread 0 before writing out tessellation levels.
+intrinsic("cond_end_ir3", src_comp=[1])
+# end_patch_ir3 is used just before thread 0 exits the TCS and presumably
+# signals the TE that the patch is complete and can be tessellated.
+intrinsic("end_patch_ir3")
+
  # IR3-specific load/store intrinsics. These access a buffer used to pass data
  # between geometry stages - perhaps it's explicit access to the vertex cache.
  
@@ -828,10 +839,20 @@ intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, AL
  # One notable divergence is sRGB, which is asymmetric: raw_input_pan requires
  # an sRGB->linear conversion, but linear values should be written to
  # raw_output_pan and the hardware handles linear->sRGB.
+#
+# We also have format-specific Midgard intrinsics. These are rather
+# here-be-dragons. load_output_u8_as_fp16_pan does the equivalent of
+# load_raw_output_pan on an RGBA8 UNORM framebuffer followed by u2u16 ->
+# fp16 -> division by 255.
  
  # src[] = { value }
  store("raw_output_pan", 1, [])
  load("raw_output_pan", 0, [], [CAN_ELIMINATE, CAN_REORDER])
+load("output_u8_as_fp16_pan", 0, [], [CAN_ELIMINATE, CAN_REORDER])
+
+# Loads the sampler parameters <min_lod, max_lod, lod_bias>
+# src[] = { sampler_index }
+load("sampler_lod_parameters_pan", 1, [], [CAN_ELIMINATE, CAN_REORDER])
  
  # V3D-specific instrinc for tile buffer color reads.
  #
@@ -850,3 +871,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], [])
  # src[] = { value, render_target }
  # BASE = sample index
  store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], [])
+
+# V3D-specific intrinsic to load the number of layers attached to
+# the target framebuffer
+intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])