freedreno/ir3: Add load and store intrinsics for global io

author Kristian H. Kristensen <hoegsberg@google.com>

Tue, 22 Oct 2019 23:16:35 +0000 (16:16 -0700)

committer Kristian H. Kristensen <hoegsberg@google.com>

Fri, 8 Nov 2019 00:36:44 +0000 (16:36 -0800)
author Kristian H. Kristensen <hoegsberg@google.com>
Tue, 22 Oct 2019 23:16:35 +0000 (16:16 -0700)
committer Kristian H. Kristensen <hoegsberg@google.com>
Fri, 8 Nov 2019 00:36:44 +0000 (16:36 -0800)
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py

index 02c781e7181a0171ed47996e3196761d27acd715..9c1418f2a4ec3328a36a8b1c09e8e654fd2f96c4 100644 (file)
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -800,6 +800,17 @@ store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
  # src[] = { offset }.
  load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
  
+# IR3-specific load/store global intrinsics. They take a 64-bit base address
+# and a 32-bit offset.  The hardware will add the base and the offset, which
+# saves us from doing 64-bit math on the base address.
+
+# src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
+# const_index[] = { write_mask, access, align_mul, align_offset }
+intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+# src[] = { address(vec2 of hi+lo uint32_t), offset }.
+# const_index[] = { access, align_mul, align_offset }
+intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
+
  # Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
  # within a blend shader to read/write the raw value from the tile buffer,
  # without applying any format conversion in the process. If the shader needs
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c

index e0bf9280975c96d162dea6ebb6cb323eab295bc8..f7af73f8227c6d1b12316d1be1e899e3505d0cff 100644 (file)
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1376,6 +1376,55 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                 dst[0] = ctx->primitive_id;
                 break;
  
+       case nir_intrinsic_store_global_ir3: {
+               struct ir3_instruction *value, *addr, *offset;
+
+               addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
+                               ir3_get_src(ctx, &intr->src[1])[0],
+                               ir3_get_src(ctx, &intr->src[1])[1]
+               }, 2);
+
+               offset = ir3_get_src(ctx, &intr->src[2])[0];
+
+               value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]),
+                                                                  intr->num_components);
+
+               struct ir3_instruction *stg =
+                       ir3_STG_G(ctx->block, addr, 0, value, 0,
+                                         create_immed(ctx->block, intr->num_components), 0, offset, 0);
+               stg->cat6.type = TYPE_U32;
+               stg->cat6.iim_val = 1;
+
+               array_insert(b, b->keeps, stg);
+
+               stg->barrier_class = IR3_BARRIER_BUFFER_W;
+               stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+               break;
+       }
+
+       case nir_intrinsic_load_global_ir3: {
+               struct ir3_instruction *addr, *offset;
+
+               addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
+                               ir3_get_src(ctx, &intr->src[0])[0],
+                               ir3_get_src(ctx, &intr->src[0])[1]
+               }, 2);
+
+               offset = ir3_get_src(ctx, &intr->src[1])[0];
+
+               struct ir3_instruction *load =
+                       ir3_LDG(b, addr, 0, create_immed(ctx->block, intr->num_components),
+                                       0, offset, 0);
+               load->cat6.type = TYPE_U32;
+               load->regs[0]->wrmask = MASK(intr->num_components);
+
+               load->barrier_class = IR3_BARRIER_BUFFER_R;
+               load->barrier_conflict = IR3_BARRIER_BUFFER_W;
+
+               ir3_split_dest(b, dst, load, 0, intr->num_components);
+               break;
+       }
+
         case nir_intrinsic_load_ubo:
                 emit_intrinsic_load_ubo(ctx, intr, dst);
                 break;
author	Kristian H. Kristensen <hoegsberg@google.com>
	Tue, 22 Oct 2019 23:16:35 +0000 (16:16 -0700)
committer	Kristian H. Kristensen <hoegsberg@google.com>
	Fri, 8 Nov 2019 00:36:44 +0000 (16:36 -0800)
src/compiler/nir/nir_intrinsics.py		patch \| blob \| history
src/freedreno/ir3/ir3_compiler_nir.c		patch \| blob \| history