i965/cs: Reserve local invocation id in payload regs
author Jordan Justen <jordan.l.justen@intel.com>
Sat, 22 Nov 2014 02:47:49 +0000 (18:47 -0800)
committer Jordan Justen <jordan.l.justen@intel.com>
Sun, 13 Sep 2015 16:53:16 +0000 (09:53 -0700)
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_cs.cpp
src/mesa/drivers/dri/i965/brw_cs.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h

index 980ef52fe1738bd64adb4efb6d5428d04008bd40..757c77d459a5a66555bb285a87401e4398c42624 100644 (file)
@@ -457,6 +457,35 @@ const struct brw_tracked_state brw_cs_state = {
 };
 
 
+/**
+ * Returns the number of dwords of push constant space used for the local
+ * invocation ID payload: three dwords (x, y and z) per dispatch channel.
+ *
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, at 8 dwords per register, for SIMD8 we use 3 full registers, and
+ * for SIMD16 we use 6 registers worth of push constant space.
+ *
+ * \param prog            Currently unused by this function; presumably kept
+ *                        so the optimizations listed below can inspect the
+ *                        program later.
+ * \param dispatch_width  Dispatch (SIMD) width of the shader, e.g. 8 or 16.
+ *
+ * \return 3 * dispatch_width. The caller in fs_visitor::setup_cs_payload()
+ *         asserts that this is a multiple of 8 before converting to registers.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ */
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width)
+{
+   return 3 * dispatch_width;
+}
+
+
 /**
  * Creates a region containing the push constants for the CS on gen7+.
  *
index 8404aa3e824ed89d047d9be5875dca6de40fc71d..08310df77c16901e39b3d3b74871ebd5393a7f20 100644 (file)
@@ -42,6 +42,11 @@ void
 brw_upload_cs_prog(struct brw_context *brw);
 
 #ifdef __cplusplus
+
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width);
+
 }
 #endif
 
index 320f612682d2cce82b1f850ec5beb9b6fc1dbad1..10417c8748423f89a303e7b47551b1a266300d45 100644 (file)
@@ -42,6 +42,7 @@
 #include "brw_eu.h"
 #include "brw_wm.h"
 #include "brw_fs.h"
+#include "brw_cs.h"
 #include "brw_cfg.h"
 #include "brw_dead_control_flow.h"
 #include "main/uniforms.h"
@@ -4731,6 +4732,15 @@ fs_visitor::setup_cs_payload()
    assert(devinfo->gen >= 7);
 
    payload.num_regs = 1;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      const unsigned local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+      assert((local_id_dwords & 0x7) == 0);
+      const unsigned local_id_regs = local_id_dwords / 8;
+      payload.local_invocation_id_reg = payload.num_regs;
+      payload.num_regs += local_id_regs;
+   }
 }
 
 void
index dd0526a15507a13175edb0162727308f96c0ed00..c584cc70cb868e47d381eb06ad0ab4aa721221e5 100644 (file)
@@ -364,6 +364,7 @@ public:
       uint8_t sample_pos_reg;
       uint8_t sample_mask_in_reg;
       uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+      uint8_t local_invocation_id_reg;
 
       /** The number of thread payload registers the hardware will supply. */
       uint8_t num_regs;