intel/fs: Allow multiple slots for position
author Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 21 Sep 2018 23:07:38 +0000 (16:07 -0700)
committer Marge Bot <eric+marge@anholt.net>
Tue, 7 Apr 2020 17:16:09 +0000 (17:16 +0000)
Change brw_compute_vue_map() to also take the number of position
slots (pos_slots).  If more than one slot is used, VARYING_SLOT_POS is
treated as an array.

When using Primitive Replication, instead of a single position, the
VUE must contain an array of positions.  Padding might be
necessary (after the clip distances) to ensure the rest of the
attributes start aligned.

v2: Add note about array in the commit message and assert that
    pos_slots >= 1 to make clear 0 is invalid. (Jason)
    Move padding to be after the clip distance.

v3: Apply the correct offset when gathering the sources from outputs.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> [v2]
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>

13 files changed:
src/gallium/drivers/iris/iris_program.c
src/intel/blorp/blorp.c
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_visitor.cpp
src/intel/compiler/brw_shader.cpp
src/intel/compiler/brw_vec4_gs_visitor.cpp
src/intel/compiler/brw_vec4_tcs.cpp
src/intel/compiler/brw_vue_map.c
src/intel/vulkan/anv_pipeline.c
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_wm.c

index 436c7b815517578f4c81c61676e873958a264e14..b45f86623d6ce3e7c189a42c332ab4a746db3d69 100644 (file)
@@ -1097,7 +1097,7 @@ iris_compile_vs(struct iris_context *ice,
 
    brw_compute_vue_map(devinfo,
                        &vue_prog_data->vue_map, nir->info.outputs_written,
-                       nir->info.separate_shader);
+                       nir->info.separate_shader, /* pos_slots */ 1);
 
    struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
 
@@ -1551,7 +1551,7 @@ iris_compile_gs(struct iris_context *ice,
 
    brw_compute_vue_map(devinfo,
                        &vue_prog_data->vue_map, nir->info.outputs_written,
-                       nir->info.separate_shader);
+                       nir->info.separate_shader, /* pos_slots */ 1);
 
    struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
 
index 60585797ab81e411461a2a4b61c90c03e75bb922..d60d75e16c63492be9932e90fa896ccff99f0a90 100644 (file)
@@ -227,7 +227,8 @@ blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx,
    brw_compute_vue_map(compiler->devinfo,
                        &vs_prog_data->base.vue_map,
                        nir->info.outputs_written,
-                       nir->info.separate_shader);
+                       nir->info.separate_shader,
+                       1);
 
    struct brw_vs_prog_key vs_key = { 0, };
 
@@ -285,7 +286,7 @@ blorp_ensure_sf_program(struct blorp_batch *batch,
    unsigned program_size;
 
    struct brw_vue_map vue_map;
-   brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false);
+   brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false, 1);
 
    struct brw_sf_prog_data prog_data_tmp;
    program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key,
index 99047998e9afb6c9c543e2f8dcf5ee368695cc9e..1364890beb41df15cfc87d4e7a764b3bcd35d64c 100644 (file)
@@ -1045,7 +1045,8 @@ GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
 void brw_compute_vue_map(const struct gen_device_info *devinfo,
                          struct brw_vue_map *vue_map,
                          uint64_t slots_valid,
-                         bool separate_shader);
+                         bool separate_shader,
+                         uint32_t pos_slots);
 
 void brw_compute_tess_vue_map(struct brw_vue_map *const vue_map,
                               uint64_t slots_valid,
index ed9005f86d8d0ca024139cc007274671da73ab19..901a13e5bf55d3606e623c7e8ce68a8303309785 100644 (file)
@@ -1699,7 +1699,7 @@ calculate_urb_setup(const struct gen_device_info *devinfo,
          struct brw_vue_map prev_stage_vue_map;
          brw_compute_vue_map(devinfo, &prev_stage_vue_map,
                              key->input_slots_valid,
-                             nir->info.separate_shader);
+                             nir->info.separate_shader, 1);
 
          int first_slot =
             brw_compute_first_urb_slot_required(nir->info.inputs_read,
index e276227f0acf9b5b5b13e25d99f916b912239cb6..794bb24645155022d46fc6791bea2e21fe3032d6 100644 (file)
@@ -698,8 +698,18 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
                sources[length++] = reg;
             }
          } else {
-            for (unsigned i = 0; i < 4; i++)
-               sources[length++] = offset(this->outputs[varying], bld, i);
+            int slot_offset = 0;
+
+            /* When using Primitive Replication, there may be multiple slots
+             * assigned to POS.
+             */
+            if (varying == VARYING_SLOT_POS)
+               slot_offset = slot - vue_map->varying_to_slot[VARYING_SLOT_POS];
+
+            for (unsigned i = 0; i < 4; i++) {
+               sources[length++] = offset(this->outputs[varying], bld,
+                                          i + (slot_offset * 4));
+            }
          }
          break;
       }
index 72478e3c39e6539b830226879ede817b28aff962..5ae0f9080adee8080279b01825c9aaea2da9748d 100644 (file)
@@ -1277,7 +1277,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
 
    brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
                        nir->info.outputs_written,
-                       nir->info.separate_shader);
+                       nir->info.separate_shader, 1);
 
    unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
 
index 6a1e1bdaa3e1f20dacedb7d5b868a3cea82182a0..a0b78eb4d486e9cd8d1245b7174e830e10dafd78 100644 (file)
@@ -638,7 +638,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    GLbitfield64 inputs_read = shader->info.inputs_read;
    brw_compute_vue_map(compiler->devinfo,
                        &c.input_vue_map, inputs_read,
-                       shader->info.separate_shader);
+                       shader->info.separate_shader, 1);
 
    brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
    brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
index fcefe395f2d1822a7b5be7f6afcba2e0e2188d5e..26dc4f1804091b18d1cf9571d251441925b14572 100644 (file)
@@ -373,7 +373,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
 
    struct brw_vue_map input_vue_map;
    brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
-                       nir->info.separate_shader);
+                       nir->info.separate_shader, 1);
    brw_compute_tess_vue_map(&vue_prog_data->vue_map,
                             nir->info.outputs_written,
                             nir->info.patch_outputs_written);
index 02ca51b1404fb291581e78e7e0c2bab1f44c1e97..df5cf5908cc062c61ffbc8faec9da832d4126892 100644 (file)
@@ -60,7 +60,8 @@ void
 brw_compute_vue_map(const struct gen_device_info *devinfo,
                     struct brw_vue_map *vue_map,
                     uint64_t slots_valid,
-                    bool separate)
+                    bool separate,
+                    uint32_t pos_slots)
 {
    /* Keep using the packed/contiguous layout on old hardware - we only need
     * the SSO layout when using geometry/tessellation shaders or 32 FS input
@@ -133,11 +134,27 @@ brw_compute_vue_map(const struct gen_device_info *devinfo,
        */
       assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
       assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
+
+      /* When using Primitive Replication, multiple slots are used for storing
+       * positions for each view.
+       */
+      assert(pos_slots >= 1);
+      if (pos_slots > 1) {
+         for (int i = 1; i < pos_slots; i++) {
+            vue_map->slot_to_varying[slot++] = VARYING_SLOT_POS;
+         }
+      }
+
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
          assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
          assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
 
+      /* Vertex URB Formats table says: "Vertex Header shall be padded at the
+       * end so that the header ends on a 32-byte boundary".
+       */
+      slot += slot % 2;
+
       /* front and back colors need to be consecutive so that we can use
        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
        * two-sided color.
index 12dcb024acb2ab05852b0883900d5b504f51e80a..c1d0e393a96626507f0529402dfc04efa8383034 100644 (file)
@@ -741,7 +741,7 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler,
    brw_compute_vue_map(compiler->devinfo,
                        &vs_stage->prog_data.vs.base.vue_map,
                        vs_stage->nir->info.outputs_written,
-                       vs_stage->nir->info.separate_shader);
+                       vs_stage->nir->info.separate_shader, 1);
 
    vs_stage->num_stats = 1;
    vs_stage->code = brw_compile_vs(compiler, device, mem_ctx,
@@ -887,7 +887,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
    brw_compute_vue_map(compiler->devinfo,
                        &gs_stage->prog_data.gs.base.vue_map,
                        gs_stage->nir->info.outputs_written,
-                       gs_stage->nir->info.separate_shader);
+                       gs_stage->nir->info.separate_shader, 1);
 
    gs_stage->num_stats = 1;
    gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
index 74a6c3797116b9687a4ce3ef34786ec41e746b5f..c7c0ec0d41eb19b2161f29bff3605a2a9b814d60 100644 (file)
@@ -79,7 +79,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
 
    brw_compute_vue_map(devinfo,
                        &prog_data.base.vue_map, outputs_written,
-                       gp->program.info.separate_shader);
+                       gp->program.info.separate_shader, 1);
 
    int st_index = -1;
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
index 1e9cecf9f904722e1d2cc6019cd9e9c39bfcc27d..6ce7a1ce88932e2042b3c767ad3e863fb6377b52 100644 (file)
@@ -159,7 +159,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
 
    brw_compute_vue_map(devinfo,
                        &prog_data.base.vue_map, outputs_written,
-                       nir->info.separate_shader);
+                       nir->info.separate_shader, 1);
 
    if (0) {
       _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
index 6f5c144eaa975598b36bb2db90207a8866c8185b..256115103ddceaf1fdb2f0cffe5b7611ae026cd1 100644 (file)
@@ -602,7 +602,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog)
    if (devinfo->gen < 6) {
       brw_compute_vue_map(&brw->screen->devinfo, &vue_map,
                           prog->info.inputs_read | VARYING_BIT_POS,
-                          false);
+                          false, 1);
    }
 
    bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map);