From 395de69b1febf4cfca29482e1ff7ddd2ae400d8b Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Fri, 21 Sep 2018 16:07:38 -0700 Subject: [PATCH] intel/fs: Allow multiple slots for position Change brw_compute_vue_map() to also take the number of pos slots. If more than one slot is used, the VARYING_SLOT_POS is treated as an array. When using Primitive Replication, instead of a single position, the VUE must contain an array of positions. Padding might be necessary (after clip distance) to ensure the rest of the attributes start aligned. v2: Add note about array in the commit message and assert that pos_slots >= 1 to make clear 0 is invalid. (Jason) Move padding to be after the clip distance. v3: Apply the correct offset when gathering the sources from outputs. Reviewed-by: Jason Ekstrand [v2] Reviewed-by: Kenneth Graunke Reviewed-by: Rafael Antognolli Part-of: --- src/gallium/drivers/iris/iris_program.c | 4 ++-- src/intel/blorp/blorp.c | 5 +++-- src/intel/compiler/brw_compiler.h | 3 ++- src/intel/compiler/brw_fs.cpp | 2 +- src/intel/compiler/brw_fs_visitor.cpp | 14 ++++++++++++-- src/intel/compiler/brw_shader.cpp | 2 +- src/intel/compiler/brw_vec4_gs_visitor.cpp | 2 +- src/intel/compiler/brw_vec4_tcs.cpp | 2 +- src/intel/compiler/brw_vue_map.c | 19 ++++++++++++++++++- src/intel/vulkan/anv_pipeline.c | 4 ++-- src/mesa/drivers/dri/i965/brw_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_vs.c | 2 +- src/mesa/drivers/dri/i965/brw_wm.c | 2 +- 13 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 436c7b81551..b45f86623d6 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -1097,7 +1097,7 @@ iris_compile_vs(struct iris_context *ice, brw_compute_vue_map(devinfo, &vue_prog_data->vue_map, nir->info.outputs_written, - nir->info.separate_shader); + nir->info.separate_shader, /* pos_slots */ 1); struct brw_vs_prog_key brw_key = 
iris_to_brw_vs_key(devinfo, key); @@ -1551,7 +1551,7 @@ iris_compile_gs(struct iris_context *ice, brw_compute_vue_map(devinfo, &vue_prog_data->vue_map, nir->info.outputs_written, - nir->info.separate_shader); + nir->info.separate_shader, /* pos_slots */ 1); struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key); diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index 60585797ab8..d60d75e16c6 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -227,7 +227,8 @@ blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx, brw_compute_vue_map(compiler->devinfo, &vs_prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader); + nir->info.separate_shader, + 1); struct brw_vs_prog_key vs_key = { 0, }; @@ -285,7 +286,7 @@ blorp_ensure_sf_program(struct blorp_batch *batch, unsigned program_size; struct brw_vue_map vue_map; - brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false); + brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false, 1); struct brw_sf_prog_data prog_data_tmp; program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key, diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 99047998e9a..1364890beb4 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1045,7 +1045,8 @@ GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying) void brw_compute_vue_map(const struct gen_device_info *devinfo, struct brw_vue_map *vue_map, uint64_t slots_valid, - bool separate_shader); + bool separate_shader, + uint32_t pos_slots); void brw_compute_tess_vue_map(struct brw_vue_map *const vue_map, uint64_t slots_valid, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index ed9005f86d8..901a13e5bf5 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1699,7 +1699,7 @@ calculate_urb_setup(const struct gen_device_info *devinfo, struct 
brw_vue_map prev_stage_vue_map; brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, - nir->info.separate_shader); + nir->info.separate_shader, 1); int first_slot = brw_compute_first_urb_slot_required(nir->info.inputs_read, diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index e276227f0ac..794bb246451 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -698,8 +698,18 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) sources[length++] = reg; } } else { - for (unsigned i = 0; i < 4; i++) - sources[length++] = offset(this->outputs[varying], bld, i); + int slot_offset = 0; + + /* When using Primitive Replication, there may be multiple slots + * assigned to POS. + */ + if (varying == VARYING_SLOT_POS) + slot_offset = slot - vue_map->varying_to_slot[VARYING_SLOT_POS]; + + for (unsigned i = 0; i < 4; i++) { + sources[length++] = offset(this->outputs[varying], bld, + i + (slot_offset * 4)); + } } break; } diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 72478e3c39e..5ae0f9080ad 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1277,7 +1277,7 @@ brw_compile_tes(const struct brw_compiler *compiler, brw_compute_vue_map(devinfo, &prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader); + nir->info.separate_shader, 1); unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index 6a1e1bdaa3e..a0b78eb4d48 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -638,7 +638,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, GLbitfield64 inputs_read = shader->info.inputs_read; brw_compute_vue_map(compiler->devinfo, &c.input_vue_map, inputs_read, - 
shader->info.separate_shader); + shader->info.separate_shader, 1); brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar); brw_nir_lower_vue_inputs(shader, &c.input_vue_map); diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index fcefe395f2d..26dc4f18040 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -373,7 +373,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, struct brw_vue_map input_vue_map; brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read, - nir->info.separate_shader); + nir->info.separate_shader, 1); brw_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info.outputs_written, nir->info.patch_outputs_written); diff --git a/src/intel/compiler/brw_vue_map.c b/src/intel/compiler/brw_vue_map.c index 02ca51b1404..df5cf5908cc 100644 --- a/src/intel/compiler/brw_vue_map.c +++ b/src/intel/compiler/brw_vue_map.c @@ -60,7 +60,8 @@ void brw_compute_vue_map(const struct gen_device_info *devinfo, struct brw_vue_map *vue_map, uint64_t slots_valid, - bool separate) + bool separate, + uint32_t pos_slots) { /* Keep using the packed/contiguous layout on old hardware - we only need * the SSO layout when using geometry/tessellation shaders or 32 FS input @@ -133,11 +134,27 @@ brw_compute_vue_map(const struct gen_device_info *devinfo, */ assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++); assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++); + + /* When using Primitive Replication, multiple slots are used for storing + * positions for each view. 
+ */ + assert(pos_slots >= 1); + if (pos_slots > 1) { + for (int i = 1; i < pos_slots; i++) { + vue_map->slot_to_varying[slot++] = VARYING_SLOT_POS; + } + } + if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)) assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++); if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)) assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++); + /* Vertex URB Formats table says: "Vertex Header shall be padded at the + * end so that the header ends on a 32-byte boundary". + */ + slot += slot % 2; + /* front and back colors need to be consecutive so that we can use * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing * two-sided color. diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 12dcb024acb..c1d0e393a96 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -741,7 +741,7 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler, brw_compute_vue_map(compiler->devinfo, &vs_stage->prog_data.vs.base.vue_map, vs_stage->nir->info.outputs_written, - vs_stage->nir->info.separate_shader); + vs_stage->nir->info.separate_shader, 1); vs_stage->num_stats = 1; vs_stage->code = brw_compile_vs(compiler, device, mem_ctx, @@ -887,7 +887,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler, brw_compute_vue_map(compiler->devinfo, &gs_stage->prog_data.gs.base.vue_map, gs_stage->nir->info.outputs_written, - gs_stage->nir->info.separate_shader); + gs_stage->nir->info.separate_shader, 1); gs_stage->num_stats = 1; gs_stage->code = brw_compile_gs(compiler, device, mem_ctx, diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 74a6c379711..c7c0ec0d41e 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -79,7 +79,7 @@ brw_codegen_gs_prog(struct brw_context *brw, brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, - gp->program.info.separate_shader); + 
gp->program.info.separate_shader, 1); int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 1e9cecf9f90..6ce7a1ce889 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -159,7 +159,7 @@ brw_codegen_vs_prog(struct brw_context *brw, brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, - nir->info.separate_shader); + nir->info.separate_shader, 1); if (0) { _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 6f5c144eaa9..256115103dd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -602,7 +602,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog) if (devinfo->gen < 6) { brw_compute_vue_map(&brw->screen->devinfo, &vue_map, prog->info.inputs_read | VARYING_BIT_POS, - false); + false, 1); } bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map); -- 2.30.2