i965: add component packing support for load_output intrinsics
author Timothy Arceri <timothy.arceri@collabora.com>
Wed, 15 Jun 2016 02:35:49 +0000 (12:35 +1000)
committer Timothy Arceri <timothy.arceri@collabora.com>
Wed, 20 Jul 2016 23:10:53 +0000 (09:10 +1000)
Here we use the component qualifier (which is the first component)
as an offset when loading output varyings.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 65bca6d6dbc0e48909e5b18d2846bbf2cc85a5f0..50d73eba08731a1fe9caf2cc51011cf68af281cb 100644 (file)
@@ -2590,6 +2590,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_load_per_vertex_output: {
       fs_reg indirect_offset = get_indirect_offset(instr);
       unsigned imm_offset = instr->const_index[0];
+      unsigned first_component = nir_intrinsic_component(instr);
 
       fs_inst *inst;
       if (indirect_offset.file == BAD_FILE) {
@@ -2670,10 +2671,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
             }
             bld.LOAD_PAYLOAD(dst, srcs, num_components, 0);
          } else {
-            inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
+            if (first_component != 0) {
+               unsigned read_components =
+                  instr->num_components + first_component;
+               fs_reg tmp = bld.vgrf(dst.type, read_components);
+               inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
+                               patch_handle);
+               inst->regs_written = read_components;
+               for (unsigned i = 0; i < instr->num_components; i++) {
+                  bld.MOV(offset(dst, bld, i),
+                          offset(tmp, bld, i + first_component));
+               }
+            } else {
+               inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
+                               patch_handle);
+               inst->regs_written = instr->num_components;
+            }
             inst->offset = imm_offset;
             inst->mlen = 1;
-            inst->regs_written = instr->num_components;
          }
       } else {
          /* Indirect indexing - use per-slot offsets as well. */
@@ -2683,11 +2698,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
          };
          fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
          bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
-
-         inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload);
+         if (first_component != 0) {
+            unsigned read_components =
+               instr->num_components + first_component;
+            fs_reg tmp = bld.vgrf(dst.type, read_components);
+            inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
+                            payload);
+            inst->regs_written = read_components;
+            for (unsigned i = 0; i < instr->num_components; i++) {
+               bld.MOV(offset(dst, bld, i),
+                       offset(tmp, bld, i + first_component));
+            }
+         } else {
+            inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
+                            payload);
+            inst->regs_written = instr->num_components;
+         }
          inst->offset = imm_offset;
          inst->mlen = 2;
-         inst->regs_written = instr->num_components;
       }
       break;
    }