i965/fs: Expose arbitrary pull constant load sizes to the IR.
authorFrancisco Jerez <currojerez@riseup.net>
Fri, 9 Dec 2016 04:05:18 +0000 (20:05 -0800)
committerFrancisco Jerez <currojerez@riseup.net>
Thu, 15 Dec 2016 00:50:26 +0000 (16:50 -0800)
Change the FS generator to ask the dataport for enough owords worth of
constants to fill the execution size of the instruction -- Which means
that the visitor now needs to set the execution size correctly for
uniform pull constant load instructions, which we were kind of
neglecting until now.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 6141bfb99eaae7a164c3eaf38549dc9136ae4522..8536a1307252f884467477c1b1f57171287299fe 100644 (file)
@@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
 }
 
 /**
- * Read a float[4] vector from the data port constant cache.
+ * Read float[4] vectors from the data port constant cache.
  * Location (in buffer) should be a multiple of 16.
  * Used for fetching shader constants.
  */
@@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p,
    const unsigned target_cache =
       (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
        BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+   const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
 
    /* On newer hardware, offset is in units of owords. */
    if (devinfo->gen >= 6)
@@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p,
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
    brw_push_insn_state(p);
-   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
+   brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
    /* set message header global offset field (reg 0, element 2) */
@@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));
+   brw_pop_insn_state(p);
 
    brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
@@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p,
       brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
    }
 
-   brw_set_dp_read_message(p,
-                          insn,
-                          bind_table_index,
-                          BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+   brw_set_dp_read_message(p, insn, bind_table_index,
+                           BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           target_cache,
                           1, /* msg_length */
                            true, /* header_present */
-                          1); /* response_length (1 reg, 2 owords!) */
+                          DIV_ROUND_UP(exec_size, 8)); /* response_length */
 
    brw_pop_insn_state(p);
 }
index b22dc9a1a7b170d27313a4a885fa0f7053f0e5c6..977fd8c35f592ea32591355bce06d7fd410daeff 100644 (file)
@@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads()
 
          assert(inst->src[i].stride == 0);
 
-         const fs_builder ubld = ibld.exec_all().group(8, 0);
+         const fs_builder ubld = ibld.exec_all().group(4, 0);
          struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
          ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                    dst, brw_imm_ud(index), offset);
index 8b9fa8e504b1828572f3efb1fcfb50db281580cf..93f4c4199b300894288f4dd1cf5637505976acc4 100644 (file)
@@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                   struct brw_reg index,
                                                   struct brw_reg offset)
 {
+   assert(type_sz(dst.type) == 4);
    assert(inst->mlen != 0);
 
    assert(index.file == BRW_IMMEDIATE_VALUE &&
@@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 {
    assert(index.type == BRW_REGISTER_TYPE_UD);
    assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+   assert(type_sz(dst.type) == 4);
 
    if (index.file == BRW_IMMEDIATE_VALUE) {
       const uint32_t surf_index = index.ud;
 
       brw_push_insn_state(p);
-      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-      brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4);
       brw_pop_insn_state(p);
 
-      brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD)));
-      brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD)));
-      brw_set_dp_read_message(p, send,
-                              surf_index,
-                              BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+      brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
+      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+      brw_set_dp_read_message(p, send, surf_index,
+                              BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                               GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
                               GEN6_SFID_DATAPORT_CONSTANT_CACHE,
                               1, /* mlen */
                               true, /* header */
-                              1); /* rlen */
+                              DIV_ROUND_UP(inst->size_written, REG_SIZE));
 
    } else {
       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1188,17 +1187,15 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
       /* dst = send(payload, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
-         vec4(retype(dst, BRW_REGISTER_TYPE_UD)),
-         vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr);
-      brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
-      brw_set_dp_read_message(p, insn,
-                              0, /* surface */
-                              BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+         retype(dst, BRW_REGISTER_TYPE_UD),
+         retype(payload, BRW_REGISTER_TYPE_UD), addr);
+      brw_set_dp_read_message(p, insn, 0 /* surface */,
+                              BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                               GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
                               GEN6_SFID_DATAPORT_CONSTANT_CACHE,
                               1, /* mlen */
                               true, /* header */
-                              1); /* rlen */
+                              DIV_ROUND_UP(inst->size_written, REG_SIZE));
 
       brw_pop_insn_state(p);
    }
index bfb286bb3449972c4da9b4d580f2604e54eb8fe8..7df74232457fcfaec5296555ffac18d89ed88109 100644 (file)
@@ -4059,7 +4059,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           * and we have to split it if necessary.
           */
          const unsigned type_size = type_sz(dest.type);
-         const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+         const fs_builder ubld = bld.exec_all().group(4, 0);
+         const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
+
          for (unsigned c = 0; c < instr->num_components;) {
             const unsigned base = const_offset->u32[0] + c * type_size;
 
@@ -4067,9 +4069,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
             const unsigned count = MIN2(instr->num_components - c,
                                         (16 - base % 16) / type_size);
 
-            bld.exec_all()
-               .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                     packed_consts, surf_index, brw_imm_ud(base & ~15));
+            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                      packed_consts, surf_index, brw_imm_ud(base & ~15));
 
             const fs_reg consts =
                retype(byte_offset(packed_consts, base & 15), dest.type);