From 9b22a0d295316b7547667ebbfe1e1b6182439186 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 8 Dec 2016 20:05:18 -0800 Subject: [PATCH] i965/fs: Expose arbitrary pull constant load sizes to the IR. Change the FS generator to ask the dataport for enough owords' worth of constants to fill the execution size of the instruction -- which means that the visitor now needs to set the execution size correctly for uniform pull constant load instructions, which we were kind of neglecting until now. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 15 ++++++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- .../drivers/dri/i965/brw_fs_generator.cpp | 27 +++++++++---------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 ++++--- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6141bfb99ea..8536a130725 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port constant cache. + * Read float[4] vectors from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : BRW_DATAPORT_READ_TARGET_DATA_CACHE); + const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); /* On newer hardware, offset is in units of owords. 
*/ if (devinfo->gen >= 6) @@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p, mrf = retype(mrf, BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ @@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p, mrf.nr, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(offset)); + brw_pop_insn_state(p); brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); @@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + brw_set_dp_read_message(p, insn, bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, target_cache, 1, /* msg_length */ true, /* header_present */ - 1); /* response_length (1 reg, 2 owords!) 
*/ + DIV_ROUND_UP(exec_size, 8)); /* response_length */ brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b22dc9a1a7b..977fd8c35f5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads() assert(inst->src[i].stride == 0); - const fs_builder ubld = ibld.exec_all().group(8, 0); + const fs_builder ubld = ibld.exec_all().group(4, 0); struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, dst, brw_imm_ud(index), offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 8b9fa8e504b..93f4c4199b3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { + assert(type_sz(dst.type) == 4); assert(inst->mlen != 0); assert(index.file == BRW_IMMEDIATE_VALUE && @@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, { assert(index.type == BRW_REGISTER_TYPE_UD); assert(payload.file == BRW_GENERAL_REGISTER_FILE); + assert(type_sz(dst.type) == 4); if (index.file == BRW_IMMEDIATE_VALUE) { const uint32_t surf_index = index.ud; brw_push_insn_state(p); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4); brw_pop_insn_state(p); - brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD))); - brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD))); - brw_set_dp_read_message(p, send, - surf_index, - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD)); + 
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); + brw_set_dp_read_message(p, send, surf_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size), GEN7_DATAPORT_DC_OWORD_BLOCK_READ, GEN6_SFID_DATAPORT_CONSTANT_CACHE, 1, /* mlen */ true, /* header */ - 1); /* rlen */ + DIV_ROUND_UP(inst->size_written, REG_SIZE)); } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); @@ -1188,17 +1187,15 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, /* dst = send(payload, a0.0 | ) */ brw_inst *insn = brw_send_indirect_message( p, GEN6_SFID_DATAPORT_CONSTANT_CACHE, - vec4(retype(dst, BRW_REGISTER_TYPE_UD)), - vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr); - brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4); - brw_set_dp_read_message(p, insn, - 0, /* surface */ - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + retype(dst, BRW_REGISTER_TYPE_UD), + retype(payload, BRW_REGISTER_TYPE_UD), addr); + brw_set_dp_read_message(p, insn, 0 /* surface */, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size), GEN7_DATAPORT_DC_OWORD_BLOCK_READ, GEN6_SFID_DATAPORT_CONSTANT_CACHE, 1, /* mlen */ true, /* header */ - 1); /* rlen */ + DIV_ROUND_UP(inst->size_written, REG_SIZE)); brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index bfb286bb344..7df74232457 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4059,7 +4059,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * and we have to split it if necessary. 
*/ const unsigned type_size = type_sz(dest.type); - const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F); + const fs_builder ubld = bld.exec_all().group(4, 0); + const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F); + for (unsigned c = 0; c < instr->num_components;) { const unsigned base = const_offset->u32[0] + c * type_size; @@ -4067,9 +4069,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const unsigned count = MIN2(instr->num_components - c, (16 - base % 16) / type_size); - bld.exec_all() - .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - packed_consts, surf_index, brw_imm_ud(base & ~15)); + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + packed_consts, surf_index, brw_imm_ud(base & ~15)); const fs_reg consts = retype(byte_offset(packed_consts, base & 15), dest.type); -- 2.30.2