From: Kenneth Graunke Date: Sat, 24 Aug 2019 01:23:32 +0000 (-0700) Subject: intel/compiler: Use new Gen11 headerless RT writes for MRT cases X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=23f42f8dcfe7ca275ff3cbb5586b5a07ce8df778;p=mesa.git intel/compiler: Use new Gen11 headerless RT writes for MRT cases Gen11 adds support for specifying the render target index and src0 alpha present bits in the extended message descriptor. Previously, we had to use a message header for this, requiring extra instructions to write the fields, and two registers of extra payload. Improves performance on my ICL 8x8 frequency locked to 700Mhz, on iris: GfxBench5 Manhattan 3.0: 2.13635% +/- 0.159859% (n=5) GfxBench5 Aztec Ruins: 1.57173% +/- 0.128749% (n=5) Synmark2 OglDeferred: 2.86914% +/- 0.191211% (n=10) Reviewed-by: Jason Ekstrand --- diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 72393b7a4b9..8bf11f9aa93 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4281,8 +4281,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length = 2; } else if ((devinfo->gen <= 7 && !devinfo->is_haswell && prog_data->uses_kill) || - color1.file != BAD_FILE || - key->nr_color_regions > 1) { + (devinfo->gen < 11 && + (color1.file != BAD_FILE || key->nr_color_regions > 1))) { /* From the Sandy Bridge PRM, volume 4, page 198: * * "Dispatched Pixel Enables. One bit per pixel indicating @@ -4356,6 +4356,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length++; } + bool src0_alpha_present = false; + if (src0_alpha.file != BAD_FILE) { for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) { const fs_builder &ubld = bld.exec_all().group(8, i) @@ -4365,12 +4367,14 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, setup_color_payload(ubld, key, &sources[length], tmp, 1); length++; } + src0_alpha_present = true; } else if (prog_data->replicate_alpha && inst->target != 0) { /* Handle the case when fragment shader doesn't write to draw buffer * zero. No need to call setup_color_payload() for src0_alpha because * alpha value will be undefined. */ length += bld.dispatch_width() / 8; + src0_alpha_present = true; } if (sample_mask.file != BAD_FILE) { @@ -4448,6 +4452,13 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, inst->last_rt, false); + if (devinfo->gen >= 11) { + /* Set the "Render Target Index" and "Src0 Alpha Present" fields + * in the extended message descriptor, in lieu of using a header. + */ + ex_desc = inst->target << 12 | src0_alpha_present << 15; + } + inst->opcode = SHADER_OPCODE_SEND; inst->resize_sources(3); inst->sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;