v3d: add lowering for OpenGL logic operations
author Iago Toral Quiroga <itoral@igalia.com>
Wed, 3 Jul 2019 07:38:39 +0000 (09:38 +0200)
committer Iago Toral Quiroga <itoral@igalia.com>
Fri, 12 Jul 2019 07:16:38 +0000 (09:16 +0200)
This implements support for OpenGL logic operations by emitting code to read
from the TLB if needed and blending the fragment output accordingly. It is
similar to VC4's blend lowering pass, but exclusive to logic operations, since
blending is otherwise supported in hardware.

The pass doesn't handle MSAA targets yet.

Fixes the following piglit tests:
spec/!opengl 1.0/gl-1.0-logicop/*
spec/!opengl 1.1/gl-1.1-xor
spec/!opengl 1.1/gl-1.1-xor-copypixels

It also fixes text cursor rendering in LibreOffice with the GTK+2 theme, which
is rendered via glamor using the XOR logic operation.

v2: fix checks for allowed variable location and maximum render target (Eric)

Reviewed-by: Eric Anholt <eric@anholt.net>
src/broadcom/compiler/meson.build
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/v3d_nir_lower_logic_ops.c [new file with mode: 0644]
src/broadcom/compiler/vir.c

index 1249af5e8ab2ce1dd0db1a8fc5b382290e54bef2..d7af999c3210b3ee7d72a947d24f6bad0a15b7a3 100644 (file)
@@ -37,6 +37,7 @@ libbroadcom_compiler_files = files(
   'v3d_compiler.h',
   'v3d_nir_lower_io.c',
   'v3d_nir_lower_image_load_store.c',
+  'v3d_nir_lower_logic_ops.c',
   'v3d_nir_lower_scratch.c',
   'v3d_nir_lower_txf_ms.c',
 )
index 67c7dd48d8c6fd628679bd56f03f339aa059cb9c..288273aac1476099a04e742637ae1ee5933bd20e 100644 (file)
@@ -812,6 +812,7 @@ bool vir_opt_small_immediates(struct v3d_compile *c);
 bool vir_opt_vpm(struct v3d_compile *c);
 void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
+void v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_scratch(nir_shader *s);
 void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_image_load_store(nir_shader *s);
diff --git a/src/broadcom/compiler/v3d_nir_lower_logic_ops.c b/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
new file mode 100644 (file)
index 0000000..849e554
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Copyright © 2019 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Implements lowering for logical operations.
+ *
+ * V3D doesn't have any hardware support for logic ops.  Instead, you read the
+ * current contents of the destination from the tile buffer, then do math using
+ * your output color and that destination value, and update the output color
+ * appropriately.
+ */
+
+#include "util/u_format.h"
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
+#include "v3d_compiler.h"
+
+/**
+ * Emits the bitwise math for a single logic operation.
+ *
+ * 'logicop_func' is one of the PIPE_LOGICOP_* values, 'src' is the packed
+ * fragment color and 'dst' is the packed color read back from the render
+ * target.  Returns the value that results from combining both.
+ *
+ * An unrecognized operation is reported on stderr and then handled like
+ * PIPE_LOGICOP_COPY, i.e. the source is returned untouched.
+ */
+static nir_ssa_def *
+v3d_logicop(nir_builder *b, int logicop_func,
+            nir_ssa_def *src, nir_ssa_def *dst)
+{
+        switch (logicop_func) {
+        /* Constant results: neither operand is consulted. */
+        case PIPE_LOGICOP_CLEAR:
+                return nir_imm_int(b, 0);
+        case PIPE_LOGICOP_SET:
+                return nir_imm_int(b, ~0);
+
+        /* Results that depend on a single operand. */
+        case PIPE_LOGICOP_NOOP:
+                return dst;
+        case PIPE_LOGICOP_INVERT:
+                return nir_inot(b, dst);
+        case PIPE_LOGICOP_COPY_INVERTED:
+                return nir_inot(b, src);
+
+        /* AND family. */
+        case PIPE_LOGICOP_AND:
+                return nir_iand(b, src, dst);
+        case PIPE_LOGICOP_NAND: {
+                nir_ssa_def *both = nir_iand(b, src, dst);
+                return nir_inot(b, both);
+        }
+        case PIPE_LOGICOP_AND_INVERTED: {
+                nir_ssa_def *not_src = nir_inot(b, src);
+                return nir_iand(b, not_src, dst);
+        }
+        case PIPE_LOGICOP_AND_REVERSE: {
+                nir_ssa_def *not_dst = nir_inot(b, dst);
+                return nir_iand(b, src, not_dst);
+        }
+
+        /* OR family. */
+        case PIPE_LOGICOP_OR:
+                return nir_ior(b, src, dst);
+        case PIPE_LOGICOP_NOR: {
+                nir_ssa_def *either = nir_ior(b, src, dst);
+                return nir_inot(b, either);
+        }
+        case PIPE_LOGICOP_OR_INVERTED: {
+                nir_ssa_def *not_src = nir_inot(b, src);
+                return nir_ior(b, not_src, dst);
+        }
+        case PIPE_LOGICOP_OR_REVERSE: {
+                nir_ssa_def *not_dst = nir_inot(b, dst);
+                return nir_ior(b, src, not_dst);
+        }
+
+        /* XOR family. */
+        case PIPE_LOGICOP_XOR:
+                return nir_ixor(b, src, dst);
+        case PIPE_LOGICOP_EQUIV: {
+                nir_ssa_def *differ = nir_ixor(b, src, dst);
+                return nir_inot(b, differ);
+        }
+
+        default:
+                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
+                /* FALLTHROUGH */
+        case PIPE_LOGICOP_COPY:
+                return src;
+        }
+}
+
+/**
+ * Resolves one PIPE_SWIZZLE_* selector against the channels in 'srcs'.
+ *
+ * X/Y/Z/W pick the matching entry of 'srcs', while the 0 and 1 selectors
+ * yield float immediates.  PIPE_SWIZZLE_NONE and any unrecognized selector
+ * print a warning to stderr and produce 0.0.
+ */
+static nir_ssa_def *
+v3d_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+{
+        switch (swiz) {
+        case PIPE_SWIZZLE_X:
+        case PIPE_SWIZZLE_Y:
+        case PIPE_SWIZZLE_Z:
+        case PIPE_SWIZZLE_W:
+                return srcs[swiz];
+        case PIPE_SWIZZLE_1:
+                return nir_imm_float(b, 1.0);
+        case PIPE_SWIZZLE_NONE:
+        default:
+                fprintf(stderr, "warning: unknown swizzle\n");
+                /* FALLTHROUGH */
+        case PIPE_SWIZZLE_0:
+                return nir_imm_float(b, 0.0);
+        }
+}
+
+/**
+ * Reorders the four channels in 'chans' as described by 'swiz' and packs
+ * the outcome with nir_pack_unorm_4x8.
+ */
+static nir_ssa_def *
+v3d_nir_swizzle_and_pack(nir_builder *b, nir_ssa_def **chans,
+                         const uint8_t *swiz)
+{
+        nir_ssa_def *swizzled[4];
+
+        for (unsigned chan = 0; chan < 4; chan++) {
+                swizzled[chan] =
+                        v3d_nir_get_swizzled_channel(b, chans, swiz[chan]);
+        }
+
+        nir_ssa_def *vec = nir_vec4(b, swizzled[0], swizzled[1],
+                                    swizzled[2], swizzled[3]);
+        return nir_pack_unorm_4x8(b, vec);
+}
+
+/**
+ * Counterpart of v3d_nir_swizzle_and_pack: expands 'packed' with
+ * nir_unpack_unorm_4x8, applies 'swiz' to the resulting channels and
+ * returns them as a vec4.
+ */
+static nir_ssa_def *
+v3d_nir_unpack_and_swizzle(nir_builder *b, nir_ssa_def *packed,
+                           const uint8_t *swiz)
+{
+        nir_ssa_def *vec = nir_unpack_unorm_4x8(b, packed);
+
+        /* Split the vector first: any output channel may refer to any of
+         * the unpacked ones.
+         */
+        nir_ssa_def *raw[4];
+        for (unsigned chan = 0; chan < 4; chan++)
+                raw[chan] = nir_channel(b, vec, chan);
+
+        nir_ssa_def *swizzled[4];
+        for (unsigned chan = 0; chan < 4; chan++) {
+                swizzled[chan] =
+                        v3d_nir_get_swizzled_channel(b, raw, swiz[chan]);
+        }
+
+        return nir_vec4(b, swizzled[0], swizzled[1],
+                        swizzled[2], swizzled[3]);
+}
+
+/**
+ * Returns the four-entry swizzle to use when packing and unpacking the
+ * color of render target 'rt'.
+ */
+static const uint8_t *
+v3d_get_format_swizzle_for_rt(struct v3d_compile *c, int rt)
+{
+        static const uint8_t rgba_identity[4] = { 0, 1, 2, 3 };
+
+        const uint8_t *fmt_swizzle = c->fs_key->color_fmt[rt].swizzle;
+        const bool is_b5g6r5 =
+                c->fs_key->color_fmt[rt].format == PIPE_FORMAT_B5G6R5_UNORM;
+
+        /* Formats whose first channel is not sourced from component 2, and
+         * B5G6R5, use the swizzle stored in the shader key as is.
+         */
+        if (fmt_swizzle[0] != 2 || is_b5g6r5)
+                return fmt_swizzle;
+
+        /* We will automatically swap R and B channels for BGRA formats
+         * on tile loads and stores (see 'swap_rb' field in v3d_resource) so
+         * we want to treat these surfaces as if they were regular RGBA formats.
+         */
+        return rgba_identity;
+}
+
+/**
+ * Emits one load_tlb_color_v3d intrinsic that reads a single 32-bit
+ * channel.  The render target index is passed as the intrinsic source,
+ * the sample as its base and the channel as its component.
+ */
+static nir_ssa_def *
+v3d_nir_load_tlb_color_channel(nir_builder *b, int rt, int sample, int chan)
+{
+        nir_intrinsic_instr *tlb_load =
+                nir_intrinsic_instr_create(b->shader,
+                                           nir_intrinsic_load_tlb_color_v3d);
+
+        tlb_load->num_components = 1;
+        nir_intrinsic_set_base(tlb_load, sample);
+        nir_intrinsic_set_component(tlb_load, chan);
+        tlb_load->src[0] = nir_src_for_ssa(nir_imm_int(b, rt));
+        nir_ssa_dest_init(&tlb_load->instr, &tlb_load->dest, 1, 32, NULL);
+        nir_builder_instr_insert(b, &tlb_load->instr);
+
+        return &tlb_load->dest.ssa;
+}
+
+/**
+ * Reads the color stored in the tile buffer for render target 'rt' and
+ * sample 'sample', one channel at a time, and returns it as a vec4.
+ */
+static nir_ssa_def *
+v3d_nir_get_tlb_color(nir_builder *b, int rt, int sample)
+{
+        nir_ssa_def *chans[4];
+
+        for (int chan = 0; chan < 4; chan++) {
+                chans[chan] =
+                        v3d_nir_load_tlb_color_channel(b, rt, sample, chan);
+        }
+
+        return nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
+}
+
+/**
+ * Emits the whole logic op sequence for one render target: load the
+ * destination color from the tile buffer, pack source and destination,
+ * combine them with the logic operation from the shader key and unpack
+ * the outcome again.
+ *
+ * 'src' is the color the shader wants to write, 'rt' the render target
+ * and 'sample' the sample to read.  Returns the vec4 to store instead of
+ * 'src'.
+ */
+static nir_ssa_def *
+v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
+                      nir_ssa_def *src, int rt, int sample)
+{
+        /* The shader output is packed as is; only the value read from the
+         * tile buffer goes through the render target swizzle.
+         */
+        static const uint8_t identity_swz[4] = { 0, 1, 2, 3 };
+        const uint8_t *rt_swz = v3d_get_format_swizzle_for_rt(c, rt);
+
+        nir_ssa_def *tlb_color = v3d_nir_get_tlb_color(b, rt, sample);
+
+        nir_ssa_def *frag_chans[4];
+        nir_ssa_def *tlb_chans[4];
+        for (unsigned chan = 0; chan < 4; chan++) {
+                frag_chans[chan] = nir_channel(b, src, chan);
+                tlb_chans[chan] = nir_channel(b, tlb_color, chan);
+        }
+
+        nir_ssa_def *frag_bits =
+                v3d_nir_swizzle_and_pack(b, frag_chans, identity_swz);
+        nir_ssa_def *tlb_bits =
+                v3d_nir_swizzle_and_pack(b, tlb_chans, rt_swz);
+
+        nir_ssa_def *combined =
+                v3d_logicop(b, c->fs_key->logicop_func, frag_bits, tlb_bits);
+
+        return v3d_nir_unpack_and_swizzle(b, combined, rt_swz);
+}
+
+/**
+ * Rewrites the output store 'intr' so that it writes the result of the
+ * logic operation for render target 'rt' rather than the original color.
+ */
+static void
+v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
+                             nir_builder *b,
+                             nir_intrinsic_instr *intr,
+                             int rt)
+{
+        nir_ssa_def *shader_color = intr->src[0].ssa;
+
+        /* XXX: this is not correct for MSAA render targets */
+        nir_ssa_def *lowered =
+                v3d_nir_emit_logic_op(c, b, shader_color, rt, 0);
+
+        /* Point the store at the new value and keep its component count in
+         * sync with it.
+         */
+        nir_src lowered_src = nir_src_for_ssa(lowered);
+        nir_instr_rewrite_src(&intr->instr, &intr->src[0], lowered_src);
+        intr->num_components = lowered->num_components;
+}
+
+/**
+ * Lowers the logic operation for every color output store found in
+ * 'block'.
+ *
+ * A store_output intrinsic is matched against the shader's output
+ * variables through its base / driver location.  Stores to variables that
+ * are not color outputs, and stores to floating point or sRGB render
+ * targets, are left untouched.
+ *
+ * Always returns true.
+ */
+static bool
+v3d_nir_lower_logic_ops_block(nir_block *block, struct v3d_compile *c)
+{
+        nir_foreach_instr_safe(instr, block) {
+                if (instr->type != nir_instr_type_intrinsic)
+                        continue;
+
+                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+                if (intr->intrinsic != nir_intrinsic_store_output)
+                        continue;
+
+                /* Look for the output variable this store writes to. */
+                nir_foreach_variable(var, &c->s->outputs) {
+                        const int driver_loc = var->data.driver_location;
+                        if (driver_loc != nir_intrinsic_base(intr))
+                                continue;
+
+                        /* Only color outputs are subject to logic ops. */
+                        const int loc = var->data.location;
+                        if (loc != FRAG_RESULT_COLOR &&
+                            (loc < FRAG_RESULT_DATA0 ||
+                             loc >= FRAG_RESULT_DATA0 + V3D_MAX_DRAW_BUFFERS)) {
+                                continue;
+                        }
+
+                        const int rt = driver_loc;
+                        assert(rt < V3D_MAX_DRAW_BUFFERS);
+
+                        /* Logic operations do not apply on floating point or
+                         * sRGB enabled render targets.
+                         */
+                        const enum pipe_format format =
+                                c->fs_key->color_fmt[rt].format;
+                        if (util_format_is_float(format) ||
+                            util_format_is_srgb(format)) {
+                                continue;
+                        }
+
+                        nir_function_impl *impl =
+                                nir_cf_node_get_function(&block->cf_node);
+                        nir_builder b;
+                        nir_builder_init(&b, impl);
+                        b.cursor = nir_before_instr(&intr->instr);
+                        v3d_nir_lower_logic_op_instr(c, &b, intr, rt);
+
+                        /* The store now writes the combined color.  Stop
+                         * scanning so that another variable with the same
+                         * driver location can never make us apply the logic
+                         * operation to this store a second time.
+                         */
+                        break;
+                }
+        }
+
+        return true;
+}
+
+/**
+ * Pass entry point: lowers the logic operation selected in the fragment
+ * shader key for every function of shader 's' that has an implementation.
+ */
+void
+v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c)
+{
+        /* Nothing to do if logic op is 'copy src to dst' or if logic ops are
+         * disabled (we set the logic op to copy in that case).
+         */
+        const bool needs_lowering =
+                c->fs_key->logicop_func != PIPE_LOGICOP_COPY;
+        if (!needs_lowering)
+                return;
+
+        nir_foreach_function(function, s) {
+                nir_function_impl *impl = function->impl;
+                if (!impl)
+                        continue;
+
+                nir_foreach_block(block, impl)
+                        v3d_nir_lower_logic_ops_block(block, c);
+
+                nir_metadata_preserve(impl,
+                                      nir_metadata_block_index |
+                                      nir_metadata_dominance);
+        }
+}
index 4f1ee605214a47699fbf77fdf783b1cf10094ccd..c18318a62957eb72a7e546eb9fe49414b3769472 100644 (file)
@@ -799,6 +799,8 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
         if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                 v3d_fixup_fs_output_types(c);
 
+        NIR_PASS_V(c->s, v3d_nir_lower_logic_ops, c);
+
         /* If the shader has no non-TLB side effects, we can promote it to
          * enabling early_fragment_tests even if the user didn't.
          */