X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_blend_shaders.c;h=d697246f9179d9d89f31b085dabbd279ca419920;hb=bb577051dd5dbb5519f770eabf149de8675770ba;hp=a58808ba7be3d1247deae358306e3079f52d1aa2;hpb=318d641cd9f51f0058d973caf907d68f88566ac2;p=mesa.git

diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
index a58808ba7be..d697246f917 100644
--- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
+++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
@@ -28,6 +28,7 @@
 #include "midgard/midgard_compile.h"
 #include "compiler/nir/nir_builder.h"
 #include "nir/nir_lower_blend.h"
+#include "panfrost/util/pan_lower_framebuffer.h"
 #include "gallium/auxiliary/util/u_blend.h"
 #include "util/u_memory.h"
 
@@ -48,29 +49,17 @@
  *
  * As is typical with Midgard, shader binaries must be accompanied by
  * information about the first tag (ORed with the bottom nibble of address,
- * like usual) and work registers. Work register count is specified in the
- * blend descriptor, as well as in the coresponding fragment shader's work
- * count. This suggests that blend shader invocation is tied to fragment shader
+ * like usual) and work registers. Work register count is assumed to be less
+ * than or equal to the corresponding fragment shader's work count. This
+ * suggests that blend shader invocation is tied to fragment shader
  * execution.
  *
- * ---
- *
- * As for blend shaders, they use the standard ISA.
- *
- * The source pixel colour, including alpha, is preloaded into r0 as a vec4 of
- * float32.
- *
- * The destination pixel colour must be loaded explicitly via load/store ops.
- * TODO: Investigate.
- *
- * They use fragment shader writeout; however, instead of writing a vec4 of
- * float32 for RGBA encoding, we writeout a vec4 of uint8, using 8-bit imov
- * instead of 32-bit fmov. The net result is that r0 encodes a single uint32
- * containing all four channels of the color.  Accordingly, the blend shader
- * epilogue has to scale all four channels by 255 and then type convert to a
- * uint8.
- *
- * ---
+ * The shaders themselves use the standard ISA. The source pixel colour,
+ * including alpha, is preloaded into r0 as a vec4 of float32. The destination
+ * pixel colour must be loaded explicitly via load/store ops, possibly
+ * performing conversions in software. The blended colour must be stored with a
+ * fragment writeout in the correct framebuffer format, either in software or
+ * via conversion opcodes on the load/store pipe.
  *
  * Blend shaders hardcode constants. Naively, this requires recompilation each
  * time the blend color changes, which is a performance risk. Accordingly, we
@@ -85,43 +74,52 @@
  */
 
 static nir_lower_blend_options
-nir_make_options(const struct pipe_blend_state *blend, unsigned nr_cbufs)
+nir_make_options(const struct pipe_blend_state *blend, unsigned i)
 {
-        nir_lower_blend_options options;
-
-        for (unsigned i = 0; i < nr_cbufs; ++i) {
-                /* If blend is disabled, we just use replace mode */
-
-                nir_lower_blend_channel rgb = {
-                        .func = BLEND_FUNC_ADD,
-                        .src_factor = BLEND_FACTOR_ZERO,
-                        .invert_src_factor = true,
-                        .dst_factor = BLEND_FACTOR_ZERO,
-                        .invert_dst_factor = false
-                };
-
-                nir_lower_blend_channel alpha = rgb;
-
-                if (blend->rt[i].blend_enable) {
-                        rgb.func = util_blend_func_to_shader(blend->rt[i].rgb_func);
-                        rgb.src_factor = util_blend_factor_to_shader(blend->rt[i].rgb_src_factor);
-                        rgb.dst_factor = util_blend_factor_to_shader(blend->rt[i].rgb_dst_factor);
-                        rgb.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_src_factor);
-                        rgb.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_dst_factor);
-
-                        alpha.func = util_blend_func_to_shader(blend->rt[i].alpha_func);
-                        alpha.src_factor = util_blend_factor_to_shader(blend->rt[i].alpha_src_factor);
-                        alpha.dst_factor = util_blend_factor_to_shader(blend->rt[i].alpha_dst_factor);
-                        alpha.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_src_factor);
-                        alpha.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_dst_factor);
-                }
-
-                options.rt[i].rgb = rgb;
-                options.rt[i].alpha = alpha;
-
-                options.rt[i].colormask = blend->rt[i].colormask;
+        nir_lower_blend_options options = { 0 };
+
+        if (blend->logicop_enable) {
+            options.logicop_enable = true;
+            options.logicop_func = blend->logicop_func;
+            return options;
+        }
+
+        options.logicop_enable = false;
+
+        if (!blend->independent_blend_enable)
+                i = 0;
+
+        /* If blend is disabled, we just use replace mode */
+
+        nir_lower_blend_channel rgb = {
+                .func = BLEND_FUNC_ADD,
+                .src_factor = BLEND_FACTOR_ZERO,
+                .invert_src_factor = true,
+                .dst_factor = BLEND_FACTOR_ZERO,
+                .invert_dst_factor = false
+        };
+
+        nir_lower_blend_channel alpha = rgb;
+
+        if (blend->rt[i].blend_enable) {
+                rgb.func = util_blend_func_to_shader(blend->rt[i].rgb_func);
+                rgb.src_factor = util_blend_factor_to_shader(blend->rt[i].rgb_src_factor);
+                rgb.dst_factor = util_blend_factor_to_shader(blend->rt[i].rgb_dst_factor);
+                rgb.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_src_factor);
+                rgb.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_dst_factor);
+
+                alpha.func = util_blend_func_to_shader(blend->rt[i].alpha_func);
+                alpha.src_factor = util_blend_factor_to_shader(blend->rt[i].alpha_src_factor);
+                alpha.dst_factor = util_blend_factor_to_shader(blend->rt[i].alpha_dst_factor);
+                alpha.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_src_factor);
+                alpha.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_dst_factor);
         }
 
+        options.rgb = rgb;
+        options.alpha = alpha;
+
+        options.colormask = blend->rt[i].colormask;
+
         return options;
 }
 
@@ -129,20 +127,39 @@ struct panfrost_blend_shader
 panfrost_compile_blend_shader(
         struct panfrost_context *ctx,
         struct pipe_blend_state *cso,
-        enum pipe_format format)
+        enum pipe_format format,
+        unsigned rt)
 {
+        struct panfrost_device *dev = pan_device(ctx->base.screen);
         struct panfrost_blend_shader res;
 
+        res.ctx = ctx;
+
         /* Build the shader */
 
         nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
         nir_function *fn = nir_function_create(shader, "main");
         nir_function_impl *impl = nir_function_impl_create(fn);
 
+        const struct util_format_description *format_desc =
+                util_format_description(format);
+
+        nir_alu_type T = pan_unpacked_type_for_format(format_desc);
+        enum glsl_base_type g =
+                (T == nir_type_float16) ? GLSL_TYPE_FLOAT16 :
+                (T == nir_type_float32) ? GLSL_TYPE_FLOAT :
+                (T == nir_type_int8) ? GLSL_TYPE_INT8 :
+                (T == nir_type_int16) ? GLSL_TYPE_INT16 :
+                (T == nir_type_int32) ? GLSL_TYPE_INT :
+                (T == nir_type_uint8) ? GLSL_TYPE_UINT8 :
+                (T == nir_type_uint16) ? GLSL_TYPE_UINT16 :
+                (T == nir_type_uint32) ? GLSL_TYPE_UINT :
+                GLSL_TYPE_FLOAT;
+
         /* Create the blend variables */
 
         nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color");
-        nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_FragColor");
+        nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(g, 4), "gl_FragColor");
 
         c_src->data.location = VARYING_SLOT_COL0;
         c_out->data.location = FRAG_RESULT_COLOR;
@@ -158,35 +175,40 @@ panfrost_compile_blend_shader(
 
         nir_ssa_def *s_src = nir_load_var(b, c_src);
 
+        if (T == nir_type_float16)
+                s_src = nir_f2f16(b, s_src);
+        else if (T == nir_type_int16)
+                s_src = nir_i2i16(b, s_src);
+        else if (T == nir_type_uint16)
+                s_src = nir_u2u16(b, s_src);
+        else if (T == nir_type_int8)
+                s_src = nir_i2i8(b, s_src);
+        else if (T == nir_type_uint8)
+                s_src = nir_u2u8(b, s_src);
+
         /* Build a trivial blend shader */
         nir_store_var(b, c_out, s_src, 0xFF);
 
         nir_lower_blend_options options =
-                nir_make_options(cso, 1);
-        NIR_PASS_V(shader, nir_lower_blend, options);
+                nir_make_options(cso, rt);
+        options.format = format;
 
-        NIR_PASS_V(shader, nir_lower_framebuffer, format);
+        if (T == nir_type_float16)
+                options.half = true;
 
-        /* Compile the built shader */
-
-        midgard_program program;
-        midgard_compile_shader_nir(shader, &program, true);
-
-        /* Upload the shader */
-
-        int size = program.compiled.size;
-        uint8_t *dst = program.compiled.data;
+        NIR_PASS_V(shader, nir_lower_blend, options);
+        NIR_PASS_V(shader, pan_lower_framebuffer, format_desc, dev->quirks);
 
-        res.shader.cpu = mem_dup(dst, size);
-        res.shader.gpu = panfrost_upload(&ctx->shaders, dst, size);
+        /* Compile the built shader */
 
-        /* At least two work registers are needed due to an encoding quirk */
-        res.work_count = MAX2(program.work_register_count, 2);
+        panfrost_program program;
+        midgard_compile_shader_nir(shader, &program, true, rt, dev->gpu_id, false);
 
         /* Allow us to patch later */
         res.patch_index = program.blend_patch_offset;
         res.first_tag = program.first_tag;
-        res.size = size;
+        res.size = program.compiled.size;
+        res.buffer = program.compiled.data;
 
         return res;
 }