panfrost: Free hash_to_temp map
[mesa.git] / src / panfrost / util / pan_lower_framebuffer.c
index 6fdb9695b371481c050ac76574072bc3c7aefd02..1b733687b33c4dc6da6180d3e496c3a4b01cf991 100644
@@ -89,6 +89,15 @@ pan_unpacked_type_for_format(const struct util_format_description *desc)
 enum pan_format_class
 pan_format_class_load(const struct util_format_description *desc, unsigned quirks)
 {
+        /* Pure integer formats can be loaded via EXT_framebuffer_fetch and
+         * should be handled as a raw load with a size conversion (which is
+         * cheap). Likewise, since float framebuffers are internally raw
+         * (i.e. integer) framebuffers with blend shaders converting back
+         * and forth, they should take the software path as well. */
+
+        if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format))
+                return PAN_FORMAT_SOFTWARE;
+
         /* Check if we can do anything better than software architecturally */
         if (quirks & MIDGARD_NO_TYPED_BLEND_LOADS) {
                 return (quirks & NO_BLEND_PACKS)
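
A minimal sketch of what the new early-out means in practice (not part of the
patch; the include paths and the chosen format are illustrative): a float or
pure-integer render target is now classed as software regardless of quirks.

#include <assert.h>

#include "util/format/u_format.h"
#include "pan_lower_framebuffer.h"

static void
check_float_rt_is_software(void)
{
        /* A float RT hits the early return above, even with quirks == 0. */
        const struct util_format_description *desc =
                util_format_description(PIPE_FORMAT_R16G16B16A16_FLOAT);

        assert(pan_format_class_load(desc, 0) == PAN_FORMAT_SOFTWARE);
}
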
@@ -221,6 +230,22 @@ pan_fill_4(nir_builder *b, nir_ssa_def *v)
         return nir_vec(b, q, 4);
 }
 
+static nir_ssa_def *
+pan_extend(nir_builder *b, nir_ssa_def *v, unsigned N)
+{
+        nir_ssa_def *q[4];
+        assert(v->num_components <= 4);
+        assert(N <= 4);
+
+        for (unsigned j = 0; j < v->num_components; ++j)
+                q[j] = nir_channel(b, v, j);
+
+        for (unsigned j = v->num_components; j < N; ++j)
+                q[j] = nir_imm_int(b, 0);
+
+        return nir_vec(b, q, N);
+}
+
 static nir_ssa_def *
 pan_replicate_4(nir_builder *b, nir_ssa_def *v)
 {
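
For illustration, pan_extend zero-fills the missing components, so a
two-component value becomes (x, y, 0, 0). A sketch written as if it lived in
this same file (the helper is static); the caller name is hypothetical.

static nir_ssa_def *
pad_rg_to_vec4(nir_builder *b, nir_ssa_def *rg)
{
        /* rg has two components; the result is (rg.x, rg.y, 0, 0). */
        return pan_extend(b, rg, 4);
}
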
@@ -259,7 +284,7 @@ pan_unpack_unorm_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
 {
         assert(num_components <= 4);
         nir_ssa_def *unpacked = nir_unpack_unorm_4x8(b, nir_channel(b, pack, 0));
-        return nir_f2f16(b, unpacked);
+        return nir_f2fmp(b, unpacked);
 }
 
 /* UNORM 4 is also unpacked to f16, which prevents us from using the shared
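
This hunk, and most of the ones below, swap nir_f2f16/nir_u2u16/nir_i2i16 for
their *mp counterparts. A rough sketch of the intent, under the assumption
that f2fmp behaves like f2f16 but marks the 16-bit result as a mediump hint
that later lowering passes may fold away; the function name is hypothetical
and the snippet assumes it sits in this same file.

static nir_ssa_def *
unpack_unorm8_mediump(nir_builder *b, nir_ssa_def *packed)
{
        nir_ssa_def *f32 = nir_unpack_unorm_4x8(b, nir_channel(b, packed, 0));

        /* 16 bits as a precision hint rather than a hard requirement */
        return nir_f2fmp(b, f32); /* previously nir_f2f16(b, f32) */
}
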
@@ -290,7 +315,7 @@ pan_unpack_unorm_small(nir_builder *b, nir_ssa_def *pack,
                 nir_ssa_def *scales, nir_ssa_def *shifts)
 {
         nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
-        nir_ssa_def *raw = nir_ushr(b, nir_u2u16(b, channels), shifts);
+        nir_ssa_def *raw = nir_ushr(b, nir_u2ump(b, channels), shifts);
         return nir_fmul(b, nir_u2f16(b, raw), scales);
 }
 
@@ -377,12 +402,12 @@ pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
 {
         nir_ssa_def *p = nir_channel(b, packed, 0);
         nir_ssa_def *bytes = nir_unpack_32_4x8(b, p);
-        nir_ssa_def *ubytes = nir_u2u16(b, bytes);
+        nir_ssa_def *ubytes = nir_u2ump(b, bytes);
 
         nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)),
                         nir_imm_ivec4(b, 0, 2, 4, 6));
         nir_ssa_def *precision = nir_iand(b, shifts,
-                        nir_i2i16(b, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)));
+                        nir_i2imp(b, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)));
 
         nir_ssa_def *top_rgb = nir_ishl(b, nir_channels(b, ubytes, 0x7), nir_imm_int(b, 2));
         top_rgb = nir_ior(b, nir_channels(b, precision, 0x7), top_rgb);
@@ -395,7 +420,7 @@ pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
         };
 
         nir_ssa_def *scale = nir_imm_vec4(b, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 3.0);
-        return nir_f2f16(b, nir_fmul(b, nir_u2f32(b, nir_vec(b, chans, 4)), scale));
+        return nir_f2fmp(b, nir_fmul(b, nir_u2f32(b, nir_vec(b, chans, 4)), scale));
 }
 
 /* On the other hand, the pure int RGB10_A2 is identical to the spec */
@@ -424,7 +449,7 @@ pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
         nir_ssa_def *mask = nir_iand(b, shift,
                         nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));
 
-        return nir_u2u16(b, mask);
+        return nir_u2ump(b, mask);
 }
 
 /* NIR means we can *finally* catch a break */
@@ -440,7 +465,7 @@ static nir_ssa_def *
 pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v)
 {
         nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0));
-        nir_ssa_def *f16 = nir_f2f16(b, f32);
+        nir_ssa_def *f16 = nir_f2fmp(b, f32);
 
         /* Extend to vec4 with alpha */
         nir_ssa_def *components[4] = {
@@ -461,7 +486,7 @@ pan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear)
         nir_ssa_def *rgb = nir_channels(b, linear, 0x7);
 
         /* TODO: fp16 native conversion */
-        nir_ssa_def *srgb = nir_f2f16(b,
+        nir_ssa_def *srgb = nir_f2fmp(b,
                         nir_format_linear_to_srgb(b, nir_f2f32(b, rgb)));
 
         nir_ssa_def *comp[4] = {
@@ -480,7 +505,7 @@ pan_srgb_to_linear(nir_builder *b, nir_ssa_def *srgb)
         nir_ssa_def *rgb = nir_channels(b, srgb, 0x7);
 
         /* TODO: fp16 native conversion */
-        nir_ssa_def *linear = nir_f2f16(b,
+        nir_ssa_def *linear = nir_f2fmp(b,
                         nir_format_srgb_to_linear(b, nir_f2f32(b, rgb)));
 
         nir_ssa_def *comp[4] = {
@@ -635,16 +660,23 @@ pan_lower_fb_store(nir_shader *shader,
         nir_builder_instr_insert(b, &new->instr);
 }
 
+static nir_ssa_def *
+pan_sample_id(nir_builder *b, int sample)
+{
+        return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b);
+}
+
 static void
 pan_lower_fb_load(nir_shader *shader,
                 nir_builder *b,
                 nir_intrinsic_instr *intr,
                 const struct util_format_description *desc,
-                unsigned base, unsigned quirks)
+                unsigned base, int sample, unsigned quirks)
 {
         nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader,
                        nir_intrinsic_load_raw_output_pan);
         new->num_components = 4;
+        new->src[0] = nir_src_for_ssa(pan_sample_id(b, sample));
 
         nir_intrinsic_set_base(new, base);
 
@@ -683,6 +715,7 @@ pan_lower_fb_load(nir_shader *shader,
         }
 
         unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits);
+        unpacked = pan_extend(b, unpacked, nir_dest_num_components(intr->dest));
 
         nir_src rewritten = nir_src_for_ssa(unpacked);
         nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, &intr->instr);
@@ -690,7 +723,7 @@ pan_lower_fb_load(nir_shader *shader,
 
 bool
 pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
-                      bool lower_store, unsigned quirks)
+                      bool is_blend, unsigned quirks)
 {
         if (shader->info.stage != MESA_SHADER_FRAGMENT)
                return false;
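
A sketch of a call site under the renamed parameter; the shader pointer,
format array, quirks value and include paths are assumptions, not taken from
this patch.

#include "nir.h"
#include "pan_lower_framebuffer.h"

static void
lower_fragment_outputs(nir_shader *nir, bool is_blend, unsigned quirks)
{
        /* Unused RTs stay PIPE_FORMAT_NONE and are skipped by the pass
         * (see the hunk further down). */
        enum pipe_format rt_formats[8] = {
                PIPE_FORMAT_R8G8B8A8_UNORM,
        };

        /* Blend shaders also get their stores lowered and their loads pinned
         * to sample 0; ordinary fragment shaders only lower loads
         * (framebuffer fetch) and read per-sample. */
        NIR_PASS_V(nir, pan_lower_framebuffer, rt_formats, is_blend, quirks);
}
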
@@ -708,7 +741,7 @@ pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
                                 bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
                                 bool is_store = intr->intrinsic == nir_intrinsic_store_deref;
 
-                                if (!(is_load || (is_store && lower_store)))
+                                if (!(is_load || (is_store && is_blend)))
                                         continue;
 
                                 nir_variable *var = nir_intrinsic_get_var(intr, 0);
@@ -726,6 +759,9 @@ pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
                                 else
                                         continue;
 
+                                if (rt_fmts[rt] == PIPE_FORMAT_NONE)
+                                        continue;
+
                                 const struct util_format_description *desc =
                                    util_format_description(rt_fmts[rt]);
 
@@ -736,6 +772,12 @@ pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
                                 if (fmt_class == PAN_FORMAT_NATIVE)
                                         continue;
 
+                                /* EXT_shader_framebuffer_fetch requires
+                                 * per-sample loads. MSAA blend shaders are
+                                 * not yet handled, so blend shaders are
+                                 * pinned to sample 0 for now. */
+                                int sample = is_blend ? 0 : -1;
+
                                 nir_builder b;
                                 nir_builder_init(&b, func->impl);
 
@@ -744,7 +786,7 @@ pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
                                         pan_lower_fb_store(shader, &b, intr, desc, quirks);
                                 } else {
                                         b.cursor = nir_after_instr(instr);
-                                        pan_lower_fb_load(shader, &b, intr, desc, base, quirks);
+                                        pan_lower_fb_load(shader, &b, intr, desc, base, sample, quirks);
                                 }
 
                                 nir_instr_remove(instr);
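
Finally, to make the sample selection above concrete, a sketch (again written
as if inside this file) of what pan_sample_id feeds into the raw load in the
two cases:

static void
example_sample_sources(nir_builder *b)
{
        /* Framebuffer fetch in a regular fragment shader: sample == -1, so
         * the raw load reads the dynamic sample ID and is truly per-sample. */
        nir_ssa_def *fetch_sample = pan_sample_id(b, -1);

        /* Blend shader: MSAA blend shaders are not handled yet, so the load
         * is pinned to an immediate sample 0. */
        nir_ssa_def *blend_sample = pan_sample_id(b, 0);

        (void) fetch_sample;
        (void) blend_sample;
}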