/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
/* Implements framebuffer format conversions in software for Midgard/Bifrost
 * blend shaders. This pass is designed for a single render target; Midgard
 * duplicates blend shaders for MRT to simplify everything. A particular
 * framebuffer format may be categorized as 1) typed load available, 2) typed
 * unpack available, or 3) software unpack only, and likewise for stores. The
 * first two types are handled in the compiler backend directly, so this
 * module is responsible for identifying type 3 formats (hardware dependent)
 * and inserting appropriate ALU code to perform the conversion from the
 * packed type to a designated unpacked type, and vice versa.
 *
 * The unpacked type depends on the format:
 *
 *      - For 32-bit float formats, 32-bit floats.
 *      - For other floats, 16-bit floats.
 *      - For 32-bit ints, 32-bit ints.
 *      - For 8-bit ints, 8-bit ints.
 *      - For other ints, 16-bit ints.
 *
 * The rationale is to optimize blending and logic op instructions by using
 * the smallest precision necessary to store the pixel losslessly.
 */
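/* For instance (an illustrative sketch of the mapping above, not an
 * exhaustive table): RGBA32F unpacks to a float32 vec4; RGBA16F and the
 * UNORM formats to a float16 vec4; RGBA8UI to a uint8 vec4; RG16I to an
 * int16 vec4; and RGBA32UI stays a uint32 vec4. */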
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/format/u_format.h"
#include "pan_lower_framebuffer.h"
#include "panfrost-quirks.h"
/* Determines the unpacked type best suiting a given format, so the rest of
 * the pipeline may be adjusted accordingly */

static nir_alu_type
pan_unpacked_type_for_format(const struct util_format_description *desc)
{
        int c = util_format_get_first_non_void_channel(desc->format);

        if (c == -1)
                unreachable("Void format not renderable");

        bool large = (desc->channel[c].size > 16);
        bool bit8 = (desc->channel[c].size == 8);
        assert(desc->channel[c].size <= 32);

        if (desc->channel[c].normalized)
                return large ? nir_type_float32 : nir_type_float16;

        switch (desc->channel[c].type) {
        case UTIL_FORMAT_TYPE_UNSIGNED:
                return bit8 ? nir_type_uint8 :
                        large ? nir_type_uint32 : nir_type_uint16;
        case UTIL_FORMAT_TYPE_SIGNED:
                return bit8 ? nir_type_int8 :
                        large ? nir_type_int32 : nir_type_int16;
        case UTIL_FORMAT_TYPE_FLOAT:
                return large ? nir_type_float32 : nir_type_float16;
        default:
                unreachable("Format not renderable");
        }
}
enum pan_format_class
pan_format_class_load(const struct util_format_description *desc, unsigned quirks)
{
        /* Pure integers can be loaded via EXT_framebuffer_fetch and should be
         * handled as a raw load with a size conversion (it's cheap). Likewise,
         * since float framebuffers are internally implemented as raw (i.e.
         * integer) framebuffers with blend shaders to go back and forth, they
         * should be s/w as well */

        if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format))
                return PAN_FORMAT_SOFTWARE;

        /* Check if we can do anything better than software architecturally */
        if (quirks & MIDGARD_NO_TYPED_BLEND_LOADS) {
                return (quirks & NO_BLEND_PACKS)
                        ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
        }

        /* Some formats are missing as typed on some GPUs but have unpacks */
        if (quirks & MIDGARD_MISSING_LOADS) {
                switch (desc->format) {
                case PIPE_FORMAT_R11G11B10_FLOAT:
                case PIPE_FORMAT_R10G10B10A2_UNORM:
                case PIPE_FORMAT_B10G10R10A2_UNORM:
                case PIPE_FORMAT_R10G10B10X2_UNORM:
                case PIPE_FORMAT_B10G10R10X2_UNORM:
                case PIPE_FORMAT_R10G10B10A2_UINT:
                        return PAN_FORMAT_PACK;
                default:
                        return PAN_FORMAT_NATIVE;
                }
        }

        /* Otherwise, we can do native */
        return PAN_FORMAT_NATIVE;
}
enum pan_format_class
pan_format_class_store(const struct util_format_description *desc, unsigned quirks)
{
        /* Check if we can do anything better than software architecturally */
        if (quirks & MIDGARD_NO_TYPED_BLEND_STORES) {
                return (quirks & NO_BLEND_PACKS)
                        ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
        }

        return PAN_FORMAT_NATIVE;
}
/* Convenience method */

static enum pan_format_class
pan_format_class(const struct util_format_description *desc, unsigned quirks, bool is_store)
{
        if (is_store)
                return pan_format_class_store(desc, quirks);
        else
                return pan_format_class_load(desc, quirks);
}
/* Software packs/unpacks, by format class. Packs take in the pixel value
 * typed as `pan_unpacked_type_for_format` of the format and return an
 * i32vec4 suitable for storing (with components replicated to fill). Unpacks
 * do the reverse but cannot rely on replication.
 *
 * Pure 32 formats (R32F ... RGBA32F) are 32 unpacked, so just need to
 * replicate to fill */
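/* E.g. for RG32UI (a sketch of the conventions above): the unpacked (r, g)
 * uvec2 packs to the i32vec4 (r, g, r, g) by replication, and the unpack
 * returns just (r, g). */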
static nir_ssa_def *
pan_pack_pure_32(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *replicated[4];

        for (unsigned i = 0; i < 4; ++i)
                replicated[i] = nir_channel(b, v, i % v->num_components);

        return nir_vec(b, replicated, 4);
}
static nir_ssa_def *
pan_unpack_pure_32(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        return nir_channels(b, pack, (1 << num_components) - 1);
}
/* Pure x16 formats are x16 unpacked, so it's similar, but we need to pack
 * upper/lower halves of course */
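/* E.g. an RGBA16UI pixel (r, g, b, a) becomes the 32-bit words
 * r | (g << 16) and b | (a << 16), which then replicate to fill the vec4. */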
static nir_ssa_def *
pan_pack_pure_16(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *replicated[4];

        for (unsigned i = 0; i < 4; ++i) {
                unsigned c = 2 * i;

                nir_ssa_def *parts[2] = {
                        nir_channel(b, v, (c + 0) % v->num_components),
                        nir_channel(b, v, (c + 1) % v->num_components)
                };

                replicated[i] = nir_pack_32_2x16(b, nir_vec(b, parts, 2));
        }

        return nir_vec(b, replicated, 4);
}
static nir_ssa_def *
pan_unpack_pure_16(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        nir_ssa_def *unpacked[4];

        assert(num_components <= 4);

        for (unsigned i = 0; i < num_components; i += 2) {
                nir_ssa_def *halves =
                        nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1));

                unpacked[i + 0] = nir_channel(b, halves, 0);
                unpacked[i + 1] = nir_channel(b, halves, 1);
        }

        for (unsigned i = num_components; i < 4; ++i)
                unpacked[i] = nir_imm_intN_t(b, 0, 16);

        return nir_vec(b, unpacked, 4);
}
/* And likewise for x8. pan_fill_4 fills a 4-channel vector with an n-channel
 * vector (n <= 4), replicating as needed. pan_replicate_4 constructs a
 * 4-channel vector from a scalar via replication */
static nir_ssa_def *
pan_fill_4(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *q[4];
        assert(v->num_components <= 4);

        for (unsigned j = 0; j < 4; ++j)
                q[j] = nir_channel(b, v, j % v->num_components);

        return nir_vec(b, q, 4);
}
static nir_ssa_def *
pan_replicate_4(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *replicated[4] = { v, v, v, v };
        return nir_vec(b, replicated, 4);
}
static nir_ssa_def *
pan_pack_pure_8(nir_builder *b, nir_ssa_def *v)
{
        return pan_replicate_4(b, nir_pack_32_4x8(b, pan_fill_4(b, v)));
}
static nir_ssa_def *
pan_unpack_pure_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        assert(num_components <= 4);
        nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
        return nir_channels(b, unpacked, (1 << num_components) - 1);
}
/* UNORM 8 is unpacked to f16 vec4. We could directly use the un/pack_unorm_4x8
 * ops provided we replicate appropriately, but for packing we'd rather stay in
 * 8/16-bit whereas the NIR op forces 32-bit, so we do it manually */
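/* E.g. (1.0, 0.5, 0.0, 1.0) scales to (255, 128, 0, 255) after
 * round-to-even, which packs to the 32-bit word 0xFF0080FF (bytes A, B, G, R
 * from high to low). */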
static nir_ssa_def *
pan_pack_unorm_8(nir_builder *b, nir_ssa_def *v)
{
        return pan_replicate_4(b, nir_pack_32_4x8(b,
                nir_f2u8(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b,
                        pan_fill_4(b, v)), nir_imm_float16(b, 255.0))))));
}
static nir_ssa_def *
pan_unpack_unorm_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        assert(num_components <= 4);
        nir_ssa_def *unpacked = nir_unpack_unorm_4x8(b, nir_channel(b, pack, 0));
        return nir_f2fmp(b, unpacked);
}
/* UNORM 4 is also unpacked to f16, which prevents us from using the shared
 * unpack which strongly assumes fp32. However, on the tilebuffer it is
 * actually packed as:
 *
 *      [AAAA] [0000] [BBBB] [0000] [GGGG] [0000] [RRRR] [0000]
 *
 * In other words, spacing it out so we're aligned to bytes and on top. So
 * pack as:
 *
 *      pack_32_4x8(f2u8_rte(v * 15.0) << 4)
 */
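/* E.g. (1.0, 0.5, 0.0, 1.0) scales to (15, 8, 0, 15), shifts up to the
 * bytes (0xF0, 0x80, 0x00, 0xF0), and packs to the word 0xF00080F0. */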
static nir_ssa_def *
pan_pack_unorm_small(nir_builder *b, nir_ssa_def *v,
                nir_ssa_def *scales, nir_ssa_def *shifts)
{
        nir_ssa_def *f = nir_fmul(b, nir_fsat(b, pan_fill_4(b, v)), scales);
        nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f));
        nir_ssa_def *s = nir_ishl(b, u8, shifts);
        nir_ssa_def *repl = nir_pack_32_4x8(b, s);

        return pan_replicate_4(b, repl);
}
static nir_ssa_def *
pan_unpack_unorm_small(nir_builder *b, nir_ssa_def *pack,
                nir_ssa_def *scales, nir_ssa_def *shifts)
{
        nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
        nir_ssa_def *raw = nir_ushr(b, nir_u2ump(b, channels), shifts);
        return nir_fmul(b, nir_u2f16(b, raw), scales);
}
static nir_ssa_def *
pan_pack_unorm_4(nir_builder *b, nir_ssa_def *v)
{
        return pan_pack_unorm_small(b, v,
                nir_imm_vec4_16(b, 15.0, 15.0, 15.0, 15.0),
                nir_imm_ivec4(b, 4, 4, 4, 4));
}
static nir_ssa_def *
pan_unpack_unorm_4(nir_builder *b, nir_ssa_def *v)
{
        return pan_unpack_unorm_small(b, v,
                nir_imm_vec4_16(b, 1.0 / 15.0, 1.0 / 15.0, 1.0 / 15.0, 1.0 / 15.0),
                nir_imm_ivec4(b, 4, 4, 4, 4));
}
/* UNORM RGB5_A1 and RGB565 are similar */
static nir_ssa_def *
pan_pack_unorm_5551(nir_builder *b, nir_ssa_def *v)
{
        return pan_pack_unorm_small(b, v,
                nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0),
                nir_imm_ivec4(b, 3, 3, 3, 7));
}
static nir_ssa_def *
pan_unpack_unorm_5551(nir_builder *b, nir_ssa_def *v)
{
        return pan_unpack_unorm_small(b, v,
                nir_imm_vec4_16(b, 1.0 / 31.0, 1.0 / 31.0, 1.0 / 31.0, 1.0),
                nir_imm_ivec4(b, 3, 3, 3, 7));
}
static nir_ssa_def *
pan_pack_unorm_565(nir_builder *b, nir_ssa_def *v)
{
        return pan_pack_unorm_small(b, v,
                nir_imm_vec4_16(b, 31.0, 63.0, 31.0, 0.0),
                nir_imm_ivec4(b, 3, 2, 3, 0));
}
static nir_ssa_def *
pan_unpack_unorm_565(nir_builder *b, nir_ssa_def *v)
{
        return pan_unpack_unorm_small(b, v,
                nir_imm_vec4_16(b, 1.0 / 31.0, 1.0 / 63.0, 1.0 / 31.0, 0.0),
                nir_imm_ivec4(b, 3, 2, 3, 0));
}
/* RGB10_A2 is packed in the tilebuffer as the bottom 3 bytes being the top
 * 8-bits of RGB and the top byte being RGBA as 2-bits packed. As imirkin
 * pointed out, this means free conversion to RGBX8 */
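/* E.g. opaque white packs as the R, G, B top bits (0xFF, 0xFF, 0xFF) in the
 * low three bytes, with the top byte holding the low 2 bits of each channel
 * plus both alpha bits, giving the word 0xFFFFFFFF; dropping the top byte
 * recovers an RGBX8 pixel for free. */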
static nir_ssa_def *
pan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *scale = nir_imm_vec4_16(b, 1023.0, 1023.0, 1023.0, 3.0);
        nir_ssa_def *s = nir_f2u32(b, nir_fround_even(b, nir_f2f32(b, nir_fmul(b, nir_fsat(b, v), scale))));

        nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2));
        nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8));

        nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3));

        nir_ssa_def *top = nir_ior(b,
                        nir_ior(b,
                                nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)),
                                nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))),
                        nir_ior(b,
                                nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)),
                                nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6))));

        nir_ssa_def *p = nir_ior(b, top, top8_rgb);
        return pan_replicate_4(b, p);
}
static nir_ssa_def *
pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
{
        nir_ssa_def *p = nir_channel(b, packed, 0);
        nir_ssa_def *bytes = nir_unpack_32_4x8(b, p);
        nir_ssa_def *ubytes = nir_u2ump(b, bytes);

        nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)),
                        nir_imm_ivec4(b, 0, 2, 4, 6));
        nir_ssa_def *precision = nir_iand(b, shifts,
                        nir_i2imp(b, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)));

        nir_ssa_def *top_rgb = nir_ishl(b, nir_channels(b, ubytes, 0x7), nir_imm_int(b, 2));
        top_rgb = nir_ior(b, nir_channels(b, precision, 0x7), top_rgb);

        nir_ssa_def *chans[4] = {
                nir_channel(b, top_rgb, 0),
                nir_channel(b, top_rgb, 1),
                nir_channel(b, top_rgb, 2),
                nir_channel(b, precision, 3)
        };

        nir_ssa_def *scale = nir_imm_vec4(b, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 3.0);
        return nir_f2fmp(b, nir_fmul(b, nir_u2f32(b, nir_vec(b, chans, 4)), scale));
}
/* On the other hand, the pure int RGB10_A2 is identical to the spec */
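/* E.g. (r, g, b, a) = (1023, 0, 0, 3) packs to
 * r | (g << 10) | (b << 20) | (a << 30) = 0xC00003FF. */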
static nir_ssa_def *
pan_pack_uint_1010102(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *shift = nir_ishl(b, nir_u2u32(b, v),
                        nir_imm_ivec4(b, 0, 10, 20, 30));

        nir_ssa_def *p = nir_ior(b,
                        nir_ior(b, nir_channel(b, shift, 0), nir_channel(b, shift, 1)),
                        nir_ior(b, nir_channel(b, shift, 2), nir_channel(b, shift, 3)));

        return pan_replicate_4(b, p);
}
static nir_ssa_def *
pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
{
        nir_ssa_def *chan = nir_channel(b, packed, 0);

        nir_ssa_def *shift = nir_ushr(b, pan_replicate_4(b, chan),
                        nir_imm_ivec4(b, 0, 10, 20, 30));

        nir_ssa_def *mask = nir_iand(b, shift,
                        nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));

        return nir_u2ump(b, mask);
}
/* For R11G11B10, NIR's shared format helpers do the heavy lifting, so we can
 * *finally* catch a break */
static nir_ssa_def *
pan_pack_r11g11b10(nir_builder *b, nir_ssa_def *v)
{
        return pan_replicate_4(b, nir_format_pack_11f11f10f(b,
                                nir_f2f32(b, v)));
}
static nir_ssa_def *
pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0));
        nir_ssa_def *f16 = nir_f2fmp(b, f32);

        /* Extend to vec4 with alpha */
        nir_ssa_def *components[4] = {
                nir_channel(b, f16, 0),
                nir_channel(b, f16, 1),
                nir_channel(b, f16, 2),
                nir_imm_float16(b, 1.0)
        };

        return nir_vec(b, components, 4);
}
/* Wrapper around sRGB conversion */
static nir_ssa_def *
pan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear)
{
        nir_ssa_def *rgb = nir_channels(b, linear, 0x7);

        /* TODO: fp16 native conversion */
        nir_ssa_def *srgb = nir_f2fmp(b,
                        nir_format_linear_to_srgb(b, nir_f2f32(b, rgb)));

        nir_ssa_def *comp[4] = {
                nir_channel(b, srgb, 0),
                nir_channel(b, srgb, 1),
                nir_channel(b, srgb, 2),
                nir_channel(b, linear, 3),
        };

        return nir_vec(b, comp, 4);
}
static nir_ssa_def *
pan_srgb_to_linear(nir_builder *b, nir_ssa_def *srgb)
{
        nir_ssa_def *rgb = nir_channels(b, srgb, 0x7);

        /* TODO: fp16 native conversion */
        nir_ssa_def *linear = nir_f2fmp(b,
                        nir_format_srgb_to_linear(b, nir_f2f32(b, rgb)));

        nir_ssa_def *comp[4] = {
                nir_channel(b, linear, 0),
                nir_channel(b, linear, 1),
                nir_channel(b, linear, 2),
                nir_channel(b, srgb, 3),
        };

        return nir_vec(b, comp, 4);
}
/* Generic dispatches for un/pack regardless of format */
static bool
pan_is_unorm4(const struct util_format_description *desc)
{
        switch (desc->format) {
        case PIPE_FORMAT_B4G4R4A4_UNORM:
        case PIPE_FORMAT_B4G4R4X4_UNORM:
        case PIPE_FORMAT_A4R4_UNORM:
        case PIPE_FORMAT_R4A4_UNORM:
        case PIPE_FORMAT_A4B4G4R4_UNORM:
        case PIPE_FORMAT_R4G4B4A4_UNORM:
                return true;
        default:
                return false;
        }
}
static nir_ssa_def *
pan_unpack(nir_builder *b,
                const struct util_format_description *desc,
                nir_ssa_def *packed)
{
        if (util_format_is_unorm8(desc))
                return pan_unpack_unorm_8(b, packed, desc->nr_channels);

        if (pan_is_unorm4(desc))
                return pan_unpack_unorm_4(b, packed);

        if (desc->is_array) {
                int c = util_format_get_first_non_void_channel(desc->format);
                assert(c >= 0);
                struct util_format_channel_description d = desc->channel[c];

                if (d.size == 32 || d.size == 16) {
                        assert(!d.normalized);
                        assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer);

                        return d.size == 32 ? pan_unpack_pure_32(b, packed, desc->nr_channels) :
                                pan_unpack_pure_16(b, packed, desc->nr_channels);
                } else if (d.size == 8) {
                        assert(d.pure_integer);
                        return pan_unpack_pure_8(b, packed, desc->nr_channels);
                } else {
                        unreachable("Unrenderable size");
                }
        }

        switch (desc->format) {
        case PIPE_FORMAT_B5G5R5A1_UNORM:
        case PIPE_FORMAT_R5G5B5A1_UNORM:
                return pan_unpack_unorm_5551(b, packed);
        case PIPE_FORMAT_B5G6R5_UNORM:
                return pan_unpack_unorm_565(b, packed);
        case PIPE_FORMAT_R10G10B10A2_UNORM:
                return pan_unpack_unorm_1010102(b, packed);
        case PIPE_FORMAT_R10G10B10A2_UINT:
                return pan_unpack_uint_1010102(b, packed);
        case PIPE_FORMAT_R11G11B10_FLOAT:
                return pan_unpack_r11g11b10(b, packed);
        default:
                break;
        }

        fprintf(stderr, "%s\n", desc->name);
        unreachable("Unknown format");
}
static nir_ssa_def *
pan_pack(nir_builder *b,
                const struct util_format_description *desc,
                nir_ssa_def *unpacked)
{
        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                unpacked = pan_linear_to_srgb(b, unpacked);

        if (util_format_is_unorm8(desc))
                return pan_pack_unorm_8(b, unpacked);

        if (pan_is_unorm4(desc))
                return pan_pack_unorm_4(b, unpacked);

        if (desc->is_array) {
                int c = util_format_get_first_non_void_channel(desc->format);
                assert(c >= 0);
                struct util_format_channel_description d = desc->channel[c];

                if (d.size == 32 || d.size == 16) {
                        assert(!d.normalized);
                        assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer);

                        return d.size == 32 ? pan_pack_pure_32(b, unpacked) :
                                pan_pack_pure_16(b, unpacked);
                } else if (d.size == 8) {
                        assert(d.pure_integer);
                        return pan_pack_pure_8(b, unpacked);
                } else {
                        unreachable("Unrenderable size");
                }
        }

        switch (desc->format) {
        case PIPE_FORMAT_B5G5R5A1_UNORM:
        case PIPE_FORMAT_R5G5B5A1_UNORM:
                return pan_pack_unorm_5551(b, unpacked);
        case PIPE_FORMAT_B5G6R5_UNORM:
                return pan_pack_unorm_565(b, unpacked);
        case PIPE_FORMAT_R10G10B10A2_UNORM:
                return pan_pack_unorm_1010102(b, unpacked);
        case PIPE_FORMAT_R10G10B10A2_UINT:
                return pan_pack_uint_1010102(b, unpacked);
        case PIPE_FORMAT_R11G11B10_FLOAT:
                return pan_pack_r11g11b10(b, unpacked);
        default:
                break;
        }

        fprintf(stderr, "%s\n", desc->name);
        unreachable("Unknown format");
}
static void
pan_lower_fb_store(nir_shader *shader,
                nir_builder *b,
                nir_intrinsic_instr *intr,
                const struct util_format_description *desc,
                unsigned quirks)
{
        /* For stores, add conversion before */
        nir_ssa_def *unpacked = nir_ssa_for_src(b, intr->src[1], 4);
        nir_ssa_def *packed = pan_pack(b, desc, unpacked);

        nir_intrinsic_instr *new =
                nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
        new->src[0] = nir_src_for_ssa(packed);
        new->num_components = 4;
        nir_builder_instr_insert(b, &new->instr);
}
static void
pan_lower_fb_load(nir_shader *shader,
                nir_builder *b,
                nir_intrinsic_instr *intr,
                const struct util_format_description *desc,
                unsigned base, unsigned quirks)
{
        nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader,
                       nir_intrinsic_load_raw_output_pan);
        new->num_components = 4;

        nir_intrinsic_set_base(new, base);

        nir_ssa_dest_init(&new->instr, &new->dest, 4, 32, NULL);
        nir_builder_instr_insert(b, &new->instr);

        /* Convert the raw value */
        nir_ssa_def *packed = &new->dest.ssa;
        nir_ssa_def *unpacked = pan_unpack(b, desc, packed);

        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                unpacked = pan_srgb_to_linear(b, unpacked);

        /* Convert to the size of the load intrinsic.
         *
         * We can assume that the type will match with the framebuffer format:
         *
         * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says:
         *
         *      If [UNORM or SNORM, convert to fixed-point]; otherwise no type
         *      conversion is applied. If the values written by the fragment
         *      shader do not match the format(s) of the corresponding color
         *      buffer(s), the result is undefined.
         */
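        /* E.g. an RGBA16F target unpacks to an f16 vec4 while the replaced
         * deref load may be 32-bit; per the above, resizing within the same
         * base type (f2f32 here) is always sufficient. */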
        unsigned bits = nir_dest_bit_size(intr->dest);

        nir_alu_type src_type;
        if (desc->channel[0].pure_integer) {
                if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
                        src_type = nir_type_int;
                else
                        src_type = nir_type_uint;
        } else {
                src_type = nir_type_float;
        }

        unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits);

        nir_src rewritten = nir_src_for_ssa(unpacked);
        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, &intr->instr);
}
void
pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
                bool lower_store, unsigned quirks)
{
        if (shader->info.stage != MESA_SHADER_FRAGMENT)
                return;

        bool progress = false;

        nir_foreach_function(func, shader) {
                nir_foreach_block(block, func->impl) {
                        nir_foreach_instr_safe(instr, block) {
                                if (instr->type != nir_instr_type_intrinsic)
                                        continue;

                                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

                                bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
                                bool is_store = intr->intrinsic == nir_intrinsic_store_deref;

                                if (!(is_load || (is_store && lower_store)))
                                        continue;

                                nir_variable *var = nir_intrinsic_get_var(intr, 0);

                                if (var->data.mode != nir_var_shader_out)
                                        continue;

                                unsigned base = var->data.driver_location;

                                unsigned rt;
                                if (var->data.location == FRAG_RESULT_COLOR)
                                        rt = 0;
                                else if (var->data.location >= FRAG_RESULT_DATA0)
                                        rt = var->data.location - FRAG_RESULT_DATA0;
                                else
                                        continue;

                                if (rt_fmts[rt] == PIPE_FORMAT_NONE)
                                        continue;

                                const struct util_format_description *desc =
                                        util_format_description(rt_fmts[rt]);

                                enum pan_format_class fmt_class =
                                        pan_format_class(desc, quirks, is_store);

                                /* Don't lower formats the hardware handles natively */
                                if (fmt_class == PAN_FORMAT_NATIVE)
                                        continue;

                                nir_builder b;
                                nir_builder_init(&b, func->impl);

                                if (is_store) {
                                        b.cursor = nir_before_instr(instr);
                                        pan_lower_fb_store(shader, &b, intr, desc, quirks);
                                } else {
                                        b.cursor = nir_after_instr(instr);
                                        pan_lower_fb_load(shader, &b, intr, desc, base, quirks);
                                }

                                nir_instr_remove(instr);
                                progress = true;
                        }
                }

                nir_metadata_preserve(func->impl, nir_metadata_block_index |
                                nir_metadata_dominance);
        }
}