panfrost: Always use SOFTWARE for pure formats
src/panfrost/util/pan_lower_framebuffer.c
/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

/**
 * Implements framebuffer format conversions in software for Midgard/Bifrost
 * blend shaders. This pass is designed for a single render target; Midgard
 * duplicates blend shaders for MRT to simplify everything. A particular
 * framebuffer format may be categorized as 1) typed load available, 2) typed
 * unpack available, or 3) software unpack only, and likewise for stores. The
 * first two types are handled in the compiler backend directly, so this module
 * is responsible for identifying type 3 formats (hardware dependent) and
 * inserting appropriate ALU code to perform the conversion from the packed
 * type to a designated unpacked type, and vice versa.
 *
 * The unpacked type depends on the format:
 *
 *   - For 32-bit float formats, 32-bit floats.
 *   - For other floats, 16-bit floats.
 *   - For 32-bit ints, 32-bit ints.
 *   - For 8-bit ints, 8-bit ints.
 *   - For other ints, 16-bit ints.
 *
 * The rationale is to optimize blending and logic op instructions by using the
 * smallest precision necessary to store the pixel losslessly.
 */
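/* Worked examples of that mapping: RGBA8_UNORM and R16F unpack to float16,
 * RGBA32F stays float32, RGBA8UI unpacks to uint8, RG16I to int16, and R32UI
 * stays uint32. */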

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/format/u_format.h"
#include "pan_lower_framebuffer.h"
#include "panfrost-quirks.h"

/* Determines the unpacked type best suiting a given format, so the rest of the
 * pipeline may be adjusted accordingly */

nir_alu_type
pan_unpacked_type_for_format(const struct util_format_description *desc)
{
        int c = util_format_get_first_non_void_channel(desc->format);

        if (c == -1)
                unreachable("Void format not renderable");

        bool large = (desc->channel[c].size > 16);
        bool bit8 = (desc->channel[c].size == 8);
        assert(desc->channel[c].size <= 32);

        if (desc->channel[c].normalized)
                return large ? nir_type_float32 : nir_type_float16;

        switch (desc->channel[c].type) {
        case UTIL_FORMAT_TYPE_UNSIGNED:
                return bit8 ? nir_type_uint8 :
                        large ? nir_type_uint32 : nir_type_uint16;
        case UTIL_FORMAT_TYPE_SIGNED:
                return bit8 ? nir_type_int8 :
                        large ? nir_type_int32 : nir_type_int16;
        case UTIL_FORMAT_TYPE_FLOAT:
                return large ? nir_type_float32 : nir_type_float16;
        default:
                unreachable("Format not renderable");
        }
}

enum pan_format_class
pan_format_class_load(const struct util_format_description *desc, unsigned quirks)
{
        /* Pure integers can be loaded via EXT_framebuffer_fetch and should be
         * handled as a raw load with a size conversion (it's cheap). Likewise,
         * since float framebuffers are internally implemented as raw (i.e.
         * integer) framebuffers with blend shaders to go back and forth, they
         * should be s/w as well */

        if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format))
                return PAN_FORMAT_SOFTWARE;

        /* Check if we can do anything better than software architecturally */
        if (quirks & MIDGARD_NO_TYPED_BLEND_LOADS) {
                return (quirks & NO_BLEND_PACKS)
                        ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
        }

        /* Some formats are missing as typed on some GPUs but have unpacks */
        if (quirks & MIDGARD_MISSING_LOADS) {
                switch (desc->format) {
                case PIPE_FORMAT_R11G11B10_FLOAT:
                case PIPE_FORMAT_R10G10B10A2_UNORM:
                case PIPE_FORMAT_B10G10R10A2_UNORM:
                case PIPE_FORMAT_R10G10B10X2_UNORM:
                case PIPE_FORMAT_B10G10R10X2_UNORM:
                case PIPE_FORMAT_R10G10B10A2_UINT:
                        return PAN_FORMAT_PACK;
                default:
                        return PAN_FORMAT_NATIVE;
                }
        }

        /* Otherwise, we can do native */
        return PAN_FORMAT_NATIVE;
}

enum pan_format_class
pan_format_class_store(const struct util_format_description *desc, unsigned quirks)
{
        /* Check if we can do anything better than software architecturally */
        if (quirks & MIDGARD_NO_TYPED_BLEND_STORES) {
                return (quirks & NO_BLEND_PACKS)
                        ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
        }

        return PAN_FORMAT_NATIVE;
}

/* Convenience method */

static enum pan_format_class
pan_format_class(const struct util_format_description *desc, unsigned quirks, bool is_store)
{
        if (is_store)
                return pan_format_class_store(desc, quirks);
        else
                return pan_format_class_load(desc, quirks);
}

/* Software packs/unpacks, by format class. Packs take in the pixel value typed
 * as `pan_unpacked_type_for_format` of the format and return an i32vec4
 * suitable for storing (with components replicated to fill). Unpacks do the
 * reverse but cannot rely on replication.
 *
 * Pure 32-bit formats (R32F ... RGBA32F) are already unpacked as 32-bit words,
 * so they only need to be replicated to fill */
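/* E.g. a single R32F pixel with value 0.5f (bit pattern 0x3f000000) packs to
 * the i32vec4 (0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000) purely by
 * replication. */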

static nir_ssa_def *
pan_pack_pure_32(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *replicated[4];

        for (unsigned i = 0; i < 4; ++i)
                replicated[i] = nir_channel(b, v, i % v->num_components);

        return nir_vec(b, replicated, 4);
}

static nir_ssa_def *
pan_unpack_pure_32(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        return nir_channels(b, pack, (1 << num_components) - 1);
}

/* Pure x16 formats are x16 unpacked, so it's similar, but we need to pack
 * upper/lower halves of course */

static nir_ssa_def *
pan_pack_pure_16(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *replicated[4];

        for (unsigned i = 0; i < 4; ++i) {
                unsigned c = 2 * i;

                nir_ssa_def *parts[2] = {
                        nir_channel(b, v, (c + 0) % v->num_components),
                        nir_channel(b, v, (c + 1) % v->num_components)
                };

                replicated[i] = nir_pack_32_2x16(b, nir_vec(b, parts, 2));
        }

        return nir_vec(b, replicated, 4);
}

static nir_ssa_def *
pan_unpack_pure_16(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        nir_ssa_def *unpacked[4];

        assert(num_components <= 4);

        for (unsigned i = 0; i < num_components; i += 2) {
                nir_ssa_def *halves =
                        nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1));

                unpacked[i + 0] = nir_channel(b, halves, 0);
                unpacked[i + 1] = nir_channel(b, halves, 1);
        }

        for (unsigned i = num_components; i < 4; ++i)
                unpacked[i] = nir_imm_intN_t(b, 0, 16);

        return nir_vec(b, unpacked, 4);
}

/* And likewise for x8. pan_fill_4 fills a 4-channel vector with an n-channel
 * vector (n <= 4), replicating as needed. pan_replicate_4 constructs a
 * 4-channel vector from a scalar via replication */
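/* E.g. pan_fill_4 turns the vec2 (x, y) into (x, y, x, y), while
 * pan_replicate_4 turns the scalar s into (s, s, s, s). */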

static nir_ssa_def *
pan_fill_4(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *q[4];
        assert(v->num_components <= 4);

        for (unsigned j = 0; j < 4; ++j)
                q[j] = nir_channel(b, v, j % v->num_components);

        return nir_vec(b, q, 4);
}

static nir_ssa_def *
pan_replicate_4(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *replicated[4] = { v, v, v, v };
        return nir_vec(b, replicated, 4);
}

static nir_ssa_def *
pan_pack_pure_8(nir_builder *b, nir_ssa_def *v)
{
        return pan_replicate_4(b, nir_pack_32_4x8(b, pan_fill_4(b, v)));
}

static nir_ssa_def *
pan_unpack_pure_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        assert(num_components <= 4);
        nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
        return nir_channels(b, unpacked, (1 << num_components) - 1);
}

/* UNORM 8 is unpacked to f16 vec4. We could directly use the un/pack_unorm_4x8
 * ops provided we replicate appropriately, but for packing we'd rather stay in
 * 8/16-bit whereas the NIR op forces 32-bit, so we do it manually */
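/* E.g. a channel value of 0.5 scales to 127.5, which fround_even rounds to
 * 128; f2u8 then narrows it to an 8-bit integer before pack_32_4x8 fuses the
 * four bytes into one word. */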

static nir_ssa_def *
pan_pack_unorm_8(nir_builder *b, nir_ssa_def *v)
{
        return pan_replicate_4(b, nir_pack_32_4x8(b,
                nir_f2u8(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b,
                        pan_fill_4(b, v)), nir_imm_float16(b, 255.0))))));
}

static nir_ssa_def *
pan_unpack_unorm_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
        assert(num_components <= 4);
        nir_ssa_def *unpacked = nir_unpack_unorm_4x8(b, nir_channel(b, pack, 0));
        return nir_f2fmp(b, unpacked);
}

/* UNORM 4 is also unpacked to f16, so we cannot use the shared NIR unpack,
 * which strongly assumes fp32. Moreover, on the tilebuffer it is actually
 * packed as:
 *
 *    [AAAA] [0000] [BBBB] [0000] [GGGG] [0000] [RRRR] [0000]
 *
 * In other words, each 4-bit channel is byte-aligned, sitting in the top
 * nibble of its byte. So pack as:
 *
 *    pack_32_4x8(f2u8_rte(v * 15.0) << 4)
 */
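/* E.g. (1.0, 0.5, 0.0, 1.0) scales to the nibbles (15, 8, 0, 15), i.e. the
 * bytes (0xf0, 0x80, 0x00, 0xf0), which pack_32_4x8 fuses into 0xf00080f0
 * with R in the low byte. */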

static nir_ssa_def *
pan_pack_unorm_small(nir_builder *b, nir_ssa_def *v,
                nir_ssa_def *scales, nir_ssa_def *shifts)
{
        nir_ssa_def *f = nir_fmul(b, nir_fsat(b, pan_fill_4(b, v)), scales);
        nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f));
        nir_ssa_def *s = nir_ishl(b, u8, shifts);
        nir_ssa_def *repl = nir_pack_32_4x8(b, s);

        return pan_replicate_4(b, repl);
}

static nir_ssa_def *
pan_unpack_unorm_small(nir_builder *b, nir_ssa_def *pack,
                nir_ssa_def *scales, nir_ssa_def *shifts)
{
        nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
        nir_ssa_def *raw = nir_ushr(b, nir_u2ump(b, channels), shifts);
        return nir_fmul(b, nir_u2f16(b, raw), scales);
}

static nir_ssa_def *
pan_pack_unorm_4(nir_builder *b, nir_ssa_def *v)
{
        return pan_pack_unorm_small(b, v,
                        nir_imm_vec4_16(b, 15.0, 15.0, 15.0, 15.0),
                        nir_imm_ivec4(b, 4, 4, 4, 4));
}

static nir_ssa_def *
pan_unpack_unorm_4(nir_builder *b, nir_ssa_def *v)
{
        return pan_unpack_unorm_small(b, v,
                        nir_imm_vec4_16(b, 1.0 / 15.0, 1.0 / 15.0, 1.0 / 15.0, 1.0 / 15.0),
                        nir_imm_ivec4(b, 4, 4, 4, 4));
}

/* UNORM RGB5_A1 and RGB565 are similar */
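/* With the scales and shifts below, each channel lands in the top bits of its
 * byte: the 5-bit channels are shifted up by 3, the 1-bit alpha by 7, and the
 * 6-bit green of RGB565 by 2. */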

static nir_ssa_def *
pan_pack_unorm_5551(nir_builder *b, nir_ssa_def *v)
{
        return pan_pack_unorm_small(b, v,
                        nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0),
                        nir_imm_ivec4(b, 3, 3, 3, 7));
}

static nir_ssa_def *
pan_unpack_unorm_5551(nir_builder *b, nir_ssa_def *v)
{
        return pan_unpack_unorm_small(b, v,
                        nir_imm_vec4_16(b, 1.0 / 31.0, 1.0 / 31.0, 1.0 / 31.0, 1.0),
                        nir_imm_ivec4(b, 3, 3, 3, 7));
}

static nir_ssa_def *
pan_pack_unorm_565(nir_builder *b, nir_ssa_def *v)
{
        return pan_pack_unorm_small(b, v,
                        nir_imm_vec4_16(b, 31.0, 63.0, 31.0, 0.0),
                        nir_imm_ivec4(b, 3, 2, 3, 0));
}

static nir_ssa_def *
pan_unpack_unorm_565(nir_builder *b, nir_ssa_def *v)
{
        return pan_unpack_unorm_small(b, v,
                        nir_imm_vec4_16(b, 1.0 / 31.0, 1.0 / 63.0, 1.0 / 31.0, 0.0),
                        nir_imm_ivec4(b, 3, 2, 3, 0));
}

/* UNORM RGB10_A2 is packed in the tilebuffer with the bottom three bytes
 * holding the top 8 bits of R, G and B, and the top byte holding the low
 * 2 bits of each of R, G, B and A. As imirkin pointed out, this means
 * conversion to RGBX8 is free */
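/* Tilebuffer dword, as built by the pack below:
 *
 *    byte 0: R[9:2]    byte 1: G[9:2]    byte 2: B[9:2]
 *    byte 3 (bits 7..0): A[1:0] B[1:0] G[1:0] R[1:0]
 */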

static nir_ssa_def *
pan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *scale = nir_imm_vec4_16(b, 1023.0, 1023.0, 1023.0, 3.0);
        nir_ssa_def *s = nir_f2u32(b, nir_fround_even(b, nir_f2f32(b, nir_fmul(b, nir_fsat(b, v), scale))));

        nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2));
        nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8));

        nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3));

        nir_ssa_def *top =
                nir_ior(b,
                        nir_ior(b,
                                nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)),
                                nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))),
                        nir_ior(b,
                                nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)),
                                nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6))));

        nir_ssa_def *p = nir_ior(b, top, top8_rgb);
        return pan_replicate_4(b, p);
}

static nir_ssa_def *
pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
{
        nir_ssa_def *p = nir_channel(b, packed, 0);
        nir_ssa_def *bytes = nir_unpack_32_4x8(b, p);
        nir_ssa_def *ubytes = nir_u2ump(b, bytes);

        nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)),
                        nir_imm_ivec4(b, 0, 2, 4, 6));
        nir_ssa_def *precision = nir_iand(b, shifts,
                        nir_i2imp(b, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)));

        nir_ssa_def *top_rgb = nir_ishl(b, nir_channels(b, ubytes, 0x7), nir_imm_int(b, 2));
        top_rgb = nir_ior(b, nir_channels(b, precision, 0x7), top_rgb);

        nir_ssa_def *chans[4] = {
                nir_channel(b, top_rgb, 0),
                nir_channel(b, top_rgb, 1),
                nir_channel(b, top_rgb, 2),
                nir_channel(b, precision, 3)
        };

        nir_ssa_def *scale = nir_imm_vec4(b, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 3.0);
        return nir_f2fmp(b, nir_fmul(b, nir_u2f32(b, nir_vec(b, chans, 4)), scale));
}

/* On the other hand, pure integer RGB10_A2 is packed exactly as the spec
 * lays it out in memory */
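/* i.e. R in bits [9:0], G in [19:10], B in [29:20] and A in [31:30], matching
 * the shifts used below. */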

static nir_ssa_def *
pan_pack_uint_1010102(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *shift = nir_ishl(b, nir_u2u32(b, v),
                        nir_imm_ivec4(b, 0, 10, 20, 30));

        nir_ssa_def *p = nir_ior(b,
                        nir_ior(b, nir_channel(b, shift, 0), nir_channel(b, shift, 1)),
                        nir_ior(b, nir_channel(b, shift, 2), nir_channel(b, shift, 3)));

        return pan_replicate_4(b, p);
}

static nir_ssa_def *
pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
{
        nir_ssa_def *chan = nir_channel(b, packed, 0);

        nir_ssa_def *shift = nir_ushr(b, pan_replicate_4(b, chan),
                        nir_imm_ivec4(b, 0, 10, 20, 30));

        nir_ssa_def *mask = nir_iand(b, shift,
                        nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));

        return nir_u2ump(b, mask);
}

/* NIR means we can *finally* catch a break */

static nir_ssa_def *
pan_pack_r11g11b10(nir_builder *b, nir_ssa_def *v)
{
        return pan_replicate_4(b, nir_format_pack_11f11f10f(b,
                        nir_f2f32(b, v)));
}

static nir_ssa_def *
pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v)
{
        nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0));
        nir_ssa_def *f16 = nir_f2fmp(b, f32);

        /* Extend to vec4 with alpha */
        nir_ssa_def *components[4] = {
                nir_channel(b, f16, 0),
                nir_channel(b, f16, 1),
                nir_channel(b, f16, 2),
                nir_imm_float16(b, 1.0)
        };

        return nir_vec(b, components, 4);
}

/* Wrapper around sRGB conversion */

static nir_ssa_def *
pan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear)
{
        nir_ssa_def *rgb = nir_channels(b, linear, 0x7);

        /* TODO: fp16 native conversion */
        nir_ssa_def *srgb = nir_f2fmp(b,
                        nir_format_linear_to_srgb(b, nir_f2f32(b, rgb)));

        nir_ssa_def *comp[4] = {
                nir_channel(b, srgb, 0),
                nir_channel(b, srgb, 1),
                nir_channel(b, srgb, 2),
                nir_channel(b, linear, 3),
        };

        return nir_vec(b, comp, 4);
}

static nir_ssa_def *
pan_srgb_to_linear(nir_builder *b, nir_ssa_def *srgb)
{
        nir_ssa_def *rgb = nir_channels(b, srgb, 0x7);

        /* TODO: fp16 native conversion */
        nir_ssa_def *linear = nir_f2fmp(b,
                        nir_format_srgb_to_linear(b, nir_f2f32(b, rgb)));

        nir_ssa_def *comp[4] = {
                nir_channel(b, linear, 0),
                nir_channel(b, linear, 1),
                nir_channel(b, linear, 2),
                nir_channel(b, srgb, 3),
        };

        return nir_vec(b, comp, 4);
}

/* Generic dispatches for un/pack regardless of format */

static bool
pan_is_unorm4(const struct util_format_description *desc)
{
        switch (desc->format) {
        case PIPE_FORMAT_B4G4R4A4_UNORM:
        case PIPE_FORMAT_B4G4R4X4_UNORM:
        case PIPE_FORMAT_A4R4_UNORM:
        case PIPE_FORMAT_R4A4_UNORM:
        case PIPE_FORMAT_A4B4G4R4_UNORM:
        case PIPE_FORMAT_R4G4B4A4_UNORM:
                return true;
        default:
                return false;
        }
}

static nir_ssa_def *
pan_unpack(nir_builder *b,
                const struct util_format_description *desc,
                nir_ssa_def *packed)
{
        if (util_format_is_unorm8(desc))
                return pan_unpack_unorm_8(b, packed, desc->nr_channels);

        if (pan_is_unorm4(desc))
                return pan_unpack_unorm_4(b, packed);

        if (desc->is_array) {
                int c = util_format_get_first_non_void_channel(desc->format);
                assert(c >= 0);
                struct util_format_channel_description d = desc->channel[c];

                if (d.size == 32 || d.size == 16) {
                        assert(!d.normalized);
                        assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer);

                        return d.size == 32 ? pan_unpack_pure_32(b, packed, desc->nr_channels) :
                                pan_unpack_pure_16(b, packed, desc->nr_channels);
                } else if (d.size == 8) {
                        assert(d.pure_integer);
                        return pan_unpack_pure_8(b, packed, desc->nr_channels);
                } else {
                        unreachable("Unrenderable size");
                }
        }

        switch (desc->format) {
        case PIPE_FORMAT_B5G5R5A1_UNORM:
        case PIPE_FORMAT_R5G5B5A1_UNORM:
                return pan_unpack_unorm_5551(b, packed);
        case PIPE_FORMAT_B5G6R5_UNORM:
                return pan_unpack_unorm_565(b, packed);
        case PIPE_FORMAT_R10G10B10A2_UNORM:
                return pan_unpack_unorm_1010102(b, packed);
        case PIPE_FORMAT_R10G10B10A2_UINT:
                return pan_unpack_uint_1010102(b, packed);
        case PIPE_FORMAT_R11G11B10_FLOAT:
                return pan_unpack_r11g11b10(b, packed);
        default:
                break;
        }

        fprintf(stderr, "%s\n", desc->name);
        unreachable("Unknown format");
}

static nir_ssa_def *
pan_pack(nir_builder *b,
                const struct util_format_description *desc,
                nir_ssa_def *unpacked)
{
        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                unpacked = pan_linear_to_srgb(b, unpacked);

        if (util_format_is_unorm8(desc))
                return pan_pack_unorm_8(b, unpacked);

        if (pan_is_unorm4(desc))
                return pan_pack_unorm_4(b, unpacked);

        if (desc->is_array) {
                int c = util_format_get_first_non_void_channel(desc->format);
                assert(c >= 0);
                struct util_format_channel_description d = desc->channel[c];

                if (d.size == 32 || d.size == 16) {
                        assert(!d.normalized);
                        assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer);

                        return d.size == 32 ? pan_pack_pure_32(b, unpacked) :
                                pan_pack_pure_16(b, unpacked);
                } else if (d.size == 8) {
                        assert(d.pure_integer);
                        return pan_pack_pure_8(b, unpacked);
                } else {
                        unreachable("Unrenderable size");
                }
        }

        switch (desc->format) {
        case PIPE_FORMAT_B5G5R5A1_UNORM:
        case PIPE_FORMAT_R5G5B5A1_UNORM:
                return pan_pack_unorm_5551(b, unpacked);
        case PIPE_FORMAT_B5G6R5_UNORM:
                return pan_pack_unorm_565(b, unpacked);
        case PIPE_FORMAT_R10G10B10A2_UNORM:
                return pan_pack_unorm_1010102(b, unpacked);
        case PIPE_FORMAT_R10G10B10A2_UINT:
                return pan_pack_uint_1010102(b, unpacked);
        case PIPE_FORMAT_R11G11B10_FLOAT:
                return pan_pack_r11g11b10(b, unpacked);
        default:
                break;
        }

        fprintf(stderr, "%s\n", desc->name);
        unreachable("Unknown format");
}

static void
pan_lower_fb_store(nir_shader *shader,
                nir_builder *b,
                nir_intrinsic_instr *intr,
                const struct util_format_description *desc,
                unsigned quirks)
{
        /* For stores, add conversion before */
        nir_ssa_def *unpacked = nir_ssa_for_src(b, intr->src[1], 4);
        nir_ssa_def *packed = pan_pack(b, desc, unpacked);

        nir_intrinsic_instr *new =
                nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
        new->src[0] = nir_src_for_ssa(packed);
        new->num_components = 4;
        nir_builder_instr_insert(b, &new->instr);
}

static void
pan_lower_fb_load(nir_shader *shader,
                nir_builder *b,
                nir_intrinsic_instr *intr,
                const struct util_format_description *desc,
                unsigned base, unsigned quirks)
{
        nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader,
                        nir_intrinsic_load_raw_output_pan);
        new->num_components = 4;

        nir_intrinsic_set_base(new, base);

        nir_ssa_dest_init(&new->instr, &new->dest, 4, 32, NULL);
        nir_builder_instr_insert(b, &new->instr);

        /* Convert the raw value */
        nir_ssa_def *packed = &new->dest.ssa;
        nir_ssa_def *unpacked = pan_unpack(b, desc, packed);

        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                unpacked = pan_srgb_to_linear(b, unpacked);

        /* Convert to the size of the load intrinsic.
         *
         * We can assume that the type will match with the framebuffer format:
         *
         * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says:
         *
         *    If [UNORM or SNORM, convert to fixed-point]; otherwise no type
         *    conversion is applied. If the values written by the fragment shader
         *    do not match the format(s) of the corresponding color buffer(s),
         *    the result is undefined.
         */

        unsigned bits = nir_dest_bit_size(intr->dest);

        nir_alu_type src_type;
        if (desc->channel[0].pure_integer) {
                if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
                        src_type = nir_type_int;
                else
                        src_type = nir_type_uint;
        } else {
                src_type = nir_type_float;
        }

        unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits);

        nir_src rewritten = nir_src_for_ssa(unpacked);
        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, &intr->instr);
}

bool
pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
                      bool lower_store, unsigned quirks)
{
        if (shader->info.stage != MESA_SHADER_FRAGMENT)
                return false;

        bool progress = false;

        nir_foreach_function(func, shader) {
                nir_foreach_block(block, func->impl) {
                        nir_foreach_instr_safe(instr, block) {
                                if (instr->type != nir_instr_type_intrinsic)
                                        continue;

                                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

                                bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
                                bool is_store = intr->intrinsic == nir_intrinsic_store_deref;

                                if (!(is_load || (is_store && lower_store)))
                                        continue;

                                nir_variable *var = nir_intrinsic_get_var(intr, 0);

                                if (var->data.mode != nir_var_shader_out)
                                        continue;

                                unsigned base = var->data.driver_location;

                                unsigned rt;
                                if (var->data.location == FRAG_RESULT_COLOR)
                                        rt = 0;
                                else if (var->data.location >= FRAG_RESULT_DATA0)
                                        rt = var->data.location - FRAG_RESULT_DATA0;
                                else
                                        continue;

                                if (rt_fmts[rt] == PIPE_FORMAT_NONE)
                                        continue;

                                const struct util_format_description *desc =
                                        util_format_description(rt_fmts[rt]);

                                enum pan_format_class fmt_class =
                                        pan_format_class(desc, quirks, is_store);

                                /* Don't lower */
                                if (fmt_class == PAN_FORMAT_NATIVE)
                                        continue;

                                nir_builder b;
                                nir_builder_init(&b, func->impl);

                                if (is_store) {
                                        b.cursor = nir_before_instr(instr);
                                        pan_lower_fb_store(shader, &b, intr, desc, quirks);
                                } else {
                                        b.cursor = nir_after_instr(instr);
                                        pan_lower_fb_load(shader, &b, intr, desc, base, quirks);
                                }

                                nir_instr_remove(instr);

                                progress = true;
                        }
                }

                nir_metadata_preserve(func->impl, nir_metadata_block_index |
                                nir_metadata_dominance);
        }

        return progress;
}