/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
/**
 * Implements framebuffer format conversions in software, specifically for
 * blend shaders on Midgard/Bifrost. load_output/store_output (derefs more
 * correctly -- pre I/O lowering) normally for the fragment stage within the
 * blend shader will operate with purely vec4 float ("nir") encodings. This
 * lowering stage, to be run before I/O is lowered, converts the native
 * framebuffer format to a NIR encoding after loads and vice versa before
 * stores. This pass is designed for a single render target; Midgard
 * duplicates blend shaders for MRT to simplify everything.
 */
40 #include "compiler/nir/nir.h"
41 #include "compiler/nir/nir_builder.h"
42 #include "compiler/nir/nir_format_convert.h"
43 #include "nir_lower_blend.h"
44 #include "util/format/u_format.h"
46 /* Determines the best NIR intrinsic to load a tile buffer of a given type,
47 * using native format conversion where possible. RGBA8 UNORM has a fast path
48 * (on some chips). Otherwise, we default to raw reads. */
50 static nir_intrinsic_op
51 nir_best_load_for_format(
52 const struct util_format_description
*desc
,
53 unsigned *special_bitsize
,
54 unsigned *special_components
,
57 if (util_format_is_unorm8(desc
) && gpu_id
!= 0x750) {
58 *special_bitsize
= 16;
59 return nir_intrinsic_load_output_u8_as_fp16_pan
;
60 } else if (desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
) {
61 *special_bitsize
= 32;
62 *special_components
= 1;
63 return nir_intrinsic_load_raw_output_pan
;
65 return nir_intrinsic_load_raw_output_pan
;
69 /* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */
72 nir_float_to_unorm8(nir_builder
*b
, nir_ssa_def
*c_float
)
74 /* First, we degrade quality to fp16; we don't need the extra bits */
75 nir_ssa_def
*degraded
= /*nir_f2f16(b, c_float)*/c_float
;
77 /* Scale from [0, 1] to [0, 255.0] */
78 nir_ssa_def
*scaled
= nir_fmul_imm(b
, nir_fsat(b
, degraded
), 255.0);
80 /* Next, we type convert */
81 nir_ssa_def
*converted
= nir_u2u8(b
, nir_f2u16(b
,
82 nir_fround_even(b
, nir_f2f16(b
, scaled
))));
88 nir_unorm8_to_float(nir_builder
*b
, nir_ssa_def
*c_native
)
90 /* First, we convert up from u8 to f16 */
91 nir_ssa_def
*converted
= nir_f2f32(b
, nir_u2f16(b
, nir_u2u16(b
, c_native
)));
93 /* Next, we scale down from [0, 255.0] to [0, 1] */
94 nir_ssa_def
*scaled
= nir_fsat(b
, nir_fmul_imm(b
, converted
, 1.0/255.0));
99 /* Converters for UNORM4 formats, packing the final result into 16-bit */
102 nir_float_to_unorm4(nir_builder
*b
, nir_ssa_def
*c_float
)
104 /* First, we degrade quality to fp16; we don't need the extra bits */
105 nir_ssa_def
*degraded
= nir_f2f16(b
, c_float
);
107 /* Scale from [0, 1] to [0, 15.0] */
108 nir_ssa_def
*scaled
= nir_fmul_imm(b
, nir_fsat(b
, degraded
), 15.0);
110 /* Next, we type convert to u16 */
111 nir_ssa_def
*converted
= nir_f2u16(b
,
112 nir_fround_even(b
, scaled
));
114 /* In u16 land, we now need to pack */
115 nir_ssa_def
*cr
= nir_channel(b
, converted
, 0);
116 nir_ssa_def
*cg
= nir_channel(b
, converted
, 1);
117 nir_ssa_def
*cb
= nir_channel(b
, converted
, 2);
118 nir_ssa_def
*ca
= nir_channel(b
, converted
, 3);
122 nir_ior(b
, cr
, nir_ishl(b
, cg
, nir_imm_int(b
, 4))),
123 nir_ior(b
, nir_ishl(b
, cb
, nir_imm_int(b
, 8)), nir_ishl(b
, ca
, nir_imm_int(b
, 12))));
129 nir_float_to_rgb10a2(nir_builder
*b
, nir_ssa_def
*c_float
, bool normalize
)
131 nir_ssa_def
*converted
= c_float
;
134 nir_ssa_def
*scaled
= nir_fmul(b
, nir_fsat(b
, c_float
),
135 nir_imm_vec4(b
, 1023.0, 1023.0, 1023.0, 3.0));
137 converted
= nir_f2u32(b
,
138 nir_fround_even(b
, scaled
));
141 nir_ssa_def
*cr
= nir_channel(b
, converted
, 0);
142 nir_ssa_def
*cg
= nir_channel(b
, converted
, 1);
143 nir_ssa_def
*cb
= nir_channel(b
, converted
, 2);
144 nir_ssa_def
*ca
= nir_channel(b
, converted
, 3);
148 nir_ior(b
, cr
, nir_ishl(b
, cg
, nir_imm_int(b
, 10))),
149 nir_ior(b
, nir_ishl(b
, cb
, nir_imm_int(b
, 20)), nir_ishl(b
, ca
, nir_imm_int(b
, 30))));
155 nir_float_to_rgb5a1(nir_builder
*b
, nir_ssa_def
*c_float
)
157 nir_ssa_def
*degraded
= nir_f2f16(b
, c_float
);
159 nir_ssa_def
*scaled
= nir_fmul(b
, nir_fsat(b
, degraded
),
160 nir_imm_vec4_16(b
, 31.0, 31.0, 31.0, 1.0));
162 nir_ssa_def
*converted
= nir_f2u16(b
,
163 nir_fround_even(b
, scaled
));
165 nir_ssa_def
*cr
= nir_channel(b
, converted
, 0);
166 nir_ssa_def
*cg
= nir_channel(b
, converted
, 1);
167 nir_ssa_def
*cb
= nir_channel(b
, converted
, 2);
168 nir_ssa_def
*ca
= nir_channel(b
, converted
, 3);
172 nir_ior(b
, cr
, nir_ishl(b
, cg
, nir_imm_int(b
, 5))),
173 nir_ior(b
, nir_ishl(b
, cb
, nir_imm_int(b
, 10)), nir_ishl(b
, ca
, nir_imm_int(b
, 15))));
179 nir_shader_to_native(nir_builder
*b
,
180 nir_ssa_def
*c_shader
,
181 const struct util_format_description
*desc
,
183 bool homogenous_bits
)
185 bool float_or_pure_int
=
186 util_format_is_float(desc
->format
) ||
187 util_format_is_pure_integer(desc
->format
);
189 if (util_format_is_unorm8(desc
))
190 return nir_float_to_unorm8(b
, c_shader
);
191 else if (homogenous_bits
&& float_or_pure_int
)
192 return c_shader
; /* type is already correct */
194 //unsigned bgra[4] = { 2, 1, 0, 3 }; /* BGRA */
195 //c_shader = nir_swizzle(b, c_shader, swiz, 4);
197 /* Special formats */
198 switch (desc
->format
) {
199 case PIPE_FORMAT_B4G4R4A4_UNORM
:
200 case PIPE_FORMAT_B4G4R4X4_UNORM
:
201 case PIPE_FORMAT_A4R4_UNORM
:
202 case PIPE_FORMAT_R4A4_UNORM
:
203 case PIPE_FORMAT_A4B4G4R4_UNORM
:
204 return nir_float_to_unorm4(b
, c_shader
);
206 case PIPE_FORMAT_R10G10B10A2_UNORM
:
207 case PIPE_FORMAT_B10G10R10A2_UNORM
:
208 case PIPE_FORMAT_R10G10B10X2_UNORM
:
209 case PIPE_FORMAT_B10G10R10X2_UNORM
:
210 return nir_float_to_rgb10a2(b
, c_shader
, true);
212 case PIPE_FORMAT_R10G10B10A2_UINT
:
213 return nir_float_to_rgb10a2(b
, c_shader
, false);
215 case PIPE_FORMAT_B5G5R5A1_UNORM
:
216 return nir_float_to_rgb5a1(b
, c_shader
);
218 case PIPE_FORMAT_R11G11B10_FLOAT
:
219 return nir_format_pack_11f11f10f(b
, c_shader
);
222 fprintf(stderr
, "%s\n", desc
->name
);
223 unreachable("Unknown format name");
228 nir_native_to_shader(nir_builder
*b
,
229 nir_ssa_def
*c_native
,
231 const struct util_format_description
*desc
,
233 bool homogenous_bits
)
235 bool float_or_pure_int
=
236 util_format_is_float(desc
->format
) ||
237 util_format_is_pure_integer(desc
->format
);
239 /* Handle preconverted formats */
240 if (op
== nir_intrinsic_load_output_u8_as_fp16_pan
) {
241 assert(util_format_is_unorm8(desc
));
242 return nir_f2f32(b
, c_native
);
245 /* Otherwise, we're raw */
246 assert(op
== nir_intrinsic_load_raw_output_pan
);
248 if (util_format_is_unorm8(desc
))
249 return nir_unorm8_to_float(b
, c_native
);
250 else if (homogenous_bits
&& float_or_pure_int
)
251 return c_native
; /* type is already correct */
253 /* Special formats */
254 switch (desc
->format
) {
255 case PIPE_FORMAT_R11G11B10_FLOAT
: {
256 nir_ssa_def
*unpacked
= nir_format_unpack_11f11f10f(b
, c_native
);
258 /* Extend to vec4 with alpha */
259 nir_ssa_def
*components
[4] = {
260 nir_channel(b
, unpacked
, 0),
261 nir_channel(b
, unpacked
, 1),
262 nir_channel(b
, unpacked
, 2),
263 nir_imm_float(b
, 1.0)
266 return nir_vec(b
, components
, 4);
270 fprintf(stderr
, "%s\n", desc
->name
);
271 unreachable("Unknown format name");
276 nir_lower_framebuffer(nir_shader
*shader
, enum pipe_format format
,
279 /* Blend shaders are represented as special fragment shaders */
280 assert(shader
->info
.stage
== MESA_SHADER_FRAGMENT
);
282 const struct util_format_description
*format_desc
=
283 util_format_description(format
);
285 unsigned nr_channels
= format_desc
->nr_channels
;
286 unsigned bits
= format_desc
->channel
[0].size
;
288 /* Do all channels have the same bit count? */
289 bool homogenous_bits
= true;
291 for (unsigned c
= 1; c
< nr_channels
; ++c
)
292 homogenous_bits
&= (format_desc
->channel
[c
].size
== bits
);
294 if (format
== PIPE_FORMAT_R11G11B10_FLOAT
)
295 homogenous_bits
= false;
297 /* Figure out the formats for the raw */
298 unsigned raw_bitsize_in
= bits
;
299 unsigned raw_bitsize_out
= bits
;
300 unsigned raw_out_components
= 4;
302 /* We pack a 4-bit vec4 as 16-bit vec1 */
303 if ((homogenous_bits
&& bits
== 4 && util_format_is_unorm(format
)) || format
== PIPE_FORMAT_B5G5R5A1_UNORM
) {
304 raw_bitsize_out
= 16;
305 raw_out_components
= 1;
306 } else if (format
== PIPE_FORMAT_R10G10B10A2_UNORM
|| format
== PIPE_FORMAT_B10G10R10A2_UNORM
|| format
== PIPE_FORMAT_R10G10B10A2_UINT
|| format
== PIPE_FORMAT_R11G11B10_FLOAT
) {
307 raw_bitsize_out
= 32;
308 raw_out_components
= 1;
311 nir_foreach_function(func
, shader
) {
312 nir_foreach_block(block
, func
->impl
) {
313 nir_foreach_instr_safe(instr
, block
) {
314 if (instr
->type
!= nir_instr_type_intrinsic
)
317 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
319 bool is_load
= intr
->intrinsic
== nir_intrinsic_load_deref
;
320 bool is_store
= intr
->intrinsic
== nir_intrinsic_store_deref
;
322 if (!(is_load
|| is_store
))
325 /* Don't worry about MRT */
326 nir_variable
*var
= nir_intrinsic_get_var(intr
, 0);
328 if (var
->data
.location
!= FRAG_RESULT_COLOR
)
332 nir_builder_init(&b
, func
->impl
);
335 /* For stores, add conversion before */
336 b
.cursor
= nir_before_instr(instr
);
338 /* Grab the input color */
339 nir_ssa_def
*c_nir
= nir_ssa_for_src(&b
, intr
->src
[1], 4);
341 /* Apply sRGB transform */
343 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
) {
344 nir_ssa_def
*rgb
= nir_channels(&b
, c_nir
, 0x7);
345 nir_ssa_def
*trans
= nir_format_linear_to_srgb(&b
, rgb
);
347 nir_ssa_def
*comp
[4] = {
348 nir_channel(&b
, trans
, 0),
349 nir_channel(&b
, trans
, 1),
350 nir_channel(&b
, trans
, 2),
351 nir_channel(&b
, c_nir
, 3),
354 c_nir
= nir_vec(&b
, comp
, 4);
358 nir_ssa_def
*converted
= nir_shader_to_native(&b
, c_nir
, format_desc
, bits
, homogenous_bits
);
360 if (util_format_is_float(format
)) {
361 if (raw_bitsize_out
== 16)
362 converted
= nir_f2f16(&b
, converted
);
363 else if (raw_bitsize_out
== 32)
364 converted
= nir_f2f32(&b
, converted
);
366 converted
= nir_i2i(&b
, converted
, raw_bitsize_out
);
369 /* Rewrite to use a native store by creating a new intrinsic */
370 nir_intrinsic_instr
*new =
371 nir_intrinsic_instr_create(shader
, nir_intrinsic_store_raw_output_pan
);
372 new->src
[0] = nir_src_for_ssa(converted
);
374 new->num_components
= raw_out_components
;
376 nir_builder_instr_insert(&b
, &new->instr
);
378 /* (And finally removing the old) */
379 nir_instr_remove(instr
);
381 /* For loads, add conversion after */
382 b
.cursor
= nir_after_instr(instr
);
384 /* Determine the best op for the format/hardware */
385 unsigned bitsize
= raw_bitsize_in
;
386 unsigned components
= 4;
387 nir_intrinsic_op op
= nir_best_load_for_format(format_desc
,
392 /* Rewrite to use a native load by creating a new intrinsic */
393 nir_intrinsic_instr
*new = nir_intrinsic_instr_create(shader
, op
);
394 new->num_components
= components
;
396 nir_ssa_dest_init(&new->instr
, &new->dest
, components
, bitsize
, NULL
);
397 nir_builder_instr_insert(&b
, &new->instr
);
399 /* Convert the raw value */
400 nir_ssa_def
*raw
= &new->dest
.ssa
;
401 nir_ssa_def
*converted
= nir_native_to_shader(&b
, raw
, op
, format_desc
, bits
, homogenous_bits
);
403 if (util_format_is_float(format
))
404 converted
= nir_f2f32(&b
, converted
);
406 converted
= nir_i2i32(&b
, converted
);
408 /* Rewrite to use the converted value */
409 nir_src rewritten
= nir_src_for_ssa(converted
);
410 nir_ssa_def_rewrite_uses_after(&intr
->dest
.ssa
, rewritten
, instr
);
412 /* Finally, remove the old load */
413 nir_instr_remove(instr
);
418 nir_metadata_preserve(func
->impl
, nir_metadata_block_index
|
419 nir_metadata_dominance
);