util: Move gallium's PIPE_FORMAT utils to /util/format/
[mesa.git] / src / gallium / drivers / panfrost / nir / nir_lower_framebuffer.c
1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 /**
28 * @file
29 *
 * Implements framebuffer format conversions in software, specifically for
 * blend shaders on Midgard/Bifrost. Within a blend shader, fragment-stage
 * I/O (load_output/store_output -- more precisely derefs, since this runs
 * before I/O lowering) normally operates on pure vec4 float ("NIR")
 * encodings. This lowering stage, run before I/O is lowered, converts the
 * native framebuffer format to the NIR encoding after loads, and vice versa
 * before stores. This pass is designed for a single render target; Midgard
 * duplicates blend shaders for MRT to simplify everything.
38 */
39
#include <stdio.h>

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "nir_lower_blend.h"
#include "util/format/u_format.h"
45
46 /* Determines the best NIR intrinsic to load a tile buffer of a given type,
47 * using native format conversion where possible. RGBA8 UNORM has a fast path
48 * (on some chips). Otherwise, we default to raw reads. */
49
50 static nir_intrinsic_op
51 nir_best_load_for_format(
52 const struct util_format_description *desc,
53 unsigned *special_bitsize,
54 unsigned gpu_id)
55 {
56 if (util_format_is_unorm8(desc) && gpu_id != 0x750) {
57 *special_bitsize = 16;
58 return nir_intrinsic_load_output_u8_as_fp16_pan;
59 } else
60 return nir_intrinsic_load_raw_output_pan;
61 }
62
63
64 /* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */
65
66 static nir_ssa_def *
67 nir_float_to_unorm8(nir_builder *b, nir_ssa_def *c_float)
68 {
69 /* First, we degrade quality to fp16; we don't need the extra bits */
70 nir_ssa_def *degraded = /*nir_f2f16(b, c_float)*/c_float;
71
72 /* Scale from [0, 1] to [0, 255.0] */
73 nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 255.0);
74
75 /* Next, we type convert */
76 nir_ssa_def *converted = nir_u2u8(b, nir_f2u16(b,
77 nir_fround_even(b, nir_f2f16(b, scaled))));
78
79 return converted;
80 }
81
82 static nir_ssa_def *
83 nir_unorm8_to_float(nir_builder *b, nir_ssa_def *c_native)
84 {
85 /* First, we convert up from u8 to f16 */
86 nir_ssa_def *converted = nir_f2f32(b, nir_u2f16(b, nir_u2u16(b, c_native)));
87
88 /* Next, we scale down from [0, 255.0] to [0, 1] */
89 nir_ssa_def *scaled = nir_fsat(b, nir_fmul_imm(b, converted, 1.0/255.0));
90
91 return scaled;
92 }
93
94 /* Converters for UNORM4 formats, packing the final result into 16-bit */
95
96 static nir_ssa_def *
97 nir_float_to_unorm4(nir_builder *b, nir_ssa_def *c_float)
98 {
99 /* First, we degrade quality to fp16; we don't need the extra bits */
100 nir_ssa_def *degraded = nir_f2f16(b, c_float);
101
102 /* Scale from [0, 1] to [0, 15.0] */
103 nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 15.0);
104
105 /* Next, we type convert to u16 */
106 nir_ssa_def *converted = nir_f2u16(b,
107 nir_fround_even(b, scaled));
108
109 /* In u16 land, we now need to pack */
110 nir_ssa_def *cr = nir_channel(b, converted, 0);
111 nir_ssa_def *cg = nir_channel(b, converted, 1);
112 nir_ssa_def *cb = nir_channel(b, converted, 2);
113 nir_ssa_def *ca = nir_channel(b, converted, 3);
114
115 nir_ssa_def *pack =
116 nir_ior(b,
117 nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 4))),
118 nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 8)), nir_ishl(b, ca, nir_imm_int(b, 12))));
119
120 return pack;
121 }
122
123 static nir_ssa_def *
124 nir_float_to_rgb10a2(nir_builder *b, nir_ssa_def *c_float, bool normalize)
125 {
126 nir_ssa_def *converted = c_float;
127
128 if (normalize) {
129 nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, c_float),
130 nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0));
131
132 converted = nir_f2u32(b,
133 nir_fround_even(b, scaled));
134 }
135
136 nir_ssa_def *cr = nir_channel(b, converted, 0);
137 nir_ssa_def *cg = nir_channel(b, converted, 1);
138 nir_ssa_def *cb = nir_channel(b, converted, 2);
139 nir_ssa_def *ca = nir_channel(b, converted, 3);
140
141 nir_ssa_def *pack =
142 nir_ior(b,
143 nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 10))),
144 nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 20)), nir_ishl(b, ca, nir_imm_int(b, 30))));
145
146 return pack;
147 }
148
149 static nir_ssa_def *
150 nir_float_to_rgb5a1(nir_builder *b, nir_ssa_def *c_float)
151 {
152 nir_ssa_def *degraded = nir_f2f16(b, c_float);
153
154 nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, degraded),
155 nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0));
156
157 nir_ssa_def *converted = nir_f2u16(b,
158 nir_fround_even(b, scaled));
159
160 nir_ssa_def *cr = nir_channel(b, converted, 0);
161 nir_ssa_def *cg = nir_channel(b, converted, 1);
162 nir_ssa_def *cb = nir_channel(b, converted, 2);
163 nir_ssa_def *ca = nir_channel(b, converted, 3);
164
165 nir_ssa_def *pack =
166 nir_ior(b,
167 nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 5))),
168 nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 10)), nir_ishl(b, ca, nir_imm_int(b, 15))));
169
170 return pack;
171 }
172
173 static nir_ssa_def *
174 nir_shader_to_native(nir_builder *b,
175 nir_ssa_def *c_shader,
176 const struct util_format_description *desc,
177 unsigned bits,
178 bool homogenous_bits)
179 {
180 bool float_or_pure_int =
181 util_format_is_float(desc->format) ||
182 util_format_is_pure_integer(desc->format);
183
184 if (util_format_is_unorm8(desc))
185 return nir_float_to_unorm8(b, c_shader);
186 else if (homogenous_bits && float_or_pure_int)
187 return c_shader; /* type is already correct */
188
189 //unsigned bgra[4] = { 2, 1, 0, 3 }; /* BGRA */
190 //c_shader = nir_swizzle(b, c_shader, swiz, 4);
191
192 /* Special formats */
193 switch (desc->format) {
194 case PIPE_FORMAT_B4G4R4A4_UNORM:
195 case PIPE_FORMAT_B4G4R4X4_UNORM:
196 case PIPE_FORMAT_A4R4_UNORM:
197 case PIPE_FORMAT_R4A4_UNORM:
198 case PIPE_FORMAT_A4B4G4R4_UNORM:
199 return nir_float_to_unorm4(b, c_shader);
200
201 case PIPE_FORMAT_R10G10B10A2_UNORM:
202 case PIPE_FORMAT_B10G10R10A2_UNORM:
203 case PIPE_FORMAT_R10G10B10X2_UNORM:
204 case PIPE_FORMAT_B10G10R10X2_UNORM:
205 return nir_float_to_rgb10a2(b, c_shader, true);
206
207 case PIPE_FORMAT_R10G10B10A2_UINT:
208 return nir_float_to_rgb10a2(b, c_shader, false);
209
210 case PIPE_FORMAT_B5G5R5A1_UNORM:
211 return nir_float_to_rgb5a1(b, c_shader);
212
213 case PIPE_FORMAT_R11G11B10_FLOAT:
214 return nir_format_pack_11f11f10f(b, c_shader);
215
216 default:
217 printf("%s\n", desc->name);
218 unreachable("Unknown format name");
219 }
220 }
221
222 static nir_ssa_def *
223 nir_native_to_shader(nir_builder *b,
224 nir_ssa_def *c_native,
225 nir_intrinsic_op op,
226 const struct util_format_description *desc,
227 unsigned bits,
228 bool homogenous_bits)
229 {
230 bool float_or_pure_int =
231 util_format_is_float(desc->format) ||
232 util_format_is_pure_integer(desc->format);
233
234 /* Handle preconverted formats */
235 if (op == nir_intrinsic_load_output_u8_as_fp16_pan) {
236 assert(util_format_is_unorm8(desc));
237 return nir_f2f32(b, c_native);
238 }
239
240 /* Otherwise, we're raw */
241 assert(op == nir_intrinsic_load_raw_output_pan);
242
243 if (util_format_is_unorm8(desc))
244 return nir_unorm8_to_float(b, c_native);
245 else if (homogenous_bits && float_or_pure_int)
246 return c_native; /* type is already correct */
247 else {
248 printf("%s\n", desc->name);
249 unreachable("Unknown format name");
250 }
251 }
252
/**
 * Lower framebuffer I/O for a blend shader: rewrites load_deref/store_deref
 * intrinsics on the FRAG_RESULT_COLOR variable into the Panfrost tile-buffer
 * intrinsics (raw or converting), inserting format-conversion code around
 * them. Designed for a single render target (MRT variables are skipped).
 */
void
nir_lower_framebuffer(nir_shader *shader, enum pipe_format format,
                      unsigned gpu_id)
{
   /* Blend shaders are represented as special fragment shaders */
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   const struct util_format_description *format_desc =
      util_format_description(format);

   unsigned nr_channels = format_desc->nr_channels;
   unsigned bits = format_desc->channel[0].size;

   /* Do all channels have the same bit count? */
   bool homogenous_bits = true;

   for (unsigned c = 1; c < nr_channels; ++c)
      homogenous_bits &= (format_desc->channel[c].size == bits);

   /* Force the heterogeneous path for R11G11B10 (mixed channel widths) */
   if (format == PIPE_FORMAT_R11G11B10_FLOAT)
      homogenous_bits = false;

   /* Figure out the formats for the raw */
   unsigned raw_bitsize_in = bits;
   unsigned raw_bitsize_out = bits;
   unsigned raw_out_components = 4;

   /* We pack a 4-bit vec4 as 16-bit vec1; likewise RGB5A1 packs to a
    * 16-bit scalar, and the 10/10/10/2 and 11/11/10 formats pack to a
    * single 32-bit scalar */
   if ((homogenous_bits && bits == 4 && util_format_is_unorm(format)) || format == PIPE_FORMAT_B5G5R5A1_UNORM) {
      raw_bitsize_out = 16;
      raw_out_components = 1;
   } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10G10B10A2_UINT || format == PIPE_FORMAT_R11G11B10_FLOAT) {
      raw_bitsize_out = 32;
      raw_out_components = 1;
   }

   nir_foreach_function(func, shader) {
      nir_foreach_block(block, func->impl) {
         /* _safe iteration: we remove the old load/store inside the loop */
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

            bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
            bool is_store = intr->intrinsic == nir_intrinsic_store_deref;

            if (!(is_load || is_store))
               continue;

            /* Don't worry about MRT */
            nir_variable *var = nir_intrinsic_get_var(intr, 0);

            if (var->data.location != FRAG_RESULT_COLOR)
               continue;

            nir_builder b;
            nir_builder_init(&b, func->impl);

            if (is_store) {
               /* For stores, add conversion before */
               b.cursor = nir_before_instr(instr);

               /* Grab the input color */
               nir_ssa_def *c_nir = nir_ssa_for_src(&b, intr->src[1], 4);

               /* Format convert */
               nir_ssa_def *converted = nir_shader_to_native(&b, c_nir, format_desc, bits, homogenous_bits);

               /* Resize the converted value to the raw store's bit size */
               if (util_format_is_float(format)) {
                  if (raw_bitsize_out == 16)
                     converted = nir_f2f16(&b, converted);
                  else if (raw_bitsize_out == 32)
                     converted = nir_f2f32(&b, converted);
               } else {
                  converted = nir_i2i(&b, converted, raw_bitsize_out);
               }

               /* Rewrite to use a native store by creating a new intrinsic */
               nir_intrinsic_instr *new =
                  nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
               new->src[0] = nir_src_for_ssa(converted);

               new->num_components = raw_out_components;

               nir_builder_instr_insert(&b, &new->instr);

               /* (And finally removing the old) */
               nir_instr_remove(instr);
            } else {
               /* For loads, add conversion after */
               b.cursor = nir_after_instr(instr);

               /* Determine the best op for the format/hardware; the
                * converting load may override the raw bit size */
               unsigned bitsize = raw_bitsize_in;
               nir_intrinsic_op op = nir_best_load_for_format(format_desc,
                                                              &bitsize,
                                                              gpu_id);

               /* Rewrite to use a native load by creating a new intrinsic */
               nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader, op);
               new->num_components = 4;

               nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL);
               nir_builder_instr_insert(&b, &new->instr);

               /* Convert the raw value */
               nir_ssa_def *raw = &new->dest.ssa;
               nir_ssa_def *converted = nir_native_to_shader(&b, raw, op, format_desc, bits, homogenous_bits);

               /* Rewrite to use the converted value */
               nir_src rewritten = nir_src_for_ssa(converted);
               nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr);

               /* Finally, remove the old load */
               nir_instr_remove(instr);
            }
         }
      }

      nir_metadata_preserve(func->impl, nir_metadata_block_index |
                            nir_metadata_dominance);
   }
}