panfrost: Remove some more prints to stdout
[mesa.git] src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c
/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

/**
 * @file
 *
 * Implements framebuffer format conversions in software, specifically for
 * blend shaders on Midgard/Bifrost. Because this pass runs before I/O is
 * lowered, the blend shader's color accesses appear as load_deref /
 * store_deref of the output variable (rather than load_output /
 * store_output) and operate on plain vec4 float values in the canonical NIR
 * encoding. This pass converts from the native framebuffer format to that
 * NIR encoding after loads, and back to the native format before stores. It
 * is designed for a single render target; Midgard duplicates blend shaders
 * for MRT to keep everything simple.
 */

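/* Illustrative sketch of the lowering for an R8G8B8A8_UNORM render target
 * (conceptual, not literal NIR; pack_to_u8 / unpack_from_u8 stand in for the
 * conversion helpers below):
 *
 *    store_deref(color, v)   becomes  store_raw_output_pan(pack_to_u8(v))
 *    v = load_deref(color)   becomes  v = unpack_from_u8(load_raw_output_pan())
 *                                     (or f2f32(load_output_u8_as_fp16_pan())
 *                                      where that fast path applies)
 */
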
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "nir_lower_blend.h"
#include "util/format/u_format.h"

/* Determines the best NIR intrinsic to load a tile buffer of a given type,
 * using native format conversion where possible. RGBA8 UNORM has a fast path
 * (on some chips). Otherwise, we default to raw reads. */

static nir_intrinsic_op
nir_best_load_for_format(
        const struct util_format_description *desc,
        unsigned *special_bitsize,
        unsigned *special_components,
        unsigned gpu_id)
{
        if (util_format_is_unorm8(desc) && gpu_id != 0x750) {
                *special_bitsize = 16;
                return nir_intrinsic_load_output_u8_as_fp16_pan;
        } else if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
                *special_bitsize = 32;
                *special_components = 1;
                return nir_intrinsic_load_raw_output_pan;
        } else
                return nir_intrinsic_load_raw_output_pan;
}

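/* For example, an R8G8B8A8_UNORM target takes the u8-as-fp16 fast path
 * (unless gpu_id == 0x750), with *special_bitsize set to 16;
 * R11G11B10_FLOAT is read raw as a single packed 32-bit word; everything
 * else falls back to a plain raw read of the tile buffer. */
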
/* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */

static nir_ssa_def *
nir_float_to_unorm8(nir_builder *b, nir_ssa_def *c_float)
{
        /* We would degrade quality to fp16 here (we don't need the extra
         * bits), but the conversion is currently left disabled and the value
         * stays f32 */
        nir_ssa_def *degraded = /*nir_f2f16(b, c_float)*/c_float;

        /* Scale from [0, 1] to [0, 255.0] */
        nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 255.0);

        /* Next, we type convert */
        nir_ssa_def *converted = nir_u2u8(b, nir_f2u16(b,
                                nir_fround_even(b, nir_f2f16(b, scaled))));

        return converted;
}

static nir_ssa_def *
nir_unorm8_to_float(nir_builder *b, nir_ssa_def *c_native)
{
        /* First, we convert up from u8 to f32 (through u16/f16) */
        nir_ssa_def *converted = nir_f2f32(b, nir_u2f16(b, nir_u2u16(b, c_native)));

        /* Next, we scale down from [0, 255.0] to [0, 1] */
        nir_ssa_def *scaled = nir_fsat(b, nir_fmul_imm(b, converted, 1.0/255.0));

        return scaled;
}

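/* Worked example of the UNORM8 round trip (illustrative): a shader value of
 * 0.5 scales to 0.5 * 255 = 127.5, which fround_even rounds to 128, stored
 * as the byte 0x80. Reading back, 128 * (1.0/255.0) = 0.50196..., so the
 * round trip is exact only for values that land on an 8-bit step. */
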
/* Converters for UNORM4 formats, packing the final result into 16-bit */

static nir_ssa_def *
nir_float_to_unorm4(nir_builder *b, nir_ssa_def *c_float)
{
        /* First, we degrade quality to fp16; we don't need the extra bits */
        nir_ssa_def *degraded = nir_f2f16(b, c_float);

        /* Scale from [0, 1] to [0, 15.0] */
        nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 15.0);

        /* Next, we type convert to u16 */
        nir_ssa_def *converted = nir_f2u16(b,
                        nir_fround_even(b, scaled));

        /* In u16 land, we now need to pack */
        nir_ssa_def *cr = nir_channel(b, converted, 0);
        nir_ssa_def *cg = nir_channel(b, converted, 1);
        nir_ssa_def *cb = nir_channel(b, converted, 2);
        nir_ssa_def *ca = nir_channel(b, converted, 3);

        nir_ssa_def *pack =
                nir_ior(b,
                        nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 4))),
                        nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 8)), nir_ishl(b, ca, nir_imm_int(b, 12))));

        return pack;
}

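/* The pack above places channel 0 in bits [3:0], channel 1 in [7:4],
 * channel 2 in [11:8] and channel 3 in [15:12]; e.g. (1.0, 0.0, 1.0, 1.0)
 * becomes the 16-bit word 0xFF0F. */
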
static nir_ssa_def *
nir_float_to_rgb10a2(nir_builder *b, nir_ssa_def *c_float, bool normalize)
{
        nir_ssa_def *converted = c_float;

        if (normalize) {
                nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, c_float),
                                nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0));

                converted = nir_f2u32(b,
                                nir_fround_even(b, scaled));
        }

        nir_ssa_def *cr = nir_channel(b, converted, 0);
        nir_ssa_def *cg = nir_channel(b, converted, 1);
        nir_ssa_def *cb = nir_channel(b, converted, 2);
        nir_ssa_def *ca = nir_channel(b, converted, 3);

        nir_ssa_def *pack =
                nir_ior(b,
                        nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 10))),
                        nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 20)), nir_ishl(b, ca, nir_imm_int(b, 30))));

        return pack;
}

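/* This pack places channel 0 in bits [9:0], channel 1 in [19:10], channel 2
 * in [29:20] and channel 3 in [31:30]. The UNORM variants scale by 1023
 * (and by 3 for alpha) first; the UINT variant packs the integers as-is. */
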
static nir_ssa_def *
nir_float_to_rgb5a1(nir_builder *b, nir_ssa_def *c_float)
{
        nir_ssa_def *degraded = nir_f2f16(b, c_float);

        nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, degraded),
                        nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0));

        nir_ssa_def *converted = nir_f2u16(b,
                        nir_fround_even(b, scaled));

        nir_ssa_def *cr = nir_channel(b, converted, 0);
        nir_ssa_def *cg = nir_channel(b, converted, 1);
        nir_ssa_def *cb = nir_channel(b, converted, 2);
        nir_ssa_def *ca = nir_channel(b, converted, 3);

        nir_ssa_def *pack =
                nir_ior(b,
                        nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 5))),
                        nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 10)), nir_ishl(b, ca, nir_imm_int(b, 15))));

        return pack;
}

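/* This pack places channel 0 in bits [4:0], channel 1 in [9:5], channel 2 in
 * [14:10] and the single alpha bit in bit 15, after scaling the color
 * channels by 31 and alpha by 1. */
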
static nir_ssa_def *
nir_shader_to_native(nir_builder *b,
                     nir_ssa_def *c_shader,
                     const struct util_format_description *desc,
                     unsigned bits,
                     bool homogenous_bits)
{
        bool float_or_pure_int =
                util_format_is_float(desc->format) ||
                util_format_is_pure_integer(desc->format);

        if (util_format_is_unorm8(desc))
                return nir_float_to_unorm8(b, c_shader);
        else if (homogenous_bits && float_or_pure_int)
                return c_shader; /* type is already correct */

        //unsigned bgra[4] = { 2, 1, 0, 3 }; /* BGRA */
        //c_shader = nir_swizzle(b, c_shader, swiz, 4);

        /* Special formats */
        switch (desc->format) {
        case PIPE_FORMAT_B4G4R4A4_UNORM:
        case PIPE_FORMAT_B4G4R4X4_UNORM:
        case PIPE_FORMAT_A4R4_UNORM:
        case PIPE_FORMAT_R4A4_UNORM:
        case PIPE_FORMAT_A4B4G4R4_UNORM:
                return nir_float_to_unorm4(b, c_shader);

        case PIPE_FORMAT_R10G10B10A2_UNORM:
        case PIPE_FORMAT_B10G10R10A2_UNORM:
        case PIPE_FORMAT_R10G10B10X2_UNORM:
        case PIPE_FORMAT_B10G10R10X2_UNORM:
                return nir_float_to_rgb10a2(b, c_shader, true);

        case PIPE_FORMAT_R10G10B10A2_UINT:
                return nir_float_to_rgb10a2(b, c_shader, false);

        case PIPE_FORMAT_B5G5R5A1_UNORM:
                return nir_float_to_rgb5a1(b, c_shader);

        case PIPE_FORMAT_R11G11B10_FLOAT:
                return nir_format_pack_11f11f10f(b, c_shader);

        default:
                fprintf(stderr, "%s\n", desc->name);
                unreachable("Unknown format name");
        }
}

static nir_ssa_def *
nir_native_to_shader(nir_builder *b,
                     nir_ssa_def *c_native,
                     nir_intrinsic_op op,
                     const struct util_format_description *desc,
                     unsigned bits,
                     bool homogenous_bits)
{
        bool float_or_pure_int =
                util_format_is_float(desc->format) ||
                util_format_is_pure_integer(desc->format);

        /* Handle preconverted formats */
        if (op == nir_intrinsic_load_output_u8_as_fp16_pan) {
                assert(util_format_is_unorm8(desc));
                return nir_f2f32(b, c_native);
        }

        /* Otherwise, we're raw */
        assert(op == nir_intrinsic_load_raw_output_pan);

        if (util_format_is_unorm8(desc))
                return nir_unorm8_to_float(b, c_native);
        else if (homogenous_bits && float_or_pure_int)
                return c_native; /* type is already correct */

        /* Special formats */
        switch (desc->format) {
        case PIPE_FORMAT_R11G11B10_FLOAT: {
                nir_ssa_def *unpacked = nir_format_unpack_11f11f10f(b, c_native);

                /* Extend to vec4 with alpha */
                nir_ssa_def *components[4] = {
                        nir_channel(b, unpacked, 0),
                        nir_channel(b, unpacked, 1),
                        nir_channel(b, unpacked, 2),
                        nir_imm_float(b, 1.0)
                };

                return nir_vec(b, components, 4);
        }

        default:
                fprintf(stderr, "%s\n", desc->name);
                unreachable("Unknown format name");
        }
}

void
nir_lower_framebuffer(nir_shader *shader, enum pipe_format format,
                      unsigned gpu_id)
{
        /* Blend shaders are represented as special fragment shaders */
        assert(shader->info.stage == MESA_SHADER_FRAGMENT);

        const struct util_format_description *format_desc =
                util_format_description(format);

        unsigned nr_channels = format_desc->nr_channels;
        unsigned bits = format_desc->channel[0].size;

        /* Do all channels have the same bit count? */
        bool homogenous_bits = true;

        for (unsigned c = 1; c < nr_channels; ++c)
                homogenous_bits &= (format_desc->channel[c].size == bits);

        if (format == PIPE_FORMAT_R11G11B10_FLOAT)
                homogenous_bits = false;

        /* Figure out the bit sizes and component counts for the raw loads/stores */
        unsigned raw_bitsize_in = bits;
        unsigned raw_bitsize_out = bits;
        unsigned raw_out_components = 4;

        /* Some formats pack into a single raw word: a 4-bit UNORM vec4 and
         * 5551 become a 16-bit vec1, while 1010102 and 11f11f10f become a
         * 32-bit vec1 */
        if ((homogenous_bits && bits == 4 && util_format_is_unorm(format)) || format == PIPE_FORMAT_B5G5R5A1_UNORM) {
                raw_bitsize_out = 16;
                raw_out_components = 1;
        } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10G10B10A2_UINT || format == PIPE_FORMAT_R11G11B10_FLOAT) {
                raw_bitsize_out = 32;
                raw_out_components = 1;
        }

        nir_foreach_function(func, shader) {
                nir_foreach_block(block, func->impl) {
                        nir_foreach_instr_safe(instr, block) {
                                if (instr->type != nir_instr_type_intrinsic)
                                        continue;

                                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

                                bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
                                bool is_store = intr->intrinsic == nir_intrinsic_store_deref;

                                if (!(is_load || is_store))
                                        continue;

                                /* Don't worry about MRT */
                                nir_variable *var = nir_intrinsic_get_var(intr, 0);

                                if (var->data.location != FRAG_RESULT_COLOR)
                                        continue;

                                nir_builder b;
                                nir_builder_init(&b, func->impl);

                                if (is_store) {
                                        /* For stores, add conversion before */
                                        b.cursor = nir_before_instr(instr);

                                        /* Grab the input color */
                                        nir_ssa_def *c_nir = nir_ssa_for_src(&b, intr->src[1], 4);

                                        /* Apply sRGB transform */

                                        if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
                                                nir_ssa_def *rgb = nir_channels(&b, c_nir, 0x7);
                                                nir_ssa_def *trans = nir_format_linear_to_srgb(&b, rgb);

                                                nir_ssa_def *comp[4] = {
                                                        nir_channel(&b, trans, 0),
                                                        nir_channel(&b, trans, 1),
                                                        nir_channel(&b, trans, 2),
                                                        nir_channel(&b, c_nir, 3),
                                                };

                                                c_nir = nir_vec(&b, comp, 4);
                                        }

                                        /* Format convert */
                                        nir_ssa_def *converted = nir_shader_to_native(&b, c_nir, format_desc, bits, homogenous_bits);

                                        if (util_format_is_float(format)) {
                                                if (raw_bitsize_out == 16)
                                                        converted = nir_f2f16(&b, converted);
                                                else if (raw_bitsize_out == 32)
                                                        converted = nir_f2f32(&b, converted);
                                        } else {
                                                converted = nir_i2i(&b, converted, raw_bitsize_out);
                                        }

                                        /* Rewrite to use a native store by creating a new intrinsic */
                                        nir_intrinsic_instr *new =
                                                nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
                                        new->src[0] = nir_src_for_ssa(converted);

                                        new->num_components = raw_out_components;

                                        nir_builder_instr_insert(&b, &new->instr);

                                        /* (And finally removing the old) */
                                        nir_instr_remove(instr);
                                } else {
                                        /* For loads, add conversion after */
                                        b.cursor = nir_after_instr(instr);

                                        /* Determine the best op for the format/hardware */
                                        unsigned bitsize = raw_bitsize_in;
                                        unsigned components = 4;
                                        nir_intrinsic_op op = nir_best_load_for_format(format_desc,
                                                        &bitsize,
                                                        &components,
                                                        gpu_id);

                                        /* Rewrite to use a native load by creating a new intrinsic */
                                        nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader, op);
                                        new->num_components = components;

                                        nir_ssa_dest_init(&new->instr, &new->dest, components, bitsize, NULL);
                                        nir_builder_instr_insert(&b, &new->instr);

                                        /* Convert the raw value */
                                        nir_ssa_def *raw = &new->dest.ssa;
                                        nir_ssa_def *converted = nir_native_to_shader(&b, raw, op, format_desc, bits, homogenous_bits);

                                        if (util_format_is_float(format))
                                                converted = nir_f2f32(&b, converted);
                                        else
                                                converted = nir_i2i32(&b, converted);

                                        /* Rewrite to use the converted value */
                                        nir_src rewritten = nir_src_for_ssa(converted);
                                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr);

                                        /* Finally, remove the old load */
                                        nir_instr_remove(instr);
                                }
                        }
                }

                nir_metadata_preserve(func->impl, nir_metadata_block_index |
                                nir_metadata_dominance);
        }
}