src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c

   1 /*
   2  * Copyright (C) 2019 Collabora, Ltd.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors (Collabora):
  24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  25  */
  26
  27 /**
  28  * @file
  29  *
  30  * Implements framebuffer format conversions in software, specifically for
  31  * blend shaders on Midgard/Bifrost. load_output/store_output (derefs more
  32  * correctly -- pre I/O lowering) normally for the fragment stage within the
  33  * blend shader will operate with purely vec4 float ("nir") encodings. This
  34  * lowering stage, to be run before I/O is lowered, converts the native
  35  * framebuffer format to a NIR encoding after loads and vice versa before
  36  * stores. This pass is designed for a single render target; Midgard duplicates
  37  * blend shaders for MRT to simplify everything.
  38  */
  39
  40 #include "compiler/nir/nir.h"
  41 #include "compiler/nir/nir_builder.h"
  42 #include "compiler/nir/nir_format_convert.h"
  43 #include "nir_lower_blend.h"
  44 #include "util/format/u_format.h"
  45
  46 /* Determines the best NIR intrinsic to load a tile buffer of a given type,
  47  * using native format conversion where possible. RGBA8 UNORM has a fast path
  48  * (on some chips). Otherwise, we default to raw reads. */
  49
  50 static nir_intrinsic_op
  51 nir_best_load_for_format(
  52       const struct util_format_description *desc,
  53       unsigned *special_bitsize,
  54       unsigned gpu_id)
  55 {
  56    if (util_format_is_unorm8(desc) && gpu_id != 0x750) {
  57       *special_bitsize = 16;
  58       return nir_intrinsic_load_output_u8_as_fp16_pan;
  59    } else
  60       return nir_intrinsic_load_raw_output_pan;
  61 }
  62
  63
  64 /* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */
  65
  66 static nir_ssa_def *
  67 nir_float_to_unorm8(nir_builder *b, nir_ssa_def *c_float)
  68 {
  69    /* First, we degrade quality to fp16; we don't need the extra bits */
  70    nir_ssa_def *degraded = /*nir_f2f16(b, c_float)*/c_float;
  71
  72    /* Scale from [0, 1] to [0, 255.0] */
  73    nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 255.0);
  74
  75    /* Next, we type convert */
  76    nir_ssa_def *converted = nir_u2u8(b, nir_f2u16(b,
  77                                      nir_fround_even(b, nir_f2f16(b, scaled))));
  78
  79    return converted;
  80 }
  81
  82 static nir_ssa_def *
  83 nir_unorm8_to_float(nir_builder *b, nir_ssa_def *c_native)
  84 {
  85    /* First, we convert up from u8 to f16 */
  86    nir_ssa_def *converted = nir_f2f32(b, nir_u2f16(b, nir_u2u16(b, c_native)));
  87
  88    /* Next, we scale down from [0, 255.0] to [0, 1] */
  89    nir_ssa_def *scaled = nir_fsat(b, nir_fmul_imm(b, converted, 1.0/255.0));
  90
  91    return scaled;
  92 }
  93
  94 /* Converters for UNORM4 formats, packing the final result into 16-bit */
  95
  96 static nir_ssa_def *
  97 nir_float_to_unorm4(nir_builder *b, nir_ssa_def *c_float)
  98 {
  99    /* First, we degrade quality to fp16; we don't need the extra bits */
 100    nir_ssa_def *degraded = nir_f2f16(b, c_float);
 101
 102    /* Scale from [0, 1] to [0, 15.0] */
 103    nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 15.0);
 104
 105    /* Next, we type convert to u16 */
 106    nir_ssa_def *converted = nir_f2u16(b,
 107                                       nir_fround_even(b, scaled));
 108
 109    /* In u16 land, we now need to pack */
 110    nir_ssa_def *cr = nir_channel(b, converted, 0);
 111    nir_ssa_def *cg = nir_channel(b, converted, 1);
 112    nir_ssa_def *cb = nir_channel(b, converted, 2);
 113    nir_ssa_def *ca = nir_channel(b, converted, 3);
 114
 115    nir_ssa_def *pack =
 116       nir_ior(b,
 117               nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 4))),
 118               nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 8)), nir_ishl(b, ca, nir_imm_int(b, 12))));
 119
 120    return pack;
 121 }
 122
 123 static nir_ssa_def *
 124 nir_float_to_rgb10a2(nir_builder *b, nir_ssa_def *c_float, bool normalize)
 125 {
 126    nir_ssa_def *converted = c_float;
 127
 128    if (normalize) {
 129       nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, c_float),
 130                                      nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0));
 131
 132       converted = nir_f2u32(b,
 133                             nir_fround_even(b, scaled));
 134    }
 135
 136    nir_ssa_def *cr = nir_channel(b, converted, 0);
 137    nir_ssa_def *cg = nir_channel(b, converted, 1);
 138    nir_ssa_def *cb = nir_channel(b, converted, 2);
 139    nir_ssa_def *ca = nir_channel(b, converted, 3);
 140
 141    nir_ssa_def *pack =
 142       nir_ior(b,
 143               nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 10))),
 144               nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 20)), nir_ishl(b, ca, nir_imm_int(b, 30))));
 145
 146    return pack;
 147 }
 148
 149 static nir_ssa_def *
 150 nir_float_to_rgb5a1(nir_builder *b, nir_ssa_def *c_float)
 151 {
 152    nir_ssa_def *degraded = nir_f2f16(b, c_float);
 153
 154    nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, degraded),
 155                                   nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0));
 156
 157    nir_ssa_def *converted = nir_f2u16(b,
 158                                       nir_fround_even(b, scaled));
 159
 160    nir_ssa_def *cr = nir_channel(b, converted, 0);
 161    nir_ssa_def *cg = nir_channel(b, converted, 1);
 162    nir_ssa_def *cb = nir_channel(b, converted, 2);
 163    nir_ssa_def *ca = nir_channel(b, converted, 3);
 164
 165    nir_ssa_def *pack =
 166       nir_ior(b,
 167               nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 5))),
 168               nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 10)), nir_ishl(b, ca, nir_imm_int(b, 15))));
 169
 170    return pack;
 171 }
 172
 173 static nir_ssa_def *
 174 nir_shader_to_native(nir_builder *b,
 175                      nir_ssa_def *c_shader,
 176                      const struct util_format_description *desc,
 177                      unsigned bits,
 178                      bool homogenous_bits)
 179 {
 180    bool float_or_pure_int =
 181       util_format_is_float(desc->format) ||
 182       util_format_is_pure_integer(desc->format);
 183
 184    if (util_format_is_unorm8(desc))
 185       return nir_float_to_unorm8(b, c_shader);
 186    else if (homogenous_bits && float_or_pure_int)
 187       return c_shader; /* type is already correct */
 188
 189    //unsigned bgra[4] = { 2, 1, 0, 3 }; /* BGRA */
 190    //c_shader = nir_swizzle(b, c_shader, swiz, 4);
 191
 192    /* Special formats */
 193    switch (desc->format) {
 194    case PIPE_FORMAT_B4G4R4A4_UNORM:
 195    case PIPE_FORMAT_B4G4R4X4_UNORM:
 196    case PIPE_FORMAT_A4R4_UNORM:
 197    case PIPE_FORMAT_R4A4_UNORM:
 198    case PIPE_FORMAT_A4B4G4R4_UNORM:
 199       return nir_float_to_unorm4(b, c_shader);
 200
 201    case PIPE_FORMAT_R10G10B10A2_UNORM:
 202    case PIPE_FORMAT_B10G10R10A2_UNORM:
 203    case PIPE_FORMAT_R10G10B10X2_UNORM:
 204    case PIPE_FORMAT_B10G10R10X2_UNORM:
 205       return nir_float_to_rgb10a2(b, c_shader, true);
 206
 207    case PIPE_FORMAT_R10G10B10A2_UINT:
 208       return nir_float_to_rgb10a2(b, c_shader, false);
 209
 210    case PIPE_FORMAT_B5G5R5A1_UNORM:
 211       return nir_float_to_rgb5a1(b, c_shader);
 212
 213    case PIPE_FORMAT_R11G11B10_FLOAT:
 214       return nir_format_pack_11f11f10f(b, c_shader);
 215
 216    default:
 217       printf("%s\n", desc->name);
 218       unreachable("Unknown format name");
 219    }
 220 }
 221
 222 static nir_ssa_def *
 223 nir_native_to_shader(nir_builder *b,
 224                      nir_ssa_def *c_native,
 225                      nir_intrinsic_op op,
 226                      const struct util_format_description *desc,
 227                      unsigned bits,
 228                      bool homogenous_bits)
 229 {
 230    bool float_or_pure_int =
 231       util_format_is_float(desc->format) ||
 232       util_format_is_pure_integer(desc->format);
 233
 234    /* Handle preconverted formats */
 235    if (op == nir_intrinsic_load_output_u8_as_fp16_pan) {
 236       assert(util_format_is_unorm8(desc));
 237       return nir_f2f32(b, c_native);
 238    }
 239
 240    /* Otherwise, we're raw */
 241    assert(op == nir_intrinsic_load_raw_output_pan);
 242
 243    if (util_format_is_unorm8(desc))
 244       return nir_unorm8_to_float(b, c_native);
 245    else if (homogenous_bits && float_or_pure_int)
 246       return c_native; /* type is already correct */
 247    else {
 248       printf("%s\n", desc->name);
 249       unreachable("Unknown format name");
 250    }
 251 }
 252
 253 void
 254 nir_lower_framebuffer(nir_shader *shader, enum pipe_format format,
 255                       unsigned gpu_id)
 256 {
 257    /* Blend shaders are represented as special fragment shaders */
 258    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
 259
 260    const struct util_format_description *format_desc =
 261       util_format_description(format);
 262
 263    unsigned nr_channels = format_desc->nr_channels;
 264    unsigned bits = format_desc->channel[0].size;
 265
 266    /* Do all channels have the same bit count? */
 267    bool homogenous_bits = true;
 268
 269    for (unsigned c = 1; c < nr_channels; ++c)
 270       homogenous_bits &= (format_desc->channel[c].size == bits);
 271
 272    if (format == PIPE_FORMAT_R11G11B10_FLOAT)
 273       homogenous_bits = false;
 274
 275    /* Figure out the formats for the raw */
 276    unsigned raw_bitsize_in = bits;
 277    unsigned raw_bitsize_out = bits;
 278    unsigned raw_out_components = 4;
 279
 280    /* We pack a 4-bit vec4 as 16-bit vec1 */
 281    if ((homogenous_bits && bits == 4 && util_format_is_unorm(format)) || format == PIPE_FORMAT_B5G5R5A1_UNORM) {
 282       raw_bitsize_out = 16;
 283       raw_out_components = 1;
 284    } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10G10B10A2_UINT || format == PIPE_FORMAT_R11G11B10_FLOAT) {
 285       raw_bitsize_out = 32;
 286       raw_out_components = 1;
 287    }
 288
 289    nir_foreach_function(func, shader) {
 290       nir_foreach_block(block, func->impl) {
 291          nir_foreach_instr_safe(instr, block) {
 292             if (instr->type != nir_instr_type_intrinsic)
 293                continue;
 294
 295             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 296
 297             bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
 298             bool is_store = intr->intrinsic == nir_intrinsic_store_deref;
 299
 300             if (!(is_load || is_store))
 301                continue;
 302
 303             /* Don't worry about MRT */
 304             nir_variable *var = nir_intrinsic_get_var(intr, 0);
 305
 306             if (var->data.location != FRAG_RESULT_COLOR)
 307                continue;
 308
 309             nir_builder b;
 310             nir_builder_init(&b, func->impl);
 311
 312             if (is_store) {
 313                /* For stores, add conversion before */
 314                b.cursor = nir_before_instr(instr);
 315
 316                /* Grab the input color */
 317                nir_ssa_def *c_nir = nir_ssa_for_src(&b, intr->src[1], 4);
 318
 319                /* Format convert */
 320                nir_ssa_def *converted = nir_shader_to_native(&b, c_nir, format_desc, bits, homogenous_bits);
 321
 322                if (util_format_is_float(format)) {
 323                   if (raw_bitsize_out == 16)
 324                      converted = nir_f2f16(&b, converted);
 325                   else if (raw_bitsize_out == 32)
 326                      converted = nir_f2f32(&b, converted);
 327                } else {
 328                   converted = nir_i2i(&b, converted, raw_bitsize_out);
 329                }
 330
 331                /* Rewrite to use a native store by creating a new intrinsic */
 332                nir_intrinsic_instr *new =
 333                   nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
 334                new->src[0] = nir_src_for_ssa(converted);
 335
 336                new->num_components = raw_out_components;
 337
 338                nir_builder_instr_insert(&b, &new->instr);
 339
 340                /* (And finally removing the old) */
 341                nir_instr_remove(instr);
 342             } else {
 343                /* For loads, add conversion after */
 344                b.cursor = nir_after_instr(instr);
 345
 346                /* Determine the best op for the format/hardware */
 347                unsigned bitsize = raw_bitsize_in;
 348                nir_intrinsic_op op = nir_best_load_for_format(format_desc,
 349                                                               &bitsize,
 350                                                               gpu_id);
 351
 352                /* Rewrite to use a native load by creating a new intrinsic */
 353                nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader, op);
 354                new->num_components = 4;
 355
 356                nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL);
 357                nir_builder_instr_insert(&b, &new->instr);
 358
 359                /* Convert the raw value */
 360                nir_ssa_def *raw = &new->dest.ssa;
 361                nir_ssa_def *converted = nir_native_to_shader(&b, raw, op, format_desc, bits, homogenous_bits);
 362
 363                /* Rewrite to use the converted value */
 364                nir_src rewritten = nir_src_for_ssa(converted);
 365                nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr);
 366
 367                /* Finally, remove the old load */
 368                nir_instr_remove(instr);
 369             }
 370          }
 371       }
 372
 373       nir_metadata_preserve(func->impl, nir_metadata_block_index |
 374                             nir_metadata_dominance);
 375    }
 376 }