enum pan_format_class
pan_format_class_load(const struct util_format_description *desc, unsigned quirks)
{
+ /* Pure integers can be loaded via EXT_shader_framebuffer_fetch and should
+ * be handled as a raw load with a size conversion (it's cheap). Likewise,
+ * since float framebuffers are internally implemented as raw (i.e.
+ * integer) framebuffers with blend shaders to go back and forth, they
+ * should be handled in software as well */
+
+ if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format))
+ return PAN_FORMAT_SOFTWARE;
+
/* Check if we can do anything better than software architecturally */
if (quirks & MIDGARD_NO_TYPED_BLEND_LOADS) {
return (quirks & NO_BLEND_PACKS) ?
PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
}

return nir_vec(b, q, 4);
}
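+
+/* Pad a vector out to N components, zero-filling the tail: extending a
+ * vec2 (x, y) to N = 4 gives (x, y, 0, 0). Used below to widen raw
+ * framebuffer loads to the destination's component count. */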
+static nir_ssa_def *
+pan_extend(nir_builder *b, nir_ssa_def *v, unsigned N)
+{
+ nir_ssa_def *q[4];
+ assert(v->num_components <= 4);
+ assert(N <= 4);
+
+ for (unsigned j = 0; j < v->num_components; ++j)
+ q[j] = nir_channel(b, v, j);
+
+ for (unsigned j = v->num_components; j < N; ++j)
+ q[j] = nir_imm_int(b, 0);
+
+ return nir_vec(b, q, N);
+}
+
static nir_ssa_def *
pan_replicate_4(nir_builder *b, nir_ssa_def *v)
{
nir_ssa_def *replicated[4] = { v, v, v, v };
return nir_vec(b, replicated, 4);
}

static nir_ssa_def *
pan_unpack_unorm_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
{
assert(num_components <= 4);
nir_ssa_def *unpacked = nir_unpack_unorm_4x8(b, nir_channel(b, pack, 0));
- return nir_f2f16(b, unpacked);
+ return nir_f2fmp(b, unpacked);
}
/* UNORM 4 is also unpacked to f16, which prevents us from using the shared
* fp32 unpack helpers */

static nir_ssa_def *
pan_unpack_unorm_small(nir_builder *b, nir_ssa_def *pack,
nir_ssa_def *scales, nir_ssa_def *shifts)
{
nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
- nir_ssa_def *raw = nir_ushr(b, nir_u2u16(b, channels), shifts);
+ nir_ssa_def *raw = nir_ushr(b, nir_u2ump(b, channels), shifts);
return nir_fmul(b, nir_u2f16(b, raw), scales);
}
static nir_ssa_def *
pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
{
nir_ssa_def *p = nir_channel(b, packed, 0);
nir_ssa_def *bytes = nir_unpack_32_4x8(b, p);
- nir_ssa_def *ubytes = nir_u2u16(b, bytes);
+ nir_ssa_def *ubytes = nir_u2ump(b, bytes);
nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)),
nir_imm_ivec4(b, 0, 2, 4, 6));
nir_ssa_def *precision = nir_iand(b, shifts,
- nir_i2i16(b, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)));
+ nir_i2imp(b, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)));
nir_ssa_def *top_rgb = nir_ishl(b, nir_channels(b, ubytes, 0x7), nir_imm_int(b, 2));
top_rgb = nir_ior(b, nir_channels(b, precision, 0x7), top_rgb);
nir_ssa_def *chans[4] = {
nir_channel(b, top_rgb, 0),
nir_channel(b, top_rgb, 1),
nir_channel(b, top_rgb, 2),
nir_channel(b, precision, 3)
};
nir_ssa_def *scale = nir_imm_vec4(b, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 1023.0, 1.0 / 3.0);
- return nir_f2f16(b, nir_fmul(b, nir_u2f32(b, nir_vec(b, chans, 4)), scale));
+ return nir_f2fmp(b, nir_fmul(b, nir_u2f32(b, nir_vec(b, chans, 4)), scale));
}
/* On the other hand, the pure int RGB10_A2 is identical to the spec */
static nir_ssa_def *
pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
{
nir_ssa_def *shift = nir_ushr(b, pan_replicate_4(b, nir_channel(b, packed, 0)),
nir_imm_ivec4(b, 0, 10, 20, 30));
nir_ssa_def *mask = nir_iand(b, shift,
nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));
- return nir_u2u16(b, mask);
+ return nir_u2ump(b, mask);
}
/* NIR means we can *finally* catch a break */
static nir_ssa_def *
pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v)
{
nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0));
- nir_ssa_def *f16 = nir_f2f16(b, f32);
+ nir_ssa_def *f16 = nir_f2fmp(b, f32);
/* Extend to vec4 with alpha */
nir_ssa_def *components[4] = {
nir_ssa_def *rgb = nir_channels(b, linear, 0x7);
/* TODO: fp16 native conversion */
- nir_ssa_def *srgb = nir_f2f16(b,
+ nir_ssa_def *srgb = nir_f2fmp(b,
nir_format_linear_to_srgb(b, nir_f2f32(b, rgb)));
nir_ssa_def *comp[4] = {
nir_ssa_def *rgb = nir_channels(b, srgb, 0x7);
/* TODO: fp16 native conversion */
- nir_ssa_def *linear = nir_f2f16(b,
+ nir_ssa_def *linear = nir_f2fmp(b,
nir_format_srgb_to_linear(b, nir_f2f32(b, rgb)));
nir_ssa_def *comp[4] = {
nir_builder_instr_insert(b, &new->instr);
}
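+
+/* Choose the sample to fetch: a non-negative 'sample' is loaded by immediate
+ * index (blend shaders), while a negative value selects the current sample
+ * via load_sample_id (framebuffer fetch). */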
+static nir_ssa_def *
+pan_sample_id(nir_builder *b, int sample)
+{
+ return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b);
+}
+
static void
pan_lower_fb_load(nir_shader *shader,
nir_builder *b,
nir_intrinsic_instr *intr,
const struct util_format_description *desc,
- unsigned base, unsigned quirks)
+ unsigned base, int sample, unsigned quirks)
{
nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader,
nir_intrinsic_load_raw_output_pan);
new->num_components = 4;
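+ /* Forward the selected sample index to the raw load */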
+ new->src[0] = nir_src_for_ssa(pan_sample_id(b, sample));
nir_intrinsic_set_base(new, base);
}
unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits);
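+ /* Widen the unpacked value to however many components the shader reads */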
+ unpacked = pan_extend(b, unpacked, nir_dest_num_components(intr->dest));
nir_src rewritten = nir_src_for_ssa(unpacked);
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, &intr->instr);
bool
pan_lower_framebuffer(nir_shader *shader, enum pipe_format *rt_fmts,
- bool lower_store, unsigned quirks)
+ bool is_blend, unsigned quirks)
{
if (shader->info.stage != MESA_SHADER_FRAGMENT)
return false;
bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
bool is_store = intr->intrinsic == nir_intrinsic_store_deref;
- if (!(is_load || (is_store && lower_store)))
+ if (!(is_load || (is_store && is_blend)))
continue;
nir_variable *var = nir_intrinsic_get_var(intr, 0);
if (fmt_class == PAN_FORMAT_NATIVE)
continue;
+ /* EXT_shader_framebuffer_fetch requires per-sample loads. MSAA blend
+ * shaders are not yet handled, so for now always load sample 0. */
+ int sample = is_blend ? 0 : -1;
+
nir_builder b;
nir_builder_init(&b, func->impl);
pan_lower_fb_store(shader, &b, intr, desc, quirks);
} else {
b.cursor = nir_after_instr(instr);
- pan_lower_fb_load(shader, &b, intr, desc, base, quirks);
+ pan_lower_fb_load(shader, &b, intr, desc, base, sample, quirks);
}
nir_instr_remove(instr);