X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_nir_lower_io.c;h=2b4ad9c5ea6dddd08e0caa02a37a04981fe1009c;hb=89918c1e74e454af119e7ae23f3ed66fc26abc4b;hp=229d41147d8aa60907a98f94d07e40fb51031e8e;hpb=6a7ca4ef2cd3f39d3b5e77051cb3f3175e9e60df;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 229d41147d8..2b4ad9c5ea6 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -22,8 +22,8 @@ */ #include "vc4_qir.h" -#include "tgsi/tgsi_info.h" -#include "glsl/nir/nir_builder.h" +#include "compiler/nir/nir_builder.h" +#include "util/u_format.h" /** * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into @@ -47,37 +47,217 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr, /* Replace the old intrinsic with a reference to our reconstructed * vec4. */ - nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec), - ralloc_parent(b->impl)); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec)); nir_instr_remove(&intr->instr); } +static nir_ssa_def * +vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + return nir_ubitfield_extract(b, + src, + nir_imm_int(b, 8 * chan), + nir_imm_int(b, 8)); +} + +/** Returns the 16 bit field as a sign-extended 32-bit value. */ +static nir_ssa_def * +vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + return nir_ibitfield_extract(b, + src, + nir_imm_int(b, 16 * chan), + nir_imm_int(b, 16)); +} + +/** Returns the 16 bit field as an unsigned 32 bit value. */ +static nir_ssa_def * +vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + if (chan == 0) { + return nir_iand(b, src, nir_imm_int(b, 0xffff)); + } else { + return nir_ushr(b, src, nir_imm_int(b, 16)); + } +} + +static nir_ssa_def * +vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan); +} + +static nir_ssa_def * +vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, + nir_builder *b, + nir_ssa_def **vpm_reads, + uint8_t swiz, + const struct util_format_description *desc) +{ + const struct util_format_channel_description *chan = + &desc->channel[swiz]; + nir_ssa_def *temp; + + if (swiz > PIPE_SWIZZLE_W) { + return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz); + } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) { + return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz); + } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) { + if (chan->normalized) { + return nir_fmul(b, + nir_i2f(b, vpm_reads[swiz]), + nir_imm_float(b, + 1.0 / 0x7fffffff)); + } else { + return nir_i2f(b, vpm_reads[swiz]); + } + } else if (chan->size == 8 && + (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || + chan->type == UTIL_FORMAT_TYPE_SIGNED)) { + nir_ssa_def *vpm = vpm_reads[0]; + if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { + temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080)); + if (chan->normalized) { + return nir_fsub(b, nir_fmul(b, + vc4_nir_unpack_8f(b, temp, swiz), + nir_imm_float(b, 2.0)), + nir_imm_float(b, 1.0)); + } else { + return nir_fadd(b, + nir_i2f(b, + vc4_nir_unpack_8i(b, temp, + swiz)), + nir_imm_float(b, -128.0)); + } + } else { + if (chan->normalized) { + return vc4_nir_unpack_8f(b, vpm, swiz); + } else { + return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz)); + } + } + } else if (chan->size == 16 && + (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || 
+ chan->type == UTIL_FORMAT_TYPE_SIGNED)) { + nir_ssa_def *vpm = vpm_reads[swiz / 2]; + + /* Note that UNPACK_16F eats a half float, not ints, so we use + * UNPACK_16_I for all of these. + */ + if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { + temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1)); + if (chan->normalized) { + return nir_fmul(b, temp, + nir_imm_float(b, 1/32768.0f)); + } else { + return temp; + } + } else { + temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1)); + if (chan->normalized) { + return nir_fmul(b, temp, + nir_imm_float(b, 1 / 65535.0)); + } else { + return temp; + } + } + } else { + return NULL; + } +} + static void -vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b, - nir_intrinsic_instr *intr) +vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, + nir_intrinsic_instr *intr) { - nir_builder_insert_before_instr(b, &intr->instr); + b->cursor = nir_before_instr(&intr->instr); + + int attr = intr->const_index[0]; + enum pipe_format format = c->vs_key->attr_formats[attr]; + uint32_t attr_size = util_format_get_blocksize(format); + + /* All TGSI-to-NIR inputs are vec4. */ + assert(intr->num_components == 4); + + /* We only accept direct outputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[0]) && + nir_src_as_const_value(intr->src[0])->u32[0] == 0); + + /* Generate dword loads for the VPM values (Since these intrinsics may + * be reordered, the actual reads will be generated at the top of the + * shader by ntq_setup_inputs(). + */ + nir_ssa_def *vpm_reads[4]; + for (int i = 0; i < align(attr_size, 4) / 4; i++) { + nir_intrinsic_instr *intr_comp = + nir_intrinsic_instr_create(c->s, + nir_intrinsic_load_input); + intr_comp->num_components = 1; + intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); + nir_builder_instr_insert(b, &intr_comp->instr); - if (c->stage == QSTAGE_FRAG && intr->const_index[0] == - VC4_NIR_TLB_COLOR_READ_INPUT) { + vpm_reads[i] = &intr_comp->dest.ssa; + } + + bool format_warned = false; + const struct util_format_description *desc = + util_format_description(format); + + nir_ssa_def *dests[4]; + for (int i = 0; i < 4; i++) { + uint8_t swiz = desc->swizzle[i]; + dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz, + desc); + + if (!dests[i]) { + if (!format_warned) { + fprintf(stderr, + "vtx element %d unsupported type: %s\n", + attr, util_format_name(format)); + format_warned = true; + } + dests[i] = nir_imm_float(b, 0.0); + } + } + + replace_intrinsic_with_vec4(b, intr, dests); +} + +static void +vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, + nir_intrinsic_instr *intr) +{ + b->cursor = nir_before_instr(&intr->instr); + + if (intr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT && + intr->const_index[0] < (VC4_NIR_TLB_COLOR_READ_INPUT + + VC4_MAX_SAMPLES)) { /* This doesn't need any lowering. */ return; } nir_variable *input_var = NULL; - foreach_list_typed(nir_variable, var, node, &c->s->inputs) { + nir_foreach_variable(var, &c->s->inputs) { if (var->data.driver_location == intr->const_index[0]) { input_var = var; break; } } assert(input_var); - int semantic_name = input_var->data.location; - int semantic_index = input_var->data.index; /* All TGSI-to-NIR inputs are vec4. 
*/ assert(intr->num_components == 4); + /* We only accept direct inputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[0]) && + nir_src_as_const_value(intr->src[0])->u32[0] == 0); + /* Generate scalar loads equivalent to the original VEC4. */ nir_ssa_def *dests[4]; for (unsigned i = 0; i < intr->num_components; i++) { @@ -85,46 +265,43 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input); intr_comp->num_components = 1; intr_comp->const_index[0] = intr->const_index[0] * 4 + i; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); nir_builder_instr_insert(b, &intr_comp->instr); dests[i] = &intr_comp->dest.ssa; } - switch (c->stage) { - case QSTAGE_FRAG: - switch (semantic_name) { - case TGSI_SEMANTIC_FACE: - dests[0] = nir_fsub(b, - nir_imm_float(b, 1.0), - nir_fmul(b, - nir_i2f(b, dests[0]), - nir_imm_float(b, 2.0))); - dests[1] = nir_imm_float(b, 0.0); + if (input_var->data.location == VARYING_SLOT_FACE) { + /* TGSI-to-NIR's front face. Convert to using the system + * value boolean instead. + */ + nir_ssa_def *face = + nir_load_system_value(b, + nir_intrinsic_load_front_face, + 0); + dests[0] = nir_bcsel(b, face, nir_imm_float(b, 1.0), + nir_imm_float(b, -1.0)); + dests[1] = nir_imm_float(b, 0.0); + dests[2] = nir_imm_float(b, 0.0); + dests[3] = nir_imm_float(b, 1.0); + } else if (input_var->data.location >= VARYING_SLOT_VAR0) { + if (c->fs_key->point_sprite_mask & + (1 << (input_var->data.location - + VARYING_SLOT_VAR0))) { + if (!c->fs_key->is_points) { + dests[0] = nir_imm_float(b, 0.0); + dests[1] = nir_imm_float(b, 0.0); + } + if (c->fs_key->point_coord_upper_left) { + dests[1] = nir_fsub(b, + nir_imm_float(b, 1.0), + dests[1]); + } dests[2] = nir_imm_float(b, 0.0); dests[3] = nir_imm_float(b, 1.0); - break; - case TGSI_SEMANTIC_GENERIC: - if (c->fs_key->point_sprite_mask & - (1 << semantic_index)) { - if (!c->fs_key->is_points) { - dests[0] = nir_imm_float(b, 0.0); - dests[1] = nir_imm_float(b, 0.0); - } - if (c->fs_key->point_coord_upper_left) { - dests[1] = nir_fsub(b, - nir_imm_float(b, 1.0), - dests[1]); - } - dests[2] = nir_imm_float(b, 0.0); - dests[3] = nir_imm_float(b, 1.0); - } - break; } - break; - case QSTAGE_COORD: - case QSTAGE_VERT: - break; } replace_intrinsic_with_vec4(b, intr, dests); @@ -135,24 +312,26 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { nir_variable *output_var = NULL; - foreach_list_typed(nir_variable, var, node, &c->s->outputs) { + nir_foreach_variable(var, &c->s->outputs) { if (var->data.driver_location == intr->const_index[0]) { output_var = var; break; } } assert(output_var); - unsigned semantic_name = output_var->data.location; if (c->stage == QSTAGE_COORD && - (semantic_name != TGSI_SEMANTIC_POSITION && - semantic_name != TGSI_SEMANTIC_PSIZE)) { + output_var->data.location != VARYING_SLOT_POS && + output_var->data.location != VARYING_SLOT_PSIZ) { nir_instr_remove(&intr->instr); return; } /* Color output is lowered by vc4_nir_lower_blend(). 
*/ - if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) { + if (c->stage == QSTAGE_FRAG && + (output_var->data.location == FRAG_RESULT_COLOR || + output_var->data.location == FRAG_RESULT_DATA0 || + output_var->data.location == FRAG_RESULT_SAMPLE_MASK)) { intr->const_index[0] *= 4; return; } @@ -160,7 +339,13 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR outputs are VEC4. */ assert(intr->num_components == 4); - nir_builder_insert_before_instr(b, &intr->instr); + /* We only accept direct outputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[1]) && + nir_src_as_const_value(intr->src[1])->u32[0] == 0); + + b->cursor = nir_before_instr(&intr->instr); for (unsigned i = 0; i < intr->num_components; i++) { nir_intrinsic_instr *intr_comp = @@ -169,9 +354,9 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, intr_comp->const_index[0] = intr->const_index[0] * 4 + i; assert(intr->src[0].is_ssa); - intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b, - intr->src[0].ssa, - &i, 1, false)); + intr_comp->src[0] = + nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); + intr_comp->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &intr_comp->instr); } @@ -182,14 +367,14 @@ static void vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - /* All TGSI-to-NIR uniform loads are vec4, but we may create dword - * loads in our lowering passes. + /* All TGSI-to-NIR uniform loads are vec4, but we need byte offsets + * in the backend. */ if (intr->num_components == 1) return; assert(intr->num_components == 4); - nir_builder_insert_before_instr(b, &intr->instr); + b->cursor = nir_before_instr(&intr->instr); /* Generate scalar loads equivalent to the original VEC4. */ nir_ssa_def *dests[4]; @@ -197,27 +382,16 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr_comp = nir_intrinsic_instr_create(c->s, intr->intrinsic); intr_comp->num_components = 1; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); - - if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) { - /* Convert the variable TGSI register index to a byte - * offset. - */ - intr_comp->src[0] = - nir_src_for_ssa(nir_ishl(b, - intr->src[0].ssa, - nir_imm_int(b, 4))); - - /* Convert the offset to be a byte index, too. */ - intr_comp->const_index[0] = (intr->const_index[0] * 16 + - i * 4); - } else { - /* We want a dword index for non-indirect uniform - * loads. - */ - intr_comp->const_index[0] = (intr->const_index[0] * 4 + - i); - } + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); + + /* Convert the uniform offset to bytes. If it happens to be a + * constant, constant-folding will clean up the shift for us. 
+ */ + intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4); + + intr_comp->src[0] = + nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, + nir_imm_int(b, 4))); dests[i] = &intr_comp->dest.ssa; @@ -237,7 +411,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, switch (intr->intrinsic) { case nir_intrinsic_load_input: - vc4_nir_lower_input(c, b, intr); + if (c->stage == QSTAGE_FRAG) + vc4_nir_lower_fs_input(c, b, intr); + else + vc4_nir_lower_vertex_attr(c, b, intr); break; case nir_intrinsic_store_output: @@ -245,35 +422,25 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, break; case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: vc4_nir_lower_uniform(c, b, intr); break; + case nir_intrinsic_load_user_clip_plane: default: break; } } static bool -vc4_nir_lower_io_block(nir_block *block, void *arg) +vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) { - struct vc4_compile *c = arg; - nir_function_impl *impl = - nir_cf_node_get_function(&block->cf_node); - nir_builder b; nir_builder_init(&b, impl); - nir_foreach_instr_safe(block, instr) - vc4_nir_lower_io_instr(c, &b, instr); - - return true; -} - -static bool -vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) -{ - nir_foreach_block(impl, vc4_nir_lower_io_block, c); + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) + vc4_nir_lower_io_instr(c, &b, instr); + } nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); @@ -282,10 +449,10 @@ vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) } void -vc4_nir_lower_io(struct vc4_compile *c) +vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c) { - nir_foreach_overload(c->s, overload) { - if (overload->impl) - vc4_nir_lower_io_impl(c, overload->impl); + nir_foreach_function(function, s) { + if (function->impl) + vc4_nir_lower_io_impl(c, function->impl); } }
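
Note on the vertex-attribute hunk above: the signed 8-bit paths avoid a dedicated signed unpack by XORing the packed word with 0x80808080, which biases each byte into unsigned range so that the unorm8 unpack (followed by a "* 2 - 1" rescale in the normalized case, or "i2f + (-128)" in the scaled-integer case) recovers the signed value. The following standalone C sketch is not part of the patch — the unorm8() helper is a made-up stand-in for the hardware unpack that vc4_nir_unpack_8f() models — it just checks that arithmetic on the CPU:

/* Standalone sketch, not driver code: verifies the bias trick used for
 * signed 8-bit vertex attributes in vc4_nir_get_vattr_channel_vpm().
 */
#include <stdio.h>
#include <stdint.h>

/* Stand-in for the hardware's unorm8 unpack (what vc4_nir_unpack_8f models). */
static float unorm8(uint8_t u)
{
        return u / 255.0f;
}

int main(void)
{
        const int8_t vals[] = { -128, -127, -64, -1, 0, 1, 64, 127 };

        for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
                int8_t s = vals[i];
                uint8_t biased = (uint8_t)s ^ 0x80;   /* equals s + 128 */

                /* Normalized path: unorm unpack of the biased byte, then
                 * remap [0, 1] to [-1, 1].  Agrees with the reference snorm
                 * value to within the precision the unorm unpack provides.
                 */
                float snorm_trick = unorm8(biased) * 2.0f - 1.0f;
                float snorm_ref = (s <= -127) ? -1.0f : s / 127.0f;

                /* Scaled-integer path: unsigned byte extract, i2f, then
                 * add -128 to undo the bias.
                 */
                float sint_trick = (float)biased + -128.0f;

                printf("%5d: snorm trick % f  ref % f  scaled int % f\n",
                       s, snorm_trick, snorm_ref, sint_trick);
        }
        return 0;
}

The 16-bit paths in the same function take a different route: they sign- or zero-extend the half-word with bitfield extracts and scale by 1/32768 or 1/65535, so no bias trick is needed there.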