X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_nir_lower_io.c;h=c82d99812259bf18d69038f88c2d2321350ed4e3;hb=882ca6dfb0f3d17e0f8bc917307d915ab1718069;hp=d47e3bf52b0b5822ad371f1d6da120f39bd547aa;hpb=a39a8fbbaa129f4e52f2a3ad2747182e9a74d910;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index d47e3bf52b0..c82d9981225 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -23,29 +23,30 @@ #include "vc4_qir.h" #include "compiler/nir/nir_builder.h" -#include "util/u_format.h" +#include "util/format/u_format.h" /** - * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into - * something amenable to the VC4 architecture. + * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io + * intrinsics into something amenable to the VC4 architecture. * - * Currently, it split inputs, outputs, and uniforms into scalars, drops any + * Currently, it splits VS inputs and uniforms into scalars, drops any * non-position outputs in coordinate shaders, and fixes up the addressing on - * indirect uniform loads. + * indirect uniform loads. FS input and VS output scalarization is handled by + * nir_lower_io_to_scalar(). */ static void -replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr, - nir_ssa_def **comps) +replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr, + nir_ssa_def **comps) { - /* Batch things back together into a vec4. This will get split by the - * later ALU scalarization pass. + /* Batch things back together into a vector. This will get split by + * the later ALU scalarization pass. */ - nir_ssa_def *vec = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]); + nir_ssa_def *vec = nir_vec(b, comps, intr->num_components); /* Replace the old intrinsic with a reference to our reconstructed - * vec4. + * vector. */ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec)); nir_instr_remove(&intr->instr); @@ -98,18 +99,18 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, &desc->channel[swiz]; nir_ssa_def *temp; - if (swiz > UTIL_FORMAT_SWIZZLE_W) { + if (swiz > PIPE_SWIZZLE_W) { return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz); } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) { return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz); } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) { if (chan->normalized) { return nir_fmul(b, - nir_i2f(b, vpm_reads[swiz]), + nir_i2f32(b, vpm_reads[swiz]), nir_imm_float(b, 1.0 / 0x7fffffff)); } else { - return nir_i2f(b, vpm_reads[swiz]); + return nir_i2f32(b, vpm_reads[swiz]); } } else if (chan->size == 8 && (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || @@ -124,16 +125,16 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, nir_imm_float(b, 1.0)); } else { return nir_fadd(b, - nir_i2f(b, - vc4_nir_unpack_8i(b, temp, - swiz)), + nir_i2f32(b, + vc4_nir_unpack_8i(b, temp, + swiz)), nir_imm_float(b, -128.0)); } } else { if (chan->normalized) { return vc4_nir_unpack_8f(b, vpm, swiz); } else { - return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz)); + return nir_i2f32(b, vc4_nir_unpack_8i(b, vpm, swiz)); } } } else if (chan->size == 16 && @@ -145,7 +146,7 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, * UNPACK_16_I for all of these. 
*/ if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { - temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1)); + temp = nir_i2f32(b, vc4_nir_unpack_16i(b, vpm, swiz & 1)); if (chan->normalized) { return nir_fmul(b, temp, nir_imm_float(b, 1/32768.0f)); @@ -153,7 +154,7 @@ vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, return temp; } } else { - temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1)); + temp = nir_i2f32(b, vc4_nir_unpack_16u(b, vpm, swiz & 1)); if (chan->normalized) { return nir_fmul(b, temp, nir_imm_float(b, 1 / 65535.0)); @@ -172,18 +173,14 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, { b->cursor = nir_before_instr(&intr->instr); - int attr = intr->const_index[0]; + int attr = nir_intrinsic_base(intr); enum pipe_format format = c->vs_key->attr_formats[attr]; uint32_t attr_size = util_format_get_blocksize(format); - /* All TGSI-to-NIR inputs are vec4. */ - assert(intr->num_components == 4); - /* We only accept direct outputs and TGSI only ever gives them to us * with an offset value of 0. */ - assert(nir_src_as_const_value(intr->src[0]) && - nir_src_as_const_value(intr->src[0])->u[0] == 0); + assert(nir_src_as_uint(intr->src[0]) == 0); /* Generate dword loads for the VPM values (Since these intrinsics may * be reordered, the actual reads will be generated at the top of the @@ -195,9 +192,10 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input); intr_comp->num_components = 1; - intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + nir_intrinsic_set_base(intr_comp, nir_intrinsic_base(intr)); + nir_intrinsic_set_component(intr_comp, i); intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); nir_builder_instr_insert(b, &intr_comp->instr); vpm_reads[i] = &intr_comp->dest.ssa; @@ -208,7 +206,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, util_format_description(format); nir_ssa_def *dests[4]; - for (int i = 0; i < 4; i++) { + for (int i = 0; i < intr->num_components; i++) { uint8_t swiz = desc->swizzle[i]; dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz, desc); @@ -224,83 +222,78 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, } } - replace_intrinsic_with_vec4(b, intr, dests); + replace_intrinsic_with_vec(b, intr, dests); +} + +static bool +is_point_sprite(struct vc4_compile *c, nir_variable *var) +{ + if (var->data.location < VARYING_SLOT_VAR0 || + var->data.location > VARYING_SLOT_VAR31) + return false; + + return (c->fs_key->point_sprite_mask & + (1 << (var->data.location - VARYING_SLOT_VAR0))); } static void vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - b->cursor = nir_before_instr(&intr->instr); + b->cursor = nir_after_instr(&intr->instr); - if (intr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT && - intr->const_index[0] < (VC4_NIR_TLB_COLOR_READ_INPUT + - VC4_MAX_SAMPLES)) { + if (nir_intrinsic_base(intr) >= VC4_NIR_TLB_COLOR_READ_INPUT && + nir_intrinsic_base(intr) < (VC4_NIR_TLB_COLOR_READ_INPUT + + VC4_MAX_SAMPLES)) { /* This doesn't need any lowering. 
*/ return; } nir_variable *input_var = NULL; nir_foreach_variable(var, &c->s->inputs) { - if (var->data.driver_location == intr->const_index[0]) { + if (var->data.driver_location == nir_intrinsic_base(intr)) { input_var = var; break; } } assert(input_var); - /* All TGSI-to-NIR inputs are vec4. */ - assert(intr->num_components == 4); + int comp = nir_intrinsic_component(intr); - /* We only accept direct inputs and TGSI only ever gives them to us - * with an offset value of 0. - */ - assert(nir_src_as_const_value(intr->src[0]) && - nir_src_as_const_value(intr->src[0])->u[0] == 0); + /* Lower away point coordinates, and fix up PNTC. */ + if (is_point_sprite(c, input_var) || + input_var->data.location == VARYING_SLOT_PNTC) { + assert(intr->num_components == 1); - /* Generate scalar loads equivalent to the original VEC4. */ - nir_ssa_def *dests[4]; - for (unsigned i = 0; i < intr->num_components; i++) { - nir_intrinsic_instr *intr_comp = - nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input); - intr_comp->num_components = 1; - intr_comp->const_index[0] = intr->const_index[0] * 4 + i; - intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_ssa_def *result = &intr->dest.ssa; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); - nir_builder_instr_insert(b, &intr_comp->instr); + switch (comp) { + case 0: + case 1: + /* If we're not rendering points, we need to set a + * defined value for the input that would come from + * PNTC. + */ + if (!c->fs_key->is_points) + result = nir_imm_float(b, 0.0); + break; + case 2: + result = nir_imm_float(b, 0.0); + break; + case 3: + result = nir_imm_float(b, 1.0); + break; + } - dests[i] = &intr_comp->dest.ssa; - } + if (c->fs_key->point_coord_upper_left && comp == 1) + result = nir_fsub(b, nir_imm_float(b, 1.0), result); - if (input_var->data.location == VARYING_SLOT_FACE) { - dests[0] = nir_fsub(b, - nir_imm_float(b, 1.0), - nir_fmul(b, - nir_i2f(b, dests[0]), - nir_imm_float(b, 2.0))); - dests[1] = nir_imm_float(b, 0.0); - dests[2] = nir_imm_float(b, 0.0); - dests[3] = nir_imm_float(b, 1.0); - } else if (input_var->data.location >= VARYING_SLOT_VAR0) { - if (c->fs_key->point_sprite_mask & - (1 << (input_var->data.location - - VARYING_SLOT_VAR0))) { - if (!c->fs_key->is_points) { - dests[0] = nir_imm_float(b, 0.0); - dests[1] = nir_imm_float(b, 0.0); - } - if (c->fs_key->point_coord_upper_left) { - dests[1] = nir_fsub(b, - nir_imm_float(b, 1.0), - dests[1]); - } - dests[2] = nir_imm_float(b, 0.0); - dests[3] = nir_imm_float(b, 1.0); + if (result != &intr->dest.ssa) { + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, + nir_src_for_ssa(result), + result->parent_instr); } } - - replace_intrinsic_with_vec4(b, intr, dests); } static void @@ -309,7 +302,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, { nir_variable *output_var = NULL; nir_foreach_variable(var, &c->s->outputs) { - if (var->data.driver_location == intr->const_index[0]) { + if (var->data.driver_location == nir_intrinsic_base(intr)) { output_var = var; break; } @@ -322,89 +315,43 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, nir_instr_remove(&intr->instr); return; } - - /* Color output is lowered by vc4_nir_lower_blend(). */ - if (c->stage == QSTAGE_FRAG && - (output_var->data.location == FRAG_RESULT_COLOR || - output_var->data.location == FRAG_RESULT_DATA0 || - output_var->data.location == FRAG_RESULT_SAMPLE_MASK)) { - intr->const_index[0] *= 4; - return; - } - - /* All TGSI-to-NIR outputs are VEC4. 
*/ - assert(intr->num_components == 4); - - /* We only accept direct outputs and TGSI only ever gives them to us - * with an offset value of 0. - */ - assert(nir_src_as_const_value(intr->src[1]) && - nir_src_as_const_value(intr->src[1])->u[0] == 0); - - b->cursor = nir_before_instr(&intr->instr); - - for (unsigned i = 0; i < intr->num_components; i++) { - nir_intrinsic_instr *intr_comp = - nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output); - intr_comp->num_components = 1; - intr_comp->const_index[0] = intr->const_index[0] * 4 + i; - - assert(intr->src[0].is_ssa); - intr_comp->src[0] = - nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); - intr_comp->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_builder_instr_insert(b, &intr_comp->instr); - } - - nir_instr_remove(&intr->instr); } static void vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - /* All TGSI-to-NIR uniform loads are vec4, but we need byte offsets - * in the backend. - */ - if (intr->num_components == 1) - return; - assert(intr->num_components == 4); - b->cursor = nir_before_instr(&intr->instr); - /* Generate scalar loads equivalent to the original VEC4. */ + /* Generate scalar loads equivalent to the original vector. */ nir_ssa_def *dests[4]; for (unsigned i = 0; i < intr->num_components; i++) { nir_intrinsic_instr *intr_comp = nir_intrinsic_instr_create(c->s, intr->intrinsic); intr_comp->num_components = 1; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, + intr->dest.ssa.bit_size, NULL); - /* Convert the uniform (not user_clip_plane) offset to bytes. - * If it happens to be a constant, constant-folding will clean - * up the shift for us. + /* Convert the uniform offset to bytes. If it happens + * to be a constant, constant-folding will clean up + * the shift for us. 
*/ - if (intr->intrinsic == nir_intrinsic_load_uniform) { - /* Convert the base offset to bytes and add the - * component - */ - intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4); + nir_intrinsic_set_base(intr_comp, + nir_intrinsic_base(intr) * 16 + + i * 4); + nir_intrinsic_set_range(intr_comp, + nir_intrinsic_range(intr) * 16 - i * 4); - intr_comp->src[0] = - nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, - nir_imm_int(b, 4))); - } else { - assert(intr->intrinsic == - nir_intrinsic_load_user_clip_plane); - intr_comp->const_index[0] = intr->const_index[0] * 4 + i; - } + intr_comp->src[0] = + nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, + nir_imm_int(b, 4))); dests[i] = &intr_comp->dest.ssa; nir_builder_instr_insert(b, &intr_comp->instr); } - replace_intrinsic_with_vec4(b, intr, dests); + replace_intrinsic_with_vec(b, intr, dests); } static void @@ -428,35 +375,25 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, break; case nir_intrinsic_load_uniform: - case nir_intrinsic_load_user_clip_plane: vc4_nir_lower_uniform(c, b, intr); break; + case nir_intrinsic_load_user_clip_plane: default: break; } } static bool -vc4_nir_lower_io_block(nir_block *block, void *arg) +vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) { - struct vc4_compile *c = arg; - nir_function_impl *impl = - nir_cf_node_get_function(&block->cf_node); - nir_builder b; nir_builder_init(&b, impl); - nir_foreach_instr_safe(block, instr) - vc4_nir_lower_io_instr(c, &b, instr); - - return true; -} - -static bool -vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) -{ - nir_foreach_block(impl, vc4_nir_lower_io_block, c); + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) + vc4_nir_lower_io_instr(c, &b, instr); + } nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); @@ -465,9 +402,9 @@ vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) } void -vc4_nir_lower_io(struct vc4_compile *c) +vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c) { - nir_foreach_function(c->s, function) { + nir_foreach_function(function, s) { if (function->impl) vc4_nir_lower_io_impl(c, function->impl); }
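
A note on the fragment-shader input hunks above: since FS inputs are already scalarized by nir_lower_io_to_scalar() (the assert on intr->num_components == 1), the point-sprite/PNTC fixup works per component. Below is a plain-float model of what each component evaluates to after vc4_nir_lower_fs_input(); the helper is hypothetical and for illustration only, with "t" standing in for the hardware-supplied point coordinate of components 0/1.

  /* Hypothetical helper, not part of the patch: plain-float model of the
   * value one component of a point-sprite varying (or PNTC) takes after
   * vc4_nir_lower_fs_input().
   */
  #include <stdbool.h>

  static float
  point_sprite_component(float t, int comp, bool is_points,
                         bool point_coord_upper_left)
  {
          float result = t;

          switch (comp) {
          case 0:
          case 1:
                  /* Not drawing points: PNTC is undefined, force 0.0. */
                  if (!is_points)
                          result = 0.0f;
                  break;
          case 2:
                  result = 0.0f;
                  break;
          case 3:
                  result = 1.0f;
                  break;
          }

          /* Flip T when the point coordinate origin is upper-left. */
          if (point_coord_upper_left && comp == 1)
                  result = 1.0f - result;

          return result;
  }

Components 2 and 3 are forced to 0.0 and 1.0, matching the constants the old vec4 path wrote into dests[2] and dests[3], presumably because only the S/T coordinates come from the hardware.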
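
Similarly, for the load_uniform hunk: the lowered scalar loads address uniforms in bytes (the removed comment notes the backend needs byte offsets). A standalone sketch of that offset arithmetic, assuming, as the *16 base scaling and the nir_ishl(..., 4) on the indirect source imply, that the incoming base and indirect index are in vec4-slot units; the helper name is made up for illustration.

  /* Hypothetical helper, not part of the patch: byte offset of the i'th
   * scalar component of a lowered uniform load, mirroring
   * nir_intrinsic_set_base(..., base * 16 + i * 4) plus the shifted
   * indirect source in vc4_nir_lower_uniform().
   */
  #include <stdint.h>

  static uint32_t
  scalar_uniform_byte_offset(uint32_t vec4_base, uint32_t component,
                             uint32_t indirect_vec4_index)
  {
          /* 16 bytes per vec4 slot, 4 bytes per scalar component; the
           * indirect index is scaled from vec4 units to bytes the same
           * way (<< 4).
           */
          return vec4_base * 16 + component * 4 +
                 (indirect_vec4_index << 4);
  }

For example, component 2 of uniform slot 3 with no indirect index resolves to byte offset 3 * 16 + 2 * 4 = 56.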