From: Eric Anholt Date: Thu, 26 Apr 2018 16:24:32 +0000 (-0700) Subject: broadcom/vc5: Add support for centroid varyings. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=76ee9edcb4f5be8699cfb9a6c4aa231c4e7d4183;p=mesa.git broadcom/vc5: Add support for centroid varyings. It would be nice to share the flags packet emit logic with flat shade flags, but I couldn't come up with a good way while still using our pack macros. We need to refactor this to shader record setup at compile time, anyway. Fixes ext_framebuffer_multisample-interpolation * centroid-* --- diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index d6c2d1902b6..ec8f22321f3 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -436,6 +436,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, /* FALLTHROUGH */ case INTERP_MODE_SMOOTH: if (var->data.centroid) { + BITSET_SET(c->centroid_flags, i); return vir_FADD(c, vir_FMUL(c, vary, c->payload_w_centroid), r5); } else { @@ -1985,6 +1986,36 @@ vir_emit_last_thrsw(struct v3d_compile *c) c->last_thrsw->is_last_thrsw = true; } +/* There's a flag in the shader for "centroid W used in addition to center W", + * so we need to walk the program after VIR optimization to see if both are + * used. 
+ */ +static void +vir_check_payload_w(struct v3d_compile *c) +{ + if (c->s->info.stage != MESA_SHADER_FRAGMENT) + return; + + bool any_centroid = false; + for (int i = 0; i < ARRAY_SIZE(c->centroid_flags); i++) { + if (c->centroid_flags[i]) + any_centroid = true; + } + if (!any_centroid) + return; + + vir_for_each_inst_inorder(inst, c) { + for (int i = 0; i < vir_get_nsrc(inst); i++) { + if (inst->src[i].file == QFILE_REG && + inst->src[i].index == 0) { + c->uses_centroid_and_center_w = true; + return; + } + } + } + +} + void v3d_nir_to_vir(struct v3d_compile *c) { @@ -2024,6 +2055,8 @@ v3d_nir_to_vir(struct v3d_compile *c) vir_optimize(c); vir_lower_uniforms(c); + vir_check_payload_w(c); + /* XXX: vir_schedule_instructions(c); */ if (V3D_DEBUG & (V3D_DEBUG_VIR | diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index e89ea7be217..4dba23c067e 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -478,6 +478,10 @@ struct v3d_compile { */ uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + + bool uses_centroid_and_center_w; + struct v3d_ubo_range *ubo_ranges; bool *ubo_range_used; uint32_t ubo_ranges_array_size; @@ -657,8 +661,11 @@ struct v3d_fs_prog_data { */ uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; + uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; + bool writes_z; bool discard; + bool uses_centroid_and_center_w; }; /* Special nir_load_input intrinsic index for loading the current TLB diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 93990ee806f..0de5335d12a 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -758,6 +758,9 @@ v3d_set_fs_prog_data_inputs(struct v3d_compile *c, for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) { if (BITSET_TEST(c->flat_shade_flags, i)) prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24); + + if 
(BITSET_TEST(c->centroid_flags, i)) + prog_data->centroid_flags[i / 24] |= 1 << (i % 24); } } @@ -838,6 +841,7 @@ uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler, prog_data->writes_z = (c->s->info.outputs_written & (1 << FRAG_RESULT_DEPTH)); prog_data->discard = c->s->info.fs.uses_discard; + prog_data->uses_centroid_and_center_w = c->uses_centroid_and_center_w; return v3d_return_qpu_insts(c, final_assembly_size); } diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h index e08a2a59909..9e55f80f947 100644 --- a/src/gallium/drivers/vc5/vc5_context.h +++ b/src/gallium/drivers/vc5/vc5_context.h @@ -82,6 +82,7 @@ void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); #define VC5_DIRTY_FS_INPUTS (1 << 26) #define VC5_DIRTY_STREAMOUT (1 << 27) #define VC5_DIRTY_OQ (1 << 28) +#define VC5_DIRTY_CENTROID_FLAGS (1 << 29) #define VC5_MAX_FS_INPUTS 64 diff --git a/src/gallium/drivers/vc5/vc5_draw.c b/src/gallium/drivers/vc5/vc5_draw.c index ff14d1c1358..ecb1aa37831 100644 --- a/src/gallium/drivers/vc5/vc5_draw.c +++ b/src/gallium/drivers/vc5/vc5_draw.c @@ -177,6 +177,9 @@ vc5_emit_gl_shader_state(struct vc5_context *vc5, (vc5->prog.fs->prog_data.fs->writes_z || vc5->prog.fs->prog_data.fs->discard); + shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = + vc5->prog.fs->prog_data.fs->uses_centroid_and_center_w; + shader.number_of_varyings_in_fragment_shader = vc5->prog.fs->prog_data.base->num_inputs; diff --git a/src/gallium/drivers/vc5/vc5_emit.c b/src/gallium/drivers/vc5/vc5_emit.c index 0d11d7e1ad1..cb8af953300 100644 --- a/src/gallium/drivers/vc5/vc5_emit.c +++ b/src/gallium/drivers/vc5/vc5_emit.c @@ -589,6 +589,45 @@ v3dX(emit_state)(struct pipe_context *pctx) } } +#if V3D_VERSION >= 40 + if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) { + if 
(!vc5->prog.fs->prog_data.fs->centroid_flags[i]) + continue; + + cl_emit(&job->bcl, CENTROID_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + ((i == 0) ? + V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.centroid_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->centroid_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); + } + } +#endif + /* Set up the transform feedback data specs (which VPM entries to * output to which buffers). */ diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c index d885cdf2975..23d2d73a2dc 100644 --- a/src/gallium/drivers/vc5/vc5_program.c +++ b/src/gallium/drivers/vc5/vc5_program.c @@ -477,10 +477,22 @@ vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode) vc5->dirty |= VC5_DIRTY_COMPILED_FS; - if (old_fs && - vc5->prog.fs->prog_data.fs->flat_shade_flags != - old_fs->prog_data.fs->flat_shade_flags) { - vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + if (old_fs) { + /* The flag words are arrays embedded in the prog_data, so "!=" + * would only compare the arrays' addresses, not their contents. + * Compare the flag words themselves instead. + */ + if (memcmp(vc5->prog.fs->prog_data.fs->flat_shade_flags, + old_fs->prog_data.fs->flat_shade_flags, + sizeof(old_fs->prog_data.fs->flat_shade_flags))) { + vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + } + + if (memcmp(vc5->prog.fs->prog_data.fs->centroid_flags, + old_fs->prog_data.fs->centroid_flags, + sizeof(old_fs->prog_data.fs->centroid_flags))) { + vc5->dirty |= VC5_DIRTY_CENTROID_FLAGS; + } } if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,