If the load_interpolated_input is scalarized, we would be too
conservative and wrongly decide that the tex instruction wasn't a
candidate for pre-fetch:
vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
vec2 32 ssa_1 = intrinsic load_barycentric_pixel () (0) /* interp_mode=0 */
vec1 32 ssa_2 = intrinsic load_interpolated_input (ssa_1, ssa_0) (0, 0) /* base=0 */ /* component=0 */ /* packed:v_uv,v_uv1 */
vec1 32 ssa_3 = intrinsic load_interpolated_input (ssa_1, ssa_0) (0, 1) /* base=0 */ /* component=1 */ /* packed:v_uv,v_uv1 */
vec2 32 ssa_8 = vec2 ssa_2, ssa_3
vec4 32 ssa_9 = tex ssa_8 (coord), 0 (texture), 0 (sampler)
Really, we don't care whether the texcoord components come from different
load_interpolated_input instructions, only that they have consecutive
varying offsets.
Reported-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
if (alu->op != nir_op_vec2)
return -1;
- for (int i = 0; i < 2; i++) {
+ if (!alu->src[0].src.is_ssa)
+ return -1;
+
+ int base_offset = coord_offset(alu->src[0].src.ssa) +
+ alu->src[0].swizzle[0];
+
+ /* NOTE it might be possible to support more than 2D? */
+ for (int i = 1; i < 2; i++) {
if (!alu->src[i].src.is_ssa)
return -1;
- if (alu->src[i].swizzle[0] != (alu->src[0].swizzle[0] + i))
- return -1;
+ int nth_offset = coord_offset(alu->src[i].src.ssa) +
+ alu->src[i].swizzle[0];
- if (alu->src[i].src.ssa != alu->src[0].src.ssa)
+ if (nth_offset != (base_offset + i))
return -1;
}
- int off = coord_offset(alu->src[0].src.ssa);
- if (off < 0)
- return -1;
-
- return off + alu->src[0].swizzle[0];
+ return base_offset;
}
if (parent_instr->type != nir_instr_type_intrinsic)