+ emit_endif();
+}
+
+void
+brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
+{
+ /* We do this computation by performing the following operations:
+ *
+ * In case of 4x, 8x MSAA:
+ * - Compute the pixel coordinates and sample numbers (a, b, c, d)
+ * which are later used for interpolation
+ * - linearly interpolate samples a and b in X
+ * - linearly interpolate samples c and d in X
+ * - linearly interpolate the results of last two operations in Y
+ *
+ * result = lrp(lrp(a, b), lrp(c, d))
+ */
+ struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
+ struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
+ struct brw_reg t1_f = retype(t1, BRW_REGISTER_TYPE_F);
+ struct brw_reg t2_f = retype(t2, BRW_REGISTER_TYPE_F);
+
+ for (unsigned i = 0; i < 4; ++i) {
+ assert(i < ARRAY_SIZE(texture_data));
+ s_is_zero = false;
+
+ /* Compute pixel coordinates */
+ emit_add(vec16(x_sample_coords), Xp_f,
+ brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale)));
+ emit_add(vec16(y_sample_coords), Yp_f,
+ brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale)));
+ emit_mov(vec16(X), x_sample_coords);
+ emit_mov(vec16(Y), y_sample_coords);
+
+ /* The MCS value we fetch has to match up with the pixel that we're
+ * sampling from. Since we sample from different pixels in each
+ * iteration of this "for" loop, the call to mcs_fetch() should be
+ * here inside the loop after computing the pixel coordinates.
+ */
+ if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
+ mcs_fetch();
+
+ /* Compute sample index and map the sample index to a sample number.
+ * Sample index layout shows the numbering of slots in a rectangular
+ * grid of samples within a pixel. Sample number layout shows the
+ * rectangular grid of samples roughly corresponding to the real sample
+ * locations within a pixel.
+ * In case of 4x MSAA, layout of sample indices matches the layout of
+ * sample numbers:
+ * ---------
+ * | 0 | 1 |
+ * ---------
+ * | 2 | 3 |
+ * ---------
+ *
+ * In case of 8x MSAA the two layouts don't match.
+ * sample index layout : --------- sample number layout : ---------
+ * | 0 | 1 | | 5 | 2 |
+ * --------- ---------
+ * | 2 | 3 | | 4 | 6 |
+ * --------- ---------
+ * | 4 | 5 | | 0 | 3 |
+ * --------- ---------
+ * | 6 | 7 | | 7 | 1 |
+ * --------- ---------
+ */
+ emit_frc(vec16(t1_f), x_sample_coords);
+ emit_frc(vec16(t2_f), y_sample_coords);
+ emit_mul(vec16(t1_f), t1_f, brw_imm_f(key->x_scale));
+ emit_mul(vec16(t2_f), t2_f, brw_imm_f(key->x_scale * key->y_scale));
+ emit_add(vec16(t1_f), t1_f, t2_f);
+ emit_mov(vec16(S), t1_f);
+
+ if (num_samples == 8) {
+ /* Map the sample index to a sample number */
+ emit_cmp_if(BRW_CONDITIONAL_L, S, brw_imm_d(4));
+ {
+ emit_mov(vec16(t2), brw_imm_d(5));
+ emit_if_eq_mov(S, 1, vec16(t2), 2);
+ emit_if_eq_mov(S, 2, vec16(t2), 4);
+ emit_if_eq_mov(S, 3, vec16(t2), 6);
+ }
+ emit_else();
+ {
+ emit_mov(vec16(t2), brw_imm_d(0));
+ emit_if_eq_mov(S, 5, vec16(t2), 3);
+ emit_if_eq_mov(S, 6, vec16(t2), 7);
+ emit_if_eq_mov(S, 7, vec16(t2), 1);
+ }
+ emit_endif();
+ emit_mov(vec16(S), t2);
+ }
+ texel_fetch(texture_data[i]);
+ }
+
+#define SAMPLE(x, y) offset(texture_data[x], y)
+ for (int index = 3; index > 0; ) {
+ /* Since we're doing SIMD16, the 4 color channels fit into 8 registers.
+ * The counter bound of 8 in the 'for' loop below is used to interpolate
+ * all the color components.
+ */
+ for (int k = 0; k < 8; k += 2)
+ emit_lrp(vec8(SAMPLE(index - 1, k)),
+ x_frac,
+ vec8(SAMPLE(index, k)),
+ vec8(SAMPLE(index - 1, k)));
+ index -= 2;
+ }
+ for (int k = 0; k < 8; k += 2)
+ emit_lrp(vec8(SAMPLE(0, k)),
+ y_frac,
+ vec8(SAMPLE(2, k)),
+ vec8(SAMPLE(0, k)));
+#undef SAMPLE