unsigned thread_width_max =
(group_size + simd_size - 1) / simd_size;
- uint32_t right_mask = (1u << simd_size) - 1;
+ uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
const unsigned right_non_aligned = group_size & (simd_size - 1);
if (right_non_aligned != 0)
right_mask >>= (simd_size - right_non_aligned);
OUT_BATCH(right_mask); /* Right Execution Mask */
OUT_BATCH(0xffffffff); /* Bottom Execution Mask */
ADVANCE_BATCH();
+
+ BEGIN_BATCH(2);
+ OUT_BATCH(MEDIA_STATE_FLUSH << 16 | (2 - 2));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
}