Classic compiler mistake. In the example below, the OMOD optimization
was combining instructions 4 and 10, but there was an instruction (#8)
in between them that wrote to the same register as instruction 10, so
after the merge instruction 11 was reading the value written by
instruction 8 instead of the result of the multiply.
Example of the mistake:
Before OMOD:
4: MAD temp[0].y, temp[3]._y__, const[0]._x__, const[0]._y__;
...
8: ADD temp[2].x, temp[1].x___, -temp[4].x___;
...
10: MUL temp[2].x, const[1].y___, temp[0].y___;
11: FRC temp[5].x, temp[2].x___;
After OMOD:
4: MAD temp[2].x / 8, temp[3]._y__, const[0]._x__, const[0]._y__;
...
8: ADD temp[2].x, temp[1].x___, -temp[4].x___;
...
11: FRC temp[5].x, temp[2].x___;
https://bugs.freedesktop.org/show_bug.cgi?id=41367
}
}
+static void omod_filter_writer_cb( /* Write callback: sets Clobbered when a write overlaps the register described by d->Writer. */
+ void * userdata, /* pointer to a struct peephole_mul_cb_data (converted below) */
+ struct rc_instruction * inst, /* instruction being visited; not used in this callback */
+ rc_register_file file, /* register file reported for this write */
+ unsigned int index, /* register index reported for this write */
+ unsigned int mask) /* write mask reported for this write */
+{
+ struct peephole_mul_cb_data * d = userdata;
+ if (file == d->Writer->File && index == d->Writer->Index && /* same register file and index as d->Writer ... */
+ (mask & d->Writer->WriteMask)) { /* ... and at least one write-mask bit in common */
+ d->Clobbered = 1; /* the loop that invokes this callback breaks once this flag is set */
+ }
+}
+
static int peephole_mul_omod(
struct radeon_compiler * c,
struct rc_instruction * inst_mul,
inst = inst->Prev) {
rc_for_all_reads_mask(inst, omod_filter_reader_cb,
&cb_data);
+ rc_for_all_writes_mask(inst, omod_filter_writer_cb,
+ &cb_data);
if (cb_data.Clobbered) {
break;
}