Ken suggested instead of a big and complicated optimization pass, to
just recognize the operations here. It's certainly less code and a lot
prettier, but it seems to actually perform worse for currently unknown
reasons.
total instructions in shared programs:
8923452 ->
8904108 (-0.22%)
instructions in affected programs: 814563 -> 795219 (-2.37%)
helped: 3336
HURT: 10
total cycles in shared programs:
66970734 ->
66651476 (-0.48%)
cycles in affected programs:
10582686 ->
10263428 (-3.02%)
helped: 2438
HURT: 691
total spills in shared programs: 1811 -> 1789 (-1.21%)
spills in affected programs: 85 -> 63 (-25.88%)
helped: 4
total fills in shared programs: 3143 -> 3109 (-1.08%)
fills in affected programs: 167 -> 133 (-20.36%)
helped: 4
LOST: 2
GAINED: 36
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
switch (op) {
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXL:
+ if (devinfo->gen >= 9 && op == SHADER_OPCODE_TXL && lod.is_zero()) {
+ op = SHADER_OPCODE_TXL_LZ;
+ break;
+ }
bld.MOV(sources[length], lod);
length++;
break;
length++;
}
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
- length++;
+ if (devinfo->gen >= 9 && lod.is_zero()) {
+ op = SHADER_OPCODE_TXF_LZ;
+ } else {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
+ length++;
+ }
for (unsigned i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);