INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
+ INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
.lower_POW = true,
.lower_EXP = true,
.lower_LOG = true,
- .lower_DP2 = true,
+ .lower_DP2 = !specs->has_halti2_instructions,
.lower_TRUNC = true,
};
unsigned has_shader_range_registers : 1;
/* has the new sin/cos/log functions */
unsigned has_new_transcendentals : 1;
+ /* has the new dp2/dpX_norm instructions, among others */
+ unsigned has_halti2_instructions : 1;
/* supports single-buffer rendering with multiple pixel pipes */
unsigned single_buffer : 1;
/* has unified uniforms memory */
VIV_FEATURE(screen, chipMinorFeatures1, NON_POWER_OF_TWO);
screen->specs.has_new_transcendentals =
VIV_FEATURE(screen, chipMinorFeatures3, HAS_FAST_TRANSCENDENTALS);
+ screen->specs.has_halti2_instructions =
+ VIV_FEATURE(screen, chipMinorFeatures4, HALTI2);
if (VIV_FEATURE(screen, chipMinorFeatures3, INSTRUCTION_CACHE)) {
/* GC3000 - this core is capable of loading shaders from