Needed by ARB_gpu_shader5.
v2: select DMAD for FMA with double precision
v3: add and select DFMA
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
}
/* if we get here, we missed a shader cap above (and should have seen
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
}
/* if we get here, we missed a shader cap above (and should have seen
{ 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
{ 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
{ 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
- { 0, 0, 0, 0, 0, 0, NONE, "", 19 }, /* removed */
+ { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
{ 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
{ 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
{ 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
{ 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
{ 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
- { 0, 0, 0, 0, 0, 0, NONE, "", 118 }, /* removed */
+ { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
{ 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
{ 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
{ 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_SUB:
case TGSI_OPCODE_LRP:
+ case TGSI_OPCODE_FMA:
case TGSI_OPCODE_FRC:
case TGSI_OPCODE_CEIL:
case TGSI_OPCODE_CLAMP:
is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
* ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
DLDEXP are supported.
+* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only)
+ are supported.
.. _pipe_compute_cap:
dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
+.. opcode:: FMA - Fused Multiply-Add
+
+Perform a * b + c with no intermediate rounding step.
+
+.. math::
+
+ dst.x = src0.x \times src1.x + src2.x
+
+ dst.y = src0.y \times src1.y + src2.y
+
+ dst.z = src0.z \times src1.z + src2.z
+
+ dst.w = src0.w \times src1.w + src2.w
+
+
.. opcode:: DP2A - 2-component Dot Product And Add
.. math::
dst.zw = src0.zw \times src1.zw + src2.zw
+.. opcode:: DFMA - Fused Multiply-Add
+
+Perform a * b + c with no intermediate rounding step.
+
+.. math::
+
+ dst.xy = src0.xy \times src1.xy + src2.xy
+
+ dst.zw = src0.zw \times src1.zw + src2.zw
+
+
.. opcode:: DRCP - Reciprocal
.. math::
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
default:
debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
default:
debug_printf("unknown vertex shader param %d\n", param);
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
default:
debug_printf("unknown fragment shader param %d\n", param);
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
}
return 0;
{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {19, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {19, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {19, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
}
return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
}
/* If we get here, we failed to handle a cap above */
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
}
/* If we get here, we failed to handle a cap above */
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
PIPE_SHADER_CAP_DOUBLES,
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
+ PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
};
/**
#define TGSI_OPCODE_MAD 16
#define TGSI_OPCODE_SUB 17
#define TGSI_OPCODE_LRP 18
- /* gap */
+#define TGSI_OPCODE_FMA 19
#define TGSI_OPCODE_SQRT 20
#define TGSI_OPCODE_DP2A 21
/* gap */
#define TGSI_OPCODE_BREAKC 115
#define TGSI_OPCODE_KILL_IF 116 /* conditional kill */
#define TGSI_OPCODE_END 117 /* aka HALT */
- /* gap */
+#define TGSI_OPCODE_DFMA 118
#define TGSI_OPCODE_F2I 119
#define TGSI_OPCODE_IDIV 120
#define TGSI_OPCODE_IMAX 121
#define TGSI_OPCODE_DSNE 206 /* SM5 */
#define TGSI_OPCODE_DRCP 207 /* eg, cayman */
#define TGSI_OPCODE_DSQRT 208 /* eg, cayman also has DRSQ */
-#define TGSI_OPCODE_DMAD 209 /* DFMA? */
+#define TGSI_OPCODE_DMAD 209
#define TGSI_OPCODE_DFRAC 210 /* eg, cayman */
#define TGSI_OPCODE_DLDEXP 211 /* eg, cayman */
#define TGSI_OPCODE_DFRACEXP 212 /* eg, cayman */
int glsl_version;
bool native_integers;
bool have_sqrt;
+ bool have_fma;
variable_storage *find_variable_storage(ir_variable *var);
case3fid(ADD, UADD, DADD);
case3fid(MUL, UMUL, DMUL);
case3fid(MAD, UMAD, DMAD);
+ case3fid(FMA, UMAD, DFMA);
case3(DIV, IDIV, UDIV);
case4d(MAX, IMAX, UMAX, DMAX);
case4d(MIN, IMIN, UMIN, DMIN);
emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
break;
case ir_triop_fma:
- /* NOTE: Perhaps there should be a special opcode that enforces fused
- * mul-add. Just use MAD for now.
- */
- emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
+ /* In theory, MAD is incorrect here. */
+ if (have_fma)
+ emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
+ else
+ emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
break;
case ir_unop_interpolate_at_centroid:
emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+ v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
_mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,