gallium: add FMA and DFMA opcodes (v3)
author: Marek Olšák <marek.olsak@amd.com>
Fri, 27 Feb 2015 23:26:31 +0000 (00:26 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 16 Mar 2015 11:54:18 +0000 (12:54 +0100)
Needed by ARB_gpu_shader5.

v2: select DMAD for FMA with double precision
v3: add and select DFMA

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
20 files changed:
src/gallium/auxiliary/gallivm/lp_bld_limits.h
src/gallium/auxiliary/tgsi/tgsi_exec.h
src/gallium/auxiliary/tgsi/tgsi_info.c
src/gallium/auxiliary/tgsi/tgsi_util.c
src/gallium/docs/source/screen.rst
src/gallium/docs/source/tgsi.rst
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/i915/i915_screen.c
src/gallium/drivers/nouveau/nv30/nv30_screen.c
src/gallium/drivers/nouveau/nv50/nv50_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/r300/r300_screen.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/svga/svga_screen.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/include/pipe/p_defines.h
src/gallium/include/pipe/p_shader_tokens.h
src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 296236006bc1c79172e89122b987dce4cc4c2c75..c5c51c18a0a4301a5e1fc0db73c439f69b00aec3 100644 (file)
@@ -129,6 +129,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_DOUBLES:
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    }
    /* if we get here, we missed a shader cap above (and should have seen
index 609c81b1d472516feb3cb2acd6fc67c9da0890c9..0e59b884897ecd4044c234379b0d7ff81c3ee5fd 100644 (file)
@@ -459,6 +459,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    }
    /* if we get here, we missed a shader cap above (and should have seen
index 4d838fdd52ca17fe4f6b5e0651c82a35dc23e325..11947097e9f137cd2378641c0512d29a8cddb28e 100644 (file)
@@ -56,7 +56,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
    { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
    { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 19 },      /* removed */
+   { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
    { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
    { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
    { 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
@@ -155,7 +155,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
    { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
    { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 118 },     /* removed */
+   { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
    { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
    { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
    { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
index d572ff03d781f7e9f075bf236988f50454dfa0dc..e5b8427a03073720ffe46a751fb501b4dd49fbfa 100644 (file)
@@ -193,6 +193,7 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
    case TGSI_OPCODE_MAD:
    case TGSI_OPCODE_SUB:
    case TGSI_OPCODE_LRP:
+   case TGSI_OPCODE_FMA:
    case TGSI_OPCODE_FRC:
    case TGSI_OPCODE_CEIL:
    case TGSI_OPCODE_CLAMP:
index e0fd1a2dbac391f01de55ff65a0ed7da0f89d47c..26cc9ffc6f70ae0a054efce3d2d34172778c24f8 100644 (file)
@@ -336,6 +336,8 @@ to be 0.
   is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
 * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
   DLDEXP are supported.
+* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether the fused multiply-add
+  opcodes FMA and DFMA (the latter for doubles only) are supported.
 
 
 .. _pipe_compute_cap:
index b0a975aa70aa281dd4e50357065e6e81d7811213..7771136f167a2df0db49e8b24260bbd568029d36 100644 (file)
@@ -272,6 +272,21 @@ This instruction replicates its result.
   dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
 
 
+.. opcode:: FMA - Fused Multiply-Add
+
+Perform src0 * src1 + src2 per component with no intermediate rounding step.
+
+.. math::
+
+  dst.x = src0.x \times src1.x + src2.x
+
+  dst.y = src0.y \times src1.y + src2.y
+
+  dst.z = src0.z \times src1.z + src2.z
+
+  dst.w = src0.w \times src1.w + src2.w
+
+
 .. opcode:: DP2A - 2-component Dot Product And Add
 
 .. math::
@@ -1962,6 +1977,17 @@ source is an integer.
   dst.zw = src0.zw \times src1.zw + src2.zw
 
 
+.. opcode:: DFMA - Fused Multiply-Add
+
+Perform src0 * src1 + src2 per double with no intermediate rounding step.
+
+.. math::
+
+  dst.xy = src0.xy \times src1.xy + src2.xy
+
+  dst.zw = src0.zw \times src1.zw + src2.zw
+
+
 .. opcode:: DRCP - Reciprocal
 
 .. math::
index a4699e4b69e53921c5df77e1635b59d5d114e3c8..1d735137042d62042012f68757caaf83e3f9233a 100644 (file)
@@ -363,6 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
        case PIPE_SHADER_CAP_DOUBLES:
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
                return 0;
        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
                return 1;
index dc76464f1f6a59e9020eb071f8ab406f032ac637..50847e2b42a97d0f2637cf7c895fc227a3f40443 100644 (file)
@@ -158,6 +158,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       default:
          debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
index 0fca9e06af0456a2bbedbaeb5c441cc6182fa548..eeb714864e2a8f5a262998932cb68e6624d1a7d4 100644 (file)
@@ -250,6 +250,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       default:
          debug_printf("unknown vertex shader param %d\n", param);
@@ -289,6 +290,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       default:
          debug_printf("unknown fragment shader param %d\n", param);
index ed07ba442dbe191edcf5aba3ad36c5b7ff9ccafa..829dfbc13fa28589a6424e5f247392f9334538bc 100644 (file)
@@ -289,6 +289,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_DOUBLES:
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
index 686d892d0e5cdafb006f077485d7f21bd9448bd7..04c34f537ea17fa7a64188d7fa89b138c1fd91a3 100644 (file)
@@ -295,6 +295,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
       return 16; /* would be 32 in linked (OpenGL-style) mode */
index fca8001a03d6d1ee4c1a96b2491b0caccbae7ee5..752d7e59fd5f60100a58e11461d1753016b273e0 100644 (file)
@@ -287,6 +287,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
         case PIPE_SHADER_CAP_DOUBLES:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
             return 0;
         case PIPE_SHADER_CAP_PREFERRED_IR:
             return PIPE_SHADER_IR_TGSI;
@@ -341,6 +342,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
         case PIPE_SHADER_CAP_DOUBLES:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
             return 0;
         case PIPE_SHADER_CAP_PREFERRED_IR:
             return PIPE_SHADER_IR_TGSI;
index 24d901ead8194d6a88318843f00578b31132cd3e..21e5d42adc315718d4b9f7e978cf11969dd294c0 100644 (file)
@@ -493,6 +493,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
                return 0;
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
                return 0;
        }
        return 0;
index 2ee59c8bac00906f888ec79fc7d19c9c63f6bf2d..54540c3840e0ecaf550581f340e5afc85df9f3c1 100644 (file)
@@ -7295,7 +7295,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_MAD,       1, ALU_OP3_MULADD, tgsi_op3},
        {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
        {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
-       {19,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_FMA,       0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
        {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
        {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7494,7 +7494,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_MAD,       1, ALU_OP3_MULADD, tgsi_op3},
        {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
        {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
-       {19,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_FMA,       0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
        {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
        {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7693,7 +7693,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_MAD,       1, ALU_OP3_MULADD, tgsi_op3},
        {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
        {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
-       {19,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_FMA,       0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
        {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
        {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
index f1a53883f2a618e10c573a037eb92350935a7667..0aacab12db7a14ca32e14782ead6ba670ed03486 100644 (file)
@@ -425,6 +425,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        case PIPE_SHADER_CAP_DOUBLES:
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
                return 0;
        }
        return 0;
index bac0dbcdff96181a6bf04a0cb2f87359e8c36bc1..7b01d35a93a9326b5f08347f415b3584ed385f78 100644 (file)
@@ -375,6 +375,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       }
       /* If we get here, we failed to handle a cap above */
@@ -431,6 +432,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       }
       /* If we get here, we failed to handle a cap above */
index 7c628470ea8ab2fb24d6955a09faa49be3b2b513..0be8ec2c980a11a57a3e0ac0a363fd23610a7f55 100644 (file)
@@ -319,6 +319,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
         case PIPE_SHADER_CAP_DOUBLES:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
                 return 0;
         case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
         case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
index a8ffe9cfff38d98da6647d9e1be778b401ac301d..67f48e42936b090d115ea703c4f1890a579bf6d2 100644 (file)
@@ -644,6 +644,7 @@ enum pipe_shader_cap
    PIPE_SHADER_CAP_DOUBLES,
    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
    PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
+   PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
 };
 
 /**
index 95ac5900f3b0bcd6f6f3a83bd055092e67bdd5fc..c14bcbca33613cc2c4f8d7674b172011feebfb1e 100644 (file)
@@ -306,7 +306,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_MAD                 16
 #define TGSI_OPCODE_SUB                 17
 #define TGSI_OPCODE_LRP                 18
-                                /* gap */
+#define TGSI_OPCODE_FMA                 19
 #define TGSI_OPCODE_SQRT                20
 #define TGSI_OPCODE_DP2A                21
                                 /* gap */
@@ -404,7 +404,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_BREAKC              115
 #define TGSI_OPCODE_KILL_IF             116  /* conditional kill */
 #define TGSI_OPCODE_END                 117  /* aka HALT */
-                                /* gap */
+#define TGSI_OPCODE_DFMA                118
 #define TGSI_OPCODE_F2I                 119
 #define TGSI_OPCODE_IDIV                120
 #define TGSI_OPCODE_IMAX                121
@@ -510,7 +510,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_DSNE                206 /* SM5 */
 #define TGSI_OPCODE_DRCP                207 /* eg, cayman */
 #define TGSI_OPCODE_DSQRT               208 /* eg, cayman also has DRSQ */
-#define TGSI_OPCODE_DMAD                209 /* DFMA? */
+#define TGSI_OPCODE_DMAD                209
 #define TGSI_OPCODE_DFRAC               210 /* eg, cayman */
 #define TGSI_OPCODE_DLDEXP              211 /* eg, cayman */
 #define TGSI_OPCODE_DFRACEXP            212 /* eg, cayman */
index bd191d864fe600ee13b72b5766dcba98e31ec8dd..efee4b258e5aba9e7fb57f292894f5fe32466e17 100644 (file)
@@ -332,6 +332,7 @@ public:
    int glsl_version;
    bool native_integers;
    bool have_sqrt;
+   bool have_fma;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
@@ -836,6 +837,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
       case3fid(ADD, UADD, DADD);
       case3fid(MUL, UMUL, DMUL);
       case3fid(MAD, UMAD, DMAD);
+      case3fid(FMA, UMAD, DFMA);
       case3(DIV, IDIV, UDIV);
       case4d(MAX, IMAX, UMAX, DMAX);
       case4d(MIN, IMIN, UMIN, DMIN);
@@ -2222,10 +2224,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
       break;
    case ir_triop_fma:
-      /* NOTE: Perhaps there should be a special opcode that enforces fused
-       * mul-add. Just use MAD for now.
-       */
-      emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
+      /* MAD is not guaranteed to be fused, so it is only a fallback without FMA. */
+      if (have_fma)
+         emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
+      else
+         emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
       break;
    case ir_unop_interpolate_at_centroid:
       emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
@@ -5564,6 +5567,8 @@ get_mesa_program(struct gl_context *ctx,
 
    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+   v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
+                                           PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
 
    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,