gallium: add LDEXP TGSI instruction and corresponding cap
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 15 Sep 2017 14:51:14 +0000 (16:51 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 29 Sep 2017 10:08:01 +0000 (12:08 +0200)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
20 files changed:
src/gallium/auxiliary/gallivm/lp_bld_limits.h
src/gallium/auxiliary/tgsi/tgsi_exec.c
src/gallium/auxiliary/tgsi/tgsi_exec.h
src/gallium/auxiliary/tgsi/tgsi_info.c
src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
src/gallium/docs/source/screen.rst
src/gallium/docs/source/tgsi.rst
src/gallium/drivers/etnaviv/etnaviv_screen.c
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/i915/i915_screen.c
src/gallium/drivers/nouveau/nv30/nv30_screen.c
src/gallium/drivers/nouveau/nv50/nv50_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/r300/r300_screen.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/svga/svga_screen.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/include/pipe/p_defines.h
src/gallium/include/pipe/p_shader_tokens.h

index 421eeda4e7aa40e9744a47437925b5db3ebf71fb..ea320bb5f7bf0631da09012a9f0a394bfe9f1094 100644 (file)
@@ -134,6 +134,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
       return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
index 2a47f5dfaef1d4674bf4ba4be0e4eb04d4eca6d5..9c019a311d7f8797c6a6475ae848c29326b51f70 100644 (file)
@@ -1453,6 +1453,17 @@ micro_pow(
 #endif
 }
 
+/**
+ * LDEXP micro-op: scale each float channel of src0 by two raised to the
+ * integer stored in the matching channel of src1.
+ *
+ * dst  - receives the four results through its float view
+ * src0 - values to scale, read through the float view
+ * src1 - exponents, read through the integer view
+ */
+static void
+micro_ldexp(union tgsi_exec_channel *dst,
+            const union tgsi_exec_channel *src0,
+            const union tgsi_exec_channel *src1)
+{
+   unsigned chan;
+
+   /* The four channels are independent; handle them in ascending order. */
+   for (chan = 0; chan < 4; chan++) {
+      const float value = src0->f[chan];
+      const int exponent = src1->i[chan];
+
+      dst->f[chan] = ldexpf(value, exponent);
+   }
+}
+
 static void
 micro_sub(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
@@ -5082,6 +5093,10 @@ exec_instruction(
       exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
+   case TGSI_OPCODE_LDEXP:
+      /* LDEXP is a per-component opcode (each dst channel is computed from
+       * the matching src0/src1 channel), so it must go through the vector
+       * helper rather than the scalar one used by replicating opcodes such
+       * as POW.
+       */
+      exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
+      break;
+
    case TGSI_OPCODE_COS:
       exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
index d28d99793b8b9a7e18c1b954af2f835ca7aa2f5a..514c69ede307b8d4f4f05998c60c707124cca32e 100644 (file)
@@ -534,6 +534,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
index 62b41c031b34e819500e30805b0bdb7bde696319..4e399508e5b2603dea896cc37b86b53e00efd4ee 100644 (file)
@@ -244,7 +244,8 @@ tgsi_opcode_infer_type( uint opcode )
 enum tgsi_opcode_type
 tgsi_opcode_infer_src_type(uint opcode, uint src_idx)
 {
-   if (src_idx == 1 && opcode == TGSI_OPCODE_DLDEXP)
+   if (src_idx == 1 &&
+       (opcode == TGSI_OPCODE_DLDEXP || opcode == TGSI_OPCODE_LDEXP))
       return TGSI_TYPE_SIGNED;
 
    switch (opcode) {
index 3f39afe21964630e0e62193138d881154ce0fc28..fdb0f1078a1eba79ccfedcf83ee4ec017b302d8f 100644 (file)
@@ -19,7 +19,7 @@ OPCODE(1, 2, OTHR, TEX_LZ, .is_tex = 1)
 OPCODE(1, 3, COMP, LRP)
 OPCODE(1, 3, COMP, FMA)
 OPCODE(1, 1, REPL, SQRT)
-OPCODE_GAP(21) /* removed */
+OPCODE(1, 2, COMP, LDEXP)
 OPCODE(1, 1, COMP, F2U64)
 OPCODE(1, 1, COMP, F2I64)
 OPCODE(1, 1, COMP, FRC)
index f344354e33c1087ac3ac3b6c355a76d654743f3a..553b8e661a4fdd70569701ca995236dd406a3fa7 100644 (file)
@@ -485,6 +485,7 @@ MOV OUT[0], CONST[0][3]  # copy vector 3 of constbuf 0
   is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
 * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
   DLDEXP are supported.
+* ``PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED``: Whether LDEXP is supported.
 * ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only)
   are supported.
 * ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
index fd78c40ba3cec388cd606506c040243d1f3b79be..274646703b44f1f56260eb531714afb57771bf48 100644 (file)
@@ -351,6 +351,18 @@ This instruction replicates its result.
   dst = src0.x^{src1.x}
 
 
+.. opcode:: LDEXP - Multiply Number by Integral Power of 2
+
+src1 is an integer.
+
+.. math::
+
+  dst.x = src0.x * 2^{src1.x}
+  dst.y = src0.y * 2^{src1.y}
+  dst.z = src0.z * 2^{src1.z}
+  dst.w = src0.w * 2^{src1.w}
+
+
 .. opcode:: COS - Cosine
 
 This instruction replicates its result.
index 8ffda61428b46d903162f1e2aa9bb7214ed8cc28..42905ab06206b298e84a0bc3c2203dbd0d82c6db 100644 (file)
@@ -442,6 +442,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
       return 4096;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return false;
index 061cc86c6d3829cb1afb511000531974ce5bcacb..040c2c99ec0a9b6f36f03e4274e57ae327261ccd 100644 (file)
@@ -518,6 +518,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
        case PIPE_SHADER_CAP_SUBROUTINES:
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
                return 0;
index bacd40a57d467dcbb2f938e6d47c7eb99a382bc3..8411c0f15cc1fd38a141b9cc509c19ba403bc77b 100644 (file)
@@ -165,6 +165,7 @@ i915_get_shader_param(struct pipe_screen *screen,
          return I915_TEX_UNITS;
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
          return 0;
index 0d40bfed7a090bf6840b6ab5b7c3c62cb5630075..a66b4fbe67b08f201eec548bb4e0de7dd8cb8af4 100644 (file)
@@ -318,6 +318,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen,
       case PIPE_SHADER_CAP_FP16:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
@@ -367,6 +368,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen,
       case PIPE_SHADER_CAP_FP16:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
index 706e8d369ba1208f607823c3fcb47857bd759099..479283e1b7cc57e70b06b4b7e90e31005b7dff69 100644 (file)
@@ -363,6 +363,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
       return 32;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
index 87fc754f4c7f49f191a30afa294241d471064070..ac850c493dafffedf2c51f5e1db501f6a3ea9766 100644 (file)
@@ -403,6 +403,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
    case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
    case PIPE_SHADER_CAP_INT64_ATOMICS:
index 130b91b4ba9e8d92ccd4429d770c2323996708dd..0c3e097535dbf0fa7acecc48125fbefaf5cc2e19 100644 (file)
@@ -358,6 +358,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen,
         case PIPE_SHADER_CAP_FP16:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
         case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
         case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
@@ -421,6 +422,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen,
         case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
         case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
         case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
index 9ed7a17c5c7cf8d35fcbf6470330d64bd6a5bc14..655b5411ed5e5a1db0ea4d5f6a278d296dbfe979 100644 (file)
@@ -596,6 +596,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
                return 0;
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
        case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
index 97e3847240927438a0ff9d9222aa88e54e5b780f..c82aff23b5ffd2003bd889af3cacd8411c499a8f 100644 (file)
@@ -768,6 +768,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
        case PIPE_SHADER_CAP_SUBROUTINES:
        case PIPE_SHADER_CAP_SUPPORTED_IRS:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
                return 0;
        }
        return 0;
index 9209974d28d083d24ffe5550d04b1c72c777e1c2..08cc50f6664286a159ed0de0ae7434f87ba9f1e3 100644 (file)
@@ -533,6 +533,7 @@ vgpu9_get_shader_param(struct pipe_screen *screen,
          return 0;
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
@@ -597,6 +598,7 @@ vgpu9_get_shader_param(struct pipe_screen *screen,
          return 0;
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
@@ -694,6 +696,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
       return 0;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
index 4f38346d838e8c5b83baa762e45fc682b4ce449c..5743e13045fc3655f83b5e94a371e352ebc039d7 100644 (file)
@@ -411,6 +411,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen,
         case PIPE_SHADER_CAP_FP16:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
                 return 0;
index be5907e4fb234fed2f4424f321125e2568eb1e5e..155d0e3979c3c677b0c60a44e61939557579bd29 100644 (file)
@@ -848,6 +848,7 @@ enum pipe_shader_cap
    PIPE_SHADER_CAP_MAX_SHADER_IMAGES,
    PIPE_SHADER_CAP_LOWER_IF_THRESHOLD,
    PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS,
+   PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED,
 };
 
 /**
index fa73054e5fd82eaae7c0cc7b2f05220bf373dd86..a5adedd9875b475f315f74d22f741d1197a8541f 100644 (file)
@@ -359,7 +359,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_LRP                 18
 #define TGSI_OPCODE_FMA                 19
 #define TGSI_OPCODE_SQRT                20
-/* gap */
+#define TGSI_OPCODE_LDEXP               21
 #define TGSI_OPCODE_F2U64               22
 #define TGSI_OPCODE_F2I64               23
 #define TGSI_OPCODE_FRC                 24