nouveau: add result scaling to shader backend, use it in RSQ emul for NV40.
author Ben Skeggs <darktama@iinet.net.au>
Thu, 25 Jan 2007 02:40:51 +0000 (13:40 +1100)
committer Ben Skeggs <darktama@iinet.net.au>
Thu, 25 Jan 2007 02:40:51 +0000 (13:40 +1100)
src/mesa/drivers/dri/nouveau/nouveau_shader.c
src/mesa/drivers/dri/nouveau/nouveau_shader.h
src/mesa/drivers/dri/nouveau/nouveau_shader_0.c
src/mesa/drivers/dri/nouveau/nouveau_shader_2.c
src/mesa/drivers/dri/nouveau/nv40_fragprog.c
src/mesa/drivers/dri/nouveau/nv40_shader.h

index f911347d624671ca8d42fd95dd72b7a3b6d6924a..cdb79fca1e6a7b7aa36cc1243fd6e9016a1bd723 100644 (file)
@@ -179,7 +179,7 @@ nvsBuildTextShader(GLcontext *ctx, GLenum target, const char *text)
                                     strlen(text),
                                     &nvs->mesa.vp);
    } else if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      _mesa_init_fragment_program(ctx, &nvs->mesa.fp, GL_VERTEX_PROGRAM_ARB, 0);
+      _mesa_init_fragment_program(ctx, &nvs->mesa.fp, GL_FRAGMENT_PROGRAM_ARB, 0);
       _mesa_parse_arb_fragment_program(ctx,
                                       GL_FRAGMENT_PROGRAM_ARB,
                                       text,
index 8b4be9dfe7c5cc43b88526f7cc36ea5fba6db26a..7329ccd9eadc5548e5228c7519cef924bb2f1591 100644 (file)
@@ -194,6 +194,16 @@ typedef enum {
    NVS_TEX_TARGET_UNKNOWN = 0
 } nvsTexTarget;
 
+typedef enum { /* Scale factor applied to an instruction's result (the dest_scale field of nvs_instruction) */
+       NVS_SCALE_1X     = 0, /* no scaling; pass0_emit assigns this to every new instruction */
+       NVS_SCALE_2X     = 1,
+       NVS_SCALE_4X     = 2,
+       NVS_SCALE_8X     = 3,
+       NVS_SCALE_INV_2X = 5, /* value 4 is left undefined; the RSQ emulation uses this in place of its former multiply by 0.5 */
+       NVS_SCALE_INV_4X = 6,
+       NVS_SCALE_INV_8X = 7,
+} nvsScale; /* NV40FPSetResultScale shifts these numeric values unmodified into the instruction word */
+
 /* Arith/TEX instructions */
 typedef struct nvs_instruction {
    nvsFragmentHeader header;
@@ -203,6 +213,7 @@ typedef struct nvs_instruction {
 
    nvsRegister dest;
    unsigned int        mask;
+   nvsScale    dest_scale;
 
    nvsRegister src[3];
 
@@ -307,6 +318,7 @@ struct _nvsFunc {
 
    void                (*InitInstruction)      (nvsFunc *);
    int         (*SupportsOpcode)       (nvsFunc *, nvsOpcode);
+   int         (*SupportsResultScale)  (nvsFunc *, nvsScale);
    void                (*SetOpcode)            (nvsFunc *, unsigned int opcode,
                                         int slot);
    void                (*SetCCUpdate)          (nvsFunc *);
@@ -314,6 +326,7 @@ struct _nvsFunc {
                                         nvsSwzComp *swizzle);
    void                (*SetResult)            (nvsFunc *, nvsRegister *,
                                         unsigned int mask, int slot);
+   void                (*SetResultScale)       (nvsFunc *, nvsScale);
    void                (*SetSource)            (nvsFunc *, nvsRegister *, int pos);
    void                (*SetTexImageUnit)      (nvsFunc *, int unit);
    void                (*SetSaturate)          (nvsFunc *);
index 28c6ad803b18b6e8ea5e6f11800b9c87fd030c50..3bcc2ba755c78dcc11e7c4a2036c3996eda3dedf 100644 (file)
@@ -402,6 +402,7 @@ pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos,
        sif->saturate   = saturate;
        sif->dest       = dst;
        sif->mask       = mask;
+       sif->dest_scale = NVS_SCALE_1X;
        sif->src[0]     = src0;
        sif->src[1]     = src1;
        sif->src[2]     = src2;
@@ -667,25 +668,13 @@ pass0_emulate_instruction(nouveauShader *nvs,
                }
                break;
        case OPCODE_RSQ:
-               if (rec->const_half.file != NVS_FILE_CONST) {
-                       GLfloat const_half[4] = { 0.5, 0.0, 0.0, 0.0 };
-                       pass0_make_reg(nvs, &rec->const_half, NVS_FILE_CONST,
-                                       _mesa_add_unnamed_constant(
-                                               nvs->mesa.vp.Base.Parameters,
-                                               const_half, 4));
-                       COPY_4V(nvs->params[rec->const_half.index].val,
-                               const_half);
-               }
                pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
                ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
                                 nvsAbs(nvsSwizzle(src[0], X, X, X, X)),
                                 nvr_unused, nvr_unused);
-               ARITHu(NVS_OP_MUL, temp, SMASK_X, 0,
-                                nvsSwizzle(temp, X, X, X, X),
-                                nvsNegate(rec->const_half),
-                                nvr_unused);
+               nvsinst->dest_scale = NVS_SCALE_INV_2X;
                ARITH (NVS_OP_EX2, dest, mask, sat,
-                                nvsSwizzle(temp, X, X, X, X),
+                                nvsNegate(nvsSwizzle(temp, X, X, X, X)),
                                 nvr_unused, nvr_unused);
                break;
        case OPCODE_SCS:
index c106fd2d9493685af133b8a8011d2acf143bc3bc..b043f877e420b5e52f91afd98d5bbba2d3303eee 100644 (file)
@@ -135,6 +135,10 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst,
 
        reg = pass2_mangle_reg(nvs, inst, inst->dest);
        shader->SetResult(shader, &reg, inst->mask, slot);
+
+       if (inst->dest_scale != NVS_SCALE_1X) {
+               shader->SetResultScale(shader, inst->dest_scale);
+       }
 }
 
 static int
index 8bca6ae93833453a56314da060e0eaa63f694cd1..3e4ae0496e4b3f1ca5e115191f5cf536b791a9bf 100644 (file)
@@ -11,6 +11,30 @@ struct _op_xlat NVFP_TX_BOP[64];
  *     - These extend the NV30 routines, which are almost identical.  NV40
  *       just has branching hacked into the instruction set.
  */
+static int /* Returns 1 when `scale` is one of the seven named nvsScale values, 0 otherwise */
+NV40FPSupportsResultScale(nvsFunc *shader, nvsScale scale) /* `shader` is not read here */
+{
+       switch (scale) {
+       case NVS_SCALE_1X:
+       case NVS_SCALE_2X:
+       case NVS_SCALE_4X:
+       case NVS_SCALE_8X:
+       case NVS_SCALE_INV_2X:
+       case NVS_SCALE_INV_4X:
+       case NVS_SCALE_INV_8X:
+               return 1;
+       default: /* any other integer, e.g. 4, which nvsScale does not name */
+               return 0;
+       }
+}
+
+static void /* Stores `scale` in the DST_SCALE field of shader->inst[2] */
+NV40FPSetResultScale(nvsFunc *shader, nvsScale scale)
+{
+       shader->inst[2] &= ~NV40_FP_OP_DST_SCALE_MASK; /* clear whatever scale bits the mask covers */
+       shader->inst[2] |= ((unsigned int)scale << NV40_FP_OP_DST_SCALE_SHIFT); /* NOTE(review): value is neither masked nor validated; INV scales (5-7) need three bits - confirm the mask above spans all of them */
+}
+
 static void
 NV40FPSetBranchTarget(nvsFunc *shader, int addr)
 {
@@ -179,6 +203,9 @@ NV40FPInitShaderFuncs(nvsFunc * shader)
    MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_REP , NVS_OP_REP , -1, -1, -1);
    MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_RET , NVS_OP_RET , -1, -1, -1);
 
+   shader->SupportsResultScale = NV40FPSupportsResultScale;
+   shader->SetResultScale      = NV40FPSetResultScale;
+
    /* fragment.facing */
    shader->GetSourceID         = NV40FPGetSourceID;
 
index 2a2b5639b6c855d3f3b27f25ccc4233fd23d3ac1..584f4c23e0838e6fd36016e929b946b0d3107a65 100644 (file)
 
 /* high order bits of SRC1 */
 #define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31)
-#define NV40_FP_OP_SRC_SCALE_SHIFT                                            28
-#define NV40_FP_OP_SRC_SCALE_MASK                                      (3 << 28)
+#define NV40_FP_OP_DST_SCALE_SHIFT                                            28 /* result-scale field starts at bit 28 */
+#define NV40_FP_OP_DST_SCALE_MASK                                      (7 << 28) /* bits 28-30: nvsScale encodings reach 7, so three bits are needed; bit 31 is IS_BRANCH */
 
 /* SRC1 LOOP */
 #define NV40_FP_OP_LOOP_INCR_SHIFT                                            19