From 17baa01bfbebf71c68aebea5196ebcb313612038 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Wed, 19 Aug 2009 15:32:45 +0100
Subject: [PATCH] llvmpipe: Put the position coefficients together with the
 inputs.

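The position coefficients now occupy slot 0 of the quad_interp_coef arrays,
and shader input N moves to slot 1 + N.

The automatic search'n'replace leaves lp_setup.c a bit ugly, but this code
will eventually be code-generated as well.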
---
 src/gallium/drivers/llvmpipe/lp_quad.h        |  7 +-
 .../drivers/llvmpipe/lp_quad_depth_test.c     | 12 +--
 src/gallium/drivers/llvmpipe/lp_quad_fs.c     | 16 +--
 src/gallium/drivers/llvmpipe/lp_setup.c       | 98 +++++++++----------
 4 files changed, 65 insertions(+), 68 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h
index d4b5fc5d860..92977495de9 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad.h
@@ -93,9 +93,9 @@ struct quad_header_output
  */
 struct quad_interp_coef
 {
-   float ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-   float ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-   float ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
 };
 
 
@@ -110,7 +110,6 @@ struct quad_header {
 
    /* Redundant/duplicated:
     */
-   const struct tgsi_interp_coef *posCoef;
    const struct quad_interp_coef *coef;
 };
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
index 48a6671c244..fefb99c1ffa 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
@@ -104,9 +104,9 @@ interpolate_quad_depth( struct quad_header *quad )
 {
    const float fx = (float) quad->input.x0;
    const float fy = (float) quad->input.y0;
-   const float dzdx = quad->posCoef->dadx[2];
-   const float dzdy = quad->posCoef->dady[2];
-   const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+   const float dzdx = quad->coef->dadx[0][2];
+   const float dzdy = quad->coef->dady[0][2];
+   const float z0 = quad->coef->a0[0][2] + dzdx * fx + dzdy * fy;
 
    quad->output.depth[0] = z0;
    quad->output.depth[1] = z0 + dzdx;
@@ -722,9 +722,9 @@ depth_interp_z16_less_write(struct quad_stage *qs,
    const unsigned iy = quads[0]->input.y0;
    const float fx = (float) ix;
    const float fy = (float) iy;
-   const float dzdx = quads[0]->posCoef->dadx[2];
-   const float dzdy = quads[0]->posCoef->dady[2];
-   const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+   const float dzdx = quads[0]->coef->dadx[0][2];
+   const float dzdy = quads[0]->coef->dady[0][2];
+   const float z0 = quads[0]->coef->a0[0][2] + dzdx * fx + dzdy * fy;
    struct llvmpipe_cached_tile *tile;
    ushort (*depth16)[TILE_SIZE];
    ushort idepth[4], depth_step;
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
index 8fa357dd4f3..d5c3f57a45a 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
@@ -69,7 +69,7 @@ quad_shade_stage(struct quad_stage *qs)
 
 static void
 setup_pos_vector(struct quad_shade_stage *qss,
-                 const struct tgsi_interp_coef *coef,
+                 const struct quad_interp_coef *coef,
                  float x, float y)
 {
    uint chan;
@@ -88,9 +88,9 @@ setup_pos_vector(struct quad_shade_stage *qss,
 
    /* do Z and W for all fragments in the quad */
    for (chan = 2; chan < 4; chan++) {
-      const float dadx = coef->dadx[chan];
-      const float dady = coef->dady[chan];
-      const float a0 = coef->a0[chan] + dadx * x + dady * y;
+      const float dadx = coef->dadx[0][chan];
+      const float dady = coef->dady[0][chan];
+      const float a0 = coef->a0[0][chan] + dadx * x + dady * y;
       qss->pos[chan].f[0] = a0;
       qss->pos[chan].f[1] = a0 + dadx;
       qss->pos[chan].f[2] = a0 + dady;
@@ -113,7 +113,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
 
    /* Compute X, Y, Z, W vals for this quad */
    setup_pos_vector(qss,
-                    quad->posCoef,
+                    quad->coef,
                     (float)quad->input.x0, (float)quad->input.y0);
 
 
@@ -125,9 +125,9 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
 
    /* run shader */
    llvmpipe->fs->jit_function( qss->pos,
-                               quad->coef->a0,
-                               quad->coef->dadx,
-                               quad->coef->dady,
+                               quad->coef->a0 + 1,
+                               quad->coef->dadx + 1,
+                               quad->coef->dady + 1,
                                constants,
                                qss->mask,
                                quad->output.color,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 3474f9f82bb..d45b34f3603 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -94,7 +94,6 @@ struct setup_context {
    unsigned count;
 
    struct quad_interp_coef coef;
-   struct tgsi_interp_coef posCoef;  /* For Z, W */
 
    struct {
       int left[2];   /**< [0] = row0, [1] = row1 */
@@ -398,8 +397,8 @@ static void tri_pos_coeff( struct setup_context *setup,
 
    assert(i <= 3);
 
-   setup->posCoef.dadx[i] = dadx;
-   setup->posCoef.dady[i] = dady;
+   setup->coef.dadx[0][i] = dadx;
+   setup->coef.dady[0][i] = dady;
 
    /* calculate a0 as the value which would be sampled for the
     * fragment at (0,0), taking into account that we want to sample at
@@ -413,7 +412,7 @@ static void tri_pos_coeff( struct setup_context *setup,
     * to define a0 as the sample at a pixel center somewhere near vmin
     * instead - i'll switch to this later.
     */
-   setup->posCoef.a0[i] = (setup->vmin[vertSlot][i] -
+   setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
                            (dadx * (setup->vmin[0][0] - 0.5f) +
                             dady * (setup->vmin[0][1] - 0.5f)));
 
@@ -437,12 +436,12 @@ static void tri_pos_coeff( struct setup_context *setup,
 static void const_pos_coeff( struct setup_context *setup,
                              uint vertSlot, unsigned i)
 {
-   setup->posCoef.dadx[i] = 0;
-   setup->posCoef.dady[i] = 0;
+   setup->coef.dadx[0][i] = 0;
+   setup->coef.dady[0][i] = 0;
 
    /* need provoking vertex info!
     */
-   setup->posCoef.a0[i] = setup->vprovoke[vertSlot][i];
+   setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i];
 }
 
 
@@ -459,12 +458,12 @@ static void const_coeff( struct setup_context *setup,
 {
    unsigned i;
    for (i = 0; i < NUM_CHANNELS; ++i) {
-      setup->coef.dadx[attrib][i] = 0;
-      setup->coef.dady[attrib][i] = 0;
+      setup->coef.dadx[1 + attrib][i] = 0;
+      setup->coef.dady[1 + attrib][i] = 0;
 
       /* need provoking vertex info!
        */
-      setup->coef.a0[attrib][i] = setup->vprovoke[vertSlot][i];
+      setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i];
    }
 }
 
@@ -488,8 +487,8 @@ static void tri_linear_coeff( struct setup_context *setup,
 
       assert(i <= 3);
 
-      setup->coef.dadx[attrib][i] = dadx;
-      setup->coef.dady[attrib][i] = dady;
+      setup->coef.dadx[1 + attrib][i] = dadx;
+      setup->coef.dady[1 + attrib][i] = dady;
 
       /* calculate a0 as the value which would be sampled for the
        * fragment at (0,0), taking into account that we want to sample at
@@ -503,7 +502,7 @@ static void tri_linear_coeff( struct setup_context *setup,
        * to define a0 as the sample at a pixel center somewhere near vmin
        * instead - i'll switch to this later.
        */
-      setup->coef.a0[attrib][i] = (setup->vmin[vertSlot][i] -
+      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
                      (dadx * (setup->vmin[0][0] - 0.5f) +
                       dady * (setup->vmin[0][1] - 0.5f)));
 
@@ -553,9 +552,9 @@ static void tri_persp_coeff( struct setup_context *setup,
       */
       assert(i <= 3);
 
-      setup->coef.dadx[attrib][i] = dadx;
-      setup->coef.dady[attrib][i] = dady;
-      setup->coef.a0[attrib][i] = (mina -
+      setup->coef.dadx[1 + attrib][i] = dadx;
+      setup->coef.dady[1 + attrib][i] = dady;
+      setup->coef.a0[1 + attrib][i] = (mina -
                      (dadx * (setup->vmin[0][0] - 0.5f) +
                       dady * (setup->vmin[0][1] - 0.5f)));
    }
@@ -572,21 +571,21 @@ static void
 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 {
    /*X*/
-   setup->coef.a0[slot][0] = 0;
-   setup->coef.dadx[slot][0] = 1.0;
-   setup->coef.dady[slot][0] = 0.0;
+   setup->coef.a0[1 + slot][0] = 0;
+   setup->coef.dadx[1 + slot][0] = 1.0;
+   setup->coef.dady[1 + slot][0] = 0.0;
    /*Y*/
-   setup->coef.a0[slot][1] = 0.0;
-   setup->coef.dadx[slot][1] = 0.0;
-   setup->coef.dady[slot][1] = 1.0;
+   setup->coef.a0[1 + slot][1] = 0.0;
+   setup->coef.dadx[1 + slot][1] = 0.0;
+   setup->coef.dady[1 + slot][1] = 1.0;
    /*Z*/
-   setup->coef.a0[slot][2] = setup->posCoef.a0[2];
-   setup->coef.dadx[slot][2] = setup->posCoef.dadx[2];
-   setup->coef.dady[slot][2] = setup->posCoef.dady[2];
+   setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2];
+   setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2];
+   setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2];
    /*W*/
-   setup->coef.a0[slot][3] = setup->posCoef.a0[3];
-   setup->coef.dadx[slot][3] = setup->posCoef.dadx[3];
-   setup->coef.dady[slot][3] = setup->posCoef.dady[3];
+   setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3];
+   setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3];
+   setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3];
 }
 
 
@@ -630,9 +629,9 @@ static void setup_tri_coefficients( struct setup_context *setup )
       }
 
       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
-         setup->coef.a0[fragSlot][0] = 1.0f - setup->facing;
-         setup->coef.dadx[fragSlot][0] = 0.0;
-         setup->coef.dady[fragSlot][0] = 0.0;
+         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
+         setup->coef.dadx[1 + fragSlot][0] = 0.0;
+         setup->coef.dady[1 + fragSlot][0] = 0.0;
       }
    }
 }
@@ -842,9 +841,9 @@ linear_pos_coeff(struct setup_context *setup,
    const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
    const float dady = da * setup->emaj.dy * setup->oneoverarea;
-   setup->posCoef.dadx[i] = dadx;
-   setup->posCoef.dady[i] = dady;
-   setup->posCoef.a0[i] = (setup->vmin[vertSlot][i] -
+   setup->coef.dadx[0][i] = dadx;
+   setup->coef.dady[0][i] = dady;
+   setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
                            (dadx * (setup->vmin[0][0] - 0.5f) +
                             dady * (setup->vmin[0][1] - 0.5f)));
 }
@@ -864,9 +863,9 @@ line_linear_coeff(struct setup_context *setup,
       const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
       const float dadx = da * setup->emaj.dx * setup->oneoverarea;
       const float dady = da * setup->emaj.dy * setup->oneoverarea;
-      setup->coef.dadx[attrib][i] = dadx;
-      setup->coef.dady[attrib][i] = dady;
-      setup->coef.a0[attrib][i] = (setup->vmin[vertSlot][i] -
+      setup->coef.dadx[1 + attrib][i] = dadx;
+      setup->coef.dady[1 + attrib][i] = dady;
+      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
                      (dadx * (setup->vmin[0][0] - 0.5f) +
                       dady * (setup->vmin[0][1] - 0.5f)));
    }
@@ -890,9 +889,9 @@ line_persp_coeff(struct setup_context *setup,
       const float da = a1 - a0;
       const float dadx = da * setup->emaj.dx * setup->oneoverarea;
       const float dady = da * setup->emaj.dy * setup->oneoverarea;
-      setup->coef.dadx[attrib][i] = dadx;
-      setup->coef.dady[attrib][i] = dady;
-      setup->coef.a0[attrib][i] = (setup->vmin[vertSlot][i] -
+      setup->coef.dadx[1 + attrib][i] = dadx;
+      setup->coef.dady[1 + attrib][i] = dady;
+      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
                      (dadx * (setup->vmin[0][0] - 0.5f) +
                       dady * (setup->vmin[0][1] - 0.5f)));
    }
@@ -959,9 +958,9 @@ setup_line_coefficients(struct setup_context *setup,
       }
 
       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
-         setup->coef.a0[fragSlot][0] = 1.0f - setup->facing;
-         setup->coef.dadx[fragSlot][0] = 0.0;
-         setup->coef.dady[fragSlot][0] = 0.0;
+         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
+         setup->coef.dadx[1 + fragSlot][0] = 0.0;
+         setup->coef.dady[1 + fragSlot][0] = 0.0;
       }
    }
    return TRUE;
@@ -1122,9 +1121,9 @@ point_persp_coeff(struct setup_context *setup,
 {
    unsigned i;
    for(i = 0; i < NUM_CHANNELS; ++i) {
-      setup->coef.dadx[attrib][i] = 0.0F;
-      setup->coef.dady[attrib][i] = 0.0F;
-      setup->coef.a0[attrib][i] = vert[vertSlot][i] * vert[0][3];
+      setup->coef.dadx[1 + attrib][i] = 0.0F;
+      setup->coef.dady[1 + attrib][i] = 0.0F;
+      setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3];
    }
 }
 
@@ -1203,9 +1202,9 @@ llvmpipe_setup_point( struct setup_context *setup,
       }
 
       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
-         setup->coef.a0[fragSlot][0] = 1.0f - setup->facing;
-         setup->coef.dadx[fragSlot][0] = 0.0;
-         setup->coef.dady[fragSlot][0] = 0.0;
+         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
+         setup->coef.dadx[1 + fragSlot][0] = 0.0;
+         setup->coef.dady[1 + fragSlot][0] = 0.0;
       }
    }
 
@@ -1379,7 +1378,6 @@ struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *ll
 
    for (i = 0; i < MAX_QUADS; i++) {
       setup->quad[i].coef = &setup->coef;
-      setup->quad[i].posCoef = &setup->posCoef;
    }
 
    setup->span.left[0] = 1000000;     /* greater than right[0] */
-- 
2.30.2