gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with...
author Brian Paul <brian.paul@tungstengraphics.com>
Wed, 29 Oct 2008 00:21:03 +0000 (18:21 -0600)
committer Brian Paul <brian.paul@tungstengraphics.com>
Wed, 29 Oct 2008 00:21:03 +0000 (18:21 -0600)
src/gallium/auxiliary/draw/draw_vs_ppc.c
src/gallium/auxiliary/tgsi/tgsi_ppc.c

index 8eff6d4fda3f99496bb1902799bc9bea1289e5cf..ff402634001192b44c862be8527016a009e20885 100644 (file)
@@ -54,7 +54,7 @@
 typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
                                              float (*outputs)[4][4],
                                              float (*temps)[4][4],
-                                             float (*immeds)[4][4],
+                                             float (*immeds)[4],
                                              float (*consts)[4],
                                              const float *builtins);
 
@@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
                    output_stride );
 #else
       shader->func(inputs_soa, outputs_soa, temps_soa,
-                  (float (*)[4][4]) shader->base.immediates,
+                  (float (*)[4]) shader->base.immediates,
                   (float (*)[4]) constants,
                    ppc_builtin_constants);
 
@@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw,
    vs->base.run_linear = vs_ppc_run_linear;
    vs->base.delete = vs_ppc_delete;
    
-   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 *
+   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
                                       sizeof(float), 16);
 
    vs->machine = &draw->vs.machine;
@@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw,
 
    if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
                        &vs->ppc_program, 
-                        (float (*)[4])vs->base.immediates, 
+                       (float (*)[4]) vs->base.immediates, 
                         TRUE )) 
       goto fail;
       
index 0de9b972b4bb62523ba0c93c48b042c0b46dafc7..dd574ac02a65c6a54c04fbcc1e4e978f2cbf746b 100644 (file)
@@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen,
          break;
       case TGSI_FILE_IMMEDIATE:
          {
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
-            ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our immediates start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
          }
          break;
       case TGSI_FILE_CONSTANT:
@@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
             assert(size <= 4);
             assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
             for (i = 0; i < size; i++) {
-               const float value =
-                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
-               imm[num_immediates * 4 + 0] = 
-               imm[num_immediates * 4 + 1] = 
-               imm[num_immediates * 4 + 2] = 
-               imm[num_immediates * 4 + 3] = value;
-               num_immediates++;
+               immediates[num_immediates][i] =
+                 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
             }
+            num_immediates++;
          }
          break;