From: Brian Paul Date: Wed, 29 Oct 2008 00:21:03 +0000 (-0600) Subject: gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f4e9526addc617dc78af9b1af781ffe09ce62504;p=mesa.git gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with constants --- diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8eff6d4fda3..ff402634001 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -54,7 +54,7 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], - float (*immeds)[4][4], + float (*immeds)[4], float (*consts)[4], const float *builtins); @@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, output_stride ); #else shader->func(inputs_soa, outputs_soa, temps_soa, - (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) shader->base.immediates, (float (*)[4]) constants, ppc_builtin_constants); @@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; - vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); vs->machine = &draw->vs.machine; @@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw, if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, - (float (*)[4])vs->base.immediates, + (float (*)[4]) vs->base.immediates, TRUE )) goto fail; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 0de9b972b4b..dd574ac02a6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_IMMEDIATE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); - ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our immediates start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); } break; case TGSI_FILE_CONSTANT: @@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, assert(size <= 4); assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); for (i = 0; i < size; i++) { - const float value = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - imm[num_immediates * 4 + 0] = - imm[num_immediates * 4 + 1] = - imm[num_immediates * 4 + 2] = - imm[num_immediates * 4 + 3] = value; - num_immediates++; + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; } + num_immediates++; } break;