60cf5e9af7d9d9c05c061aa925b3e04ecfd036c5
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/** Number of rows in the temps[] array of lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
/** Number of rows in the immediates[] array of lp_build_tgsi_soa_context. */
#define LP_MAX_IMMEDIATES 256


/**
 * Loop header that walks CHAN over 0 .. NUM_CHANNELS-1.
 *
 * CHAN must be a modifiable lvalue and is evaluated several times.  It is
 * parenthesized so that an argument such as `*p` increments the pointed-to
 * counter rather than the pointer.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first destination
 * register of INST (a pointer to a full instruction).
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Bare `if` header testing IS_DST0_CHANNEL_ENABLED.
 *
 * Because this expands to an unbraced `if`, an `else` written after the
 * controlled statement binds to it; always follow it with a braced body.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header visiting only the channels enabled in the first destination
 * register's write mask.  Expands to a `for` followed by an `if`, so the
 * same braced-body advice applies.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices used to address the second dimension of register arrays. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
80
81
82 struct lp_build_tgsi_soa_context
83 {
84 struct lp_build_context base;
85
86 LLVMValueRef x, y, w;
87 LLVMValueRef a0_ptr;
88 LLVMValueRef dadx_ptr;
89 LLVMValueRef dady_ptr;
90
91 LLVMValueRef consts_ptr;
92 LLVMValueRef (*outputs)[NUM_CHANNELS];
93 LLVMValueRef samplers_ptr;
94
95 LLVMValueRef oow;
96
97 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
98
99 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
100 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
101
102 struct lp_build_mask_context *mask;
103
104 /** Coords/texels store */
105 LLVMValueRef store_ptr;
106 };
107
108
109 /**
110 * Register fetch.
111 */
112 static LLVMValueRef
113 emit_fetch(
114 struct lp_build_tgsi_soa_context *bld,
115 const struct tgsi_full_instruction *inst,
116 unsigned index,
117 const unsigned chan_index )
118 {
119 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
120 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
121 LLVMValueRef res;
122
123 switch (swizzle) {
124 case TGSI_EXTSWIZZLE_X:
125 case TGSI_EXTSWIZZLE_Y:
126 case TGSI_EXTSWIZZLE_Z:
127 case TGSI_EXTSWIZZLE_W:
128
129 switch (reg->SrcRegister.File) {
130 case TGSI_FILE_CONSTANT: {
131 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
132 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
133 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
134 res = lp_build_broadcast_scalar(&bld->base, scalar);
135 break;
136 }
137
138 case TGSI_FILE_IMMEDIATE:
139 res = bld->immediates[reg->SrcRegister.Index][swizzle];
140 assert(res);
141 break;
142
143 case TGSI_FILE_INPUT:
144 res = bld->inputs[reg->SrcRegister.Index][swizzle];
145 assert(res);
146 break;
147
148 case TGSI_FILE_TEMPORARY:
149 res = bld->temps[reg->SrcRegister.Index][swizzle];
150 if(!res)
151 return bld->base.undef;
152 break;
153
154 default:
155 assert( 0 );
156 return bld->base.undef;
157 }
158 break;
159
160 case TGSI_EXTSWIZZLE_ZERO:
161 res = bld->base.zero;
162 break;
163
164 case TGSI_EXTSWIZZLE_ONE:
165 res = bld->base.one;
166 break;
167
168 default:
169 assert( 0 );
170 return bld->base.undef;
171 }
172
173 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
174 case TGSI_UTIL_SIGN_CLEAR:
175 res = lp_build_abs( &bld->base, res );
176 break;
177
178 case TGSI_UTIL_SIGN_SET:
179 res = lp_build_abs( &bld->base, res );
180 res = LLVMBuildNeg( bld->base.builder, res, "" );
181 break;
182
183 case TGSI_UTIL_SIGN_TOGGLE:
184 res = LLVMBuildNeg( bld->base.builder, res, "" );
185 break;
186
187 case TGSI_UTIL_SIGN_KEEP:
188 break;
189 }
190
191 return res;
192 }
193
194
195 /**
196 * Register store.
197 */
198 static void
199 emit_store(
200 struct lp_build_tgsi_soa_context *bld,
201 const struct tgsi_full_instruction *inst,
202 unsigned index,
203 unsigned chan_index,
204 LLVMValueRef value)
205 {
206 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
207
208 switch( inst->Instruction.Saturate ) {
209 case TGSI_SAT_NONE:
210 break;
211
212 case TGSI_SAT_ZERO_ONE:
213 value = lp_build_max(&bld->base, value, bld->base.zero);
214 value = lp_build_min(&bld->base, value, bld->base.one);
215 break;
216
217 case TGSI_SAT_MINUS_PLUS_ONE:
218 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
219 value = lp_build_min(&bld->base, value, bld->base.one);
220 break;
221
222 default:
223 assert(0);
224 }
225
226 switch( reg->DstRegister.File ) {
227 case TGSI_FILE_OUTPUT:
228 bld->outputs[reg->DstRegister.Index][chan_index] = value;
229 break;
230
231 case TGSI_FILE_TEMPORARY:
232 bld->temps[reg->DstRegister.Index][chan_index] = value;
233 break;
234
235 case TGSI_FILE_ADDRESS:
236 /* FIXME */
237 assert(0);
238 break;
239
240 default:
241 assert( 0 );
242 }
243 }
244
245
246 void PIPE_CDECL
247 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
248 uint32_t unit,
249 float *store )
250 {
251 struct tgsi_sampler *sampler = samplers[unit];
252
253 #if 0
254 uint j;
255
256 debug_printf("%s sampler: %p (%p) store: %p\n",
257 __FUNCTION__,
258 sampler, *sampler,
259 store );
260
261 debug_printf("lodbias %f\n", store[12]);
262
263 for (j = 0; j < 4; j++)
264 debug_printf("sample %d texcoord %f %f\n",
265 j,
266 store[0+j],
267 store[4+j]);
268 #endif
269
270 {
271 float rgba[NUM_CHANNELS][QUAD_SIZE];
272 sampler->get_samples(sampler,
273 &store[0],
274 &store[4],
275 &store[8],
276 0.0f, /*store[12], lodbias */
277 rgba);
278 memcpy(store, rgba, sizeof rgba);
279 }
280
281 #if 0
282 for (j = 0; j < 4; j++)
283 debug_printf("sample %d result %f %f %f %f\n",
284 j,
285 store[0+j],
286 store[4+j],
287 store[8+j],
288 store[12+j]);
289 #endif
290 }
291
292 /**
293 * High-level instruction translators.
294 */
295
296 static void
297 emit_tex( struct lp_build_tgsi_soa_context *bld,
298 const struct tgsi_full_instruction *inst,
299 boolean apply_lodbias,
300 boolean projected)
301 {
302 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
303 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
304 LLVMValueRef lodbias;
305 LLVMValueRef oow;
306 LLVMValueRef args[3];
307 unsigned count;
308 unsigned i;
309
310 switch (inst->InstructionExtTexture.Texture) {
311 case TGSI_TEXTURE_1D:
312 case TGSI_TEXTURE_SHADOW1D:
313 count = 1;
314 break;
315 case TGSI_TEXTURE_2D:
316 case TGSI_TEXTURE_RECT:
317 case TGSI_TEXTURE_SHADOW2D:
318 case TGSI_TEXTURE_SHADOWRECT:
319 count = 2;
320 break;
321 case TGSI_TEXTURE_3D:
322 case TGSI_TEXTURE_CUBE:
323 count = 3;
324 break;
325 default:
326 assert(0);
327 return;
328 }
329
330 if(apply_lodbias)
331 lodbias = emit_fetch( bld, inst, 0, 3 );
332 else
333 lodbias = bld->base.zero;
334
335 if(!bld->store_ptr)
336 bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
337 vec_type,
338 LLVMConstInt(LLVMInt32Type(), 4, 0),
339 "store");
340
341 if (projected) {
342 oow = emit_fetch( bld, inst, 0, 3 );
343 oow = lp_build_rcp(&bld->base, oow);
344 }
345
346 for (i = 0; i < count; i++) {
347 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
348 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
349 LLVMValueRef coord;
350
351 coord = emit_fetch( bld, inst, 0, i );
352
353 if (projected)
354 coord = lp_build_mul(&bld->base, coord, oow);
355
356 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
357 }
358
359 args[0] = bld->samplers_ptr;
360 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
361 args[2] = bld->store_ptr;
362
363 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
364
365 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
366 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
367 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
368 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
369 emit_store( bld, inst, 0, i, res );
370 }
371 }
372
373
374 static void
375 emit_kil(
376 struct lp_build_tgsi_soa_context *bld,
377 const struct tgsi_full_instruction *inst )
378 {
379 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
380 LLVMValueRef terms[NUM_CHANNELS];
381 LLVMValueRef mask;
382 unsigned chan_index;
383
384 memset(&terms, 0, sizeof terms);
385
386 FOR_EACH_CHANNEL( chan_index ) {
387 unsigned swizzle;
388
389 /* Unswizzle channel */
390 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
391
392 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
393 * not to be tested. */
394 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
395 continue;
396
397 /* Check if the component has not been already tested. */
398 assert(swizzle < NUM_CHANNELS);
399 if( !terms[swizzle] )
400 /* TODO: change the comparison operator instead of setting the sign */
401 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
402 }
403
404 mask = NULL;
405 FOR_EACH_CHANNEL( chan_index ) {
406 if(terms[chan_index]) {
407 LLVMValueRef chan_mask;
408
409 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
410
411 if(mask)
412 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
413 else
414 mask = chan_mask;
415 }
416 }
417
418 if(mask)
419 lp_build_mask_update(bld->mask, mask);
420 }
421
422
/**
 * Predicated kill: placeholder that currently emits no code.
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
429
430
431 /**
432 * Check if inst src/dest regs use indirect addressing into temporary
433 * register file.
434 */
435 static boolean
436 indirect_temp_reference(const struct tgsi_full_instruction *inst)
437 {
438 uint i;
439 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
440 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
441 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
442 reg->SrcRegister.Indirect)
443 return TRUE;
444 }
445 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
446 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
447 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
448 reg->DstRegister.Indirect)
449 return TRUE;
450 }
451 return FALSE;
452 }
453
454
455 static int
456 emit_instruction(
457 struct lp_build_tgsi_soa_context *bld,
458 struct tgsi_full_instruction *inst )
459 {
460 unsigned chan_index;
461 LLVMValueRef src0, src1, src2;
462 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
463 LLVMValueRef dst0;
464
465 /* we can't handle indirect addressing into temp register file yet */
466 if (indirect_temp_reference(inst))
467 return FALSE;
468
469 switch (inst->Instruction.Opcode) {
470 #if 0
471 case TGSI_OPCODE_ARL:
472 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
473 tmp0 = emit_fetch( bld, inst, 0, chan_index );
474 emit_flr(bld, 0, 0);
475 emit_f2it( bld, 0 );
476 emit_store( bld, inst, 0, chan_index, tmp0);
477 }
478 break;
479 #endif
480
481 case TGSI_OPCODE_MOV:
482 case TGSI_OPCODE_SWZ:
483 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
484 tmp0 = emit_fetch( bld, inst, 0, chan_index );
485 emit_store( bld, inst, 0, chan_index, tmp0);
486 }
487 break;
488
489 case TGSI_OPCODE_LIT:
490 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
491 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
492 }
493 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
494 src0 = emit_fetch( bld, inst, 0, CHAN_X );
495 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
496 emit_store( bld, inst, 0, CHAN_Y, dst0);
497 }
498 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
499 /* XMM[1] = SrcReg[0].yyyy */
500 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
501 /* XMM[1] = max(XMM[1], 0) */
502 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
503 /* XMM[2] = SrcReg[0].wwww */
504 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
505 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
506 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
507 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
508 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
509 emit_store( bld, inst, 0, CHAN_Z, dst0);
510 }
511 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
512 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
513 }
514 break;
515
516 case TGSI_OPCODE_RCP:
517 /* TGSI_OPCODE_RECIP */
518 src0 = emit_fetch( bld, inst, 0, CHAN_X );
519 dst0 = lp_build_rcp(&bld->base, src0);
520 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
521 emit_store( bld, inst, 0, chan_index, dst0 );
522 }
523 break;
524
525 case TGSI_OPCODE_RSQ:
526 /* TGSI_OPCODE_RECIPSQRT */
527 src0 = emit_fetch( bld, inst, 0, CHAN_X );
528 src0 = lp_build_abs(&bld->base, src0);
529 dst0 = lp_build_rsqrt(&bld->base, src0);
530 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
531 emit_store( bld, inst, 0, chan_index, dst0 );
532 }
533 break;
534
535 case TGSI_OPCODE_EXP:
536 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
537 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
538 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
539 LLVMValueRef *p_exp2_int_part = NULL;
540 LLVMValueRef *p_frac_part = NULL;
541 LLVMValueRef *p_exp2 = NULL;
542
543 src0 = emit_fetch( bld, inst, 0, CHAN_X );
544
545 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
546 p_exp2_int_part = &tmp0;
547 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
548 p_frac_part = &tmp1;
549 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
550 p_exp2 = &tmp2;
551
552 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
553
554 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
555 emit_store( bld, inst, 0, CHAN_X, tmp0);
556 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
557 emit_store( bld, inst, 0, CHAN_Y, tmp1);
558 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
559 emit_store( bld, inst, 0, CHAN_Z, tmp2);
560 }
561 /* dst.w = 1.0 */
562 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
563 tmp0 = bld->base.one;
564 emit_store( bld, inst, 0, CHAN_W, tmp0);
565 }
566 break;
567
568 case TGSI_OPCODE_LOG:
569 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
570 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
571 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
572 LLVMValueRef *p_floor_log2;
573 LLVMValueRef *p_exp;
574 LLVMValueRef *p_log2;
575
576 src0 = emit_fetch( bld, inst, 0, CHAN_X );
577 src0 = lp_build_abs( &bld->base, src0 );
578
579 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
580 p_floor_log2 = &tmp0;
581 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
582 p_exp = &tmp1;
583 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
584 p_log2 = &tmp2;
585
586 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
587
588 /* dst.x = floor(lg2(abs(src.x))) */
589 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
590 emit_store( bld, inst, 0, CHAN_X, tmp0);
591 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
592 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
593 tmp1 = lp_build_div( &bld->base, src0, tmp1);
594 emit_store( bld, inst, 0, CHAN_Y, tmp1);
595 }
596 /* dst.z = lg2(abs(src.x)) */
597 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
598 emit_store( bld, inst, 0, CHAN_Z, tmp2);
599 }
600 /* dst.w = 1.0 */
601 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
602 tmp0 = bld->base.one;
603 emit_store( bld, inst, 0, CHAN_W, tmp0);
604 }
605 break;
606
607 case TGSI_OPCODE_MUL:
608 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
609 src0 = emit_fetch( bld, inst, 0, chan_index );
610 src1 = emit_fetch( bld, inst, 1, chan_index );
611 dst0 = lp_build_mul(&bld->base, src0, src1);
612 emit_store( bld, inst, 0, chan_index, dst0);
613 }
614 break;
615
616 case TGSI_OPCODE_ADD:
617 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
618 src0 = emit_fetch( bld, inst, 0, chan_index );
619 src1 = emit_fetch( bld, inst, 1, chan_index );
620 dst0 = lp_build_add(&bld->base, src0, src1);
621 emit_store( bld, inst, 0, chan_index, dst0);
622 }
623 break;
624
625 case TGSI_OPCODE_DP3:
626 /* TGSI_OPCODE_DOT3 */
627 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
628 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
629 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
630 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
631 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
632 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
633 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
634 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
635 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
636 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
637 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
638 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
639 emit_store( bld, inst, 0, chan_index, tmp0);
640 }
641 break;
642
643 case TGSI_OPCODE_DP4:
644 /* TGSI_OPCODE_DOT4 */
645 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
646 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
647 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
648 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
649 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
650 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
651 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
652 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
653 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
654 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
655 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
656 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
657 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
658 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
659 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
660 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
661 emit_store( bld, inst, 0, chan_index, tmp0);
662 }
663 break;
664
665 case TGSI_OPCODE_DST:
666 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
667 tmp0 = bld->base.one;
668 emit_store( bld, inst, 0, CHAN_X, tmp0);
669 }
670 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
671 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
672 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
673 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
674 emit_store( bld, inst, 0, CHAN_Y, tmp0);
675 }
676 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
677 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
678 emit_store( bld, inst, 0, CHAN_Z, tmp0);
679 }
680 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
681 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
682 emit_store( bld, inst, 0, CHAN_W, tmp0);
683 }
684 break;
685
686 case TGSI_OPCODE_MIN:
687 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
688 src0 = emit_fetch( bld, inst, 0, chan_index );
689 src1 = emit_fetch( bld, inst, 1, chan_index );
690 dst0 = lp_build_min( &bld->base, src0, src1 );
691 emit_store( bld, inst, 0, chan_index, dst0);
692 }
693 break;
694
695 case TGSI_OPCODE_MAX:
696 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
697 src0 = emit_fetch( bld, inst, 0, chan_index );
698 src1 = emit_fetch( bld, inst, 1, chan_index );
699 dst0 = lp_build_max( &bld->base, src0, src1 );
700 emit_store( bld, inst, 0, chan_index, dst0);
701 }
702 break;
703
704 case TGSI_OPCODE_SLT:
705 /* TGSI_OPCODE_SETLT */
706 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
707 src0 = emit_fetch( bld, inst, 0, chan_index );
708 src1 = emit_fetch( bld, inst, 1, chan_index );
709 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
710 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
711 emit_store( bld, inst, 0, chan_index, dst0);
712 }
713 break;
714
715 case TGSI_OPCODE_SGE:
716 /* TGSI_OPCODE_SETGE */
717 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
718 src0 = emit_fetch( bld, inst, 0, chan_index );
719 src1 = emit_fetch( bld, inst, 1, chan_index );
720 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
721 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
722 emit_store( bld, inst, 0, chan_index, dst0);
723 }
724 break;
725
726 case TGSI_OPCODE_MAD:
727 /* TGSI_OPCODE_MADD */
728 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
729 tmp0 = emit_fetch( bld, inst, 0, chan_index );
730 tmp1 = emit_fetch( bld, inst, 1, chan_index );
731 tmp2 = emit_fetch( bld, inst, 2, chan_index );
732 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
733 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
734 emit_store( bld, inst, 0, chan_index, tmp0);
735 }
736 break;
737
738 case TGSI_OPCODE_SUB:
739 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
740 tmp0 = emit_fetch( bld, inst, 0, chan_index );
741 tmp1 = emit_fetch( bld, inst, 1, chan_index );
742 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
743 emit_store( bld, inst, 0, chan_index, tmp0);
744 }
745 break;
746
747 case TGSI_OPCODE_LRP:
748 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
749 src0 = emit_fetch( bld, inst, 0, chan_index );
750 src1 = emit_fetch( bld, inst, 1, chan_index );
751 src2 = emit_fetch( bld, inst, 2, chan_index );
752 tmp0 = lp_build_sub( &bld->base, src1, src2 );
753 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
754 dst0 = lp_build_add( &bld->base, tmp0, src2 );
755 emit_store( bld, inst, 0, chan_index, dst0 );
756 }
757 break;
758
759 case TGSI_OPCODE_CND:
760 return 0;
761 break;
762
763 case TGSI_OPCODE_CND0:
764 return 0;
765 break;
766
767 case TGSI_OPCODE_DP2A:
768 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
769 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
770 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
771 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
772 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
773 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
774 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
775 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
776 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
778 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
779 }
780 break;
781
782 #if 0
783 case TGSI_OPCODE_FRC:
784 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
785 tmp0 = emit_fetch( bld, inst, 0, chan_index );
786 emit_frc( bld, 0, 0 );
787 emit_store( bld, inst, 0, chan_index, tmp0);
788 }
789 break;
790
791 case TGSI_OPCODE_CLAMP:
792 return 0;
793 break;
794
795 case TGSI_OPCODE_FLR:
796 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
797 tmp0 = emit_fetch( bld, inst, 0, chan_index );
798 emit_flr( bld, 0, 0 );
799 emit_store( bld, inst, 0, chan_index, tmp0);
800 }
801 break;
802
803 case TGSI_OPCODE_ROUND:
804 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
805 tmp0 = emit_fetch( bld, inst, 0, chan_index );
806 emit_rnd( bld, 0, 0 );
807 emit_store( bld, inst, 0, chan_index, tmp0);
808 }
809 break;
810 #endif
811
812 case TGSI_OPCODE_EX2: {
813 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
814 tmp0 = lp_build_exp2( &bld->base, tmp0);
815 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
816 emit_store( bld, inst, 0, chan_index, tmp0);
817 }
818 break;
819 }
820
821 case TGSI_OPCODE_LG2:
822 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
823 tmp0 = lp_build_log2( &bld->base, tmp0);
824 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
825 emit_store( bld, inst, 0, chan_index, tmp0);
826 }
827 break;
828
829 case TGSI_OPCODE_POW:
830 src0 = emit_fetch( bld, inst, 0, CHAN_X );
831 src1 = emit_fetch( bld, inst, 1, CHAN_X );
832 dst0 = lp_build_pow( &bld->base, src0, src1 );
833 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
834 emit_store( bld, inst, 0, chan_index, dst0 );
835 }
836 break;
837
838 case TGSI_OPCODE_XPD:
839 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
840 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
841 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
842 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
843 }
844 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
845 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
846 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
847 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
848 }
849 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
850 tmp2 = tmp0;
851 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
852 tmp5 = tmp3;
853 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
854 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
855 emit_store( bld, inst, 0, CHAN_X, tmp2);
856 }
857 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
858 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
859 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
860 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
861 }
862 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
863 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
864 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
865 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
866 emit_store( bld, inst, 0, CHAN_Y, tmp3);
867 }
868 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
869 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
870 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
871 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
872 emit_store( bld, inst, 0, CHAN_Z, tmp5);
873 }
874 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
875 tmp0 = bld->base.one;
876 emit_store( bld, inst, 0, CHAN_W, tmp0);
877 }
878 break;
879
880 case TGSI_OPCODE_ABS:
881 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
882 tmp0 = emit_fetch( bld, inst, 0, chan_index );
883 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
884 emit_store( bld, inst, 0, chan_index, tmp0);
885 }
886 break;
887
888 case TGSI_OPCODE_RCC:
889 return 0;
890 break;
891
892 case TGSI_OPCODE_DPH:
893 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
894 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
895 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
896 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
897 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
898 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
899 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
900 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
901 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
902 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
903 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
904 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
905 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
906 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
907 emit_store( bld, inst, 0, chan_index, tmp0);
908 }
909 break;
910
911 case TGSI_OPCODE_COS:
912 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
913 tmp0 = lp_build_cos( &bld->base, tmp0 );
914 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
915 emit_store( bld, inst, 0, chan_index, tmp0);
916 }
917 break;
918
919 case TGSI_OPCODE_DDX:
920 return 0;
921 break;
922
923 case TGSI_OPCODE_DDY:
924 return 0;
925 break;
926
927 #if 0
928 case TGSI_OPCODE_KILP:
929 /* predicated kill */
930 emit_kilp( bld );
931 return 0; /* XXX fix me */
932 break;
933 #endif
934
935 case TGSI_OPCODE_KIL:
936 /* conditional kill */
937 emit_kil( bld, inst );
938 break;
939
940 case TGSI_OPCODE_PK2H:
941 return 0;
942 break;
943
944 case TGSI_OPCODE_PK2US:
945 return 0;
946 break;
947
948 case TGSI_OPCODE_PK4B:
949 return 0;
950 break;
951
952 case TGSI_OPCODE_PK4UB:
953 return 0;
954 break;
955
956 case TGSI_OPCODE_RFL:
957 return 0;
958 break;
959
960 case TGSI_OPCODE_SEQ:
961 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
962 src0 = emit_fetch( bld, inst, 0, chan_index );
963 src1 = emit_fetch( bld, inst, 1, chan_index );
964 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
965 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
966 emit_store( bld, inst, 0, chan_index, dst0);
967 }
968 break;
969
970 case TGSI_OPCODE_SFL:
971 return 0;
972 break;
973
974 case TGSI_OPCODE_SGT:
975 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
976 src0 = emit_fetch( bld, inst, 0, chan_index );
977 src1 = emit_fetch( bld, inst, 1, chan_index );
978 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
979 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
980 emit_store( bld, inst, 0, chan_index, dst0);
981 }
982 break;
983
984 case TGSI_OPCODE_SIN:
985 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
986 tmp0 = lp_build_sin( &bld->base, tmp0 );
987 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
988 emit_store( bld, inst, 0, chan_index, tmp0);
989 }
990 break;
991
992 case TGSI_OPCODE_SLE:
993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
994 src0 = emit_fetch( bld, inst, 0, chan_index );
995 src1 = emit_fetch( bld, inst, 1, chan_index );
996 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
997 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
998 emit_store( bld, inst, 0, chan_index, dst0);
999 }
1000 break;
1001
1002 case TGSI_OPCODE_SNE:
1003 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1004 src0 = emit_fetch( bld, inst, 0, chan_index );
1005 src1 = emit_fetch( bld, inst, 1, chan_index );
1006 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1007 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1008 emit_store( bld, inst, 0, chan_index, dst0);
1009 }
1010 break;
1011
1012 case TGSI_OPCODE_STR:
1013 return 0;
1014 break;
1015
1016 case TGSI_OPCODE_TEX:
1017 emit_tex( bld, inst, FALSE, FALSE );
1018 break;
1019
1020 case TGSI_OPCODE_TXD:
1021 return 0;
1022 break;
1023
1024 case TGSI_OPCODE_UP2H:
1025 return 0;
1026 break;
1027
1028 case TGSI_OPCODE_UP2US:
1029 return 0;
1030 break;
1031
1032 case TGSI_OPCODE_UP4B:
1033 return 0;
1034 break;
1035
1036 case TGSI_OPCODE_UP4UB:
1037 return 0;
1038 break;
1039
1040 case TGSI_OPCODE_X2D:
1041 return 0;
1042 break;
1043
1044 case TGSI_OPCODE_ARA:
1045 return 0;
1046 break;
1047
1048 #if 0
1049 case TGSI_OPCODE_ARR:
1050 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1051 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1052 emit_rnd( bld, 0, 0 );
1053 emit_f2it( bld, 0 );
1054 emit_store( bld, inst, 0, chan_index, tmp0);
1055 }
1056 break;
1057 #endif
1058
1059 case TGSI_OPCODE_BRA:
1060 return 0;
1061 break;
1062
1063 case TGSI_OPCODE_CAL:
1064 return 0;
1065 break;
1066
1067 #if 0
1068 case TGSI_OPCODE_RET:
1069 emit_ret( bld );
1070 break;
1071 #endif
1072
1073 case TGSI_OPCODE_END:
1074 break;
1075
1076 #if 0
1077 case TGSI_OPCODE_SSG:
1078 /* TGSI_OPCODE_SGN */
1079 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1080 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1081 emit_sgn( bld, 0, 0 );
1082 emit_store( bld, inst, 0, chan_index, tmp0);
1083 }
1084 break;
1085 #endif
1086
1087 case TGSI_OPCODE_CMP:
1088 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1089 src0 = emit_fetch( bld, inst, 0, chan_index );
1090 src1 = emit_fetch( bld, inst, 1, chan_index );
1091 src2 = emit_fetch( bld, inst, 2, chan_index );
1092 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1093 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1094 emit_store( bld, inst, 0, chan_index, dst0);
1095 }
1096 break;
1097
1098 case TGSI_OPCODE_SCS:
1099 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1100 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1101 tmp0 = lp_build_cos( &bld->base, tmp0 );
1102 emit_store( bld, inst, 0, CHAN_X, tmp0);
1103 }
1104 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1105 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1106 tmp0 = lp_build_sin( &bld->base, tmp0 );
1107 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1108 }
1109 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1110 tmp0 = bld->base.zero;
1111 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1112 }
1113 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1114 tmp0 = bld->base.one;
1115 emit_store( bld, inst, 0, CHAN_W, tmp0);
1116 }
1117 break;
1118
1119 case TGSI_OPCODE_TXB:
1120 emit_tex( bld, inst, TRUE, FALSE );
1121 break;
1122
1123 case TGSI_OPCODE_NRM:
1124 /* fall-through */
1125 case TGSI_OPCODE_NRM4:
1126 /* 3 or 4-component normalization */
1127 {
1128 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1129
1130 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1131 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1132 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1133 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1134
1135 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1136
1137 /* xmm4 = src.x */
1138 /* xmm0 = src.x * src.x */
1139 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1140 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1141 tmp4 = tmp0;
1142 }
1143 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1144
1145 /* xmm5 = src.y */
1146 /* xmm0 = xmm0 + src.y * src.y */
1147 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1148 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1149 tmp5 = tmp1;
1150 }
1151 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1152 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1153
1154 /* xmm6 = src.z */
1155 /* xmm0 = xmm0 + src.z * src.z */
1156 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1157 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1158 tmp6 = tmp1;
1159 }
1160 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1161 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1162
1163 if (dims == 4) {
1164 /* xmm7 = src.w */
1165 /* xmm0 = xmm0 + src.w * src.w */
1166 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1167 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1168 tmp7 = tmp1;
1169 }
1170 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1171 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1172 }
1173
1174 /* xmm1 = 1 / sqrt(xmm0) */
1175 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1176
1177 /* dst.x = xmm1 * src.x */
1178 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1179 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1180 emit_store(bld, inst, 0, CHAN_X, tmp4);
1181 }
1182
1183 /* dst.y = xmm1 * src.y */
1184 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1185 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1186 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1187 }
1188
1189 /* dst.z = xmm1 * src.z */
1190 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1191 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1192 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1193 }
1194
1195 /* dst.w = xmm1 * src.w */
1196 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1197 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1198 emit_store(bld, inst, 0, CHAN_W, tmp7);
1199 }
1200 }
1201
1202 /* dst0.w = 1.0 */
1203 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1204 tmp0 = bld->base.one;
1205 emit_store(bld, inst, 0, CHAN_W, tmp0);
1206 }
1207 }
1208 break;
1209
1210 case TGSI_OPCODE_DIV:
1211 return 0;
1212 break;
1213
1214 case TGSI_OPCODE_DP2:
1215 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1216 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1217 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1218 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1219 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1220 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1221 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1222 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1223 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1224 }
1225 break;
1226
1227 case TGSI_OPCODE_TXL:
1228 emit_tex( bld, inst, TRUE, FALSE );
1229 break;
1230
1231 case TGSI_OPCODE_TXP:
1232 emit_tex( bld, inst, FALSE, TRUE );
1233 break;
1234
1235 case TGSI_OPCODE_BRK:
1236 return 0;
1237 break;
1238
1239 case TGSI_OPCODE_IF:
1240 return 0;
1241 break;
1242
1243 case TGSI_OPCODE_LOOP:
1244 return 0;
1245 break;
1246
1247 case TGSI_OPCODE_REP:
1248 return 0;
1249 break;
1250
1251 case TGSI_OPCODE_ELSE:
1252 return 0;
1253 break;
1254
1255 case TGSI_OPCODE_ENDIF:
1256 return 0;
1257 break;
1258
1259 case TGSI_OPCODE_ENDLOOP:
1260 return 0;
1261 break;
1262
1263 case TGSI_OPCODE_ENDREP:
1264 return 0;
1265 break;
1266
1267 case TGSI_OPCODE_PUSHA:
1268 return 0;
1269 break;
1270
1271 case TGSI_OPCODE_POPA:
1272 return 0;
1273 break;
1274
1275 case TGSI_OPCODE_CEIL:
1276 return 0;
1277 break;
1278
1279 case TGSI_OPCODE_I2F:
1280 return 0;
1281 break;
1282
1283 case TGSI_OPCODE_NOT:
1284 return 0;
1285 break;
1286
1287 #if 0
1288 case TGSI_OPCODE_TRUNC:
1289 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1290 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1291 emit_f2it( bld, 0 );
1292 emit_i2f( bld, 0 );
1293 emit_store( bld, inst, 0, chan_index, tmp0);
1294 }
1295 break;
1296 #endif
1297
1298 case TGSI_OPCODE_SHL:
1299 return 0;
1300 break;
1301
1302 case TGSI_OPCODE_SHR:
1303 return 0;
1304 break;
1305
1306 case TGSI_OPCODE_AND:
1307 return 0;
1308 break;
1309
1310 case TGSI_OPCODE_OR:
1311 return 0;
1312 break;
1313
1314 case TGSI_OPCODE_MOD:
1315 return 0;
1316 break;
1317
1318 case TGSI_OPCODE_XOR:
1319 return 0;
1320 break;
1321
1322 case TGSI_OPCODE_SAD:
1323 return 0;
1324 break;
1325
1326 case TGSI_OPCODE_TXF:
1327 return 0;
1328 break;
1329
1330 case TGSI_OPCODE_TXQ:
1331 return 0;
1332 break;
1333
1334 case TGSI_OPCODE_CONT:
1335 return 0;
1336 break;
1337
1338 case TGSI_OPCODE_EMIT:
1339 return 0;
1340 break;
1341
1342 case TGSI_OPCODE_ENDPRIM:
1343 return 0;
1344 break;
1345
1346 default:
1347 return 0;
1348 }
1349
1350 return 1;
1351 }
1352
1353 static void
1354 emit_declaration(
1355 struct lp_build_tgsi_soa_context *bld,
1356 struct tgsi_full_declaration *decl )
1357 {
1358 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1359 LLVMBuilderRef builder = bld->base.builder;
1360 unsigned first, last, mask;
1361 unsigned attrib, chan;
1362
1363 first = decl->DeclarationRange.First;
1364 last = decl->DeclarationRange.Last;
1365 mask = decl->Declaration.UsageMask;
1366
1367 for( attrib = first; attrib <= last; attrib++ ) {
1368 for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
1369 LLVMValueRef input = bld->base.undef;
1370
1371 if( mask & (1 << chan) ) {
1372 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), (1 + attrib)*NUM_CHANNELS + chan, 0);
1373 LLVMValueRef a0;
1374 LLVMValueRef dadx;
1375 LLVMValueRef dady;
1376
1377 switch( decl->Declaration.Interpolate ) {
1378 case TGSI_INTERPOLATE_PERSPECTIVE:
1379 /* fall-through */
1380
1381 case TGSI_INTERPOLATE_LINEAR: {
1382 LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
1383 LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
1384 dadx = LLVMBuildLoad(builder, dadx_ptr, "");
1385 dady = LLVMBuildLoad(builder, dady_ptr, "");
1386 dadx = lp_build_broadcast_scalar(&bld->base, dadx);
1387 dady = lp_build_broadcast_scalar(&bld->base, dady);
1388 lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
1389 lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
1390 /* fall-through */
1391 }
1392
1393 case TGSI_INTERPOLATE_CONSTANT: {
1394 LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
1395 a0 = LLVMBuildLoad(builder, a0_ptr, "");
1396 a0 = lp_build_broadcast_scalar(&bld->base, a0);
1397 lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
1398 break;
1399 }
1400
1401 default:
1402 assert(0);
1403 break;
1404 }
1405
1406 input = a0;
1407
1408 if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
1409 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
1410 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
1411 }
1412
1413 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
1414 if(!bld->oow)
1415 bld->oow = lp_build_rcp(&bld->base, bld->w);
1416 input = lp_build_mul(&bld->base, input, bld->oow);
1417 }
1418
1419 lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
1420 }
1421
1422 bld->inputs[attrib][chan] = input;
1423 }
1424 }
1425 }
1426 }
1427
1428
1429 void
1430 lp_build_tgsi_soa(LLVMBuilderRef builder,
1431 const struct tgsi_token *tokens,
1432 union lp_type type,
1433 struct lp_build_mask_context *mask,
1434 LLVMValueRef *pos,
1435 LLVMValueRef a0_ptr,
1436 LLVMValueRef dadx_ptr,
1437 LLVMValueRef dady_ptr,
1438 LLVMValueRef consts_ptr,
1439 LLVMValueRef (*outputs)[4],
1440 LLVMValueRef samplers_ptr)
1441 {
1442 struct lp_build_tgsi_soa_context bld;
1443 struct tgsi_parse_context parse;
1444 uint num_immediates = 0;
1445 unsigned i;
1446
1447 /* Setup build context */
1448 memset(&bld, 0, sizeof bld);
1449 lp_build_context_init(&bld.base, builder, type);
1450 bld.mask = mask;
1451 bld.x = pos[0];
1452 bld.y = pos[1];
1453 bld.w = pos[3];
1454 bld.a0_ptr = a0_ptr;
1455 bld.dadx_ptr = dadx_ptr;
1456 bld.dady_ptr = dady_ptr;
1457 bld.outputs = outputs;
1458 bld.consts_ptr = consts_ptr;
1459 bld.samplers_ptr = samplers_ptr;
1460
1461 tgsi_parse_init( &parse, tokens );
1462
1463 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1464 tgsi_parse_token( &parse );
1465
1466 switch( parse.FullToken.Token.Type ) {
1467 case TGSI_TOKEN_TYPE_DECLARATION:
1468 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
1469 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1470 }
1471 break;
1472
1473 case TGSI_TOKEN_TYPE_INSTRUCTION:
1474 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1475 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1476 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1477 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1478 info ? info->mnemonic : "<invalid>");
1479 }
1480 break;
1481
1482 case TGSI_TOKEN_TYPE_IMMEDIATE:
1483 /* simply copy the immediate values into the next immediates[] slot */
1484 {
1485 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1486 assert(size <= 4);
1487 assert(num_immediates < LP_MAX_IMMEDIATES);
1488 for( i = 0; i < size; ++i )
1489 bld.immediates[num_immediates][i] =
1490 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1491 for( i = size; i < 4; ++i )
1492 bld.immediates[num_immediates][i] = bld.base.undef;
1493 num_immediates++;
1494 }
1495 break;
1496
1497 default:
1498 assert( 0 );
1499 }
1500 }
1501
1502 tgsi_parse_free( &parse );
1503 }
1504