805d9fd4b24a4d82fdcec08e9caef3029b180cbc
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_util.h"
37 #include "tgsi/tgsi_exec.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_intr.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_tgsi.h"
45 #include "lp_bld_debug.h"
46
47
/** Number of rows in the per-shader temporary register table. */
#define LP_MAX_TEMPS 256
/** Number of rows in the per-shader immediate table. */
#define LP_MAX_IMMEDIATES 256


/**
 * Loop header: iterate CHAN over 0 .. NUM_CHANNELS-1.
 * CHAN must be an already-declared integer lvalue.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first
 * destination register of INST.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Bare "if" header testing IS_DST0_CHANNEL_ENABLED.
 *
 * Expands to an unbraced "if (...)"; do not follow the guarded statement
 * with an "else" that is meant to bind to an outer "if".
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header visiting only the channels enabled in the write mask of
 * the first destination register of INST.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
69
70
71 struct lp_build_tgsi_soa_context
72 {
73 struct lp_build_context base;
74
75 LLVMValueRef x, y, w;
76 LLVMValueRef a0_ptr;
77 LLVMValueRef dadx_ptr;
78 LLVMValueRef dady_ptr;
79
80 LLVMValueRef consts_ptr;
81 LLVMValueRef (*outputs)[NUM_CHANNELS];
82 LLVMValueRef samplers_ptr;
83
84 LLVMValueRef oow;
85
86 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
87
88 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
89 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
90
91 LLVMValueRef mask;
92
93 /** Coords/texels store */
94 LLVMValueRef store_ptr;
95 };
96
97
98 /**
99 * Register fetch.
100 */
101 static LLVMValueRef
102 emit_fetch(
103 struct lp_build_tgsi_soa_context *bld,
104 const struct tgsi_full_instruction *inst,
105 unsigned index,
106 const unsigned chan_index )
107 {
108 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
109 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
110 LLVMValueRef res;
111
112 switch (swizzle) {
113 case TGSI_EXTSWIZZLE_X:
114 case TGSI_EXTSWIZZLE_Y:
115 case TGSI_EXTSWIZZLE_Z:
116 case TGSI_EXTSWIZZLE_W:
117
118 switch (reg->SrcRegister.File) {
119 case TGSI_FILE_CONSTANT: {
120 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
121 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
122 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
123 res = lp_build_broadcast_scalar(&bld->base, scalar);
124 break;
125 }
126
127 case TGSI_FILE_IMMEDIATE:
128 res = bld->immediates[reg->SrcRegister.Index][swizzle];
129 assert(res);
130 break;
131
132 case TGSI_FILE_INPUT:
133 res = bld->inputs[reg->SrcRegister.Index][swizzle];
134 assert(res);
135 break;
136
137 case TGSI_FILE_TEMPORARY:
138 res = bld->temps[reg->SrcRegister.Index][swizzle];
139 if(!res)
140 return bld->base.undef;
141 break;
142
143 default:
144 assert( 0 );
145 return bld->base.undef;
146 }
147 break;
148
149 case TGSI_EXTSWIZZLE_ZERO:
150 res = bld->base.zero;
151 break;
152
153 case TGSI_EXTSWIZZLE_ONE:
154 res = bld->base.one;
155 break;
156
157 default:
158 assert( 0 );
159 return bld->base.undef;
160 }
161
162 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
163 case TGSI_UTIL_SIGN_CLEAR:
164 res = lp_build_abs( &bld->base, res );
165 break;
166
167 case TGSI_UTIL_SIGN_SET:
168 res = lp_build_abs( &bld->base, res );
169 res = LLVMBuildNeg( bld->base.builder, res, "" );
170 break;
171
172 case TGSI_UTIL_SIGN_TOGGLE:
173 res = LLVMBuildNeg( bld->base.builder, res, "" );
174 break;
175
176 case TGSI_UTIL_SIGN_KEEP:
177 break;
178 }
179
180 return res;
181 }
182
183
184 /**
185 * Register store.
186 */
187 static void
188 emit_store(
189 struct lp_build_tgsi_soa_context *bld,
190 const struct tgsi_full_instruction *inst,
191 unsigned index,
192 unsigned chan_index,
193 LLVMValueRef value)
194 {
195 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
196
197 switch( inst->Instruction.Saturate ) {
198 case TGSI_SAT_NONE:
199 break;
200
201 case TGSI_SAT_ZERO_ONE:
202 value = lp_build_max(&bld->base, value, bld->base.zero);
203 value = lp_build_min(&bld->base, value, bld->base.one);
204 break;
205
206 case TGSI_SAT_MINUS_PLUS_ONE:
207 value = lp_build_max(&bld->base, value, lp_build_const_uni(bld->base.type, -1.0));
208 value = lp_build_min(&bld->base, value, bld->base.one);
209 break;
210
211 default:
212 assert(0);
213 }
214
215 switch( reg->DstRegister.File ) {
216 case TGSI_FILE_OUTPUT:
217 bld->outputs[reg->DstRegister.Index][chan_index] = value;
218 break;
219
220 case TGSI_FILE_TEMPORARY:
221 bld->temps[reg->DstRegister.Index][chan_index] = value;
222 break;
223
224 case TGSI_FILE_ADDRESS:
225 /* FIXME */
226 assert(0);
227 break;
228
229 default:
230 assert( 0 );
231 }
232 }
233
234
235 void PIPE_CDECL
236 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
237 uint32_t unit,
238 float *store )
239 {
240 struct tgsi_sampler *sampler = samplers[unit];
241
242 #if 0
243 uint j;
244
245 debug_printf("%s sampler: %p (%p) store: %p\n",
246 __FUNCTION__,
247 sampler, *sampler,
248 store );
249
250 debug_printf("lodbias %f\n", store[12]);
251
252 for (j = 0; j < 4; j++)
253 debug_printf("sample %d texcoord %f %f\n",
254 j,
255 store[0+j],
256 store[4+j]);
257 #endif
258
259 {
260 float rgba[NUM_CHANNELS][QUAD_SIZE];
261 sampler->get_samples(sampler,
262 &store[0],
263 &store[4],
264 &store[8],
265 0.0f, /*store[12], lodbias */
266 rgba);
267 memcpy(store, rgba, sizeof rgba);
268 }
269
270 #if 0
271 for (j = 0; j < 4; j++)
272 debug_printf("sample %d result %f %f %f %f\n",
273 j,
274 store[0+j],
275 store[4+j],
276 store[8+j],
277 store[12+j]);
278 #endif
279 }
280
281 /**
282 * High-level instruction translators.
283 */
284
285 static void
286 emit_tex( struct lp_build_tgsi_soa_context *bld,
287 const struct tgsi_full_instruction *inst,
288 boolean apply_lodbias,
289 boolean projected)
290 {
291 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
292 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
293 LLVMValueRef lodbias;
294 LLVMValueRef oow;
295 LLVMValueRef args[3];
296 unsigned count;
297 unsigned i;
298
299 switch (inst->InstructionExtTexture.Texture) {
300 case TGSI_TEXTURE_1D:
301 case TGSI_TEXTURE_SHADOW1D:
302 count = 1;
303 break;
304 case TGSI_TEXTURE_2D:
305 case TGSI_TEXTURE_RECT:
306 case TGSI_TEXTURE_SHADOW2D:
307 case TGSI_TEXTURE_SHADOWRECT:
308 count = 2;
309 break;
310 case TGSI_TEXTURE_3D:
311 case TGSI_TEXTURE_CUBE:
312 count = 3;
313 break;
314 default:
315 assert(0);
316 return;
317 }
318
319 if(apply_lodbias)
320 lodbias = emit_fetch( bld, inst, 0, 3 );
321 else
322 lodbias = bld->base.zero;
323
324 if(!bld->store_ptr)
325 bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
326 vec_type,
327 LLVMConstInt(LLVMInt32Type(), 4, 0),
328 "store");
329
330 if (projected) {
331 oow = emit_fetch( bld, inst, 0, 3 );
332 oow = lp_build_rcp(&bld->base, oow);
333 }
334
335 for (i = 0; i < count; i++) {
336 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
337 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
338 LLVMValueRef coord;
339
340 coord = emit_fetch( bld, inst, 0, i );
341
342 if (projected)
343 coord = lp_build_mul(&bld->base, coord, oow);
344
345 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
346 }
347
348 args[0] = bld->samplers_ptr;
349 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
350 args[2] = bld->store_ptr;
351
352 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
353
354 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
355 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
356 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
357 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
358 emit_store( bld, inst, 0, i, res );
359 }
360 }
361
362
363 static void
364 emit_kil(
365 struct lp_build_tgsi_soa_context *bld,
366 const struct tgsi_full_instruction *inst )
367 {
368 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
369 LLVMValueRef terms[NUM_CHANNELS];
370 unsigned chan_index;
371
372 memset(&terms, 0, sizeof terms);
373
374 FOR_EACH_CHANNEL( chan_index ) {
375 unsigned swizzle;
376
377 /* Unswizzle channel */
378 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
379
380 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
381 * not to be tested. */
382 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
383 continue;
384
385 /* Check if the component has not been already tested. */
386 assert(swizzle < NUM_CHANNELS);
387 if( !terms[swizzle] )
388 /* TODO: change the comparison operator instead of setting the sign */
389 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
390 }
391
392 FOR_EACH_CHANNEL( chan_index ) {
393 if(terms[chan_index]) {
394 LLVMValueRef mask;
395
396 mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
397
398 if(bld->mask)
399 bld->mask = LLVMBuildAnd(bld->base.builder, bld->mask, mask, "");
400 else
401 bld->mask = mask;
402 }
403 }
404 }
405
406
/**
 * Translate a predicated kill.
 *
 * Not implemented: currently emits nothing.
 *
 * \param bld  translation context (unused for now)
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
413
414
415 /**
416 * Check if inst src/dest regs use indirect addressing into temporary
417 * register file.
418 */
419 static boolean
420 indirect_temp_reference(const struct tgsi_full_instruction *inst)
421 {
422 uint i;
423 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
424 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
425 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
426 reg->SrcRegister.Indirect)
427 return TRUE;
428 }
429 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
430 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
431 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
432 reg->DstRegister.Indirect)
433 return TRUE;
434 }
435 return FALSE;
436 }
437
438
439 static int
440 emit_instruction(
441 struct lp_build_tgsi_soa_context *bld,
442 struct tgsi_full_instruction *inst )
443 {
444 unsigned chan_index;
445 LLVMValueRef src0, src1, src2;
446 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
447 LLVMValueRef dst0;
448
449 /* we can't handle indirect addressing into temp register file yet */
450 if (indirect_temp_reference(inst))
451 return FALSE;
452
453 switch (inst->Instruction.Opcode) {
454 #if 0
455 case TGSI_OPCODE_ARL:
456 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
457 tmp0 = emit_fetch( bld, inst, 0, chan_index );
458 emit_flr(bld, 0, 0);
459 emit_f2it( bld, 0 );
460 emit_store( bld, inst, 0, chan_index, tmp0);
461 }
462 break;
463 #endif
464
465 case TGSI_OPCODE_MOV:
466 case TGSI_OPCODE_SWZ:
467 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
468 tmp0 = emit_fetch( bld, inst, 0, chan_index );
469 emit_store( bld, inst, 0, chan_index, tmp0);
470 }
471 break;
472
473 case TGSI_OPCODE_LIT:
474 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
475 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
476 }
477 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
478 src0 = emit_fetch( bld, inst, 0, CHAN_X );
479 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
480 emit_store( bld, inst, 0, CHAN_Y, dst0);
481 }
482 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
483 /* XMM[1] = SrcReg[0].yyyy */
484 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
485 /* XMM[1] = max(XMM[1], 0) */
486 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
487 /* XMM[2] = SrcReg[0].wwww */
488 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
489 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
490 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
491 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
492 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
493 emit_store( bld, inst, 0, CHAN_Z, dst0);
494 }
495 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
496 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
497 }
498 break;
499
500 case TGSI_OPCODE_RCP:
501 /* TGSI_OPCODE_RECIP */
502 src0 = emit_fetch( bld, inst, 0, CHAN_X );
503 dst0 = lp_build_rcp(&bld->base, src0);
504 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
505 emit_store( bld, inst, 0, chan_index, dst0 );
506 }
507 break;
508
509 case TGSI_OPCODE_RSQ:
510 /* TGSI_OPCODE_RECIPSQRT */
511 src0 = emit_fetch( bld, inst, 0, CHAN_X );
512 src0 = lp_build_abs(&bld->base, src0);
513 dst0 = lp_build_rsqrt(&bld->base, src0);
514 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
515 emit_store( bld, inst, 0, chan_index, dst0 );
516 }
517 break;
518
519 case TGSI_OPCODE_EXP:
520 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
521 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
522 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
523 LLVMValueRef *p_exp2_int_part = NULL;
524 LLVMValueRef *p_frac_part = NULL;
525 LLVMValueRef *p_exp2 = NULL;
526
527 src0 = emit_fetch( bld, inst, 0, CHAN_X );
528
529 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
530 p_exp2_int_part = &tmp0;
531 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
532 p_frac_part = &tmp1;
533 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
534 p_exp2 = &tmp2;
535
536 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
537
538 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
539 emit_store( bld, inst, 0, CHAN_X, tmp0);
540 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
541 emit_store( bld, inst, 0, CHAN_Y, tmp1);
542 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
543 emit_store( bld, inst, 0, CHAN_Z, tmp2);
544 }
545 /* dst.w = 1.0 */
546 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
547 tmp0 = bld->base.one;
548 emit_store( bld, inst, 0, CHAN_W, tmp0);
549 }
550 break;
551
552 case TGSI_OPCODE_LOG:
553 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
554 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
555 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
556 LLVMValueRef *p_floor_log2;
557 LLVMValueRef *p_exp;
558 LLVMValueRef *p_log2;
559
560 src0 = emit_fetch( bld, inst, 0, CHAN_X );
561 src0 = lp_build_abs( &bld->base, src0 );
562
563 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
564 p_floor_log2 = &tmp0;
565 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
566 p_exp = &tmp1;
567 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
568 p_log2 = &tmp2;
569
570 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
571
572 /* dst.x = floor(lg2(abs(src.x))) */
573 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
574 emit_store( bld, inst, 0, CHAN_X, tmp0);
575 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
576 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
577 tmp1 = lp_build_div( &bld->base, src0, tmp1);
578 emit_store( bld, inst, 0, CHAN_Y, tmp1);
579 }
580 /* dst.z = lg2(abs(src.x)) */
581 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
582 emit_store( bld, inst, 0, CHAN_Z, tmp2);
583 }
584 /* dst.w = 1.0 */
585 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
586 tmp0 = bld->base.one;
587 emit_store( bld, inst, 0, CHAN_W, tmp0);
588 }
589 break;
590
591 case TGSI_OPCODE_MUL:
592 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
593 src0 = emit_fetch( bld, inst, 0, chan_index );
594 src1 = emit_fetch( bld, inst, 1, chan_index );
595 dst0 = lp_build_mul(&bld->base, src0, src1);
596 emit_store( bld, inst, 0, chan_index, dst0);
597 }
598 break;
599
600 case TGSI_OPCODE_ADD:
601 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
602 src0 = emit_fetch( bld, inst, 0, chan_index );
603 src1 = emit_fetch( bld, inst, 1, chan_index );
604 dst0 = lp_build_add(&bld->base, src0, src1);
605 emit_store( bld, inst, 0, chan_index, dst0);
606 }
607 break;
608
609 case TGSI_OPCODE_DP3:
610 /* TGSI_OPCODE_DOT3 */
611 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
612 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
613 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
614 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
615 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
616 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
617 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
618 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
619 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
620 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
621 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
622 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
623 emit_store( bld, inst, 0, chan_index, tmp0);
624 }
625 break;
626
627 case TGSI_OPCODE_DP4:
628 /* TGSI_OPCODE_DOT4 */
629 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
630 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
631 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
632 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
633 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
634 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
635 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
636 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
637 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
638 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
639 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
640 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
641 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
642 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
643 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
644 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
645 emit_store( bld, inst, 0, chan_index, tmp0);
646 }
647 break;
648
649 case TGSI_OPCODE_DST:
650 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
651 tmp0 = bld->base.one;
652 emit_store( bld, inst, 0, CHAN_X, tmp0);
653 }
654 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
655 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
656 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
657 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
658 emit_store( bld, inst, 0, CHAN_Y, tmp0);
659 }
660 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
661 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
662 emit_store( bld, inst, 0, CHAN_Z, tmp0);
663 }
664 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
665 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
666 emit_store( bld, inst, 0, CHAN_W, tmp0);
667 }
668 break;
669
670 case TGSI_OPCODE_MIN:
671 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
672 src0 = emit_fetch( bld, inst, 0, chan_index );
673 src1 = emit_fetch( bld, inst, 1, chan_index );
674 dst0 = lp_build_min( &bld->base, src0, src1 );
675 emit_store( bld, inst, 0, chan_index, dst0);
676 }
677 break;
678
679 case TGSI_OPCODE_MAX:
680 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
681 src0 = emit_fetch( bld, inst, 0, chan_index );
682 src1 = emit_fetch( bld, inst, 1, chan_index );
683 dst0 = lp_build_max( &bld->base, src0, src1 );
684 emit_store( bld, inst, 0, chan_index, dst0);
685 }
686 break;
687
688 case TGSI_OPCODE_SLT:
689 /* TGSI_OPCODE_SETLT */
690 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
691 src0 = emit_fetch( bld, inst, 0, chan_index );
692 src1 = emit_fetch( bld, inst, 1, chan_index );
693 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
694 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
695 emit_store( bld, inst, 0, chan_index, dst0);
696 }
697 break;
698
699 case TGSI_OPCODE_SGE:
700 /* TGSI_OPCODE_SETGE */
701 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
702 src0 = emit_fetch( bld, inst, 0, chan_index );
703 src1 = emit_fetch( bld, inst, 1, chan_index );
704 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
705 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
706 emit_store( bld, inst, 0, chan_index, dst0);
707 }
708 break;
709
710 case TGSI_OPCODE_MAD:
711 /* TGSI_OPCODE_MADD */
712 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
713 tmp0 = emit_fetch( bld, inst, 0, chan_index );
714 tmp1 = emit_fetch( bld, inst, 1, chan_index );
715 tmp2 = emit_fetch( bld, inst, 2, chan_index );
716 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
717 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
718 emit_store( bld, inst, 0, chan_index, tmp0);
719 }
720 break;
721
722 case TGSI_OPCODE_SUB:
723 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
724 tmp0 = emit_fetch( bld, inst, 0, chan_index );
725 tmp1 = emit_fetch( bld, inst, 1, chan_index );
726 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
727 emit_store( bld, inst, 0, chan_index, tmp0);
728 }
729 break;
730
731 case TGSI_OPCODE_LRP:
732 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
733 src0 = emit_fetch( bld, inst, 0, chan_index );
734 src1 = emit_fetch( bld, inst, 1, chan_index );
735 src2 = emit_fetch( bld, inst, 2, chan_index );
736 tmp0 = lp_build_sub( &bld->base, src1, src2 );
737 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
738 dst0 = lp_build_add( &bld->base, tmp0, src2 );
739 emit_store( bld, inst, 0, chan_index, dst0 );
740 }
741 break;
742
743 case TGSI_OPCODE_CND:
744 return 0;
745 break;
746
747 case TGSI_OPCODE_CND0:
748 return 0;
749 break;
750
751 case TGSI_OPCODE_DP2A:
752 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
753 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
754 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
755 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
756 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
757 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
758 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
759 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
760 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
761 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
762 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
763 }
764 break;
765
766 #if 0
767 case TGSI_OPCODE_FRC:
768 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
769 tmp0 = emit_fetch( bld, inst, 0, chan_index );
770 emit_frc( bld, 0, 0 );
771 emit_store( bld, inst, 0, chan_index, tmp0);
772 }
773 break;
774
775 case TGSI_OPCODE_CLAMP:
776 return 0;
777 break;
778
779 case TGSI_OPCODE_FLR:
780 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
781 tmp0 = emit_fetch( bld, inst, 0, chan_index );
782 emit_flr( bld, 0, 0 );
783 emit_store( bld, inst, 0, chan_index, tmp0);
784 }
785 break;
786
787 case TGSI_OPCODE_ROUND:
788 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
789 tmp0 = emit_fetch( bld, inst, 0, chan_index );
790 emit_rnd( bld, 0, 0 );
791 emit_store( bld, inst, 0, chan_index, tmp0);
792 }
793 break;
794 #endif
795
796 case TGSI_OPCODE_EX2: {
797 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
798 tmp0 = lp_build_exp2( &bld->base, tmp0);
799 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
800 emit_store( bld, inst, 0, chan_index, tmp0);
801 }
802 break;
803 }
804
805 case TGSI_OPCODE_LG2:
806 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
807 tmp0 = lp_build_log2( &bld->base, tmp0);
808 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
809 emit_store( bld, inst, 0, chan_index, tmp0);
810 }
811 break;
812
813 case TGSI_OPCODE_POW:
814 src0 = emit_fetch( bld, inst, 0, CHAN_X );
815 src1 = emit_fetch( bld, inst, 1, CHAN_X );
816 dst0 = lp_build_pow( &bld->base, src0, src1 );
817 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
818 emit_store( bld, inst, 0, chan_index, dst0 );
819 }
820 break;
821
822 case TGSI_OPCODE_XPD:
823 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
824 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
825 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
826 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
827 }
828 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
829 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
830 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
831 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
832 }
833 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
834 tmp2 = tmp0;
835 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
836 tmp5 = tmp3;
837 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
838 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
839 emit_store( bld, inst, 0, CHAN_X, tmp2);
840 }
841 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
842 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
843 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
844 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
845 }
846 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
847 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
848 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
849 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
850 emit_store( bld, inst, 0, CHAN_Y, tmp3);
851 }
852 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
853 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
854 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
855 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
856 emit_store( bld, inst, 0, CHAN_Z, tmp5);
857 }
858 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
859 tmp0 = bld->base.one;
860 emit_store( bld, inst, 0, CHAN_W, tmp0);
861 }
862 break;
863
864 case TGSI_OPCODE_ABS:
865 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
866 tmp0 = emit_fetch( bld, inst, 0, chan_index );
867 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
868 emit_store( bld, inst, 0, chan_index, tmp0);
869 }
870 break;
871
872 case TGSI_OPCODE_RCC:
873 return 0;
874 break;
875
876 case TGSI_OPCODE_DPH:
877 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
878 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
879 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
880 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
881 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
882 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
883 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
884 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
885 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
886 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
887 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
888 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
889 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
890 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
891 emit_store( bld, inst, 0, chan_index, tmp0);
892 }
893 break;
894
895 case TGSI_OPCODE_COS:
896 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
897 tmp0 = lp_build_cos( &bld->base, tmp0 );
898 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
899 emit_store( bld, inst, 0, chan_index, tmp0);
900 }
901 break;
902
903 case TGSI_OPCODE_DDX:
904 return 0;
905 break;
906
907 case TGSI_OPCODE_DDY:
908 return 0;
909 break;
910
911 #if 0
912 case TGSI_OPCODE_KILP:
913 /* predicated kill */
914 emit_kilp( bld );
915 return 0; /* XXX fix me */
916 break;
917 #endif
918
919 case TGSI_OPCODE_KIL:
920 /* conditional kill */
921 emit_kil( bld, inst );
922 break;
923
924 case TGSI_OPCODE_PK2H:
925 return 0;
926 break;
927
928 case TGSI_OPCODE_PK2US:
929 return 0;
930 break;
931
932 case TGSI_OPCODE_PK4B:
933 return 0;
934 break;
935
936 case TGSI_OPCODE_PK4UB:
937 return 0;
938 break;
939
940 case TGSI_OPCODE_RFL:
941 return 0;
942 break;
943
944 case TGSI_OPCODE_SEQ:
945 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
946 src0 = emit_fetch( bld, inst, 0, chan_index );
947 src1 = emit_fetch( bld, inst, 1, chan_index );
948 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
949 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
950 emit_store( bld, inst, 0, chan_index, dst0);
951 }
952 break;
953
954 case TGSI_OPCODE_SFL:
955 return 0;
956 break;
957
958 case TGSI_OPCODE_SGT:
959 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
960 src0 = emit_fetch( bld, inst, 0, chan_index );
961 src1 = emit_fetch( bld, inst, 1, chan_index );
962 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
963 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
964 emit_store( bld, inst, 0, chan_index, dst0);
965 }
966 break;
967
968 case TGSI_OPCODE_SIN:
969 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
970 tmp0 = lp_build_sin( &bld->base, tmp0 );
971 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
972 emit_store( bld, inst, 0, chan_index, tmp0);
973 }
974 break;
975
976 case TGSI_OPCODE_SLE:
977 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
978 src0 = emit_fetch( bld, inst, 0, chan_index );
979 src1 = emit_fetch( bld, inst, 1, chan_index );
980 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
981 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
982 emit_store( bld, inst, 0, chan_index, dst0);
983 }
984 break;
985
986 case TGSI_OPCODE_SNE:
987 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
988 src0 = emit_fetch( bld, inst, 0, chan_index );
989 src1 = emit_fetch( bld, inst, 1, chan_index );
990 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
991 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
992 emit_store( bld, inst, 0, chan_index, dst0);
993 }
994 break;
995
996 case TGSI_OPCODE_STR:
997 return 0;
998 break;
999
1000 case TGSI_OPCODE_TEX:
1001 emit_tex( bld, inst, FALSE, FALSE );
1002 break;
1003
1004 case TGSI_OPCODE_TXD:
1005 return 0;
1006 break;
1007
1008 case TGSI_OPCODE_UP2H:
1009 return 0;
1010 break;
1011
1012 case TGSI_OPCODE_UP2US:
1013 return 0;
1014 break;
1015
1016 case TGSI_OPCODE_UP4B:
1017 return 0;
1018 break;
1019
1020 case TGSI_OPCODE_UP4UB:
1021 return 0;
1022 break;
1023
1024 case TGSI_OPCODE_X2D:
1025 return 0;
1026 break;
1027
1028 case TGSI_OPCODE_ARA:
1029 return 0;
1030 break;
1031
1032 #if 0
1033 case TGSI_OPCODE_ARR:
1034 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1035 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1036 emit_rnd( bld, 0, 0 );
1037 emit_f2it( bld, 0 );
1038 emit_store( bld, inst, 0, chan_index, tmp0);
1039 }
1040 break;
1041 #endif
1042
1043 case TGSI_OPCODE_BRA:
1044 return 0;
1045 break;
1046
1047 case TGSI_OPCODE_CAL:
1048 return 0;
1049 break;
1050
1051 #if 0
1052 case TGSI_OPCODE_RET:
1053 emit_ret( bld );
1054 break;
1055 #endif
1056
1057 case TGSI_OPCODE_END:
1058 break;
1059
1060 #if 0
1061 case TGSI_OPCODE_SSG:
1062 /* TGSI_OPCODE_SGN */
1063 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1064 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1065 emit_sgn( bld, 0, 0 );
1066 emit_store( bld, inst, 0, chan_index, tmp0);
1067 }
1068 break;
1069 #endif
1070
1071 case TGSI_OPCODE_CMP:
1072 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1073 src0 = emit_fetch( bld, inst, 0, chan_index );
1074 src1 = emit_fetch( bld, inst, 1, chan_index );
1075 src2 = emit_fetch( bld, inst, 2, chan_index );
1076 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1077 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1078 emit_store( bld, inst, 0, chan_index, dst0);
1079 }
1080 break;
1081
1082 case TGSI_OPCODE_SCS:
1083 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1084 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1085 tmp0 = lp_build_cos( &bld->base, tmp0 );
1086 emit_store( bld, inst, 0, CHAN_X, tmp0);
1087 }
1088 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1089 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1090 tmp0 = lp_build_sin( &bld->base, tmp0 );
1091 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1092 }
1093 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1094 tmp0 = bld->base.zero;
1095 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1096 }
1097 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1098 tmp0 = bld->base.one;
1099 emit_store( bld, inst, 0, CHAN_W, tmp0);
1100 }
1101 break;
1102
1103 case TGSI_OPCODE_TXB:
1104 emit_tex( bld, inst, TRUE, FALSE );
1105 break;
1106
1107 case TGSI_OPCODE_NRM:
1108 /* fall-through */
1109 case TGSI_OPCODE_NRM4:
1110 /* 3 or 4-component normalization */
1111 {
1112 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1113
1114 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1115 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1116 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1117 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1118
1119 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1120
1121 /* xmm4 = src.x */
1122 /* xmm0 = src.x * src.x */
1123 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1124 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1125 tmp4 = tmp0;
1126 }
1127 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1128
1129 /* xmm5 = src.y */
1130 /* xmm0 = xmm0 + src.y * src.y */
1131 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1132 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1133 tmp5 = tmp1;
1134 }
1135 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1136 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1137
1138 /* xmm6 = src.z */
1139 /* xmm0 = xmm0 + src.z * src.z */
1140 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1141 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1142 tmp6 = tmp1;
1143 }
1144 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1146
1147 if (dims == 4) {
1148 /* xmm7 = src.w */
1149 /* xmm0 = xmm0 + src.w * src.w */
1150 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1151 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1152 tmp7 = tmp1;
1153 }
1154 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1155 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1156 }
1157
1158 /* xmm1 = 1 / sqrt(xmm0) */
1159 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1160
1161 /* dst.x = xmm1 * src.x */
1162 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1163 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1164 emit_store(bld, inst, 0, CHAN_X, tmp4);
1165 }
1166
1167 /* dst.y = xmm1 * src.y */
1168 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1169 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1170 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1171 }
1172
1173 /* dst.z = xmm1 * src.z */
1174 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1175 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1176 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1177 }
1178
1179 /* dst.w = xmm1 * src.w */
1180 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1181 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1182 emit_store(bld, inst, 0, CHAN_W, tmp7);
1183 }
1184 }
1185
1186 /* dst0.w = 1.0 */
1187 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1188 tmp0 = bld->base.one;
1189 emit_store(bld, inst, 0, CHAN_W, tmp0);
1190 }
1191 }
1192 break;
1193
1194 case TGSI_OPCODE_DIV:
1195 return 0;
1196 break;
1197
1198 case TGSI_OPCODE_DP2:
1199 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1200 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1201 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1202 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1203 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1204 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1205 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1206 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1207 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1208 }
1209 break;
1210
1211 case TGSI_OPCODE_TXL:
1212 emit_tex( bld, inst, TRUE, FALSE );
1213 break;
1214
1215 case TGSI_OPCODE_TXP:
1216 emit_tex( bld, inst, FALSE, TRUE );
1217 break;
1218
1219 case TGSI_OPCODE_BRK:
1220 return 0;
1221 break;
1222
1223 case TGSI_OPCODE_IF:
1224 return 0;
1225 break;
1226
1227 case TGSI_OPCODE_LOOP:
1228 return 0;
1229 break;
1230
1231 case TGSI_OPCODE_REP:
1232 return 0;
1233 break;
1234
1235 case TGSI_OPCODE_ELSE:
1236 return 0;
1237 break;
1238
1239 case TGSI_OPCODE_ENDIF:
1240 return 0;
1241 break;
1242
1243 case TGSI_OPCODE_ENDLOOP:
1244 return 0;
1245 break;
1246
1247 case TGSI_OPCODE_ENDREP:
1248 return 0;
1249 break;
1250
1251 case TGSI_OPCODE_PUSHA:
1252 return 0;
1253 break;
1254
1255 case TGSI_OPCODE_POPA:
1256 return 0;
1257 break;
1258
1259 case TGSI_OPCODE_CEIL:
1260 return 0;
1261 break;
1262
1263 case TGSI_OPCODE_I2F:
1264 return 0;
1265 break;
1266
1267 case TGSI_OPCODE_NOT:
1268 return 0;
1269 break;
1270
1271 #if 0
1272 case TGSI_OPCODE_TRUNC:
1273 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1274 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1275 emit_f2it( bld, 0 );
1276 emit_i2f( bld, 0 );
1277 emit_store( bld, inst, 0, chan_index, tmp0);
1278 }
1279 break;
1280 #endif
1281
1282 case TGSI_OPCODE_SHL:
1283 return 0;
1284 break;
1285
1286 case TGSI_OPCODE_SHR:
1287 return 0;
1288 break;
1289
1290 case TGSI_OPCODE_AND:
1291 return 0;
1292 break;
1293
1294 case TGSI_OPCODE_OR:
1295 return 0;
1296 break;
1297
1298 case TGSI_OPCODE_MOD:
1299 return 0;
1300 break;
1301
1302 case TGSI_OPCODE_XOR:
1303 return 0;
1304 break;
1305
1306 case TGSI_OPCODE_SAD:
1307 return 0;
1308 break;
1309
1310 case TGSI_OPCODE_TXF:
1311 return 0;
1312 break;
1313
1314 case TGSI_OPCODE_TXQ:
1315 return 0;
1316 break;
1317
1318 case TGSI_OPCODE_CONT:
1319 return 0;
1320 break;
1321
1322 case TGSI_OPCODE_EMIT:
1323 return 0;
1324 break;
1325
1326 case TGSI_OPCODE_ENDPRIM:
1327 return 0;
1328 break;
1329
1330 default:
1331 return 0;
1332 }
1333
1334 return 1;
1335 }
1336
1337 static void
1338 emit_declaration(
1339 struct lp_build_tgsi_soa_context *bld,
1340 struct tgsi_full_declaration *decl )
1341 {
1342 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1343 LLVMBuilderRef builder = bld->base.builder;
1344 unsigned first, last, mask;
1345 unsigned attrib, chan;
1346
1347 first = decl->DeclarationRange.First;
1348 last = decl->DeclarationRange.Last;
1349 mask = decl->Declaration.UsageMask;
1350
1351 for( attrib = first; attrib <= last; attrib++ ) {
1352 for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
1353 LLVMValueRef input = bld->base.undef;
1354
1355 if( mask & (1 << chan) ) {
1356 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
1357 LLVMValueRef a0;
1358 LLVMValueRef dadx;
1359 LLVMValueRef dady;
1360
1361 switch( decl->Declaration.Interpolate ) {
1362 case TGSI_INTERPOLATE_PERSPECTIVE:
1363 /* fall-through */
1364
1365 case TGSI_INTERPOLATE_LINEAR: {
1366 LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
1367 LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
1368 dadx = LLVMBuildLoad(builder, dadx_ptr, "");
1369 dady = LLVMBuildLoad(builder, dady_ptr, "");
1370 dadx = lp_build_broadcast_scalar(&bld->base, dadx);
1371 dady = lp_build_broadcast_scalar(&bld->base, dady);
1372 lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
1373 lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
1374 /* fall-through */
1375 }
1376
1377 case TGSI_INTERPOLATE_CONSTANT: {
1378 LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
1379 a0 = LLVMBuildLoad(builder, a0_ptr, "");
1380 a0 = lp_build_broadcast_scalar(&bld->base, a0);
1381 lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
1382 break;
1383 }
1384
1385 default:
1386 assert(0);
1387 break;
1388 }
1389
1390 input = a0;
1391
1392 if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
1393 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
1394 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
1395 }
1396
1397 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
1398 if(!bld->oow)
1399 bld->oow = lp_build_rcp(&bld->base, bld->w);
1400 input = lp_build_mul(&bld->base, input, bld->oow);
1401 }
1402
1403 lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
1404 }
1405
1406 bld->inputs[attrib][chan] = input;
1407 }
1408 }
1409 }
1410 }
1411
1412 /**
1413 * Translate a TGSI vertex/fragment shader to SSE2 code.
1414 * Slightly different things are done for vertex vs. fragment shaders.
1415 *
1416 * \param tokens the TGSI input shader
1417 * \param bld the output SSE code/function
1418 * \param immediates buffer to place immediates, later passed to SSE bld
1419 * \param return 1 for success, 0 if translation failed
1420 */
1421 LLVMValueRef
1422 lp_build_tgsi_soa(LLVMBuilderRef builder,
1423 const struct tgsi_token *tokens,
1424 union lp_type type,
1425 LLVMValueRef *pos,
1426 LLVMValueRef a0_ptr,
1427 LLVMValueRef dadx_ptr,
1428 LLVMValueRef dady_ptr,
1429 LLVMValueRef consts_ptr,
1430 LLVMValueRef (*outputs)[4],
1431 LLVMValueRef samplers_ptr)
1432 {
1433 struct lp_build_tgsi_soa_context bld;
1434 struct tgsi_parse_context parse;
1435 uint num_immediates = 0;
1436 unsigned i;
1437
1438 /* Setup build context */
1439 memset(&bld, 0, sizeof bld);
1440 lp_build_context_init(&bld.base, builder, type);
1441 bld.x = pos[0];
1442 bld.y = pos[1];
1443 bld.w = pos[3];
1444 bld.a0_ptr = a0_ptr;
1445 bld.dadx_ptr = dadx_ptr;
1446 bld.dady_ptr = dady_ptr;
1447 bld.outputs = outputs;
1448 bld.consts_ptr = consts_ptr;
1449 bld.samplers_ptr = samplers_ptr;
1450
1451 tgsi_parse_init( &parse, tokens );
1452
1453 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1454 tgsi_parse_token( &parse );
1455
1456 switch( parse.FullToken.Token.Type ) {
1457 case TGSI_TOKEN_TYPE_DECLARATION:
1458 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
1459 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1460 }
1461 break;
1462
1463 case TGSI_TOKEN_TYPE_INSTRUCTION:
1464 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1465 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1466 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1467 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1468 info ? info->mnemonic : "<invalid>");
1469 }
1470 break;
1471
1472 case TGSI_TOKEN_TYPE_IMMEDIATE:
1473 /* simply copy the immediate values into the next immediates[] slot */
1474 {
1475 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1476 assert(size <= 4);
1477 assert(num_immediates < LP_MAX_IMMEDIATES);
1478 for( i = 0; i < size; ++i )
1479 bld.immediates[num_immediates][i] =
1480 lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
1481 for( i = size; i < 4; ++i )
1482 bld.immediates[num_immediates][i] = bld.base.undef;
1483 num_immediates++;
1484 }
1485 break;
1486
1487 default:
1488 assert( 0 );
1489 }
1490 }
1491
1492 tgsi_parse_free( &parse );
1493
1494 return bld.mask;
1495 }
1496