d35c8c6b7b99a89bc9d6429b7002081686601e13
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_util.h"
37 #include "tgsi/tgsi_exec.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_intr.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_tgsi.h"
46 #include "lp_bld_debug.h"
47
48
/* Capacities of the fixed-size temporary / immediate register tables. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3


/* Loop header: iterate CHAN over 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when channel CHAN is set in the write mask of INST's first
 * destination register. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Statement header form of IS_DST0_CHANNEL_ENABLED: expands to a bare `if`,
 * so the statement (or braced block) that follows is the guarded body. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header: iterate CHAN over the channels enabled in the write mask of
 * INST's first destination register. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
70
71
72 struct lp_build_tgsi_soa_context
73 {
74 struct lp_build_context base;
75
76 LLVMValueRef x, y, w;
77 LLVMValueRef a0_ptr;
78 LLVMValueRef dadx_ptr;
79 LLVMValueRef dady_ptr;
80
81 LLVMValueRef consts_ptr;
82 LLVMValueRef (*outputs)[NUM_CHANNELS];
83 LLVMValueRef samplers_ptr;
84
85 LLVMValueRef oow;
86
87 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
88
89 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
90 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
91
92 struct lp_build_mask_context *mask;
93
94 /** Coords/texels store */
95 LLVMValueRef store_ptr;
96 };
97
98
99 /**
100 * Register fetch.
101 */
102 static LLVMValueRef
103 emit_fetch(
104 struct lp_build_tgsi_soa_context *bld,
105 const struct tgsi_full_instruction *inst,
106 unsigned index,
107 const unsigned chan_index )
108 {
109 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
110 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
111 LLVMValueRef res;
112
113 switch (swizzle) {
114 case TGSI_EXTSWIZZLE_X:
115 case TGSI_EXTSWIZZLE_Y:
116 case TGSI_EXTSWIZZLE_Z:
117 case TGSI_EXTSWIZZLE_W:
118
119 switch (reg->SrcRegister.File) {
120 case TGSI_FILE_CONSTANT: {
121 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
122 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
123 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
124 res = lp_build_broadcast_scalar(&bld->base, scalar);
125 break;
126 }
127
128 case TGSI_FILE_IMMEDIATE:
129 res = bld->immediates[reg->SrcRegister.Index][swizzle];
130 assert(res);
131 break;
132
133 case TGSI_FILE_INPUT:
134 res = bld->inputs[reg->SrcRegister.Index][swizzle];
135 assert(res);
136 break;
137
138 case TGSI_FILE_TEMPORARY:
139 res = bld->temps[reg->SrcRegister.Index][swizzle];
140 if(!res)
141 return bld->base.undef;
142 break;
143
144 default:
145 assert( 0 );
146 return bld->base.undef;
147 }
148 break;
149
150 case TGSI_EXTSWIZZLE_ZERO:
151 res = bld->base.zero;
152 break;
153
154 case TGSI_EXTSWIZZLE_ONE:
155 res = bld->base.one;
156 break;
157
158 default:
159 assert( 0 );
160 return bld->base.undef;
161 }
162
163 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
164 case TGSI_UTIL_SIGN_CLEAR:
165 res = lp_build_abs( &bld->base, res );
166 break;
167
168 case TGSI_UTIL_SIGN_SET:
169 res = lp_build_abs( &bld->base, res );
170 res = LLVMBuildNeg( bld->base.builder, res, "" );
171 break;
172
173 case TGSI_UTIL_SIGN_TOGGLE:
174 res = LLVMBuildNeg( bld->base.builder, res, "" );
175 break;
176
177 case TGSI_UTIL_SIGN_KEEP:
178 break;
179 }
180
181 return res;
182 }
183
184
185 /**
186 * Register store.
187 */
188 static void
189 emit_store(
190 struct lp_build_tgsi_soa_context *bld,
191 const struct tgsi_full_instruction *inst,
192 unsigned index,
193 unsigned chan_index,
194 LLVMValueRef value)
195 {
196 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
197
198 switch( inst->Instruction.Saturate ) {
199 case TGSI_SAT_NONE:
200 break;
201
202 case TGSI_SAT_ZERO_ONE:
203 value = lp_build_max(&bld->base, value, bld->base.zero);
204 value = lp_build_min(&bld->base, value, bld->base.one);
205 break;
206
207 case TGSI_SAT_MINUS_PLUS_ONE:
208 value = lp_build_max(&bld->base, value, lp_build_const_uni(bld->base.type, -1.0));
209 value = lp_build_min(&bld->base, value, bld->base.one);
210 break;
211
212 default:
213 assert(0);
214 }
215
216 switch( reg->DstRegister.File ) {
217 case TGSI_FILE_OUTPUT:
218 bld->outputs[reg->DstRegister.Index][chan_index] = value;
219 break;
220
221 case TGSI_FILE_TEMPORARY:
222 bld->temps[reg->DstRegister.Index][chan_index] = value;
223 break;
224
225 case TGSI_FILE_ADDRESS:
226 /* FIXME */
227 assert(0);
228 break;
229
230 default:
231 assert( 0 );
232 }
233 }
234
235
236 void PIPE_CDECL
237 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
238 uint32_t unit,
239 float *store )
240 {
241 struct tgsi_sampler *sampler = samplers[unit];
242
243 #if 0
244 uint j;
245
246 debug_printf("%s sampler: %p (%p) store: %p\n",
247 __FUNCTION__,
248 sampler, *sampler,
249 store );
250
251 debug_printf("lodbias %f\n", store[12]);
252
253 for (j = 0; j < 4; j++)
254 debug_printf("sample %d texcoord %f %f\n",
255 j,
256 store[0+j],
257 store[4+j]);
258 #endif
259
260 {
261 float rgba[NUM_CHANNELS][QUAD_SIZE];
262 sampler->get_samples(sampler,
263 &store[0],
264 &store[4],
265 &store[8],
266 0.0f, /*store[12], lodbias */
267 rgba);
268 memcpy(store, rgba, sizeof rgba);
269 }
270
271 #if 0
272 for (j = 0; j < 4; j++)
273 debug_printf("sample %d result %f %f %f %f\n",
274 j,
275 store[0+j],
276 store[4+j],
277 store[8+j],
278 store[12+j]);
279 #endif
280 }
281
282 /**
283 * High-level instruction translators.
284 */
285
286 static void
287 emit_tex( struct lp_build_tgsi_soa_context *bld,
288 const struct tgsi_full_instruction *inst,
289 boolean apply_lodbias,
290 boolean projected)
291 {
292 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
293 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
294 LLVMValueRef lodbias;
295 LLVMValueRef oow;
296 LLVMValueRef args[3];
297 unsigned count;
298 unsigned i;
299
300 switch (inst->InstructionExtTexture.Texture) {
301 case TGSI_TEXTURE_1D:
302 case TGSI_TEXTURE_SHADOW1D:
303 count = 1;
304 break;
305 case TGSI_TEXTURE_2D:
306 case TGSI_TEXTURE_RECT:
307 case TGSI_TEXTURE_SHADOW2D:
308 case TGSI_TEXTURE_SHADOWRECT:
309 count = 2;
310 break;
311 case TGSI_TEXTURE_3D:
312 case TGSI_TEXTURE_CUBE:
313 count = 3;
314 break;
315 default:
316 assert(0);
317 return;
318 }
319
320 if(apply_lodbias)
321 lodbias = emit_fetch( bld, inst, 0, 3 );
322 else
323 lodbias = bld->base.zero;
324
325 if(!bld->store_ptr)
326 bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
327 vec_type,
328 LLVMConstInt(LLVMInt32Type(), 4, 0),
329 "store");
330
331 if (projected) {
332 oow = emit_fetch( bld, inst, 0, 3 );
333 oow = lp_build_rcp(&bld->base, oow);
334 }
335
336 for (i = 0; i < count; i++) {
337 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
338 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
339 LLVMValueRef coord;
340
341 coord = emit_fetch( bld, inst, 0, i );
342
343 if (projected)
344 coord = lp_build_mul(&bld->base, coord, oow);
345
346 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
347 }
348
349 args[0] = bld->samplers_ptr;
350 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
351 args[2] = bld->store_ptr;
352
353 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
354
355 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
356 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
357 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
358 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
359 emit_store( bld, inst, 0, i, res );
360 }
361 }
362
363
364 static void
365 emit_kil(
366 struct lp_build_tgsi_soa_context *bld,
367 const struct tgsi_full_instruction *inst )
368 {
369 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
370 LLVMValueRef terms[NUM_CHANNELS];
371 LLVMValueRef mask;
372 unsigned chan_index;
373
374 memset(&terms, 0, sizeof terms);
375
376 FOR_EACH_CHANNEL( chan_index ) {
377 unsigned swizzle;
378
379 /* Unswizzle channel */
380 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
381
382 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
383 * not to be tested. */
384 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
385 continue;
386
387 /* Check if the component has not been already tested. */
388 assert(swizzle < NUM_CHANNELS);
389 if( !terms[swizzle] )
390 /* TODO: change the comparison operator instead of setting the sign */
391 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
392 }
393
394 mask = NULL;
395 FOR_EACH_CHANNEL( chan_index ) {
396 if(terms[chan_index]) {
397 LLVMValueRef chan_mask;
398
399 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
400
401 if(mask)
402 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
403 else
404 mask = chan_mask;
405 }
406 }
407
408 if(mask)
409 lp_build_mask_update(bld->mask, mask);
410 }
411
412
/**
 * Placeholder for the KILP instruction: not implemented, emits nothing.
 *
 * XXX todo / fix me
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   (void) bld;
}
419
420
421 /**
422 * Check if inst src/dest regs use indirect addressing into temporary
423 * register file.
424 */
425 static boolean
426 indirect_temp_reference(const struct tgsi_full_instruction *inst)
427 {
428 uint i;
429 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
430 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
431 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
432 reg->SrcRegister.Indirect)
433 return TRUE;
434 }
435 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
436 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
437 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
438 reg->DstRegister.Indirect)
439 return TRUE;
440 }
441 return FALSE;
442 }
443
444
445 static int
446 emit_instruction(
447 struct lp_build_tgsi_soa_context *bld,
448 struct tgsi_full_instruction *inst )
449 {
450 unsigned chan_index;
451 LLVMValueRef src0, src1, src2;
452 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
453 LLVMValueRef dst0;
454
455 /* we can't handle indirect addressing into temp register file yet */
456 if (indirect_temp_reference(inst))
457 return FALSE;
458
459 switch (inst->Instruction.Opcode) {
460 #if 0
461 case TGSI_OPCODE_ARL:
462 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
463 tmp0 = emit_fetch( bld, inst, 0, chan_index );
464 emit_flr(bld, 0, 0);
465 emit_f2it( bld, 0 );
466 emit_store( bld, inst, 0, chan_index, tmp0);
467 }
468 break;
469 #endif
470
471 case TGSI_OPCODE_MOV:
472 case TGSI_OPCODE_SWZ:
473 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
474 tmp0 = emit_fetch( bld, inst, 0, chan_index );
475 emit_store( bld, inst, 0, chan_index, tmp0);
476 }
477 break;
478
479 case TGSI_OPCODE_LIT:
480 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
481 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
482 }
483 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
484 src0 = emit_fetch( bld, inst, 0, CHAN_X );
485 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
486 emit_store( bld, inst, 0, CHAN_Y, dst0);
487 }
488 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
489 /* XMM[1] = SrcReg[0].yyyy */
490 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
491 /* XMM[1] = max(XMM[1], 0) */
492 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
493 /* XMM[2] = SrcReg[0].wwww */
494 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
495 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
496 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
497 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
498 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
499 emit_store( bld, inst, 0, CHAN_Z, dst0);
500 }
501 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
502 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
503 }
504 break;
505
506 case TGSI_OPCODE_RCP:
507 /* TGSI_OPCODE_RECIP */
508 src0 = emit_fetch( bld, inst, 0, CHAN_X );
509 dst0 = lp_build_rcp(&bld->base, src0);
510 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
511 emit_store( bld, inst, 0, chan_index, dst0 );
512 }
513 break;
514
515 case TGSI_OPCODE_RSQ:
516 /* TGSI_OPCODE_RECIPSQRT */
517 src0 = emit_fetch( bld, inst, 0, CHAN_X );
518 src0 = lp_build_abs(&bld->base, src0);
519 dst0 = lp_build_rsqrt(&bld->base, src0);
520 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
521 emit_store( bld, inst, 0, chan_index, dst0 );
522 }
523 break;
524
525 case TGSI_OPCODE_EXP:
526 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
527 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
528 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
529 LLVMValueRef *p_exp2_int_part = NULL;
530 LLVMValueRef *p_frac_part = NULL;
531 LLVMValueRef *p_exp2 = NULL;
532
533 src0 = emit_fetch( bld, inst, 0, CHAN_X );
534
535 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
536 p_exp2_int_part = &tmp0;
537 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
538 p_frac_part = &tmp1;
539 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
540 p_exp2 = &tmp2;
541
542 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
543
544 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
545 emit_store( bld, inst, 0, CHAN_X, tmp0);
546 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
547 emit_store( bld, inst, 0, CHAN_Y, tmp1);
548 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
549 emit_store( bld, inst, 0, CHAN_Z, tmp2);
550 }
551 /* dst.w = 1.0 */
552 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
553 tmp0 = bld->base.one;
554 emit_store( bld, inst, 0, CHAN_W, tmp0);
555 }
556 break;
557
558 case TGSI_OPCODE_LOG:
559 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
560 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
561 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
562 LLVMValueRef *p_floor_log2;
563 LLVMValueRef *p_exp;
564 LLVMValueRef *p_log2;
565
566 src0 = emit_fetch( bld, inst, 0, CHAN_X );
567 src0 = lp_build_abs( &bld->base, src0 );
568
569 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
570 p_floor_log2 = &tmp0;
571 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
572 p_exp = &tmp1;
573 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
574 p_log2 = &tmp2;
575
576 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
577
578 /* dst.x = floor(lg2(abs(src.x))) */
579 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
580 emit_store( bld, inst, 0, CHAN_X, tmp0);
581 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
582 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
583 tmp1 = lp_build_div( &bld->base, src0, tmp1);
584 emit_store( bld, inst, 0, CHAN_Y, tmp1);
585 }
586 /* dst.z = lg2(abs(src.x)) */
587 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
588 emit_store( bld, inst, 0, CHAN_Z, tmp2);
589 }
590 /* dst.w = 1.0 */
591 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
592 tmp0 = bld->base.one;
593 emit_store( bld, inst, 0, CHAN_W, tmp0);
594 }
595 break;
596
597 case TGSI_OPCODE_MUL:
598 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
599 src0 = emit_fetch( bld, inst, 0, chan_index );
600 src1 = emit_fetch( bld, inst, 1, chan_index );
601 dst0 = lp_build_mul(&bld->base, src0, src1);
602 emit_store( bld, inst, 0, chan_index, dst0);
603 }
604 break;
605
606 case TGSI_OPCODE_ADD:
607 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
608 src0 = emit_fetch( bld, inst, 0, chan_index );
609 src1 = emit_fetch( bld, inst, 1, chan_index );
610 dst0 = lp_build_add(&bld->base, src0, src1);
611 emit_store( bld, inst, 0, chan_index, dst0);
612 }
613 break;
614
615 case TGSI_OPCODE_DP3:
616 /* TGSI_OPCODE_DOT3 */
617 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
618 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
619 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
620 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
621 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
622 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
623 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
624 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
625 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
626 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
627 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
628 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
629 emit_store( bld, inst, 0, chan_index, tmp0);
630 }
631 break;
632
633 case TGSI_OPCODE_DP4:
634 /* TGSI_OPCODE_DOT4 */
635 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
636 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
637 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
638 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
639 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
640 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
641 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
642 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
643 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
644 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
645 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
646 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
647 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
648 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
649 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
650 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
651 emit_store( bld, inst, 0, chan_index, tmp0);
652 }
653 break;
654
655 case TGSI_OPCODE_DST:
656 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
657 tmp0 = bld->base.one;
658 emit_store( bld, inst, 0, CHAN_X, tmp0);
659 }
660 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
661 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
662 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
663 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
664 emit_store( bld, inst, 0, CHAN_Y, tmp0);
665 }
666 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
667 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
668 emit_store( bld, inst, 0, CHAN_Z, tmp0);
669 }
670 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
671 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
672 emit_store( bld, inst, 0, CHAN_W, tmp0);
673 }
674 break;
675
676 case TGSI_OPCODE_MIN:
677 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
678 src0 = emit_fetch( bld, inst, 0, chan_index );
679 src1 = emit_fetch( bld, inst, 1, chan_index );
680 dst0 = lp_build_min( &bld->base, src0, src1 );
681 emit_store( bld, inst, 0, chan_index, dst0);
682 }
683 break;
684
685 case TGSI_OPCODE_MAX:
686 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
687 src0 = emit_fetch( bld, inst, 0, chan_index );
688 src1 = emit_fetch( bld, inst, 1, chan_index );
689 dst0 = lp_build_max( &bld->base, src0, src1 );
690 emit_store( bld, inst, 0, chan_index, dst0);
691 }
692 break;
693
694 case TGSI_OPCODE_SLT:
695 /* TGSI_OPCODE_SETLT */
696 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
697 src0 = emit_fetch( bld, inst, 0, chan_index );
698 src1 = emit_fetch( bld, inst, 1, chan_index );
699 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
700 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
701 emit_store( bld, inst, 0, chan_index, dst0);
702 }
703 break;
704
705 case TGSI_OPCODE_SGE:
706 /* TGSI_OPCODE_SETGE */
707 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
708 src0 = emit_fetch( bld, inst, 0, chan_index );
709 src1 = emit_fetch( bld, inst, 1, chan_index );
710 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
711 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
712 emit_store( bld, inst, 0, chan_index, dst0);
713 }
714 break;
715
716 case TGSI_OPCODE_MAD:
717 /* TGSI_OPCODE_MADD */
718 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
719 tmp0 = emit_fetch( bld, inst, 0, chan_index );
720 tmp1 = emit_fetch( bld, inst, 1, chan_index );
721 tmp2 = emit_fetch( bld, inst, 2, chan_index );
722 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
723 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
724 emit_store( bld, inst, 0, chan_index, tmp0);
725 }
726 break;
727
728 case TGSI_OPCODE_SUB:
729 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
730 tmp0 = emit_fetch( bld, inst, 0, chan_index );
731 tmp1 = emit_fetch( bld, inst, 1, chan_index );
732 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
733 emit_store( bld, inst, 0, chan_index, tmp0);
734 }
735 break;
736
737 case TGSI_OPCODE_LRP:
738 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
739 src0 = emit_fetch( bld, inst, 0, chan_index );
740 src1 = emit_fetch( bld, inst, 1, chan_index );
741 src2 = emit_fetch( bld, inst, 2, chan_index );
742 tmp0 = lp_build_sub( &bld->base, src1, src2 );
743 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
744 dst0 = lp_build_add( &bld->base, tmp0, src2 );
745 emit_store( bld, inst, 0, chan_index, dst0 );
746 }
747 break;
748
749 case TGSI_OPCODE_CND:
750 return 0;
751 break;
752
753 case TGSI_OPCODE_CND0:
754 return 0;
755 break;
756
757 case TGSI_OPCODE_DP2A:
758 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
759 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
760 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
761 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
762 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
763 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
764 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
765 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
766 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
767 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
768 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
769 }
770 break;
771
772 #if 0
773 case TGSI_OPCODE_FRC:
774 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
775 tmp0 = emit_fetch( bld, inst, 0, chan_index );
776 emit_frc( bld, 0, 0 );
777 emit_store( bld, inst, 0, chan_index, tmp0);
778 }
779 break;
780
781 case TGSI_OPCODE_CLAMP:
782 return 0;
783 break;
784
785 case TGSI_OPCODE_FLR:
786 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
787 tmp0 = emit_fetch( bld, inst, 0, chan_index );
788 emit_flr( bld, 0, 0 );
789 emit_store( bld, inst, 0, chan_index, tmp0);
790 }
791 break;
792
793 case TGSI_OPCODE_ROUND:
794 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
795 tmp0 = emit_fetch( bld, inst, 0, chan_index );
796 emit_rnd( bld, 0, 0 );
797 emit_store( bld, inst, 0, chan_index, tmp0);
798 }
799 break;
800 #endif
801
802 case TGSI_OPCODE_EX2: {
803 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
804 tmp0 = lp_build_exp2( &bld->base, tmp0);
805 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
806 emit_store( bld, inst, 0, chan_index, tmp0);
807 }
808 break;
809 }
810
811 case TGSI_OPCODE_LG2:
812 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
813 tmp0 = lp_build_log2( &bld->base, tmp0);
814 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
815 emit_store( bld, inst, 0, chan_index, tmp0);
816 }
817 break;
818
819 case TGSI_OPCODE_POW:
820 src0 = emit_fetch( bld, inst, 0, CHAN_X );
821 src1 = emit_fetch( bld, inst, 1, CHAN_X );
822 dst0 = lp_build_pow( &bld->base, src0, src1 );
823 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
824 emit_store( bld, inst, 0, chan_index, dst0 );
825 }
826 break;
827
828 case TGSI_OPCODE_XPD:
829 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
830 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
831 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
832 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
833 }
834 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
835 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
836 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
837 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
838 }
839 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
840 tmp2 = tmp0;
841 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
842 tmp5 = tmp3;
843 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
844 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
845 emit_store( bld, inst, 0, CHAN_X, tmp2);
846 }
847 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
848 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
849 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
850 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
851 }
852 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
853 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
854 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
855 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
856 emit_store( bld, inst, 0, CHAN_Y, tmp3);
857 }
858 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
859 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
860 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
861 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
862 emit_store( bld, inst, 0, CHAN_Z, tmp5);
863 }
864 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
865 tmp0 = bld->base.one;
866 emit_store( bld, inst, 0, CHAN_W, tmp0);
867 }
868 break;
869
870 case TGSI_OPCODE_ABS:
871 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
872 tmp0 = emit_fetch( bld, inst, 0, chan_index );
873 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
874 emit_store( bld, inst, 0, chan_index, tmp0);
875 }
876 break;
877
878 case TGSI_OPCODE_RCC:
879 return 0;
880 break;
881
882 case TGSI_OPCODE_DPH:
883 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
884 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
885 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
886 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
887 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
888 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
889 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
890 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
891 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
892 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
893 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
894 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
895 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
896 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
897 emit_store( bld, inst, 0, chan_index, tmp0);
898 }
899 break;
900
901 case TGSI_OPCODE_COS:
902 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
903 tmp0 = lp_build_cos( &bld->base, tmp0 );
904 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
905 emit_store( bld, inst, 0, chan_index, tmp0);
906 }
907 break;
908
909 case TGSI_OPCODE_DDX:
910 return 0;
911 break;
912
913 case TGSI_OPCODE_DDY:
914 return 0;
915 break;
916
917 #if 0
918 case TGSI_OPCODE_KILP:
919 /* predicated kill */
920 emit_kilp( bld );
921 return 0; /* XXX fix me */
922 break;
923 #endif
924
925 case TGSI_OPCODE_KIL:
926 /* conditional kill */
927 emit_kil( bld, inst );
928 break;
929
930 case TGSI_OPCODE_PK2H:
931 return 0;
932 break;
933
934 case TGSI_OPCODE_PK2US:
935 return 0;
936 break;
937
938 case TGSI_OPCODE_PK4B:
939 return 0;
940 break;
941
942 case TGSI_OPCODE_PK4UB:
943 return 0;
944 break;
945
946 case TGSI_OPCODE_RFL:
947 return 0;
948 break;
949
950 case TGSI_OPCODE_SEQ:
951 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
952 src0 = emit_fetch( bld, inst, 0, chan_index );
953 src1 = emit_fetch( bld, inst, 1, chan_index );
954 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
955 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
956 emit_store( bld, inst, 0, chan_index, dst0);
957 }
958 break;
959
960 case TGSI_OPCODE_SFL:
961 return 0;
962 break;
963
964 case TGSI_OPCODE_SGT:
965 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
966 src0 = emit_fetch( bld, inst, 0, chan_index );
967 src1 = emit_fetch( bld, inst, 1, chan_index );
968 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
969 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
970 emit_store( bld, inst, 0, chan_index, dst0);
971 }
972 break;
973
974 case TGSI_OPCODE_SIN:
975 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
976 tmp0 = lp_build_sin( &bld->base, tmp0 );
977 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
978 emit_store( bld, inst, 0, chan_index, tmp0);
979 }
980 break;
981
982 case TGSI_OPCODE_SLE:
983 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
984 src0 = emit_fetch( bld, inst, 0, chan_index );
985 src1 = emit_fetch( bld, inst, 1, chan_index );
986 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
987 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
988 emit_store( bld, inst, 0, chan_index, dst0);
989 }
990 break;
991
992 case TGSI_OPCODE_SNE:
993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
994 src0 = emit_fetch( bld, inst, 0, chan_index );
995 src1 = emit_fetch( bld, inst, 1, chan_index );
996 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
997 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
998 emit_store( bld, inst, 0, chan_index, dst0);
999 }
1000 break;
1001
1002 case TGSI_OPCODE_STR:
1003 return 0;
1004 break;
1005
1006 case TGSI_OPCODE_TEX:
1007 emit_tex( bld, inst, FALSE, FALSE );
1008 break;
1009
1010 case TGSI_OPCODE_TXD:
1011 return 0;
1012 break;
1013
1014 case TGSI_OPCODE_UP2H:
1015 return 0;
1016 break;
1017
1018 case TGSI_OPCODE_UP2US:
1019 return 0;
1020 break;
1021
1022 case TGSI_OPCODE_UP4B:
1023 return 0;
1024 break;
1025
1026 case TGSI_OPCODE_UP4UB:
1027 return 0;
1028 break;
1029
1030 case TGSI_OPCODE_X2D:
1031 return 0;
1032 break;
1033
1034 case TGSI_OPCODE_ARA:
1035 return 0;
1036 break;
1037
1038 #if 0
1039 case TGSI_OPCODE_ARR:
1040 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1041 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1042 emit_rnd( bld, 0, 0 );
1043 emit_f2it( bld, 0 );
1044 emit_store( bld, inst, 0, chan_index, tmp0);
1045 }
1046 break;
1047 #endif
1048
1049 case TGSI_OPCODE_BRA:
1050 return 0;
1051 break;
1052
1053 case TGSI_OPCODE_CAL:
1054 return 0;
1055 break;
1056
1057 #if 0
1058 case TGSI_OPCODE_RET:
1059 emit_ret( bld );
1060 break;
1061 #endif
1062
1063 case TGSI_OPCODE_END:
1064 break;
1065
1066 #if 0
1067 case TGSI_OPCODE_SSG:
1068 /* TGSI_OPCODE_SGN */
1069 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1070 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1071 emit_sgn( bld, 0, 0 );
1072 emit_store( bld, inst, 0, chan_index, tmp0);
1073 }
1074 break;
1075 #endif
1076
1077 case TGSI_OPCODE_CMP:
1078 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1079 src0 = emit_fetch( bld, inst, 0, chan_index );
1080 src1 = emit_fetch( bld, inst, 1, chan_index );
1081 src2 = emit_fetch( bld, inst, 2, chan_index );
1082 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1083 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1084 emit_store( bld, inst, 0, chan_index, dst0);
1085 }
1086 break;
1087
1088 case TGSI_OPCODE_SCS:
1089 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1090 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1091 tmp0 = lp_build_cos( &bld->base, tmp0 );
1092 emit_store( bld, inst, 0, CHAN_X, tmp0);
1093 }
1094 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1095 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1096 tmp0 = lp_build_sin( &bld->base, tmp0 );
1097 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1098 }
1099 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1100 tmp0 = bld->base.zero;
1101 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1102 }
1103 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1104 tmp0 = bld->base.one;
1105 emit_store( bld, inst, 0, CHAN_W, tmp0);
1106 }
1107 break;
1108
1109 case TGSI_OPCODE_TXB:
1110 emit_tex( bld, inst, TRUE, FALSE );
1111 break;
1112
1113 case TGSI_OPCODE_NRM:
1114 /* fall-through */
1115 case TGSI_OPCODE_NRM4:
1116 /* 3 or 4-component normalization */
1117 {
1118 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1119
1120 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1121 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1122 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1123 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1124
1125 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1126
1127 /* xmm4 = src.x */
1128 /* xmm0 = src.x * src.x */
1129 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1130 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1131 tmp4 = tmp0;
1132 }
1133 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1134
1135 /* xmm5 = src.y */
1136 /* xmm0 = xmm0 + src.y * src.y */
1137 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1138 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1139 tmp5 = tmp1;
1140 }
1141 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1142 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1143
1144 /* xmm6 = src.z */
1145 /* xmm0 = xmm0 + src.z * src.z */
1146 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1147 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1148 tmp6 = tmp1;
1149 }
1150 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1151 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1152
1153 if (dims == 4) {
1154 /* xmm7 = src.w */
1155 /* xmm0 = xmm0 + src.w * src.w */
1156 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1157 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1158 tmp7 = tmp1;
1159 }
1160 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1161 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1162 }
1163
1164 /* xmm1 = 1 / sqrt(xmm0) */
1165 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1166
1167 /* dst.x = xmm1 * src.x */
1168 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1169 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1170 emit_store(bld, inst, 0, CHAN_X, tmp4);
1171 }
1172
1173 /* dst.y = xmm1 * src.y */
1174 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1175 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1176 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1177 }
1178
1179 /* dst.z = xmm1 * src.z */
1180 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1181 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1182 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1183 }
1184
1185 /* dst.w = xmm1 * src.w */
1186 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1187 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1188 emit_store(bld, inst, 0, CHAN_W, tmp7);
1189 }
1190 }
1191
1192 /* dst0.w = 1.0 */
1193 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1194 tmp0 = bld->base.one;
1195 emit_store(bld, inst, 0, CHAN_W, tmp0);
1196 }
1197 }
1198 break;
1199
1200 case TGSI_OPCODE_DIV:
1201 return 0;
1202 break;
1203
1204 case TGSI_OPCODE_DP2:
1205 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1206 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1207 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1208 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1209 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1210 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1211 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1212 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1214 }
1215 break;
1216
1217 case TGSI_OPCODE_TXL:
1218 emit_tex( bld, inst, TRUE, FALSE );
1219 break;
1220
1221 case TGSI_OPCODE_TXP:
1222 emit_tex( bld, inst, FALSE, TRUE );
1223 break;
1224
1225 case TGSI_OPCODE_BRK:
1226 return 0;
1227 break;
1228
1229 case TGSI_OPCODE_IF:
1230 return 0;
1231 break;
1232
1233 case TGSI_OPCODE_LOOP:
1234 return 0;
1235 break;
1236
1237 case TGSI_OPCODE_REP:
1238 return 0;
1239 break;
1240
1241 case TGSI_OPCODE_ELSE:
1242 return 0;
1243 break;
1244
1245 case TGSI_OPCODE_ENDIF:
1246 return 0;
1247 break;
1248
1249 case TGSI_OPCODE_ENDLOOP:
1250 return 0;
1251 break;
1252
1253 case TGSI_OPCODE_ENDREP:
1254 return 0;
1255 break;
1256
1257 case TGSI_OPCODE_PUSHA:
1258 return 0;
1259 break;
1260
1261 case TGSI_OPCODE_POPA:
1262 return 0;
1263 break;
1264
1265 case TGSI_OPCODE_CEIL:
1266 return 0;
1267 break;
1268
1269 case TGSI_OPCODE_I2F:
1270 return 0;
1271 break;
1272
1273 case TGSI_OPCODE_NOT:
1274 return 0;
1275 break;
1276
1277 #if 0
1278 case TGSI_OPCODE_TRUNC:
1279 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1280 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1281 emit_f2it( bld, 0 );
1282 emit_i2f( bld, 0 );
1283 emit_store( bld, inst, 0, chan_index, tmp0);
1284 }
1285 break;
1286 #endif
1287
1288 case TGSI_OPCODE_SHL:
1289 return 0;
1290 break;
1291
1292 case TGSI_OPCODE_SHR:
1293 return 0;
1294 break;
1295
1296 case TGSI_OPCODE_AND:
1297 return 0;
1298 break;
1299
1300 case TGSI_OPCODE_OR:
1301 return 0;
1302 break;
1303
1304 case TGSI_OPCODE_MOD:
1305 return 0;
1306 break;
1307
1308 case TGSI_OPCODE_XOR:
1309 return 0;
1310 break;
1311
1312 case TGSI_OPCODE_SAD:
1313 return 0;
1314 break;
1315
1316 case TGSI_OPCODE_TXF:
1317 return 0;
1318 break;
1319
1320 case TGSI_OPCODE_TXQ:
1321 return 0;
1322 break;
1323
1324 case TGSI_OPCODE_CONT:
1325 return 0;
1326 break;
1327
1328 case TGSI_OPCODE_EMIT:
1329 return 0;
1330 break;
1331
1332 case TGSI_OPCODE_ENDPRIM:
1333 return 0;
1334 break;
1335
1336 default:
1337 return 0;
1338 }
1339
1340 return 1;
1341 }
1342
1343 static void
1344 emit_declaration(
1345 struct lp_build_tgsi_soa_context *bld,
1346 struct tgsi_full_declaration *decl )
1347 {
1348 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1349 LLVMBuilderRef builder = bld->base.builder;
1350 unsigned first, last, mask;
1351 unsigned attrib, chan;
1352
1353 first = decl->DeclarationRange.First;
1354 last = decl->DeclarationRange.Last;
1355 mask = decl->Declaration.UsageMask;
1356
1357 for( attrib = first; attrib <= last; attrib++ ) {
1358 for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
1359 LLVMValueRef input = bld->base.undef;
1360
1361 if( mask & (1 << chan) ) {
1362 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), (1 + attrib)*NUM_CHANNELS + chan, 0);
1363 LLVMValueRef a0;
1364 LLVMValueRef dadx;
1365 LLVMValueRef dady;
1366
1367 switch( decl->Declaration.Interpolate ) {
1368 case TGSI_INTERPOLATE_PERSPECTIVE:
1369 /* fall-through */
1370
1371 case TGSI_INTERPOLATE_LINEAR: {
1372 LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
1373 LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
1374 dadx = LLVMBuildLoad(builder, dadx_ptr, "");
1375 dady = LLVMBuildLoad(builder, dady_ptr, "");
1376 dadx = lp_build_broadcast_scalar(&bld->base, dadx);
1377 dady = lp_build_broadcast_scalar(&bld->base, dady);
1378 lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
1379 lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
1380 /* fall-through */
1381 }
1382
1383 case TGSI_INTERPOLATE_CONSTANT: {
1384 LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
1385 a0 = LLVMBuildLoad(builder, a0_ptr, "");
1386 a0 = lp_build_broadcast_scalar(&bld->base, a0);
1387 lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
1388 break;
1389 }
1390
1391 default:
1392 assert(0);
1393 break;
1394 }
1395
1396 input = a0;
1397
1398 if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
1399 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
1400 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
1401 }
1402
1403 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
1404 if(!bld->oow)
1405 bld->oow = lp_build_rcp(&bld->base, bld->w);
1406 input = lp_build_mul(&bld->base, input, bld->oow);
1407 }
1408
1409 lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
1410 }
1411
1412 bld->inputs[attrib][chan] = input;
1413 }
1414 }
1415 }
1416 }
1417
1418
1419 void
1420 lp_build_tgsi_soa(LLVMBuilderRef builder,
1421 const struct tgsi_token *tokens,
1422 union lp_type type,
1423 struct lp_build_mask_context *mask,
1424 LLVMValueRef *pos,
1425 LLVMValueRef a0_ptr,
1426 LLVMValueRef dadx_ptr,
1427 LLVMValueRef dady_ptr,
1428 LLVMValueRef consts_ptr,
1429 LLVMValueRef (*outputs)[4],
1430 LLVMValueRef samplers_ptr)
1431 {
1432 struct lp_build_tgsi_soa_context bld;
1433 struct tgsi_parse_context parse;
1434 uint num_immediates = 0;
1435 unsigned i;
1436
1437 /* Setup build context */
1438 memset(&bld, 0, sizeof bld);
1439 lp_build_context_init(&bld.base, builder, type);
1440 bld.mask = mask;
1441 bld.x = pos[0];
1442 bld.y = pos[1];
1443 bld.w = pos[3];
1444 bld.a0_ptr = a0_ptr;
1445 bld.dadx_ptr = dadx_ptr;
1446 bld.dady_ptr = dady_ptr;
1447 bld.outputs = outputs;
1448 bld.consts_ptr = consts_ptr;
1449 bld.samplers_ptr = samplers_ptr;
1450
1451 tgsi_parse_init( &parse, tokens );
1452
1453 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1454 tgsi_parse_token( &parse );
1455
1456 switch( parse.FullToken.Token.Type ) {
1457 case TGSI_TOKEN_TYPE_DECLARATION:
1458 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
1459 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1460 }
1461 break;
1462
1463 case TGSI_TOKEN_TYPE_INSTRUCTION:
1464 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1465 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1466 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1467 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1468 info ? info->mnemonic : "<invalid>");
1469 }
1470 break;
1471
1472 case TGSI_TOKEN_TYPE_IMMEDIATE:
1473 /* simply copy the immediate values into the next immediates[] slot */
1474 {
1475 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1476 assert(size <= 4);
1477 assert(num_immediates < LP_MAX_IMMEDIATES);
1478 for( i = 0; i < size; ++i )
1479 bld.immediates[num_immediates][i] =
1480 lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
1481 for( i = size; i < 4; ++i )
1482 bld.immediates[num_immediates][i] = bld.base.undef;
1483 num_immediates++;
1484 }
1485 break;
1486
1487 default:
1488 assert( 0 );
1489 }
1490 }
1491
1492 tgsi_parse_free( &parse );
1493 }
1494