tgsi/ureg: Improve immediate match & expand logic.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_ureg.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_context.h"
30 #include "pipe/p_state.h"
31 #include "tgsi/tgsi_ureg.h"
32 #include "tgsi/tgsi_build.h"
33 #include "tgsi/tgsi_info.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_sanity.h"
36 #include "util/u_memory.h"
37 #include "util/u_math.h"
38
39 union tgsi_any_token {
40 struct tgsi_header header;
41 struct tgsi_processor processor;
42 struct tgsi_token token;
43 struct tgsi_declaration decl;
44 struct tgsi_declaration_range decl_range;
45 struct tgsi_declaration_semantic decl_semantic;
46 struct tgsi_immediate imm;
47 union tgsi_immediate_data imm_data;
48 struct tgsi_instruction insn;
49 struct tgsi_instruction_predicate insn_predicate;
50 struct tgsi_instruction_label insn_label;
51 struct tgsi_instruction_texture insn_texture;
52 struct tgsi_src_register src;
53 struct tgsi_dimension dim;
54 struct tgsi_dst_register dst;
55 unsigned value;
56 };
57
58
/**
 * A growable array of tokens.  Two of these exist per program: one for
 * declarations and one for instructions.
 */
struct ureg_tokens {
   union tgsi_any_token *tokens;   /* storage, or error_tokens after a failure */
   unsigned size;                  /* capacity, in tokens */
   unsigned order;                 /* tokens_expand() sets size = 1 << order */
   unsigned count;                 /* tokens handed out so far */
};
65
/* Capacities of the fixed-size bookkeeping tables in struct ureg_program. */
#define UREG_MAX_INPUT           PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT          PIPE_MAX_ATTRIBS
#define UREG_MAX_CONSTANT_RANGE  32
#define UREG_MAX_IMMEDIATE       32
#define UREG_MAX_TEMP            256
#define UREG_MAX_ADDR            2
#define UREG_MAX_LOOP            1
#define UREG_MAX_PRED            1

/* Indices into ureg_program::domain[]. */
#define DOMAIN_DECL              0
#define DOMAIN_INSN              1
77
78 struct ureg_program
79 {
80 unsigned processor;
81 struct pipe_context *pipe;
82
83 struct {
84 unsigned semantic_name;
85 unsigned semantic_index;
86 unsigned interp;
87 } fs_input[UREG_MAX_INPUT];
88 unsigned nr_fs_inputs;
89
90 unsigned vs_inputs[UREG_MAX_INPUT/32];
91
92 struct {
93 unsigned index;
94 } gs_input[UREG_MAX_INPUT];
95 unsigned nr_gs_inputs;
96
97 struct {
98 unsigned semantic_name;
99 unsigned semantic_index;
100 } output[UREG_MAX_OUTPUT];
101 unsigned nr_outputs;
102
103 struct {
104 union {
105 float f[4];
106 unsigned u[4];
107 int i[4];
108 } value;
109 unsigned nr;
110 unsigned type;
111 } immediate[UREG_MAX_IMMEDIATE];
112 unsigned nr_immediates;
113
114 struct ureg_src sampler[PIPE_MAX_SAMPLERS];
115 unsigned nr_samplers;
116
117 unsigned temps_active[UREG_MAX_TEMP / 32];
118 unsigned nr_temps;
119
120 struct {
121 unsigned first;
122 unsigned last;
123 } constant_range[UREG_MAX_CONSTANT_RANGE];
124 unsigned nr_constant_ranges;
125
126 unsigned nr_addrs;
127 unsigned nr_preds;
128 unsigned nr_loops;
129 unsigned nr_instructions;
130
131 struct ureg_tokens domain[2];
132 };
133
134 static union tgsi_any_token error_tokens[32];
135
136 static void tokens_error( struct ureg_tokens *tokens )
137 {
138 if (tokens->tokens && tokens->tokens != error_tokens)
139 FREE(tokens->tokens);
140
141 tokens->tokens = error_tokens;
142 tokens->size = Elements(error_tokens);
143 tokens->count = 0;
144 }
145
146
147 static void tokens_expand( struct ureg_tokens *tokens,
148 unsigned count )
149 {
150 unsigned old_size = tokens->size * sizeof(unsigned);
151
152 if (tokens->tokens == error_tokens) {
153 return;
154 }
155
156 while (tokens->count + count > tokens->size) {
157 tokens->size = (1 << ++tokens->order);
158 }
159
160 tokens->tokens = REALLOC(tokens->tokens,
161 old_size,
162 tokens->size * sizeof(unsigned));
163 if (tokens->tokens == NULL) {
164 tokens_error(tokens);
165 }
166 }
167
168 static void set_bad( struct ureg_program *ureg )
169 {
170 tokens_error(&ureg->domain[0]);
171 }
172
173
174
175 static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
176 unsigned domain,
177 unsigned count )
178 {
179 struct ureg_tokens *tokens = &ureg->domain[domain];
180 union tgsi_any_token *result;
181
182 if (tokens->count + count > tokens->size)
183 tokens_expand(tokens, count);
184
185 result = &tokens->tokens[tokens->count];
186 tokens->count += count;
187 return result;
188 }
189
190
191 static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
192 unsigned domain,
193 unsigned nr )
194 {
195 if (ureg->domain[domain].tokens == error_tokens)
196 return &error_tokens[0];
197
198 return &ureg->domain[domain].tokens[nr];
199 }
200
201
202
203 static INLINE struct ureg_dst
204 ureg_dst_register( unsigned file,
205 unsigned index )
206 {
207 struct ureg_dst dst;
208
209 dst.File = file;
210 dst.WriteMask = TGSI_WRITEMASK_XYZW;
211 dst.Indirect = 0;
212 dst.IndirectIndex = 0;
213 dst.IndirectSwizzle = 0;
214 dst.Saturate = 0;
215 dst.Predicate = 0;
216 dst.PredNegate = 0;
217 dst.PredSwizzleX = TGSI_SWIZZLE_X;
218 dst.PredSwizzleY = TGSI_SWIZZLE_Y;
219 dst.PredSwizzleZ = TGSI_SWIZZLE_Z;
220 dst.PredSwizzleW = TGSI_SWIZZLE_W;
221 dst.Index = index;
222
223 return dst;
224 }
225
226 static INLINE struct ureg_src
227 ureg_src_register( unsigned file,
228 unsigned index )
229 {
230 struct ureg_src src;
231
232 src.File = file;
233 src.SwizzleX = TGSI_SWIZZLE_X;
234 src.SwizzleY = TGSI_SWIZZLE_Y;
235 src.SwizzleZ = TGSI_SWIZZLE_Z;
236 src.SwizzleW = TGSI_SWIZZLE_W;
237 src.Pad = 0;
238 src.Indirect = 0;
239 src.IndirectIndex = 0;
240 src.IndirectSwizzle = 0;
241 src.Absolute = 0;
242 src.Index = index;
243 src.Negate = 0;
244
245 return src;
246 }
247
248
249
250
251 struct ureg_src
252 ureg_DECL_fs_input( struct ureg_program *ureg,
253 unsigned name,
254 unsigned index,
255 unsigned interp_mode )
256 {
257 unsigned i;
258
259 for (i = 0; i < ureg->nr_fs_inputs; i++) {
260 if (ureg->fs_input[i].semantic_name == name &&
261 ureg->fs_input[i].semantic_index == index)
262 goto out;
263 }
264
265 if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
266 ureg->fs_input[i].semantic_name = name;
267 ureg->fs_input[i].semantic_index = index;
268 ureg->fs_input[i].interp = interp_mode;
269 ureg->nr_fs_inputs++;
270 }
271 else {
272 set_bad( ureg );
273 }
274
275 out:
276 return ureg_src_register( TGSI_FILE_INPUT, i );
277 }
278
279
280 struct ureg_src
281 ureg_DECL_vs_input( struct ureg_program *ureg,
282 unsigned index )
283 {
284 assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
285
286 ureg->vs_inputs[index/32] |= 1 << (index % 32);
287 return ureg_src_register( TGSI_FILE_INPUT, index );
288 }
289
290
291 struct ureg_src
292 ureg_DECL_gs_input(struct ureg_program *ureg,
293 unsigned index)
294 {
295 if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
296 ureg->gs_input[ureg->nr_gs_inputs].index = index;
297 ureg->nr_gs_inputs++;
298 } else {
299 set_bad(ureg);
300 }
301
302 /* XXX: Add suport for true 2D input registers. */
303 return ureg_src_register(TGSI_FILE_INPUT, index);
304 }
305
306
307 struct ureg_dst
308 ureg_DECL_output( struct ureg_program *ureg,
309 unsigned name,
310 unsigned index )
311 {
312 unsigned i;
313
314 for (i = 0; i < ureg->nr_outputs; i++) {
315 if (ureg->output[i].semantic_name == name &&
316 ureg->output[i].semantic_index == index)
317 goto out;
318 }
319
320 if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
321 ureg->output[i].semantic_name = name;
322 ureg->output[i].semantic_index = index;
323 ureg->nr_outputs++;
324 }
325 else {
326 set_bad( ureg );
327 }
328
329 out:
330 return ureg_dst_register( TGSI_FILE_OUTPUT, i );
331 }
332
333
334 /* Returns a new constant register. Keep track of which have been
335 * referred to so that we can emit decls later.
336 *
337 * There is nothing in this code to bind this constant to any tracked
338 * value or manage any constant_buffer contents -- that's the
339 * resposibility of the calling code.
340 */
341 struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
342 unsigned index )
343 {
344 unsigned minconst = index, maxconst = index;
345 unsigned i;
346
347 /* Inside existing range?
348 */
349 for (i = 0; i < ureg->nr_constant_ranges; i++) {
350 if (ureg->constant_range[i].first <= index &&
351 ureg->constant_range[i].last >= index)
352 goto out;
353 }
354
355 /* Extend existing range?
356 */
357 for (i = 0; i < ureg->nr_constant_ranges; i++) {
358 if (ureg->constant_range[i].last == index - 1) {
359 ureg->constant_range[i].last = index;
360 goto out;
361 }
362
363 if (ureg->constant_range[i].first == index + 1) {
364 ureg->constant_range[i].first = index;
365 goto out;
366 }
367
368 minconst = MIN2(minconst, ureg->constant_range[i].first);
369 maxconst = MAX2(maxconst, ureg->constant_range[i].last);
370 }
371
372 /* Create new range?
373 */
374 if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
375 i = ureg->nr_constant_ranges++;
376 ureg->constant_range[i].first = index;
377 ureg->constant_range[i].last = index;
378 goto out;
379 }
380
381 /* Collapse all ranges down to one:
382 */
383 i = 0;
384 ureg->constant_range[0].first = minconst;
385 ureg->constant_range[0].last = maxconst;
386 ureg->nr_constant_ranges = 1;
387
388 out:
389 assert(i < ureg->nr_constant_ranges);
390 assert(ureg->constant_range[i].first <= index);
391 assert(ureg->constant_range[i].last >= index);
392 return ureg_src_register( TGSI_FILE_CONSTANT, index );
393 }
394
395
396 /* Allocate a new temporary. Temporaries greater than UREG_MAX_TEMP
397 * are legal, but will not be released.
398 */
399 struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
400 {
401 unsigned i;
402
403 for (i = 0; i < UREG_MAX_TEMP; i += 32) {
404 int bit = ffs(~ureg->temps_active[i/32]);
405 if (bit != 0) {
406 i += bit - 1;
407 goto out;
408 }
409 }
410
411 /* No reusable temps, so allocate a new one:
412 */
413 i = ureg->nr_temps++;
414
415 out:
416 if (i < UREG_MAX_TEMP)
417 ureg->temps_active[i/32] |= 1 << (i % 32);
418
419 if (i >= ureg->nr_temps)
420 ureg->nr_temps = i + 1;
421
422 return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
423 }
424
425
426 void ureg_release_temporary( struct ureg_program *ureg,
427 struct ureg_dst tmp )
428 {
429 if(tmp.File == TGSI_FILE_TEMPORARY)
430 if (tmp.Index < UREG_MAX_TEMP)
431 ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32));
432 }
433
434
435 /* Allocate a new address register.
436 */
437 struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
438 {
439 if (ureg->nr_addrs < UREG_MAX_ADDR)
440 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
441
442 assert( 0 );
443 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
444 }
445
446 /* Allocate a new loop register.
447 */
448 struct ureg_dst
449 ureg_DECL_loop(struct ureg_program *ureg)
450 {
451 if (ureg->nr_loops < UREG_MAX_LOOP) {
452 return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
453 }
454
455 assert(0);
456 return ureg_dst_register(TGSI_FILE_LOOP, 0);
457 }
458
459 /* Allocate a new predicate register.
460 */
461 struct ureg_dst
462 ureg_DECL_predicate(struct ureg_program *ureg)
463 {
464 if (ureg->nr_preds < UREG_MAX_PRED) {
465 return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++);
466 }
467
468 assert(0);
469 return ureg_dst_register(TGSI_FILE_PREDICATE, 0);
470 }
471
472 /* Allocate a new sampler.
473 */
474 struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
475 unsigned nr )
476 {
477 unsigned i;
478
479 for (i = 0; i < ureg->nr_samplers; i++)
480 if (ureg->sampler[i].Index == nr)
481 return ureg->sampler[i];
482
483 if (i < PIPE_MAX_SAMPLERS) {
484 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
485 ureg->nr_samplers++;
486 return ureg->sampler[i];
487 }
488
489 assert( 0 );
490 return ureg->sampler[0];
491 }
492
493
/**
 * Try to express the `nr` values in `v` as a swizzle of an existing
 * immediate, appending missing values to it when there is room.
 *
 * \param v        values wanted, `nr` of them
 * \param nr       number of values in `v`
 * \param v2       components of the existing immediate (room for 4)
 * \param pnr2     in: number of components of `v2` in use; out: updated
 *                 count, written only on success
 * \param swizzle  out: 2 bits per wanted value (value i in bits 2i..2i+1)
 *                 giving the component of `v2` that holds it; only
 *                 meaningful on success
 * \return non-zero on success, 0 if the values do not all fit
 *
 * The merge is staged in a local copy, so neither `v2` nor `*pnr2` is
 * modified unless the whole request succeeds.
 */
static int
match_or_expand_immediate( const unsigned *v,
                           unsigned nr,
                           unsigned *v2,
                           unsigned *pnr2,
                           unsigned *swizzle )
{
   const unsigned old_nr2 = *pnr2;
   unsigned merged[4];
   unsigned nr2 = old_nr2;
   unsigned i, j;

   *swizzle = 0;

   /* An immediate never has more than four components. */
   if (old_nr2 > 4) {
      return 0;
   }

   for (j = 0; j < old_nr2; j++) {
      merged[j] = v2[j];
   }

   for (i = 0; i < nr; i++) {
      /* Look for the value among existing and newly staged components. */
      for (j = 0; j < nr2; j++) {
         if (v[i] == merged[j]) {
            break;
         }
      }

      if (j == nr2) {
         /* Not present: stage it as a new component, if there is room. */
         if (nr2 >= 4) {
            return 0;
         }
         merged[nr2] = v[i];
         nr2++;
      }

      /* Either way, j is now the component holding v[i]. */
      *swizzle |= j << (i * 2);
   }

   /* Actually expand immediate only when fully succeeded.
    */
   for (j = old_nr2; j < nr2; j++) {
      v2[j] = merged[j];
   }
   *pnr2 = nr2;
   return 1;
}
532
533
534 static struct ureg_src
535 decl_immediate( struct ureg_program *ureg,
536 const unsigned *v,
537 unsigned nr,
538 unsigned type )
539 {
540 unsigned i, j;
541 unsigned swizzle;
542
543 /* Could do a first pass where we examine all existing immediates
544 * without expanding.
545 */
546
547 for (i = 0; i < ureg->nr_immediates; i++) {
548 if (ureg->immediate[i].type != type) {
549 continue;
550 }
551 if (match_or_expand_immediate(v,
552 nr,
553 ureg->immediate[i].value.u,
554 &ureg->immediate[i].nr,
555 &swizzle)) {
556 goto out;
557 }
558 }
559
560 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
561 i = ureg->nr_immediates++;
562 ureg->immediate[i].type = type;
563 if (match_or_expand_immediate(v,
564 nr,
565 ureg->immediate[i].value.u,
566 &ureg->immediate[i].nr,
567 &swizzle)) {
568 goto out;
569 }
570 }
571
572 set_bad(ureg);
573
574 out:
575 /* Make sure that all referenced elements are from this immediate.
576 * Has the effect of making size-one immediates into scalars.
577 */
578 for (j = nr; j < 4; j++) {
579 swizzle |= (swizzle & 0x3) << (j * 2);
580 }
581
582 return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
583 (swizzle >> 0) & 0x3,
584 (swizzle >> 2) & 0x3,
585 (swizzle >> 4) & 0x3,
586 (swizzle >> 6) & 0x3);
587 }
588
589
590 struct ureg_src
591 ureg_DECL_immediate( struct ureg_program *ureg,
592 const float *v,
593 unsigned nr )
594 {
595 union {
596 float f[4];
597 unsigned u[4];
598 } fu;
599 unsigned int i;
600
601 for (i = 0; i < nr; i++) {
602 fu.f[i] = v[i];
603 }
604
605 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
606 }
607
608
609 struct ureg_src
610 ureg_DECL_immediate_uint( struct ureg_program *ureg,
611 const unsigned *v,
612 unsigned nr )
613 {
614 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
615 }
616
617
618 struct ureg_src
619 ureg_DECL_immediate_int( struct ureg_program *ureg,
620 const int *v,
621 unsigned nr )
622 {
623 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
624 }
625
626
627 void
628 ureg_emit_src( struct ureg_program *ureg,
629 struct ureg_src src )
630 {
631 unsigned size = 1 + (src.Indirect ? 1 : 0);
632
633 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
634 unsigned n = 0;
635
636 assert(src.File != TGSI_FILE_NULL);
637 assert(src.File != TGSI_FILE_OUTPUT);
638 assert(src.File < TGSI_FILE_COUNT);
639
640 out[n].value = 0;
641 out[n].src.File = src.File;
642 out[n].src.SwizzleX = src.SwizzleX;
643 out[n].src.SwizzleY = src.SwizzleY;
644 out[n].src.SwizzleZ = src.SwizzleZ;
645 out[n].src.SwizzleW = src.SwizzleW;
646 out[n].src.Index = src.Index;
647 out[n].src.Negate = src.Negate;
648 out[0].src.Absolute = src.Absolute;
649 n++;
650
651 if (src.Indirect) {
652 out[0].src.Indirect = 1;
653 out[n].value = 0;
654 out[n].src.File = TGSI_FILE_ADDRESS;
655 out[n].src.SwizzleX = src.IndirectSwizzle;
656 out[n].src.SwizzleY = src.IndirectSwizzle;
657 out[n].src.SwizzleZ = src.IndirectSwizzle;
658 out[n].src.SwizzleW = src.IndirectSwizzle;
659 out[n].src.Index = src.IndirectIndex;
660 n++;
661 }
662
663 assert(n == size);
664 }
665
666
667 void
668 ureg_emit_dst( struct ureg_program *ureg,
669 struct ureg_dst dst )
670 {
671 unsigned size = (1 +
672 (dst.Indirect ? 1 : 0));
673
674 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
675 unsigned n = 0;
676
677 assert(dst.File != TGSI_FILE_NULL);
678 assert(dst.File != TGSI_FILE_CONSTANT);
679 assert(dst.File != TGSI_FILE_INPUT);
680 assert(dst.File != TGSI_FILE_SAMPLER);
681 assert(dst.File != TGSI_FILE_IMMEDIATE);
682 assert(dst.File < TGSI_FILE_COUNT);
683
684 out[n].value = 0;
685 out[n].dst.File = dst.File;
686 out[n].dst.WriteMask = dst.WriteMask;
687 out[n].dst.Indirect = dst.Indirect;
688 out[n].dst.Index = dst.Index;
689 n++;
690
691 if (dst.Indirect) {
692 out[n].value = 0;
693 out[n].src.File = TGSI_FILE_ADDRESS;
694 out[n].src.SwizzleX = dst.IndirectSwizzle;
695 out[n].src.SwizzleY = dst.IndirectSwizzle;
696 out[n].src.SwizzleZ = dst.IndirectSwizzle;
697 out[n].src.SwizzleW = dst.IndirectSwizzle;
698 out[n].src.Index = dst.IndirectIndex;
699 n++;
700 }
701
702 assert(n == size);
703 }
704
705
/**
 * Debug-build check that the operand counts supplied for an instruction
 * agree with the opcode table.  Compiles to nothing unless DEBUG is
 * defined.
 */
static void validate( unsigned opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifdef DEBUG
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );

   assert(info);
   if (!info)
      return;

   assert(nr_dst == info->num_dst);
   assert(nr_src == info->num_src);
#endif
}
719
720 struct ureg_emit_insn_result
721 ureg_emit_insn(struct ureg_program *ureg,
722 unsigned opcode,
723 boolean saturate,
724 boolean predicate,
725 boolean pred_negate,
726 unsigned pred_swizzle_x,
727 unsigned pred_swizzle_y,
728 unsigned pred_swizzle_z,
729 unsigned pred_swizzle_w,
730 unsigned num_dst,
731 unsigned num_src )
732 {
733 union tgsi_any_token *out;
734 uint count = predicate ? 2 : 1;
735 struct ureg_emit_insn_result result;
736
737 validate( opcode, num_dst, num_src );
738
739 out = get_tokens( ureg, DOMAIN_INSN, count );
740 out[0].insn = tgsi_default_instruction();
741 out[0].insn.Opcode = opcode;
742 out[0].insn.Saturate = saturate;
743 out[0].insn.NumDstRegs = num_dst;
744 out[0].insn.NumSrcRegs = num_src;
745
746 result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
747 result.extended_token = result.insn_token;
748
749 if (predicate) {
750 out[0].insn.Predicate = 1;
751 out[1].insn_predicate = tgsi_default_instruction_predicate();
752 out[1].insn_predicate.Negate = pred_negate;
753 out[1].insn_predicate.SwizzleX = pred_swizzle_x;
754 out[1].insn_predicate.SwizzleY = pred_swizzle_y;
755 out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
756 out[1].insn_predicate.SwizzleW = pred_swizzle_w;
757 }
758
759 ureg->nr_instructions++;
760
761 return result;
762 }
763
764
765 void
766 ureg_emit_label(struct ureg_program *ureg,
767 unsigned extended_token,
768 unsigned *label_token )
769 {
770 union tgsi_any_token *out, *insn;
771
772 if(!label_token)
773 return;
774
775 out = get_tokens( ureg, DOMAIN_INSN, 1 );
776 out[0].value = 0;
777
778 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
779 insn->insn.Label = 1;
780
781 *label_token = ureg->domain[DOMAIN_INSN].count - 1;
782 }
783
784 /* Will return a number which can be used in a label to point to the
785 * next instruction to be emitted.
786 */
787 unsigned
788 ureg_get_instruction_number( struct ureg_program *ureg )
789 {
790 return ureg->nr_instructions;
791 }
792
793 /* Patch a given label (expressed as a token number) to point to a
794 * given instruction (expressed as an instruction number).
795 */
796 void
797 ureg_fixup_label(struct ureg_program *ureg,
798 unsigned label_token,
799 unsigned instruction_number )
800 {
801 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
802
803 out->insn_label.Label = instruction_number;
804 }
805
806
807 void
808 ureg_emit_texture(struct ureg_program *ureg,
809 unsigned extended_token,
810 unsigned target )
811 {
812 union tgsi_any_token *out, *insn;
813
814 out = get_tokens( ureg, DOMAIN_INSN, 1 );
815 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
816
817 insn->insn.Texture = 1;
818
819 out[0].value = 0;
820 out[0].insn_texture.Texture = target;
821 }
822
823
824 void
825 ureg_fixup_insn_size(struct ureg_program *ureg,
826 unsigned insn )
827 {
828 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
829
830 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
831 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
832 }
833
834
835 void
836 ureg_insn(struct ureg_program *ureg,
837 unsigned opcode,
838 const struct ureg_dst *dst,
839 unsigned nr_dst,
840 const struct ureg_src *src,
841 unsigned nr_src )
842 {
843 struct ureg_emit_insn_result insn;
844 unsigned i;
845 boolean saturate;
846 boolean predicate;
847 boolean negate = FALSE;
848 unsigned swizzle[4] = { 0 };
849
850 saturate = nr_dst ? dst[0].Saturate : FALSE;
851 predicate = nr_dst ? dst[0].Predicate : FALSE;
852 if (predicate) {
853 negate = dst[0].PredNegate;
854 swizzle[0] = dst[0].PredSwizzleX;
855 swizzle[1] = dst[0].PredSwizzleY;
856 swizzle[2] = dst[0].PredSwizzleZ;
857 swizzle[3] = dst[0].PredSwizzleW;
858 }
859
860 insn = ureg_emit_insn(ureg,
861 opcode,
862 saturate,
863 predicate,
864 negate,
865 swizzle[0],
866 swizzle[1],
867 swizzle[2],
868 swizzle[3],
869 nr_dst,
870 nr_src);
871
872 for (i = 0; i < nr_dst; i++)
873 ureg_emit_dst( ureg, dst[i] );
874
875 for (i = 0; i < nr_src; i++)
876 ureg_emit_src( ureg, src[i] );
877
878 ureg_fixup_insn_size( ureg, insn.insn_token );
879 }
880
881 void
882 ureg_tex_insn(struct ureg_program *ureg,
883 unsigned opcode,
884 const struct ureg_dst *dst,
885 unsigned nr_dst,
886 unsigned target,
887 const struct ureg_src *src,
888 unsigned nr_src )
889 {
890 struct ureg_emit_insn_result insn;
891 unsigned i;
892 boolean saturate;
893 boolean predicate;
894 boolean negate = FALSE;
895 unsigned swizzle[4] = { 0 };
896
897 saturate = nr_dst ? dst[0].Saturate : FALSE;
898 predicate = nr_dst ? dst[0].Predicate : FALSE;
899 if (predicate) {
900 negate = dst[0].PredNegate;
901 swizzle[0] = dst[0].PredSwizzleX;
902 swizzle[1] = dst[0].PredSwizzleY;
903 swizzle[2] = dst[0].PredSwizzleZ;
904 swizzle[3] = dst[0].PredSwizzleW;
905 }
906
907 insn = ureg_emit_insn(ureg,
908 opcode,
909 saturate,
910 predicate,
911 negate,
912 swizzle[0],
913 swizzle[1],
914 swizzle[2],
915 swizzle[3],
916 nr_dst,
917 nr_src);
918
919 ureg_emit_texture( ureg, insn.extended_token, target );
920
921 for (i = 0; i < nr_dst; i++)
922 ureg_emit_dst( ureg, dst[i] );
923
924 for (i = 0; i < nr_src; i++)
925 ureg_emit_src( ureg, src[i] );
926
927 ureg_fixup_insn_size( ureg, insn.insn_token );
928 }
929
930
931 void
932 ureg_label_insn(struct ureg_program *ureg,
933 unsigned opcode,
934 const struct ureg_src *src,
935 unsigned nr_src,
936 unsigned *label_token )
937 {
938 struct ureg_emit_insn_result insn;
939 unsigned i;
940
941 insn = ureg_emit_insn(ureg,
942 opcode,
943 FALSE,
944 FALSE,
945 FALSE,
946 TGSI_SWIZZLE_X,
947 TGSI_SWIZZLE_Y,
948 TGSI_SWIZZLE_Z,
949 TGSI_SWIZZLE_W,
950 0,
951 nr_src);
952
953 ureg_emit_label( ureg, insn.extended_token, label_token );
954
955 for (i = 0; i < nr_src; i++)
956 ureg_emit_src( ureg, src[i] );
957
958 ureg_fixup_insn_size( ureg, insn.insn_token );
959 }
960
961
962
963 static void emit_decl( struct ureg_program *ureg,
964 unsigned file,
965 unsigned index,
966 unsigned semantic_name,
967 unsigned semantic_index,
968 unsigned interp )
969 {
970 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 );
971
972 out[0].value = 0;
973 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
974 out[0].decl.NrTokens = 3;
975 out[0].decl.File = file;
976 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
977 out[0].decl.Interpolate = interp;
978 out[0].decl.Semantic = 1;
979
980 out[1].value = 0;
981 out[1].decl_range.First =
982 out[1].decl_range.Last = index;
983
984 out[2].value = 0;
985 out[2].decl_semantic.Name = semantic_name;
986 out[2].decl_semantic.Index = semantic_index;
987
988 }
989
990
991 static void emit_decl_range( struct ureg_program *ureg,
992 unsigned file,
993 unsigned first,
994 unsigned count )
995 {
996 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
997
998 out[0].value = 0;
999 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1000 out[0].decl.NrTokens = 2;
1001 out[0].decl.File = file;
1002 out[0].decl.UsageMask = 0xf;
1003 out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
1004 out[0].decl.Semantic = 0;
1005
1006 out[1].value = 0;
1007 out[1].decl_range.First = first;
1008 out[1].decl_range.Last = first + count - 1;
1009 }
1010
1011 static void
1012 emit_immediate( struct ureg_program *ureg,
1013 const unsigned *v,
1014 unsigned type )
1015 {
1016 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1017
1018 out[0].value = 0;
1019 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1020 out[0].imm.NrTokens = 5;
1021 out[0].imm.DataType = type;
1022 out[0].imm.Padding = 0;
1023
1024 out[1].imm_data.Uint = v[0];
1025 out[2].imm_data.Uint = v[1];
1026 out[3].imm_data.Uint = v[2];
1027 out[4].imm_data.Uint = v[3];
1028 }
1029
1030
1031
1032
1033 static void emit_decls( struct ureg_program *ureg )
1034 {
1035 unsigned i;
1036
1037 if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
1038 for (i = 0; i < UREG_MAX_INPUT; i++) {
1039 if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
1040 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
1041 }
1042 }
1043 } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
1044 for (i = 0; i < ureg->nr_fs_inputs; i++) {
1045 emit_decl( ureg,
1046 TGSI_FILE_INPUT,
1047 i,
1048 ureg->fs_input[i].semantic_name,
1049 ureg->fs_input[i].semantic_index,
1050 ureg->fs_input[i].interp );
1051 }
1052 } else {
1053 for (i = 0; i < ureg->nr_gs_inputs; i++) {
1054 emit_decl_range(ureg,
1055 TGSI_FILE_INPUT,
1056 ureg->gs_input[i].index,
1057 1);
1058 }
1059 }
1060
1061 for (i = 0; i < ureg->nr_outputs; i++) {
1062 emit_decl( ureg,
1063 TGSI_FILE_OUTPUT,
1064 i,
1065 ureg->output[i].semantic_name,
1066 ureg->output[i].semantic_index,
1067 TGSI_INTERPOLATE_CONSTANT );
1068 }
1069
1070 for (i = 0; i < ureg->nr_samplers; i++) {
1071 emit_decl_range( ureg,
1072 TGSI_FILE_SAMPLER,
1073 ureg->sampler[i].Index, 1 );
1074 }
1075
1076 if (ureg->nr_constant_ranges) {
1077 for (i = 0; i < ureg->nr_constant_ranges; i++)
1078 emit_decl_range( ureg,
1079 TGSI_FILE_CONSTANT,
1080 ureg->constant_range[i].first,
1081 (ureg->constant_range[i].last + 1 -
1082 ureg->constant_range[i].first) );
1083 }
1084
1085 if (ureg->nr_temps) {
1086 emit_decl_range( ureg,
1087 TGSI_FILE_TEMPORARY,
1088 0, ureg->nr_temps );
1089 }
1090
1091 if (ureg->nr_addrs) {
1092 emit_decl_range( ureg,
1093 TGSI_FILE_ADDRESS,
1094 0, ureg->nr_addrs );
1095 }
1096
1097 if (ureg->nr_loops) {
1098 emit_decl_range(ureg,
1099 TGSI_FILE_LOOP,
1100 0,
1101 ureg->nr_loops);
1102 }
1103
1104 if (ureg->nr_preds) {
1105 emit_decl_range(ureg,
1106 TGSI_FILE_PREDICATE,
1107 0,
1108 ureg->nr_preds);
1109 }
1110
1111 for (i = 0; i < ureg->nr_immediates; i++) {
1112 emit_immediate( ureg,
1113 ureg->immediate[i].value.u,
1114 ureg->immediate[i].type );
1115 }
1116 }
1117
1118 /* Append the instruction tokens onto the declarations to build a
1119 * contiguous stream suitable to send to the driver.
1120 */
1121 static void copy_instructions( struct ureg_program *ureg )
1122 {
1123 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
1124 union tgsi_any_token *out = get_tokens( ureg,
1125 DOMAIN_DECL,
1126 nr_tokens );
1127
1128 memcpy(out,
1129 ureg->domain[DOMAIN_INSN].tokens,
1130 nr_tokens * sizeof out[0] );
1131 }
1132
1133
1134 static void
1135 fixup_header_size(struct ureg_program *ureg)
1136 {
1137 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
1138
1139 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
1140 }
1141
1142
1143 static void
1144 emit_header( struct ureg_program *ureg )
1145 {
1146 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1147
1148 out[0].header.HeaderSize = 2;
1149 out[0].header.BodySize = 0;
1150
1151 out[1].processor.Processor = ureg->processor;
1152 out[1].processor.Padding = 0;
1153 }
1154
1155
1156 const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
1157 {
1158 const struct tgsi_token *tokens;
1159
1160 emit_header( ureg );
1161 emit_decls( ureg );
1162 copy_instructions( ureg );
1163 fixup_header_size( ureg );
1164
1165 if (ureg->domain[0].tokens == error_tokens ||
1166 ureg->domain[1].tokens == error_tokens) {
1167 debug_printf("%s: error in generated shader\n", __FUNCTION__);
1168 assert(0);
1169 return NULL;
1170 }
1171
1172 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1173
1174 if (0) {
1175 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
1176 ureg->domain[DOMAIN_DECL].count);
1177 tgsi_dump( tokens, 0 );
1178 }
1179
1180 #if DEBUG
1181 if (tokens && !tgsi_sanity_check(tokens)) {
1182 debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
1183 tgsi_dump(tokens, 0);
1184 assert(0);
1185 }
1186 #endif
1187
1188
1189 return tokens;
1190 }
1191
1192
1193 void *ureg_create_shader( struct ureg_program *ureg,
1194 struct pipe_context *pipe )
1195 {
1196 struct pipe_shader_state state;
1197
1198 state.tokens = ureg_finalize(ureg);
1199 if(!state.tokens)
1200 return NULL;
1201
1202 if (ureg->processor == TGSI_PROCESSOR_VERTEX)
1203 return pipe->create_vs_state( pipe, &state );
1204 else
1205 return pipe->create_fs_state( pipe, &state );
1206 }
1207
1208
1209 const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
1210 unsigned *nr_tokens )
1211 {
1212 const struct tgsi_token *tokens;
1213
1214 ureg_finalize(ureg);
1215
1216 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1217
1218 if (nr_tokens)
1219 *nr_tokens = ureg->domain[DOMAIN_DECL].size;
1220
1221 ureg->domain[DOMAIN_DECL].tokens = 0;
1222 ureg->domain[DOMAIN_DECL].size = 0;
1223 ureg->domain[DOMAIN_DECL].order = 0;
1224 ureg->domain[DOMAIN_DECL].count = 0;
1225
1226 return tokens;
1227 }
1228
1229
1230 struct ureg_program *ureg_create( unsigned processor )
1231 {
1232 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
1233 if (ureg == NULL)
1234 return NULL;
1235
1236 ureg->processor = processor;
1237 return ureg;
1238 }
1239
1240
1241 void ureg_destroy( struct ureg_program *ureg )
1242 {
1243 unsigned i;
1244
1245 for (i = 0; i < Elements(ureg->domain); i++) {
1246 if (ureg->domain[i].tokens &&
1247 ureg->domain[i].tokens != error_tokens)
1248 FREE(ureg->domain[i].tokens);
1249 }
1250
1251 FREE(ureg);
1252 }