ureg: add buffer support to ureg
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_ureg.c
1 /**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_screen.h"
30 #include "pipe/p_context.h"
31 #include "pipe/p_state.h"
32 #include "tgsi/tgsi_ureg.h"
33 #include "tgsi/tgsi_build.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_dump.h"
36 #include "tgsi/tgsi_sanity.h"
37 #include "util/u_debug.h"
38 #include "util/u_inlines.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "util/u_bitmask.h"
42
43 union tgsi_any_token {
44 struct tgsi_header header;
45 struct tgsi_processor processor;
46 struct tgsi_token token;
47 struct tgsi_property prop;
48 struct tgsi_property_data prop_data;
49 struct tgsi_declaration decl;
50 struct tgsi_declaration_range decl_range;
51 struct tgsi_declaration_dimension decl_dim;
52 struct tgsi_declaration_interp decl_interp;
53 struct tgsi_declaration_image decl_image;
54 struct tgsi_declaration_semantic decl_semantic;
55 struct tgsi_declaration_sampler_view decl_sampler_view;
56 struct tgsi_declaration_array array;
57 struct tgsi_immediate imm;
58 union tgsi_immediate_data imm_data;
59 struct tgsi_instruction insn;
60 struct tgsi_instruction_predicate insn_predicate;
61 struct tgsi_instruction_label insn_label;
62 struct tgsi_instruction_texture insn_texture;
63 struct tgsi_texture_offset insn_texture_offset;
64 struct tgsi_src_register src;
65 struct tgsi_ind_register ind;
66 struct tgsi_dimension dim;
67 struct tgsi_dst_register dst;
68 unsigned value;
69 };
70
71
/**
 * A growable array of tokens (one per token "domain").
 */
struct ureg_tokens {
   union tgsi_any_token *tokens;   /* storage, or the shared error array */
   unsigned size;                  /* capacity of `tokens`, in tokens */
   unsigned order;                 /* capacity is grown to (1 << order) */
   unsigned count;                 /* number of tokens handed out so far */
};
78
/* Capacity limits of the fixed-size tables in struct ureg_program. */
#define UREG_MAX_INPUT            PIPE_MAX_SHADER_INPUTS
#define UREG_MAX_SYSTEM_VALUE     PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT           PIPE_MAX_SHADER_OUTPUTS
#define UREG_MAX_CONSTANT_RANGE   32
#define UREG_MAX_IMMEDIATE        4096
#define UREG_MAX_ADDR             3
#define UREG_MAX_PRED             1
#define UREG_MAX_ARRAY_TEMPS      256
87
88 struct const_decl {
89 struct {
90 unsigned first;
91 unsigned last;
92 } constant_range[UREG_MAX_CONSTANT_RANGE];
93 unsigned nr_constant_ranges;
94 };
95
/* Indices into ureg_program::domain[]: declaration and instruction tokens. */
#define DOMAIN_DECL   0
#define DOMAIN_INSN   1
98
99 struct ureg_program
100 {
101 unsigned processor;
102 bool supports_any_inout_decl_range;
103
104 struct {
105 unsigned semantic_name;
106 unsigned semantic_index;
107 unsigned interp;
108 unsigned char cylindrical_wrap;
109 unsigned interp_location;
110 unsigned first;
111 unsigned last;
112 unsigned array_id;
113 } input[UREG_MAX_INPUT];
114 unsigned nr_inputs, nr_input_regs;
115
116 unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];
117
118 struct {
119 unsigned semantic_name;
120 unsigned semantic_index;
121 } system_value[UREG_MAX_SYSTEM_VALUE];
122 unsigned nr_system_values;
123
124 struct {
125 unsigned semantic_name;
126 unsigned semantic_index;
127 unsigned usage_mask; /* = TGSI_WRITEMASK_* */
128 unsigned first;
129 unsigned last;
130 unsigned array_id;
131 } output[UREG_MAX_OUTPUT];
132 unsigned nr_outputs, nr_output_regs;
133
134 struct {
135 union {
136 float f[4];
137 unsigned u[4];
138 int i[4];
139 } value;
140 unsigned nr;
141 unsigned type;
142 } immediate[UREG_MAX_IMMEDIATE];
143 unsigned nr_immediates;
144
145 struct ureg_src sampler[PIPE_MAX_SAMPLERS];
146 unsigned nr_samplers;
147
148 struct {
149 unsigned index;
150 unsigned target;
151 unsigned return_type_x;
152 unsigned return_type_y;
153 unsigned return_type_z;
154 unsigned return_type_w;
155 } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
156 unsigned nr_sampler_views;
157
158 struct {
159 unsigned index;
160 unsigned target;
161 unsigned format;
162 boolean wr;
163 boolean raw;
164 } image[PIPE_MAX_SHADER_IMAGES];
165 unsigned nr_images;
166
167 struct {
168 unsigned index;
169 bool atomic;
170 } buffer[PIPE_MAX_SHADER_BUFFERS];
171 unsigned nr_buffers;
172
173 struct util_bitmask *free_temps;
174 struct util_bitmask *local_temps;
175 struct util_bitmask *decl_temps;
176 unsigned nr_temps;
177
178 unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
179 unsigned nr_array_temps;
180
181 struct const_decl const_decls;
182 struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
183
184 unsigned properties[TGSI_PROPERTY_COUNT];
185
186 unsigned nr_addrs;
187 unsigned nr_preds;
188 unsigned nr_instructions;
189
190 struct ureg_tokens domain[2];
191 };
192
193 static union tgsi_any_token error_tokens[32];
194
195 static void tokens_error( struct ureg_tokens *tokens )
196 {
197 if (tokens->tokens && tokens->tokens != error_tokens)
198 FREE(tokens->tokens);
199
200 tokens->tokens = error_tokens;
201 tokens->size = Elements(error_tokens);
202 tokens->count = 0;
203 }
204
205
206 static void tokens_expand( struct ureg_tokens *tokens,
207 unsigned count )
208 {
209 unsigned old_size = tokens->size * sizeof(unsigned);
210
211 if (tokens->tokens == error_tokens) {
212 return;
213 }
214
215 while (tokens->count + count > tokens->size) {
216 tokens->size = (1 << ++tokens->order);
217 }
218
219 tokens->tokens = REALLOC(tokens->tokens,
220 old_size,
221 tokens->size * sizeof(unsigned));
222 if (tokens->tokens == NULL) {
223 tokens_error(tokens);
224 }
225 }
226
227 static void set_bad( struct ureg_program *ureg )
228 {
229 tokens_error(&ureg->domain[0]);
230 }
231
232
233
234 static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
235 unsigned domain,
236 unsigned count )
237 {
238 struct ureg_tokens *tokens = &ureg->domain[domain];
239 union tgsi_any_token *result;
240
241 if (tokens->count + count > tokens->size)
242 tokens_expand(tokens, count);
243
244 result = &tokens->tokens[tokens->count];
245 tokens->count += count;
246 return result;
247 }
248
249
250 static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
251 unsigned domain,
252 unsigned nr )
253 {
254 if (ureg->domain[domain].tokens == error_tokens)
255 return &error_tokens[0];
256
257 return &ureg->domain[domain].tokens[nr];
258 }
259
260 void
261 ureg_property(struct ureg_program *ureg, unsigned name, unsigned value)
262 {
263 assert(name < Elements(ureg->properties));
264 ureg->properties[name] = value;
265 }
266
267 struct ureg_src
268 ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
269 unsigned semantic_name,
270 unsigned semantic_index,
271 unsigned interp_mode,
272 unsigned cylindrical_wrap,
273 unsigned interp_location,
274 unsigned array_id,
275 unsigned array_size)
276 {
277 unsigned i;
278
279 for (i = 0; i < ureg->nr_inputs; i++) {
280 if (ureg->input[i].semantic_name == semantic_name &&
281 ureg->input[i].semantic_index == semantic_index) {
282 assert(ureg->input[i].interp == interp_mode);
283 assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap);
284 assert(ureg->input[i].interp_location == interp_location);
285 assert(ureg->input[i].array_id == array_id);
286 goto out;
287 }
288 }
289
290 if (ureg->nr_inputs < UREG_MAX_INPUT) {
291 assert(array_size >= 1);
292 ureg->input[i].semantic_name = semantic_name;
293 ureg->input[i].semantic_index = semantic_index;
294 ureg->input[i].interp = interp_mode;
295 ureg->input[i].cylindrical_wrap = cylindrical_wrap;
296 ureg->input[i].interp_location = interp_location;
297 ureg->input[i].first = ureg->nr_input_regs;
298 ureg->input[i].last = ureg->nr_input_regs + array_size - 1;
299 ureg->input[i].array_id = array_id;
300 ureg->nr_input_regs += array_size;
301 ureg->nr_inputs++;
302 } else {
303 set_bad(ureg);
304 }
305
306 out:
307 return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first,
308 array_id);
309 }
310
311
312 struct ureg_src
313 ureg_DECL_vs_input( struct ureg_program *ureg,
314 unsigned index )
315 {
316 assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
317 assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));
318
319 ureg->vs_inputs[index/32] |= 1 << (index % 32);
320 return ureg_src_register( TGSI_FILE_INPUT, index );
321 }
322
323
324 struct ureg_src
325 ureg_DECL_input(struct ureg_program *ureg,
326 unsigned semantic_name,
327 unsigned semantic_index,
328 unsigned array_id,
329 unsigned array_size)
330 {
331 return ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index,
332 0, 0, 0, array_id, array_size);
333 }
334
335
336 struct ureg_src
337 ureg_DECL_system_value(struct ureg_program *ureg,
338 unsigned semantic_name,
339 unsigned semantic_index)
340 {
341 unsigned i;
342
343 for (i = 0; i < ureg->nr_system_values; i++) {
344 if (ureg->system_value[i].semantic_name == semantic_name &&
345 ureg->system_value[i].semantic_index == semantic_index) {
346 goto out;
347 }
348 }
349
350 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
351 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
352 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
353 i = ureg->nr_system_values;
354 ureg->nr_system_values++;
355 } else {
356 set_bad(ureg);
357 }
358
359 out:
360 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
361 }
362
363
364 struct ureg_dst
365 ureg_DECL_output_masked(struct ureg_program *ureg,
366 unsigned name,
367 unsigned index,
368 unsigned usage_mask,
369 unsigned array_id,
370 unsigned array_size)
371 {
372 unsigned i;
373
374 assert(usage_mask != 0);
375
376 for (i = 0; i < ureg->nr_outputs; i++) {
377 if (ureg->output[i].semantic_name == name &&
378 ureg->output[i].semantic_index == index) {
379 assert(ureg->output[i].array_id == array_id);
380 ureg->output[i].usage_mask |= usage_mask;
381 goto out;
382 }
383 }
384
385 if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
386 ureg->output[i].semantic_name = name;
387 ureg->output[i].semantic_index = index;
388 ureg->output[i].usage_mask = usage_mask;
389 ureg->output[i].first = ureg->nr_output_regs;
390 ureg->output[i].last = ureg->nr_output_regs + array_size - 1;
391 ureg->output[i].array_id = array_id;
392 ureg->nr_output_regs += array_size;
393 ureg->nr_outputs++;
394 }
395 else {
396 set_bad( ureg );
397 }
398
399 out:
400 return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
401 array_id);
402 }
403
404
405 struct ureg_dst
406 ureg_DECL_output(struct ureg_program *ureg,
407 unsigned name,
408 unsigned index)
409 {
410 return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
411 0, 1);
412 }
413
414 struct ureg_dst
415 ureg_DECL_output_array(struct ureg_program *ureg,
416 unsigned semantic_name,
417 unsigned semantic_index,
418 unsigned array_id,
419 unsigned array_size)
420 {
421 return ureg_DECL_output_masked(ureg, semantic_name, semantic_index,
422 TGSI_WRITEMASK_XYZW,
423 array_id, array_size);
424 }
425
426
427 /* Returns a new constant register. Keep track of which have been
428 * referred to so that we can emit decls later.
429 *
430 * Constant operands declared with this function must be addressed
431 * with a two-dimensional index.
432 *
433 * There is nothing in this code to bind this constant to any tracked
434 * value or manage any constant_buffer contents -- that's the
435 * resposibility of the calling code.
436 */
437 void
438 ureg_DECL_constant2D(struct ureg_program *ureg,
439 unsigned first,
440 unsigned last,
441 unsigned index2D)
442 {
443 struct const_decl *decl = &ureg->const_decls2D[index2D];
444
445 assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
446
447 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
448 uint i = decl->nr_constant_ranges++;
449
450 decl->constant_range[i].first = first;
451 decl->constant_range[i].last = last;
452 }
453 }
454
455
456 /* A one-dimensional, depricated version of ureg_DECL_constant2D().
457 *
458 * Constant operands declared with this function must be addressed
459 * with a one-dimensional index.
460 */
461 struct ureg_src
462 ureg_DECL_constant(struct ureg_program *ureg,
463 unsigned index)
464 {
465 struct const_decl *decl = &ureg->const_decls;
466 unsigned minconst = index, maxconst = index;
467 unsigned i;
468
469 /* Inside existing range?
470 */
471 for (i = 0; i < decl->nr_constant_ranges; i++) {
472 if (decl->constant_range[i].first <= index &&
473 decl->constant_range[i].last >= index) {
474 goto out;
475 }
476 }
477
478 /* Extend existing range?
479 */
480 for (i = 0; i < decl->nr_constant_ranges; i++) {
481 if (decl->constant_range[i].last == index - 1) {
482 decl->constant_range[i].last = index;
483 goto out;
484 }
485
486 if (decl->constant_range[i].first == index + 1) {
487 decl->constant_range[i].first = index;
488 goto out;
489 }
490
491 minconst = MIN2(minconst, decl->constant_range[i].first);
492 maxconst = MAX2(maxconst, decl->constant_range[i].last);
493 }
494
495 /* Create new range?
496 */
497 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
498 i = decl->nr_constant_ranges++;
499 decl->constant_range[i].first = index;
500 decl->constant_range[i].last = index;
501 goto out;
502 }
503
504 /* Collapse all ranges down to one:
505 */
506 i = 0;
507 decl->constant_range[0].first = minconst;
508 decl->constant_range[0].last = maxconst;
509 decl->nr_constant_ranges = 1;
510
511 out:
512 assert(i < decl->nr_constant_ranges);
513 assert(decl->constant_range[i].first <= index);
514 assert(decl->constant_range[i].last >= index);
515 return ureg_src_register(TGSI_FILE_CONSTANT, index);
516 }
517
518 static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
519 boolean local )
520 {
521 unsigned i;
522
523 /* Look for a released temporary.
524 */
525 for (i = util_bitmask_get_first_index(ureg->free_temps);
526 i != UTIL_BITMASK_INVALID_INDEX;
527 i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
528 if (util_bitmask_get(ureg->local_temps, i) == local)
529 break;
530 }
531
532 /* Or allocate a new one.
533 */
534 if (i == UTIL_BITMASK_INVALID_INDEX) {
535 i = ureg->nr_temps++;
536
537 if (local)
538 util_bitmask_set(ureg->local_temps, i);
539
540 /* Start a new declaration when the local flag changes */
541 if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
542 util_bitmask_set(ureg->decl_temps, i);
543 }
544
545 util_bitmask_clear(ureg->free_temps, i);
546
547 return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
548 }
549
550 struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
551 {
552 return alloc_temporary(ureg, FALSE);
553 }
554
555 struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
556 {
557 return alloc_temporary(ureg, TRUE);
558 }
559
560 struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
561 unsigned size,
562 boolean local )
563 {
564 unsigned i = ureg->nr_temps;
565 struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );
566
567 if (local)
568 util_bitmask_set(ureg->local_temps, i);
569
570 /* Always start a new declaration at the start */
571 util_bitmask_set(ureg->decl_temps, i);
572
573 ureg->nr_temps += size;
574
575 /* and also at the end of the array */
576 util_bitmask_set(ureg->decl_temps, ureg->nr_temps);
577
578 if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
579 ureg->array_temps[ureg->nr_array_temps++] = i;
580 dst.ArrayID = ureg->nr_array_temps;
581 }
582
583 return dst;
584 }
585
586 void ureg_release_temporary( struct ureg_program *ureg,
587 struct ureg_dst tmp )
588 {
589 if(tmp.File == TGSI_FILE_TEMPORARY)
590 util_bitmask_set(ureg->free_temps, tmp.Index);
591 }
592
593
594 /* Allocate a new address register.
595 */
596 struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
597 {
598 if (ureg->nr_addrs < UREG_MAX_ADDR)
599 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
600
601 assert( 0 );
602 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
603 }
604
605 /* Allocate a new predicate register.
606 */
607 struct ureg_dst
608 ureg_DECL_predicate(struct ureg_program *ureg)
609 {
610 if (ureg->nr_preds < UREG_MAX_PRED) {
611 return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++);
612 }
613
614 assert(0);
615 return ureg_dst_register(TGSI_FILE_PREDICATE, 0);
616 }
617
618 /* Allocate a new sampler.
619 */
620 struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
621 unsigned nr )
622 {
623 unsigned i;
624
625 for (i = 0; i < ureg->nr_samplers; i++)
626 if (ureg->sampler[i].Index == nr)
627 return ureg->sampler[i];
628
629 if (i < PIPE_MAX_SAMPLERS) {
630 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
631 ureg->nr_samplers++;
632 return ureg->sampler[i];
633 }
634
635 assert( 0 );
636 return ureg->sampler[0];
637 }
638
639 /*
640 * Allocate a new shader sampler view.
641 */
642 struct ureg_src
643 ureg_DECL_sampler_view(struct ureg_program *ureg,
644 unsigned index,
645 unsigned target,
646 unsigned return_type_x,
647 unsigned return_type_y,
648 unsigned return_type_z,
649 unsigned return_type_w)
650 {
651 struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
652 uint i;
653
654 for (i = 0; i < ureg->nr_sampler_views; i++) {
655 if (ureg->sampler_view[i].index == index) {
656 return reg;
657 }
658 }
659
660 if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
661 ureg->sampler_view[i].index = index;
662 ureg->sampler_view[i].target = target;
663 ureg->sampler_view[i].return_type_x = return_type_x;
664 ureg->sampler_view[i].return_type_y = return_type_y;
665 ureg->sampler_view[i].return_type_z = return_type_z;
666 ureg->sampler_view[i].return_type_w = return_type_w;
667 ureg->nr_sampler_views++;
668 return reg;
669 }
670
671 assert(0);
672 return reg;
673 }
674
675 /* Allocate a new image.
676 */
677 struct ureg_src
678 ureg_DECL_image(struct ureg_program *ureg,
679 unsigned index,
680 unsigned target,
681 unsigned format,
682 boolean wr,
683 boolean raw)
684 {
685 struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
686 unsigned i;
687
688 for (i = 0; i < ureg->nr_images; i++)
689 if (ureg->image[i].index == index)
690 return reg;
691
692 if (i < PIPE_MAX_SHADER_IMAGES) {
693 ureg->image[i].index = index;
694 ureg->image[i].target = target;
695 ureg->image[i].wr = wr;
696 ureg->image[i].raw = raw;
697 ureg->image[i].format = format;
698 ureg->nr_images++;
699 return reg;
700 }
701
702 assert(0);
703 return reg;
704 }
705
706 /* Allocate a new buffer.
707 */
708 struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
709 bool atomic)
710 {
711 struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
712 unsigned i;
713
714 for (i = 0; i < ureg->nr_buffers; i++)
715 if (ureg->buffer[i].index == nr)
716 return reg;
717
718 if (i < PIPE_MAX_SHADER_BUFFERS) {
719 ureg->buffer[i].index = nr;
720 ureg->buffer[i].atomic = atomic;
721 ureg->nr_buffers++;
722 return reg;
723 }
724
725 assert(0);
726 return reg;
727 }
728
729 static int
730 match_or_expand_immediate64( const unsigned *v,
731 int type,
732 unsigned nr,
733 unsigned *v2,
734 unsigned *pnr2,
735 unsigned *swizzle )
736 {
737 unsigned nr2 = *pnr2;
738 unsigned i, j;
739 *swizzle = 0;
740
741 for (i = 0; i < nr; i += 2) {
742 boolean found = FALSE;
743
744 for (j = 0; j < nr2 && !found; j += 2) {
745 if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) {
746 *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2));
747 found = TRUE;
748 }
749 }
750 if (!found) {
751 if ((nr2) >= 4) {
752 return FALSE;
753 }
754
755 v2[nr2] = v[i];
756 v2[nr2 + 1] = v[i + 1];
757
758 *swizzle |= (nr2 << (i * 2)) | ((nr2 + 1) << ((i + 1) * 2));
759 nr2 += 2;
760 }
761 }
762
763 /* Actually expand immediate only when fully succeeded.
764 */
765 *pnr2 = nr2;
766 return TRUE;
767 }
768
769 static int
770 match_or_expand_immediate( const unsigned *v,
771 int type,
772 unsigned nr,
773 unsigned *v2,
774 unsigned *pnr2,
775 unsigned *swizzle )
776 {
777 unsigned nr2 = *pnr2;
778 unsigned i, j;
779
780 if (type == TGSI_IMM_FLOAT64)
781 return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle);
782
783 *swizzle = 0;
784
785 for (i = 0; i < nr; i++) {
786 boolean found = FALSE;
787
788 for (j = 0; j < nr2 && !found; j++) {
789 if (v[i] == v2[j]) {
790 *swizzle |= j << (i * 2);
791 found = TRUE;
792 }
793 }
794
795 if (!found) {
796 if (nr2 >= 4) {
797 return FALSE;
798 }
799
800 v2[nr2] = v[i];
801 *swizzle |= nr2 << (i * 2);
802 nr2++;
803 }
804 }
805
806 /* Actually expand immediate only when fully succeeded.
807 */
808 *pnr2 = nr2;
809 return TRUE;
810 }
811
812
813 static struct ureg_src
814 decl_immediate( struct ureg_program *ureg,
815 const unsigned *v,
816 unsigned nr,
817 unsigned type )
818 {
819 unsigned i, j;
820 unsigned swizzle = 0;
821
822 /* Could do a first pass where we examine all existing immediates
823 * without expanding.
824 */
825
826 for (i = 0; i < ureg->nr_immediates; i++) {
827 if (ureg->immediate[i].type != type) {
828 continue;
829 }
830 if (match_or_expand_immediate(v,
831 type,
832 nr,
833 ureg->immediate[i].value.u,
834 &ureg->immediate[i].nr,
835 &swizzle)) {
836 goto out;
837 }
838 }
839
840 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
841 i = ureg->nr_immediates++;
842 ureg->immediate[i].type = type;
843 if (match_or_expand_immediate(v,
844 type,
845 nr,
846 ureg->immediate[i].value.u,
847 &ureg->immediate[i].nr,
848 &swizzle)) {
849 goto out;
850 }
851 }
852
853 set_bad(ureg);
854
855 out:
856 /* Make sure that all referenced elements are from this immediate.
857 * Has the effect of making size-one immediates into scalars.
858 */
859 if (type == TGSI_IMM_FLOAT64) {
860 for (j = nr; j < 4; j+=2) {
861 swizzle |= (swizzle & 0xf) << (j * 2);
862 }
863 } else {
864 for (j = nr; j < 4; j++) {
865 swizzle |= (swizzle & 0x3) << (j * 2);
866 }
867 }
868 return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
869 (swizzle >> 0) & 0x3,
870 (swizzle >> 2) & 0x3,
871 (swizzle >> 4) & 0x3,
872 (swizzle >> 6) & 0x3);
873 }
874
875
876 struct ureg_src
877 ureg_DECL_immediate( struct ureg_program *ureg,
878 const float *v,
879 unsigned nr )
880 {
881 union {
882 float f[4];
883 unsigned u[4];
884 } fu;
885 unsigned int i;
886
887 for (i = 0; i < nr; i++) {
888 fu.f[i] = v[i];
889 }
890
891 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
892 }
893
894 struct ureg_src
895 ureg_DECL_immediate_f64( struct ureg_program *ureg,
896 const double *v,
897 unsigned nr )
898 {
899 union {
900 unsigned u[4];
901 double d[2];
902 } fu;
903 unsigned int i;
904
905 assert((nr / 2) < 3);
906 for (i = 0; i < nr / 2; i++) {
907 fu.d[i] = v[i];
908 }
909
910 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64);
911 }
912
913 struct ureg_src
914 ureg_DECL_immediate_uint( struct ureg_program *ureg,
915 const unsigned *v,
916 unsigned nr )
917 {
918 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
919 }
920
921
922 struct ureg_src
923 ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
924 const unsigned *v,
925 unsigned nr )
926 {
927 uint index;
928 uint i;
929
930 if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
931 set_bad(ureg);
932 return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
933 }
934
935 index = ureg->nr_immediates;
936 ureg->nr_immediates += (nr + 3) / 4;
937
938 for (i = index; i < ureg->nr_immediates; i++) {
939 ureg->immediate[i].type = TGSI_IMM_UINT32;
940 ureg->immediate[i].nr = nr > 4 ? 4 : nr;
941 memcpy(ureg->immediate[i].value.u,
942 &v[(i - index) * 4],
943 ureg->immediate[i].nr * sizeof(uint));
944 nr -= 4;
945 }
946
947 return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
948 }
949
950
951 struct ureg_src
952 ureg_DECL_immediate_int( struct ureg_program *ureg,
953 const int *v,
954 unsigned nr )
955 {
956 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
957 }
958
959
960 void
961 ureg_emit_src( struct ureg_program *ureg,
962 struct ureg_src src )
963 {
964 unsigned size = 1 + (src.Indirect ? 1 : 0) +
965 (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);
966
967 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
968 unsigned n = 0;
969
970 assert(src.File != TGSI_FILE_NULL);
971 assert(src.File < TGSI_FILE_COUNT);
972
973 out[n].value = 0;
974 out[n].src.File = src.File;
975 out[n].src.SwizzleX = src.SwizzleX;
976 out[n].src.SwizzleY = src.SwizzleY;
977 out[n].src.SwizzleZ = src.SwizzleZ;
978 out[n].src.SwizzleW = src.SwizzleW;
979 out[n].src.Index = src.Index;
980 out[n].src.Negate = src.Negate;
981 out[0].src.Absolute = src.Absolute;
982 n++;
983
984 if (src.Indirect) {
985 out[0].src.Indirect = 1;
986 out[n].value = 0;
987 out[n].ind.File = src.IndirectFile;
988 out[n].ind.Swizzle = src.IndirectSwizzle;
989 out[n].ind.Index = src.IndirectIndex;
990 if (!ureg->supports_any_inout_decl_range &&
991 (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
992 out[n].ind.ArrayID = 0;
993 else
994 out[n].ind.ArrayID = src.ArrayID;
995 n++;
996 }
997
998 if (src.Dimension) {
999 out[0].src.Dimension = 1;
1000 out[n].dim.Dimension = 0;
1001 out[n].dim.Padding = 0;
1002 if (src.DimIndirect) {
1003 out[n].dim.Indirect = 1;
1004 out[n].dim.Index = src.DimensionIndex;
1005 n++;
1006 out[n].value = 0;
1007 out[n].ind.File = src.DimIndFile;
1008 out[n].ind.Swizzle = src.DimIndSwizzle;
1009 out[n].ind.Index = src.DimIndIndex;
1010 if (!ureg->supports_any_inout_decl_range &&
1011 (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
1012 out[n].ind.ArrayID = 0;
1013 else
1014 out[n].ind.ArrayID = src.ArrayID;
1015 } else {
1016 out[n].dim.Indirect = 0;
1017 out[n].dim.Index = src.DimensionIndex;
1018 }
1019 n++;
1020 }
1021
1022 assert(n == size);
1023 }
1024
1025
1026 void
1027 ureg_emit_dst( struct ureg_program *ureg,
1028 struct ureg_dst dst )
1029 {
1030 unsigned size = 1 + (dst.Indirect ? 1 : 0) +
1031 (dst.Dimension ? (dst.DimIndirect ? 2 : 1) : 0);
1032
1033 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
1034 unsigned n = 0;
1035
1036 assert(dst.File != TGSI_FILE_NULL);
1037 assert(dst.File != TGSI_FILE_CONSTANT);
1038 assert(dst.File != TGSI_FILE_INPUT);
1039 assert(dst.File != TGSI_FILE_SAMPLER);
1040 assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
1041 assert(dst.File != TGSI_FILE_IMMEDIATE);
1042 assert(dst.File < TGSI_FILE_COUNT);
1043
1044 out[n].value = 0;
1045 out[n].dst.File = dst.File;
1046 out[n].dst.WriteMask = dst.WriteMask;
1047 out[n].dst.Indirect = dst.Indirect;
1048 out[n].dst.Index = dst.Index;
1049 n++;
1050
1051 if (dst.Indirect) {
1052 out[n].value = 0;
1053 out[n].ind.File = dst.IndirectFile;
1054 out[n].ind.Swizzle = dst.IndirectSwizzle;
1055 out[n].ind.Index = dst.IndirectIndex;
1056 if (!ureg->supports_any_inout_decl_range &&
1057 (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
1058 out[n].ind.ArrayID = 0;
1059 else
1060 out[n].ind.ArrayID = dst.ArrayID;
1061 n++;
1062 }
1063
1064 if (dst.Dimension) {
1065 out[0].dst.Dimension = 1;
1066 out[n].dim.Dimension = 0;
1067 out[n].dim.Padding = 0;
1068 if (dst.DimIndirect) {
1069 out[n].dim.Indirect = 1;
1070 out[n].dim.Index = dst.DimensionIndex;
1071 n++;
1072 out[n].value = 0;
1073 out[n].ind.File = dst.DimIndFile;
1074 out[n].ind.Swizzle = dst.DimIndSwizzle;
1075 out[n].ind.Index = dst.DimIndIndex;
1076 if (!ureg->supports_any_inout_decl_range &&
1077 (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
1078 out[n].ind.ArrayID = 0;
1079 else
1080 out[n].ind.ArrayID = dst.ArrayID;
1081 } else {
1082 out[n].dim.Indirect = 0;
1083 out[n].dim.Index = dst.DimensionIndex;
1084 }
1085 n++;
1086 }
1087
1088 assert(n == size);
1089 }
1090
1091
/**
 * Debug-build check that an opcode is given the number of destination
 * and source operands its opcode info specifies.  Compiles to nothing
 * unless DEBUG is defined.
 */
static void validate( unsigned opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifdef DEBUG
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );

   assert(info);
   if (!info)
      return;

   assert(nr_dst == info->num_dst);
   assert(nr_src == info->num_src);
#endif
}
1105
1106 struct ureg_emit_insn_result
1107 ureg_emit_insn(struct ureg_program *ureg,
1108 unsigned opcode,
1109 boolean saturate,
1110 boolean predicate,
1111 boolean pred_negate,
1112 unsigned pred_swizzle_x,
1113 unsigned pred_swizzle_y,
1114 unsigned pred_swizzle_z,
1115 unsigned pred_swizzle_w,
1116 unsigned num_dst,
1117 unsigned num_src )
1118 {
1119 union tgsi_any_token *out;
1120 uint count = predicate ? 2 : 1;
1121 struct ureg_emit_insn_result result;
1122
1123 validate( opcode, num_dst, num_src );
1124
1125 out = get_tokens( ureg, DOMAIN_INSN, count );
1126 out[0].insn = tgsi_default_instruction();
1127 out[0].insn.Opcode = opcode;
1128 out[0].insn.Saturate = saturate;
1129 out[0].insn.NumDstRegs = num_dst;
1130 out[0].insn.NumSrcRegs = num_src;
1131
1132 result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
1133 result.extended_token = result.insn_token;
1134
1135 if (predicate) {
1136 out[0].insn.Predicate = 1;
1137 out[1].insn_predicate = tgsi_default_instruction_predicate();
1138 out[1].insn_predicate.Negate = pred_negate;
1139 out[1].insn_predicate.SwizzleX = pred_swizzle_x;
1140 out[1].insn_predicate.SwizzleY = pred_swizzle_y;
1141 out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
1142 out[1].insn_predicate.SwizzleW = pred_swizzle_w;
1143 }
1144
1145 ureg->nr_instructions++;
1146
1147 return result;
1148 }
1149
1150
1151 /**
1152 * Emit a label token.
1153 * \param label_token returns a token number indicating where the label
1154 * needs to be patched later. Later, this value should be passed to the
1155 * ureg_fixup_label() function.
1156 */
1157 void
1158 ureg_emit_label(struct ureg_program *ureg,
1159 unsigned extended_token,
1160 unsigned *label_token )
1161 {
1162 union tgsi_any_token *out, *insn;
1163
1164 if (!label_token)
1165 return;
1166
1167 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1168 out[0].value = 0;
1169
1170 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1171 insn->insn.Label = 1;
1172
1173 *label_token = ureg->domain[DOMAIN_INSN].count - 1;
1174 }
1175
1176 /* Will return a number which can be used in a label to point to the
1177 * next instruction to be emitted.
1178 */
1179 unsigned
1180 ureg_get_instruction_number( struct ureg_program *ureg )
1181 {
1182 return ureg->nr_instructions;
1183 }
1184
1185 /* Patch a given label (expressed as a token number) to point to a
1186 * given instruction (expressed as an instruction number).
1187 */
1188 void
1189 ureg_fixup_label(struct ureg_program *ureg,
1190 unsigned label_token,
1191 unsigned instruction_number )
1192 {
1193 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
1194
1195 out->insn_label.Label = instruction_number;
1196 }
1197
1198
1199 void
1200 ureg_emit_texture(struct ureg_program *ureg,
1201 unsigned extended_token,
1202 unsigned target, unsigned num_offsets)
1203 {
1204 union tgsi_any_token *out, *insn;
1205
1206 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1207 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1208
1209 insn->insn.Texture = 1;
1210
1211 out[0].value = 0;
1212 out[0].insn_texture.Texture = target;
1213 out[0].insn_texture.NumOffsets = num_offsets;
1214 }
1215
1216 void
1217 ureg_emit_texture_offset(struct ureg_program *ureg,
1218 const struct tgsi_texture_offset *offset)
1219 {
1220 union tgsi_any_token *out;
1221
1222 out = get_tokens( ureg, DOMAIN_INSN, 1);
1223
1224 out[0].value = 0;
1225 out[0].insn_texture_offset = *offset;
1226
1227 }
1228
1229
1230 void
1231 ureg_fixup_insn_size(struct ureg_program *ureg,
1232 unsigned insn )
1233 {
1234 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
1235
1236 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
1237 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
1238 }
1239
1240
1241 void
1242 ureg_insn(struct ureg_program *ureg,
1243 unsigned opcode,
1244 const struct ureg_dst *dst,
1245 unsigned nr_dst,
1246 const struct ureg_src *src,
1247 unsigned nr_src )
1248 {
1249 struct ureg_emit_insn_result insn;
1250 unsigned i;
1251 boolean saturate;
1252 boolean predicate;
1253 boolean negate = FALSE;
1254 unsigned swizzle[4] = { 0 };
1255
1256 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1257 return;
1258 }
1259
1260 saturate = nr_dst ? dst[0].Saturate : FALSE;
1261 predicate = nr_dst ? dst[0].Predicate : FALSE;
1262 if (predicate) {
1263 negate = dst[0].PredNegate;
1264 swizzle[0] = dst[0].PredSwizzleX;
1265 swizzle[1] = dst[0].PredSwizzleY;
1266 swizzle[2] = dst[0].PredSwizzleZ;
1267 swizzle[3] = dst[0].PredSwizzleW;
1268 }
1269
1270 insn = ureg_emit_insn(ureg,
1271 opcode,
1272 saturate,
1273 predicate,
1274 negate,
1275 swizzle[0],
1276 swizzle[1],
1277 swizzle[2],
1278 swizzle[3],
1279 nr_dst,
1280 nr_src);
1281
1282 for (i = 0; i < nr_dst; i++)
1283 ureg_emit_dst( ureg, dst[i] );
1284
1285 for (i = 0; i < nr_src; i++)
1286 ureg_emit_src( ureg, src[i] );
1287
1288 ureg_fixup_insn_size( ureg, insn.insn_token );
1289 }
1290
1291 void
1292 ureg_tex_insn(struct ureg_program *ureg,
1293 unsigned opcode,
1294 const struct ureg_dst *dst,
1295 unsigned nr_dst,
1296 unsigned target,
1297 const struct tgsi_texture_offset *texoffsets,
1298 unsigned nr_offset,
1299 const struct ureg_src *src,
1300 unsigned nr_src )
1301 {
1302 struct ureg_emit_insn_result insn;
1303 unsigned i;
1304 boolean saturate;
1305 boolean predicate;
1306 boolean negate = FALSE;
1307 unsigned swizzle[4] = { 0 };
1308
1309 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1310 return;
1311 }
1312
1313 saturate = nr_dst ? dst[0].Saturate : FALSE;
1314 predicate = nr_dst ? dst[0].Predicate : FALSE;
1315 if (predicate) {
1316 negate = dst[0].PredNegate;
1317 swizzle[0] = dst[0].PredSwizzleX;
1318 swizzle[1] = dst[0].PredSwizzleY;
1319 swizzle[2] = dst[0].PredSwizzleZ;
1320 swizzle[3] = dst[0].PredSwizzleW;
1321 }
1322
1323 insn = ureg_emit_insn(ureg,
1324 opcode,
1325 saturate,
1326 predicate,
1327 negate,
1328 swizzle[0],
1329 swizzle[1],
1330 swizzle[2],
1331 swizzle[3],
1332 nr_dst,
1333 nr_src);
1334
1335 ureg_emit_texture( ureg, insn.extended_token, target, nr_offset );
1336
1337 for (i = 0; i < nr_offset; i++)
1338 ureg_emit_texture_offset( ureg, &texoffsets[i]);
1339
1340 for (i = 0; i < nr_dst; i++)
1341 ureg_emit_dst( ureg, dst[i] );
1342
1343 for (i = 0; i < nr_src; i++)
1344 ureg_emit_src( ureg, src[i] );
1345
1346 ureg_fixup_insn_size( ureg, insn.insn_token );
1347 }
1348
1349
1350 void
1351 ureg_label_insn(struct ureg_program *ureg,
1352 unsigned opcode,
1353 const struct ureg_src *src,
1354 unsigned nr_src,
1355 unsigned *label_token )
1356 {
1357 struct ureg_emit_insn_result insn;
1358 unsigned i;
1359
1360 insn = ureg_emit_insn(ureg,
1361 opcode,
1362 FALSE,
1363 FALSE,
1364 FALSE,
1365 TGSI_SWIZZLE_X,
1366 TGSI_SWIZZLE_Y,
1367 TGSI_SWIZZLE_Z,
1368 TGSI_SWIZZLE_W,
1369 0,
1370 nr_src);
1371
1372 ureg_emit_label( ureg, insn.extended_token, label_token );
1373
1374 for (i = 0; i < nr_src; i++)
1375 ureg_emit_src( ureg, src[i] );
1376
1377 ureg_fixup_insn_size( ureg, insn.insn_token );
1378 }
1379
1380
1381 static void
1382 emit_decl_semantic(struct ureg_program *ureg,
1383 unsigned file,
1384 unsigned first,
1385 unsigned last,
1386 unsigned semantic_name,
1387 unsigned semantic_index,
1388 unsigned usage_mask,
1389 unsigned array_id)
1390 {
1391 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
1392
1393 out[0].value = 0;
1394 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1395 out[0].decl.NrTokens = 3;
1396 out[0].decl.File = file;
1397 out[0].decl.UsageMask = usage_mask;
1398 out[0].decl.Semantic = 1;
1399 out[0].decl.Array = array_id != 0;
1400
1401 out[1].value = 0;
1402 out[1].decl_range.First = first;
1403 out[1].decl_range.Last = last;
1404
1405 out[2].value = 0;
1406 out[2].decl_semantic.Name = semantic_name;
1407 out[2].decl_semantic.Index = semantic_index;
1408
1409 if (array_id) {
1410 out[3].value = 0;
1411 out[3].array.ArrayID = array_id;
1412 }
1413 }
1414
1415
1416 static void
1417 emit_decl_fs(struct ureg_program *ureg,
1418 unsigned file,
1419 unsigned first,
1420 unsigned last,
1421 unsigned semantic_name,
1422 unsigned semantic_index,
1423 unsigned interpolate,
1424 unsigned cylindrical_wrap,
1425 unsigned interpolate_location,
1426 unsigned array_id)
1427 {
1428 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL,
1429 array_id ? 5 : 4);
1430
1431 out[0].value = 0;
1432 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1433 out[0].decl.NrTokens = 4;
1434 out[0].decl.File = file;
1435 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
1436 out[0].decl.Interpolate = 1;
1437 out[0].decl.Semantic = 1;
1438 out[0].decl.Array = array_id != 0;
1439
1440 out[1].value = 0;
1441 out[1].decl_range.First = first;
1442 out[1].decl_range.Last = last;
1443
1444 out[2].value = 0;
1445 out[2].decl_interp.Interpolate = interpolate;
1446 out[2].decl_interp.CylindricalWrap = cylindrical_wrap;
1447 out[2].decl_interp.Location = interpolate_location;
1448
1449 out[3].value = 0;
1450 out[3].decl_semantic.Name = semantic_name;
1451 out[3].decl_semantic.Index = semantic_index;
1452
1453 if (array_id) {
1454 out[4].value = 0;
1455 out[4].array.ArrayID = array_id;
1456 }
1457 }
1458
1459 static void
1460 emit_decl_temps( struct ureg_program *ureg,
1461 unsigned first, unsigned last,
1462 boolean local,
1463 unsigned arrayid )
1464 {
1465 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
1466 arrayid ? 3 : 2 );
1467
1468 out[0].value = 0;
1469 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1470 out[0].decl.NrTokens = 2;
1471 out[0].decl.File = TGSI_FILE_TEMPORARY;
1472 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1473 out[0].decl.Local = local;
1474
1475 out[1].value = 0;
1476 out[1].decl_range.First = first;
1477 out[1].decl_range.Last = last;
1478
1479 if (arrayid) {
1480 out[0].decl.Array = 1;
1481 out[2].value = 0;
1482 out[2].array.ArrayID = arrayid;
1483 }
1484 }
1485
1486 static void emit_decl_range( struct ureg_program *ureg,
1487 unsigned file,
1488 unsigned first,
1489 unsigned count )
1490 {
1491 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1492
1493 out[0].value = 0;
1494 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1495 out[0].decl.NrTokens = 2;
1496 out[0].decl.File = file;
1497 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1498 out[0].decl.Semantic = 0;
1499
1500 out[1].value = 0;
1501 out[1].decl_range.First = first;
1502 out[1].decl_range.Last = first + count - 1;
1503 }
1504
1505 static void
1506 emit_decl_range2D(struct ureg_program *ureg,
1507 unsigned file,
1508 unsigned first,
1509 unsigned last,
1510 unsigned index2D)
1511 {
1512 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1513
1514 out[0].value = 0;
1515 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1516 out[0].decl.NrTokens = 3;
1517 out[0].decl.File = file;
1518 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1519 out[0].decl.Dimension = 1;
1520
1521 out[1].value = 0;
1522 out[1].decl_range.First = first;
1523 out[1].decl_range.Last = last;
1524
1525 out[2].value = 0;
1526 out[2].decl_dim.Index2D = index2D;
1527 }
1528
1529 static void
1530 emit_decl_sampler_view(struct ureg_program *ureg,
1531 unsigned index,
1532 unsigned target,
1533 unsigned return_type_x,
1534 unsigned return_type_y,
1535 unsigned return_type_z,
1536 unsigned return_type_w )
1537 {
1538 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1539
1540 out[0].value = 0;
1541 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1542 out[0].decl.NrTokens = 3;
1543 out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
1544 out[0].decl.UsageMask = 0xf;
1545
1546 out[1].value = 0;
1547 out[1].decl_range.First = index;
1548 out[1].decl_range.Last = index;
1549
1550 out[2].value = 0;
1551 out[2].decl_sampler_view.Resource = target;
1552 out[2].decl_sampler_view.ReturnTypeX = return_type_x;
1553 out[2].decl_sampler_view.ReturnTypeY = return_type_y;
1554 out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
1555 out[2].decl_sampler_view.ReturnTypeW = return_type_w;
1556 }
1557
1558 static void
1559 emit_decl_image(struct ureg_program *ureg,
1560 unsigned index,
1561 unsigned target,
1562 unsigned format,
1563 boolean wr,
1564 boolean raw)
1565 {
1566 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1567
1568 out[0].value = 0;
1569 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1570 out[0].decl.NrTokens = 3;
1571 out[0].decl.File = TGSI_FILE_IMAGE;
1572 out[0].decl.UsageMask = 0xf;
1573
1574 out[1].value = 0;
1575 out[1].decl_range.First = index;
1576 out[1].decl_range.Last = index;
1577
1578 out[2].value = 0;
1579 out[2].decl_image.Resource = target;
1580 out[2].decl_image.Writable = wr;
1581 out[2].decl_image.Raw = raw;
1582 out[2].decl_image.Format = format;
1583 }
1584
1585 static void
1586 emit_decl_buffer(struct ureg_program *ureg,
1587 unsigned index,
1588 bool atomic)
1589 {
1590 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1591
1592 out[0].value = 0;
1593 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1594 out[0].decl.NrTokens = 2;
1595 out[0].decl.File = TGSI_FILE_BUFFER;
1596 out[0].decl.UsageMask = 0xf;
1597 out[0].decl.Atomic = atomic;
1598
1599 out[1].value = 0;
1600 out[1].decl_range.First = index;
1601 out[1].decl_range.Last = index;
1602 }
1603
1604 static void
1605 emit_immediate( struct ureg_program *ureg,
1606 const unsigned *v,
1607 unsigned type )
1608 {
1609 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1610
1611 out[0].value = 0;
1612 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1613 out[0].imm.NrTokens = 5;
1614 out[0].imm.DataType = type;
1615 out[0].imm.Padding = 0;
1616
1617 out[1].imm_data.Uint = v[0];
1618 out[2].imm_data.Uint = v[1];
1619 out[3].imm_data.Uint = v[2];
1620 out[4].imm_data.Uint = v[3];
1621 }
1622
1623 static void
1624 emit_property(struct ureg_program *ureg,
1625 unsigned name,
1626 unsigned data)
1627 {
1628 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1629
1630 out[0].value = 0;
1631 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
1632 out[0].prop.NrTokens = 2;
1633 out[0].prop.PropertyName = name;
1634
1635 out[1].prop_data.Data = data;
1636 }
1637
1638
1639 static void emit_decls( struct ureg_program *ureg )
1640 {
1641 unsigned i,j;
1642
1643 for (i = 0; i < Elements(ureg->properties); i++)
1644 if (ureg->properties[i] != ~0)
1645 emit_property(ureg, i, ureg->properties[i]);
1646
1647 if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
1648 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
1649 if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
1650 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
1651 }
1652 }
1653 } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
1654 if (ureg->supports_any_inout_decl_range) {
1655 for (i = 0; i < ureg->nr_inputs; i++) {
1656 emit_decl_fs(ureg,
1657 TGSI_FILE_INPUT,
1658 ureg->input[i].first,
1659 ureg->input[i].last,
1660 ureg->input[i].semantic_name,
1661 ureg->input[i].semantic_index,
1662 ureg->input[i].interp,
1663 ureg->input[i].cylindrical_wrap,
1664 ureg->input[i].interp_location,
1665 ureg->input[i].array_id);
1666 }
1667 }
1668 else {
1669 for (i = 0; i < ureg->nr_inputs; i++) {
1670 for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
1671 emit_decl_fs(ureg,
1672 TGSI_FILE_INPUT,
1673 j, j,
1674 ureg->input[i].semantic_name,
1675 ureg->input[i].semantic_index +
1676 (j - ureg->input[i].first),
1677 ureg->input[i].interp,
1678 ureg->input[i].cylindrical_wrap,
1679 ureg->input[i].interp_location, 0);
1680 }
1681 }
1682 }
1683 } else {
1684 if (ureg->supports_any_inout_decl_range) {
1685 for (i = 0; i < ureg->nr_inputs; i++) {
1686 emit_decl_semantic(ureg,
1687 TGSI_FILE_INPUT,
1688 ureg->input[i].first,
1689 ureg->input[i].last,
1690 ureg->input[i].semantic_name,
1691 ureg->input[i].semantic_index,
1692 TGSI_WRITEMASK_XYZW,
1693 ureg->input[i].array_id);
1694 }
1695 }
1696 else {
1697 for (i = 0; i < ureg->nr_inputs; i++) {
1698 for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
1699 emit_decl_semantic(ureg,
1700 TGSI_FILE_INPUT,
1701 j, j,
1702 ureg->input[i].semantic_name,
1703 ureg->input[i].semantic_index +
1704 (j - ureg->input[i].first),
1705 TGSI_WRITEMASK_XYZW, 0);
1706 }
1707 }
1708 }
1709 }
1710
1711 for (i = 0; i < ureg->nr_system_values; i++) {
1712 emit_decl_semantic(ureg,
1713 TGSI_FILE_SYSTEM_VALUE,
1714 i,
1715 i,
1716 ureg->system_value[i].semantic_name,
1717 ureg->system_value[i].semantic_index,
1718 TGSI_WRITEMASK_XYZW, 0);
1719 }
1720
1721 if (ureg->supports_any_inout_decl_range) {
1722 for (i = 0; i < ureg->nr_outputs; i++) {
1723 emit_decl_semantic(ureg,
1724 TGSI_FILE_OUTPUT,
1725 ureg->output[i].first,
1726 ureg->output[i].last,
1727 ureg->output[i].semantic_name,
1728 ureg->output[i].semantic_index,
1729 ureg->output[i].usage_mask,
1730 ureg->output[i].array_id);
1731 }
1732 }
1733 else {
1734 for (i = 0; i < ureg->nr_outputs; i++) {
1735 for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) {
1736 emit_decl_semantic(ureg,
1737 TGSI_FILE_OUTPUT,
1738 j, j,
1739 ureg->output[i].semantic_name,
1740 ureg->output[i].semantic_index +
1741 (j - ureg->output[i].first),
1742 ureg->output[i].usage_mask, 0);
1743 }
1744 }
1745 }
1746
1747 for (i = 0; i < ureg->nr_samplers; i++) {
1748 emit_decl_range( ureg,
1749 TGSI_FILE_SAMPLER,
1750 ureg->sampler[i].Index, 1 );
1751 }
1752
1753 for (i = 0; i < ureg->nr_sampler_views; i++) {
1754 emit_decl_sampler_view(ureg,
1755 ureg->sampler_view[i].index,
1756 ureg->sampler_view[i].target,
1757 ureg->sampler_view[i].return_type_x,
1758 ureg->sampler_view[i].return_type_y,
1759 ureg->sampler_view[i].return_type_z,
1760 ureg->sampler_view[i].return_type_w);
1761 }
1762
1763 for (i = 0; i < ureg->nr_images; i++) {
1764 emit_decl_image(ureg,
1765 ureg->image[i].index,
1766 ureg->image[i].target,
1767 ureg->image[i].format,
1768 ureg->image[i].wr,
1769 ureg->image[i].raw);
1770 }
1771
1772 for (i = 0; i < ureg->nr_buffers; i++) {
1773 emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
1774 }
1775
1776 if (ureg->const_decls.nr_constant_ranges) {
1777 for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
1778 emit_decl_range(ureg,
1779 TGSI_FILE_CONSTANT,
1780 ureg->const_decls.constant_range[i].first,
1781 ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1);
1782 }
1783 }
1784
1785 for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
1786 struct const_decl *decl = &ureg->const_decls2D[i];
1787
1788 if (decl->nr_constant_ranges) {
1789 uint j;
1790
1791 for (j = 0; j < decl->nr_constant_ranges; j++) {
1792 emit_decl_range2D(ureg,
1793 TGSI_FILE_CONSTANT,
1794 decl->constant_range[j].first,
1795 decl->constant_range[j].last,
1796 i);
1797 }
1798 }
1799 }
1800
1801 if (ureg->nr_temps) {
1802 unsigned array = 0;
1803 for (i = 0; i < ureg->nr_temps;) {
1804 boolean local = util_bitmask_get(ureg->local_temps, i);
1805 unsigned first = i;
1806 i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
1807 if (i == UTIL_BITMASK_INVALID_INDEX)
1808 i = ureg->nr_temps;
1809
1810 if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
1811 emit_decl_temps( ureg, first, i - 1, local, ++array );
1812 else
1813 emit_decl_temps( ureg, first, i - 1, local, 0 );
1814 }
1815 }
1816
1817 if (ureg->nr_addrs) {
1818 emit_decl_range( ureg,
1819 TGSI_FILE_ADDRESS,
1820 0, ureg->nr_addrs );
1821 }
1822
1823 if (ureg->nr_preds) {
1824 emit_decl_range(ureg,
1825 TGSI_FILE_PREDICATE,
1826 0,
1827 ureg->nr_preds);
1828 }
1829
1830 for (i = 0; i < ureg->nr_immediates; i++) {
1831 emit_immediate( ureg,
1832 ureg->immediate[i].value.u,
1833 ureg->immediate[i].type );
1834 }
1835 }
1836
1837 /* Append the instruction tokens onto the declarations to build a
1838 * contiguous stream suitable to send to the driver.
1839 */
1840 static void copy_instructions( struct ureg_program *ureg )
1841 {
1842 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
1843 union tgsi_any_token *out = get_tokens( ureg,
1844 DOMAIN_DECL,
1845 nr_tokens );
1846
1847 memcpy(out,
1848 ureg->domain[DOMAIN_INSN].tokens,
1849 nr_tokens * sizeof out[0] );
1850 }
1851
1852
1853 static void
1854 fixup_header_size(struct ureg_program *ureg)
1855 {
1856 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
1857
1858 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
1859 }
1860
1861
1862 static void
1863 emit_header( struct ureg_program *ureg )
1864 {
1865 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1866
1867 out[0].header.HeaderSize = 2;
1868 out[0].header.BodySize = 0;
1869
1870 out[1].processor.Processor = ureg->processor;
1871 out[1].processor.Padding = 0;
1872 }
1873
1874
1875 const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
1876 {
1877 const struct tgsi_token *tokens;
1878
1879 emit_header( ureg );
1880 emit_decls( ureg );
1881 copy_instructions( ureg );
1882 fixup_header_size( ureg );
1883
1884 if (ureg->domain[0].tokens == error_tokens ||
1885 ureg->domain[1].tokens == error_tokens) {
1886 debug_printf("%s: error in generated shader\n", __FUNCTION__);
1887 assert(0);
1888 return NULL;
1889 }
1890
1891 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1892
1893 if (0) {
1894 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
1895 ureg->domain[DOMAIN_DECL].count);
1896 tgsi_dump( tokens, 0 );
1897 }
1898
1899 #if DEBUG
1900 if (tokens && !tgsi_sanity_check(tokens)) {
1901 debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
1902 tgsi_dump(tokens, 0);
1903 assert(0);
1904 }
1905 #endif
1906
1907
1908 return tokens;
1909 }
1910
1911
1912 void *ureg_create_shader( struct ureg_program *ureg,
1913 struct pipe_context *pipe,
1914 const struct pipe_stream_output_info *so )
1915 {
1916 struct pipe_shader_state state;
1917
1918 state.tokens = ureg_finalize(ureg);
1919 if(!state.tokens)
1920 return NULL;
1921
1922 if (so)
1923 state.stream_output = *so;
1924 else
1925 memset(&state.stream_output, 0, sizeof(state.stream_output));
1926
1927 switch (ureg->processor) {
1928 case TGSI_PROCESSOR_VERTEX:
1929 return pipe->create_vs_state(pipe, &state);
1930 case TGSI_PROCESSOR_TESS_CTRL:
1931 return pipe->create_tcs_state(pipe, &state);
1932 case TGSI_PROCESSOR_TESS_EVAL:
1933 return pipe->create_tes_state(pipe, &state);
1934 case TGSI_PROCESSOR_GEOMETRY:
1935 return pipe->create_gs_state(pipe, &state);
1936 case TGSI_PROCESSOR_FRAGMENT:
1937 return pipe->create_fs_state(pipe, &state);
1938 default:
1939 return NULL;
1940 }
1941 }
1942
1943
1944 const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
1945 unsigned *nr_tokens )
1946 {
1947 const struct tgsi_token *tokens;
1948
1949 ureg_finalize(ureg);
1950
1951 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1952
1953 if (nr_tokens)
1954 *nr_tokens = ureg->domain[DOMAIN_DECL].size;
1955
1956 ureg->domain[DOMAIN_DECL].tokens = 0;
1957 ureg->domain[DOMAIN_DECL].size = 0;
1958 ureg->domain[DOMAIN_DECL].order = 0;
1959 ureg->domain[DOMAIN_DECL].count = 0;
1960
1961 return tokens;
1962 }
1963
1964
/**
 * Release a token stream; the const qualifier is cast away so the pointer
 * can be passed to FREE().
 */
void ureg_free_tokens( const struct tgsi_token *tokens )
{
   struct tgsi_token *owned = (struct tgsi_token *)tokens;

   FREE(owned);
}
1969
1970
/**
 * Create a ureg program for \p processor without a screen; equivalent to
 * ureg_create_with_screen(processor, NULL).
 */
struct ureg_program *
ureg_create(unsigned processor)
{
   struct ureg_program *ureg = ureg_create_with_screen(processor, NULL);

   return ureg;
}
1976
1977
1978 struct ureg_program *
1979 ureg_create_with_screen(unsigned processor, struct pipe_screen *screen)
1980 {
1981 int i;
1982 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
1983 if (!ureg)
1984 goto no_ureg;
1985
1986 ureg->processor = processor;
1987 ureg->supports_any_inout_decl_range =
1988 screen &&
1989 screen->get_shader_param(screen,
1990 util_pipe_shader_from_tgsi_processor(processor),
1991 PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
1992
1993 for (i = 0; i < Elements(ureg->properties); i++)
1994 ureg->properties[i] = ~0;
1995
1996 ureg->free_temps = util_bitmask_create();
1997 if (ureg->free_temps == NULL)
1998 goto no_free_temps;
1999
2000 ureg->local_temps = util_bitmask_create();
2001 if (ureg->local_temps == NULL)
2002 goto no_local_temps;
2003
2004 ureg->decl_temps = util_bitmask_create();
2005 if (ureg->decl_temps == NULL)
2006 goto no_decl_temps;
2007
2008 return ureg;
2009
2010 no_decl_temps:
2011 util_bitmask_destroy(ureg->local_temps);
2012 no_local_temps:
2013 util_bitmask_destroy(ureg->free_temps);
2014 no_free_temps:
2015 FREE(ureg);
2016 no_ureg:
2017 return NULL;
2018 }
2019
2020
2021 unsigned
2022 ureg_get_nr_outputs( const struct ureg_program *ureg )
2023 {
2024 if (!ureg)
2025 return 0;
2026 return ureg->nr_outputs;
2027 }
2028
2029
2030 void ureg_destroy( struct ureg_program *ureg )
2031 {
2032 unsigned i;
2033
2034 for (i = 0; i < Elements(ureg->domain); i++) {
2035 if (ureg->domain[i].tokens &&
2036 ureg->domain[i].tokens != error_tokens)
2037 FREE(ureg->domain[i].tokens);
2038 }
2039
2040 util_bitmask_destroy(ureg->free_temps);
2041 util_bitmask_destroy(ureg->local_temps);
2042 util_bitmask_destroy(ureg->decl_temps);
2043
2044 FREE(ureg);
2045 }