gallium/ureg: Set the next shader stage from the shader info.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_ureg.c
1 /**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_screen.h"
30 #include "pipe/p_context.h"
31 #include "pipe/p_state.h"
32 #include "tgsi/tgsi_ureg.h"
33 #include "tgsi/tgsi_build.h"
34 #include "tgsi/tgsi_from_mesa.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "tgsi/tgsi_sanity.h"
38 #include "util/u_debug.h"
39 #include "util/u_inlines.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_bitmask.h"
43 #include "GL/gl.h"
44 #include "compiler/shader_info.h"
45
/* A single 32-bit TGSI token viewed through every possible token layout.
 * The token stream is built by writing whichever member matches the token
 * being emitted; all members occupy the same 32 bits.
 */
union tgsi_any_token {
   struct tgsi_header header;
   struct tgsi_processor processor;
   struct tgsi_token token;
   struct tgsi_property prop;
   struct tgsi_property_data prop_data;
   struct tgsi_declaration decl;
   struct tgsi_declaration_range decl_range;
   struct tgsi_declaration_dimension decl_dim;
   struct tgsi_declaration_interp decl_interp;
   struct tgsi_declaration_image decl_image;
   struct tgsi_declaration_semantic decl_semantic;
   struct tgsi_declaration_sampler_view decl_sampler_view;
   struct tgsi_declaration_array array;
   struct tgsi_immediate imm;
   union tgsi_immediate_data imm_data;
   struct tgsi_instruction insn;
   struct tgsi_instruction_label insn_label;
   struct tgsi_instruction_texture insn_texture;
   struct tgsi_instruction_memory insn_memory;
   struct tgsi_texture_offset insn_texture_offset;
   struct tgsi_src_register src;
   struct tgsi_ind_register ind;
   struct tgsi_dimension dim;
   struct tgsi_dst_register dst;
   unsigned value;    /* raw access for zero-initialization */
};
73
74
/* A growable token buffer.  "size" is the capacity in tokens, "count" the
 * number written so far, and "order" the log2 used to grow by doubling.
 */
struct ureg_tokens {
   union tgsi_any_token *tokens;
   unsigned size;
   unsigned order;
   unsigned count;
};
81
82 #define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS)
83 #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
84 #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
85 #define UREG_MAX_CONSTANT_RANGE 32
86 #define UREG_MAX_HW_ATOMIC_RANGE 32
87 #define UREG_MAX_IMMEDIATE 4096
88 #define UREG_MAX_ADDR 3
89 #define UREG_MAX_ARRAY_TEMPS 256
90
/* Per-constant-buffer record of the [first,last] register ranges that were
 * referenced, so declarations can be emitted at finalize time.
 */
struct const_decl {
   struct {
      unsigned first;
      unsigned last;
   } constant_range[UREG_MAX_CONSTANT_RANGE];
   unsigned nr_constant_ranges;
};
98
/* Per-buffer record of hardware atomic counter ranges, analogous to
 * struct const_decl but with an array id per range.
 */
struct hw_atomic_decl {
   struct {
      unsigned first;
      unsigned last;
      unsigned array_id;
   } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
   unsigned nr_hw_atomic_ranges;
};
107
108 #define DOMAIN_DECL 0
109 #define DOMAIN_INSN 1
110
/* All state accumulated while building a shader.  Declarations are recorded
 * in the arrays below and emitted into domain[DOMAIN_DECL] at finalize time;
 * instructions are emitted directly into domain[DOMAIN_INSN].
 */
struct ureg_program
{
   enum pipe_shader_type processor;
   bool supports_any_inout_decl_range;
   int next_shader_processor;

   /* Input declarations (non-VS shaders); deduplicated by semantic. */
   struct {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
      enum tgsi_interpolate_mode interp;
      unsigned char cylindrical_wrap;
      unsigned char usage_mask;
      enum tgsi_interpolate_loc interp_location;
      unsigned first;
      unsigned last;
      unsigned array_id;
   } input[UREG_MAX_INPUT];
   unsigned nr_inputs, nr_input_regs;

   /* VS inputs are a simple bitmask of referenced attribute slots. */
   unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];

   struct {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
   } system_value[UREG_MAX_SYSTEM_VALUE];
   unsigned nr_system_values;

   /* Output declarations; deduplicated by semantic like inputs. */
   struct {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
      unsigned streams;
      unsigned usage_mask; /* = TGSI_WRITEMASK_* */
      unsigned first;
      unsigned last;
      unsigned array_id;
      boolean invariant;
   } output[UREG_MAX_OUTPUT];
   unsigned nr_outputs, nr_output_regs;

   /* Immediates; value.u holds the raw bits regardless of "type". */
   struct {
      union {
         float f[4];
         unsigned u[4];
         int i[4];
      } value;
      unsigned nr;       /* slots of value[] in use */
      unsigned type;     /* TGSI_IMM_* */
   } immediate[UREG_MAX_IMMEDIATE];
   unsigned nr_immediates;

   struct ureg_src sampler[PIPE_MAX_SAMPLERS];
   unsigned nr_samplers;

   struct {
      unsigned index;
      enum tgsi_texture_type target;
      enum tgsi_return_type return_type_x;
      enum tgsi_return_type return_type_y;
      enum tgsi_return_type return_type_z;
      enum tgsi_return_type return_type_w;
   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
   unsigned nr_sampler_views;

   struct {
      unsigned index;
      enum tgsi_texture_type target;
      enum pipe_format format;
      boolean wr;        /* writable */
      boolean raw;       /* raw (typeless) access */
   } image[PIPE_MAX_SHADER_IMAGES];
   unsigned nr_images;

   struct {
      unsigned index;
      bool atomic;
   } buffer[PIPE_MAX_SHADER_BUFFERS];
   unsigned nr_buffers;

   /* Temporary register bookkeeping:
    * free_temps  — released and reusable indices,
    * local_temps — indices allocated with the "local" flag,
    * decl_temps  — indices where a new TEMP declaration must start.
    */
   struct util_bitmask *free_temps;
   struct util_bitmask *local_temps;
   struct util_bitmask *decl_temps;
   unsigned nr_temps;

   /* First register of each declared temp array; ArrayID is index+1. */
   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
   unsigned nr_array_temps;

   struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];

   struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];

   unsigned properties[TGSI_PROPERTY_COUNT];

   unsigned nr_addrs;
   unsigned nr_instructions;

   /* domain[DOMAIN_DECL] and domain[DOMAIN_INSN] token streams. */
   struct ureg_tokens domain[2];

   bool use_memory[TGSI_MEMORY_TYPE_COUNT];
};
210
211 static union tgsi_any_token error_tokens[32];
212
213 static void tokens_error( struct ureg_tokens *tokens )
214 {
215 if (tokens->tokens && tokens->tokens != error_tokens)
216 FREE(tokens->tokens);
217
218 tokens->tokens = error_tokens;
219 tokens->size = ARRAY_SIZE(error_tokens);
220 tokens->count = 0;
221 }
222
223
/* Grow the token buffer so at least "count" more tokens fit, doubling the
 * capacity as needed.  On allocation failure the stream is switched to the
 * static error state.  No-op if the stream is already in the error state.
 */
static void tokens_expand( struct ureg_tokens *tokens,
                           unsigned count )
{
   /* Capture the byte size before growing: REALLOC needs the old size. */
   unsigned old_size = tokens->size * sizeof(unsigned);

   if (tokens->tokens == error_tokens) {
      return;
   }

   /* Keep doubling until the request fits. */
   while (tokens->count + count > tokens->size) {
      tokens->size = (1 << ++tokens->order);
   }

   tokens->tokens = REALLOC(tokens->tokens,
                            old_size,
                            tokens->size * sizeof(unsigned));
   if (tokens->tokens == NULL) {
      tokens_error(tokens);
   }
}
244
245 static void set_bad( struct ureg_program *ureg )
246 {
247 tokens_error(&ureg->domain[0]);
248 }
249
250
251
252 static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
253 unsigned domain,
254 unsigned count )
255 {
256 struct ureg_tokens *tokens = &ureg->domain[domain];
257 union tgsi_any_token *result;
258
259 if (tokens->count + count > tokens->size)
260 tokens_expand(tokens, count);
261
262 result = &tokens->tokens[tokens->count];
263 tokens->count += count;
264 return result;
265 }
266
267
268 static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
269 unsigned domain,
270 unsigned nr )
271 {
272 if (ureg->domain[domain].tokens == error_tokens)
273 return &error_tokens[0];
274
275 return &ureg->domain[domain].tokens[nr];
276 }
277
278
/* Record a TGSI_PROPERTY_* value; properties are emitted with the
 * declarations at finalize time.  Later calls overwrite earlier ones.
 */
void
ureg_property(struct ureg_program *ureg, unsigned name, unsigned value)
{
   assert(name < ARRAY_SIZE(ureg->properties));
   ureg->properties[name] = value;
}
285
286 struct ureg_src
287 ureg_DECL_fs_input_cyl_centroid_layout(struct ureg_program *ureg,
288 enum tgsi_semantic semantic_name,
289 unsigned semantic_index,
290 enum tgsi_interpolate_mode interp_mode,
291 unsigned cylindrical_wrap,
292 enum tgsi_interpolate_loc interp_location,
293 unsigned index,
294 unsigned usage_mask,
295 unsigned array_id,
296 unsigned array_size)
297 {
298 unsigned i;
299
300 assert(usage_mask != 0);
301 assert(usage_mask <= TGSI_WRITEMASK_XYZW);
302
303 for (i = 0; i < ureg->nr_inputs; i++) {
304 if (ureg->input[i].semantic_name == semantic_name &&
305 ureg->input[i].semantic_index == semantic_index) {
306 assert(ureg->input[i].interp == interp_mode);
307 assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap);
308 assert(ureg->input[i].interp_location == interp_location);
309 if (ureg->input[i].array_id == array_id) {
310 ureg->input[i].usage_mask |= usage_mask;
311 goto out;
312 }
313 assert((ureg->input[i].usage_mask & usage_mask) == 0);
314 }
315 }
316
317 if (ureg->nr_inputs < UREG_MAX_INPUT) {
318 assert(array_size >= 1);
319 ureg->input[i].semantic_name = semantic_name;
320 ureg->input[i].semantic_index = semantic_index;
321 ureg->input[i].interp = interp_mode;
322 ureg->input[i].cylindrical_wrap = cylindrical_wrap;
323 ureg->input[i].interp_location = interp_location;
324 ureg->input[i].first = index;
325 ureg->input[i].last = index + array_size - 1;
326 ureg->input[i].array_id = array_id;
327 ureg->input[i].usage_mask = usage_mask;
328 ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size);
329 ureg->nr_inputs++;
330 } else {
331 set_bad(ureg);
332 }
333
334 out:
335 return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first,
336 array_id);
337 }
338
339 struct ureg_src
340 ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
341 enum tgsi_semantic semantic_name,
342 unsigned semantic_index,
343 enum tgsi_interpolate_mode interp_mode,
344 unsigned cylindrical_wrap,
345 enum tgsi_interpolate_loc interp_location,
346 unsigned array_id,
347 unsigned array_size)
348 {
349 return ureg_DECL_fs_input_cyl_centroid_layout(ureg,
350 semantic_name, semantic_index, interp_mode,
351 cylindrical_wrap, interp_location,
352 ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size);
353 }
354
355
356 struct ureg_src
357 ureg_DECL_vs_input( struct ureg_program *ureg,
358 unsigned index )
359 {
360 assert(ureg->processor == PIPE_SHADER_VERTEX);
361 assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));
362
363 ureg->vs_inputs[index/32] |= 1 << (index % 32);
364 return ureg_src_register( TGSI_FILE_INPUT, index );
365 }
366
367
368 struct ureg_src
369 ureg_DECL_input_layout(struct ureg_program *ureg,
370 enum tgsi_semantic semantic_name,
371 unsigned semantic_index,
372 unsigned index,
373 unsigned usage_mask,
374 unsigned array_id,
375 unsigned array_size)
376 {
377 return ureg_DECL_fs_input_cyl_centroid_layout(ureg,
378 semantic_name, semantic_index,
379 TGSI_INTERPOLATE_CONSTANT, 0, TGSI_INTERPOLATE_LOC_CENTER,
380 index, usage_mask, array_id, array_size);
381 }
382
383
384 struct ureg_src
385 ureg_DECL_input(struct ureg_program *ureg,
386 enum tgsi_semantic semantic_name,
387 unsigned semantic_index,
388 unsigned array_id,
389 unsigned array_size)
390 {
391 return ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index,
392 TGSI_INTERPOLATE_CONSTANT, 0,
393 TGSI_INTERPOLATE_LOC_CENTER,
394 array_id, array_size);
395 }
396
397
398 struct ureg_src
399 ureg_DECL_system_value(struct ureg_program *ureg,
400 enum tgsi_semantic semantic_name,
401 unsigned semantic_index)
402 {
403 unsigned i;
404
405 for (i = 0; i < ureg->nr_system_values; i++) {
406 if (ureg->system_value[i].semantic_name == semantic_name &&
407 ureg->system_value[i].semantic_index == semantic_index) {
408 goto out;
409 }
410 }
411
412 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
413 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
414 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
415 i = ureg->nr_system_values;
416 ureg->nr_system_values++;
417 } else {
418 set_bad(ureg);
419 }
420
421 out:
422 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
423 }
424
425
/* Declare (or reuse) a shader output at an explicit register index.
 *
 * If an output with the same semantic and array_id already exists its usage
 * mask is widened and the existing register returned.  Otherwise a new
 * record covering [index, index + array_size - 1] is added.  On table
 * overflow the program is marked bad and slot 0 is returned.  "streams"
 * packs a 2-bit stream id per component and is OR'd into the record in all
 * cases.
 */
struct ureg_dst
ureg_DECL_output_layout(struct ureg_program *ureg,
                        enum tgsi_semantic semantic_name,
                        unsigned semantic_index,
                        unsigned streams,
                        unsigned index,
                        unsigned usage_mask,
                        unsigned array_id,
                        unsigned array_size,
                        boolean invariant)
{
   unsigned i;

   assert(usage_mask != 0);
   /* Each 2-bit stream field requires its component to be written. */
   assert(!(streams & 0x03) || (usage_mask & 1));
   assert(!(streams & 0x0c) || (usage_mask & 2));
   assert(!(streams & 0x30) || (usage_mask & 4));
   assert(!(streams & 0xc0) || (usage_mask & 8));

   for (i = 0; i < ureg->nr_outputs; i++) {
      if (ureg->output[i].semantic_name == semantic_name &&
          ureg->output[i].semantic_index == semantic_index) {
         if (ureg->output[i].array_id == array_id) {
            ureg->output[i].usage_mask |= usage_mask;
            goto out;
         }
         /* Distinct array_ids for one semantic must not overlap components. */
         assert((ureg->output[i].usage_mask & usage_mask) == 0);
      }
   }

   if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
      ureg->output[i].semantic_name = semantic_name;
      ureg->output[i].semantic_index = semantic_index;
      ureg->output[i].usage_mask = usage_mask;
      ureg->output[i].first = index;
      ureg->output[i].last = index + array_size - 1;
      ureg->output[i].array_id = array_id;
      ureg->output[i].invariant = invariant;
      ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
      ureg->nr_outputs++;
   }
   else {
      set_bad( ureg );
      /* Keep i in bounds for the reads below. */
      i = 0;
   }

out:
   ureg->output[i].streams |= streams;

   return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
                                  array_id);
}
478
479
480 struct ureg_dst
481 ureg_DECL_output_masked(struct ureg_program *ureg,
482 unsigned name,
483 unsigned index,
484 unsigned usage_mask,
485 unsigned array_id,
486 unsigned array_size)
487 {
488 return ureg_DECL_output_layout(ureg, name, index, 0,
489 ureg->nr_output_regs, usage_mask, array_id,
490 array_size, FALSE);
491 }
492
493
494 struct ureg_dst
495 ureg_DECL_output(struct ureg_program *ureg,
496 enum tgsi_semantic name,
497 unsigned index)
498 {
499 return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
500 0, 1);
501 }
502
503 struct ureg_dst
504 ureg_DECL_output_array(struct ureg_program *ureg,
505 enum tgsi_semantic semantic_name,
506 unsigned semantic_index,
507 unsigned array_id,
508 unsigned array_size)
509 {
510 return ureg_DECL_output_masked(ureg, semantic_name, semantic_index,
511 TGSI_WRITEMASK_XYZW,
512 array_id, array_size);
513 }
514
515
516 /* Returns a new constant register. Keep track of which have been
517 * referred to so that we can emit decls later.
518 *
519 * Constant operands declared with this function must be addressed
520 * with a two-dimensional index.
521 *
522 * There is nothing in this code to bind this constant to any tracked
523 * value or manage any constant_buffer contents -- that's the
524 * resposibility of the calling code.
525 */
/* Record the register range [first,last] of constant buffer index2D so the
 * declaration can be emitted at finalize time.
 */
void
ureg_DECL_constant2D(struct ureg_program *ureg,
                     unsigned first,
                     unsigned last,
                     unsigned index2D)
{
   struct const_decl *decl = &ureg->const_decls[index2D];

   assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);

   /* NOTE(review): when the range table is full the request is silently
    * dropped, unlike ureg_DECL_hw_atomic() which calls set_bad() —
    * confirm this is intentional.
    */
   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
      uint i = decl->nr_constant_ranges++;

      decl->constant_range[i].first = first;
      decl->constant_range[i].last = last;
   }
}
543
544
545 /* A one-dimensional, deprecated version of ureg_DECL_constant2D().
546 *
547 * Constant operands declared with this function must be addressed
548 * with a one-dimensional index.
549 */
struct ureg_src
ureg_DECL_constant(struct ureg_program *ureg,
                   unsigned index)
{
   /* Buffer 0 only; 2D addressing with dimension index 0 is returned. */
   struct const_decl *decl = &ureg->const_decls[0];
   unsigned minconst = index, maxconst = index;
   unsigned i;

   /* Inside existing range?
    */
   for (i = 0; i < decl->nr_constant_ranges; i++) {
      if (decl->constant_range[i].first <= index &&
          decl->constant_range[i].last >= index) {
         goto out;
      }
   }

   /* Extend existing range?
    */
   for (i = 0; i < decl->nr_constant_ranges; i++) {
      if (decl->constant_range[i].last == index - 1) {
         decl->constant_range[i].last = index;
         goto out;
      }

      if (decl->constant_range[i].first == index + 1) {
         decl->constant_range[i].first = index;
         goto out;
      }

      /* Track the overall extent in case we must collapse below. */
      minconst = MIN2(minconst, decl->constant_range[i].first);
      maxconst = MAX2(maxconst, decl->constant_range[i].last);
   }

   /* Create new range?
    */
   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
      i = decl->nr_constant_ranges++;
      decl->constant_range[i].first = index;
      decl->constant_range[i].last = index;
      goto out;
   }

   /* Collapse all ranges down to one:
    */
   i = 0;
   decl->constant_range[0].first = minconst;
   decl->constant_range[0].last = maxconst;
   decl->nr_constant_ranges = 1;

out:
   assert(i < decl->nr_constant_ranges);
   assert(decl->constant_range[i].first <= index);
   assert(decl->constant_range[i].last >= index);

   struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index);
   return ureg_src_dimension(src, 0);
}
608
609
610 /* Returns a new hw atomic register. Keep track of which have been
611 * referred to so that we can emit decls later.
612 */
613 void
614 ureg_DECL_hw_atomic(struct ureg_program *ureg,
615 unsigned first,
616 unsigned last,
617 unsigned buffer_id,
618 unsigned array_id)
619 {
620 struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];
621
622 if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
623 uint i = decl->nr_hw_atomic_ranges++;
624
625 decl->hw_atomic_range[i].first = first;
626 decl->hw_atomic_range[i].last = last;
627 decl->hw_atomic_range[i].array_id = array_id;
628 } else {
629 set_bad(ureg);
630 }
631 }
632
/* Allocate a temporary register, preferring a previously released one whose
 * "local" flag matches.  decl_temps marks the indices where a new TEMP
 * declaration must start (the local flag changed), so declarations can be
 * emitted as contiguous runs at finalize time.
 */
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
                                        boolean local )
{
   unsigned i;

   /* Look for a released temporary.
    */
   for (i = util_bitmask_get_first_index(ureg->free_temps);
        i != UTIL_BITMASK_INVALID_INDEX;
        i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
      if (util_bitmask_get(ureg->local_temps, i) == local)
         break;
   }

   /* Or allocate a new one.
    */
   if (i == UTIL_BITMASK_INVALID_INDEX) {
      i = ureg->nr_temps++;

      if (local)
         util_bitmask_set(ureg->local_temps, i);

      /* Start a new declaration when the local flag changes */
      if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
         util_bitmask_set(ureg->decl_temps, i);
   }

   /* In-use again: remove from the free list. */
   util_bitmask_clear(ureg->free_temps, i);

   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
}
664
665 struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
666 {
667 return alloc_temporary(ureg, FALSE);
668 }
669
670 struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
671 {
672 return alloc_temporary(ureg, TRUE);
673 }
674
/* Allocate a contiguous array of "size" temporaries.  The array always gets
 * its own declaration range, and is registered so that indirect addressing
 * can reference it via a non-zero ArrayID (index into array_temps + 1).
 */
struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
                                           unsigned size,
                                           boolean local )
{
   unsigned i = ureg->nr_temps;
   struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );

   if (local)
      util_bitmask_set(ureg->local_temps, i);

   /* Always start a new declaration at the start */
   util_bitmask_set(ureg->decl_temps, i);

   ureg->nr_temps += size;

   /* and also at the end of the array */
   util_bitmask_set(ureg->decl_temps, ureg->nr_temps);

   /* On overflow the array silently gets ArrayID 0 (non-array temps). */
   if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
      ureg->array_temps[ureg->nr_array_temps++] = i;
      dst.ArrayID = ureg->nr_array_temps;
   }

   return dst;
}
700
701 void ureg_release_temporary( struct ureg_program *ureg,
702 struct ureg_dst tmp )
703 {
704 if(tmp.File == TGSI_FILE_TEMPORARY)
705 util_bitmask_set(ureg->free_temps, tmp.Index);
706 }
707
708
709 /* Allocate a new address register.
710 */
711 struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
712 {
713 if (ureg->nr_addrs < UREG_MAX_ADDR)
714 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
715
716 assert( 0 );
717 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
718 }
719
720 /* Allocate a new sampler.
721 */
722 struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
723 unsigned nr )
724 {
725 unsigned i;
726
727 for (i = 0; i < ureg->nr_samplers; i++)
728 if (ureg->sampler[i].Index == (int)nr)
729 return ureg->sampler[i];
730
731 if (i < PIPE_MAX_SAMPLERS) {
732 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
733 ureg->nr_samplers++;
734 return ureg->sampler[i];
735 }
736
737 assert( 0 );
738 return ureg->sampler[0];
739 }
740
741 /*
742 * Allocate a new shader sampler view.
743 */
744 struct ureg_src
745 ureg_DECL_sampler_view(struct ureg_program *ureg,
746 unsigned index,
747 enum tgsi_texture_type target,
748 enum tgsi_return_type return_type_x,
749 enum tgsi_return_type return_type_y,
750 enum tgsi_return_type return_type_z,
751 enum tgsi_return_type return_type_w)
752 {
753 struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
754 uint i;
755
756 for (i = 0; i < ureg->nr_sampler_views; i++) {
757 if (ureg->sampler_view[i].index == index) {
758 return reg;
759 }
760 }
761
762 if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
763 ureg->sampler_view[i].index = index;
764 ureg->sampler_view[i].target = target;
765 ureg->sampler_view[i].return_type_x = return_type_x;
766 ureg->sampler_view[i].return_type_y = return_type_y;
767 ureg->sampler_view[i].return_type_z = return_type_z;
768 ureg->sampler_view[i].return_type_w = return_type_w;
769 ureg->nr_sampler_views++;
770 return reg;
771 }
772
773 assert(0);
774 return reg;
775 }
776
777 /* Allocate a new image.
778 */
779 struct ureg_src
780 ureg_DECL_image(struct ureg_program *ureg,
781 unsigned index,
782 enum tgsi_texture_type target,
783 enum pipe_format format,
784 boolean wr,
785 boolean raw)
786 {
787 struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
788 unsigned i;
789
790 for (i = 0; i < ureg->nr_images; i++)
791 if (ureg->image[i].index == index)
792 return reg;
793
794 if (i < PIPE_MAX_SHADER_IMAGES) {
795 ureg->image[i].index = index;
796 ureg->image[i].target = target;
797 ureg->image[i].wr = wr;
798 ureg->image[i].raw = raw;
799 ureg->image[i].format = format;
800 ureg->nr_images++;
801 return reg;
802 }
803
804 assert(0);
805 return reg;
806 }
807
808 /* Allocate a new buffer.
809 */
810 struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
811 bool atomic)
812 {
813 struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
814 unsigned i;
815
816 for (i = 0; i < ureg->nr_buffers; i++)
817 if (ureg->buffer[i].index == nr)
818 return reg;
819
820 if (i < PIPE_MAX_SHADER_BUFFERS) {
821 ureg->buffer[i].index = nr;
822 ureg->buffer[i].atomic = atomic;
823 ureg->nr_buffers++;
824 return reg;
825 }
826
827 assert(0);
828 return reg;
829 }
830
831 /* Allocate a memory area.
832 */
833 struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
834 unsigned memory_type)
835 {
836 struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);
837
838 ureg->use_memory[memory_type] = true;
839 return reg;
840 }
841
/* Try to place the 64-bit values in v[0..nr-1] (each spanning two 32-bit
 * slots) inside the immediate vector v2[0..*pnr2-1], appending new pairs as
 * needed up to four slots.  On success *pnr2 is updated and *swizzle maps
 * each requested slot to its position in v2; on failure (vector full)
 * returns 0, though v2 may already have been partially extended.
 */
static int
match_or_expand_immediate64( const unsigned *v,
                             unsigned nr,
                             unsigned *v2,
                             unsigned *pnr2,
                             unsigned *swizzle )
{
   unsigned used = *pnr2;
   unsigned i;

   *swizzle = 0;

   /* Each 64-bit element occupies two consecutive 32-bit slots. */
   for (i = 0; i < nr; i += 2) {
      bool matched = false;
      unsigned j;

      for (j = 0; j < used && !matched; j += 2) {
         if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) {
            *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2));
            matched = true;
         }
      }

      if (!matched) {
         if (used >= 4)
            return 0;

         v2[used] = v[i];
         v2[used + 1] = v[i + 1];

         *swizzle |= (used << (i * 2)) | ((used + 1) << ((i + 1) * 2));
         used += 2;
      }
   }

   /* Commit the new length only on full success. */
   *pnr2 = used;
   return 1;
}
880
881 static int
882 match_or_expand_immediate( const unsigned *v,
883 int type,
884 unsigned nr,
885 unsigned *v2,
886 unsigned *pnr2,
887 unsigned *swizzle )
888 {
889 unsigned nr2 = *pnr2;
890 unsigned i, j;
891
892 if (type == TGSI_IMM_FLOAT64 ||
893 type == TGSI_IMM_UINT64 ||
894 type == TGSI_IMM_INT64)
895 return match_or_expand_immediate64(v, nr, v2, pnr2, swizzle);
896
897 *swizzle = 0;
898
899 for (i = 0; i < nr; i++) {
900 boolean found = FALSE;
901
902 for (j = 0; j < nr2 && !found; j++) {
903 if (v[i] == v2[j]) {
904 *swizzle |= j << (i * 2);
905 found = TRUE;
906 }
907 }
908
909 if (!found) {
910 if (nr2 >= 4) {
911 return FALSE;
912 }
913
914 v2[nr2] = v[i];
915 *swizzle |= nr2 << (i * 2);
916 nr2++;
917 }
918 }
919
920 /* Actually expand immediate only when fully succeeded.
921 */
922 *pnr2 = nr2;
923 return TRUE;
924 }
925
926
/* Declare an immediate, sharing storage with existing immediates of the
 * same type where the values match or fit.  Returns a swizzled source so
 * component k of the result reads the slot actually holding v[k]; unused
 * components repeat the first element (pair for 64-bit types), making
 * size-one immediates behave as scalars.
 */
static struct ureg_src
decl_immediate( struct ureg_program *ureg,
                const unsigned *v,
                unsigned nr,
                unsigned type )
{
   unsigned i, j;
   unsigned swizzle = 0;

   /* Could do a first pass where we examine all existing immediates
    * without expanding.
    */

   for (i = 0; i < ureg->nr_immediates; i++) {
      if (ureg->immediate[i].type != type) {
         continue;
      }
      if (match_or_expand_immediate(v,
                                    type,
                                    nr,
                                    ureg->immediate[i].value.u,
                                    &ureg->immediate[i].nr,
                                    &swizzle)) {
         goto out;
      }
   }

   /* No room in existing immediates: start a fresh one. */
   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
      i = ureg->nr_immediates++;
      ureg->immediate[i].type = type;
      if (match_or_expand_immediate(v,
                                    type,
                                    nr,
                                    ureg->immediate[i].value.u,
                                    &ureg->immediate[i].nr,
                                    &swizzle)) {
         goto out;
      }
   }

   set_bad(ureg);

out:
   /* Make sure that all referenced elements are from this immediate.
    * Has the effect of making size-one immediates into scalars.
    */
   if (type == TGSI_IMM_FLOAT64 ||
       type == TGSI_IMM_UINT64 ||
       type == TGSI_IMM_INT64) {
      /* Replicate the first slot pair into the unused components. */
      for (j = nr; j < 4; j+=2) {
         swizzle |= (swizzle & 0xf) << (j * 2);
      }
   } else {
      for (j = nr; j < 4; j++) {
         swizzle |= (swizzle & 0x3) << (j * 2);
      }
   }
   return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
                       (swizzle >> 0) & 0x3,
                       (swizzle >> 2) & 0x3,
                       (swizzle >> 4) & 0x3,
                       (swizzle >> 6) & 0x3);
}
990
991
992 struct ureg_src
993 ureg_DECL_immediate( struct ureg_program *ureg,
994 const float *v,
995 unsigned nr )
996 {
997 union {
998 float f[4];
999 unsigned u[4];
1000 } fu;
1001 unsigned int i;
1002
1003 for (i = 0; i < nr; i++) {
1004 fu.f[i] = v[i];
1005 }
1006
1007 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
1008 }
1009
1010 struct ureg_src
1011 ureg_DECL_immediate_f64( struct ureg_program *ureg,
1012 const double *v,
1013 unsigned nr )
1014 {
1015 union {
1016 unsigned u[4];
1017 double d[2];
1018 } fu;
1019 unsigned int i;
1020
1021 assert((nr / 2) < 3);
1022 for (i = 0; i < nr / 2; i++) {
1023 fu.d[i] = v[i];
1024 }
1025
1026 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64);
1027 }
1028
1029 struct ureg_src
1030 ureg_DECL_immediate_uint( struct ureg_program *ureg,
1031 const unsigned *v,
1032 unsigned nr )
1033 {
1034 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
1035 }
1036
1037
/* Declare a block of nr consecutive uint values as ceil(nr/4) immediates,
 * without any sharing with existing immediates.  Returns the first
 * immediate register; subsequent values follow at index+1, index+2, ...
 */
struct ureg_src
ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
                                const unsigned *v,
                                unsigned nr )
{
   uint index;
   uint i;

   if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
      set_bad(ureg);
      return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
   }

   index = ureg->nr_immediates;
   ureg->nr_immediates += (nr + 3) / 4;

   for (i = index; i < ureg->nr_immediates; i++) {
      ureg->immediate[i].type = TGSI_IMM_UINT32;
      /* Last immediate may be partially filled (nr % 4 values). */
      ureg->immediate[i].nr = nr > 4 ? 4 : nr;
      memcpy(ureg->immediate[i].value.u,
             &v[(i - index) * 4],
             ureg->immediate[i].nr * sizeof(uint));
      /* May wrap on the final iteration, but is not read again. */
      nr -= 4;
   }

   return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
}
1065
1066
1067 struct ureg_src
1068 ureg_DECL_immediate_int( struct ureg_program *ureg,
1069 const int *v,
1070 unsigned nr )
1071 {
1072 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
1073 }
1074
1075 struct ureg_src
1076 ureg_DECL_immediate_uint64( struct ureg_program *ureg,
1077 const uint64_t *v,
1078 unsigned nr )
1079 {
1080 union {
1081 unsigned u[4];
1082 uint64_t u64[2];
1083 } fu;
1084 unsigned int i;
1085
1086 assert((nr / 2) < 3);
1087 for (i = 0; i < nr / 2; i++) {
1088 fu.u64[i] = v[i];
1089 }
1090
1091 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64);
1092 }
1093
1094 struct ureg_src
1095 ureg_DECL_immediate_int64( struct ureg_program *ureg,
1096 const int64_t *v,
1097 unsigned nr )
1098 {
1099 union {
1100 unsigned u[4];
1101 int64_t i64[2];
1102 } fu;
1103 unsigned int i;
1104
1105 assert((nr / 2) < 3);
1106 for (i = 0; i < nr / 2; i++) {
1107 fu.i64[i] = v[i];
1108 }
1109
1110 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64);
1111 }
1112
/* Emit a source operand into the instruction stream: the main src token,
 * then optional indirect-addressing and dimension tokens, in the exact
 * order the TGSI binary format requires.
 */
void
ureg_emit_src( struct ureg_program *ureg,
               struct ureg_src src )
{
   /* 1 base token, +1 for indirect, +1 for dimension (+1 more if the
    * dimension itself is indirect).
    */
   unsigned size = 1 + (src.Indirect ? 1 : 0) +
                   (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);

   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
   unsigned n = 0;

   assert(src.File != TGSI_FILE_NULL);
   assert(src.File < TGSI_FILE_COUNT);

   out[n].value = 0;
   out[n].src.File = src.File;
   out[n].src.SwizzleX = src.SwizzleX;
   out[n].src.SwizzleY = src.SwizzleY;
   out[n].src.SwizzleZ = src.SwizzleZ;
   out[n].src.SwizzleW = src.SwizzleW;
   out[n].src.Index = src.Index;
   out[n].src.Negate = src.Negate;
   out[0].src.Absolute = src.Absolute;
   n++;

   if (src.Indirect) {
      out[0].src.Indirect = 1;
      out[n].value = 0;
      out[n].ind.File = src.IndirectFile;
      out[n].ind.Swizzle = src.IndirectSwizzle;
      out[n].ind.Index = src.IndirectIndex;
      /* ArrayIDs on in/outs are only valid when the driver supports
       * arbitrary in/out declaration ranges.
       */
      if (!ureg->supports_any_inout_decl_range &&
          (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
         out[n].ind.ArrayID = 0;
      else
         out[n].ind.ArrayID = src.ArrayID;
      n++;
   }

   if (src.Dimension) {
      out[0].src.Dimension = 1;
      out[n].dim.Dimension = 0;
      out[n].dim.Padding = 0;
      if (src.DimIndirect) {
         out[n].dim.Indirect = 1;
         out[n].dim.Index = src.DimensionIndex;
         n++;
         out[n].value = 0;
         out[n].ind.File = src.DimIndFile;
         out[n].ind.Swizzle = src.DimIndSwizzle;
         out[n].ind.Index = src.DimIndIndex;
         if (!ureg->supports_any_inout_decl_range &&
             (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
            out[n].ind.ArrayID = 0;
         else
            out[n].ind.ArrayID = src.ArrayID;
      } else {
         out[n].dim.Indirect = 0;
         out[n].dim.Index = src.DimensionIndex;
      }
      n++;
   }

   /* Exactly as many tokens written as reserved. */
   assert(n == size);
}
1177
1178
/* Emit a destination operand into the instruction stream: the main dst
 * token, then optional indirect-addressing and dimension tokens, mirroring
 * ureg_emit_src().
 */
void
ureg_emit_dst( struct ureg_program *ureg,
               struct ureg_dst dst )
{
   /* 1 base token, +1 for indirect, +1 for dimension (+1 more if the
    * dimension itself is indirect).
    */
   unsigned size = 1 + (dst.Indirect ? 1 : 0) +
                   (dst.Dimension ? (dst.DimIndirect ? 2 : 1) : 0);

   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
   unsigned n = 0;

   /* These files can never be written. */
   assert(dst.File != TGSI_FILE_NULL);
   assert(dst.File != TGSI_FILE_SAMPLER);
   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
   assert(dst.File != TGSI_FILE_IMMEDIATE);
   assert(dst.File < TGSI_FILE_COUNT);

   out[n].value = 0;
   out[n].dst.File = dst.File;
   out[n].dst.WriteMask = dst.WriteMask;
   out[n].dst.Indirect = dst.Indirect;
   out[n].dst.Index = dst.Index;
   n++;

   if (dst.Indirect) {
      out[n].value = 0;
      out[n].ind.File = dst.IndirectFile;
      out[n].ind.Swizzle = dst.IndirectSwizzle;
      out[n].ind.Index = dst.IndirectIndex;
      /* ArrayIDs on in/outs are only valid when the driver supports
       * arbitrary in/out declaration ranges.
       */
      if (!ureg->supports_any_inout_decl_range &&
          (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
         out[n].ind.ArrayID = 0;
      else
         out[n].ind.ArrayID = dst.ArrayID;
      n++;
   }

   if (dst.Dimension) {
      out[0].dst.Dimension = 1;
      out[n].dim.Dimension = 0;
      out[n].dim.Padding = 0;
      if (dst.DimIndirect) {
         out[n].dim.Indirect = 1;
         out[n].dim.Index = dst.DimensionIndex;
         n++;
         out[n].value = 0;
         out[n].ind.File = dst.DimIndFile;
         out[n].ind.Swizzle = dst.DimIndSwizzle;
         out[n].ind.Index = dst.DimIndIndex;
         if (!ureg->supports_any_inout_decl_range &&
             (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
            out[n].ind.ArrayID = 0;
         else
            out[n].ind.ArrayID = dst.ArrayID;
      } else {
         out[n].dim.Indirect = 0;
         out[n].dim.Index = dst.DimensionIndex;
      }
      n++;
   }

   /* Exactly as many tokens written as reserved. */
   assert(n == size);
}
1241
1242
1243 static void validate( enum tgsi_opcode opcode,
1244 unsigned nr_dst,
1245 unsigned nr_src )
1246 {
1247 #ifndef NDEBUG
1248 const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
1249 assert(info);
1250 if (info) {
1251 assert(nr_dst == info->num_dst);
1252 assert(nr_src == info->num_src);
1253 }
1254 #endif
1255 }
1256
1257 struct ureg_emit_insn_result
1258 ureg_emit_insn(struct ureg_program *ureg,
1259 enum tgsi_opcode opcode,
1260 boolean saturate,
1261 unsigned precise,
1262 unsigned num_dst,
1263 unsigned num_src)
1264 {
1265 union tgsi_any_token *out;
1266 uint count = 1;
1267 struct ureg_emit_insn_result result;
1268
1269 validate( opcode, num_dst, num_src );
1270
1271 out = get_tokens( ureg, DOMAIN_INSN, count );
1272 out[0].insn = tgsi_default_instruction();
1273 out[0].insn.Opcode = opcode;
1274 out[0].insn.Saturate = saturate;
1275 out[0].insn.Precise = precise;
1276 out[0].insn.NumDstRegs = num_dst;
1277 out[0].insn.NumSrcRegs = num_src;
1278
1279 result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
1280 result.extended_token = result.insn_token;
1281
1282 ureg->nr_instructions++;
1283
1284 return result;
1285 }
1286
1287
/**
 * Emit a label token.
 * \param label_token returns a token number indicating where the label
 * needs to be patched later.  Later, this value should be passed to the
 * ureg_fixup_label() function.
 */
void
ureg_emit_label(struct ureg_program *ureg,
                unsigned extended_token,
                unsigned *label_token )
{
   union tgsi_any_token *out, *insn;

   if (!label_token)
      return;

   /* Reserve the label token before touching the instruction header:
    * get_tokens() may grow (realloc) the token buffer, which would
    * invalidate a pointer obtained earlier via retrieve_token(). */
   out = get_tokens( ureg, DOMAIN_INSN, 1 );
   out[0].value = 0;

   /* Mark the instruction as carrying a label extension token. */
   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
   insn->insn.Label = 1;

   /* Token number of the (still unpatched) label we just emitted. */
   *label_token = ureg->domain[DOMAIN_INSN].count - 1;
}
1312
1313 /* Will return a number which can be used in a label to point to the
1314 * next instruction to be emitted.
1315 */
1316 unsigned
1317 ureg_get_instruction_number( struct ureg_program *ureg )
1318 {
1319 return ureg->nr_instructions;
1320 }
1321
1322 /* Patch a given label (expressed as a token number) to point to a
1323 * given instruction (expressed as an instruction number).
1324 */
1325 void
1326 ureg_fixup_label(struct ureg_program *ureg,
1327 unsigned label_token,
1328 unsigned instruction_number )
1329 {
1330 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
1331
1332 out->insn_label.Label = instruction_number;
1333 }
1334
1335
/* Append a texture extension token (target, return type, number of
 * following offset tokens) to the instruction at `extended_token`.
 */
void
ureg_emit_texture(struct ureg_program *ureg,
                  unsigned extended_token,
                  enum tgsi_texture_type target,
                  enum tgsi_return_type return_type, unsigned num_offsets)
{
   union tgsi_any_token *out, *insn;

   /* get_tokens() first: it may realloc the buffer and would otherwise
    * invalidate the pointer returned by retrieve_token(). */
   out = get_tokens( ureg, DOMAIN_INSN, 1 );
   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );

   insn->insn.Texture = 1;

   out[0].value = 0;
   out[0].insn_texture.Texture = target;
   out[0].insn_texture.NumOffsets = num_offsets;
   out[0].insn_texture.ReturnType = return_type;
}
1354
1355 void
1356 ureg_emit_texture_offset(struct ureg_program *ureg,
1357 const struct tgsi_texture_offset *offset)
1358 {
1359 union tgsi_any_token *out;
1360
1361 out = get_tokens( ureg, DOMAIN_INSN, 1);
1362
1363 out[0].value = 0;
1364 out[0].insn_texture_offset = *offset;
1365 }
1366
/* Append a memory extension token (access qualifier, texture target
 * and format) to the instruction at `extended_token`.
 */
void
ureg_emit_memory(struct ureg_program *ureg,
                 unsigned extended_token,
                 unsigned qualifier,
                 enum tgsi_texture_type texture,
                 enum pipe_format format)
{
   union tgsi_any_token *out, *insn;

   /* get_tokens() first: it may realloc the buffer and would otherwise
    * invalidate the pointer returned by retrieve_token(). */
   out = get_tokens( ureg, DOMAIN_INSN, 1 );
   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );

   insn->insn.Memory = 1;

   out[0].value = 0;
   out[0].insn_memory.Qualifier = qualifier;
   out[0].insn_memory.Texture = texture;
   out[0].insn_memory.Format = format;
}
1386
1387 void
1388 ureg_fixup_insn_size(struct ureg_program *ureg,
1389 unsigned insn )
1390 {
1391 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
1392
1393 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
1394 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
1395 }
1396
1397
1398 void
1399 ureg_insn(struct ureg_program *ureg,
1400 enum tgsi_opcode opcode,
1401 const struct ureg_dst *dst,
1402 unsigned nr_dst,
1403 const struct ureg_src *src,
1404 unsigned nr_src,
1405 unsigned precise )
1406 {
1407 struct ureg_emit_insn_result insn;
1408 unsigned i;
1409 boolean saturate;
1410
1411 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1412 return;
1413 }
1414
1415 saturate = nr_dst ? dst[0].Saturate : FALSE;
1416
1417 insn = ureg_emit_insn(ureg,
1418 opcode,
1419 saturate,
1420 precise,
1421 nr_dst,
1422 nr_src);
1423
1424 for (i = 0; i < nr_dst; i++)
1425 ureg_emit_dst( ureg, dst[i] );
1426
1427 for (i = 0; i < nr_src; i++)
1428 ureg_emit_src( ureg, src[i] );
1429
1430 ureg_fixup_insn_size( ureg, insn.insn_token );
1431 }
1432
1433 void
1434 ureg_tex_insn(struct ureg_program *ureg,
1435 enum tgsi_opcode opcode,
1436 const struct ureg_dst *dst,
1437 unsigned nr_dst,
1438 enum tgsi_texture_type target,
1439 enum tgsi_return_type return_type,
1440 const struct tgsi_texture_offset *texoffsets,
1441 unsigned nr_offset,
1442 const struct ureg_src *src,
1443 unsigned nr_src )
1444 {
1445 struct ureg_emit_insn_result insn;
1446 unsigned i;
1447 boolean saturate;
1448
1449 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1450 return;
1451 }
1452
1453 saturate = nr_dst ? dst[0].Saturate : FALSE;
1454
1455 insn = ureg_emit_insn(ureg,
1456 opcode,
1457 saturate,
1458 0,
1459 nr_dst,
1460 nr_src);
1461
1462 ureg_emit_texture( ureg, insn.extended_token, target, return_type,
1463 nr_offset );
1464
1465 for (i = 0; i < nr_offset; i++)
1466 ureg_emit_texture_offset( ureg, &texoffsets[i]);
1467
1468 for (i = 0; i < nr_dst; i++)
1469 ureg_emit_dst( ureg, dst[i] );
1470
1471 for (i = 0; i < nr_src; i++)
1472 ureg_emit_src( ureg, src[i] );
1473
1474 ureg_fixup_insn_size( ureg, insn.insn_token );
1475 }
1476
1477
1478 void
1479 ureg_memory_insn(struct ureg_program *ureg,
1480 enum tgsi_opcode opcode,
1481 const struct ureg_dst *dst,
1482 unsigned nr_dst,
1483 const struct ureg_src *src,
1484 unsigned nr_src,
1485 unsigned qualifier,
1486 enum tgsi_texture_type texture,
1487 enum pipe_format format)
1488 {
1489 struct ureg_emit_insn_result insn;
1490 unsigned i;
1491
1492 insn = ureg_emit_insn(ureg,
1493 opcode,
1494 FALSE,
1495 0,
1496 nr_dst,
1497 nr_src);
1498
1499 ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format);
1500
1501 for (i = 0; i < nr_dst; i++)
1502 ureg_emit_dst(ureg, dst[i]);
1503
1504 for (i = 0; i < nr_src; i++)
1505 ureg_emit_src(ureg, src[i]);
1506
1507 ureg_fixup_insn_size(ureg, insn.insn_token);
1508 }
1509
1510
/* Declare a register range with semantic (name/index) information;
 * used for outputs, system values and non-fragment inputs.
 *
 * Token layout: decl header, register range, semantic token, plus an
 * optional trailing array token when array_id is non-zero.
 */
static void
emit_decl_semantic(struct ureg_program *ureg,
                   unsigned file,
                   unsigned first,
                   unsigned last,
                   enum tgsi_semantic semantic_name,
                   unsigned semantic_index,
                   unsigned streams,
                   unsigned usage_mask,
                   unsigned array_id,
                   boolean invariant)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   /* NOTE(review): NrTokens stays 3 even when the array token is
    * appended below — all decl emitters in this file follow the same
    * pattern, presumably because consumers key off the Array flag
    * instead; confirm against the tgsi parser. */
   out[0].decl.NrTokens = 3;
   out[0].decl.File = file;
   out[0].decl.UsageMask = usage_mask;
   out[0].decl.Semantic = 1;
   out[0].decl.Array = array_id != 0;
   out[0].decl.Invariant = invariant;

   out[1].value = 0;
   out[1].decl_range.First = first;
   out[1].decl_range.Last = last;

   out[2].value = 0;
   out[2].decl_semantic.Name = semantic_name;
   out[2].decl_semantic.Index = semantic_index;
   /* `streams` packs one 2-bit stream selector per component. */
   out[2].decl_semantic.StreamX = streams & 3;
   out[2].decl_semantic.StreamY = (streams >> 2) & 3;
   out[2].decl_semantic.StreamZ = (streams >> 4) & 3;
   out[2].decl_semantic.StreamW = (streams >> 6) & 3;

   if (array_id) {
      out[3].value = 0;
      out[3].array.ArrayID = array_id;
   }
}
1551
/* Declare a range of HW atomic counters.  The declaration is
 * two-dimensional: the second dimension (index2D) selects the atomic
 * buffer binding.  An optional array token follows when array_id != 0.
 */
static void
emit_decl_atomic_2d(struct ureg_program *ureg,
                    unsigned first,
                    unsigned last,
                    unsigned index2D,
                    unsigned array_id)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 3;
   out[0].decl.File = TGSI_FILE_HW_ATOMIC;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
   out[0].decl.Dimension = 1;
   out[0].decl.Array = array_id != 0;

   out[1].value = 0;
   out[1].decl_range.First = first;
   out[1].decl_range.Last = last;

   out[2].value = 0;
   out[2].decl_dim.Index2D = index2D;

   if (array_id) {
      out[3].value = 0;
      out[3].array.ArrayID = array_id;
   }
}
1581
/* Declare a fragment-shader input: carries both an interpolation token
 * and a semantic token (4 tokens, 5 with the optional array token).
 */
static void
emit_decl_fs(struct ureg_program *ureg,
             unsigned file,
             unsigned first,
             unsigned last,
             enum tgsi_semantic semantic_name,
             unsigned semantic_index,
             enum tgsi_interpolate_mode interpolate,
             unsigned cylindrical_wrap,
             enum tgsi_interpolate_loc interpolate_location,
             unsigned array_id,
             unsigned usage_mask)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL,
                                          array_id ? 5 : 4);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 4;
   out[0].decl.File = file;
   out[0].decl.UsageMask = usage_mask;
   out[0].decl.Interpolate = 1;
   out[0].decl.Semantic = 1;
   out[0].decl.Array = array_id != 0;

   out[1].value = 0;
   out[1].decl_range.First = first;
   out[1].decl_range.Last = last;

   out[2].value = 0;
   out[2].decl_interp.Interpolate = interpolate;
   out[2].decl_interp.CylindricalWrap = cylindrical_wrap;
   out[2].decl_interp.Location = interpolate_location;

   out[3].value = 0;
   out[3].decl_semantic.Name = semantic_name;
   out[3].decl_semantic.Index = semantic_index;

   if (array_id) {
      out[4].value = 0;
      out[4].array.ArrayID = array_id;
   }
}
1625
/* Declare the temporary-register range [first, last].  `local` sets
 * the declaration's Local bit; a non-zero `arrayid` turns the range
 * into an indexable array (Array flag + trailing array token).
 */
static void
emit_decl_temps( struct ureg_program *ureg,
                 unsigned first, unsigned last,
                 boolean local,
                 unsigned arrayid )
{
   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
                                           arrayid ? 3 : 2 );

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 2;
   out[0].decl.File = TGSI_FILE_TEMPORARY;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
   out[0].decl.Local = local;

   out[1].value = 0;
   out[1].decl_range.First = first;
   out[1].decl_range.Last = last;

   if (arrayid) {
      out[0].decl.Array = 1;
      out[2].value = 0;
      out[2].array.ArrayID = arrayid;
   }
}
1652
/* Declare `count` consecutive registers of `file` starting at `first`,
 * with full XYZW usage and no semantic information.
 */
static void emit_decl_range( struct ureg_program *ureg,
                             unsigned file,
                             unsigned first,
                             unsigned count )
{
   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 2;
   out[0].decl.File = file;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
   out[0].decl.Semantic = 0;

   out[1].value = 0;
   out[1].decl_range.First = first;
   out[1].decl_range.Last = first + count - 1;
}
1671
/* Declare the range [first, last] of `file` with a second dimension
 * index (e.g. the constant-buffer slot for CONST[index2D][..]).
 */
static void
emit_decl_range2D(struct ureg_program *ureg,
                  unsigned file,
                  unsigned first,
                  unsigned last,
                  unsigned index2D)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 3;
   out[0].decl.File = file;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
   out[0].decl.Dimension = 1;

   out[1].value = 0;
   out[1].decl_range.First = first;
   out[1].decl_range.Last = last;

   out[2].value = 0;
   out[2].decl_dim.Index2D = index2D;
}
1695
/* Declare sampler view `index`: its texture target plus the return
 * type of each of the four components.
 */
static void
emit_decl_sampler_view(struct ureg_program *ureg,
                       unsigned index,
                       enum tgsi_texture_type target,
                       enum tgsi_return_type return_type_x,
                       enum tgsi_return_type return_type_y,
                       enum tgsi_return_type return_type_z,
                       enum tgsi_return_type return_type_w )
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 3;
   out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;

   /* Single-register range: one view per declaration. */
   out[1].value = 0;
   out[1].decl_range.First = index;
   out[1].decl_range.Last = index;

   out[2].value = 0;
   out[2].decl_sampler_view.Resource = target;
   out[2].decl_sampler_view.ReturnTypeX = return_type_x;
   out[2].decl_sampler_view.ReturnTypeY = return_type_y;
   out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
   out[2].decl_sampler_view.ReturnTypeW = return_type_w;
}
1724
/* Declare image `index` with its target and format; `wr` sets the
 * Writable bit and `raw` the Raw bit of the declaration.
 */
static void
emit_decl_image(struct ureg_program *ureg,
                unsigned index,
                enum tgsi_texture_type target,
                enum pipe_format format,
                boolean wr,
                boolean raw)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 3;
   out[0].decl.File = TGSI_FILE_IMAGE;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;

   /* Single-register range: one image per declaration. */
   out[1].value = 0;
   out[1].decl_range.First = index;
   out[1].decl_range.Last = index;

   out[2].value = 0;
   out[2].decl_image.Resource = target;
   out[2].decl_image.Writable = wr;
   out[2].decl_image.Raw = raw;
   out[2].decl_image.Format = format;
}
1751
/* Declare shader buffer `index`; `atomic` sets the Atomic bit on the
 * declaration header.
 */
static void
emit_decl_buffer(struct ureg_program *ureg,
                 unsigned index,
                 bool atomic)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 2;
   out[0].decl.File = TGSI_FILE_BUFFER;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
   out[0].decl.Atomic = atomic;

   /* Single-register range: one buffer per declaration. */
   out[1].value = 0;
   out[1].decl_range.First = index;
   out[1].decl_range.Last = index;
}
1770
/* Declare a memory resource of the given TGSI_MEMORY_TYPE_*.  Note
 * that the range First/Last reuse the memory type as the register
 * index: there is exactly one MEMORY register per type (see the
 * use_memory[] loop in emit_decls()).
 */
static void
emit_decl_memory(struct ureg_program *ureg, unsigned memory_type)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);

   out[0].value = 0;
   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   out[0].decl.NrTokens = 2;
   out[0].decl.File = TGSI_FILE_MEMORY;
   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
   out[0].decl.MemType = memory_type;

   out[1].value = 0;
   out[1].decl_range.First = memory_type;
   out[1].decl_range.Last = memory_type;
}
1787
1788 static void
1789 emit_immediate( struct ureg_program *ureg,
1790 const unsigned *v,
1791 unsigned type )
1792 {
1793 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1794
1795 out[0].value = 0;
1796 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1797 out[0].imm.NrTokens = 5;
1798 out[0].imm.DataType = type;
1799 out[0].imm.Padding = 0;
1800
1801 out[1].imm_data.Uint = v[0];
1802 out[2].imm_data.Uint = v[1];
1803 out[3].imm_data.Uint = v[2];
1804 out[4].imm_data.Uint = v[3];
1805 }
1806
/* Emit one property: a property header token plus a single 32-bit
 * data word.
 */
static void
emit_property(struct ureg_program *ureg,
              unsigned name,
              unsigned data)
{
   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);

   out[0].value = 0;
   out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
   out[0].prop.NrTokens = 2;
   out[0].prop.PropertyName = name;

   out[1].prop_data.Data = data;
}
1821
1822
/* Convert all declaration-side state accumulated on the ureg
 * (properties, inputs, outputs, system values, samplers, sampler
 * views, images, buffers, memory, constants, HW atomics, temporaries,
 * address registers and immediates) into declaration tokens, in the
 * order shown below.
 */
static void emit_decls( struct ureg_program *ureg )
{
   unsigned i,j;

   /* Properties first; ~0 marks "never set". */
   for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
      if (ureg->properties[i] != ~0u)
         emit_property(ureg, i, ureg->properties[i]);

   /* Inputs.  VS inputs are a plain attribute bitmask; FS inputs carry
    * interpolation info; all other stages use semantic declarations.
    * When the driver cannot accept arbitrary in/out declaration ranges
    * each range is split into per-register declarations, advancing the
    * semantic index and dropping the array id. */
   if (ureg->processor == PIPE_SHADER_VERTEX) {
      for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
         if (ureg->vs_inputs[i/32] & (1u << (i%32))) {
            emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
         }
      }
   } else if (ureg->processor == PIPE_SHADER_FRAGMENT) {
      if (ureg->supports_any_inout_decl_range) {
         for (i = 0; i < ureg->nr_inputs; i++) {
            emit_decl_fs(ureg,
                         TGSI_FILE_INPUT,
                         ureg->input[i].first,
                         ureg->input[i].last,
                         ureg->input[i].semantic_name,
                         ureg->input[i].semantic_index,
                         ureg->input[i].interp,
                         ureg->input[i].cylindrical_wrap,
                         ureg->input[i].interp_location,
                         ureg->input[i].array_id,
                         ureg->input[i].usage_mask);
         }
      }
      else {
         for (i = 0; i < ureg->nr_inputs; i++) {
            for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
               emit_decl_fs(ureg,
                            TGSI_FILE_INPUT,
                            j, j,
                            ureg->input[i].semantic_name,
                            ureg->input[i].semantic_index +
                            (j - ureg->input[i].first),
                            ureg->input[i].interp,
                            ureg->input[i].cylindrical_wrap,
                            ureg->input[i].interp_location, 0,
                            ureg->input[i].usage_mask);
            }
         }
      }
   } else {
      if (ureg->supports_any_inout_decl_range) {
         for (i = 0; i < ureg->nr_inputs; i++) {
            emit_decl_semantic(ureg,
                               TGSI_FILE_INPUT,
                               ureg->input[i].first,
                               ureg->input[i].last,
                               ureg->input[i].semantic_name,
                               ureg->input[i].semantic_index,
                               0,
                               TGSI_WRITEMASK_XYZW,
                               ureg->input[i].array_id,
                               FALSE);
         }
      }
      else {
         for (i = 0; i < ureg->nr_inputs; i++) {
            for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
               emit_decl_semantic(ureg,
                                  TGSI_FILE_INPUT,
                                  j, j,
                                  ureg->input[i].semantic_name,
                                  ureg->input[i].semantic_index +
                                  (j - ureg->input[i].first),
                                  0,
                                  TGSI_WRITEMASK_XYZW, 0, FALSE);
            }
         }
      }
   }

   /* System values: one single-register declaration each. */
   for (i = 0; i < ureg->nr_system_values; i++) {
      emit_decl_semantic(ureg,
                         TGSI_FILE_SYSTEM_VALUE,
                         i,
                         i,
                         ureg->system_value[i].semantic_name,
                         ureg->system_value[i].semantic_index,
                         0,
                         TGSI_WRITEMASK_XYZW, 0, FALSE);
   }

   /* Outputs, with the same range-splitting fallback as inputs. */
   if (ureg->supports_any_inout_decl_range) {
      for (i = 0; i < ureg->nr_outputs; i++) {
         emit_decl_semantic(ureg,
                            TGSI_FILE_OUTPUT,
                            ureg->output[i].first,
                            ureg->output[i].last,
                            ureg->output[i].semantic_name,
                            ureg->output[i].semantic_index,
                            ureg->output[i].streams,
                            ureg->output[i].usage_mask,
                            ureg->output[i].array_id,
                            ureg->output[i].invariant);
      }
   }
   else {
      for (i = 0; i < ureg->nr_outputs; i++) {
         for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) {
            emit_decl_semantic(ureg,
                               TGSI_FILE_OUTPUT,
                               j, j,
                               ureg->output[i].semantic_name,
                               ureg->output[i].semantic_index +
                               (j - ureg->output[i].first),
                               ureg->output[i].streams,
                               ureg->output[i].usage_mask,
                               0,
                               ureg->output[i].invariant);
         }
      }
   }

   for (i = 0; i < ureg->nr_samplers; i++) {
      emit_decl_range( ureg,
                       TGSI_FILE_SAMPLER,
                       ureg->sampler[i].Index, 1 );
   }

   for (i = 0; i < ureg->nr_sampler_views; i++) {
      emit_decl_sampler_view(ureg,
                             ureg->sampler_view[i].index,
                             ureg->sampler_view[i].target,
                             ureg->sampler_view[i].return_type_x,
                             ureg->sampler_view[i].return_type_y,
                             ureg->sampler_view[i].return_type_z,
                             ureg->sampler_view[i].return_type_w);
   }

   for (i = 0; i < ureg->nr_images; i++) {
      emit_decl_image(ureg,
                      ureg->image[i].index,
                      ureg->image[i].target,
                      ureg->image[i].format,
                      ureg->image[i].wr,
                      ureg->image[i].raw);
   }

   for (i = 0; i < ureg->nr_buffers; i++) {
      emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
   }

   /* One MEMORY declaration per memory type actually used. */
   for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) {
      if (ureg->use_memory[i])
         emit_decl_memory(ureg, i);
   }

   /* Constant ranges, per constant buffer (2D: buffer index is the
    * second dimension). */
   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
      struct const_decl *decl = &ureg->const_decls[i];

      if (decl->nr_constant_ranges) {
         uint j;

         for (j = 0; j < decl->nr_constant_ranges; j++) {
            emit_decl_range2D(ureg,
                              TGSI_FILE_CONSTANT,
                              decl->constant_range[j].first,
                              decl->constant_range[j].last,
                              i);
         }
      }
   }

   /* HW atomic counter ranges, per atomic buffer. */
   for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
      struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i];

      if (decl->nr_hw_atomic_ranges) {
         uint j;

         for (j = 0; j < decl->nr_hw_atomic_ranges; j++) {
            emit_decl_atomic_2d(ureg,
                                decl->hw_atomic_range[j].first,
                                decl->hw_atomic_range[j].last,
                                i,
                                decl->hw_atomic_range[j].array_id);
         }
      }
   }

   /* Temporaries: walk the decl_temps bitmask to find range
    * boundaries; a range that starts at a recorded array start gets
    * the next array id. */
   if (ureg->nr_temps) {
      unsigned array = 0;
      for (i = 0; i < ureg->nr_temps;) {
         boolean local = util_bitmask_get(ureg->local_temps, i);
         unsigned first = i;
         i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
         if (i == UTIL_BITMASK_INVALID_INDEX)
            i = ureg->nr_temps;

         if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
            emit_decl_temps( ureg, first, i - 1, local, ++array );
         else
            emit_decl_temps( ureg, first, i - 1, local, 0 );
      }
   }

   if (ureg->nr_addrs) {
      emit_decl_range( ureg,
                       TGSI_FILE_ADDRESS,
                       0, ureg->nr_addrs );
   }

   for (i = 0; i < ureg->nr_immediates; i++) {
      emit_immediate( ureg,
                      ureg->immediate[i].value.u,
                      ureg->immediate[i].type );
   }
}
2036
/* Append the instruction tokens onto the declarations to build a
 * contiguous stream suitable to send to the driver.
 */
static void copy_instructions( struct ureg_program *ureg )
{
   unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
   /* Growing the DECL domain may realloc it, but the copy source lives
    * in the separate INSN domain, so the memcpy below stays valid. */
   union tgsi_any_token *out = get_tokens( ureg,
                                           DOMAIN_DECL,
                                           nr_tokens );

   memcpy(out,
          ureg->domain[DOMAIN_INSN].tokens,
          nr_tokens * sizeof out[0] );
}
2051
2052
2053 static void
2054 fixup_header_size(struct ureg_program *ureg)
2055 {
2056 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
2057
2058 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
2059 }
2060
2061
2062 static void
2063 emit_header( struct ureg_program *ureg )
2064 {
2065 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
2066
2067 out[0].header.HeaderSize = 2;
2068 out[0].header.BodySize = 0;
2069
2070 out[1].processor.Processor = ureg->processor;
2071 out[1].processor.Padding = 0;
2072 }
2073
2074
/* Assemble the final contiguous token stream: header, declarations,
 * then the copied instruction tokens.  Returns a pointer into the
 * ureg's DECL domain (still owned by the ureg), or NULL on error.
 */
const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
{
   const struct tgsi_token *tokens;

   /* Stages that can feed another stage record their successor;
    * default to fragment if the caller never set one
    * (next_shader_processor is initialized to -1 in
    * ureg_create_with_screen()). */
   switch (ureg->processor) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_TESS_EVAL:
      ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER,
                    ureg->next_shader_processor == -1 ?
                    PIPE_SHADER_FRAGMENT :
                    ureg->next_shader_processor);
      break;
   default:
      ; /* nothing */
   }

   emit_header( ureg );
   emit_decls( ureg );
   copy_instructions( ureg );
   fixup_header_size( ureg );

   /* error_tokens is the sentinel installed when a token allocation
    * failed somewhere along the way. */
   if (ureg->domain[0].tokens == error_tokens ||
       ureg->domain[1].tokens == error_tokens) {
      debug_printf("%s: error in generated shader\n", __FUNCTION__);
      assert(0);
      return NULL;
   }

   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;

   /* Flip to 1 for ad-hoc dumping of every finalized shader. */
   if (0) {
      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
                   ureg->domain[DOMAIN_DECL].count);
      tgsi_dump( tokens, 0 );
   }

#if DEBUG
   /* tgsi_sanity doesn't seem to return if there are too many constants. */
   bool too_many_constants = false;
   for (unsigned i = 0; i < ARRAY_SIZE(ureg->const_decls); i++) {
      for (unsigned j = 0; j < ureg->const_decls[i].nr_constant_ranges; j++) {
         if (ureg->const_decls[i].constant_range[j].last > 4096) {
            too_many_constants = true;
            break;
         }
      }
   }

   if (tokens && !too_many_constants && !tgsi_sanity_check(tokens)) {
      debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
      tgsi_dump(tokens, 0);
      assert(0);
   }
#endif


   return tokens;
}
2133
2134
/* Finalize the token stream and hand it to the context as a new
 * shader CSO for the ureg's processor stage, with optional
 * stream-output info.  Returns NULL on finalize failure or an
 * unhandled stage.
 *
 * NOTE(review): compute falls through to NULL here; presumably
 * compute shaders go through a pipe_compute_state path instead —
 * confirm with callers.
 */
void *ureg_create_shader( struct ureg_program *ureg,
                          struct pipe_context *pipe,
                          const struct pipe_stream_output_info *so )
{
   struct pipe_shader_state state = {0};

   pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg));
   if(!state.tokens)
      return NULL;

   if (so)
      state.stream_output = *so;

   switch (ureg->processor) {
   case PIPE_SHADER_VERTEX:
      return pipe->create_vs_state(pipe, &state);
   case PIPE_SHADER_TESS_CTRL:
      return pipe->create_tcs_state(pipe, &state);
   case PIPE_SHADER_TESS_EVAL:
      return pipe->create_tes_state(pipe, &state);
   case PIPE_SHADER_GEOMETRY:
      return pipe->create_gs_state(pipe, &state);
   case PIPE_SHADER_FRAGMENT:
      return pipe->create_fs_state(pipe, &state);
   default:
      return NULL;
   }
}
2163
2164
2165 const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
2166 unsigned *nr_tokens )
2167 {
2168 const struct tgsi_token *tokens;
2169
2170 ureg_finalize(ureg);
2171
2172 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
2173
2174 if (nr_tokens)
2175 *nr_tokens = ureg->domain[DOMAIN_DECL].count;
2176
2177 ureg->domain[DOMAIN_DECL].tokens = 0;
2178 ureg->domain[DOMAIN_DECL].size = 0;
2179 ureg->domain[DOMAIN_DECL].order = 0;
2180 ureg->domain[DOMAIN_DECL].count = 0;
2181
2182 return tokens;
2183 }
2184
2185
/* Release a token array previously handed out by ureg_get_tokens().
 * The const cast is safe: the tokens were heap-allocated here.
 */
void ureg_free_tokens( const struct tgsi_token *tokens )
{
   FREE((struct tgsi_token *)tokens);
}
2190
2191
2192 struct ureg_program *
2193 ureg_create(enum pipe_shader_type processor)
2194 {
2195 return ureg_create_with_screen(processor, NULL);
2196 }
2197
2198
/* Allocate and initialize a ureg for the given shader stage.  When a
 * screen is supplied, its shader caps are queried; currently only the
 * in/out declaration-range capability is recorded.  Returns NULL on
 * allocation failure (goto-chain unwinds partial allocations).
 */
struct ureg_program *
ureg_create_with_screen(enum pipe_shader_type processor,
                        struct pipe_screen *screen)
{
   uint i;
   struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
   if (!ureg)
      goto no_ureg;

   ureg->processor = processor;
   ureg->supports_any_inout_decl_range =
      screen &&
      screen->get_shader_param(screen, processor,
                               PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
   /* -1 = "not set"; ureg_finalize() then defaults to fragment. */
   ureg->next_shader_processor = -1;

   /* ~0 marks each property as unset (see emit_decls()). */
   for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
      ureg->properties[i] = ~0;

   ureg->free_temps = util_bitmask_create();
   if (ureg->free_temps == NULL)
      goto no_free_temps;

   ureg->local_temps = util_bitmask_create();
   if (ureg->local_temps == NULL)
      goto no_local_temps;

   ureg->decl_temps = util_bitmask_create();
   if (ureg->decl_temps == NULL)
      goto no_decl_temps;

   return ureg;

   /* Unwind in reverse order of acquisition. */
no_decl_temps:
   util_bitmask_destroy(ureg->local_temps);
no_local_temps:
   util_bitmask_destroy(ureg->free_temps);
no_free_temps:
   FREE(ureg);
no_ureg:
   return NULL;
}
2241
2242
2243 void
2244 ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor)
2245 {
2246 ureg->next_shader_processor = processor;
2247 }
2248
2249
2250 unsigned
2251 ureg_get_nr_outputs( const struct ureg_program *ureg )
2252 {
2253 if (!ureg)
2254 return 0;
2255 return ureg->nr_outputs;
2256 }
2257
2258 static void
2259 ureg_setup_clipdist_info(struct ureg_program *ureg,
2260 const struct shader_info *info)
2261 {
2262 if (info->clip_distance_array_size)
2263 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
2264 info->clip_distance_array_size);
2265 if (info->cull_distance_array_size)
2266 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2267 info->cull_distance_array_size);
2268 }
2269
2270 static void
2271 ureg_setup_tess_ctrl_shader(struct ureg_program *ureg,
2272 const struct shader_info *info)
2273 {
2274 ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
2275 info->tess.tcs_vertices_out);
2276 }
2277
/* Translate shader_info tessellation-evaluation state into TGSI
 * properties.
 */
static void
ureg_setup_tess_eval_shader(struct ureg_program *ureg,
                            const struct shader_info *info)
{
   /* Isolines are reported as GL_LINES — NOTE(review): presumably the
    * pipe consumers expect GL_LINES for the isoline domain; confirm. */
   if (info->tess.primitive_mode == GL_ISOLINES)
      ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
   else
      ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                    info->tess.primitive_mode);

   /* These asserts guarantee that the "+1 mod 3" expression below maps
    * mesa's TESS_SPACING_* values onto PIPE_TESS_SPACING_*. */
   STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
   STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                 PIPE_TESS_SPACING_FRACTIONAL_ODD);
   STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                 PIPE_TESS_SPACING_FRACTIONAL_EVEN);

   ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
                 (info->tess.spacing + 1) % 3);

   /* TGSI expresses winding as "clockwise?", shader_info as "ccw?". */
   ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
                 !info->tess.ccw);
   ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
                 info->tess.point_mode);
}
2302
2303 static void
2304 ureg_setup_geometry_shader(struct ureg_program *ureg,
2305 const struct shader_info *info)
2306 {
2307 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
2308 info->gs.input_primitive);
2309 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
2310 info->gs.output_primitive);
2311 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
2312 info->gs.vertices_out);
2313 ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
2314 info->gs.invocations);
2315 }
2316
2317 static void
2318 ureg_setup_fragment_shader(struct ureg_program *ureg,
2319 const struct shader_info *info)
2320 {
2321 if (info->fs.early_fragment_tests || info->fs.post_depth_coverage) {
2322 ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
2323
2324 if (info->fs.post_depth_coverage)
2325 ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
2326 }
2327
2328 if (info->fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
2329 switch (info->fs.depth_layout) {
2330 case FRAG_DEPTH_LAYOUT_ANY:
2331 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2332 TGSI_FS_DEPTH_LAYOUT_ANY);
2333 break;
2334 case FRAG_DEPTH_LAYOUT_GREATER:
2335 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2336 TGSI_FS_DEPTH_LAYOUT_GREATER);
2337 break;
2338 case FRAG_DEPTH_LAYOUT_LESS:
2339 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2340 TGSI_FS_DEPTH_LAYOUT_LESS);
2341 break;
2342 case FRAG_DEPTH_LAYOUT_UNCHANGED:
2343 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2344 TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
2345 break;
2346 default:
2347 assert(0);
2348 }
2349 }
2350 }
2351
2352 static void
2353 ureg_setup_compute_shader(struct ureg_program *ureg,
2354 const struct shader_info *info)
2355 {
2356 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
2357 info->cs.local_size[0]);
2358 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
2359 info->cs.local_size[1]);
2360 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
2361 info->cs.local_size[2]);
2362
2363 if (info->cs.shared_size)
2364 ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
2365 }
2366
/* Translate generic shader_info state into TGSI properties on the
 * ureg, dispatching to the per-stage helpers above.
 */
void
ureg_setup_shader_info(struct ureg_program *ureg,
                       const struct shader_info *info)
{
   if (info->layer_viewport_relative)
      ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1);

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      ureg_setup_clipdist_info(ureg, info);
      /* Stages that may feed another stage record their successor for
       * the TGSI_PROPERTY_NEXT_SHADER emitted at finalize time. */
      ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage));
      break;
   case MESA_SHADER_TESS_CTRL:
      ureg_setup_tess_ctrl_shader(ureg, info);
      break;
   case MESA_SHADER_TESS_EVAL:
      ureg_setup_tess_eval_shader(ureg, info);
      ureg_setup_clipdist_info(ureg, info);
      ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage));
      break;
   case MESA_SHADER_GEOMETRY:
      ureg_setup_geometry_shader(ureg, info);
      ureg_setup_clipdist_info(ureg, info);
      break;
   case MESA_SHADER_FRAGMENT:
      ureg_setup_fragment_shader(ureg, info);
      break;
   case MESA_SHADER_COMPUTE:
      ureg_setup_compute_shader(ureg, info);
      break;
   default:
      break;
   }
}
2401
2402
2403 void ureg_destroy( struct ureg_program *ureg )
2404 {
2405 unsigned i;
2406
2407 for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) {
2408 if (ureg->domain[i].tokens &&
2409 ureg->domain[i].tokens != error_tokens)
2410 FREE(ureg->domain[i].tokens);
2411 }
2412
2413 util_bitmask_destroy(ureg->free_temps);
2414 util_bitmask_destroy(ureg->local_temps);
2415 util_bitmask_destroy(ureg->decl_temps);
2416
2417 FREE(ureg);
2418 }