1 /**************************************************************************
3 * Copyright 2007 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "util/u_debug.h"
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi_info.h"
31 #include "tgsi_parse.h"
32 #include "tgsi_util.h"
33 #include "tgsi_exec.h"
34 #include "util/bitscan.h"
46 union pointer_hack ph
;
49 ph
.pointer
= unaligned
;
50 ph
.uint64
= (ph
.uint64
+ 15) & ~15;
55 tgsi_util_get_src_register_swizzle(
56 const struct tgsi_src_register
*reg
,
76 tgsi_util_get_full_src_register_swizzle(
77 const struct tgsi_full_src_register
*reg
,
80 return tgsi_util_get_src_register_swizzle(
86 tgsi_util_set_src_register_swizzle(
87 struct tgsi_src_register
*reg
,
93 reg
->SwizzleX
= swizzle
;
96 reg
->SwizzleY
= swizzle
;
99 reg
->SwizzleZ
= swizzle
;
102 reg
->SwizzleW
= swizzle
;
110 tgsi_util_get_full_src_register_sign_mode(
111 const struct tgsi_full_src_register
*reg
,
116 if( reg
->Register
.Absolute
) {
117 /* Consider only the post-abs negation. */
119 if( reg
->Register
.Negate
) {
120 sign_mode
= TGSI_UTIL_SIGN_SET
;
123 sign_mode
= TGSI_UTIL_SIGN_CLEAR
;
127 if( reg
->Register
.Negate
) {
128 sign_mode
= TGSI_UTIL_SIGN_TOGGLE
;
131 sign_mode
= TGSI_UTIL_SIGN_KEEP
;
139 tgsi_util_set_full_src_register_sign_mode(
140 struct tgsi_full_src_register
*reg
,
145 case TGSI_UTIL_SIGN_CLEAR
:
146 reg
->Register
.Negate
= 0;
147 reg
->Register
.Absolute
= 1;
150 case TGSI_UTIL_SIGN_SET
:
151 reg
->Register
.Absolute
= 1;
152 reg
->Register
.Negate
= 1;
155 case TGSI_UTIL_SIGN_TOGGLE
:
156 reg
->Register
.Negate
= 1;
157 reg
->Register
.Absolute
= 0;
160 case TGSI_UTIL_SIGN_KEEP
:
161 reg
->Register
.Negate
= 0;
162 reg
->Register
.Absolute
= 0;
171 * Determine which channels of the specified src register are effectively
172 * used by this instruction.
175 tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction
*inst
,
178 const struct tgsi_full_src_register
*src
= &inst
->Src
[src_idx
];
179 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
184 switch (inst
->Instruction
.Opcode
) {
186 case TGSI_OPCODE_UIF
:
187 case TGSI_OPCODE_EMIT
:
188 case TGSI_OPCODE_ENDPRIM
:
189 case TGSI_OPCODE_RCP
:
190 case TGSI_OPCODE_RSQ
:
191 case TGSI_OPCODE_SQRT
:
192 case TGSI_OPCODE_EX2
:
193 case TGSI_OPCODE_LG2
:
194 case TGSI_OPCODE_SIN
:
195 case TGSI_OPCODE_COS
:
196 case TGSI_OPCODE_POW
: /* reads src0.x and src1.x */
197 case TGSI_OPCODE_UP2H
:
198 case TGSI_OPCODE_UP2US
:
199 case TGSI_OPCODE_UP4B
:
200 case TGSI_OPCODE_UP4UB
:
201 case TGSI_OPCODE_MEMBAR
:
202 case TGSI_OPCODE_BALLOT
:
203 read_mask
= TGSI_WRITEMASK_X
;
206 case TGSI_OPCODE_DP2
:
207 case TGSI_OPCODE_PK2H
:
208 case TGSI_OPCODE_PK2US
:
209 case TGSI_OPCODE_DFRACEXP
:
210 case TGSI_OPCODE_F2D
:
211 case TGSI_OPCODE_I2D
:
212 case TGSI_OPCODE_U2D
:
213 case TGSI_OPCODE_F2U64
:
214 case TGSI_OPCODE_F2I64
:
215 case TGSI_OPCODE_U2I64
:
216 case TGSI_OPCODE_I2I64
:
217 case TGSI_OPCODE_TXQS
: /* bindless handle possible */
218 case TGSI_OPCODE_RESQ
: /* bindless handle possible */
219 read_mask
= TGSI_WRITEMASK_XY
;
222 case TGSI_OPCODE_TXQ
:
224 read_mask
= TGSI_WRITEMASK_X
;
226 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle possible */
229 case TGSI_OPCODE_DP3
:
230 read_mask
= TGSI_WRITEMASK_XYZ
;
233 case TGSI_OPCODE_DSEQ
:
234 case TGSI_OPCODE_DSNE
:
235 case TGSI_OPCODE_DSLT
:
236 case TGSI_OPCODE_DSGE
:
237 case TGSI_OPCODE_DP4
:
238 case TGSI_OPCODE_PK4B
:
239 case TGSI_OPCODE_PK4UB
:
240 case TGSI_OPCODE_D2F
:
241 case TGSI_OPCODE_D2I
:
242 case TGSI_OPCODE_D2U
:
243 case TGSI_OPCODE_I2F
:
244 case TGSI_OPCODE_U2F
:
245 case TGSI_OPCODE_U64SEQ
:
246 case TGSI_OPCODE_U64SNE
:
247 case TGSI_OPCODE_U64SLT
:
248 case TGSI_OPCODE_U64SGE
:
249 case TGSI_OPCODE_U642F
:
250 case TGSI_OPCODE_I64SLT
:
251 case TGSI_OPCODE_I64SGE
:
252 case TGSI_OPCODE_I642F
:
253 read_mask
= TGSI_WRITEMASK_XYZW
;
256 case TGSI_OPCODE_LIT
:
257 read_mask
= write_mask
& TGSI_WRITEMASK_YZ
?
258 TGSI_WRITEMASK_XY
| TGSI_WRITEMASK_W
: 0;
261 case TGSI_OPCODE_EXP
:
262 case TGSI_OPCODE_LOG
:
263 read_mask
= write_mask
& TGSI_WRITEMASK_XYZ
? TGSI_WRITEMASK_X
: 0;
266 case TGSI_OPCODE_DST
:
268 read_mask
= TGSI_WRITEMASK_YZ
;
270 read_mask
= TGSI_WRITEMASK_YW
;
273 case TGSI_OPCODE_DLDEXP
:
275 read_mask
= write_mask
;
278 (write_mask
& TGSI_WRITEMASK_XY
? TGSI_WRITEMASK_X
: 0) |
279 (write_mask
& TGSI_WRITEMASK_ZW
? TGSI_WRITEMASK_Z
: 0);
283 case TGSI_OPCODE_READ_INVOC
:
285 read_mask
= write_mask
;
287 read_mask
= TGSI_WRITEMASK_X
;
290 case TGSI_OPCODE_FBFETCH
:
291 read_mask
= 0; /* not a real register read */
294 case TGSI_OPCODE_TEX
:
295 case TGSI_OPCODE_TEX_LZ
:
296 case TGSI_OPCODE_TXF_LZ
:
297 case TGSI_OPCODE_TXF
:
298 case TGSI_OPCODE_TXB
:
299 case TGSI_OPCODE_TXL
:
300 case TGSI_OPCODE_TXP
:
301 case TGSI_OPCODE_TXD
:
302 case TGSI_OPCODE_TEX2
:
303 case TGSI_OPCODE_TXB2
:
304 case TGSI_OPCODE_TXL2
:
305 case TGSI_OPCODE_LODQ
:
306 case TGSI_OPCODE_TG4
: {
308 tgsi_util_get_texture_coord_dim(inst
->Texture
.Texture
);
309 unsigned dim_layer_shadow
, dim
;
312 if (tgsi_is_shadow_target(inst
->Texture
.Texture
)) {
313 dim_layer_shadow
= dim_layer
+ 1;
314 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
)
315 dim_layer_shadow
= 3;
317 dim_layer_shadow
= dim_layer
;
321 if (tgsi_is_array_sampler(inst
->Texture
.Texture
))
326 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle in the last operand */
330 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_LODQ
)
331 read_mask
= u_bit_consecutive(0, dim
);
333 read_mask
= u_bit_consecutive(0, dim_layer_shadow
) & 0xf;
335 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
)
336 read_mask
&= ~TGSI_WRITEMASK_Y
;
338 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
||
339 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
||
340 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
||
341 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
)
342 read_mask
|= TGSI_WRITEMASK_W
;
346 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
)
347 read_mask
= u_bit_consecutive(0, dim
);
348 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX2
||
349 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
||
350 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
||
351 inst
->Instruction
.Opcode
== TGSI_OPCODE_TG4
)
352 read_mask
= TGSI_WRITEMASK_X
;
356 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
)
357 read_mask
= u_bit_consecutive(0, dim
);
363 case TGSI_OPCODE_LOAD
:
365 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle possible */
367 unsigned dim
= tgsi_util_get_texture_coord_dim(inst
->Memory
.Texture
);
368 read_mask
= u_bit_consecutive(0, dim
);
372 case TGSI_OPCODE_STORE
:
374 unsigned dim
= tgsi_util_get_texture_coord_dim(inst
->Memory
.Texture
);
375 read_mask
= u_bit_consecutive(0, dim
);
377 read_mask
= TGSI_WRITEMASK_XYZW
;
381 case TGSI_OPCODE_ATOMUADD
:
382 case TGSI_OPCODE_ATOMXCHG
:
383 case TGSI_OPCODE_ATOMCAS
:
384 case TGSI_OPCODE_ATOMAND
:
385 case TGSI_OPCODE_ATOMOR
:
386 case TGSI_OPCODE_ATOMXOR
:
387 case TGSI_OPCODE_ATOMUMIN
:
388 case TGSI_OPCODE_ATOMUMAX
:
389 case TGSI_OPCODE_ATOMIMIN
:
390 case TGSI_OPCODE_ATOMIMAX
:
392 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle possible */
393 } else if (src_idx
== 1) {
394 unsigned dim
= tgsi_util_get_texture_coord_dim(inst
->Memory
.Texture
);
395 read_mask
= u_bit_consecutive(0, dim
);
397 read_mask
= TGSI_WRITEMASK_XYZW
;
401 case TGSI_OPCODE_INTERP_CENTROID
:
402 case TGSI_OPCODE_INTERP_SAMPLE
:
403 case TGSI_OPCODE_INTERP_OFFSET
:
405 read_mask
= write_mask
;
406 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
)
407 read_mask
= TGSI_WRITEMASK_XY
; /* offset */
409 read_mask
= TGSI_WRITEMASK_X
; /* sample */
413 if (tgsi_get_opcode_info(inst
->Instruction
.Opcode
)->output_mode
==
414 TGSI_OUTPUT_COMPONENTWISE
)
415 read_mask
= write_mask
;
417 read_mask
= TGSI_WRITEMASK_XYZW
; /* assume all channels are read */
422 for (chan
= 0; chan
< 4; ++chan
) {
423 if (read_mask
& (1 << chan
)) {
424 usage_mask
|= 1 << tgsi_util_get_full_src_register_swizzle(src
, chan
);
432 * Convert a tgsi_ind_register into a tgsi_src_register
434 struct tgsi_src_register
435 tgsi_util_get_src_from_ind(const struct tgsi_ind_register
*reg
)
437 struct tgsi_src_register src
= { 0 };
439 src
.File
= reg
->File
;
440 src
.Index
= reg
->Index
;
441 src
.SwizzleX
= reg
->Swizzle
;
442 src
.SwizzleY
= reg
->Swizzle
;
443 src
.SwizzleZ
= reg
->Swizzle
;
444 src
.SwizzleW
= reg
->Swizzle
;
450 * Return the dimension of the texture coordinates (layer included for array
451 * textures), as well as the location of the shadow reference value or the
455 tgsi_util_get_texture_coord_dim(unsigned tgsi_tex
)
458 * Depending on the texture target, (src0.xyzw, src1.x) is interpreted
461 * (s, X, X, X, X), for BUFFER
462 * (s, X, X, X, X), for 1D
463 * (s, t, X, X, X), for 2D, RECT
464 * (s, t, r, X, X), for 3D, CUBE
466 * (s, layer, X, X, X), for 1D_ARRAY
467 * (s, t, layer, X, X), for 2D_ARRAY
468 * (s, t, r, layer, X), for CUBE_ARRAY
470 * (s, X, shadow, X, X), for SHADOW1D
471 * (s, t, shadow, X, X), for SHADOW2D, SHADOWRECT
472 * (s, t, r, shadow, X), for SHADOWCUBE
474 * (s, layer, shadow, X, X), for SHADOW1D_ARRAY
475 * (s, t, layer, shadow, X), for SHADOW2D_ARRAY
476 * (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY
478 * (s, t, sample, X, X), for 2D_MSAA
479 * (s, t, layer, sample, X), for 2D_ARRAY_MSAA
482 case TGSI_TEXTURE_BUFFER
:
483 case TGSI_TEXTURE_1D
:
484 case TGSI_TEXTURE_SHADOW1D
:
486 case TGSI_TEXTURE_2D
:
487 case TGSI_TEXTURE_RECT
:
488 case TGSI_TEXTURE_1D_ARRAY
:
489 case TGSI_TEXTURE_SHADOW2D
:
490 case TGSI_TEXTURE_SHADOWRECT
:
491 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
492 case TGSI_TEXTURE_2D_MSAA
:
494 case TGSI_TEXTURE_3D
:
495 case TGSI_TEXTURE_CUBE
:
496 case TGSI_TEXTURE_2D_ARRAY
:
497 case TGSI_TEXTURE_SHADOWCUBE
:
498 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
499 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
501 case TGSI_TEXTURE_CUBE_ARRAY
:
502 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
505 assert(!"unknown texture target");
512 * Given a TGSI_TEXTURE_x target, return the src register index for the
513 * shadow reference coordinate.
516 tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex
)
519 case TGSI_TEXTURE_SHADOW1D
:
520 case TGSI_TEXTURE_SHADOW2D
:
521 case TGSI_TEXTURE_SHADOWRECT
:
522 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
524 case TGSI_TEXTURE_SHADOWCUBE
:
525 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
526 case TGSI_TEXTURE_2D_MSAA
:
527 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
529 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
532 /* no shadow nor sample */
539 tgsi_is_shadow_target(unsigned target
)
542 case TGSI_TEXTURE_SHADOW1D
:
543 case TGSI_TEXTURE_SHADOW2D
:
544 case TGSI_TEXTURE_SHADOWRECT
:
545 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
546 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
547 case TGSI_TEXTURE_SHADOWCUBE
:
548 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
: