1 /**************************************************************************
3 * Copyright 2007 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "util/u_debug.h"
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi_info.h"
31 #include "tgsi_parse.h"
32 #include "tgsi_util.h"
33 #include "tgsi_exec.h"
34 #include "util/bitscan.h"
43 tgsi_align_128bit(void *unaligned
)
45 union pointer_hack ph
;
48 ph
.pointer
= unaligned
;
49 ph
.uint64
= (ph
.uint64
+ 15) & ~15;
54 tgsi_util_get_src_register_swizzle(const struct tgsi_src_register
*reg
,
74 tgsi_util_get_full_src_register_swizzle(
75 const struct tgsi_full_src_register
*reg
,
78 return tgsi_util_get_src_register_swizzle(®
->Register
, component
);
83 tgsi_util_set_src_register_swizzle(struct tgsi_src_register
*reg
,
89 reg
->SwizzleX
= swizzle
;
92 reg
->SwizzleY
= swizzle
;
95 reg
->SwizzleZ
= swizzle
;
98 reg
->SwizzleW
= swizzle
;
107 tgsi_util_get_full_src_register_sign_mode(
108 const struct tgsi_full_src_register
*reg
,
109 UNUSED
unsigned component
)
113 if (reg
->Register
.Absolute
) {
114 /* Consider only the post-abs negation. */
116 if (reg
->Register
.Negate
) {
117 sign_mode
= TGSI_UTIL_SIGN_SET
;
120 sign_mode
= TGSI_UTIL_SIGN_CLEAR
;
124 if (reg
->Register
.Negate
) {
125 sign_mode
= TGSI_UTIL_SIGN_TOGGLE
;
128 sign_mode
= TGSI_UTIL_SIGN_KEEP
;
137 tgsi_util_set_full_src_register_sign_mode(struct tgsi_full_src_register
*reg
,
141 case TGSI_UTIL_SIGN_CLEAR
:
142 reg
->Register
.Negate
= 0;
143 reg
->Register
.Absolute
= 1;
146 case TGSI_UTIL_SIGN_SET
:
147 reg
->Register
.Absolute
= 1;
148 reg
->Register
.Negate
= 1;
151 case TGSI_UTIL_SIGN_TOGGLE
:
152 reg
->Register
.Negate
= 1;
153 reg
->Register
.Absolute
= 0;
156 case TGSI_UTIL_SIGN_KEEP
:
157 reg
->Register
.Negate
= 0;
158 reg
->Register
.Absolute
= 0;
168 * Determine which channels of the specified src register are effectively
169 * used by this instruction.
172 tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction
*inst
,
175 const struct tgsi_full_src_register
*src
= &inst
->Src
[src_idx
];
176 unsigned write_mask
= inst
->Dst
[0].Register
.WriteMask
;
181 switch (inst
->Instruction
.Opcode
) {
183 case TGSI_OPCODE_UIF
:
184 case TGSI_OPCODE_EMIT
:
185 case TGSI_OPCODE_ENDPRIM
:
186 case TGSI_OPCODE_RCP
:
187 case TGSI_OPCODE_RSQ
:
188 case TGSI_OPCODE_SQRT
:
189 case TGSI_OPCODE_EX2
:
190 case TGSI_OPCODE_LG2
:
191 case TGSI_OPCODE_SIN
:
192 case TGSI_OPCODE_COS
:
193 case TGSI_OPCODE_POW
: /* reads src0.x and src1.x */
194 case TGSI_OPCODE_UP2H
:
195 case TGSI_OPCODE_UP2US
:
196 case TGSI_OPCODE_UP4B
:
197 case TGSI_OPCODE_UP4UB
:
198 case TGSI_OPCODE_MEMBAR
:
199 case TGSI_OPCODE_BALLOT
:
200 read_mask
= TGSI_WRITEMASK_X
;
203 case TGSI_OPCODE_DP2
:
204 case TGSI_OPCODE_PK2H
:
205 case TGSI_OPCODE_PK2US
:
206 case TGSI_OPCODE_DFRACEXP
:
207 case TGSI_OPCODE_F2D
:
208 case TGSI_OPCODE_I2D
:
209 case TGSI_OPCODE_U2D
:
210 case TGSI_OPCODE_F2U64
:
211 case TGSI_OPCODE_F2I64
:
212 case TGSI_OPCODE_U2I64
:
213 case TGSI_OPCODE_I2I64
:
214 case TGSI_OPCODE_TXQS
: /* bindless handle possible */
215 case TGSI_OPCODE_RESQ
: /* bindless handle possible */
216 read_mask
= TGSI_WRITEMASK_XY
;
219 case TGSI_OPCODE_TXQ
:
221 read_mask
= TGSI_WRITEMASK_X
;
223 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle possible */
226 case TGSI_OPCODE_DP3
:
227 read_mask
= TGSI_WRITEMASK_XYZ
;
230 case TGSI_OPCODE_DSEQ
:
231 case TGSI_OPCODE_DSNE
:
232 case TGSI_OPCODE_DSLT
:
233 case TGSI_OPCODE_DSGE
:
234 case TGSI_OPCODE_DP4
:
235 case TGSI_OPCODE_PK4B
:
236 case TGSI_OPCODE_PK4UB
:
237 case TGSI_OPCODE_D2F
:
238 case TGSI_OPCODE_D2I
:
239 case TGSI_OPCODE_D2U
:
240 case TGSI_OPCODE_I2F
:
241 case TGSI_OPCODE_U2F
:
242 case TGSI_OPCODE_U64SEQ
:
243 case TGSI_OPCODE_U64SNE
:
244 case TGSI_OPCODE_U64SLT
:
245 case TGSI_OPCODE_U64SGE
:
246 case TGSI_OPCODE_U642F
:
247 case TGSI_OPCODE_I64SLT
:
248 case TGSI_OPCODE_I64SGE
:
249 case TGSI_OPCODE_I642F
:
250 read_mask
= TGSI_WRITEMASK_XYZW
;
253 case TGSI_OPCODE_LIT
:
254 read_mask
= write_mask
& TGSI_WRITEMASK_YZ
?
255 TGSI_WRITEMASK_XY
| TGSI_WRITEMASK_W
: 0;
258 case TGSI_OPCODE_EXP
:
259 case TGSI_OPCODE_LOG
:
260 read_mask
= write_mask
& TGSI_WRITEMASK_XYZ
? TGSI_WRITEMASK_X
: 0;
263 case TGSI_OPCODE_DST
:
265 read_mask
= TGSI_WRITEMASK_YZ
;
267 read_mask
= TGSI_WRITEMASK_YW
;
270 case TGSI_OPCODE_DLDEXP
:
272 read_mask
= write_mask
;
275 (write_mask
& TGSI_WRITEMASK_XY
? TGSI_WRITEMASK_X
: 0) |
276 (write_mask
& TGSI_WRITEMASK_ZW
? TGSI_WRITEMASK_Z
: 0);
280 case TGSI_OPCODE_READ_INVOC
:
282 read_mask
= write_mask
;
284 read_mask
= TGSI_WRITEMASK_X
;
287 case TGSI_OPCODE_FBFETCH
:
288 read_mask
= 0; /* not a real register read */
291 case TGSI_OPCODE_TEX
:
292 case TGSI_OPCODE_TEX_LZ
:
293 case TGSI_OPCODE_TXF_LZ
:
294 case TGSI_OPCODE_TXF
:
295 case TGSI_OPCODE_TXB
:
296 case TGSI_OPCODE_TXL
:
297 case TGSI_OPCODE_TXP
:
298 case TGSI_OPCODE_TXD
:
299 case TGSI_OPCODE_TEX2
:
300 case TGSI_OPCODE_TXB2
:
301 case TGSI_OPCODE_TXL2
:
302 case TGSI_OPCODE_LODQ
:
303 case TGSI_OPCODE_TG4
: {
305 tgsi_util_get_texture_coord_dim(inst
->Texture
.Texture
);
306 unsigned dim_layer_shadow
, dim
;
309 if (tgsi_is_shadow_target(inst
->Texture
.Texture
)) {
310 dim_layer_shadow
= dim_layer
+ 1;
311 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
)
312 dim_layer_shadow
= 3;
314 dim_layer_shadow
= dim_layer
;
318 if (tgsi_is_array_sampler(inst
->Texture
.Texture
))
323 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle in the last operand */
327 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_LODQ
)
328 read_mask
= u_bit_consecutive(0, dim
);
330 read_mask
= u_bit_consecutive(0, dim_layer_shadow
) & 0xf;
332 if (inst
->Texture
.Texture
== TGSI_TEXTURE_SHADOW1D
)
333 read_mask
&= ~TGSI_WRITEMASK_Y
;
335 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXF
||
336 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
||
337 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
||
338 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXP
)
339 read_mask
|= TGSI_WRITEMASK_W
;
343 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
)
344 read_mask
= u_bit_consecutive(0, dim
);
345 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX2
||
346 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
||
347 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
||
348 inst
->Instruction
.Opcode
== TGSI_OPCODE_TG4
)
349 read_mask
= TGSI_WRITEMASK_X
;
353 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXD
)
354 read_mask
= u_bit_consecutive(0, dim
);
360 case TGSI_OPCODE_LOAD
:
362 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle possible */
364 unsigned dim
= tgsi_util_get_texture_coord_dim(inst
->Memory
.Texture
);
365 read_mask
= u_bit_consecutive(0, dim
);
369 case TGSI_OPCODE_STORE
:
371 unsigned dim
= tgsi_util_get_texture_coord_dim(inst
->Memory
.Texture
);
372 read_mask
= u_bit_consecutive(0, dim
);
374 read_mask
= TGSI_WRITEMASK_XYZW
;
378 case TGSI_OPCODE_ATOMUADD
:
379 case TGSI_OPCODE_ATOMXCHG
:
380 case TGSI_OPCODE_ATOMCAS
:
381 case TGSI_OPCODE_ATOMAND
:
382 case TGSI_OPCODE_ATOMOR
:
383 case TGSI_OPCODE_ATOMXOR
:
384 case TGSI_OPCODE_ATOMUMIN
:
385 case TGSI_OPCODE_ATOMUMAX
:
386 case TGSI_OPCODE_ATOMIMIN
:
387 case TGSI_OPCODE_ATOMIMAX
:
389 read_mask
= TGSI_WRITEMASK_XY
; /* bindless handle possible */
390 } else if (src_idx
== 1) {
391 unsigned dim
= tgsi_util_get_texture_coord_dim(inst
->Memory
.Texture
);
392 read_mask
= u_bit_consecutive(0, dim
);
394 read_mask
= TGSI_WRITEMASK_XYZW
;
398 case TGSI_OPCODE_INTERP_CENTROID
:
399 case TGSI_OPCODE_INTERP_SAMPLE
:
400 case TGSI_OPCODE_INTERP_OFFSET
:
402 read_mask
= write_mask
;
403 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_INTERP_OFFSET
)
404 read_mask
= TGSI_WRITEMASK_XY
; /* offset */
406 read_mask
= TGSI_WRITEMASK_X
; /* sample */
410 if (tgsi_get_opcode_info(inst
->Instruction
.Opcode
)->output_mode
==
411 TGSI_OUTPUT_COMPONENTWISE
)
412 read_mask
= write_mask
;
414 read_mask
= TGSI_WRITEMASK_XYZW
; /* assume all channels are read */
419 for (chan
= 0; chan
< 4; ++chan
) {
420 if (read_mask
& (1 << chan
)) {
421 usage_mask
|= 1 << tgsi_util_get_full_src_register_swizzle(src
, chan
);
429 * Convert a tgsi_ind_register into a tgsi_src_register
431 struct tgsi_src_register
432 tgsi_util_get_src_from_ind(const struct tgsi_ind_register
*reg
)
434 struct tgsi_src_register src
= { 0 };
436 src
.File
= reg
->File
;
437 src
.Index
= reg
->Index
;
438 src
.SwizzleX
= reg
->Swizzle
;
439 src
.SwizzleY
= reg
->Swizzle
;
440 src
.SwizzleZ
= reg
->Swizzle
;
441 src
.SwizzleW
= reg
->Swizzle
;
447 * Return the dimension of the texture coordinates (layer included for array
448 * textures), as well as the location of the shadow reference value or the
452 tgsi_util_get_texture_coord_dim(enum tgsi_texture_type tgsi_tex
)
455 * Depending on the texture target, (src0.xyzw, src1.x) is interpreted
458 * (s, X, X, X, X), for BUFFER
459 * (s, X, X, X, X), for 1D
460 * (s, t, X, X, X), for 2D, RECT
461 * (s, t, r, X, X), for 3D, CUBE
463 * (s, layer, X, X, X), for 1D_ARRAY
464 * (s, t, layer, X, X), for 2D_ARRAY
465 * (s, t, r, layer, X), for CUBE_ARRAY
467 * (s, X, shadow, X, X), for SHADOW1D
468 * (s, t, shadow, X, X), for SHADOW2D, SHADOWRECT
469 * (s, t, r, shadow, X), for SHADOWCUBE
471 * (s, layer, shadow, X, X), for SHADOW1D_ARRAY
472 * (s, t, layer, shadow, X), for SHADOW2D_ARRAY
473 * (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY
475 * (s, t, sample, X, X), for 2D_MSAA
476 * (s, t, layer, sample, X), for 2D_ARRAY_MSAA
479 case TGSI_TEXTURE_BUFFER
:
480 case TGSI_TEXTURE_1D
:
481 case TGSI_TEXTURE_SHADOW1D
:
483 case TGSI_TEXTURE_2D
:
484 case TGSI_TEXTURE_RECT
:
485 case TGSI_TEXTURE_1D_ARRAY
:
486 case TGSI_TEXTURE_SHADOW2D
:
487 case TGSI_TEXTURE_SHADOWRECT
:
488 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
489 case TGSI_TEXTURE_2D_MSAA
:
491 case TGSI_TEXTURE_3D
:
492 case TGSI_TEXTURE_CUBE
:
493 case TGSI_TEXTURE_2D_ARRAY
:
494 case TGSI_TEXTURE_SHADOWCUBE
:
495 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
496 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
498 case TGSI_TEXTURE_CUBE_ARRAY
:
499 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
502 assert(!"unknown texture target");
509 * Given a TGSI_TEXTURE_x target, return register component where the
510 * shadow reference/distance coordinate is found. Typically, components
511 * 0 and 1 are the (s,t) texcoords and component 2 or 3 hold the shadow
512 * reference value. But if we return 4, it means the reference value is
513 * found in the 0th component of the second coordinate argument to the
517 tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex
)
520 case TGSI_TEXTURE_SHADOW1D
:
521 case TGSI_TEXTURE_SHADOW2D
:
522 case TGSI_TEXTURE_SHADOWRECT
:
523 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
525 case TGSI_TEXTURE_SHADOWCUBE
:
526 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
527 case TGSI_TEXTURE_2D_MSAA
:
528 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
530 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
533 /* no shadow nor sample */
540 tgsi_is_shadow_target(enum tgsi_texture_type target
)
543 case TGSI_TEXTURE_SHADOW1D
:
544 case TGSI_TEXTURE_SHADOW2D
:
545 case TGSI_TEXTURE_SHADOWRECT
:
546 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
547 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
548 case TGSI_TEXTURE_SHADOWCUBE
:
549 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
: