afe5690ce098a3fabff79443001dcdc108c44d7e
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_util.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_debug.h"
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi_info.h"
31 #include "tgsi_parse.h"
32 #include "tgsi_util.h"
33 #include "tgsi_exec.h"
34 #include "util/bitscan.h"
35
/**
 * Overlay of a pointer and a 64-bit unsigned integer.
 *
 * Both members share the same storage, which lets the bits of a pointer be
 * inspected or modified as an integer.  Where a pointer is narrower than
 * 64 bits, only part of \c uint64 is covered by \c pointer.
 */
union pointer_hack
{
   void *pointer;    /* the storage viewed as a pointer */
   uint64_t uint64;  /* the storage viewed as a 64-bit unsigned integer */
};
41
/**
 * Round a pointer up to the next 16-byte (128-bit) boundary.
 *
 * The arithmetic is done on the address converted to uintptr_t rather than
 * by overlaying the pointer with a 64-bit integer.  The overlay only works
 * when the pointer occupies the low-order bytes of that integer, which is
 * not the case on 32-bit big-endian targets.
 *
 * \param unaligned  pointer to align; returned unchanged if already aligned
 * \return the lowest address not below \p unaligned that is a multiple of 16
 */
void *
tgsi_align_128bit(
   void *unaligned )
{
   const uintptr_t mask = (uintptr_t)15;
   uintptr_t address = (uintptr_t)unaligned;

   address = (address + mask) & ~mask;
   return (void *)address;
}
53
54 unsigned
55 tgsi_util_get_src_register_swizzle(
56 const struct tgsi_src_register *reg,
57 unsigned component )
58 {
59 switch (component) {
60 case TGSI_CHAN_X:
61 return reg->SwizzleX;
62 case TGSI_CHAN_Y:
63 return reg->SwizzleY;
64 case TGSI_CHAN_Z:
65 return reg->SwizzleZ;
66 case TGSI_CHAN_W:
67 return reg->SwizzleW;
68 default:
69 assert(0);
70 }
71 return 0;
72 }
73
74
75 unsigned
76 tgsi_util_get_full_src_register_swizzle(
77 const struct tgsi_full_src_register *reg,
78 unsigned component )
79 {
80 return tgsi_util_get_src_register_swizzle(
81 &reg->Register,
82 component );
83 }
84
85 void
86 tgsi_util_set_src_register_swizzle(
87 struct tgsi_src_register *reg,
88 unsigned swizzle,
89 unsigned component )
90 {
91 switch( component ) {
92 case 0:
93 reg->SwizzleX = swizzle;
94 break;
95 case 1:
96 reg->SwizzleY = swizzle;
97 break;
98 case 2:
99 reg->SwizzleZ = swizzle;
100 break;
101 case 3:
102 reg->SwizzleW = swizzle;
103 break;
104 default:
105 assert( 0 );
106 }
107 }
108
109 unsigned
110 tgsi_util_get_full_src_register_sign_mode(
111 const struct tgsi_full_src_register *reg,
112 unsigned component )
113 {
114 unsigned sign_mode;
115
116 if( reg->Register.Absolute ) {
117 /* Consider only the post-abs negation. */
118
119 if( reg->Register.Negate ) {
120 sign_mode = TGSI_UTIL_SIGN_SET;
121 }
122 else {
123 sign_mode = TGSI_UTIL_SIGN_CLEAR;
124 }
125 }
126 else {
127 if( reg->Register.Negate ) {
128 sign_mode = TGSI_UTIL_SIGN_TOGGLE;
129 }
130 else {
131 sign_mode = TGSI_UTIL_SIGN_KEEP;
132 }
133 }
134
135 return sign_mode;
136 }
137
138 void
139 tgsi_util_set_full_src_register_sign_mode(
140 struct tgsi_full_src_register *reg,
141 unsigned sign_mode )
142 {
143 switch (sign_mode)
144 {
145 case TGSI_UTIL_SIGN_CLEAR:
146 reg->Register.Negate = 0;
147 reg->Register.Absolute = 1;
148 break;
149
150 case TGSI_UTIL_SIGN_SET:
151 reg->Register.Absolute = 1;
152 reg->Register.Negate = 1;
153 break;
154
155 case TGSI_UTIL_SIGN_TOGGLE:
156 reg->Register.Negate = 1;
157 reg->Register.Absolute = 0;
158 break;
159
160 case TGSI_UTIL_SIGN_KEEP:
161 reg->Register.Negate = 0;
162 reg->Register.Absolute = 0;
163 break;
164
165 default:
166 assert( 0 );
167 }
168 }
169
/**
 * Determine which channels of the specified src register are effectively
 * used by this instruction.
 *
 * The function first derives a "read mask": the operand channels the opcode
 * consumes, expressed before swizzling.  That mask is then mapped through
 * the operand's swizzle to obtain the register channels actually fetched.
 *
 * \param inst     instruction to inspect
 * \param src_idx  index into inst->Src of the operand being queried
 * \return bitmask in which bit N is set when channel N of the source
 *         register is read
 */
unsigned
tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
                              unsigned src_idx)
{
   const struct tgsi_full_src_register *src = &inst->Src[src_idx];
   /* Only the first destination's write mask is considered. */
   unsigned write_mask = inst->Dst[0].Register.WriteMask;
   unsigned read_mask;
   unsigned usage_mask;
   unsigned chan;

   switch (inst->Instruction.Opcode) {
   /* Opcodes whose operands are read in the X channel only. */
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_UIF:
   case TGSI_OPCODE_EMIT:
   case TGSI_OPCODE_ENDPRIM:
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
   case TGSI_OPCODE_SIN:
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_POW: /* reads src0.x and src1.x */
   case TGSI_OPCODE_UP2H:
   case TGSI_OPCODE_UP2US:
   case TGSI_OPCODE_UP4B:
   case TGSI_OPCODE_UP4UB:
   case TGSI_OPCODE_MEMBAR:
   case TGSI_OPCODE_BALLOT:
      read_mask = TGSI_WRITEMASK_X;
      break;

   /* Opcodes whose operands are read in X and Y. */
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_PK2H:
   case TGSI_OPCODE_PK2US:
   case TGSI_OPCODE_DFRACEXP:
   case TGSI_OPCODE_F2D:
   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_U2D:
   case TGSI_OPCODE_F2U64:
   case TGSI_OPCODE_F2I64:
   case TGSI_OPCODE_U2I64:
   case TGSI_OPCODE_I2I64:
   case TGSI_OPCODE_TXQS: /* bindless handle possible */
   case TGSI_OPCODE_RESQ: /* bindless handle possible */
      read_mask = TGSI_WRITEMASK_XY;
      break;

   case TGSI_OPCODE_TXQ:
      /* Operand 0 is read in X only; any other operand in XY. */
      if (src_idx == 0)
         read_mask = TGSI_WRITEMASK_X;
      else
         read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */
      break;

   case TGSI_OPCODE_DP3:
      read_mask = TGSI_WRITEMASK_XYZ;
      break;

   /* Opcodes that read all four channels regardless of the write mask. */
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSNE:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_PK4B:
   case TGSI_OPCODE_PK4UB:
   case TGSI_OPCODE_D2F:
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_D2U:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_U64SEQ:
   case TGSI_OPCODE_U64SNE:
   case TGSI_OPCODE_U64SLT:
   case TGSI_OPCODE_U64SGE:
   case TGSI_OPCODE_U642F:
   case TGSI_OPCODE_I64SLT:
   case TGSI_OPCODE_I64SGE:
   case TGSI_OPCODE_I642F:
      read_mask = TGSI_WRITEMASK_XYZW;
      break;

   case TGSI_OPCODE_LIT:
      /* X, Y and W are read only when Y or Z is written; otherwise nothing. */
      read_mask = write_mask & TGSI_WRITEMASK_YZ ?
                     TGSI_WRITEMASK_XY | TGSI_WRITEMASK_W : 0;
      break;

   case TGSI_OPCODE_EXP:
   case TGSI_OPCODE_LOG:
      /* X is read only when at least one of X, Y, Z is written. */
      read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
      break;

   case TGSI_OPCODE_DST:
      /* Operand 0 contributes Y and Z, the other operand Y and W. */
      if (src_idx == 0)
         read_mask = TGSI_WRITEMASK_YZ;
      else
         read_mask = TGSI_WRITEMASK_YW;
      break;

   case TGSI_OPCODE_DLDEXP:
      if (src_idx == 0) {
         read_mask = write_mask;
      } else {
         /* One channel per written pair: X for the XY pair, Z for ZW. */
         read_mask =
            (write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0) |
            (write_mask & TGSI_WRITEMASK_ZW ? TGSI_WRITEMASK_Z : 0);
      }
      break;

   case TGSI_OPCODE_READ_INVOC:
      /* Operand 0 follows the write mask; the other operand is read in X. */
      if (src_idx == 0)
         read_mask = write_mask;
      else
         read_mask = TGSI_WRITEMASK_X;
      break;

   case TGSI_OPCODE_FBFETCH:
      read_mask = 0; /* not a real register read */
      break;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TEX_LZ:
   case TGSI_OPCODE_TXF_LZ:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_LODQ:
   case TGSI_OPCODE_TG4: {
      /* Coordinate count for the target, layer included. */
      unsigned dim_layer =
         tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
      unsigned dim_layer_shadow, dim;

      /* Add shadow. */
      if (tgsi_is_shadow_target(inst->Texture.Texture)) {
         dim_layer_shadow = dim_layer + 1;
         /* SHADOW1D is forced to three channels; Y is masked out below. */
         if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)
            dim_layer_shadow = 3;
      } else {
         dim_layer_shadow = dim_layer;
      }

      /* Remove layer. */
      if (tgsi_is_array_sampler(inst->Texture.Texture))
         dim = dim_layer - 1;
      else
         dim = dim_layer;

      /* Default for any operand the switch below does not override. */
      read_mask = TGSI_WRITEMASK_XY; /* bindless handle in the last operand */

      switch (src_idx) {
      case 0:
         /* LODQ uses the layer-less dimension; the other opcodes use the
          * count including layer and shadow, clamped to four channels.
          */
         if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ)
            read_mask = u_bit_consecutive(0, dim);
         else
            read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf;

         if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)
            read_mask &= ~TGSI_WRITEMASK_Y;

         /* These opcodes additionally read W of operand 0. */
         if (inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
             inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
             inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
             inst->Instruction.Opcode == TGSI_OPCODE_TXP)
            read_mask |= TGSI_WRITEMASK_W;
         break;

      case 1:
         /* TXD reads 'dim' channels here; the *2 variants and TG4 read X. */
         if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
            read_mask = u_bit_consecutive(0, dim);
         else if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
                  inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
                  inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
                  inst->Instruction.Opcode == TGSI_OPCODE_TG4)
            read_mask = TGSI_WRITEMASK_X;
         break;

      case 2:
         /* Only TXD overrides the default for operand 2. */
         if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
            read_mask = u_bit_consecutive(0, dim);
         break;
      }
      break;
   }

   case TGSI_OPCODE_LOAD:
      /* Operand 0 is read in XY; the other operand in 'dim' channels. */
      if (src_idx == 0) {
         read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */
      } else {
         unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);
         read_mask = u_bit_consecutive(0, dim);
      }
      break;

   case TGSI_OPCODE_STORE:
      /* Operand 0 is read in 'dim' channels; the other operand fully. */
      if (src_idx == 0) {
         unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);
         read_mask = u_bit_consecutive(0, dim);
      } else {
         read_mask = TGSI_WRITEMASK_XYZW;
      }
      break;

   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
      /* Operand 0: XY; operand 1: 'dim' channels; later operands: all. */
      if (src_idx == 0) {
         read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */
      } else if (src_idx == 1) {
         unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);
         read_mask = u_bit_consecutive(0, dim);
      } else {
         read_mask = TGSI_WRITEMASK_XYZW;
      }
      break;

   case TGSI_OPCODE_INTERP_CENTROID:
   case TGSI_OPCODE_INTERP_SAMPLE:
   case TGSI_OPCODE_INTERP_OFFSET:
      /* Operand 0 follows the write mask. */
      if (src_idx == 0)
         read_mask = write_mask;
      else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET)
         read_mask = TGSI_WRITEMASK_XY; /* offset */
      else
         read_mask = TGSI_WRITEMASK_X; /* sample */
      break;

   default:
      /* Component-wise opcodes read exactly the channels they write. */
      if (tgsi_get_opcode_info(inst->Instruction.Opcode)->output_mode ==
          TGSI_OUTPUT_COMPONENTWISE)
         read_mask = write_mask;
      else
         read_mask = TGSI_WRITEMASK_XYZW; /* assume all channels are read */
      break;
   }

   /* Map each pre-swizzle channel in read_mask through the operand's
    * swizzle to find the register channel it actually fetches.
    */
   usage_mask = 0;
   for (chan = 0; chan < 4; ++chan) {
      if (read_mask & (1 << chan)) {
         usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan);
      }
   }

   return usage_mask;
}
430
431 /**
432 * Convert a tgsi_ind_register into a tgsi_src_register
433 */
434 struct tgsi_src_register
435 tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg)
436 {
437 struct tgsi_src_register src = { 0 };
438
439 src.File = reg->File;
440 src.Index = reg->Index;
441 src.SwizzleX = reg->Swizzle;
442 src.SwizzleY = reg->Swizzle;
443 src.SwizzleZ = reg->Swizzle;
444 src.SwizzleW = reg->Swizzle;
445
446 return src;
447 }
448
449 /**
450 * Return the dimension of the texture coordinates (layer included for array
451 * textures), as well as the location of the shadow reference value or the
452 * sample index.
453 */
454 int
455 tgsi_util_get_texture_coord_dim(unsigned tgsi_tex)
456 {
457 /*
458 * Depending on the texture target, (src0.xyzw, src1.x) is interpreted
459 * differently:
460 *
461 * (s, X, X, X, X), for BUFFER
462 * (s, X, X, X, X), for 1D
463 * (s, t, X, X, X), for 2D, RECT
464 * (s, t, r, X, X), for 3D, CUBE
465 *
466 * (s, layer, X, X, X), for 1D_ARRAY
467 * (s, t, layer, X, X), for 2D_ARRAY
468 * (s, t, r, layer, X), for CUBE_ARRAY
469 *
470 * (s, X, shadow, X, X), for SHADOW1D
471 * (s, t, shadow, X, X), for SHADOW2D, SHADOWRECT
472 * (s, t, r, shadow, X), for SHADOWCUBE
473 *
474 * (s, layer, shadow, X, X), for SHADOW1D_ARRAY
475 * (s, t, layer, shadow, X), for SHADOW2D_ARRAY
476 * (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY
477 *
478 * (s, t, sample, X, X), for 2D_MSAA
479 * (s, t, layer, sample, X), for 2D_ARRAY_MSAA
480 */
481 switch (tgsi_tex) {
482 case TGSI_TEXTURE_BUFFER:
483 case TGSI_TEXTURE_1D:
484 case TGSI_TEXTURE_SHADOW1D:
485 return 1;
486 case TGSI_TEXTURE_2D:
487 case TGSI_TEXTURE_RECT:
488 case TGSI_TEXTURE_1D_ARRAY:
489 case TGSI_TEXTURE_SHADOW2D:
490 case TGSI_TEXTURE_SHADOWRECT:
491 case TGSI_TEXTURE_SHADOW1D_ARRAY:
492 case TGSI_TEXTURE_2D_MSAA:
493 return 2;
494 case TGSI_TEXTURE_3D:
495 case TGSI_TEXTURE_CUBE:
496 case TGSI_TEXTURE_2D_ARRAY:
497 case TGSI_TEXTURE_SHADOWCUBE:
498 case TGSI_TEXTURE_SHADOW2D_ARRAY:
499 case TGSI_TEXTURE_2D_ARRAY_MSAA:
500 return 3;
501 case TGSI_TEXTURE_CUBE_ARRAY:
502 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
503 return 4;
504 default:
505 assert(!"unknown texture target");
506 return 0;
507 }
508 }
509
510
511 /**
512 * Given a TGSI_TEXTURE_x target, return the src register index for the
513 * shadow reference coordinate.
514 */
515 int
516 tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex)
517 {
518 switch (tgsi_tex) {
519 case TGSI_TEXTURE_SHADOW1D:
520 case TGSI_TEXTURE_SHADOW2D:
521 case TGSI_TEXTURE_SHADOWRECT:
522 case TGSI_TEXTURE_SHADOW1D_ARRAY:
523 return 2;
524 case TGSI_TEXTURE_SHADOWCUBE:
525 case TGSI_TEXTURE_SHADOW2D_ARRAY:
526 case TGSI_TEXTURE_2D_MSAA:
527 case TGSI_TEXTURE_2D_ARRAY_MSAA:
528 return 3;
529 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
530 return 4;
531 default:
532 /* no shadow nor sample */
533 return -1;
534 }
535 }
536
537
538 boolean
539 tgsi_is_shadow_target(unsigned target)
540 {
541 switch (target) {
542 case TGSI_TEXTURE_SHADOW1D:
543 case TGSI_TEXTURE_SHADOW2D:
544 case TGSI_TEXTURE_SHADOWRECT:
545 case TGSI_TEXTURE_SHADOW1D_ARRAY:
546 case TGSI_TEXTURE_SHADOW2D_ARRAY:
547 case TGSI_TEXTURE_SHADOWCUBE:
548 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
549 return TRUE;
550 default:
551 return FALSE;
552 }
553 }