intel/compiler: fix brw_imm_w for negative 16-bit integers
[mesa.git] / src / intel / compiler / brw_reg.h
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 /** @file brw_reg.h
33 *
34 * This file defines struct brw_reg, which is our representation for EU
35 * registers. They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39 * the abstract brw_reg type into the actual hardware instruction encoding.
40 */
41
42 #ifndef BRW_REG_H
43 #define BRW_REG_H
44
45 #include <stdbool.h>
46 #include "main/compiler.h"
47 #include "main/macros.h"
48 #include "program/prog_instruction.h"
49 #include "brw_eu_defines.h"
50 #include "brw_reg_type.h"
51
52 #ifdef __cplusplus
53 extern "C" {
54 #endif
55
56 struct gen_device_info;
57
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/**
 * First GRF used for the MRF hack.
 *
 * On gen7, MRFs are no longer used, and contiguous GRFs are used instead.  We
 * haven't converted our compiler to be aware of this, so it asks for MRFs and
 * brw_eu_emit.c quietly converts them to be accesses of the top GRFs.  The
 * register allocators have to be careful of this to avoid corrupting the "MRF"s
 * with actual GRF allocations.
 */
#define GEN7_MRF_HACK_START 112

/**
 * Number of message register file registers: 24 when \p gen is 6, otherwise
 * 16.  The argument is parenthesized so that any expression may be passed.
 */
#define BRW_MAX_MRF(gen) ((gen) == 6 ? 24 : 16)
74
/* A swizzle packs four 2-bit channel selectors (0 = X, 1 = Y, 2 = Z, 3 = W):
 * selector "a" lives in bits 1:0, "b" in bits 3:2, "c" in bits 5:4 and "d"
 * in bits 7:6.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))

/* Extract the idx-th 2-bit selector from a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Named swizzles; each letter names the source channel for that position. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX      BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ      BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX      BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ      BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW      BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ      BRW_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted right (INPUT) or left (OUTPUT) by "comp"
 * selector positions.
 */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
99
100 static inline bool
101 brw_is_single_value_swizzle(unsigned swiz)
102 {
103 return (swiz == BRW_SWIZZLE_XXXX ||
104 swiz == BRW_SWIZZLE_YYYY ||
105 swiz == BRW_SWIZZLE_ZZZZ ||
106 swiz == BRW_SWIZZLE_WWWW);
107 }
108
/**
 * Compute the swizzle obtained from the application of \p swz0 on the result
 * of \p swz1.  The argument ordering is expected to match function
 * composition: channel i of the result is the channel of \p swz1 selected by
 * channel i of \p swz0.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned outer_sel = BRW_GET_SWZ(swz0, chan);

      composed |= BRW_GET_SWZ(swz1, outer_sel) << (2 * chan);
   }

   return composed;
}
123
/**
 * Return the result of applying swizzle \p swz to shuffle the bits of \p mask
 * (AKA image): bit i of the result is set when the bit of \p mask named by
 * channel i of \p swz is set.
 */
static inline unsigned
brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned image = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned src_chan = BRW_GET_SWZ(swz, chan);

      image |= ((mask >> src_chan) & 1u) << chan;
   }

   return image;
}
140
/**
 * Return the result of applying the inverse of swizzle \p swz to shuffle the
 * bits of \p mask (AKA preimage).  Useful to find out which components are
 * read from a swizzled source given the instruction writemask.
 */
static inline unsigned
brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned preimage = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      /* Only channels enabled in the mask contribute their source channel. */
      if ((mask >> chan) & 1u)
         preimage |= 1u << BRW_GET_SWZ(swz, chan);
   }

   return preimage;
}
158
/**
 * Construct an identity swizzle for the set of enabled channels given by \p
 * mask.  The result will only reference channels enabled in the provided \p
 * mask, assuming that \p mask is non-zero.  The constructed swizzle will
 * satisfy the property that for any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * will be equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Start from the lowest enabled channel (or X for an empty mask) so that
    * disabled channels below it still name an enabled one.
    */
   unsigned sel = mask ? ffs(mask) - 1 : 0;
   unsigned packed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      /* Enabled channels select themselves; disabled channels repeat the
       * most recent selection.
       */
      if (mask & (1 << chan))
         sel = chan;

      packed |= sel << (2 * chan);
   }

   return packed;
}
183
/**
 * Construct an identity swizzle for the first \p n components of a vector.
 * When only a subset of channels of a vec4 are used we don't want to
 * reference the other channels, as that will tell optimization passes that
 * those other channels are used.
 */
static inline unsigned
brw_swizzle_for_size(unsigned n)
{
   /* Mask with the low n bits set. */
   const unsigned enabled = (1 << n) - 1;

   return brw_swizzle_for_mask(enabled);
}
195
/**
 * Converse of brw_swizzle_for_mask().  Returns the mask of components
 * accessed by the specified swizzle \p swz.
 */
static inline unsigned
brw_mask_for_swizzle(unsigned swz)
{
   /* Treat every channel as written and collect what the swizzle reads. */
   const unsigned all_channels = ~0u;

   return brw_apply_inv_swizzle_to_mask(swz, all_channels);
}
205
206 uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
207
/* Size of one register in bytes; byte_offset() uses it to convert between a
 * (nr, subnr) pair and a flat byte offset.
 */
#define REG_SIZE (8 * 4)
209
210 /* These aren't hardware structs, just something useful for us to pass around:
211 *
212 * Align1 operation has a lot of control over input ranges. Used in
213 * WM programs to implement shaders decomposed into "channel serial"
214 * or "structure of array" form:
215 */
216 struct brw_reg {
217 union {
218 struct {
219 enum brw_reg_type type:4;
220 enum brw_reg_file file:3; /* :2 hardware format */
221 unsigned negate:1; /* source only */
222 unsigned abs:1; /* source only */
223 unsigned address_mode:1; /* relative addressing, hopefully! */
224 unsigned pad0:1;
225 unsigned subnr:5; /* :1 in align16 */
226 unsigned nr:16;
227 };
228 uint32_t bits;
229 };
230
231 union {
232 struct {
233 unsigned swizzle:8; /* src only, align16 only */
234 unsigned writemask:4; /* dest only, align16 only */
235 int indirect_offset:10; /* relative addressing offset */
236 unsigned vstride:4; /* source only */
237 unsigned width:3; /* src only, align1 only */
238 unsigned hstride:2; /* align1 only */
239 unsigned pad1:1;
240 };
241
242 double df;
243 uint64_t u64;
244 int64_t d64;
245 float f;
246 int d;
247 unsigned ud;
248 };
249 };
250
251 static inline bool
252 brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
253 {
254 const bool df = a->type == BRW_REGISTER_TYPE_DF && a->file == IMM;
255 return a->bits == b->bits && (df ? a->u64 == b->u64 : a->ud == b->ud);
256 }
257
258 static inline bool
259 brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
260 {
261 if (a->file == IMM) {
262 if (a->bits != b->bits)
263 return false;
264
265 switch ((enum brw_reg_type) a->type) {
266 case BRW_REGISTER_TYPE_UQ:
267 case BRW_REGISTER_TYPE_Q:
268 return a->d64 == -b->d64;
269 case BRW_REGISTER_TYPE_DF:
270 return a->df == -b->df;
271 case BRW_REGISTER_TYPE_UD:
272 case BRW_REGISTER_TYPE_D:
273 return a->d == -b->d;
274 case BRW_REGISTER_TYPE_F:
275 return a->f == -b->f;
276 case BRW_REGISTER_TYPE_VF:
277 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
278 * of -0). There are occasions where 0 or -0 is used and the exact
279 * bit pattern is desired. At the very least, changing this to allow
280 * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
281 */
282 return a->ud == (b->ud ^ 0x80808080);
283 case BRW_REGISTER_TYPE_UW:
284 case BRW_REGISTER_TYPE_W:
285 case BRW_REGISTER_TYPE_UV:
286 case BRW_REGISTER_TYPE_V:
287 case BRW_REGISTER_TYPE_HF:
288 /* FINISHME: Implement support for these types once there is
289 * something in the compiler that can generate them. Until then,
290 * they cannot be tested.
291 */
292 return false;
293 case BRW_REGISTER_TYPE_UB:
294 case BRW_REGISTER_TYPE_B:
295 case BRW_REGISTER_TYPE_NF:
296 default:
297 unreachable("not reached");
298 }
299 } else {
300 struct brw_reg tmp = *a;
301
302 tmp.negate = !tmp.negate;
303
304 return brw_regs_equal(&tmp, b);
305 }
306 }
307
/* Handle for register-indirect addressing: an address subregister number plus
 * a signed offset, padded out to 32 bits.
 */
struct brw_indirect {
   unsigned addr_subnr:4;   /* passed to brw_address_reg() by get_addr_reg() */
   int addr_offset:10;      /* base offset added by the deref_*() helpers */
   unsigned pad:18;
};
313
314
315 static inline unsigned
316 type_sz(unsigned type)
317 {
318 switch(type) {
319 case BRW_REGISTER_TYPE_UQ:
320 case BRW_REGISTER_TYPE_Q:
321 case BRW_REGISTER_TYPE_DF:
322 return 8;
323 case BRW_REGISTER_TYPE_UD:
324 case BRW_REGISTER_TYPE_D:
325 case BRW_REGISTER_TYPE_F:
326 case BRW_REGISTER_TYPE_VF:
327 return 4;
328 case BRW_REGISTER_TYPE_UW:
329 case BRW_REGISTER_TYPE_W:
330 case BRW_REGISTER_TYPE_UV:
331 case BRW_REGISTER_TYPE_V:
332 case BRW_REGISTER_TYPE_HF:
333 return 2;
334 case BRW_REGISTER_TYPE_UB:
335 case BRW_REGISTER_TYPE_B:
336 return 1;
337 default:
338 unreachable("not reached");
339 }
340 }
341
342 static inline enum brw_reg_type
343 get_exec_type(const enum brw_reg_type type)
344 {
345 switch (type) {
346 case BRW_REGISTER_TYPE_B:
347 case BRW_REGISTER_TYPE_V:
348 return BRW_REGISTER_TYPE_W;
349 case BRW_REGISTER_TYPE_UB:
350 case BRW_REGISTER_TYPE_UV:
351 return BRW_REGISTER_TYPE_UW;
352 case BRW_REGISTER_TYPE_VF:
353 return BRW_REGISTER_TYPE_F;
354 default:
355 return type;
356 }
357 }
358
359 /**
360 * Return an integer type of the requested size and signedness.
361 */
362 static inline enum brw_reg_type
363 brw_int_type(unsigned sz, bool is_signed)
364 {
365 switch (sz) {
366 case 1:
367 return (is_signed ? BRW_REGISTER_TYPE_B : BRW_REGISTER_TYPE_UB);
368 case 2:
369 return (is_signed ? BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
370 case 4:
371 return (is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
372 case 8:
373 return (is_signed ? BRW_REGISTER_TYPE_Q : BRW_REGISTER_TYPE_UQ);
374 default:
375 unreachable("Not reached.");
376 }
377 }
378
379 /**
380 * Construct a brw_reg.
381 * \param file one of the BRW_x_REGISTER_FILE values
382 * \param nr register number/index
383 * \param subnr register sub number
384 * \param negate register negate modifier
385 * \param abs register abs modifier
386 * \param type one of BRW_REGISTER_TYPE_x
387 * \param vstride one of BRW_VERTICAL_STRIDE_x
388 * \param width one of BRW_WIDTH_x
389 * \param hstride one of BRW_HORIZONTAL_STRIDE_x
390 * \param swizzle one of BRW_SWIZZLE_x
391 * \param writemask WRITEMASK_X/Y/Z/W bitfield
392 */
393 static inline struct brw_reg
394 brw_reg(enum brw_reg_file file,
395 unsigned nr,
396 unsigned subnr,
397 unsigned negate,
398 unsigned abs,
399 enum brw_reg_type type,
400 unsigned vstride,
401 unsigned width,
402 unsigned hstride,
403 unsigned swizzle,
404 unsigned writemask)
405 {
406 struct brw_reg reg;
407 if (file == BRW_GENERAL_REGISTER_FILE)
408 assert(nr < BRW_MAX_GRF);
409 else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
410 assert(nr <= BRW_ARF_TIMESTAMP);
411 /* Asserting on the MRF register number requires to know the hardware gen
412 * (gen6 has 24 MRF registers), which we don't know here, so we assert
413 * for that in the generators and in brw_eu_emit.c
414 */
415
416 reg.type = type;
417 reg.file = file;
418 reg.negate = negate;
419 reg.abs = abs;
420 reg.address_mode = BRW_ADDRESS_DIRECT;
421 reg.pad0 = 0;
422 reg.subnr = subnr * type_sz(type);
423 reg.nr = nr;
424
425 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
426 * set swizzle and writemask to W, as the lower bits of subnr will
427 * be lost when converted to align16. This is probably too much to
428 * keep track of as you'd want it adjusted by suboffset(), etc.
429 * Perhaps fix up when converting to align16?
430 */
431 reg.swizzle = swizzle;
432 reg.writemask = writemask;
433 reg.indirect_offset = 0;
434 reg.vstride = vstride;
435 reg.width = width;
436 reg.hstride = hstride;
437 reg.pad1 = 0;
438 return reg;
439 }
440
441 /** Construct float[16] register */
442 static inline struct brw_reg
443 brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
444 {
445 return brw_reg(file,
446 nr,
447 subnr,
448 0,
449 0,
450 BRW_REGISTER_TYPE_F,
451 BRW_VERTICAL_STRIDE_16,
452 BRW_WIDTH_16,
453 BRW_HORIZONTAL_STRIDE_1,
454 BRW_SWIZZLE_XYZW,
455 WRITEMASK_XYZW);
456 }
457
458 /** Construct float[8] register */
459 static inline struct brw_reg
460 brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
461 {
462 return brw_reg(file,
463 nr,
464 subnr,
465 0,
466 0,
467 BRW_REGISTER_TYPE_F,
468 BRW_VERTICAL_STRIDE_8,
469 BRW_WIDTH_8,
470 BRW_HORIZONTAL_STRIDE_1,
471 BRW_SWIZZLE_XYZW,
472 WRITEMASK_XYZW);
473 }
474
475 /** Construct float[4] register */
476 static inline struct brw_reg
477 brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
478 {
479 return brw_reg(file,
480 nr,
481 subnr,
482 0,
483 0,
484 BRW_REGISTER_TYPE_F,
485 BRW_VERTICAL_STRIDE_4,
486 BRW_WIDTH_4,
487 BRW_HORIZONTAL_STRIDE_1,
488 BRW_SWIZZLE_XYZW,
489 WRITEMASK_XYZW);
490 }
491
492 /** Construct float[2] register */
493 static inline struct brw_reg
494 brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
495 {
496 return brw_reg(file,
497 nr,
498 subnr,
499 0,
500 0,
501 BRW_REGISTER_TYPE_F,
502 BRW_VERTICAL_STRIDE_2,
503 BRW_WIDTH_2,
504 BRW_HORIZONTAL_STRIDE_1,
505 BRW_SWIZZLE_XYXY,
506 WRITEMASK_XY);
507 }
508
509 /** Construct float[1] register */
510 static inline struct brw_reg
511 brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
512 {
513 return brw_reg(file,
514 nr,
515 subnr,
516 0,
517 0,
518 BRW_REGISTER_TYPE_F,
519 BRW_VERTICAL_STRIDE_0,
520 BRW_WIDTH_1,
521 BRW_HORIZONTAL_STRIDE_0,
522 BRW_SWIZZLE_XXXX,
523 WRITEMASK_X);
524 }
525
526 static inline struct brw_reg
527 brw_vecn_reg(unsigned width, enum brw_reg_file file,
528 unsigned nr, unsigned subnr)
529 {
530 switch (width) {
531 case 1:
532 return brw_vec1_reg(file, nr, subnr);
533 case 2:
534 return brw_vec2_reg(file, nr, subnr);
535 case 4:
536 return brw_vec4_reg(file, nr, subnr);
537 case 8:
538 return brw_vec8_reg(file, nr, subnr);
539 case 16:
540 return brw_vec16_reg(file, nr, subnr);
541 default:
542 unreachable("Invalid register width");
543 }
544 }
545
546 static inline struct brw_reg
547 retype(struct brw_reg reg, enum brw_reg_type type)
548 {
549 reg.type = type;
550 return reg;
551 }
552
553 static inline struct brw_reg
554 firsthalf(struct brw_reg reg)
555 {
556 return reg;
557 }
558
559 static inline struct brw_reg
560 sechalf(struct brw_reg reg)
561 {
562 if (reg.vstride)
563 reg.nr++;
564 return reg;
565 }
566
567 static inline struct brw_reg
568 offset(struct brw_reg reg, unsigned delta)
569 {
570 reg.nr += delta;
571 return reg;
572 }
573
574
575 static inline struct brw_reg
576 byte_offset(struct brw_reg reg, unsigned bytes)
577 {
578 unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
579 reg.nr = newoffset / REG_SIZE;
580 reg.subnr = newoffset % REG_SIZE;
581 return reg;
582 }
583
584 static inline struct brw_reg
585 suboffset(struct brw_reg reg, unsigned delta)
586 {
587 return byte_offset(reg, delta * type_sz(reg.type));
588 }
589
590 /** Construct unsigned word[16] register */
591 static inline struct brw_reg
592 brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
593 {
594 return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
595 }
596
597 /** Construct unsigned word[8] register */
598 static inline struct brw_reg
599 brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
600 {
601 return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
602 }
603
604 /** Construct unsigned word[1] register */
605 static inline struct brw_reg
606 brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
607 {
608 return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
609 }
610
611 static inline struct brw_reg
612 brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
613 {
614 return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD);
615 }
616
617 static inline struct brw_reg
618 brw_imm_reg(enum brw_reg_type type)
619 {
620 return brw_reg(BRW_IMMEDIATE_VALUE,
621 0,
622 0,
623 0,
624 0,
625 type,
626 BRW_VERTICAL_STRIDE_0,
627 BRW_WIDTH_1,
628 BRW_HORIZONTAL_STRIDE_0,
629 0,
630 0);
631 }
632
633 /** Construct float immediate register */
634 static inline struct brw_reg
635 brw_imm_df(double df)
636 {
637 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_DF);
638 imm.df = df;
639 return imm;
640 }
641
642 static inline struct brw_reg
643 brw_imm_u64(uint64_t u64)
644 {
645 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
646 imm.u64 = u64;
647 return imm;
648 }
649
650 static inline struct brw_reg
651 brw_imm_f(float f)
652 {
653 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
654 imm.f = f;
655 return imm;
656 }
657
658 /** Construct int64_t immediate register */
659 static inline struct brw_reg
660 brw_imm_q(int64_t q)
661 {
662 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q);
663 imm.d64 = q;
664 return imm;
665 }
666
667 /** Construct int64_t immediate register */
668 static inline struct brw_reg
669 brw_imm_uq(uint64_t uq)
670 {
671 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
672 imm.u64 = uq;
673 return imm;
674 }
675
676 /** Construct integer immediate register */
677 static inline struct brw_reg
678 brw_imm_d(int d)
679 {
680 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
681 imm.d = d;
682 return imm;
683 }
684
685 /** Construct uint immediate register */
686 static inline struct brw_reg
687 brw_imm_ud(unsigned ud)
688 {
689 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
690 imm.ud = ud;
691 return imm;
692 }
693
694 /** Construct ushort immediate register */
695 static inline struct brw_reg
696 brw_imm_uw(uint16_t uw)
697 {
698 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
699 imm.ud = uw | (uw << 16);
700 return imm;
701 }
702
703 /** Construct short immediate register */
704 static inline struct brw_reg
705 brw_imm_w(int16_t w)
706 {
707 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
708 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
709 return imm;
710 }
711
712 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
713 * numbers alias with _V and _VF below:
714 */
715
716 /** Construct vector of eight signed half-byte values */
717 static inline struct brw_reg
718 brw_imm_v(unsigned v)
719 {
720 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
721 imm.ud = v;
722 return imm;
723 }
724
725 /** Construct vector of eight unsigned half-byte values */
726 static inline struct brw_reg
727 brw_imm_uv(unsigned uv)
728 {
729 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV);
730 imm.ud = uv;
731 return imm;
732 }
733
734 /** Construct vector of four 8-bit float values */
735 static inline struct brw_reg
736 brw_imm_vf(unsigned v)
737 {
738 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
739 imm.ud = v;
740 return imm;
741 }
742
743 static inline struct brw_reg
744 brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
745 {
746 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
747 imm.vstride = BRW_VERTICAL_STRIDE_0;
748 imm.width = BRW_WIDTH_4;
749 imm.hstride = BRW_HORIZONTAL_STRIDE_1;
750 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
751 return imm;
752 }
753
754
755 static inline struct brw_reg
756 brw_address(struct brw_reg reg)
757 {
758 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
759 }
760
761 /** Construct float[1] general-purpose register */
762 static inline struct brw_reg
763 brw_vec1_grf(unsigned nr, unsigned subnr)
764 {
765 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
766 }
767
768 /** Construct float[2] general-purpose register */
769 static inline struct brw_reg
770 brw_vec2_grf(unsigned nr, unsigned subnr)
771 {
772 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
773 }
774
775 /** Construct float[4] general-purpose register */
776 static inline struct brw_reg
777 brw_vec4_grf(unsigned nr, unsigned subnr)
778 {
779 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
780 }
781
782 /** Construct float[8] general-purpose register */
783 static inline struct brw_reg
784 brw_vec8_grf(unsigned nr, unsigned subnr)
785 {
786 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
787 }
788
789 /** Construct float[16] general-purpose register */
790 static inline struct brw_reg
791 brw_vec16_grf(unsigned nr, unsigned subnr)
792 {
793 return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
794 }
795
796 static inline struct brw_reg
797 brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
798 {
799 return brw_vecn_reg(width, BRW_GENERAL_REGISTER_FILE, nr, subnr);
800 }
801
802
803 static inline struct brw_reg
804 brw_uw8_grf(unsigned nr, unsigned subnr)
805 {
806 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
807 }
808
809 static inline struct brw_reg
810 brw_uw16_grf(unsigned nr, unsigned subnr)
811 {
812 return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
813 }
814
815
816 /** Construct null register (usually used for setting condition codes) */
817 static inline struct brw_reg
818 brw_null_reg(void)
819 {
820 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
821 }
822
823 static inline struct brw_reg
824 brw_null_vec(unsigned width)
825 {
826 return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
827 }
828
829 static inline struct brw_reg
830 brw_address_reg(unsigned subnr)
831 {
832 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
833 }
834
835 static inline struct brw_reg
836 brw_tdr_reg(void)
837 {
838 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0);
839 }
840
841 /* If/else instructions break in align16 mode if writemask & swizzle
842 * aren't xyzw. This goes against the convention for other scalar
843 * regs:
844 */
845 static inline struct brw_reg
846 brw_ip_reg(void)
847 {
848 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
849 BRW_ARF_IP,
850 0,
851 0,
852 0,
853 BRW_REGISTER_TYPE_UD,
854 BRW_VERTICAL_STRIDE_4, /* ? */
855 BRW_WIDTH_1,
856 BRW_HORIZONTAL_STRIDE_0,
857 BRW_SWIZZLE_XYZW, /* NOTE! */
858 WRITEMASK_XYZW); /* NOTE! */
859 }
860
861 static inline struct brw_reg
862 brw_notification_reg(void)
863 {
864 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
865 BRW_ARF_NOTIFICATION_COUNT,
866 0,
867 0,
868 0,
869 BRW_REGISTER_TYPE_UD,
870 BRW_VERTICAL_STRIDE_0,
871 BRW_WIDTH_1,
872 BRW_HORIZONTAL_STRIDE_0,
873 BRW_SWIZZLE_XXXX,
874 WRITEMASK_X);
875 }
876
877 static inline struct brw_reg
878 brw_cr0_reg(unsigned subnr)
879 {
880 return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_CONTROL, subnr);
881 }
882
883 static inline struct brw_reg
884 brw_sr0_reg(unsigned subnr)
885 {
886 return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr);
887 }
888
889 static inline struct brw_reg
890 brw_acc_reg(unsigned width)
891 {
892 return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
893 BRW_ARF_ACCUMULATOR, 0);
894 }
895
896 static inline struct brw_reg
897 brw_flag_reg(int reg, int subreg)
898 {
899 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
900 BRW_ARF_FLAG + reg, subreg);
901 }
902
903 static inline struct brw_reg
904 brw_flag_subreg(unsigned subreg)
905 {
906 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
907 BRW_ARF_FLAG + subreg / 2, subreg % 2);
908 }
909
910 /**
911 * Return the mask register present in Gen4-5, or the related register present
912 * in Gen7.5 and later hardware referred to as "channel enable" register in
913 * the documentation.
914 */
915 static inline struct brw_reg
916 brw_mask_reg(unsigned subnr)
917 {
918 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
919 }
920
921 static inline struct brw_reg
922 brw_vmask_reg()
923 {
924 return brw_sr0_reg(3);
925 }
926
927 static inline struct brw_reg
928 brw_dmask_reg()
929 {
930 return brw_sr0_reg(2);
931 }
932
933 static inline struct brw_reg
934 brw_message_reg(unsigned nr)
935 {
936 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
937 }
938
939 static inline struct brw_reg
940 brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
941 {
942 return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr),
943 BRW_REGISTER_TYPE_UD);
944 }
945
/* Convert a stride or width given as an element count into its encoded form:
 * 0 stays 0, and the powers of two 1..32 map to 1..6.  Any other value
 * yields 0.
 *
 * This is almost always called with a numeric constant argument, so
 * make things easy to evaluate at compile time:
 */
static inline unsigned cvt(unsigned val)
{
   if (val == 1)
      return 1;
   if (val == 2)
      return 2;
   if (val == 4)
      return 3;
   if (val == 8)
      return 4;
   if (val == 16)
      return 5;
   if (val == 32)
      return 6;

   return 0;
}
962
963 static inline struct brw_reg
964 stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
965 {
966 reg.vstride = cvt(vstride);
967 reg.width = cvt(width) - 1;
968 reg.hstride = cvt(hstride);
969 return reg;
970 }
971
972 /**
973 * Multiply the vertical and horizontal stride of a register by the given
974 * factor \a s.
975 */
976 static inline struct brw_reg
977 spread(struct brw_reg reg, unsigned s)
978 {
979 if (s) {
980 assert(_mesa_is_pow_two(s));
981
982 if (reg.hstride)
983 reg.hstride += cvt(s) - 1;
984
985 if (reg.vstride)
986 reg.vstride += cvt(s) - 1;
987
988 return reg;
989 } else {
990 return stride(reg, 0, 1, 0);
991 }
992 }
993
994 /**
995 * Reinterpret each channel of register \p reg as a vector of values of the
996 * given smaller type and take the i-th subcomponent from each.
997 */
998 static inline struct brw_reg
999 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1000 {
1001 if (reg.file == IMM)
1002 return reg;
1003
1004 unsigned scale = type_sz(reg.type) / type_sz(type);
1005 assert(scale >= 1 && i < scale);
1006
1007 return suboffset(retype(spread(reg, scale), type), i);
1008 }
1009
1010 static inline struct brw_reg
1011 vec16(struct brw_reg reg)
1012 {
1013 return stride(reg, 16,16,1);
1014 }
1015
1016 static inline struct brw_reg
1017 vec8(struct brw_reg reg)
1018 {
1019 return stride(reg, 8,8,1);
1020 }
1021
1022 static inline struct brw_reg
1023 vec4(struct brw_reg reg)
1024 {
1025 return stride(reg, 4,4,1);
1026 }
1027
1028 static inline struct brw_reg
1029 vec2(struct brw_reg reg)
1030 {
1031 return stride(reg, 2,2,1);
1032 }
1033
1034 static inline struct brw_reg
1035 vec1(struct brw_reg reg)
1036 {
1037 return stride(reg, 0,1,0);
1038 }
1039
1040
1041 static inline struct brw_reg
1042 get_element(struct brw_reg reg, unsigned elt)
1043 {
1044 return vec1(suboffset(reg, elt));
1045 }
1046
1047 static inline struct brw_reg
1048 get_element_ud(struct brw_reg reg, unsigned elt)
1049 {
1050 return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
1051 }
1052
1053 static inline struct brw_reg
1054 get_element_d(struct brw_reg reg, unsigned elt)
1055 {
1056 return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
1057 }
1058
1059 static inline struct brw_reg
1060 brw_swizzle(struct brw_reg reg, unsigned swz)
1061 {
1062 if (reg.file == BRW_IMMEDIATE_VALUE)
1063 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1064 else
1065 reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1066
1067 return reg;
1068 }
1069
1070 static inline struct brw_reg
1071 brw_writemask(struct brw_reg reg, unsigned mask)
1072 {
1073 assert(reg.file != BRW_IMMEDIATE_VALUE);
1074 reg.writemask &= mask;
1075 return reg;
1076 }
1077
1078 static inline struct brw_reg
1079 brw_set_writemask(struct brw_reg reg, unsigned mask)
1080 {
1081 assert(reg.file != BRW_IMMEDIATE_VALUE);
1082 reg.writemask = mask;
1083 return reg;
1084 }
1085
/** Return a writemask with the lowest \p n channel bits set */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   const unsigned low_bits = (1 << n) - 1;

   return low_bits;
}
1091
/**
 * Return a writemask of \p n consecutive channels starting at channel
 * \p first_component.  The range must fit within four channels.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   const unsigned low_bits = (1 << n) - 1;

   return low_bits << first_component;
}
1098
1099 static inline struct brw_reg
1100 negate(struct brw_reg reg)
1101 {
1102 reg.negate ^= 1;
1103 return reg;
1104 }
1105
1106 static inline struct brw_reg
1107 brw_abs(struct brw_reg reg)
1108 {
1109 reg.abs = 1;
1110 reg.negate = 0;
1111 return reg;
1112 }
1113
1114 /************************************************************************/
1115
1116 static inline struct brw_reg
1117 brw_vec4_indirect(unsigned subnr, int offset)
1118 {
1119 struct brw_reg reg = brw_vec4_grf(0, 0);
1120 reg.subnr = subnr;
1121 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1122 reg.indirect_offset = offset;
1123 return reg;
1124 }
1125
1126 static inline struct brw_reg
1127 brw_vec1_indirect(unsigned subnr, int offset)
1128 {
1129 struct brw_reg reg = brw_vec1_grf(0, 0);
1130 reg.subnr = subnr;
1131 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1132 reg.indirect_offset = offset;
1133 return reg;
1134 }
1135
1136 static inline struct brw_reg
1137 brw_VxH_indirect(unsigned subnr, int offset)
1138 {
1139 struct brw_reg reg = brw_vec1_grf(0, 0);
1140 reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1141 reg.subnr = subnr;
1142 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1143 reg.indirect_offset = offset;
1144 return reg;
1145 }
1146
1147 static inline struct brw_reg
1148 deref_4f(struct brw_indirect ptr, int offset)
1149 {
1150 return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1151 }
1152
1153 static inline struct brw_reg
1154 deref_1f(struct brw_indirect ptr, int offset)
1155 {
1156 return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1157 }
1158
1159 static inline struct brw_reg
1160 deref_4b(struct brw_indirect ptr, int offset)
1161 {
1162 return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1163 }
1164
1165 static inline struct brw_reg
1166 deref_1uw(struct brw_indirect ptr, int offset)
1167 {
1168 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1169 }
1170
1171 static inline struct brw_reg
1172 deref_1d(struct brw_indirect ptr, int offset)
1173 {
1174 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1175 }
1176
1177 static inline struct brw_reg
1178 deref_1ud(struct brw_indirect ptr, int offset)
1179 {
1180 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1181 }
1182
1183 static inline struct brw_reg
1184 get_addr_reg(struct brw_indirect ptr)
1185 {
1186 return brw_address_reg(ptr.addr_subnr);
1187 }
1188
1189 static inline struct brw_indirect
1190 brw_indirect_offset(struct brw_indirect ptr, int offset)
1191 {
1192 ptr.addr_offset += offset;
1193 return ptr;
1194 }
1195
1196 static inline struct brw_indirect
1197 brw_indirect(unsigned addr_subnr, int offset)
1198 {
1199 struct brw_indirect ptr;
1200 ptr.addr_subnr = addr_subnr;
1201 ptr.addr_offset = offset;
1202 ptr.pad = 0;
1203 return ptr;
1204 }
1205
1206 static inline bool
1207 region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1208 enum brw_width w, enum brw_horizontal_stride h)
1209 {
1210 return reg.vstride == v &&
1211 reg.width == w &&
1212 reg.hstride == h;
1213 }
1214
/* True when reg has the scalar <0;1,0> region. */
#define has_scalar_region(reg)                                          \
   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1,              \
                  BRW_HORIZONTAL_STRIDE_0)
1218
1219 /* brw_packed_float.c */
1220 int brw_float_to_vf(float f);
1221 float brw_vf_to_float(unsigned char vf);
1222
1223 #ifdef __cplusplus
1224 }
1225 #endif
1226
1227 #endif