5e505992842f6ec1e6da4c9befe08c91e13bfa1c
[mesa.git] / src / intel / compiler / brw_eu_validate.c
1 /*
2 * Copyright © 2015-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_validate.c
25 *
26 * This file implements a pass that validates shader assembly.
27 *
28 * The restrictions implemented herein are intended to verify that instructions
29 * in shader assembly do not violate restrictions documented in the graphics
30 * programming reference manuals.
31 *
32 * The restrictions are difficult for humans to quickly verify due to their
33 * complexity and abundance.
34 *
35 * It is critical that this code is thoroughly unit tested because false
36 * results will lead developers astray, which is worse than having no validator
37 * at all. Functional changes to this file without corresponding unit tests (in
38 * test_eu_validate.cpp) will be rejected.
39 */
40
41 #include "brw_eu.h"
42
/* We're going to do lots of string concatenation, so this should help. */
struct string {
   char *str;
   size_t len;
};

/**
 * Appends \p src to \p *dest, growing dest's heap buffer as needed.
 *
 * The result is always NUL-terminated.  On allocation failure the
 * destination is left untouched (the previous buffer is neither lost nor
 * leaked) and the message is silently dropped.
 */
static void
cat(struct string *dest, const struct string src)
{
   /* Use a temporary so a failed realloc doesn't leak (or clobber) the
    * original buffer.
    */
   char *str = realloc(dest->str, dest->len + src.len + 1);
   if (str == NULL)
      return;

   memcpy(str + dest->len, src.str, src.len);
   str[dest->len + src.len] = '\0';
   dest->str = str;
   dest->len = dest->len + src.len;
}
#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
58
59 static bool
60 contains(const struct string haystack, const struct string needle)
61 {
62 return haystack.str && memmem(haystack.str, haystack.len,
63 needle.str, needle.len) != NULL;
64 }
65 #define CONTAINS(haystack, needle) \
66 contains(haystack, (struct string){needle, strlen(needle)})
67
/* Formats one validation failure line: tab indent, ERROR tag, newline. */
#define error(str) "\tERROR: " str "\n"
/* Continuation indent that lines up under the "ERROR: " tag above. */
#define ERROR_INDENT "\t       "

/* Unconditionally record \p msg (deduplication still applies). */
#define ERROR(msg) ERROR_IF(true, msg)
/* Append error(msg) to the local `error_msg` accumulator when \p cond
 * holds, unless the identical message was already recorded for this
 * instruction (keeps repeated rule violations to one line each).
 */
#define ERROR_IF(cond, msg)                             \
   do {                                                 \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg));                    \
      }                                                 \
   } while(0)
78
/* Runs check function \p func(devinfo, inst, args...) and folds any error
 * text it produced into the caller's `error_msg`, freeing the temporary.
 */
#define CHECK(func, args...)                             \
   do {                                                  \
      struct string __msg = func(devinfo, inst, ##args); \
      if (__msg.str) {                                   \
         cat(&error_msg, __msg);                         \
         free(__msg.str);                                \
      }                                                  \
   } while (0)

/* Decodes an encoded region stride field: 0 -> 0, n -> 2^(n-1).
 * The argument is fully parenthesized so expressions such as
 * STRIDE(a + b) expand correctly.
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
/* Decodes an encoded region width field: n -> 2^n. */
#define WIDTH(width) (1 << (width))
90
91 static bool
92 inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst)
93 {
94 switch (brw_inst_opcode(devinfo, inst)) {
95 case BRW_OPCODE_SEND:
96 case BRW_OPCODE_SENDC:
97 case BRW_OPCODE_SENDS:
98 case BRW_OPCODE_SENDSC:
99 return true;
100 default:
101 return false;
102 }
103 }
104
105 static unsigned
106 signed_type(unsigned type)
107 {
108 switch (type) {
109 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
110 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
111 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
112 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
113 default: return type;
114 }
115 }
116
117 static bool
118 inst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst)
119 {
120 unsigned dst_type = signed_type(brw_inst_dst_type(devinfo, inst));
121 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
122
123 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
124 /* FIXME: not strictly true */
125 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
126 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
127 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
128 return false;
129 }
130 } else if (brw_inst_src0_negate(devinfo, inst) ||
131 brw_inst_src0_abs(devinfo, inst)) {
132 return false;
133 }
134
135 return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
136 brw_inst_saturate(devinfo, inst) == 0 &&
137 dst_type == src_type;
138 }
139
140 static bool
141 dst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
142 {
143 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
144 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
145 }
146
147 static bool
148 src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
149 {
150 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
151 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
152 }
153
154 static bool
155 src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
156 {
157 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
158 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
159 }
160
161 static bool
162 src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
163 {
164 return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE;
165 }
166
167 static bool
168 src0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
169 {
170 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
171 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
172 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
173 }
174
175 static bool
176 src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
177 {
178 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
179 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
180 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
181 }
182
183 static unsigned
184 num_sources_from_inst(const struct gen_device_info *devinfo,
185 const brw_inst *inst)
186 {
187 const struct opcode_desc *desc =
188 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
189 unsigned math_function;
190
191 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
192 math_function = brw_inst_math_function(devinfo, inst);
193 } else if (devinfo->gen < 6 &&
194 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
195 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
196 /* src1 must be a descriptor (including the information to determine
197 * that the SEND is doing an extended math operation), but src0 can
198 * actually be null since it serves as the source of the implicit GRF
199 * to MRF move.
200 *
201 * If we stop using that functionality, we'll have to revisit this.
202 */
203 return 2;
204 } else {
205 /* Send instructions are allowed to have null sources since they use
206 * the base_mrf field to specify which message register source.
207 */
208 return 0;
209 }
210 } else {
211 assert(desc->nsrc < 4);
212 return desc->nsrc;
213 }
214
215 switch (math_function) {
216 case BRW_MATH_FUNCTION_INV:
217 case BRW_MATH_FUNCTION_LOG:
218 case BRW_MATH_FUNCTION_EXP:
219 case BRW_MATH_FUNCTION_SQRT:
220 case BRW_MATH_FUNCTION_RSQ:
221 case BRW_MATH_FUNCTION_SIN:
222 case BRW_MATH_FUNCTION_COS:
223 case BRW_MATH_FUNCTION_SINCOS:
224 case GEN8_MATH_FUNCTION_INVM:
225 case GEN8_MATH_FUNCTION_RSQRTM:
226 return 1;
227 case BRW_MATH_FUNCTION_FDIV:
228 case BRW_MATH_FUNCTION_POW:
229 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
230 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
231 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
232 return 2;
233 default:
234 unreachable("not reached");
235 }
236 }
237
/**
 * Checks that no source operand reads the null register.
 *
 * Returns an error string (empty when the instruction is valid); the
 * caller owns and frees the returned buffer.
 */
static struct string
sources_not_null(const struct gen_device_info *devinfo,
                 const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Nothing to test. 3-src instructions can only have GRF sources, and
    * there's no bit to control the file.
    */
   if (num_sources == 3)
      return (struct string){};

   if (num_sources >= 1)
      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");

   if (num_sources == 2)
      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");

   return error_msg;
}
259
/**
 * Checks restrictions specific to SEND/SENDC message instructions:
 * addressing mode, payload register file/range, and the Gen8+ r127
 * return-address rule.  Returns an error string; caller frees.
 */
static struct string
send_restrictions(const struct gen_device_info *devinfo,
                  const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      if (devinfo->gen >= 7) {
         ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF");
         ERROR_IF(brw_inst_eot(devinfo, inst) &&
                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                  "send with EOT must use g112-g127");
      }

      if (devinfo->gen >= 8) {
         /* Flag the case where the response region would reach r127 while
          * also overlapping the message payload region.
          */
         ERROR_IF(!dst_is_null(devinfo, inst) &&
                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
                   brw_inst_rlen(devinfo, inst) > 127) &&
                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
                   brw_inst_mlen(devinfo, inst) >
                   brw_inst_dst_da_reg_nr(devinfo, inst)),
                  "r127 must not be used for return address when there is "
                  "a src and dest overlap");
      }
   }

   return error_msg;
}
292
293 static bool
294 is_unsupported_inst(const struct gen_device_info *devinfo,
295 const brw_inst *inst)
296 {
297 return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL;
298 }
299
300 static enum brw_reg_type
301 execution_type_for_type(enum brw_reg_type type)
302 {
303 switch (type) {
304 case BRW_REGISTER_TYPE_NF:
305 case BRW_REGISTER_TYPE_DF:
306 case BRW_REGISTER_TYPE_F:
307 case BRW_REGISTER_TYPE_HF:
308 return type;
309
310 case BRW_REGISTER_TYPE_VF:
311 return BRW_REGISTER_TYPE_F;
312
313 case BRW_REGISTER_TYPE_Q:
314 case BRW_REGISTER_TYPE_UQ:
315 return BRW_REGISTER_TYPE_Q;
316
317 case BRW_REGISTER_TYPE_D:
318 case BRW_REGISTER_TYPE_UD:
319 return BRW_REGISTER_TYPE_D;
320
321 case BRW_REGISTER_TYPE_W:
322 case BRW_REGISTER_TYPE_UW:
323 case BRW_REGISTER_TYPE_B:
324 case BRW_REGISTER_TYPE_UB:
325 case BRW_REGISTER_TYPE_V:
326 case BRW_REGISTER_TYPE_UV:
327 return BRW_REGISTER_TYPE_W;
328 }
329 unreachable("not reached");
330 }
331
/**
 * Returns the execution type of an instruction \p inst
 *
 * For two-source instructions with differing source types, the checks
 * below establish a priority order (Q over D over W; DF over the mixed
 * F/HF handling) — the order of the if-chain is load-bearing.
 */
static enum brw_reg_type
execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions on CHV and SKL+.
    */
   enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      /* Single-source HF on CHV/SKL+ takes its execution type from the
       * destination (the mixed-mode exception noted above).
       */
      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
          src0_exec_type == BRW_REGISTER_TYPE_HF) {
         return dst_exec_type;
      }
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   /* Mixed operand types where one is float is float on Gen < 6
    * (and not allowed on later platforms)
    */
   if (devinfo->gen < 6 &&
       (src0_exec_type == BRW_REGISTER_TYPE_F ||
        src1_exec_type == BRW_REGISTER_TYPE_F))
      return BRW_REGISTER_TYPE_F;

   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   /* Mixed F/HF on CHV and SKL+: F wins if it appears anywhere. */
   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
          src0_exec_type == BRW_REGISTER_TYPE_F ||
          src1_exec_type == BRW_REGISTER_TYPE_F) {
         return BRW_REGISTER_TYPE_F;
      } else {
         return BRW_REGISTER_TYPE_HF;
      }
   }

   assert(src0_exec_type == BRW_REGISTER_TYPE_F);
   return BRW_REGISTER_TYPE_F;
}
396
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   /* A <1,1,0> region is the packed single-element case; otherwise the
    * horizontal stride must be exactly one element.
    */
   return vstride == 1 ? hstride == 0 : hstride == 1;
}
416
/**
 * Checks restrictions listed in "General Restrictions Based on Operand Types"
 * in the "Register Region Restrictions" section.
 */
static struct string
general_restrictions_based_on_operand_types(const struct gen_device_info *devinfo,
                                            const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* These rules don't apply to 3-src, send-like, scalar, or
    * destination-less instructions.
    */
   if (num_sources == 3)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   if (exec_size == 1)
      return (struct string){};

   if (desc->ndst == 0)
      return (struct string){};

   /* The PRMs say:
    *
    *    Where n is the largest element size in bytes for any source or
    *    destination operand type, ExecSize * n must be <= 64.
    *
    * But we do not attempt to enforce it, because it is implied by other
    * rules:
    *
    *    - that the destination stride must match the execution data type
    *    - sources may not span more than two adjacent GRF registers
    *    - destination may not span more than two adjacent GRF registers
    *
    * In fact, checking it would weaken testing of the other rules.
    */

   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   bool dst_type_is_byte =
      brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B ||
      brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB;

   /* A packed byte destination is only legal for raw MOV. */
   if (dst_type_is_byte) {
      if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
         if (!inst_is_raw_move(devinfo, inst)) {
            ERROR("Only raw MOV supports a packed-byte destination");
            return error_msg;
         } else {
            return (struct string){};
         }
      }
   }

   unsigned exec_type = execution_type(devinfo, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       exec_type_size == 8 && dst_type_size == 4)
      dst_type_size = 8;

   if (exec_type_size > dst_type_size) {
      if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
         ERROR_IF(dst_stride * dst_type_size != exec_type_size,
                  "Destination stride must be equal to the ratio of the sizes "
                  "of the execution data type to the destination type");
      }

      unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);

      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
         /* The i965 PRM says:
          *
          *    Implementation Restriction: The relaxed alignment rule for byte
          *    destination (#10.5) is not supported.
          */
         if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) {
            ERROR_IF(subreg % exec_type_size != 0 &&
                     subreg % exec_type_size != 1,
                     "Destination subreg must be aligned to the size of the "
                     "execution data type (or to the next lowest byte for byte "
                     "destinations)");
         } else {
            ERROR_IF(subreg % exec_type_size != 0,
                     "Destination subreg must be aligned to the size of the "
                     "execution data type");
         }
      }
   }

   return error_msg;
}
519
/**
 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
 * in the "Register Region Restrictions" section.
 */
static struct string
general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
                                          const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3)
      return (struct string){};

   /* Align16 mode has its own, much smaller set of regioning rules. */
   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      if (num_sources == 2) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      return error_msg;
   }

   /* Align1 mode: validate each source's region parameters. */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* Decodes src<n>'s region into the locals above; immediates have no
       * region, so they skip to the next source via `continue`.
       */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* On IVB/BYT, region parameters and execution size for DF are in terms of
       * 32-bit elements, so they are doubled. For evaluating the validity of an
       * instruction, we halve them.
       */
      if (devinfo->gen == 7 && !devinfo->is_haswell &&
          element_size == 8)
         element_size = 4;

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
               "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << offset;
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         /* Bits 0-31 are the first GRF, bits 32-63 the second; a row that
          * touches both crossed a GRF boundary without using VertStride.
          */
         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}
676
/**
 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
 *
 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
 * a bitmask of bytes accessed by the region.
 *
 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
 * instruction would be
 *
 *   access_mask[0] = 0x00000000000000F0
 *   access_mask[1] = 0x000000000000F000
 *   access_mask[2] = 0x0000000000F00000
 *   access_mask[3] = 0x00000000F0000000
 *   access_mask[4-31] = 0
 *
 * because the first execution channel accesses bytes 7-4 and the second
 * execution channel accesses bytes 15-12, etc.
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One set bit per byte of a single element. */
   const uint64_t element_mask = (1ULL << element_size) - 1;
   unsigned row_base = subreg;
   unsigned chan = 0;

   for (unsigned y = 0; y < exec_size / width; y++) {
      unsigned byte_offset = row_base;

      for (unsigned x = 0; x < width; x++) {
         access_mask[chan++] = element_mask << byte_offset;
         byte_offset += hstride * element_size;
      }

      row_base += vstride * element_size;
   }

   assert(chan == 0 || chan == exec_size);
}
717
/**
 * Returns the number of registers accessed according to the \p access_mask
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   int regs_read = 0;

   for (unsigned i = 0; i < 32; i++) {
      /* Bits 32-63 represent the second GRF: any hit there means two
       * registers are touched, and we can stop looking.
       */
      if (access_mask[i] > 0xFFFFFFFF)
         return 2;

      if (access_mask[i] != 0)
         regs_read = 1;
   }

   return regs_read;
}
736
737 /**
738 * Checks restrictions listed in "Region Alignment Rules" in the "Register
739 * Region Restrictions" section.
740 */
741 static struct string
742 region_alignment_rules(const struct gen_device_info *devinfo,
743 const brw_inst *inst)
744 {
745 const struct opcode_desc *desc =
746 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
747 unsigned num_sources = num_sources_from_inst(devinfo, inst);
748 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
749 uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
750 struct string error_msg = { .str = NULL, .len = 0 };
751
752 if (num_sources == 3)
753 return (struct string){};
754
755 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
756 return (struct string){};
757
758 if (inst_is_send(devinfo, inst))
759 return (struct string){};
760
761 memset(dst_access_mask, 0, sizeof(dst_access_mask));
762 memset(src0_access_mask, 0, sizeof(src0_access_mask));
763 memset(src1_access_mask, 0, sizeof(src1_access_mask));
764
765 for (unsigned i = 0; i < num_sources; i++) {
766 unsigned vstride, width, hstride, element_size, subreg;
767 enum brw_reg_type type;
768
769 /* In Direct Addressing mode, a source cannot span more than 2 adjacent
770 * GRF registers.
771 */
772
773 #define DO_SRC(n) \
774 if (brw_inst_src ## n ## _address_mode(devinfo, inst) != \
775 BRW_ADDRESS_DIRECT) \
776 continue; \
777 \
778 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \
779 BRW_IMMEDIATE_VALUE) \
780 continue; \
781 \
782 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
783 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
784 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
785 type = brw_inst_src ## n ## _type(devinfo, inst); \
786 element_size = brw_reg_type_to_size(type); \
787 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
788 align1_access_mask(src ## n ## _access_mask, \
789 exec_size, element_size, subreg, \
790 vstride, width, hstride)
791
792 if (i == 0) {
793 DO_SRC(0);
794 } else {
795 DO_SRC(1);
796 }
797 #undef DO_SRC
798
799 unsigned num_vstride = exec_size / width;
800 unsigned num_hstride = width;
801 unsigned vstride_elements = (num_vstride - 1) * vstride;
802 unsigned hstride_elements = (num_hstride - 1) * hstride;
803 unsigned offset = (vstride_elements + hstride_elements) * element_size +
804 subreg;
805 ERROR_IF(offset >= 64,
806 "A source cannot span more than 2 adjacent GRF registers");
807 }
808
809 if (desc->ndst == 0 || dst_is_null(devinfo, inst))
810 return error_msg;
811
812 unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
813 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
814 unsigned element_size = brw_reg_type_to_size(dst_type);
815 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
816 unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
817 ERROR_IF(offset >= 64,
818 "A destination cannot span more than 2 adjacent GRF registers");
819
820 if (error_msg.str)
821 return error_msg;
822
823 /* On IVB/BYT, region parameters and execution size for DF are in terms of
824 * 32-bit elements, so they are doubled. For evaluating the validity of an
825 * instruction, we halve them.
826 */
827 if (devinfo->gen == 7 && !devinfo->is_haswell &&
828 element_size == 8)
829 element_size = 4;
830
831 align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
832 exec_size == 1 ? 0 : exec_size * stride,
833 exec_size == 1 ? 1 : exec_size,
834 exec_size == 1 ? 0 : stride);
835
836 unsigned dst_regs = registers_read(dst_access_mask);
837 unsigned src0_regs = registers_read(src0_access_mask);
838 unsigned src1_regs = registers_read(src1_access_mask);
839
840 /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
841 *
842 * When an instruction has a source region spanning two registers and a
843 * destination region contained in one register, the number of elements
844 * must be the same between two sources and one of the following must be
845 * true:
846 *
847 * 1. The destination region is entirely contained in the lower OWord
848 * of a register.
849 * 2. The destination region is entirely contained in the upper OWord
850 * of a register.
851 * 3. The destination elements are evenly split between the two OWords
852 * of a register.
853 */
854 if (devinfo->gen <= 8) {
855 if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
856 unsigned upper_oword_writes = 0, lower_oword_writes = 0;
857
858 for (unsigned i = 0; i < exec_size; i++) {
859 if (dst_access_mask[i] > 0x0000FFFF) {
860 upper_oword_writes++;
861 } else {
862 assert(dst_access_mask[i] != 0);
863 lower_oword_writes++;
864 }
865 }
866
867 ERROR_IF(lower_oword_writes != 0 &&
868 upper_oword_writes != 0 &&
869 upper_oword_writes != lower_oword_writes,
870 "Writes must be to only one OWord or "
871 "evenly split between OWords");
872 }
873 }
874
875 /* The IVB and HSW PRMs say:
876 *
877 * When an instruction has a source region that spans two registers and
878 * the destination spans two registers, the destination elements must be
879 * evenly split between the two registers [...]
880 *
881 * The SNB PRM contains similar wording (but written in a much more
882 * confusing manner).
883 *
884 * The BDW PRM says:
885 *
886 * When destination spans two registers, the source may be one or two
887 * registers. The destination elements must be evenly split between the
888 * two registers.
889 *
890 * The SKL PRM says:
891 *
892 * When destination of MATH instruction spans two registers, the
893 * destination elements must be evenly split between the two registers.
894 *
895 * It is not known whether this restriction applies to KBL other Gens after
896 * SKL.
897 */
898 if (devinfo->gen <= 8 ||
899 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
900
901 /* Nothing explicitly states that on Gen < 8 elements must be evenly
902 * split between two destination registers in the two exceptional
903 * source-region-spans-one-register cases, but since Broadwell requires
904 * evenly split writes regardless of source region, we assume that it was
905 * an oversight and require it.
906 */
907 if (dst_regs == 2) {
908 unsigned upper_reg_writes = 0, lower_reg_writes = 0;
909
910 for (unsigned i = 0; i < exec_size; i++) {
911 if (dst_access_mask[i] > 0xFFFFFFFF) {
912 upper_reg_writes++;
913 } else {
914 assert(dst_access_mask[i] != 0);
915 lower_reg_writes++;
916 }
917 }
918
919 ERROR_IF(upper_reg_writes != lower_reg_writes,
920 "Writes must be evenly split between the two "
921 "destination registers");
922 }
923 }
924
925 /* The IVB and HSW PRMs say:
926 *
927 * When an instruction has a source region that spans two registers and
928 * the destination spans two registers, the destination elements must be
929 * evenly split between the two registers and each destination register
930 * must be entirely derived from one source register.
931 *
932 * Note: In such cases, the regioning parameters must ensure that the
933 * offset from the two source registers is the same.
934 *
935 * The SNB PRM contains similar wording (but written in a much more
936 * confusing manner).
937 *
938 * There are effectively three rules stated here:
939 *
940 * For an instruction with a source and a destination spanning two
941 * registers,
942 *
943 * (1) destination elements must be evenly split between the two
944 * registers
945 * (2) all destination elements in a register must be derived
946 * from one source register
947 * (3) the offset (i.e. the starting location in each of the two
948 * registers spanned by a region) must be the same in the two
949 * registers spanned by a region
950 *
951 * It is impossible to violate rule (1) without violating (2) or (3), so we
952 * do not attempt to validate it.
953 */
954 if (devinfo->gen <= 7 && dst_regs == 2) {
955 for (unsigned i = 0; i < num_sources; i++) {
956 #define DO_SRC(n) \
957 if (src ## n ## _regs <= 1) \
958 continue; \
959 \
960 for (unsigned i = 0; i < exec_size; i++) { \
961 if ((dst_access_mask[i] > 0xFFFFFFFF) != \
962 (src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \
963 ERROR("Each destination register must be entirely derived " \
964 "from one source register"); \
965 break; \
966 } \
967 } \
968 \
969 unsigned offset_0 = \
970 brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
971 unsigned offset_1 = offset_0; \
972 \
973 for (unsigned i = 0; i < exec_size; i++) { \
974 if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \
975 offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \
976 break; \
977 } \
978 } \
979 \
980 ERROR_IF(num_sources == 2 && offset_0 != offset_1, \
981 "The offset from the two source registers " \
982 "must be the same")
983
984 if (i == 0) {
985 DO_SRC(0);
986 } else {
987 DO_SRC(1);
988 }
989 #undef DO_SRC
990 }
991 }
992
993 /* The IVB and HSW PRMs say:
994 *
995 * When destination spans two registers, the source MUST span two
996 * registers. The exception to the above rule:
997 * 1. When source is scalar, the source registers are not
998 * incremented.
999 * 2. When source is packed integer Word and destination is packed
1000 * integer DWord, the source register is not incremented by the
1001 * source sub register is incremented.
1002 *
1003 * The SNB PRM does not contain this rule, but the internal documentation
1004 * indicates that it applies to SNB as well. We assume that the rule applies
1005 * to Gen <= 5 although their PRMs do not state it.
1006 *
1007 * While the documentation explicitly says in exception (2) that the
1008 * destination must be an integer DWord, the hardware allows at least a
1009 * float destination type as well. We emit such instructions from
1010 *
1011 * fs_visitor::emit_interpolation_setup_gen6
1012 * fs_visitor::emit_fragcoord_interpolation
1013 *
1014 * and have for years with no ill effects.
1015 *
1016 * Additionally the simulator source code indicates that the real condition
1017 * is that the size of the destination type is 4 bytes.
1018 */
1019 if (devinfo->gen <= 7 && dst_regs == 2) {
1020 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1021 bool dst_is_packed_dword =
1022 is_packed(exec_size * stride, exec_size, stride) &&
1023 brw_reg_type_to_size(dst_type) == 4;
1024
1025 for (unsigned i = 0; i < num_sources; i++) {
1026 #define DO_SRC(n) \
1027 unsigned vstride, width, hstride; \
1028 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
1029 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
1030 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
1031 bool src ## n ## _is_packed_word = \
1032 is_packed(vstride, width, hstride) && \
1033 (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \
1034 brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \
1035 \
1036 ERROR_IF(src ## n ## _regs == 1 && \
1037 !src ## n ## _has_scalar_region(devinfo, inst) && \
1038 !(dst_is_packed_dword && src ## n ## _is_packed_word), \
1039 "When the destination spans two registers, the source must " \
1040 "span two registers\n" ERROR_INDENT "(exceptions for scalar " \
1041 "source and packed-word to packed-dword expansion)")
1042
1043 if (i == 0) {
1044 DO_SRC(0);
1045 } else {
1046 DO_SRC(1);
1047 }
1048 #undef DO_SRC
1049 }
1050 }
1051
1052 return error_msg;
1053 }
1054
1055 static struct string
1056 vector_immediate_restrictions(const struct gen_device_info *devinfo,
1057 const brw_inst *inst)
1058 {
1059 unsigned num_sources = num_sources_from_inst(devinfo, inst);
1060 struct string error_msg = { .str = NULL, .len = 0 };
1061
1062 if (num_sources == 3 || num_sources == 0)
1063 return (struct string){};
1064
1065 unsigned file = num_sources == 1 ?
1066 brw_inst_src0_reg_file(devinfo, inst) :
1067 brw_inst_src1_reg_file(devinfo, inst);
1068 if (file != BRW_IMMEDIATE_VALUE)
1069 return (struct string){};
1070
1071 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1072 unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1073 unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1074 brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1075 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1076 enum brw_reg_type type = num_sources == 1 ?
1077 brw_inst_src0_type(devinfo, inst) :
1078 brw_inst_src1_type(devinfo, inst);
1079
1080 /* The PRMs say:
1081 *
1082 * When an immediate vector is used in an instruction, the destination
1083 * must be 128-bit aligned with destination horizontal stride equivalent
1084 * to a word for an immediate integer vector (v) and equivalent to a
1085 * DWord for an immediate float vector (vf).
1086 *
1087 * The text has not been updated for the addition of the immediate unsigned
1088 * integer vector type (uv) on SNB, but presumably the same restriction
1089 * applies.
1090 */
1091 switch (type) {
1092 case BRW_REGISTER_TYPE_V:
1093 case BRW_REGISTER_TYPE_UV:
1094 case BRW_REGISTER_TYPE_VF:
1095 ERROR_IF(dst_subreg % (128 / 8) != 0,
1096 "Destination must be 128-bit aligned in order to use immediate "
1097 "vector types");
1098
1099 if (type == BRW_REGISTER_TYPE_VF) {
1100 ERROR_IF(dst_type_size * dst_stride != 4,
1101 "Destination must have stride equivalent to dword in order "
1102 "to use the VF type");
1103 } else {
1104 ERROR_IF(dst_type_size * dst_stride != 2,
1105 "Destination must have stride equivalent to word in order "
1106 "to use the V or UV type");
1107 }
1108 break;
1109 default:
1110 break;
1111 }
1112
1113 return error_msg;
1114 }
1115
/**
 * Validates the extra regioning/addressing restrictions that apply when an
 * instruction either has a 64-bit execution or destination type, or is an
 * integer DWord multiply (which produces a 64-bit intermediate result and is
 * therefore subject to the same hardware limitations).
 *
 * Most checks below are documented for CHV and BXT; per the inline comments
 * they are assumed to apply to GLK (gen9lp) as well.
 */
static struct string
special_requirements_for_handling_double_precision_data_types(
                                      const struct gen_device_info *devinfo,
                                      const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* These restrictions only concern one- and two-source instructions. */
   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   enum brw_reg_type exec_type = execution_type(devinfo, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);

   /* Decode the destination operand once up front; it is compared against
    * each source inside the loop below.
    */
   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);

   /* D x D (or UD) MUL widens to a 64-bit intermediate on Gen8+, so it
    * counts as a 64-bit operation for the purposes of these rules.
    */
   bool is_integer_dword_multiply =
      devinfo->gen >= 8 &&
      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);

   /* Nothing to check unless 64-bit data (or its multiply equivalent) is
    * involved somewhere in the instruction.
    */
   if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply)
      return (struct string){};

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
      bool is_scalar_region;
      enum brw_reg_file file;
      enum brw_reg_type type;

      /* DO_SRC decodes source n into the locals above; note that the
       * `continue` inside it skips immediate sources, which have no region.
       */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      type_size = brw_reg_type_to_size(type);                                  \
      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
       *    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
         /* Strides in bytes, so they can be compared across differing types. */
         unsigned src_stride = hstride * type_size;
         unsigned dst_stride = dst_hstride * dst_type_size;

         ERROR_IF(!is_scalar_region &&
                  (src_stride % 8 != 0 ||
                   dst_stride % 8 != 0 ||
                   src_stride != dst_stride),
                  "Source and destination horizontal stride must equal and a "
                  "multiple of a qword when the execution type is 64-bit");

         ERROR_IF(vstride != width * hstride,
                  "Vstride must be Width * Hstride when the execution type is "
                  "64-bit");

         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
                  "Source and destination offset must be the same when the "
                  "execution type is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
                  "Indirect addressing is not allowed when the execution type "
                  "is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       *
       * We assume that the restriction applies to GLK as well.
       *
       * We assume that the restriction does not apply to the null register.
       */
      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
         /* MAC and AccWrEn implicitly involve the accumulator, an ARF. */
         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
                  brw_inst_acc_wr_control(devinfo, inst) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
                   reg != BRW_ARF_NULL) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
                   dst_reg != BRW_ARF_NULL),
                  "Architecture registers cannot be used when the execution "
                  "type is 64-bit");
      }
   }

   /* The PRMs say that for BDW, SKL:
    *
    *    If Align16 is required for an operation with QW destination and non-QW
    *    source datatypes, the execution size cannot exceed 2.
    *
    * We assume that the restriction applies to all Gen8+ parts.
    */
   if (devinfo->gen >= 8) {
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      /* For one-source instructions, treat src1 as matching src0 so the
       * size comparison below degenerates to a check on src0 alone.
       */
      enum brw_reg_type src1_type =
         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
      unsigned src1_type_size = brw_reg_type_to_size(src1_type);

      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
               dst_type_size == 8 &&
               (src0_type_size != 8 || src1_type_size != 8) &&
               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
               "In Align16 exec size cannot exceed 2 with a QWord destination "
               "and a non-QWord source");
   }

   /* The PRMs say that for CHV, BXT:
    *
    *    When source or destination datatype is 64b or operation is integer
    *    DWord multiply, DepCtrl must not be used.
    *
    * We assume that the restriction applies to GLK as well.
    */
   if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
               brw_inst_no_dd_clear(devinfo, inst),
               "DepCtrl is not allowed when the execution type is 64-bit");
   }

   return error_msg;
}
1284
1285 bool
1286 brw_validate_instructions(const struct gen_device_info *devinfo,
1287 const void *assembly, int start_offset, int end_offset,
1288 struct disasm_info *disasm)
1289 {
1290 bool valid = true;
1291
1292 for (int src_offset = start_offset; src_offset < end_offset;) {
1293 struct string error_msg = { .str = NULL, .len = 0 };
1294 const brw_inst *inst = assembly + src_offset;
1295 bool is_compact = brw_inst_cmpt_control(devinfo, inst);
1296 brw_inst uncompacted;
1297
1298 if (is_compact) {
1299 brw_compact_inst *compacted = (void *)inst;
1300 brw_uncompact_instruction(devinfo, &uncompacted, compacted);
1301 inst = &uncompacted;
1302 }
1303
1304 if (is_unsupported_inst(devinfo, inst)) {
1305 ERROR("Instruction not supported on this Gen");
1306 } else {
1307 CHECK(sources_not_null);
1308 CHECK(send_restrictions);
1309 CHECK(general_restrictions_based_on_operand_types);
1310 CHECK(general_restrictions_on_region_parameters);
1311 CHECK(region_alignment_rules);
1312 CHECK(vector_immediate_restrictions);
1313 CHECK(special_requirements_for_handling_double_precision_data_types);
1314 }
1315
1316 if (error_msg.str && disasm) {
1317 disasm_insert_error(disasm, src_offset, error_msg.str);
1318 }
1319 valid = valid && error_msg.len == 0;
1320 free(error_msg.str);
1321
1322 if (is_compact) {
1323 src_offset += sizeof(brw_compact_inst);
1324 } else {
1325 src_offset += sizeof(brw_inst);
1326 }
1327 }
1328
1329 return valid;
1330 }