intel/compiler: Allow MESA_SHADER_KERNEL
[mesa.git] / src / intel / compiler / test_eu_validate.cpp
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25 #include "brw_eu.h"
26 #include "brw_eu_defines.h"
27 #include "util/bitset.h"
28 #include "util/ralloc.h"
29
/* Short platform names the validator tests are instantiated for.  Each name
 * is translated into a PCI device id by validation_test::SetUp().
 */
static const struct gen_info {
   const char *name;
} gens[] = {
   { "brw" }, { "g4x" }, { "ilk" }, { "snb" },
   { "ivb" }, { "byt" }, { "hsw" }, { "bdw" },
   { "chv" }, { "skl" }, { "bxt" }, { "kbl" },
   { "aml" }, { "glk" }, { "cfl" }, { "whl" },
   { "cnl" }, { "icl" }, { "tgl" },
};
53
54 class validation_test: public ::testing::TestWithParam<struct gen_info> {
55 virtual void SetUp();
56
57 public:
58 validation_test();
59 virtual ~validation_test();
60
61 struct brw_codegen *p;
62 struct gen_device_info devinfo;
63 };
64
65 validation_test::validation_test()
66 {
67 p = rzalloc(NULL, struct brw_codegen);
68 memset(&devinfo, 0, sizeof(devinfo));
69 }
70
71 validation_test::~validation_test()
72 {
73 ralloc_free(p);
74 }
75
76 void validation_test::SetUp()
77 {
78 struct gen_info info = GetParam();
79 int devid = gen_device_name_to_pci_device_id(info.name);
80
81 gen_get_device_info_from_pci_id(devid, &devinfo);
82
83 brw_init_codegen(&devinfo, p, p);
84 }
85
86 struct gen_name {
87 template <class ParamType>
88 std::string
89 operator()(const ::testing::TestParamInfo<ParamType>& info) const {
90 return info.param.name;
91 }
92 };
93
94 INSTANTIATE_TEST_CASE_P(eu_assembly, validation_test,
95 ::testing::ValuesIn(gens),
96 gen_name());
97
98 static bool
99 validate(struct brw_codegen *p)
100 {
101 const bool print = getenv("TEST_DEBUG");
102 struct disasm_info *disasm = disasm_initialize(p->devinfo, NULL);
103
104 if (print) {
105 disasm_new_inst_group(disasm, 0);
106 disasm_new_inst_group(disasm, p->next_insn_offset);
107 }
108
109 bool ret = brw_validate_instructions(p->devinfo, p->store, 0,
110 p->next_insn_offset, disasm);
111
112 if (print) {
113 dump_assembly(p->store, disasm, NULL);
114 }
115 ralloc_free(disasm);
116
117 return ret;
118 }
119
/* Shorthands used by the test bodies.  last_inst relies on a variable named
 * `p` being in scope and refers to the most recently emitted instruction.
 */
#define last_inst (&p->store[p->nr_insn - 1])
#define g0        brw_vec8_grf(0, 0)
#define acc0      brw_acc_reg(8)
#define null      brw_null_reg()
#define zero      brw_imm_f(0.0f)
125
126 static void
127 clear_instructions(struct brw_codegen *p)
128 {
129 p->next_insn_offset = 0;
130 p->nr_insn = 0;
131 }
132
133 TEST_P(validation_test, sanity)
134 {
135 brw_ADD(p, g0, g0, g0);
136
137 EXPECT_TRUE(validate(p));
138 }
139
140 TEST_P(validation_test, src0_null_reg)
141 {
142 brw_MOV(p, g0, null);
143
144 EXPECT_FALSE(validate(p));
145 }
146
147 TEST_P(validation_test, src1_null_reg)
148 {
149 brw_ADD(p, g0, g0, null);
150
151 EXPECT_FALSE(validate(p));
152 }
153
154 TEST_P(validation_test, math_src0_null_reg)
155 {
156 if (devinfo.gen >= 6) {
157 gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, null, null);
158 } else {
159 gen4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, null, BRW_MATH_PRECISION_FULL);
160 }
161
162 EXPECT_FALSE(validate(p));
163 }
164
165 TEST_P(validation_test, math_src1_null_reg)
166 {
167 if (devinfo.gen >= 6) {
168 gen6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null);
169 EXPECT_FALSE(validate(p));
170 } else {
171 /* Math instructions on Gen4/5 are actually SEND messages with payloads.
172 * src1 is an immediate message descriptor set by gen4_math.
173 */
174 }
175 }
176
177 TEST_P(validation_test, opcode46)
178 {
179 /* opcode 46 is "push" on Gen 4 and 5
180 * "fork" on Gen 6
181 * reserved on Gen 7
182 * "goto" on Gen8+
183 */
184 brw_next_insn(p, brw_opcode_decode(&devinfo, 46));
185
186 if (devinfo.gen == 7) {
187 EXPECT_FALSE(validate(p));
188 } else {
189 EXPECT_TRUE(validate(p));
190 }
191 }
192
193 TEST_P(validation_test, invalid_exec_size_encoding)
194 {
195 const struct {
196 enum brw_execution_size exec_size;
197 bool expected_result;
198 } test_case[] = {
199 { BRW_EXECUTE_1, true },
200 { BRW_EXECUTE_2, true },
201 { BRW_EXECUTE_4, true },
202 { BRW_EXECUTE_8, true },
203 { BRW_EXECUTE_16, true },
204 { BRW_EXECUTE_32, true },
205
206 { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 1), false },
207 { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 2), false },
208 };
209
210 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
211 brw_MOV(p, g0, g0);
212
213 brw_inst_set_exec_size(&devinfo, last_inst, test_case[i].exec_size);
214 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
215 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
216
217 if (test_case[i].exec_size == BRW_EXECUTE_1) {
218 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
219 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
220 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
221 } else {
222 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
223 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
224 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
225 }
226
227 EXPECT_EQ(test_case[i].expected_result, validate(p));
228
229 clear_instructions(p);
230 }
231 }
232
233 TEST_P(validation_test, invalid_file_encoding)
234 {
235 /* Register file on Gen12 is only one bit */
236 if (devinfo.gen >= 12)
237 return;
238
239 brw_MOV(p, g0, g0);
240 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
241
242 if (devinfo.gen > 6) {
243 EXPECT_FALSE(validate(p));
244 } else {
245 EXPECT_TRUE(validate(p));
246 }
247
248 clear_instructions(p);
249
250 if (devinfo.gen < 6) {
251 gen4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, g0, BRW_MATH_PRECISION_FULL);
252 } else {
253 gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
254 }
255 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
256
257 if (devinfo.gen > 6) {
258 EXPECT_FALSE(validate(p));
259 } else {
260 EXPECT_TRUE(validate(p));
261 }
262 }
263
264 TEST_P(validation_test, invalid_type_encoding)
265 {
266 enum brw_reg_file files[2] = {
267 BRW_GENERAL_REGISTER_FILE,
268 BRW_IMMEDIATE_VALUE,
269 };
270
271 for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
272 const enum brw_reg_file file = files[i];
273 const int num_bits = devinfo.gen >= 8 ? 4 : 3;
274 const int num_encodings = 1 << num_bits;
275
276 /* The data types are encoded into <num_bits> bits to be used in hardware
277 * instructions, so keep a record in a bitset the invalid patterns so
278 * they can be verified to be invalid when used.
279 */
280 BITSET_DECLARE(invalid_encodings, num_encodings);
281
282 const struct {
283 enum brw_reg_type type;
284 bool expected_result;
285 } test_case[] = {
286 { BRW_REGISTER_TYPE_NF, devinfo.gen == 11 && file != IMM },
287 { BRW_REGISTER_TYPE_DF, devinfo.has_64bit_float && (devinfo.gen >= 8 || file != IMM) },
288 { BRW_REGISTER_TYPE_F, true },
289 { BRW_REGISTER_TYPE_HF, devinfo.gen >= 8 },
290 { BRW_REGISTER_TYPE_VF, file == IMM },
291 { BRW_REGISTER_TYPE_Q, devinfo.has_64bit_int },
292 { BRW_REGISTER_TYPE_UQ, devinfo.has_64bit_int },
293 { BRW_REGISTER_TYPE_D, true },
294 { BRW_REGISTER_TYPE_UD, true },
295 { BRW_REGISTER_TYPE_W, true },
296 { BRW_REGISTER_TYPE_UW, true },
297 { BRW_REGISTER_TYPE_B, file == FIXED_GRF },
298 { BRW_REGISTER_TYPE_UB, file == FIXED_GRF },
299 { BRW_REGISTER_TYPE_V, file == IMM },
300 { BRW_REGISTER_TYPE_UV, devinfo.gen >= 6 && file == IMM },
301 };
302
303 /* Initially assume all hardware encodings are invalid */
304 BITSET_ONES(invalid_encodings);
305
306 brw_set_default_exec_size(p, BRW_EXECUTE_4);
307
308 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
309 if (test_case[i].expected_result) {
310 unsigned hw_type = brw_reg_type_to_hw_type(&devinfo, file, test_case[i].type);
311 if (hw_type != INVALID_REG_TYPE) {
312 /* ... and remove valid encodings from the set */
313 assert(BITSET_TEST(invalid_encodings, hw_type));
314 BITSET_CLEAR(invalid_encodings, hw_type);
315 }
316
317 if (file == FIXED_GRF) {
318 struct brw_reg g = retype(g0, test_case[i].type);
319 brw_MOV(p, g, g);
320 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
321 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
322 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
323 } else {
324 enum brw_reg_type t;
325
326 switch (test_case[i].type) {
327 case BRW_REGISTER_TYPE_V:
328 t = BRW_REGISTER_TYPE_W;
329 break;
330 case BRW_REGISTER_TYPE_UV:
331 t = BRW_REGISTER_TYPE_UW;
332 break;
333 case BRW_REGISTER_TYPE_VF:
334 t = BRW_REGISTER_TYPE_F;
335 break;
336 default:
337 t = test_case[i].type;
338 break;
339 }
340
341 struct brw_reg g = retype(g0, t);
342 brw_MOV(p, g, retype(brw_imm_w(0), test_case[i].type));
343 }
344
345 EXPECT_TRUE(validate(p));
346
347 clear_instructions(p);
348 }
349 }
350
351 /* The remaining encodings in invalid_encodings do not have a mapping
352 * from BRW_REGISTER_TYPE_* and must be invalid. Verify that invalid
353 * encodings are rejected by the validator.
354 */
355 int e;
356 BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
357 if (file == FIXED_GRF) {
358 brw_MOV(p, g0, g0);
359 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
360 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
361 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
362 } else {
363 brw_MOV(p, g0, brw_imm_w(0));
364 }
365 brw_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
366 brw_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);
367
368 EXPECT_FALSE(validate(p));
369
370 clear_instructions(p);
371 }
372 }
373 }
374
375 TEST_P(validation_test, invalid_type_encoding_3src_a16)
376 {
377 /* 3-src instructions in align16 mode only supported on Gen6-10 */
378 if (devinfo.gen < 6 || devinfo.gen > 10)
379 return;
380
381 const int num_bits = devinfo.gen >= 8 ? 3 : 2;
382 const int num_encodings = 1 << num_bits;
383
384 /* The data types are encoded into <num_bits> bits to be used in hardware
385 * instructions, so keep a record in a bitset the invalid patterns so
386 * they can be verified to be invalid when used.
387 */
388 BITSET_DECLARE(invalid_encodings, num_encodings);
389
390 const struct {
391 enum brw_reg_type type;
392 bool expected_result;
393 } test_case[] = {
394 { BRW_REGISTER_TYPE_DF, devinfo.gen >= 7 },
395 { BRW_REGISTER_TYPE_F, true },
396 { BRW_REGISTER_TYPE_HF, devinfo.gen >= 8 },
397 { BRW_REGISTER_TYPE_D, devinfo.gen >= 7 },
398 { BRW_REGISTER_TYPE_UD, devinfo.gen >= 7 },
399 };
400
401 /* Initially assume all hardware encodings are invalid */
402 BITSET_ONES(invalid_encodings);
403
404 brw_set_default_access_mode(p, BRW_ALIGN_16);
405 brw_set_default_exec_size(p, BRW_EXECUTE_4);
406
407 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
408 if (test_case[i].expected_result) {
409 unsigned hw_type = brw_reg_type_to_a16_hw_3src_type(&devinfo, test_case[i].type);
410 if (hw_type != INVALID_HW_REG_TYPE) {
411 /* ... and remove valid encodings from the set */
412 assert(BITSET_TEST(invalid_encodings, hw_type));
413 BITSET_CLEAR(invalid_encodings, hw_type);
414 }
415
416 struct brw_reg g = retype(g0, test_case[i].type);
417 if (!brw_reg_type_is_integer(test_case[i].type)) {
418 brw_MAD(p, g, g, g, g);
419 } else {
420 brw_BFE(p, g, g, g, g);
421 }
422
423 EXPECT_TRUE(validate(p));
424
425 clear_instructions(p);
426 }
427 }
428
429 /* The remaining encodings in invalid_encodings do not have a mapping
430 * from BRW_REGISTER_TYPE_* and must be invalid. Verify that invalid
431 * encodings are rejected by the validator.
432 */
433 int e;
434 BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
435 for (unsigned i = 0; i < 2; i++) {
436 if (i == 0) {
437 brw_MAD(p, g0, g0, g0, g0);
438 } else {
439 brw_BFE(p, g0, g0, g0, g0);
440 }
441
442 brw_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
443 brw_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);
444
445 EXPECT_FALSE(validate(p));
446
447 clear_instructions(p);
448
449 if (devinfo.gen == 6)
450 break;
451 }
452 }
453 }
454
455 TEST_P(validation_test, invalid_type_encoding_3src_a1)
456 {
457 /* 3-src instructions in align1 mode only supported on Gen10+ */
458 if (devinfo.gen < 10)
459 return;
460
461 const int num_bits = 3 + 1 /* for exec_type */;
462 const int num_encodings = 1 << num_bits;
463
464 /* The data types are encoded into <num_bits> bits to be used in hardware
465 * instructions, so keep a record in a bitset the invalid patterns so
466 * they can be verified to be invalid when used.
467 */
468 BITSET_DECLARE(invalid_encodings, num_encodings);
469
470 const struct {
471 enum brw_reg_type type;
472 unsigned exec_type;
473 bool expected_result;
474 } test_case[] = {
475 #define E(x) ((unsigned)BRW_ALIGN1_3SRC_EXEC_TYPE_##x)
476 { BRW_REGISTER_TYPE_NF, E(FLOAT), devinfo.gen == 11 },
477 { BRW_REGISTER_TYPE_DF, E(FLOAT), devinfo.has_64bit_float },
478 { BRW_REGISTER_TYPE_F, E(FLOAT), true },
479 { BRW_REGISTER_TYPE_HF, E(FLOAT), true },
480 { BRW_REGISTER_TYPE_D, E(INT), true },
481 { BRW_REGISTER_TYPE_UD, E(INT), true },
482 { BRW_REGISTER_TYPE_W, E(INT), true },
483 { BRW_REGISTER_TYPE_UW, E(INT), true },
484
485 /* There are no ternary instructions that can operate on B-type sources
486 * on Gen11-12. Src1/Src2 cannot be B-typed either.
487 */
488 { BRW_REGISTER_TYPE_B, E(INT), devinfo.gen == 10 },
489 { BRW_REGISTER_TYPE_UB, E(INT), devinfo.gen == 10 },
490 };
491
492 /* Initially assume all hardware encodings are invalid */
493 BITSET_ONES(invalid_encodings);
494
495 brw_set_default_access_mode(p, BRW_ALIGN_1);
496 brw_set_default_exec_size(p, BRW_EXECUTE_4);
497
498 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
499 if (test_case[i].expected_result) {
500 unsigned hw_type = brw_reg_type_to_a1_hw_3src_type(&devinfo, test_case[i].type);
501 unsigned hw_exec_type = hw_type | (test_case[i].exec_type << 3);
502 if (hw_type != INVALID_HW_REG_TYPE) {
503 /* ... and remove valid encodings from the set */
504 assert(BITSET_TEST(invalid_encodings, hw_exec_type));
505 BITSET_CLEAR(invalid_encodings, hw_exec_type);
506 }
507
508 struct brw_reg g = retype(g0, test_case[i].type);
509 if (!brw_reg_type_is_integer(test_case[i].type)) {
510 brw_MAD(p, g, g, g, g);
511 } else {
512 brw_BFE(p, g, g, g, g);
513 }
514
515 EXPECT_TRUE(validate(p));
516
517 clear_instructions(p);
518 }
519 }
520
521 /* The remaining encodings in invalid_encodings do not have a mapping
522 * from BRW_REGISTER_TYPE_* and must be invalid. Verify that invalid
523 * encodings are rejected by the validator.
524 */
525 int e;
526 BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
527 const unsigned hw_type = e & 0x7;
528 const unsigned exec_type = e >> 3;
529
530 for (unsigned i = 0; i < 2; i++) {
531 if (i == 0) {
532 brw_MAD(p, g0, g0, g0, g0);
533 brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
534 } else {
535 brw_CSEL(p, g0, g0, g0, g0);
536 brw_inst_set_3src_cond_modifier(&devinfo, last_inst, BRW_CONDITIONAL_NZ);
537 brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
538 }
539
540 brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, exec_type);
541 brw_inst_set_3src_a1_dst_hw_type (&devinfo, last_inst, hw_type);
542 brw_inst_set_3src_a1_src0_hw_type(&devinfo, last_inst, hw_type);
543 brw_inst_set_3src_a1_src1_hw_type(&devinfo, last_inst, hw_type);
544 brw_inst_set_3src_a1_src2_hw_type(&devinfo, last_inst, hw_type);
545
546 EXPECT_FALSE(validate(p));
547
548 clear_instructions(p);
549 }
550 }
551 }
552
553 TEST_P(validation_test, 3src_inst_access_mode)
554 {
555 /* 3-src instructions only supported on Gen6+ */
556 if (devinfo.gen < 6)
557 return;
558
559 /* No access mode bit on Gen12+ */
560 if (devinfo.gen >= 12)
561 return;
562
563 const struct {
564 unsigned mode;
565 bool expected_result;
566 } test_case[] = {
567 { BRW_ALIGN_1, devinfo.gen >= 10 },
568 { BRW_ALIGN_16, devinfo.gen <= 10 },
569 };
570
571 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
572 if (devinfo.gen < 10)
573 brw_set_default_access_mode(p, BRW_ALIGN_16);
574
575 brw_MAD(p, g0, g0, g0, g0);
576 brw_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
577
578 EXPECT_EQ(test_case[i].expected_result, validate(p));
579
580 clear_instructions(p);
581 }
582 }
583
584 /* When the Execution Data Type is wider than the destination data type, the
585 * destination must [...] specify a HorzStride equal to the ratio in sizes of
586 * the two data types.
587 */
588 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
589 {
590 brw_ADD(p, g0, g0, g0);
591 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
592 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
593 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
594
595 EXPECT_FALSE(validate(p));
596
597 clear_instructions(p);
598
599 brw_ADD(p, g0, g0, g0);
600 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
601 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
602 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
603 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
604
605 EXPECT_TRUE(validate(p));
606 }
607
608 /* When the Execution Data Type is wider than the destination data type, the
609 * destination must be aligned as required by the wider execution data type
610 * [...]
611 */
612 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
613 {
614 brw_ADD(p, g0, g0, g0);
615 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
616 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
617 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
618 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
619 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
620
621 EXPECT_FALSE(validate(p));
622
623 clear_instructions(p);
624
625 brw_ADD(p, g0, g0, g0);
626 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
627 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
628 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
629 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
630 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
631 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
632 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
633 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
634 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
635 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
636 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
637 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
638
639 EXPECT_TRUE(validate(p));
640 }
641
642 /* ExecSize must be greater than or equal to Width. */
643 TEST_P(validation_test, exec_size_less_than_width)
644 {
645 brw_ADD(p, g0, g0, g0);
646 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_16);
647
648 EXPECT_FALSE(validate(p));
649
650 clear_instructions(p);
651
652 brw_ADD(p, g0, g0, g0);
653 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_16);
654
655 EXPECT_FALSE(validate(p));
656 }
657
658 /* If ExecSize = Width and HorzStride ≠ 0,
659 * VertStride must be set to Width * HorzStride.
660 */
661 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
662 {
663 brw_ADD(p, g0, g0, g0);
664 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
665
666 EXPECT_FALSE(validate(p));
667
668 clear_instructions(p);
669
670 brw_ADD(p, g0, g0, g0);
671 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
672
673 EXPECT_FALSE(validate(p));
674 }
675
676 /* If Width = 1, HorzStride must be 0 regardless of the values
677 * of ExecSize and VertStride.
678 */
679 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
680 {
681 brw_ADD(p, g0, g0, g0);
682 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
683 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
684 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
685
686 EXPECT_FALSE(validate(p));
687
688 clear_instructions(p);
689
690 brw_ADD(p, g0, g0, g0);
691 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
692 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
693 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
694
695 EXPECT_FALSE(validate(p));
696 }
697
698 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
699 TEST_P(validation_test, scalar_region_must_be_0_1_0)
700 {
701 struct brw_reg g0_0 = brw_vec1_grf(0, 0);
702
703 brw_ADD(p, g0, g0, g0_0);
704 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
705 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
706 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
707 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
708
709 EXPECT_FALSE(validate(p));
710
711 clear_instructions(p);
712
713 brw_ADD(p, g0, g0_0, g0);
714 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
715 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
716 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
717 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
718
719 EXPECT_FALSE(validate(p));
720 }
721
722 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
723 * of ExecSize.
724 */
725 TEST_P(validation_test, zero_stride_implies_0_1_0)
726 {
727 brw_ADD(p, g0, g0, g0);
728 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
729 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
730 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
731
732 EXPECT_FALSE(validate(p));
733
734 clear_instructions(p);
735
736 brw_ADD(p, g0, g0, g0);
737 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
738 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
739 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
740
741 EXPECT_FALSE(validate(p));
742 }
743
744 /* Dst.HorzStride must not be 0. */
745 TEST_P(validation_test, dst_horizontal_stride_0)
746 {
747 brw_ADD(p, g0, g0, g0);
748 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
749
750 EXPECT_FALSE(validate(p));
751
752 clear_instructions(p);
753
754 /* Align16 does not exist on Gen11+ */
755 if (devinfo.gen >= 11)
756 return;
757
758 brw_set_default_access_mode(p, BRW_ALIGN_16);
759
760 brw_ADD(p, g0, g0, g0);
761 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
762
763 EXPECT_FALSE(validate(p));
764 }
765
766 /* VertStride must be used to cross BRW_GENERAL_REGISTER_FILE register boundaries. This rule implies
767 * that elements within a 'Width' cannot cross BRW_GENERAL_REGISTER_FILE boundaries.
768 */
769 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
770 {
771 brw_ADD(p, g0, g0, g0);
772 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
773
774 EXPECT_FALSE(validate(p));
775
776 clear_instructions(p);
777
778 brw_ADD(p, g0, g0, g0);
779 brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
780
781 EXPECT_FALSE(validate(p));
782
783 clear_instructions(p);
784
785 brw_ADD(p, g0, g0, g0);
786 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
787 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
788 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
789
790 EXPECT_FALSE(validate(p));
791
792 clear_instructions(p);
793
794 brw_ADD(p, g0, g0, g0);
795 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
796 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
797 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
798
799 EXPECT_FALSE(validate(p));
800 }
801
802 /* Destination Horizontal must be 1 in Align16 */
803 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
804 {
805 /* Align16 does not exist on Gen11+ */
806 if (devinfo.gen >= 11)
807 return;
808
809 brw_set_default_access_mode(p, BRW_ALIGN_16);
810
811 brw_ADD(p, g0, g0, g0);
812 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
813
814 EXPECT_FALSE(validate(p));
815
816 clear_instructions(p);
817
818 brw_ADD(p, g0, g0, g0);
819 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
820
821 EXPECT_TRUE(validate(p));
822 }
823
824 /* VertStride must be 0 or 4 in Align16 */
825 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
826 {
827 /* Align16 does not exist on Gen11+ */
828 if (devinfo.gen >= 11)
829 return;
830
831 const struct {
832 enum brw_vertical_stride vstride;
833 bool expected_result;
834 } vstride[] = {
835 { BRW_VERTICAL_STRIDE_0, true },
836 { BRW_VERTICAL_STRIDE_1, false },
837 { BRW_VERTICAL_STRIDE_2, devinfo.is_haswell || devinfo.gen >= 8 },
838 { BRW_VERTICAL_STRIDE_4, true },
839 { BRW_VERTICAL_STRIDE_8, false },
840 { BRW_VERTICAL_STRIDE_16, false },
841 { BRW_VERTICAL_STRIDE_32, false },
842 { BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
843 };
844
845 brw_set_default_access_mode(p, BRW_ALIGN_16);
846
847 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
848 brw_ADD(p, g0, g0, g0);
849 brw_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
850
851 EXPECT_EQ(vstride[i].expected_result, validate(p));
852
853 clear_instructions(p);
854 }
855
856 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
857 brw_ADD(p, g0, g0, g0);
858 brw_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
859
860 EXPECT_EQ(vstride[i].expected_result, validate(p));
861
862 clear_instructions(p);
863 }
864 }
865
866 /* In Direct Addressing mode, a source cannot span more than 2 adjacent BRW_GENERAL_REGISTER_FILE
867 * registers.
868 */
869 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
870 {
871 brw_ADD(p, g0, g0, g0);
872 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
873 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
874 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
875 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
876 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
877 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
878 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
879
880 EXPECT_FALSE(validate(p));
881
882 clear_instructions(p);
883
884 brw_ADD(p, g0, g0, g0);
885 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
886 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
887 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
888 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
889 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
890 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
891 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
892 brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
893
894 EXPECT_TRUE(validate(p));
895
896 clear_instructions(p);
897
898 brw_ADD(p, g0, g0, g0);
899 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
900
901 EXPECT_TRUE(validate(p));
902 }
903
904 /* A destination cannot span more than 2 adjacent BRW_GENERAL_REGISTER_FILE registers. */
905 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
906 {
907 brw_ADD(p, g0, g0, g0);
908 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
909 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
910 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
911 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
912 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
913
914 EXPECT_FALSE(validate(p));
915
916 clear_instructions(p);
917
918 brw_ADD(p, g0, g0, g0);
919 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_8);
920 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
921 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
922 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
923 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
924 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
925 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
926 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
927 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
928 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
929 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
930 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
931
932 EXPECT_TRUE(validate(p));
933 }
934
935 TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
936 {
937 /* Writes to dest are to the lower OWord */
938 brw_ADD(p, g0, g0, g0);
939 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
940 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
941 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
942 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
943 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
944 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
945
946 EXPECT_TRUE(validate(p));
947
948 clear_instructions(p);
949
950 /* Writes to dest are to the upper OWord */
951 brw_ADD(p, g0, g0, g0);
952 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
953 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
954 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
955 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
956 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
957 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
958 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
959
960 EXPECT_TRUE(validate(p));
961
962 clear_instructions(p);
963
964 /* Writes to dest are evenly split between OWords */
965 brw_ADD(p, g0, g0, g0);
966 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
967 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
968 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
969 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
970 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
971 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
972 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
973
974 EXPECT_TRUE(validate(p));
975
976 clear_instructions(p);
977
978 /* Writes to dest are uneven between OWords */
979 brw_ADD(p, g0, g0, g0);
980 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
981 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 10);
982 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
983 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
984 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
985 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
986 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
987 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
988 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
989 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
990 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
991
992 if (devinfo.gen >= 9) {
993 EXPECT_TRUE(validate(p));
994 } else {
995 EXPECT_FALSE(validate(p));
996 }
997 }
998
999 TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
1000 {
1001 brw_ADD(p, g0, g0, g0);
1002 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);
1003
1004 if (devinfo.gen >= 9) {
1005 EXPECT_TRUE(validate(p));
1006 } else {
1007 EXPECT_FALSE(validate(p));
1008 }
1009
1010 clear_instructions(p);
1011
1012 brw_ADD(p, g0, g0, g0);
1013 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1014
1015 EXPECT_TRUE(validate(p));
1016
1017 clear_instructions(p);
1018
1019 if (devinfo.gen >= 6) {
1020 gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
1021
1022 EXPECT_TRUE(validate(p));
1023
1024 clear_instructions(p);
1025
1026 gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
1027 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);
1028
1029 EXPECT_FALSE(validate(p));
1030 }
1031 }
1032
1033 TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
1034 {
1035 brw_ADD(p, g0, g0, g0);
1036 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
1037 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
1038 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16);
1039 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
1040 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
1041 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
1042 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1043 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
1044 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1045
1046 if (devinfo.gen <= 7) {
1047 EXPECT_FALSE(validate(p));
1048 } else {
1049 EXPECT_TRUE(validate(p));
1050 }
1051
1052 clear_instructions(p);
1053
1054 brw_ADD(p, g0, g0, g0);
1055 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
1056 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
1057 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1058 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
1059 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
1060 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_8);
1061 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
1062 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1063
1064 EXPECT_TRUE(validate(p));
1065 }
1066
1067 TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
1068 {
1069 brw_MOV(p, g0, g0);
1070 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1071 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1072 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1073 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1074 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
1075 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1076 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1077 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1078
1079 if (devinfo.gen <= 7) {
1080 EXPECT_FALSE(validate(p));
1081 } else {
1082 EXPECT_TRUE(validate(p));
1083 }
1084
1085 clear_instructions(p);
1086
1087 brw_MOV(p, g0, g0);
1088 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
1089 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
1090 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
1091 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
1092 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1093
1094 if (devinfo.gen <= 7) {
1095 EXPECT_FALSE(validate(p));
1096 } else {
1097 EXPECT_TRUE(validate(p));
1098 }
1099 }
1100
1101 TEST_P(validation_test, one_src_two_dst)
1102 {
1103 struct brw_reg g0_0 = brw_vec1_grf(0, 0);
1104
1105 brw_ADD(p, g0, g0_0, g0_0);
1106 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1107
1108 EXPECT_TRUE(validate(p));
1109
1110 clear_instructions(p);
1111
1112 brw_ADD(p, g0, g0, g0);
1113 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1114 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_D);
1115 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1116 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1117
1118 EXPECT_TRUE(validate(p));
1119
1120 clear_instructions(p);
1121
1122 brw_ADD(p, g0, g0, g0);
1123 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1124 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1125 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1126 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1127 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1128 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
1129 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
1130 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
1131
1132 if (devinfo.gen >= 8) {
1133 EXPECT_TRUE(validate(p));
1134 } else {
1135 EXPECT_FALSE(validate(p));
1136 }
1137
1138 clear_instructions(p);
1139
1140 brw_ADD(p, g0, g0, g0);
1141 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1142 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1143 brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1144 brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1145 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
1146 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
1147 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
1148 brw_inst_set_src1_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
1149
1150 if (devinfo.gen >= 8) {
1151 EXPECT_TRUE(validate(p));
1152 } else {
1153 EXPECT_FALSE(validate(p));
1154 }
1155 }
1156
1157 TEST_P(validation_test, packed_byte_destination)
1158 {
1159 static const struct {
1160 enum brw_reg_type dst_type;
1161 enum brw_reg_type src_type;
1162 bool neg, abs, sat;
1163 bool expected_result;
1164 } move[] = {
1165 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_UB, 0, 0, 0, true },
1166 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_B , 0, 0, 0, true },
1167 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_B , 0, 0, 0, true },
1168 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_UB, 0, 0, 0, true },
1169
1170 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_UB, 1, 0, 0, false },
1171 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_B , 1, 0, 0, false },
1172 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_B , 1, 0, 0, false },
1173 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_UB, 1, 0, 0, false },
1174
1175 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_UB, 0, 1, 0, false },
1176 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_B , 0, 1, 0, false },
1177 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_B , 0, 1, 0, false },
1178 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_UB, 0, 1, 0, false },
1179
1180 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_UB, 0, 0, 1, false },
1181 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_B , 0, 0, 1, false },
1182 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_B , 0, 0, 1, false },
1183 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_UB, 0, 0, 1, false },
1184
1185 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_UW, 0, 0, 0, false },
1186 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_W , 0, 0, 0, false },
1187 { BRW_REGISTER_TYPE_UB, BRW_REGISTER_TYPE_UD, 0, 0, 0, false },
1188 { BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_D , 0, 0, 0, false },
1189 };
1190
1191 for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
1192 brw_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
1193 brw_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
1194 brw_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
1195 brw_inst_set_saturate(&devinfo, last_inst, move[i].sat);
1196
1197 EXPECT_EQ(move[i].expected_result, validate(p));
1198
1199 clear_instructions(p);
1200 }
1201
1202 brw_SEL(p, retype(g0, BRW_REGISTER_TYPE_UB),
1203 retype(g0, BRW_REGISTER_TYPE_UB),
1204 retype(g0, BRW_REGISTER_TYPE_UB));
1205 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
1206
1207 EXPECT_FALSE(validate(p));
1208
1209 clear_instructions(p);
1210
1211 brw_SEL(p, retype(g0, BRW_REGISTER_TYPE_B),
1212 retype(g0, BRW_REGISTER_TYPE_B),
1213 retype(g0, BRW_REGISTER_TYPE_B));
1214 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
1215
1216 EXPECT_FALSE(validate(p));
1217 }
1218
1219 TEST_P(validation_test, byte_destination_relaxed_alignment)
1220 {
1221 brw_SEL(p, retype(g0, BRW_REGISTER_TYPE_B),
1222 retype(g0, BRW_REGISTER_TYPE_W),
1223 retype(g0, BRW_REGISTER_TYPE_W));
1224 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
1225 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1226
1227 EXPECT_TRUE(validate(p));
1228
1229 clear_instructions(p);
1230
1231 brw_SEL(p, retype(g0, BRW_REGISTER_TYPE_B),
1232 retype(g0, BRW_REGISTER_TYPE_W),
1233 retype(g0, BRW_REGISTER_TYPE_W));
1234 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
1235 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1236 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 1);
1237
1238 if (devinfo.gen > 4 || devinfo.is_g4x) {
1239 EXPECT_TRUE(validate(p));
1240 } else {
1241 EXPECT_FALSE(validate(p));
1242 }
1243 }
1244
1245 TEST_P(validation_test, byte_64bit_conversion)
1246 {
1247 static const struct {
1248 enum brw_reg_type dst_type;
1249 enum brw_reg_type src_type;
1250 unsigned dst_stride;
1251 bool expected_result;
1252 } inst[] = {
1253 #define INST(dst_type, src_type, dst_stride, expected_result) \
1254 { \
1255 BRW_REGISTER_TYPE_##dst_type, \
1256 BRW_REGISTER_TYPE_##src_type, \
1257 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1258 expected_result, \
1259 }
1260
1261 INST(B, Q, 1, false),
1262 INST(B, UQ, 1, false),
1263 INST(B, DF, 1, false),
1264 INST(UB, Q, 1, false),
1265 INST(UB, UQ, 1, false),
1266 INST(UB, DF, 1, false),
1267
1268 INST(B, Q, 2, false),
1269 INST(B, UQ, 2, false),
1270 INST(B , DF, 2, false),
1271 INST(UB, Q, 2, false),
1272 INST(UB, UQ, 2, false),
1273 INST(UB, DF, 2, false),
1274
1275 INST(B, Q, 4, false),
1276 INST(B, UQ, 4, false),
1277 INST(B, DF, 4, false),
1278 INST(UB, Q, 4, false),
1279 INST(UB, UQ, 4, false),
1280 INST(UB, DF, 4, false),
1281
1282 #undef INST
1283 };
1284
1285 if (devinfo.gen < 8)
1286 return;
1287
1288 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1289 if (!devinfo.has_64bit_float &&
1290 inst[i].src_type == BRW_REGISTER_TYPE_DF)
1291 continue;
1292
1293 if (!devinfo.has_64bit_int &&
1294 (inst[i].src_type == BRW_REGISTER_TYPE_Q ||
1295 inst[i].src_type == BRW_REGISTER_TYPE_UQ))
1296 continue;
1297
1298 brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1299 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1300 EXPECT_EQ(inst[i].expected_result, validate(p));
1301
1302 clear_instructions(p);
1303 }
1304 }
1305
1306 TEST_P(validation_test, half_float_conversion)
1307 {
1308 static const struct {
1309 enum brw_reg_type dst_type;
1310 enum brw_reg_type src_type;
1311 unsigned dst_stride;
1312 unsigned dst_subnr;
1313 bool expected_result_bdw;
1314 bool expected_result_chv_gen9;
1315 } inst[] = {
1316 #define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
1317 { \
1318 BRW_REGISTER_TYPE_##dst_type, \
1319 BRW_REGISTER_TYPE_##src_type, \
1320 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1321 dst_subnr, \
1322 expected_result, \
1323 expected_result, \
1324 }
1325 #define INST_S(dst_type, src_type, dst_stride, dst_subnr, \
1326 expected_result_bdw, expected_result_chv_gen9) \
1327 { \
1328 BRW_REGISTER_TYPE_##dst_type, \
1329 BRW_REGISTER_TYPE_##src_type, \
1330 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1331 dst_subnr, \
1332 expected_result_bdw, \
1333 expected_result_chv_gen9, \
1334 }
1335
1336 /* MOV to half-float destination */
1337 INST_C(HF, B, 1, 0, false),
1338 INST_C(HF, W, 1, 0, false),
1339 INST_C(HF, HF, 1, 0, true),
1340 INST_C(HF, HF, 1, 2, true),
1341 INST_C(HF, D, 1, 0, false),
1342 INST_S(HF, F, 1, 0, false, true),
1343 INST_C(HF, Q, 1, 0, false),
1344 INST_C(HF, B, 2, 0, true),
1345 INST_C(HF, B, 2, 2, false),
1346 INST_C(HF, W, 2, 0, true),
1347 INST_C(HF, W, 2, 2, false),
1348 INST_C(HF, HF, 2, 0, true),
1349 INST_C(HF, HF, 2, 2, true),
1350 INST_C(HF, D, 2, 0, true),
1351 INST_C(HF, D, 2, 2, false),
1352 INST_C(HF, F, 2, 0, true),
1353 INST_S(HF, F, 2, 2, false, true),
1354 INST_C(HF, Q, 2, 0, false),
1355 INST_C(HF, DF, 2, 0, false),
1356 INST_C(HF, B, 4, 0, false),
1357 INST_C(HF, W, 4, 0, false),
1358 INST_C(HF, HF, 4, 0, true),
1359 INST_C(HF, HF, 4, 2, true),
1360 INST_C(HF, D, 4, 0, false),
1361 INST_C(HF, F, 4, 0, false),
1362 INST_C(HF, Q, 4, 0, false),
1363 INST_C(HF, DF, 4, 0, false),
1364
1365 /* MOV from half-float source */
1366 INST_C( B, HF, 1, 0, false),
1367 INST_C( W, HF, 1, 0, false),
1368 INST_C( D, HF, 1, 0, true),
1369 INST_C( D, HF, 1, 4, true),
1370 INST_C( F, HF, 1, 0, true),
1371 INST_C( F, HF, 1, 4, true),
1372 INST_C( Q, HF, 1, 0, false),
1373 INST_C(DF, HF, 1, 0, false),
1374 INST_C( B, HF, 2, 0, false),
1375 INST_C( W, HF, 2, 0, true),
1376 INST_C( W, HF, 2, 2, false),
1377 INST_C( D, HF, 2, 0, false),
1378 INST_C( F, HF, 2, 0, true),
1379 INST_C( B, HF, 4, 0, true),
1380 INST_C( B, HF, 4, 1, false),
1381 INST_C( W, HF, 4, 0, false),
1382
1383 #undef INST_C
1384 #undef INST_S
1385 };
1386
1387 if (devinfo.gen < 8)
1388 return;
1389
1390 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1391 if (!devinfo.has_64bit_float &&
1392 (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
1393 inst[i].src_type == BRW_REGISTER_TYPE_DF))
1394 continue;
1395
1396 if (!devinfo.has_64bit_int &&
1397 (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
1398 inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
1399 inst[i].src_type == BRW_REGISTER_TYPE_Q ||
1400 inst[i].src_type == BRW_REGISTER_TYPE_UQ))
1401 continue;
1402
1403 brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1404
1405 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
1406
1407 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1408 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1409
1410 if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
1411 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1412 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
1413 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1414 } else {
1415 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1416 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1417 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1418 }
1419
1420 if (devinfo.is_cherryview || devinfo.gen >= 9)
1421 EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
1422 else
1423 EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1424
1425 clear_instructions(p);
1426 }
1427 }
1428
1429 TEST_P(validation_test, mixed_float_source_indirect_addressing)
1430 {
1431 static const struct {
1432 enum brw_reg_type dst_type;
1433 enum brw_reg_type src0_type;
1434 enum brw_reg_type src1_type;
1435 unsigned dst_stride;
1436 bool dst_indirect;
1437 bool src0_indirect;
1438 bool expected_result;
1439 } inst[] = {
1440 #define INST(dst_type, src0_type, src1_type, \
1441 dst_stride, dst_indirect, src0_indirect, expected_result) \
1442 { \
1443 BRW_REGISTER_TYPE_##dst_type, \
1444 BRW_REGISTER_TYPE_##src0_type, \
1445 BRW_REGISTER_TYPE_##src1_type, \
1446 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1447 dst_indirect, \
1448 src0_indirect, \
1449 expected_result, \
1450 }
1451
1452 /* Source and dest are mixed float: indirect src addressing not allowed */
1453 INST(HF, F, F, 2, false, false, true),
1454 INST(HF, F, F, 2, true, false, true),
1455 INST(HF, F, F, 2, false, true, false),
1456 INST(HF, F, F, 2, true, true, false),
1457 INST( F, HF, F, 1, false, false, true),
1458 INST( F, HF, F, 1, true, false, true),
1459 INST( F, HF, F, 1, false, true, false),
1460 INST( F, HF, F, 1, true, true, false),
1461
1462 INST(HF, HF, F, 2, false, false, true),
1463 INST(HF, HF, F, 2, true, false, true),
1464 INST(HF, HF, F, 2, false, true, false),
1465 INST(HF, HF, F, 2, true, true, false),
1466 INST( F, F, HF, 1, false, false, true),
1467 INST( F, F, HF, 1, true, false, true),
1468 INST( F, F, HF, 1, false, true, false),
1469 INST( F, F, HF, 1, true, true, false),
1470
1471 #undef INST
1472 };
1473
1474 if (devinfo.gen < 8)
1475 return;
1476
1477 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1478 brw_ADD(p, retype(g0, inst[i].dst_type),
1479 retype(g0, inst[i].src0_type),
1480 retype(g0, inst[i].src1_type));
1481
1482 brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
1483 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1484 brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
1485
1486 EXPECT_EQ(inst[i].expected_result, validate(p));
1487
1488 clear_instructions(p);
1489 }
1490 }
1491
1492 TEST_P(validation_test, mixed_float_align1_simd16)
1493 {
1494 static const struct {
1495 unsigned exec_size;
1496 enum brw_reg_type dst_type;
1497 enum brw_reg_type src0_type;
1498 enum brw_reg_type src1_type;
1499 unsigned dst_stride;
1500 bool expected_result;
1501 } inst[] = {
1502 #define INST(exec_size, dst_type, src0_type, src1_type, \
1503 dst_stride, expected_result) \
1504 { \
1505 BRW_EXECUTE_##exec_size, \
1506 BRW_REGISTER_TYPE_##dst_type, \
1507 BRW_REGISTER_TYPE_##src0_type, \
1508 BRW_REGISTER_TYPE_##src1_type, \
1509 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1510 expected_result, \
1511 }
1512
1513 /* No SIMD16 in mixed mode when destination is packed f16 */
1514 INST( 8, HF, F, HF, 2, true),
1515 INST(16, HF, HF, F, 2, true),
1516 INST(16, HF, HF, F, 1, false),
1517 INST(16, HF, F, HF, 1, false),
1518
1519 /* No SIMD16 in mixed mode when destination is f32 */
1520 INST( 8, F, HF, F, 1, true),
1521 INST( 8, F, F, HF, 1, true),
1522 INST(16, F, HF, F, 1, false),
1523 INST(16, F, F, HF, 1, false),
1524
1525 #undef INST
1526 };
1527
1528 if (devinfo.gen < 8)
1529 return;
1530
1531 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1532 brw_ADD(p, retype(g0, inst[i].dst_type),
1533 retype(g0, inst[i].src0_type),
1534 retype(g0, inst[i].src1_type));
1535
1536 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1537
1538 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1539
1540 EXPECT_EQ(inst[i].expected_result, validate(p));
1541
1542 clear_instructions(p);
1543 }
1544 }
1545
1546 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
1547 {
1548 static const struct {
1549 enum brw_reg_type dst_type;
1550 enum brw_reg_type src0_type;
1551 enum brw_reg_type src1_type;
1552 unsigned dst_stride;
1553 bool read_acc;
1554 unsigned subnr;
1555 bool expected_result_bdw;
1556 bool expected_result_chv_skl;
1557 } inst[] = {
1558 #define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr, \
1559 expected_result_bdw, expected_result_chv_skl) \
1560 { \
1561 BRW_REGISTER_TYPE_##dst_type, \
1562 BRW_REGISTER_TYPE_##src0_type, \
1563 BRW_REGISTER_TYPE_##src1_type, \
1564 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1565 read_acc, \
1566 subnr, \
1567 expected_result_bdw, \
1568 expected_result_chv_skl, \
1569 }
1570
1571 /* Destination is not packed */
1572 INST(HF, HF, F, 2, true, 0, true, true),
1573 INST(HF, HF, F, 2, true, 2, true, true),
1574 INST(HF, HF, F, 2, true, 4, true, true),
1575 INST(HF, HF, F, 2, true, 8, true, true),
1576 INST(HF, HF, F, 2, true, 16, true, true),
1577
1578 /* Destination is packed, we don't read acc */
1579 INST(HF, HF, F, 1, false, 0, false, true),
1580 INST(HF, HF, F, 1, false, 2, false, true),
1581 INST(HF, HF, F, 1, false, 4, false, true),
1582 INST(HF, HF, F, 1, false, 8, false, true),
1583 INST(HF, HF, F, 1, false, 16, false, true),
1584
1585 /* Destination is packed, we read acc */
1586 INST(HF, HF, F, 1, true, 0, false, false),
1587 INST(HF, HF, F, 1, true, 2, false, false),
1588 INST(HF, HF, F, 1, true, 4, false, false),
1589 INST(HF, HF, F, 1, true, 8, false, false),
1590 INST(HF, HF, F, 1, true, 16, false, false),
1591
1592 #undef INST
1593 };
1594
1595 if (devinfo.gen < 8)
1596 return;
1597
1598 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1599 brw_ADD(p, retype(g0, inst[i].dst_type),
1600 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
1601 retype(g0, inst[i].src1_type));
1602
1603 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1604
1605 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
1606
1607 if (devinfo.is_cherryview || devinfo.gen >= 9)
1608 EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1609 else
1610 EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1611
1612 clear_instructions(p);
1613 }
1614 }
1615
1616 TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
1617 {
1618 static const struct {
1619 unsigned exec_size;
1620 unsigned opcode;
1621 enum brw_reg_type dst_type;
1622 enum brw_reg_type src0_type;
1623 enum brw_reg_type src1_type;
1624 unsigned dst_stride;
1625 bool read_acc;
1626 bool expected_result_bdw;
1627 bool expected_result_chv_skl;
1628 } inst[] = {
1629 #define INST(exec_size, opcode, dst_type, src0_type, src1_type, \
1630 dst_stride, read_acc,expected_result_bdw, \
1631 expected_result_chv_skl) \
1632 { \
1633 BRW_EXECUTE_##exec_size, \
1634 BRW_OPCODE_##opcode, \
1635 BRW_REGISTER_TYPE_##dst_type, \
1636 BRW_REGISTER_TYPE_##src0_type, \
1637 BRW_REGISTER_TYPE_##src1_type, \
1638 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1639 read_acc, \
1640 expected_result_bdw, \
1641 expected_result_chv_skl, \
1642 }
1643
1644 /* Packed fp16 dest with implicit acc needs hstride=2 */
1645 INST(8, MAC, HF, HF, F, 1, false, false, false),
1646 INST(8, MAC, HF, HF, F, 2, false, true, true),
1647 INST(8, MAC, HF, F, HF, 1, false, false, false),
1648 INST(8, MAC, HF, F, HF, 2, false, true, true),
1649
1650 /* Packed fp16 dest with explicit acc needs hstride=2 */
1651 INST(8, ADD, HF, HF, F, 1, true, false, false),
1652 INST(8, ADD, HF, HF, F, 2, true, true, true),
1653 INST(8, ADD, HF, F, HF, 1, true, false, false),
1654 INST(8, ADD, HF, F, HF, 2, true, true, true),
1655
1656 /* If destination is not fp16, restriction doesn't apply */
1657 INST(8, MAC, F, HF, F, 1, false, true, true),
1658 INST(8, MAC, F, HF, F, 2, false, true, true),
1659
1660 /* If there is no implicit/explicit acc, restriction doesn't apply */
1661 INST(8, ADD, HF, HF, F, 1, false, false, true),
1662 INST(8, ADD, HF, HF, F, 2, false, true, true),
1663 INST(8, ADD, HF, F, HF, 1, false, false, true),
1664 INST(8, ADD, HF, F, HF, 2, false, true, true),
1665 INST(8, ADD, F, HF, F, 1, false, true, true),
1666 INST(8, ADD, F, HF, F, 2, false, true, true),
1667
1668 #undef INST
1669 };
1670
1671 if (devinfo.gen < 8)
1672 return;
1673
1674 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1675 if (inst[i].opcode == BRW_OPCODE_MAC) {
1676 brw_MAC(p, retype(g0, inst[i].dst_type),
1677 retype(g0, inst[i].src0_type),
1678 retype(g0, inst[i].src1_type));
1679 } else {
1680 assert(inst[i].opcode == BRW_OPCODE_ADD);
1681 brw_ADD(p, retype(g0, inst[i].dst_type),
1682 retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
1683 retype(g0, inst[i].src1_type));
1684 }
1685
1686 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1687
1688 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1689
1690 if (devinfo.is_cherryview || devinfo.gen >= 9)
1691 EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1692 else
1693 EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1694
1695 clear_instructions(p);
1696 }
1697 }
1698
1699 TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
1700 {
1701 static const struct {
1702 enum brw_reg_type dst_type;
1703 enum brw_reg_type src0_type;
1704 enum brw_reg_type src1_type;
1705 unsigned dst_stride;
1706 unsigned src0_stride;
1707 unsigned src1_stride;
1708 bool expected_result;
1709 } inst[] = {
1710 #define INST(dst_type, src0_type, src1_type, \
1711 dst_stride, src0_stride, src1_stride, expected_result) \
1712 { \
1713 BRW_REGISTER_TYPE_##dst_type, \
1714 BRW_REGISTER_TYPE_##src0_type, \
1715 BRW_REGISTER_TYPE_##src1_type, \
1716 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1717 BRW_HORIZONTAL_STRIDE_##src0_stride, \
1718 BRW_HORIZONTAL_STRIDE_##src1_stride, \
1719 expected_result, \
1720 }
1721
1722 INST(HF, HF, F, 2, 2, 1, true),
1723 INST(HF, F, HF, 2, 1, 2, true),
1724 INST(HF, F, HF, 1, 1, 2, true),
1725 INST(HF, F, HF, 2, 1, 1, false),
1726 INST(HF, HF, F, 2, 1, 1, false),
1727 INST(HF, HF, F, 1, 1, 1, false),
1728 INST(HF, HF, F, 2, 1, 1, false),
1729 INST( F, HF, F, 1, 1, 1, false),
1730 INST( F, F, HF, 1, 1, 2, true),
1731 INST( F, HF, HF, 1, 2, 1, false),
1732 INST( F, HF, HF, 1, 2, 2, true),
1733
1734 #undef INST
1735 };
1736
1737 /* No half-float math in gen8 */
1738 if (devinfo.gen < 9)
1739 return;
1740
1741 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1742 gen6_math(p, retype(g0, inst[i].dst_type),
1743 BRW_MATH_FUNCTION_POW,
1744 retype(g0, inst[i].src0_type),
1745 retype(g0, inst[i].src1_type));
1746
1747 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1748
1749 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1750 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1751 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
1752
1753 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1754 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
1755 brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
1756
1757 EXPECT_EQ(inst[i].expected_result, validate(p));
1758
1759 clear_instructions(p);
1760 }
1761 }
1762
1763 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
1764 {
1765 static const struct {
1766 unsigned exec_size;
1767 enum brw_reg_type dst_type;
1768 enum brw_reg_type src0_type;
1769 enum brw_reg_type src1_type;
1770 unsigned dst_stride;
1771 unsigned dst_subnr;
1772 bool expected_result_bdw;
1773 bool expected_result_chv_skl;
1774 } inst[] = {
1775 #define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
1776 expected_result_bdw, expected_result_chv_skl) \
1777 { \
1778 BRW_EXECUTE_##exec_size, \
1779 BRW_REGISTER_TYPE_##dst_type, \
1780 BRW_REGISTER_TYPE_##src0_type, \
1781 BRW_REGISTER_TYPE_##src1_type, \
1782 BRW_HORIZONTAL_STRIDE_##dst_stride, \
1783 dst_subnr, \
1784 expected_result_bdw, \
1785 expected_result_chv_skl \
1786 }
1787
1788 /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
1789 * oword-aligned
1790 */
1791 INST( 8, HF, HF, F, 1, 0, false, true),
1792 INST( 8, HF, HF, F, 1, 2, false, false),
1793 INST( 8, HF, HF, F, 1, 4, false, false),
1794 INST( 8, HF, HF, F, 1, 8, false, false),
1795 INST( 8, HF, HF, F, 1, 16, false, true),
1796
1797 /* SIMD16 packed fp16 always crosses oword boundaries */
1798 INST(16, HF, HF, F, 1, 0, false, false),
1799 INST(16, HF, HF, F, 1, 2, false, false),
1800 INST(16, HF, HF, F, 1, 4, false, false),
1801 INST(16, HF, HF, F, 1, 8, false, false),
1802 INST(16, HF, HF, F, 1, 16, false, false),
1803
1804 /* If destination is not packed (or not fp16) we can cross oword
1805 * boundaries
1806 */
1807 INST( 8, HF, HF, F, 2, 0, true, true),
1808 INST( 8, F, HF, F, 1, 0, true, true),
1809
1810 #undef INST
1811 };
1812
1813 if (devinfo.gen < 8)
1814 return;
1815
1816 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1817 brw_ADD(p, retype(g0, inst[i].dst_type),
1818 retype(g0, inst[i].src0_type),
1819 retype(g0, inst[i].src1_type));
1820
1821 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1822 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1823
1824 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1825 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1826 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1827
1828 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1829 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
1830 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1831
1832 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1833
1834 if (devinfo.is_cherryview || devinfo.gen >= 9)
1835 EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1836 else
1837 EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1838
1839 clear_instructions(p);
1840 }
1841 }
1842
1843 TEST_P(validation_test, mixed_float_align16_packed_data)
1844 {
1845 static const struct {
1846 enum brw_reg_type dst_type;
1847 enum brw_reg_type src0_type;
1848 enum brw_reg_type src1_type;
1849 unsigned src0_vstride;
1850 unsigned src1_vstride;
1851 bool expected_result;
1852 } inst[] = {
1853 #define INST(dst_type, src0_type, src1_type, \
1854 src0_vstride, src1_vstride, expected_result) \
1855 { \
1856 BRW_REGISTER_TYPE_##dst_type, \
1857 BRW_REGISTER_TYPE_##src0_type, \
1858 BRW_REGISTER_TYPE_##src1_type, \
1859 BRW_VERTICAL_STRIDE_##src0_vstride, \
1860 BRW_VERTICAL_STRIDE_##src1_vstride, \
1861 expected_result, \
1862 }
1863
1864 /* We only test with F destination because there is a restriction
1865 * by which F->HF conversions need to be DWord aligned but Align16 also
1866 * requires that destination horizontal stride is 1.
1867 */
1868 INST(F, F, HF, 4, 4, true),
1869 INST(F, F, HF, 2, 4, false),
1870 INST(F, F, HF, 4, 2, false),
1871 INST(F, F, HF, 0, 4, false),
1872 INST(F, F, HF, 4, 0, false),
1873 INST(F, HF, F, 4, 4, true),
1874 INST(F, HF, F, 4, 2, false),
1875 INST(F, HF, F, 2, 4, false),
1876 INST(F, HF, F, 0, 4, false),
1877 INST(F, HF, F, 4, 0, false),
1878
1879 #undef INST
1880 };
1881
1882 if (devinfo.gen < 8 || devinfo.gen >= 11)
1883 return;
1884
1885 brw_set_default_access_mode(p, BRW_ALIGN_16);
1886
1887 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1888 brw_ADD(p, retype(g0, inst[i].dst_type),
1889 retype(g0, inst[i].src0_type),
1890 retype(g0, inst[i].src1_type));
1891
1892 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1893 brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1894
1895 EXPECT_EQ(inst[i].expected_result, validate(p));
1896
1897 clear_instructions(p);
1898 }
1899 }
1900
1901 TEST_P(validation_test, mixed_float_align16_no_simd16)
1902 {
1903 static const struct {
1904 unsigned exec_size;
1905 enum brw_reg_type dst_type;
1906 enum brw_reg_type src0_type;
1907 enum brw_reg_type src1_type;
1908 bool expected_result;
1909 } inst[] = {
1910 #define INST(exec_size, dst_type, src0_type, src1_type, expected_result) \
1911 { \
1912 BRW_EXECUTE_##exec_size, \
1913 BRW_REGISTER_TYPE_##dst_type, \
1914 BRW_REGISTER_TYPE_##src0_type, \
1915 BRW_REGISTER_TYPE_##src1_type, \
1916 expected_result, \
1917 }
1918
1919 /* We only test with F destination because there is a restriction
1920 * by which F->HF conversions need to be DWord aligned but Align16 also
1921 * requires that destination horizontal stride is 1.
1922 */
1923 INST( 8, F, F, HF, true),
1924 INST( 8, F, HF, F, true),
1925 INST( 8, F, F, HF, true),
1926 INST(16, F, F, HF, false),
1927 INST(16, F, HF, F, false),
1928 INST(16, F, F, HF, false),
1929
1930 #undef INST
1931 };
1932
1933 if (devinfo.gen < 8 || devinfo.gen >= 11)
1934 return;
1935
1936 brw_set_default_access_mode(p, BRW_ALIGN_16);
1937
1938 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1939 brw_ADD(p, retype(g0, inst[i].dst_type),
1940 retype(g0, inst[i].src0_type),
1941 retype(g0, inst[i].src1_type));
1942
1943 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1944
1945 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1946 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1947
1948 EXPECT_EQ(inst[i].expected_result, validate(p));
1949
1950 clear_instructions(p);
1951 }
1952 }
1953
1954 TEST_P(validation_test, mixed_float_align16_no_acc_read)
1955 {
1956 static const struct {
1957 enum brw_reg_type dst_type;
1958 enum brw_reg_type src0_type;
1959 enum brw_reg_type src1_type;
1960 bool read_acc;
1961 bool expected_result;
1962 } inst[] = {
1963 #define INST(dst_type, src0_type, src1_type, read_acc, expected_result) \
1964 { \
1965 BRW_REGISTER_TYPE_##dst_type, \
1966 BRW_REGISTER_TYPE_##src0_type, \
1967 BRW_REGISTER_TYPE_##src1_type, \
1968 read_acc, \
1969 expected_result, \
1970 }
1971
1972 /* We only test with F destination because there is a restriction
1973 * by which F->HF conversions need to be DWord aligned but Align16 also
1974 * requires that destination horizontal stride is 1.
1975 */
1976 INST( F, F, HF, false, true),
1977 INST( F, F, HF, true, false),
1978 INST( F, HF, F, false, true),
1979 INST( F, HF, F, true, false),
1980
1981 #undef INST
1982 };
1983
1984 if (devinfo.gen < 8 || devinfo.gen >= 11)
1985 return;
1986
1987 brw_set_default_access_mode(p, BRW_ALIGN_16);
1988
1989 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1990 brw_ADD(p, retype(g0, inst[i].dst_type),
1991 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
1992 retype(g0, inst[i].src1_type));
1993
1994 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1995 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1996
1997 EXPECT_EQ(inst[i].expected_result, validate(p));
1998
1999 clear_instructions(p);
2000 }
2001 }
2002
2003 TEST_P(validation_test, mixed_float_align16_math_packed_format)
2004 {
2005 static const struct {
2006 enum brw_reg_type dst_type;
2007 enum brw_reg_type src0_type;
2008 enum brw_reg_type src1_type;
2009 unsigned src0_vstride;
2010 unsigned src1_vstride;
2011 bool expected_result;
2012 } inst[] = {
2013 #define INST(dst_type, src0_type, src1_type, \
2014 src0_vstride, src1_vstride, expected_result) \
2015 { \
2016 BRW_REGISTER_TYPE_##dst_type, \
2017 BRW_REGISTER_TYPE_##src0_type, \
2018 BRW_REGISTER_TYPE_##src1_type, \
2019 BRW_VERTICAL_STRIDE_##src0_vstride, \
2020 BRW_VERTICAL_STRIDE_##src1_vstride, \
2021 expected_result, \
2022 }
2023
2024 /* We only test with F destination because there is a restriction
2025 * by which F->HF conversions need to be DWord aligned but Align16 also
2026 * requires that destination horizontal stride is 1.
2027 */
2028 INST( F, HF, F, 4, 0, false),
2029 INST( F, HF, HF, 4, 4, true),
2030 INST( F, F, HF, 4, 0, false),
2031 INST( F, F, HF, 2, 4, false),
2032 INST( F, F, HF, 4, 2, false),
2033 INST( F, HF, HF, 0, 4, false),
2034
2035 #undef INST
2036 };
2037
2038 /* Align16 Math for mixed float mode is not supported in gen8 */
2039 if (devinfo.gen < 9 || devinfo.gen >= 11)
2040 return;
2041
2042 brw_set_default_access_mode(p, BRW_ALIGN_16);
2043
2044 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2045 gen6_math(p, retype(g0, inst[i].dst_type),
2046 BRW_MATH_FUNCTION_POW,
2047 retype(g0, inst[i].src0_type),
2048 retype(g0, inst[i].src1_type));
2049
2050 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
2051 brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
2052
2053 EXPECT_EQ(inst[i].expected_result, validate(p));
2054
2055 clear_instructions(p);
2056 }
2057 }
2058
2059 TEST_P(validation_test, vector_immediate_destination_alignment)
2060 {
2061 static const struct {
2062 enum brw_reg_type dst_type;
2063 enum brw_reg_type src_type;
2064 unsigned subnr;
2065 unsigned exec_size;
2066 bool expected_result;
2067 } move[] = {
2068 { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, 0, BRW_EXECUTE_4, true },
2069 { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, 16, BRW_EXECUTE_4, true },
2070 { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, 1, BRW_EXECUTE_4, false },
2071
2072 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V, 0, BRW_EXECUTE_8, true },
2073 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V, 16, BRW_EXECUTE_8, true },
2074 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V, 1, BRW_EXECUTE_8, false },
2075
2076 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, 0, BRW_EXECUTE_8, true },
2077 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, 16, BRW_EXECUTE_8, true },
2078 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, 1, BRW_EXECUTE_8, false },
2079 };
2080
2081 for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
2082 /* UV type is Gen6+ */
2083 if (devinfo.gen < 6 &&
2084 move[i].src_type == BRW_REGISTER_TYPE_UV)
2085 continue;
2086
2087 brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type));
2088 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, move[i].subnr);
2089 brw_inst_set_exec_size(&devinfo, last_inst, move[i].exec_size);
2090
2091 EXPECT_EQ(move[i].expected_result, validate(p));
2092
2093 clear_instructions(p);
2094 }
2095 }
2096
2097 TEST_P(validation_test, vector_immediate_destination_stride)
2098 {
2099 static const struct {
2100 enum brw_reg_type dst_type;
2101 enum brw_reg_type src_type;
2102 unsigned stride;
2103 bool expected_result;
2104 } move[] = {
2105 { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true },
2106 { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
2107 { BRW_REGISTER_TYPE_D, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true },
2108 { BRW_REGISTER_TYPE_D, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
2109 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, true },
2110 { BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_4, true },
2111
2112 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V, BRW_HORIZONTAL_STRIDE_1, true },
2113 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V, BRW_HORIZONTAL_STRIDE_2, false },
2114 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V, BRW_HORIZONTAL_STRIDE_4, false },
2115 { BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_V, BRW_HORIZONTAL_STRIDE_2, true },
2116
2117 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_1, true },
2118 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, false },
2119 { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_4, false },
2120 { BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, true },
2121 };
2122
2123 for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
2124 /* UV type is Gen6+ */
2125 if (devinfo.gen < 6 &&
2126 move[i].src_type == BRW_REGISTER_TYPE_UV)
2127 continue;
2128
2129 brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type));
2130 brw_inst_set_dst_hstride(&devinfo, last_inst, move[i].stride);
2131
2132 EXPECT_EQ(move[i].expected_result, validate(p));
2133
2134 clear_instructions(p);
2135 }
2136 }
2137
2138 TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
2139 {
2140 static const struct {
2141 enum opcode opcode;
2142 unsigned exec_size;
2143
2144 enum brw_reg_type dst_type;
2145 unsigned dst_subreg;
2146 unsigned dst_stride;
2147
2148 enum brw_reg_type src_type;
2149 unsigned src_subreg;
2150 unsigned src_vstride;
2151 unsigned src_width;
2152 unsigned src_hstride;
2153
2154 bool expected_result;
2155 } inst[] = {
2156 #define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type, \
2157 src_subreg, src_vstride, src_width, src_hstride, expected_result) \
2158 { \
2159 BRW_OPCODE_##opcode, \
2160 BRW_EXECUTE_##exec_size, \
2161 BRW_REGISTER_TYPE_##dst_type, \
2162 dst_subreg, \
2163 BRW_HORIZONTAL_STRIDE_##dst_stride, \
2164 BRW_REGISTER_TYPE_##src_type, \
2165 src_subreg, \
2166 BRW_VERTICAL_STRIDE_##src_vstride, \
2167 BRW_WIDTH_##src_width, \
2168 BRW_HORIZONTAL_STRIDE_##src_hstride, \
2169 expected_result, \
2170 }
2171
2172 /* Some instruction that violate no restrictions, as a control */
2173 INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2174 INST(MOV, 4, Q, 0, 1, Q, 0, 4, 4, 1, true ),
2175 INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2176
2177 INST(MOV, 4, DF, 0, 1, F, 0, 8, 4, 2, true ),
2178 INST(MOV, 4, Q, 0, 1, D, 0, 8, 4, 2, true ),
2179 INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
2180
2181 INST(MOV, 4, F, 0, 2, DF, 0, 4, 4, 1, true ),
2182 INST(MOV, 4, D, 0, 2, Q, 0, 4, 4, 1, true ),
2183 INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
2184
2185 INST(MUL, 8, D, 0, 2, D, 0, 8, 4, 2, true ),
2186 INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2187
2188 /* Something with subreg nrs */
2189 INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
2190 INST(MOV, 2, Q, 8, 1, Q, 8, 2, 2, 1, true ),
2191 INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
2192
2193 INST(MUL, 2, D, 4, 2, D, 4, 4, 2, 2, true ),
2194 INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
2195
2196 /* The PRMs say that for CHV, BXT:
2197 *
2198 * When source or destination datatype is 64b or operation is integer
2199 * DWord multiply, regioning in Align1 must follow these rules:
2200 *
2201 * 1. Source and Destination horizontal stride must be aligned to the
2202 * same qword.
2203 */
2204 INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
2205 INST(MOV, 4, Q, 0, 2, Q, 0, 4, 4, 1, false),
2206 INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
2207
2208 INST(MOV, 4, DF, 0, 2, F, 0, 8, 4, 2, false),
2209 INST(MOV, 4, Q, 0, 2, D, 0, 8, 4, 2, false),
2210 INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
2211
2212 INST(MOV, 4, DF, 0, 2, F, 0, 4, 4, 1, false),
2213 INST(MOV, 4, Q, 0, 2, D, 0, 4, 4, 1, false),
2214 INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
2215
2216 INST(MUL, 4, D, 0, 2, D, 0, 4, 4, 1, false),
2217 INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
2218
2219 INST(MUL, 4, D, 0, 1, D, 0, 8, 4, 2, false),
2220 INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
2221
2222 /* 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
2223 INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
2224 INST(MOV, 4, Q, 0, 1, Q, 0, 0, 2, 1, false),
2225 INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
2226
2227 INST(MOV, 4, DF, 0, 1, F, 0, 0, 2, 2, false),
2228 INST(MOV, 4, Q, 0, 1, D, 0, 0, 2, 2, false),
2229 INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
2230
2231 INST(MOV, 8, F, 0, 2, DF, 0, 0, 2, 1, false),
2232 INST(MOV, 8, D, 0, 2, Q, 0, 0, 2, 1, false),
2233 INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
2234
2235 INST(MUL, 8, D, 0, 2, D, 0, 0, 4, 2, false),
2236 INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2237
2238 INST(MUL, 8, D, 0, 2, D, 0, 0, 4, 2, false),
2239 INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2240
2241 /* 3. Source and Destination offset must be the same, except the case
2242 * of scalar source.
2243 */
2244 INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
2245 INST(MOV, 2, Q, 8, 1, Q, 0, 2, 2, 1, false),
2246 INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
2247
2248 INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
2249 INST(MOV, 2, Q, 0, 1, Q, 8, 2, 2, 1, false),
2250 INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
2251
2252 INST(MUL, 4, D, 4, 2, D, 0, 4, 2, 2, false),
2253 INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
2254
2255 INST(MUL, 4, D, 0, 2, D, 4, 4, 2, 2, false),
2256 INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
2257
2258 INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
2259 INST(MOV, 2, Q, 8, 1, Q, 0, 0, 1, 0, true ),
2260 INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
2261
2262 INST(MOV, 2, DF, 8, 1, F, 4, 0, 1, 0, true ),
2263 INST(MOV, 2, Q, 8, 1, D, 4, 0, 1, 0, true ),
2264 INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
2265
2266 INST(MUL, 4, D, 4, 1, D, 0, 0, 1, 0, true ),
2267 INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
2268
2269 INST(MUL, 4, D, 0, 1, D, 4, 0, 1, 0, true ),
2270 INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
2271
2272 #undef INST
2273 };
2274
2275 /* These restrictions only apply to Gen8+ */
2276 if (devinfo.gen < 8)
2277 return;
2278
2279 /* NoDDChk/NoDDClr does not exist on Gen12+ */
2280 if (devinfo.gen >= 12)
2281 return;
2282
2283 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2284 if (!devinfo.has_64bit_float &&
2285 (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
2286 inst[i].src_type == BRW_REGISTER_TYPE_DF))
2287 continue;
2288
2289 if (!devinfo.has_64bit_int &&
2290 (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
2291 inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
2292 inst[i].src_type == BRW_REGISTER_TYPE_Q ||
2293 inst[i].src_type == BRW_REGISTER_TYPE_UQ))
2294 continue;
2295
2296 if (inst[i].opcode == BRW_OPCODE_MOV) {
2297 brw_MOV(p, retype(g0, inst[i].dst_type),
2298 retype(g0, inst[i].src_type));
2299 } else {
2300 assert(inst[i].opcode == BRW_OPCODE_MUL);
2301 brw_MUL(p, retype(g0, inst[i].dst_type),
2302 retype(g0, inst[i].src_type),
2303 retype(zero, inst[i].src_type));
2304 }
2305 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2306
2307 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
2308 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
2309
2310 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2311
2312 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2313 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2314 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2315
2316 if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
2317 EXPECT_EQ(inst[i].expected_result, validate(p));
2318 } else {
2319 EXPECT_TRUE(validate(p));
2320 }
2321
2322 clear_instructions(p);
2323 }
2324 }
2325
2326 TEST_P(validation_test, qword_low_power_no_indirect_addressing)
2327 {
2328 static const struct {
2329 enum opcode opcode;
2330 unsigned exec_size;
2331
2332 enum brw_reg_type dst_type;
2333 bool dst_is_indirect;
2334 unsigned dst_stride;
2335
2336 enum brw_reg_type src_type;
2337 bool src_is_indirect;
2338 unsigned src_vstride;
2339 unsigned src_width;
2340 unsigned src_hstride;
2341
2342 bool expected_result;
2343 } inst[] = {
2344 #define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride, \
2345 src_type, src_is_indirect, src_vstride, src_width, src_hstride, \
2346 expected_result) \
2347 { \
2348 BRW_OPCODE_##opcode, \
2349 BRW_EXECUTE_##exec_size, \
2350 BRW_REGISTER_TYPE_##dst_type, \
2351 dst_is_indirect, \
2352 BRW_HORIZONTAL_STRIDE_##dst_stride, \
2353 BRW_REGISTER_TYPE_##src_type, \
2354 src_is_indirect, \
2355 BRW_VERTICAL_STRIDE_##src_vstride, \
2356 BRW_WIDTH_##src_width, \
2357 BRW_HORIZONTAL_STRIDE_##src_hstride, \
2358 expected_result, \
2359 }
2360
2361 /* Some instruction that violate no restrictions, as a control */
2362 INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2363 INST(MOV, 4, Q, 0, 1, Q, 0, 4, 4, 1, true ),
2364 INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2365
2366 INST(MUL, 8, D, 0, 2, D, 0, 8, 4, 2, true ),
2367 INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2368
2369 INST(MOV, 4, F, 1, 1, F, 0, 4, 4, 1, true ),
2370 INST(MOV, 4, F, 0, 1, F, 1, 4, 4, 1, true ),
2371 INST(MOV, 4, F, 1, 1, F, 1, 4, 4, 1, true ),
2372
2373 /* The PRMs say that for CHV, BXT:
2374 *
2375 * When source or destination datatype is 64b or operation is integer
2376 * DWord multiply, indirect addressing must not be used.
2377 */
2378 INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
2379 INST(MOV, 4, Q, 1, 1, Q, 0, 4, 4, 1, false),
2380 INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
2381
2382 INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
2383 INST(MOV, 4, Q, 0, 1, Q, 1, 4, 4, 1, false),
2384 INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
2385
2386 INST(MOV, 4, DF, 1, 1, F, 0, 8, 4, 2, false),
2387 INST(MOV, 4, Q, 1, 1, D, 0, 8, 4, 2, false),
2388 INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
2389
2390 INST(MOV, 4, DF, 0, 1, F, 1, 8, 4, 2, false),
2391 INST(MOV, 4, Q, 0, 1, D, 1, 8, 4, 2, false),
2392 INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
2393
2394 INST(MOV, 4, F, 1, 2, DF, 0, 4, 4, 1, false),
2395 INST(MOV, 4, D, 1, 2, Q, 0, 4, 4, 1, false),
2396 INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
2397
2398 INST(MOV, 4, F, 0, 2, DF, 1, 4, 4, 1, false),
2399 INST(MOV, 4, D, 0, 2, Q, 1, 4, 4, 1, false),
2400 INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
2401
2402 INST(MUL, 8, D, 1, 2, D, 0, 8, 4, 2, false),
2403 INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
2404
2405 INST(MUL, 8, D, 0, 2, D, 1, 8, 4, 2, false),
2406 INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
2407
2408 #undef INST
2409 };
2410
2411 /* These restrictions only apply to Gen8+ */
2412 if (devinfo.gen < 8)
2413 return;
2414
2415 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2416 if (!devinfo.has_64bit_float &&
2417 (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
2418 inst[i].src_type == BRW_REGISTER_TYPE_DF))
2419 continue;
2420
2421 if (!devinfo.has_64bit_int &&
2422 (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
2423 inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
2424 inst[i].src_type == BRW_REGISTER_TYPE_Q ||
2425 inst[i].src_type == BRW_REGISTER_TYPE_UQ))
2426 continue;
2427
2428 if (inst[i].opcode == BRW_OPCODE_MOV) {
2429 brw_MOV(p, retype(g0, inst[i].dst_type),
2430 retype(g0, inst[i].src_type));
2431 } else {
2432 assert(inst[i].opcode == BRW_OPCODE_MUL);
2433 brw_MUL(p, retype(g0, inst[i].dst_type),
2434 retype(g0, inst[i].src_type),
2435 retype(zero, inst[i].src_type));
2436 }
2437 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2438
2439 brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
2440 brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
2441
2442 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2443
2444 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2445 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2446 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2447
2448 if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
2449 EXPECT_EQ(inst[i].expected_result, validate(p));
2450 } else {
2451 EXPECT_TRUE(validate(p));
2452 }
2453
2454 clear_instructions(p);
2455 }
2456 }
2457
2458 TEST_P(validation_test, qword_low_power_no_64bit_arf)
2459 {
2460 static const struct {
2461 enum opcode opcode;
2462 unsigned exec_size;
2463
2464 struct brw_reg dst;
2465 enum brw_reg_type dst_type;
2466 unsigned dst_stride;
2467
2468 struct brw_reg src;
2469 enum brw_reg_type src_type;
2470 unsigned src_vstride;
2471 unsigned src_width;
2472 unsigned src_hstride;
2473
2474 bool acc_wr;
2475 bool expected_result;
2476 } inst[] = {
2477 #define INST(opcode, exec_size, dst, dst_type, dst_stride, \
2478 src, src_type, src_vstride, src_width, src_hstride, \
2479 acc_wr, expected_result) \
2480 { \
2481 BRW_OPCODE_##opcode, \
2482 BRW_EXECUTE_##exec_size, \
2483 dst, \
2484 BRW_REGISTER_TYPE_##dst_type, \
2485 BRW_HORIZONTAL_STRIDE_##dst_stride, \
2486 src, \
2487 BRW_REGISTER_TYPE_##src_type, \
2488 BRW_VERTICAL_STRIDE_##src_vstride, \
2489 BRW_WIDTH_##src_width, \
2490 BRW_HORIZONTAL_STRIDE_##src_hstride, \
2491 acc_wr, \
2492 expected_result, \
2493 }
2494
2495 /* Some instruction that violate no restrictions, as a control */
2496 INST(MOV, 4, g0, DF, 1, g0, F, 4, 2, 2, 0, true ),
2497 INST(MOV, 4, g0, F, 2, g0, DF, 4, 4, 1, 0, true ),
2498
2499 INST(MOV, 4, g0, Q, 1, g0, D, 4, 2, 2, 0, true ),
2500 INST(MOV, 4, g0, D, 2, g0, Q, 4, 4, 1, 0, true ),
2501
2502 INST(MOV, 4, g0, UQ, 1, g0, UD, 4, 2, 2, 0, true ),
2503 INST(MOV, 4, g0, UD, 2, g0, UQ, 4, 4, 1, 0, true ),
2504
2505 INST(MOV, 4, null, F, 1, g0, F, 4, 4, 1, 0, true ),
2506 INST(MOV, 4, acc0, F, 1, g0, F, 4, 4, 1, 0, true ),
2507 INST(MOV, 4, g0, F, 1, acc0, F, 4, 4, 1, 0, true ),
2508
2509 INST(MOV, 4, null, D, 1, g0, D, 4, 4, 1, 0, true ),
2510 INST(MOV, 4, acc0, D, 1, g0, D, 4, 4, 1, 0, true ),
2511 INST(MOV, 4, g0, D, 1, acc0, D, 4, 4, 1, 0, true ),
2512
2513 INST(MOV, 4, null, UD, 1, g0, UD, 4, 4, 1, 0, true ),
2514 INST(MOV, 4, acc0, UD, 1, g0, UD, 4, 4, 1, 0, true ),
2515 INST(MOV, 4, g0, UD, 1, acc0, UD, 4, 4, 1, 0, true ),
2516
2517 INST(MUL, 4, g0, D, 2, g0, D, 4, 2, 2, 0, true ),
2518 INST(MUL, 4, g0, UD, 2, g0, UD, 4, 2, 2, 0, true ),
2519
2520 /* The PRMs say that for CHV, BXT:
2521 *
2522 * ARF registers must never be used with 64b datatype or when
2523 * operation is integer DWord multiply.
2524 */
2525 INST(MOV, 4, acc0, DF, 1, g0, F, 4, 2, 2, 0, false),
2526 INST(MOV, 4, g0, DF, 1, acc0, F, 4, 2, 2, 0, false),
2527
2528 INST(MOV, 4, acc0, Q, 1, g0, D, 4, 2, 2, 0, false),
2529 INST(MOV, 4, g0, Q, 1, acc0, D, 4, 2, 2, 0, false),
2530
2531 INST(MOV, 4, acc0, UQ, 1, g0, UD, 4, 2, 2, 0, false),
2532 INST(MOV, 4, g0, UQ, 1, acc0, UD, 4, 2, 2, 0, false),
2533
2534 INST(MOV, 4, acc0, F, 2, g0, DF, 4, 4, 1, 0, false),
2535 INST(MOV, 4, g0, F, 2, acc0, DF, 4, 4, 1, 0, false),
2536
2537 INST(MOV, 4, acc0, D, 2, g0, Q, 4, 4, 1, 0, false),
2538 INST(MOV, 4, g0, D, 2, acc0, Q, 4, 4, 1, 0, false),
2539
2540 INST(MOV, 4, acc0, UD, 2, g0, UQ, 4, 4, 1, 0, false),
2541 INST(MOV, 4, g0, UD, 2, acc0, UQ, 4, 4, 1, 0, false),
2542
2543 INST(MUL, 4, acc0, D, 2, g0, D, 4, 2, 2, 0, false),
2544 INST(MUL, 4, acc0, UD, 2, g0, UD, 4, 2, 2, 0, false),
2545 /* MUL cannot have integer accumulator sources, so don't test that */
2546
2547 /* We assume that the restriction does not apply to the null register */
2548 INST(MOV, 4, null, DF, 1, g0, F, 4, 2, 2, 0, true ),
2549 INST(MOV, 4, null, Q, 1, g0, D, 4, 2, 2, 0, true ),
2550 INST(MOV, 4, null, UQ, 1, g0, UD, 4, 2, 2, 0, true ),
2551
2552 /* Check implicit accumulator write control */
2553 INST(MOV, 4, null, DF, 1, g0, F, 4, 2, 2, 1, false),
2554 INST(MUL, 4, null, DF, 1, g0, F, 4, 2, 2, 1, false),
2555
2556 #undef INST
2557 };
2558
2559 /* These restrictions only apply to Gen8+ */
2560 if (devinfo.gen < 8)
2561 return;
2562
2563 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2564 if (!devinfo.has_64bit_float &&
2565 (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
2566 inst[i].src_type == BRW_REGISTER_TYPE_DF))
2567 continue;
2568
2569 if (!devinfo.has_64bit_int &&
2570 (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
2571 inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
2572 inst[i].src_type == BRW_REGISTER_TYPE_Q ||
2573 inst[i].src_type == BRW_REGISTER_TYPE_UQ))
2574 continue;
2575
2576 if (inst[i].opcode == BRW_OPCODE_MOV) {
2577 brw_MOV(p, retype(inst[i].dst, inst[i].dst_type),
2578 retype(inst[i].src, inst[i].src_type));
2579 } else {
2580 assert(inst[i].opcode == BRW_OPCODE_MUL);
2581 brw_MUL(p, retype(inst[i].dst, inst[i].dst_type),
2582 retype(inst[i].src, inst[i].src_type),
2583 retype(zero, inst[i].src_type));
2584 brw_inst_set_opcode(&devinfo, last_inst, inst[i].opcode);
2585 }
2586 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2587 brw_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
2588
2589 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2590
2591 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2592 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2593 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2594
2595 if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
2596 EXPECT_EQ(inst[i].expected_result, validate(p));
2597 } else {
2598 EXPECT_TRUE(validate(p));
2599 }
2600
2601 clear_instructions(p);
2602 }
2603
2604 if (!devinfo.has_64bit_float)
2605 return;
2606
2607 /* MAC implicitly reads the accumulator */
2608 brw_MAC(p, retype(g0, BRW_REGISTER_TYPE_DF),
2609 retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF),
2610 retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF));
2611 if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
2612 EXPECT_FALSE(validate(p));
2613 } else {
2614 EXPECT_TRUE(validate(p));
2615 }
2616 }
2617
2618 TEST_P(validation_test, align16_64_bit_integer)
2619 {
2620 static const struct {
2621 enum opcode opcode;
2622 unsigned exec_size;
2623
2624 enum brw_reg_type dst_type;
2625 enum brw_reg_type src_type;
2626
2627 bool expected_result;
2628 } inst[] = {
2629 #define INST(opcode, exec_size, dst_type, src_type, expected_result) \
2630 { \
2631 BRW_OPCODE_##opcode, \
2632 BRW_EXECUTE_##exec_size, \
2633 BRW_REGISTER_TYPE_##dst_type, \
2634 BRW_REGISTER_TYPE_##src_type, \
2635 expected_result, \
2636 }
2637
2638 /* Some instruction that violate no restrictions, as a control */
2639 INST(MOV, 2, Q, D, true ),
2640 INST(MOV, 2, UQ, UD, true ),
2641 INST(MOV, 2, DF, F, true ),
2642
2643 INST(ADD, 2, Q, D, true ),
2644 INST(ADD, 2, UQ, UD, true ),
2645 INST(ADD, 2, DF, F, true ),
2646
2647 /* The PRMs say that for BDW, SKL:
2648 *
2649 * If Align16 is required for an operation with QW destination and non-QW
2650 * source datatypes, the execution size cannot exceed 2.
2651 */
2652
2653 INST(MOV, 4, Q, D, false),
2654 INST(MOV, 4, UQ, UD, false),
2655 INST(MOV, 4, DF, F, false),
2656
2657 INST(ADD, 4, Q, D, false),
2658 INST(ADD, 4, UQ, UD, false),
2659 INST(ADD, 4, DF, F, false),
2660
2661 #undef INST
2662 };
2663
2664 /* 64-bit integer types exist on Gen8+ */
2665 if (devinfo.gen < 8)
2666 return;
2667
2668 /* Align16 does not exist on Gen11+ */
2669 if (devinfo.gen >= 11)
2670 return;
2671
2672 brw_set_default_access_mode(p, BRW_ALIGN_16);
2673
2674 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2675 if (inst[i].opcode == BRW_OPCODE_MOV) {
2676 brw_MOV(p, retype(g0, inst[i].dst_type),
2677 retype(g0, inst[i].src_type));
2678 } else {
2679 assert(inst[i].opcode == BRW_OPCODE_ADD);
2680 brw_ADD(p, retype(g0, inst[i].dst_type),
2681 retype(g0, inst[i].src_type),
2682 retype(g0, inst[i].src_type));
2683 }
2684 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2685
2686 EXPECT_EQ(inst[i].expected_result, validate(p));
2687
2688 clear_instructions(p);
2689 }
2690 }
2691
2692 TEST_P(validation_test, qword_low_power_no_depctrl)
2693 {
2694 static const struct {
2695 enum opcode opcode;
2696 unsigned exec_size;
2697
2698 enum brw_reg_type dst_type;
2699 unsigned dst_stride;
2700
2701 enum brw_reg_type src_type;
2702 unsigned src_vstride;
2703 unsigned src_width;
2704 unsigned src_hstride;
2705
2706 bool no_dd_check;
2707 bool no_dd_clear;
2708
2709 bool expected_result;
2710 } inst[] = {
2711 #define INST(opcode, exec_size, dst_type, dst_stride, \
2712 src_type, src_vstride, src_width, src_hstride, \
2713 no_dd_check, no_dd_clear, expected_result) \
2714 { \
2715 BRW_OPCODE_##opcode, \
2716 BRW_EXECUTE_##exec_size, \
2717 BRW_REGISTER_TYPE_##dst_type, \
2718 BRW_HORIZONTAL_STRIDE_##dst_stride, \
2719 BRW_REGISTER_TYPE_##src_type, \
2720 BRW_VERTICAL_STRIDE_##src_vstride, \
2721 BRW_WIDTH_##src_width, \
2722 BRW_HORIZONTAL_STRIDE_##src_hstride, \
2723 no_dd_check, \
2724 no_dd_clear, \
2725 expected_result, \
2726 }
2727
2728 /* Some instruction that violate no restrictions, as a control */
2729 INST(MOV, 4, DF, 1, F, 8, 4, 2, 0, 0, true ),
2730 INST(MOV, 4, Q, 1, D, 8, 4, 2, 0, 0, true ),
2731 INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
2732
2733 INST(MOV, 4, F, 2, DF, 4, 4, 1, 0, 0, true ),
2734 INST(MOV, 4, D, 2, Q, 4, 4, 1, 0, 0, true ),
2735 INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
2736
2737 INST(MUL, 8, D, 2, D, 8, 4, 2, 0, 0, true ),
2738 INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
2739
2740 INST(MOV, 4, F, 1, F, 4, 4, 1, 1, 1, true ),
2741
2742 /* The PRMs say that for CHV, BXT:
2743 *
2744 * When source or destination datatype is 64b or operation is integer
2745 * DWord multiply, DepCtrl must not be used.
2746 */
2747 INST(MOV, 4, DF, 1, F, 8, 4, 2, 1, 0, false),
2748 INST(MOV, 4, Q, 1, D, 8, 4, 2, 1, 0, false),
2749 INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
2750
2751 INST(MOV, 4, F, 2, DF, 4, 4, 1, 1, 0, false),
2752 INST(MOV, 4, D, 2, Q, 4, 4, 1, 1, 0, false),
2753 INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
2754
2755 INST(MOV, 4, DF, 1, F, 8, 4, 2, 0, 1, false),
2756 INST(MOV, 4, Q, 1, D, 8, 4, 2, 0, 1, false),
2757 INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
2758
2759 INST(MOV, 4, F, 2, DF, 4, 4, 1, 0, 1, false),
2760 INST(MOV, 4, D, 2, Q, 4, 4, 1, 0, 1, false),
2761 INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
2762
2763 INST(MUL, 8, D, 2, D, 8, 4, 2, 1, 0, false),
2764 INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
2765
2766 INST(MUL, 8, D, 2, D, 8, 4, 2, 0, 1, false),
2767 INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
2768
2769 #undef INST
2770 };
2771
2772 /* These restrictions only apply to Gen8+ */
2773 if (devinfo.gen < 8)
2774 return;
2775
2776 /* NoDDChk/NoDDClr does not exist on Gen12+ */
2777 if (devinfo.gen >= 12)
2778 return;
2779
2780 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2781 if (!devinfo.has_64bit_float &&
2782 (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
2783 inst[i].src_type == BRW_REGISTER_TYPE_DF))
2784 continue;
2785
2786 if (!devinfo.has_64bit_int &&
2787 (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
2788 inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
2789 inst[i].src_type == BRW_REGISTER_TYPE_Q ||
2790 inst[i].src_type == BRW_REGISTER_TYPE_UQ))
2791 continue;
2792
2793 if (inst[i].opcode == BRW_OPCODE_MOV) {
2794 brw_MOV(p, retype(g0, inst[i].dst_type),
2795 retype(g0, inst[i].src_type));
2796 } else {
2797 assert(inst[i].opcode == BRW_OPCODE_MUL);
2798 brw_MUL(p, retype(g0, inst[i].dst_type),
2799 retype(g0, inst[i].src_type),
2800 retype(zero, inst[i].src_type));
2801 }
2802 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2803
2804 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2805
2806 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2807 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2808 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2809
2810 brw_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
2811 brw_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
2812
2813 if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
2814 EXPECT_EQ(inst[i].expected_result, validate(p));
2815 } else {
2816 EXPECT_TRUE(validate(p));
2817 }
2818
2819 clear_instructions(p);
2820 }
2821 }
2822
2823 TEST_P(validation_test, gen11_no_byte_src_1_2)
2824 {
2825 static const struct {
2826 enum opcode opcode;
2827 unsigned access_mode;
2828
2829 enum brw_reg_type dst_type;
2830 struct {
2831 enum brw_reg_type type;
2832 unsigned vstride;
2833 unsigned width;
2834 unsigned hstride;
2835 } srcs[3];
2836
2837 int gen;
2838 bool expected_result;
2839 } inst[] = {
2840 #define INST(opcode, access_mode, dst_type, \
2841 src0_type, src0_vstride, src0_width, src0_hstride, \
2842 src1_type, src1_vstride, src1_width, src1_hstride, \
2843 src2_type, \
2844 gen, expected_result) \
2845 { \
2846 BRW_OPCODE_##opcode, \
2847 BRW_ALIGN_##access_mode, \
2848 BRW_REGISTER_TYPE_##dst_type, \
2849 { \
2850 { \
2851 BRW_REGISTER_TYPE_##src0_type, \
2852 BRW_VERTICAL_STRIDE_##src0_vstride, \
2853 BRW_WIDTH_##src0_width, \
2854 BRW_HORIZONTAL_STRIDE_##src0_hstride, \
2855 }, \
2856 { \
2857 BRW_REGISTER_TYPE_##src1_type, \
2858 BRW_VERTICAL_STRIDE_##src1_vstride, \
2859 BRW_WIDTH_##src1_width, \
2860 BRW_HORIZONTAL_STRIDE_##src1_hstride, \
2861 }, \
2862 { \
2863 BRW_REGISTER_TYPE_##src2_type, \
2864 }, \
2865 }, \
2866 gen, \
2867 expected_result, \
2868 }
2869
2870 /* Passes on < 11 */
2871 INST(MOV, 16, F, B, 2, 4, 0, UD, 0, 4, 0, D, 8, true ),
2872 INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0, D, 7, true ),
2873 INST(MAD, 16, D, B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ),
2874
2875 /* Fails on 11+ */
2876 INST(MAD, 1, UB, W, 1, 1, 0, D, 0, 4, 0, B, 11, false ),
2877 INST(MAD, 1, UB, W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ),
2878 INST(ADD, 1, W, W, 1, 4, 1, B, 1, 1, 0, D, 11, false ),
2879
2880 /* Passes on 11+ */
2881 INST(MOV, 1, W, B, 8, 8, 1, D, 8, 8, 1, D, 11, true ),
2882 INST(ADD, 1, UD, B, 8, 8, 1, W, 8, 8, 1, D, 11, true ),
2883 INST(MAD, 1, B, B, 0, 1, 0, D, 0, 4, 0, W, 11, true ),
2884
2885 #undef INST
2886 };
2887
2888
2889 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2890 /* Skip instruction not meant for this gen. */
2891 if (devinfo.gen != inst[i].gen)
2892 continue;
2893
2894 brw_push_insn_state(p);
2895
2896 brw_set_default_exec_size(p, BRW_EXECUTE_8);
2897 brw_set_default_access_mode(p, inst[i].access_mode);
2898
2899 switch (inst[i].opcode) {
2900 case BRW_OPCODE_MOV:
2901 brw_MOV(p, retype(g0, inst[i].dst_type),
2902 retype(g0, inst[i].srcs[0].type));
2903 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2904 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2905 break;
2906 case BRW_OPCODE_ADD:
2907 brw_ADD(p, retype(g0, inst[i].dst_type),
2908 retype(g0, inst[i].srcs[0].type),
2909 retype(g0, inst[i].srcs[1].type));
2910 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2911 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
2912 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2913 brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
2914 brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
2915 brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
2916 break;
2917 case BRW_OPCODE_MAD:
2918 brw_MAD(p, retype(g0, inst[i].dst_type),
2919 retype(g0, inst[i].srcs[0].type),
2920 retype(g0, inst[i].srcs[1].type),
2921 retype(g0, inst[i].srcs[2].type));
2922 brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2923 brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2924 brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2925 brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2926 break;
2927 default:
2928 unreachable("invalid opcode");
2929 }
2930
2931 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
2932
2933 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
2934 brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
2935
2936 brw_pop_insn_state(p);
2937
2938 EXPECT_EQ(inst[i].expected_result, validate(p));
2939
2940 clear_instructions(p);
2941 }
2942 }