b7a88931a5b891bb5fdf26a95fa8cb1fe830d695
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 unsigned msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102 /**
103 * Convert a brw_reg_type enumeration value into the hardware representation.
104 *
105 * The hardware encoding may depend on whether the value is an immediate.
106 */
107 unsigned
108 brw_reg_type_to_hw_type(const struct brw_context *brw,
109 enum brw_reg_type type, unsigned file)
110 {
111 bool imm = file == BRW_IMMEDIATE_VALUE;
112
113 if (file == BRW_IMMEDIATE_VALUE) {
114 const static int imm_hw_types[] = {
115 [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
116 [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D,
117 [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
118 [BRW_REGISTER_TYPE_W] = BRW_HW_REG_TYPE_W,
119 [BRW_REGISTER_TYPE_F] = BRW_HW_REG_TYPE_F,
120 [BRW_REGISTER_TYPE_UB] = -1,
121 [BRW_REGISTER_TYPE_B] = -1,
122 [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
123 [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
124 [BRW_REGISTER_TYPE_V] = BRW_HW_REG_IMM_TYPE_V,
125 };
126 assert(type < ARRAY_SIZE(imm_hw_types));
127 assert(imm_hw_types[type] != -1);
128 return imm_hw_types[type];
129 } else {
130 /* Non-immediate registers */
131 const static int hw_types[] = {
132 [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
133 [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D,
134 [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
135 [BRW_REGISTER_TYPE_W] = BRW_HW_REG_TYPE_W,
136 [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
137 [BRW_REGISTER_TYPE_B] = BRW_HW_REG_NON_IMM_TYPE_B,
138 [BRW_REGISTER_TYPE_F] = BRW_HW_REG_TYPE_F,
139 [BRW_REGISTER_TYPE_UV] = -1,
140 [BRW_REGISTER_TYPE_VF] = -1,
141 [BRW_REGISTER_TYPE_V] = -1,
142 };
143 assert(type < ARRAY_SIZE(hw_types));
144 assert(hw_types[type] != -1);
145 return hw_types[type];
146 }
147 }
148
/**
 * Encode @dest as the destination operand of @insn (the bits1 dword).
 *
 * Handles direct and register-indirect addressing, and both Align1 and
 * Align16 access modes, which pack the subregister/stride fields
 * differently.  Also derives the instruction's execution size from the
 * destination width via guess_execution_size().
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On Gen7, MRFs are faked on top of GRFs r112..r127. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type =
      brw_reg_type_to_hw_type(p->brw, dest.type, dest.file);
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         /* A horizontal stride of 0 is not valid for a destination;
          * promote it to 1.
          */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
         /* Align16: subregister is encoded in 16-byte units and a
          * per-channel writemask is used instead of a stride.
          */
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE) {
            assert(dest.dw1.bits.writemask != 0);
         }
         /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
          *    Although Dst.HorzStride is a don't care for Align16, HW needs
          *    this to be programmed as "01".
          */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
210
211 extern int reg_type_size[];
212
213 static void
214 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
215 {
216 int hstride_for_reg[] = {0, 1, 2, 4};
217 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
218 int width_for_reg[] = {1, 2, 4, 8, 16};
219 int execsize_for_reg[] = {1, 2, 4, 8, 16};
220 int width, hstride, vstride, execsize;
221
222 if (reg.file == BRW_IMMEDIATE_VALUE) {
223 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
224 * mean the destination has to be 128-bit aligned and the
225 * destination horiz stride has to be a word.
226 */
227 if (reg.type == BRW_REGISTER_TYPE_V) {
228 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
229 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
230 }
231
232 return;
233 }
234
235 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
236 reg.file == BRW_ARF_NULL)
237 return;
238
239 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
240 hstride = hstride_for_reg[reg.hstride];
241
242 if (reg.vstride == 0xf) {
243 vstride = -1;
244 } else {
245 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
246 vstride = vstride_for_reg[reg.vstride];
247 }
248
249 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
250 width = width_for_reg[reg.width];
251
252 assert(insn->header.execution_size >= 0 &&
253 insn->header.execution_size < Elements(execsize_for_reg));
254 execsize = execsize_for_reg[insn->header.execution_size];
255
256 /* Restrictions from 3.3.10: Register Region Restrictions. */
257 /* 3. */
258 assert(execsize >= width);
259
260 /* 4. */
261 if (execsize == width && hstride != 0) {
262 assert(vstride == -1 || vstride == width * hstride);
263 }
264
265 /* 5. */
266 if (execsize == width && hstride == 0) {
267 /* no restriction on vstride. */
268 }
269
270 /* 6. */
271 if (width == 1) {
272 assert(hstride == 0);
273 }
274
275 /* 7. */
276 if (execsize == 1 && width == 1) {
277 assert(hstride == 0);
278 assert(vstride == 0);
279 }
280
281 /* 8. */
282 if (vstride == 0 && hstride == 0) {
283 assert(width == 1);
284 }
285
286 /* 10. Check destination issues. */
287 }
288
289 void
290 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
291 struct brw_reg reg)
292 {
293 struct brw_context *brw = p->brw;
294
295 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
296 assert(reg.nr < 128);
297
298 gen7_convert_mrf_to_grf(p, &reg);
299
300 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
301 insn->header.opcode == BRW_OPCODE_SENDC)) {
302 /* Any source modifiers or regions will be ignored, since this just
303 * identifies the MRF/GRF to start reading the message contents from.
304 * Check for some likely failures.
305 */
306 assert(!reg.negate);
307 assert(!reg.abs);
308 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
309 }
310
311 validate_reg(insn, reg);
312
313 insn->bits1.da1.src0_reg_file = reg.file;
314 insn->bits1.da1.src0_reg_type =
315 brw_reg_type_to_hw_type(brw, reg.type, reg.file);
316 insn->bits2.da1.src0_abs = reg.abs;
317 insn->bits2.da1.src0_negate = reg.negate;
318 insn->bits2.da1.src0_address_mode = reg.address_mode;
319
320 if (reg.file == BRW_IMMEDIATE_VALUE) {
321 insn->bits3.ud = reg.dw1.ud;
322
323 /* Required to set some fields in src1 as well:
324 */
325 insn->bits1.da1.src1_reg_file = 0; /* arf */
326 insn->bits1.da1.src1_reg_type = insn->bits1.da1.src0_reg_type;
327 }
328 else
329 {
330 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
331 if (insn->header.access_mode == BRW_ALIGN_1) {
332 insn->bits2.da1.src0_subreg_nr = reg.subnr;
333 insn->bits2.da1.src0_reg_nr = reg.nr;
334 }
335 else {
336 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
337 insn->bits2.da16.src0_reg_nr = reg.nr;
338 }
339 }
340 else {
341 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
342
343 if (insn->header.access_mode == BRW_ALIGN_1) {
344 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
345 }
346 else {
347 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
348 }
349 }
350
351 if (insn->header.access_mode == BRW_ALIGN_1) {
352 if (reg.width == BRW_WIDTH_1 &&
353 insn->header.execution_size == BRW_EXECUTE_1) {
354 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
355 insn->bits2.da1.src0_width = BRW_WIDTH_1;
356 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
357 }
358 else {
359 insn->bits2.da1.src0_horiz_stride = reg.hstride;
360 insn->bits2.da1.src0_width = reg.width;
361 insn->bits2.da1.src0_vert_stride = reg.vstride;
362 }
363 }
364 else {
365 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
366 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
367 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
368 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
369
370 /* This is an oddity of the fact we're using the same
371 * descriptions for registers in align_16 as align_1:
372 */
373 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
374 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
375 else
376 insn->bits2.da16.src0_vert_stride = reg.vstride;
377 }
378 }
379 }
380
381
382 void brw_set_src1(struct brw_compile *p,
383 struct brw_instruction *insn,
384 struct brw_reg reg)
385 {
386 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
387
388 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
389 assert(reg.nr < 128);
390
391 gen7_convert_mrf_to_grf(p, &reg);
392
393 validate_reg(insn, reg);
394
395 insn->bits1.da1.src1_reg_file = reg.file;
396 insn->bits1.da1.src1_reg_type =
397 brw_reg_type_to_hw_type(p->brw, reg.type, reg.file);
398 insn->bits3.da1.src1_abs = reg.abs;
399 insn->bits3.da1.src1_negate = reg.negate;
400
401 /* Only src1 can be immediate in two-argument instructions.
402 */
403 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
404
405 if (reg.file == BRW_IMMEDIATE_VALUE) {
406 insn->bits3.ud = reg.dw1.ud;
407 }
408 else {
409 /* This is a hardware restriction, which may or may not be lifted
410 * in the future:
411 */
412 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
413 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
414
415 if (insn->header.access_mode == BRW_ALIGN_1) {
416 insn->bits3.da1.src1_subreg_nr = reg.subnr;
417 insn->bits3.da1.src1_reg_nr = reg.nr;
418 }
419 else {
420 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
421 insn->bits3.da16.src1_reg_nr = reg.nr;
422 }
423
424 if (insn->header.access_mode == BRW_ALIGN_1) {
425 if (reg.width == BRW_WIDTH_1 &&
426 insn->header.execution_size == BRW_EXECUTE_1) {
427 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
428 insn->bits3.da1.src1_width = BRW_WIDTH_1;
429 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
430 }
431 else {
432 insn->bits3.da1.src1_horiz_stride = reg.hstride;
433 insn->bits3.da1.src1_width = reg.width;
434 insn->bits3.da1.src1_vert_stride = reg.vstride;
435 }
436 }
437 else {
438 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
439 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
440 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
441 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
442
443 /* This is an oddity of the fact we're using the same
444 * descriptions for registers in align_16 as align_1:
445 */
446 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
447 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
448 else
449 insn->bits3.da16.src1_vert_stride = reg.vstride;
450 }
451 }
452 }
453
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
                           struct brw_instruction *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
{
   struct brw_context *brw = p->brw;

   /* Zero the descriptor dword (bits3) by encoding immediate 0 as src1. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (brw->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (brw->gen >= 6) {
         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
         inst->header.destreg__conditionalmod = sfid;
      } else {
         /* Set Extended Message Descriptor (ex_desc) */
         inst->bits2.send_gen5.sfid = sfid;
         inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Gen4: everything lives in the descriptor dword itself. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
496
/**
 * Fill in the message descriptor for a message to the (pre-Gen6) math
 * extended function unit.
 *
 * Message and response lengths are inferred from the function: POW and
 * the integer-division variants take two operand registers; SINCOS and
 * quotient-and-remainder produce two result registers.
 *
 * The instruction's saturate bit is moved into the message descriptor
 * and cleared from the header, since the math box applies saturation
 * itself.
 */
static void brw_set_math_message( struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned function,
                                  unsigned integer_type,
                                  bool low_precision,
                                  unsigned dataType )
{
   struct brw_context *brw = p->brw;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }


   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
                              msg_length, response_length, false, false);
   if (brw->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }
   /* Saturation is handled by the message, not the instruction header. */
   insn->header.saturate = 0;
}
551
552
553 static void brw_set_ff_sync_message(struct brw_compile *p,
554 struct brw_instruction *insn,
555 bool allocate,
556 unsigned response_length,
557 bool end_of_thread)
558 {
559 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
560 1, response_length, true, end_of_thread);
561 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
562 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
563 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
564 insn->bits3.urb_gen5.allocate = allocate;
565 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
566 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
567 }
568
/**
 * Fill in the message descriptor for a URB write message.
 *
 * The descriptor layout differs per generation: Gen7 has its own opcode
 * set (HWORD vs OWORD writes, per-slot offsets), Gen5/6 use the gen5
 * layout, and Gen4 uses the original layout.  EOT is derived from
 * @flags via brw_set_message_descriptor().
 */
static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true,
                              flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      if (flags & BRW_URB_WRITE_OWORD) {
         assert(msg_length == 2); /* header + one OWORD of data */
         insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
      } else {
         insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
      }
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes do not support the transpose swizzle. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
         flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
611
/**
 * Fill in the message descriptor for a data-port write message.
 *
 * Picks the shared function (SFID) per generation: Gen7+ routes render
 * target writes to the render cache and everything else to the data
 * cache; Gen6 uses the render cache for all writes; Gen4/5 have a
 * dedicated dataport-write SFID.  Then packs the per-generation
 * descriptor fields.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
                         struct brw_instruction *insn,
                         unsigned binding_table_index,
                         unsigned msg_control,
                         unsigned msg_type,
                         unsigned msg_length,
                         bool header_present,
                         unsigned last_render_target,
                         unsigned response_length,
                         unsigned end_of_thread,
                         unsigned send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
669
/**
 * Fill in the message descriptor for a data-port read message.
 *
 * Picks the shared function (SFID) per generation — Gen7+ always uses
 * the data cache; Gen6 selects render vs sampler cache based on
 * @target_cache; Gen4/5 have a dedicated dataport-read SFID — then
 * packs the per-generation descriptor fields.  Reads never set EOT or
 * last-render-target.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned msg_control,
                        unsigned msg_type,
                        unsigned target_cache,
                        unsigned msg_length,
                        bool header_present,
                        unsigned response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      /* G4x uses slightly different descriptor bit positions than Gen4. */
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
726
/**
 * Fill in the message descriptor for a sampler message.
 *
 * Packs the per-generation sampler descriptor layout.  Note that
 * simd_mode only exists from Gen5 on, and return_format only exists in
 * the original Gen4 layout.
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
765
766
767 #define next_insn brw_next_insn
768 struct brw_instruction *
769 brw_next_insn(struct brw_compile *p, unsigned opcode)
770 {
771 struct brw_instruction *insn;
772
773 if (p->nr_insn + 1 > p->store_size) {
774 if (0)
775 printf("incresing the store size to %d\n", p->store_size << 1);
776 p->store_size <<= 1;
777 p->store = reralloc(p->mem_ctx, p->store,
778 struct brw_instruction, p->store_size);
779 if (!p->store)
780 assert(!"realloc eu store memeory failed");
781 }
782
783 p->next_insn_offset += 16;
784 insn = &p->store[p->nr_insn++];
785 memcpy(insn, p->current, sizeof(*insn));
786
787 /* Reset this one-shot flag:
788 */
789
790 if (p->current->header.destreg__conditionalmod) {
791 p->current->header.destreg__conditionalmod = 0;
792 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
793 }
794
795 insn->header.opcode = opcode;
796 return insn;
797 }
798
799 static struct brw_instruction *brw_alu1( struct brw_compile *p,
800 unsigned opcode,
801 struct brw_reg dest,
802 struct brw_reg src )
803 {
804 struct brw_instruction *insn = next_insn(p, opcode);
805 brw_set_dest(p, insn, dest);
806 brw_set_src0(p, insn, src);
807 return insn;
808 }
809
810 static struct brw_instruction *brw_alu2(struct brw_compile *p,
811 unsigned opcode,
812 struct brw_reg dest,
813 struct brw_reg src0,
814 struct brw_reg src1 )
815 {
816 struct brw_instruction *insn = next_insn(p, opcode);
817 brw_set_dest(p, insn, dest);
818 brw_set_src0(p, insn, src0);
819 brw_set_src1(p, insn, src1);
820 return insn;
821 }
822
823 static int
824 get_3src_subreg_nr(struct brw_reg reg)
825 {
826 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
827 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
828 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
829 } else {
830 return reg.subnr / 4;
831 }
832 }
833
/**
 * Emit a three-source instruction, which uses its own compact "da3src"
 * encoding: Align16 access mode only, direct-addressed GRF sources (and
 * GRF/MRF destination) only, with subregister numbers in 4-byte units.
 * Used by the MAD/LRP/BFE/BFI2 emitters (see the Gen7 type note below).
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
                                        unsigned opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1,
                                        struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   /* 3-src instructions only exist in Align16 mode. */
   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* dest_reg_file is a single bit: 0 = GRF, 1 = MRF. */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl replicates a scalar (vstride 0) source across channels. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   /* src1's subregister number straddles the bits2/bits3 boundary. */
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
917
918
919 /***********************************************************************
920 * Convenience routines.
921 */
922 #define ALU1(OP) \
923 struct brw_instruction *brw_##OP(struct brw_compile *p, \
924 struct brw_reg dest, \
925 struct brw_reg src0) \
926 { \
927 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
928 }
929
/* Generate brw_<OP>(): emit a two-source ALU instruction for opcode
 * BRW_OPCODE_<OP> via brw_alu2().
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
938
/* Generate brw_<OP>(): emit a three-source ALU instruction for opcode
 * BRW_OPCODE_<OP> via brw_alu3().
 */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
948
/* Generate brw_<OP>(): like ALU3, but additionally asserts that the
 * destination and all three sources are float-typed (used for MAD/LRP,
 * whose 3-src encoding derives src types from the dest type).
 */
#define ALU3F(OP)                                               \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_REGISTER_TYPE_F);                    \
   assert(src0.type == BRW_REGISTER_TYPE_F);                    \
   assert(src1.type == BRW_REGISTER_TYPE_F);                    \
   assert(src2.type == BRW_REGISTER_TYPE_F);                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
962
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
/* Generate brw_<OP>(): emit the rounding instruction, and on gen < 6 also
 * the predicated fix-up ADD described above.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
987
988
/* Instantiate the brw_MOV(), brw_SEL(), ... emitter functions from the
 * ALU1/ALU2/ALU3/ALU3F/ROUND templates above.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)
1025
1026
1027 struct brw_instruction *brw_ADD(struct brw_compile *p,
1028 struct brw_reg dest,
1029 struct brw_reg src0,
1030 struct brw_reg src1)
1031 {
1032 /* 6.2.2: add */
1033 if (src0.type == BRW_REGISTER_TYPE_F ||
1034 (src0.file == BRW_IMMEDIATE_VALUE &&
1035 src0.type == BRW_REGISTER_TYPE_VF)) {
1036 assert(src1.type != BRW_REGISTER_TYPE_UD);
1037 assert(src1.type != BRW_REGISTER_TYPE_D);
1038 }
1039
1040 if (src1.type == BRW_REGISTER_TYPE_F ||
1041 (src1.file == BRW_IMMEDIATE_VALUE &&
1042 src1.type == BRW_REGISTER_TYPE_VF)) {
1043 assert(src0.type != BRW_REGISTER_TYPE_UD);
1044 assert(src0.type != BRW_REGISTER_TYPE_D);
1045 }
1046
1047 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1048 }
1049
1050 struct brw_instruction *brw_AVG(struct brw_compile *p,
1051 struct brw_reg dest,
1052 struct brw_reg src0,
1053 struct brw_reg src1)
1054 {
1055 assert(dest.type == src0.type);
1056 assert(src0.type == src1.type);
1057 switch (src0.type) {
1058 case BRW_REGISTER_TYPE_B:
1059 case BRW_REGISTER_TYPE_UB:
1060 case BRW_REGISTER_TYPE_W:
1061 case BRW_REGISTER_TYPE_UW:
1062 case BRW_REGISTER_TYPE_D:
1063 case BRW_REGISTER_TYPE_UD:
1064 break;
1065 default:
1066 assert(!"Bad type for brw_AVG");
1067 }
1068
1069 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1070 }
1071
1072 struct brw_instruction *brw_MUL(struct brw_compile *p,
1073 struct brw_reg dest,
1074 struct brw_reg src0,
1075 struct brw_reg src1)
1076 {
1077 /* 6.32.38: mul */
1078 if (src0.type == BRW_REGISTER_TYPE_D ||
1079 src0.type == BRW_REGISTER_TYPE_UD ||
1080 src1.type == BRW_REGISTER_TYPE_D ||
1081 src1.type == BRW_REGISTER_TYPE_UD) {
1082 assert(dest.type != BRW_REGISTER_TYPE_F);
1083 }
1084
1085 if (src0.type == BRW_REGISTER_TYPE_F ||
1086 (src0.file == BRW_IMMEDIATE_VALUE &&
1087 src0.type == BRW_REGISTER_TYPE_VF)) {
1088 assert(src1.type != BRW_REGISTER_TYPE_UD);
1089 assert(src1.type != BRW_REGISTER_TYPE_D);
1090 }
1091
1092 if (src1.type == BRW_REGISTER_TYPE_F ||
1093 (src1.file == BRW_IMMEDIATE_VALUE &&
1094 src1.type == BRW_REGISTER_TYPE_VF)) {
1095 assert(src0.type != BRW_REGISTER_TYPE_UD);
1096 assert(src0.type != BRW_REGISTER_TYPE_D);
1097 }
1098
1099 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1100 src0.nr != BRW_ARF_ACCUMULATOR);
1101 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1102 src1.nr != BRW_ARF_ACCUMULATOR);
1103
1104 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1105 }
1106
1107
1108 void brw_NOP(struct brw_compile *p)
1109 {
1110 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1111 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1112 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1113 brw_set_src1(p, insn, brw_imm_ud(0x0));
1114 }
1115
1116
1117
1118
1119
1120 /***********************************************************************
1121 * Comparisons, if/else/endif
1122 */
1123
1124 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1125 struct brw_reg dest,
1126 struct brw_reg src0,
1127 struct brw_reg src1)
1128 {
1129 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1130
1131 insn->header.execution_size = 1;
1132 insn->header.compression_control = BRW_COMPRESSION_NONE;
1133 insn->header.mask_control = BRW_MASK_DISABLE;
1134
1135 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1136
1137 return insn;
1138 }
1139
/* Record an IF (or ELSE) instruction on the if-stack so brw_ENDIF can find
 * and patch it later.  Stores the instruction's index rather than its
 * pointer, because p->store may be reallocated by next_insn().  Grows the
 * stack after the write; the check keeps array_size strictly greater than
 * depth, so the next write is always in bounds.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1152
/* Pop the most recent IF/ELSE entry off the if-stack and return a pointer
 * to the instruction, resolved against the current p->store base.
 */
static struct brw_instruction *
pop_if_stack(struct brw_compile *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}
1159
1160 static void
1161 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1162 {
1163 if (p->loop_stack_array_size < p->loop_stack_depth) {
1164 p->loop_stack_array_size *= 2;
1165 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1166 p->loop_stack_array_size);
1167 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1168 p->loop_stack_array_size);
1169 }
1170
1171 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1172 p->loop_stack_depth++;
1173 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1174 }
1175
/* Return a pointer to the DO instruction (or loop-start slot on gen6+) of
 * the innermost loop currently on the loop stack.
 */
static struct brw_instruction *
get_inner_do_insn(struct brw_compile *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}
1181
1182 /* EU takes the value from the flag register and pushes it onto some
1183 * sort of a stack (presumably merging with any flag value already on
1184 * the stack). Within an if block, the flags at the top of the stack
1185 * control execution on each channel of the unit, eg. on each of the
1186 * 16 pixel values in our wm programs.
1187 *
1188 * When the matching 'else' instruction is reached (presumably by
1189 * countdown of the instruction count patched in by our ELSE/ENDIF
1190 * functions), the relevent flags are inverted.
1191 *
1192 * When the matching 'endif' instruction is reached, the flags are
1193 * popped off. If the stack is now empty, normal execution resumes.
1194 */
/* Emit an IF instruction with the given execution size and push it on the
 * if-stack; the jump targets are filled in later by patch_IF_ELSE() when
 * the matching brw_ENDIF() is emitted.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      /* Gen4/5 IF jumps by adding to IP; jump_count is patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6 encodes the branch offset in bits1; zero until patched. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7+ uses JIP/UIP offsets in bits3; zero until patched. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Don't propagate the IF's predication to subsequent instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1235
1236 /* This function is only used for gen6-style IF instructions with an
1237 * embedded comparison (conditional modifier). It is not used on gen7.
1238 */
1239 struct brw_instruction *
1240 gen6_IF(struct brw_compile *p, uint32_t conditional,
1241 struct brw_reg src0, struct brw_reg src1)
1242 {
1243 struct brw_instruction *insn;
1244
1245 insn = next_insn(p, BRW_OPCODE_IF);
1246
1247 brw_set_dest(p, insn, brw_imm_w(0));
1248 if (p->compressed) {
1249 insn->header.execution_size = BRW_EXECUTE_16;
1250 } else {
1251 insn->header.execution_size = BRW_EXECUTE_8;
1252 }
1253 insn->bits1.branch_gen6.jump_count = 0;
1254 brw_set_src0(p, insn, src0);
1255 brw_set_src1(p, insn, src1);
1256
1257 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1258 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1259 insn->header.destreg__conditionalmod = conditional;
1260
1261 if (!p->single_program_flow)
1262 insn->header.thread_control = BRW_THREAD_SWITCH;
1263
1264 push_if_stack(p, insn);
1265 return insn;
1266 }
1267
1268 /**
1269 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1270 */
/* In single-program-flow mode (scalar execution), rewrite already-emitted
 * IF/ELSE instructions as predicated ADDs to IP, skipping the mask-stack
 * machinery entirely.  Called from brw_ENDIF on gen4/5 only.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* The ADD immediates are byte offsets: 16 bytes per instruction. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1307
1308 /**
1309 * Patch IF and ELSE instructions with appropriate jump targets.
1310 */
/* Fill in the jump offsets of a matched IF / (optional) ELSE / ENDIF
 * triple, using the generation-specific encoding: jump_count in bits3
 * (gen4/5, where IF-without-ELSE becomes IFF), jump_count in bits1
 * (gen6), or JIP/UIP in bits3 (gen7+).  Called from brw_ENDIF.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7+: both the jump and "all channels false" targets are ENDIF. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1397
/* Emit an ELSE instruction and push it on the if-stack; its jump targets
 * are patched later by patch_IF_ELSE() when the matching brw_ENDIF() is
 * emitted.  The operand encodings mirror brw_IF() for each generation.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      /* Gen4/5 ELSE jumps by adding to IP; patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: JIP/UIP start at zero and are patched later. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1430
/* Close the innermost IF (and optional ELSE): pop them from the if-stack,
 * emit an ENDIF where needed, and patch all jump offsets.  On gen4/5 in
 * single-program-flow mode the IF/ELSE are instead converted to ADDs on
 * IP and no ENDIF is emitted.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Generation-specific operand encodings for the ENDIF itself. */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1509
1510 struct brw_instruction *brw_BREAK(struct brw_compile *p)
1511 {
1512 struct brw_context *brw = p->brw;
1513 struct brw_instruction *insn;
1514
1515 insn = next_insn(p, BRW_OPCODE_BREAK);
1516 if (brw->gen >= 6) {
1517 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1518 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1519 brw_set_src1(p, insn, brw_imm_d(0x0));
1520 } else {
1521 brw_set_dest(p, insn, brw_ip_reg());
1522 brw_set_src0(p, insn, brw_ip_reg());
1523 brw_set_src1(p, insn, brw_imm_d(0x0));
1524 insn->bits3.if_else.pad0 = 0;
1525 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1526 }
1527 insn->header.compression_control = BRW_COMPRESSION_NONE;
1528 insn->header.execution_size = BRW_EXECUTE_8;
1529
1530 return insn;
1531 }
1532
1533 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1534 {
1535 struct brw_instruction *insn;
1536
1537 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1538 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1539 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1540 brw_set_dest(p, insn, brw_ip_reg());
1541 brw_set_src0(p, insn, brw_ip_reg());
1542 brw_set_src1(p, insn, brw_imm_d(0x0));
1543
1544 insn->header.compression_control = BRW_COMPRESSION_NONE;
1545 insn->header.execution_size = BRW_EXECUTE_8;
1546 return insn;
1547 }
1548
/* Emit a pre-gen6 CONTINUE instruction.  Like BREAK, it operates on IP
 * and records how many mask-stack entries (open IFs inside the current
 * loop) to pop; the jump count is patched later by brw_patch_break_cont().
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1563
1564 struct brw_instruction *gen6_HALT(struct brw_compile *p)
1565 {
1566 struct brw_instruction *insn;
1567
1568 insn = next_insn(p, BRW_OPCODE_HALT);
1569 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1570 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1571 brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1572
1573 if (p->compressed) {
1574 insn->header.execution_size = BRW_EXECUTE_16;
1575 } else {
1576 insn->header.compression_control = BRW_COMPRESSION_NONE;
1577 insn->header.execution_size = BRW_EXECUTE_8;
1578 }
1579 return insn;
1580 }
1581
1582 /* DO/WHILE loop:
1583 *
1584 * The DO/WHILE is just an unterminated loop -- break or continue are
1585 * used for control within the loop. We have a few ways they can be
1586 * done.
1587 *
1588 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1589 * jip and no DO instruction.
1590 *
1591 * For non-uniform control flow pre-gen6, there's a DO instruction to
1592 * push the mask, and a WHILE to jump back, and BREAK to get out and
1593 * pop the mask.
1594 *
1595 * For gen6, there's no more mask stack, so no need for DO. WHILE
1596 * just points back to the first instruction of the loop.
1597 */
/* Open a DO/WHILE loop.  On gen6+ (and in single-program-flow mode) no DO
 * instruction is emitted -- the next instruction slot is simply recorded on
 * the loop stack as the loop start.  Pre-gen6, an actual DO instruction is
 * emitted to push the execution mask.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1625
1626 /**
1627 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1628 * instruction here.
1629 *
1630 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1631 * nesting, since it can always just point to the end of the block/current loop.
1632 */
/* Walk backwards from the WHILE to the matching DO, patching the jump
 * counts of any BREAK/CONT instructions in between: BREAK jumps just past
 * the WHILE, CONT jumps to the WHILE itself.  br scales instruction
 * offsets to the hardware's 64-bit jump units (2 per insn on gen5).
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   int br = (brw->gen == 5) ? 2 : 1;

   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1655
/* Close a DO/WHILE loop: emit the WHILE (or, in pre-gen6 single-program-flow
 * mode, an ADD to IP) pointing back at the loop start recorded by brw_DO(),
 * patch any BREAK/CONT instructions on pre-gen6, and pop the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   unsigned br = 1;

   /* Jump counts are in 64-bit units on gen5+: 2 per 128-bit instruction. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backward jump to the first instruction of the loop body. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: the loop back-edge is just an ADD to IP (16 bytes/insn). */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1719
1720
1721 /* FORWARD JUMPS:
1722 */
1723 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1724 {
1725 struct brw_context *brw = p->brw;
1726 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1727 unsigned jmpi = 1;
1728
1729 if (brw->gen >= 5)
1730 jmpi = 2;
1731
1732 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1733 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1734
1735 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1736 }
1737
1738
1739
1740 /* To integrate with the above, it makes sense that the comparison
1741 * instruction should populate the flag register. It might be simpler
1742 * just to use the flag reg for most WM tasks?
1743 */
/* Emit a CMP instruction with the given conditional modifier, writing the
 * per-channel result to the flag register (and to dest).  When dest is the
 * null register, also enables predication for subsequent instructions.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1785
1786 /* Issue 'wait' instruction for n1, host could program MMIO
1787 to wake up thread. */
1788 void brw_WAIT (struct brw_compile *p)
1789 {
1790 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1791 struct brw_reg src = brw_notification_1_reg();
1792
1793 brw_set_dest(p, insn, src);
1794 brw_set_src0(p, insn, src);
1795 brw_set_src1(p, insn, brw_null_reg());
1796 insn->header.execution_size = 0; /* must */
1797 insn->header.predicate_control = 0;
1798 insn->header.compression_control = 0;
1799 }
1800
1801
1802 /***********************************************************************
1803 * Helpers for the various SEND message types:
1804 */
1805
1806 /** Extended math function, float[8].
1807 */
/* Emit an extended-math operation on one source.  On gen6+ this is a
 * native MATH instruction (the function code goes in the conditional-
 * modifier/thread-control bits); pre-gen6 it is a SEND to the shared math
 * unit, with msg_reg_nr/data_type/precision describing the message.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       unsigned msg_reg_nr,
	       struct brw_reg src,
	       unsigned data_type,
	       unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* INT DIV variants take integer sources; everything else is float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1870
1871 /** Extended math function, float[8].
1872 */
/* Emit a two-source extended-math operation (gen6+ only: POW and the
 * INT DIV variants take two sources).  Uses the native MATH instruction;
 * the function code goes in the conditional-modifier bits.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* INT DIV variants take integer sources; everything else is float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1920
1921
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On Gen6+ the header's offset field is in owords, not bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message length is one header MRF plus the data MRF(s). */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      /* The SEND itself may not be compressed; widen the header source
       * instead when we were asked for a compressed instruction.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      /* For SEND, this field encodes the base message register number. */
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* Gen6+ sources the payload from the message registers directly. */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
2030
2031
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* On Gen6+ the header's offset field is in owords, not bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* rlen is the number of destination registers the read returns. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: a copy of g0 with the scratch
    * offset written into element 2, leaving g0 itself untouched.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      /* For SEND, this field encodes the base message register number. */
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);   /* UW? */
      /* Gen6+ sources the payload from the message registers directly. */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              true, /* header_present */
                              rlen);
   }
}
2106
/**
 * Read num_regs registers (1, 2, or 4) from scratch space at the given
 * byte offset, using the Gen7 data cache scratch block read message.
 *
 * The scratch base comes from g0.5 via the message header; the per-read
 * offset is encoded in the message descriptor in HWORD (32-byte) units.
 */
void
gen7_block_read_scratch(struct brw_compile *p,
                        struct brw_reg dest,
                        int num_regs,
                        unsigned offset)
{
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   brw_set_dest(p, insn, dest);

   /* The HW requires that the header is present; this is to get the g0.5
    * scratch offset.
    */
   bool header_present = true;
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));

   brw_set_message_descriptor(p, insn,
                              GEN7_SFID_DATAPORT_DATA_CACHE,
                              1, /* mlen: just g0 */
                              num_regs,
                              header_present,
                              false);

   insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;

   /* The block size field encodes the register count as num_regs - 1. */
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
   insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;

   /* According to the docs, offset is "A 12-bit HWord offset into the memory
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
    * is 32 bytes, which happens to be the size of a register.
    */
   offset /= REG_SIZE;
   assert(offset < (1 << 12));
   insn->bits3.ud |= offset;
}
2148
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Build the header and send unpredicated/uncompressed so the MOVs
    * write regardless of channel enables.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   /* For SEND, this field encodes the base message register number. */
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   /* Gen6+ sources the payload from the message registers directly. */
   if (brw->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
2207
2208
/**
 * Emit a render target (framebuffer) write SEND message.
 *
 * Gen6+ uses the SENDC opcode and sources the color payload directly from
 * the message registers; earlier gens use plain SEND with the base message
 * register encoded in the instruction header.
 *
 * eot should be set on the last render target write of the thread.
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  unsigned msg_reg_nr,
                  struct brw_reg src0,
                  unsigned msg_control,
                  unsigned binding_table_index,
                  unsigned msg_length,
                  unsigned response_length,
                  bool eot,
                  bool header_present)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;
   unsigned msg_type;
   struct brw_reg dest;

   /* Destination is the null register, widened to match dispatch width. */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (brw->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (brw->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}
2264
2265
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* A msg_reg_nr of -1 skips the Gen6+ implied payload move —
    * presumably the caller has already placed src0; verify at call sites.
    */
   if (msg_reg_nr != -1)
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send).  The hardware behavior is undefined if this instruction is
    *     set as compressed.  However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
      insn->header.compression_control = BRW_COMPRESSION_NONE;

   /* Pre-Gen6 SEND encodes the base message register in the header. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}
2323
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      /* OR 0xff00 into dword 5 of the message header (taken from g0.5),
       * setting all the channel mask bits.
       */
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-Gen6 SEND encodes the base message register in the header. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
                       insn,
                       flags,
                       msg_length,
                       response_length,
                       offset,
                       swizzle);
}
2374
2375 static int
2376 next_ip(struct brw_compile *p, int ip)
2377 {
2378 struct brw_instruction *insn = (void *)p->store + ip;
2379
2380 if (insn->header.cmpt_control)
2381 return ip + 8;
2382 else
2383 return ip + 16;
2384 }
2385
2386 static int
2387 brw_find_next_block_end(struct brw_compile *p, int start)
2388 {
2389 int ip;
2390 void *store = p->store;
2391
2392 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2393 struct brw_instruction *insn = store + ip;
2394
2395 switch (insn->header.opcode) {
2396 case BRW_OPCODE_ENDIF:
2397 case BRW_OPCODE_ELSE:
2398 case BRW_OPCODE_WHILE:
2399 case BRW_OPCODE_HALT:
2400 return ip;
2401 }
2402 }
2403
2404 return 0;
2405 }
2406
2407 /* There is no DO instruction on gen6, so to find the end of the loop
2408 * we have to see if the loop is jumping back before our start
2409 * instruction.
2410 */
2411 static int
2412 brw_find_loop_end(struct brw_compile *p, int start)
2413 {
2414 struct brw_context *brw = p->brw;
2415 int ip;
2416 int scale = 8;
2417 void *store = p->store;
2418
2419 /* Always start after the instruction (such as a WHILE) we're trying to fix
2420 * up.
2421 */
2422 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2423 struct brw_instruction *insn = store + ip;
2424
2425 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2426 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
2427 : insn->bits3.break_cont.jip;
2428 if (ip + jip * scale <= start)
2429 return ip;
2430 }
2431 }
2432 assert(!"not reached");
2433 return start;
2434 }
2435
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   int scale = 8;   /* branch offsets are encoded in units of 8 bytes */
   void *store = p->store;

   /* These fixups only apply to the Gen6+ flow-control encoding. */
   if (brw->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
         assert(insn->header.opcode != BRW_OPCODE_BREAK &&
                insn->header.opcode != BRW_OPCODE_CONTINUE &&
                insn->header.opcode != BRW_OPCODE_HALT);
         continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip +
             (brw->gen == 6 ? 16 : 0)) / scale;
         break;
      case BRW_OPCODE_CONTINUE:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip) / scale;

         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;

      case BRW_OPCODE_ENDIF:
         /* With no later block end, jump past this instruction: 2 in
          * 8-byte units is one full 16-byte instruction.
          */
         if (block_end_ip == 0)
            insn->bits3.break_cont.jip = 2;
         else
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         break;

      case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same.  In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         if (block_end_ip == 0) {
            insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
         } else {
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         }
         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;
      }
   }
}
2511
2512 void brw_ff_sync(struct brw_compile *p,
2513 struct brw_reg dest,
2514 unsigned msg_reg_nr,
2515 struct brw_reg src0,
2516 bool allocate,
2517 unsigned response_length,
2518 bool eot)
2519 {
2520 struct brw_context *brw = p->brw;
2521 struct brw_instruction *insn;
2522
2523 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2524
2525 insn = next_insn(p, BRW_OPCODE_SEND);
2526 brw_set_dest(p, insn, dest);
2527 brw_set_src0(p, insn, src0);
2528 brw_set_src1(p, insn, brw_imm_d(0));
2529
2530 if (brw->gen < 6)
2531 insn->header.destreg__conditionalmod = msg_reg_nr;
2532
2533 brw_set_ff_sync_message(p,
2534 insn,
2535 allocate,
2536 response_length,
2537 eot);
2538 }
2539
2540 /**
2541 * Emit the SEND instruction necessary to generate stream output data on Gen6
2542 * (for transform feedback).
2543 *
2544 * If send_commit_msg is true, this is the last piece of stream output data
2545 * from this thread, so send the data as a committed write. According to the
2546 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2547 *
2548 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2549 * writes are complete by sending the final write as a committed write."
2550 */
2551 void
2552 brw_svb_write(struct brw_compile *p,
2553 struct brw_reg dest,
2554 unsigned msg_reg_nr,
2555 struct brw_reg src0,
2556 unsigned binding_table_index,
2557 bool send_commit_msg)
2558 {
2559 struct brw_instruction *insn;
2560
2561 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2562
2563 insn = next_insn(p, BRW_OPCODE_SEND);
2564 brw_set_dest(p, insn, dest);
2565 brw_set_src0(p, insn, src0);
2566 brw_set_src1(p, insn, brw_imm_d(0));
2567 brw_set_dp_write_message(p, insn,
2568 binding_table_index,
2569 0, /* msg_control: ignored */
2570 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2571 1, /* msg_length */
2572 true, /* header_present */
2573 0, /* last_render_target: ignored */
2574 send_commit_msg, /* response_length */
2575 0, /* end_of_thread */
2576 send_commit_msg); /* send_commit_msg */
2577 }
2578
/**
 * Fill in the message descriptor for an untyped atomic operation SEND.
 *
 * Haswell routes these through data cache port 1, with separate Align1 and
 * Align16 (SIMD4x2) message types; pre-Haswell Gen7 uses the original data
 * cache SFID.  The atomic opcode lands in descriptor bits [8..] and bit 13
 * requests return data.
 */
static void
brw_set_dp_untyped_atomic_message(struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned atomic_op,
                                  unsigned bind_table_index,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  bool header_present)
{
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);


      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* Anything narrower than SIMD16 is sent as a SIMD8 message. */
         if (insn->header.execution_size != BRW_EXECUTE_16)
            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */

         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }

   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;

      if (insn->header.execution_size != BRW_EXECUTE_16)
         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
   }

   if (response_length)
      insn->bits3.ud |= 1 << 13; /* Return data expected */

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
   insn->bits3.ud |= atomic_op << 8;
}
2622
2623 void
2624 brw_untyped_atomic(struct brw_compile *p,
2625 struct brw_reg dest,
2626 struct brw_reg mrf,
2627 unsigned atomic_op,
2628 unsigned bind_table_index,
2629 unsigned msg_length,
2630 unsigned response_length) {
2631 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
2632
2633 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2634 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2635 brw_set_src1(p, insn, brw_imm_d(0));
2636 brw_set_dp_untyped_atomic_message(
2637 p, insn, atomic_op, bind_table_index, msg_length, response_length,
2638 insn->header.access_mode == BRW_ALIGN_1);
2639 }
2640
/**
 * Fill in the message descriptor for an untyped surface read SEND.
 *
 * The channel mask in descriptor bits [8..11] marks which 32-bit channels
 * to DROP; it is derived from response_length and the execution size.
 */
static void
brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
                                        struct brw_instruction *insn,
                                        unsigned bind_table_index,
                                        unsigned msg_length,
                                        unsigned response_length,
                                        bool header_present)
{
   /* SIMD16 returns two registers per channel; SIMD8 returns one. */
   const unsigned dispatch_width =
      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
   const unsigned num_channels = response_length / (dispatch_width / 8);

   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
   }

   /* The SIMD mode field is only meaningful in Align1 access mode. */
   if (insn->header.access_mode == BRW_ALIGN_1) {
      if (dispatch_width == 16)
         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
      else
         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
   }

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;

   /* Set mask of 32-bit channels to drop. */
   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
}
2679
2680 void
2681 brw_untyped_surface_read(struct brw_compile *p,
2682 struct brw_reg dest,
2683 struct brw_reg mrf,
2684 unsigned bind_table_index,
2685 unsigned msg_length,
2686 unsigned response_length)
2687 {
2688 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2689
2690 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2691 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2692 brw_set_dp_untyped_surface_read_message(
2693 p, insn, bind_table_index, msg_length, response_length,
2694 insn->header.access_mode == BRW_ALIGN_1);
2695 }
2696
/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   /* Untyped atomics (and this dataport path) only exist on Gen7+. */
   assert(brw->gen >= 7);

   /* Emit the SEND with Align1 access and the execution mask disabled;
    * push/pop so the caller's instruction state is untouched.
    */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));
   /* ADD with no response: fire-and-forget accumulation into the surface. */
   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
                                     2 /* message length */,
                                     0 /* response length */,
                                     false /* header present */);
}