36bb3ceaf4b5866a46c4036c3c034b980b57ee08
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 unsigned msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102
/**
 * Encode \p dest as the destination operand of \p insn.
 *
 * Handles both direct and register-indirect addressing, and both Align1
 * and Align16 access modes (the destination fields have different bit
 * layouts in each).  Also derives the instruction's execution size from
 * the destination width via guess_execution_size().
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/immediate-file destinations must name one of the 128 registers. */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On Gen7, MRFs are emulated with high GRF numbers. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A stride-0 destination is not encodable; promote it to 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: subregister is in 16-byte units and a writemask applies. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE) {
            assert(dest.dw1.bits.writemask != 0);
         }
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* A stride-0 destination is not encodable; promote it to 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
163
164 extern int reg_type_size[];
165
166 static void
167 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
168 {
169 int hstride_for_reg[] = {0, 1, 2, 4};
170 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
171 int width_for_reg[] = {1, 2, 4, 8, 16};
172 int execsize_for_reg[] = {1, 2, 4, 8, 16};
173 int width, hstride, vstride, execsize;
174
175 if (reg.file == BRW_IMMEDIATE_VALUE) {
176 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
177 * mean the destination has to be 128-bit aligned and the
178 * destination horiz stride has to be a word.
179 */
180 if (reg.type == BRW_REGISTER_TYPE_V) {
181 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
182 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
183 }
184
185 return;
186 }
187
188 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
189 reg.file == BRW_ARF_NULL)
190 return;
191
192 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
193 hstride = hstride_for_reg[reg.hstride];
194
195 if (reg.vstride == 0xf) {
196 vstride = -1;
197 } else {
198 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
199 vstride = vstride_for_reg[reg.vstride];
200 }
201
202 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
203 width = width_for_reg[reg.width];
204
205 assert(insn->header.execution_size >= 0 &&
206 insn->header.execution_size < Elements(execsize_for_reg));
207 execsize = execsize_for_reg[insn->header.execution_size];
208
209 /* Restrictions from 3.3.10: Register Region Restrictions. */
210 /* 3. */
211 assert(execsize >= width);
212
213 /* 4. */
214 if (execsize == width && hstride != 0) {
215 assert(vstride == -1 || vstride == width * hstride);
216 }
217
218 /* 5. */
219 if (execsize == width && hstride == 0) {
220 /* no restriction on vstride. */
221 }
222
223 /* 6. */
224 if (width == 1) {
225 assert(hstride == 0);
226 }
227
228 /* 7. */
229 if (execsize == 1 && width == 1) {
230 assert(hstride == 0);
231 assert(vstride == 0);
232 }
233
234 /* 8. */
235 if (vstride == 0 && hstride == 0) {
236 assert(width == 1);
237 }
238
239 /* 10. Check destination issues. */
240 }
241
242 void
243 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
244 struct brw_reg reg)
245 {
246 struct brw_context *brw = p->brw;
247
248 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
249 assert(reg.nr < 128);
250
251 gen7_convert_mrf_to_grf(p, &reg);
252
253 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
254 insn->header.opcode == BRW_OPCODE_SENDC)) {
255 /* Any source modifiers or regions will be ignored, since this just
256 * identifies the MRF/GRF to start reading the message contents from.
257 * Check for some likely failures.
258 */
259 assert(!reg.negate);
260 assert(!reg.abs);
261 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
262 }
263
264 validate_reg(insn, reg);
265
266 insn->bits1.da1.src0_reg_file = reg.file;
267 insn->bits1.da1.src0_reg_type = reg.type;
268 insn->bits2.da1.src0_abs = reg.abs;
269 insn->bits2.da1.src0_negate = reg.negate;
270 insn->bits2.da1.src0_address_mode = reg.address_mode;
271
272 if (reg.file == BRW_IMMEDIATE_VALUE) {
273 insn->bits3.ud = reg.dw1.ud;
274
275 /* Required to set some fields in src1 as well:
276 */
277 insn->bits1.da1.src1_reg_file = 0; /* arf */
278 insn->bits1.da1.src1_reg_type = reg.type;
279 }
280 else
281 {
282 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
283 if (insn->header.access_mode == BRW_ALIGN_1) {
284 insn->bits2.da1.src0_subreg_nr = reg.subnr;
285 insn->bits2.da1.src0_reg_nr = reg.nr;
286 }
287 else {
288 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
289 insn->bits2.da16.src0_reg_nr = reg.nr;
290 }
291 }
292 else {
293 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
294
295 if (insn->header.access_mode == BRW_ALIGN_1) {
296 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
297 }
298 else {
299 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
300 }
301 }
302
303 if (insn->header.access_mode == BRW_ALIGN_1) {
304 if (reg.width == BRW_WIDTH_1 &&
305 insn->header.execution_size == BRW_EXECUTE_1) {
306 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
307 insn->bits2.da1.src0_width = BRW_WIDTH_1;
308 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
309 }
310 else {
311 insn->bits2.da1.src0_horiz_stride = reg.hstride;
312 insn->bits2.da1.src0_width = reg.width;
313 insn->bits2.da1.src0_vert_stride = reg.vstride;
314 }
315 }
316 else {
317 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
318 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
319 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
320 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
321
322 /* This is an oddity of the fact we're using the same
323 * descriptions for registers in align_16 as align_1:
324 */
325 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
326 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
327 else
328 insn->bits2.da16.src0_vert_stride = reg.vstride;
329 }
330 }
331 }
332
333
334 void brw_set_src1(struct brw_compile *p,
335 struct brw_instruction *insn,
336 struct brw_reg reg)
337 {
338 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
339
340 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
341 assert(reg.nr < 128);
342
343 gen7_convert_mrf_to_grf(p, &reg);
344
345 validate_reg(insn, reg);
346
347 insn->bits1.da1.src1_reg_file = reg.file;
348 insn->bits1.da1.src1_reg_type = reg.type;
349 insn->bits3.da1.src1_abs = reg.abs;
350 insn->bits3.da1.src1_negate = reg.negate;
351
352 /* Only src1 can be immediate in two-argument instructions.
353 */
354 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
355
356 if (reg.file == BRW_IMMEDIATE_VALUE) {
357 insn->bits3.ud = reg.dw1.ud;
358 }
359 else {
360 /* This is a hardware restriction, which may or may not be lifted
361 * in the future:
362 */
363 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
364 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
365
366 if (insn->header.access_mode == BRW_ALIGN_1) {
367 insn->bits3.da1.src1_subreg_nr = reg.subnr;
368 insn->bits3.da1.src1_reg_nr = reg.nr;
369 }
370 else {
371 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
372 insn->bits3.da16.src1_reg_nr = reg.nr;
373 }
374
375 if (insn->header.access_mode == BRW_ALIGN_1) {
376 if (reg.width == BRW_WIDTH_1 &&
377 insn->header.execution_size == BRW_EXECUTE_1) {
378 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
379 insn->bits3.da1.src1_width = BRW_WIDTH_1;
380 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
381 }
382 else {
383 insn->bits3.da1.src1_horiz_stride = reg.hstride;
384 insn->bits3.da1.src1_width = reg.width;
385 insn->bits3.da1.src1_vert_stride = reg.vstride;
386 }
387 }
388 else {
389 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
390 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
391 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
392 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
393
394 /* This is an oddity of the fact we're using the same
395 * descriptions for registers in align_16 as align_1:
396 */
397 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
398 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
399 else
400 insn->bits3.da16.src1_vert_stride = reg.vstride;
401 }
402 }
403 }
404
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct brw_context *brw = p->brw;

   /* Writing an immediate-0 src1 clears the whole descriptor dword. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (brw->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (brw->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Pre-Gen5 descriptor layout. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
447
/**
 * Fill out the message descriptor for a SEND to the extended math unit.
 *
 * Message and response lengths are inferred from \p function: POW and the
 * integer-division variants take a second operand register, and SINCOS
 * and QUOTIENT_AND_REMAINDER produce a second result register.
 *
 * Saturation for math messages is encoded in the descriptor rather than
 * the instruction header, so the header's saturate bit is copied into the
 * descriptor and then cleared.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  unsigned function,
				  unsigned integer_type,
				  bool low_precision,
				  unsigned dataType )
{
   struct brw_context *brw = p->brw;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }


   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
			      msg_length, response_length, false, false);
   if (brw->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }
   /* Saturate now lives in the descriptor; clear the header copy. */
   insn->header.saturate = 0;
}
502
503
/**
 * Fill out the descriptor for an FF_SYNC URB message (Gen5 urb_gen5
 * descriptor layout).  The message is always one register long with a
 * header present; only the allocate bit and response length vary.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    unsigned response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
519
/**
 * Fill out the message descriptor for a URB write.
 *
 * Selects the Gen7, Gen5/6, or original descriptor layout as appropriate.
 * \p flags controls EOT, allocation, per-slot offset, OWord vs HWord
 * writes, and completion behavior.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 enum brw_urb_write_flags flags,
				 unsigned msg_length,
				 unsigned response_length,
				 unsigned offset,
				 unsigned swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true,
			      flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      if (flags & BRW_URB_WRITE_OWORD) {
	 assert(msg_length == 2); /* header + one OWORD of data */
	 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
      } else {
	 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
      }
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes do not support the transpose swizzle. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
	 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
562
/**
 * Fill out the message descriptor for a data-port write.
 *
 * Chooses the shared-function ID (render cache vs data cache) based on
 * the hardware generation and message type, then encodes the descriptor
 * in the generation-specific layout.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 unsigned binding_table_index,
			 unsigned msg_control,
			 unsigned msg_type,
			 unsigned msg_length,
			 bool header_present,
			 unsigned last_render_target,
			 unsigned response_length,
			 unsigned end_of_thread,
			 unsigned send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
620
/**
 * Fill out the message descriptor for a data-port read.
 *
 * Chooses the shared-function ID from the generation and requested
 * target cache, then encodes the descriptor in the generation-specific
 * layout (Gen7 / Gen6 / Gen5 / G4x / original).
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			unsigned binding_table_index,
			unsigned msg_control,
			unsigned msg_type,
			unsigned target_cache,
			unsigned msg_length,
			bool header_present,
			unsigned response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
677
/**
 * Fill out the message descriptor for a sampler message.
 *
 * Encodes the descriptor in the Gen7 / Gen5-6 / G4x / original layout.
 * \p return_format is only encodable on the original (pre-G4x) layout;
 * \p simd_mode only on Gen5 and later.
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
716
717
718 #define next_insn brw_next_insn
719 struct brw_instruction *
720 brw_next_insn(struct brw_compile *p, unsigned opcode)
721 {
722 struct brw_instruction *insn;
723
724 if (p->nr_insn + 1 > p->store_size) {
725 if (0)
726 printf("incresing the store size to %d\n", p->store_size << 1);
727 p->store_size <<= 1;
728 p->store = reralloc(p->mem_ctx, p->store,
729 struct brw_instruction, p->store_size);
730 if (!p->store)
731 assert(!"realloc eu store memeory failed");
732 }
733
734 p->next_insn_offset += 16;
735 insn = &p->store[p->nr_insn++];
736 memcpy(insn, p->current, sizeof(*insn));
737
738 /* Reset this one-shot flag:
739 */
740
741 if (p->current->header.destreg__conditionalmod) {
742 p->current->header.destreg__conditionalmod = 0;
743 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
744 }
745
746 insn->header.opcode = opcode;
747 return insn;
748 }
749
750 static struct brw_instruction *brw_alu1( struct brw_compile *p,
751 unsigned opcode,
752 struct brw_reg dest,
753 struct brw_reg src )
754 {
755 struct brw_instruction *insn = next_insn(p, opcode);
756 brw_set_dest(p, insn, dest);
757 brw_set_src0(p, insn, src);
758 return insn;
759 }
760
761 static struct brw_instruction *brw_alu2(struct brw_compile *p,
762 unsigned opcode,
763 struct brw_reg dest,
764 struct brw_reg src0,
765 struct brw_reg src1 )
766 {
767 struct brw_instruction *insn = next_insn(p, opcode);
768 brw_set_dest(p, insn, dest);
769 brw_set_src0(p, insn, src0);
770 brw_set_src1(p, insn, src1);
771 return insn;
772 }
773
774 static int
775 get_3src_subreg_nr(struct brw_reg reg)
776 {
777 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
778 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
779 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
780 } else {
781 return reg.subnr / 4;
782 }
783 }
784
/**
 * Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2, ...).
 *
 * 3-src instructions use their own compact encoding: Align16 only, GRF
 * (or MRF destination) only, dword-granular subregisters, and a single
 * shared type field for all sources derived from the destination type
 * on Gen7+.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					unsigned opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   /* On Gen7, MRFs are emulated with high GRF numbers. */
   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* The 3-src encoding has a single bit for the destination file:
    * 0 = GRF, 1 = MRF.
    */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl replicates a scalar source across all channels. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   /* src1's subregister number is split across two instruction dwords. */
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
868
869
870 /***********************************************************************
871 * Convenience routines.
872 */
/* Boilerplate generators for the public emitters: each macro expands to a
 * brw_<OP>() wrapper that forwards to the matching brw_aluN() helper with
 * BRW_OPCODE_<OP>.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Like ALU3, but asserts that all four operands are floats (for MAD/LRP,
 * which require float types).
 */
#define ALU3F(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0,		\
				 struct brw_reg src1,		\
				 struct brw_reg src2)		\
{								\
   assert(dest.type == BRW_REGISTER_TYPE_F);			\
   assert(src0.type == BRW_REGISTER_TYPE_F);			\
   assert(src1.type == BRW_REGISTER_TYPE_F);			\
   assert(src2.type == BRW_REGISTER_TYPE_F);			\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
938
939
/* Instantiate the simple ALU emitters.  Each invocation below defines one
 * brw_*() function using the ALU1/ALU2/ALU3/ALU3F macros above.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

/* RNDZ/RNDE need the pre-gen6 round-increment fixup (see ROUND above). */
ROUND(RNDZ)
ROUND(RNDE)
976
977
978 struct brw_instruction *brw_ADD(struct brw_compile *p,
979 struct brw_reg dest,
980 struct brw_reg src0,
981 struct brw_reg src1)
982 {
983 /* 6.2.2: add */
984 if (src0.type == BRW_REGISTER_TYPE_F ||
985 (src0.file == BRW_IMMEDIATE_VALUE &&
986 src0.type == BRW_REGISTER_TYPE_VF)) {
987 assert(src1.type != BRW_REGISTER_TYPE_UD);
988 assert(src1.type != BRW_REGISTER_TYPE_D);
989 }
990
991 if (src1.type == BRW_REGISTER_TYPE_F ||
992 (src1.file == BRW_IMMEDIATE_VALUE &&
993 src1.type == BRW_REGISTER_TYPE_VF)) {
994 assert(src0.type != BRW_REGISTER_TYPE_UD);
995 assert(src0.type != BRW_REGISTER_TYPE_D);
996 }
997
998 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
999 }
1000
1001 struct brw_instruction *brw_AVG(struct brw_compile *p,
1002 struct brw_reg dest,
1003 struct brw_reg src0,
1004 struct brw_reg src1)
1005 {
1006 assert(dest.type == src0.type);
1007 assert(src0.type == src1.type);
1008 switch (src0.type) {
1009 case BRW_REGISTER_TYPE_B:
1010 case BRW_REGISTER_TYPE_UB:
1011 case BRW_REGISTER_TYPE_W:
1012 case BRW_REGISTER_TYPE_UW:
1013 case BRW_REGISTER_TYPE_D:
1014 case BRW_REGISTER_TYPE_UD:
1015 break;
1016 default:
1017 assert(!"Bad type for brw_AVG");
1018 }
1019
1020 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1021 }
1022
1023 struct brw_instruction *brw_MUL(struct brw_compile *p,
1024 struct brw_reg dest,
1025 struct brw_reg src0,
1026 struct brw_reg src1)
1027 {
1028 /* 6.32.38: mul */
1029 if (src0.type == BRW_REGISTER_TYPE_D ||
1030 src0.type == BRW_REGISTER_TYPE_UD ||
1031 src1.type == BRW_REGISTER_TYPE_D ||
1032 src1.type == BRW_REGISTER_TYPE_UD) {
1033 assert(dest.type != BRW_REGISTER_TYPE_F);
1034 }
1035
1036 if (src0.type == BRW_REGISTER_TYPE_F ||
1037 (src0.file == BRW_IMMEDIATE_VALUE &&
1038 src0.type == BRW_REGISTER_TYPE_VF)) {
1039 assert(src1.type != BRW_REGISTER_TYPE_UD);
1040 assert(src1.type != BRW_REGISTER_TYPE_D);
1041 }
1042
1043 if (src1.type == BRW_REGISTER_TYPE_F ||
1044 (src1.file == BRW_IMMEDIATE_VALUE &&
1045 src1.type == BRW_REGISTER_TYPE_VF)) {
1046 assert(src0.type != BRW_REGISTER_TYPE_UD);
1047 assert(src0.type != BRW_REGISTER_TYPE_D);
1048 }
1049
1050 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1051 src0.nr != BRW_ARF_ACCUMULATOR);
1052 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1053 src1.nr != BRW_ARF_ACCUMULATOR);
1054
1055 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1056 }
1057
1058
1059 void brw_NOP(struct brw_compile *p)
1060 {
1061 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1062 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1063 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1064 brw_set_src1(p, insn, brw_imm_ud(0x0));
1065 }
1066
1067
1068
1069
1070
1071 /***********************************************************************
1072 * Comparisons, if/else/endif
1073 */
1074
1075 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1076 struct brw_reg dest,
1077 struct brw_reg src0,
1078 struct brw_reg src1)
1079 {
1080 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1081
1082 insn->header.execution_size = 1;
1083 insn->header.compression_control = BRW_COMPRESSION_NONE;
1084 insn->header.mask_control = BRW_MASK_DISABLE;
1085
1086 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1087
1088 return insn;
1089 }
1090
1091 static void
1092 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1093 {
1094 p->if_stack[p->if_stack_depth] = inst - p->store;
1095
1096 p->if_stack_depth++;
1097 if (p->if_stack_array_size <= p->if_stack_depth) {
1098 p->if_stack_array_size *= 2;
1099 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1100 p->if_stack_array_size);
1101 }
1102 }
1103
1104 static struct brw_instruction *
1105 pop_if_stack(struct brw_compile *p)
1106 {
1107 p->if_stack_depth--;
1108 return &p->store[p->if_stack[p->if_stack_depth]];
1109 }
1110
1111 static void
1112 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1113 {
1114 if (p->loop_stack_array_size < p->loop_stack_depth) {
1115 p->loop_stack_array_size *= 2;
1116 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1117 p->loop_stack_array_size);
1118 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1119 p->loop_stack_array_size);
1120 }
1121
1122 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1123 p->loop_stack_depth++;
1124 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1125 }
1126
1127 static struct brw_instruction *
1128 get_inner_do_insn(struct brw_compile *p)
1129 {
1130 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1131 }
1132
1133 /* EU takes the value from the flag register and pushes it onto some
1134 * sort of a stack (presumably merging with any flag value already on
1135 * the stack). Within an if block, the flags at the top of the stack
1136 * control execution on each channel of the unit, eg. on each of the
1137 * 16 pixel values in our wm programs.
1138 *
1139 * When the matching 'else' instruction is reached (presumably by
1140 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1142 *
1143 * When the matching 'endif' instruction is reached, the flags are
1144 * popped off. If the stack is now empty, normal execution resumes.
1145 */
/**
 * Emit an IF instruction and push it onto the if-stack.
 *
 * The encoding is generation-specific: pre-gen6 IF operates on the IP
 * register; gen6 carries a 16-bit jump count in the destination slot;
 * gen7+ uses JIP/UIP offsets.  In all cases the jump targets are zero here
 * and are patched later by brw_ENDIF()/patch_IF_ELSE().
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Real JIP/UIP values are filled in by patch_IF_ELSE() at ENDIF time. */
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumed the predicate; don't predicate what follows. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1186
1187 /* This function is only used for gen6-style IF instructions with an
1188 * embedded comparison (conditional modifier). It is not used on gen7.
1189 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* On gen6 the jump count occupies the (immediate) destination slot; it
    * is patched later by patch_IF_ELSE().
    */
   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The comparison that decides the branch is embedded in the IF itself. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1218
1219 /**
1220 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1221 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* IP offsets are in bytes; each instruction is 16 bytes (128 bits). */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1258
1259 /**
1260 * Patch IF and ELSE instructions with appropriate jump targets.
1261 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   /* br scales an instruction-count difference into the hardware's jump
    * units.
    */
   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1348
/**
 * Emit an ELSE instruction and push it onto the if-stack on top of its
 * matching IF.  Jump targets are left zero here and patched later by
 * brw_ENDIF() via patch_IF_ELSE().
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1381
/**
 * Close an IF/ELSE block: pop the IF (and optional ELSE) off the if-stack,
 * emit an ENDIF where required, and patch the jump targets of the whole
 * construct.  On Gen4/5 in single-program-flow mode no ENDIF is emitted;
 * the IF/ELSE are converted to conditional ADDs on IP instead.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1460
/**
 * Emit a BREAK instruction.  Its jump target is filled in after the fact:
 * on pre-gen6, brw_patch_break_cont() patches it (a jump count of 0 marks
 * it as not yet patched) when the enclosing WHILE is emitted.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop the IF nesting inside this loop when the break is taken. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1483
1484 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1485 {
1486 struct brw_instruction *insn;
1487
1488 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1489 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1490 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1491 brw_set_dest(p, insn, brw_ip_reg());
1492 brw_set_src0(p, insn, brw_ip_reg());
1493 brw_set_src1(p, insn, brw_imm_d(0x0));
1494
1495 insn->header.compression_control = BRW_COMPRESSION_NONE;
1496 insn->header.execution_size = BRW_EXECUTE_8;
1497 return insn;
1498 }
1499
/**
 * Emit a pre-gen6 CONTINUE instruction.  The jump count is left zero and
 * patched later by brw_patch_break_cont() when the enclosing WHILE is
 * emitted.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   /* Pop the IF nesting inside this loop when the continue is taken. */
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1514
/**
 * Emit a HALT instruction (gen6+ encoding with null dest/src0).  The UIP
 * and JIP offsets in src1 are zero here and are updated later by the
 * caller.
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1532
1533 /* DO/WHILE loop:
1534 *
1535 * The DO/WHILE is just an unterminated loop -- break or continue are
1536 * used for control within the loop. We have a few ways they can be
1537 * done.
1538 *
1539 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1540 * jip and no DO instruction.
1541 *
1542 * For non-uniform control flow pre-gen6, there's a DO instruction to
1543 * push the mask, and a WHILE to jump back, and BREAK to get out and
1544 * pop the mask.
1545 *
1546 * For gen6, there's no more mask stack, so no need for DO. WHILE
1547 * just points back to the first instruction of the loop.
1548 */
/**
 * Open a DO/WHILE loop (see the big comment above).  On gen6+ and in
 * single-program-flow mode no DO instruction exists; we just record the
 * position of the next instruction on the loop stack so brw_WHILE() can
 * jump back to it.  Pre-gen6 emits a real DO instruction.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1576
1577 /**
1578 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1579 * instruction here.
1580 *
1581 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1582 * nesting, since it can always just point to the end of the block/current loop.
1583 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Jump counts are in 64-bit chunks on gen5, whole instructions before. */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, patching any
    * BREAK/CONT that still has a zero jump count.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1606
/**
 * Emit the WHILE that closes the innermost loop and pop the loop stack.
 *
 * gen7+ uses a JIP offset back to the loop top; gen6 puts the jump count
 * in the destination slot; pre-gen6 either emits a plain ADD on IP (in
 * single-program-flow mode) or a real WHILE, in which case intervening
 * BREAK/CONT instructions are patched via brw_patch_break_cont().
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   unsigned br = 1;

   /* Jump counts are in 64-bit chunks on gen5+, whole instructions before. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backward jump to the start of the loop. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 /* IP offsets are in bytes; each instruction is 16 bytes. */
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1670
1671
1672 /* FORWARD JUMPS:
1673 */
1674 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1675 {
1676 struct brw_context *brw = p->brw;
1677 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1678 unsigned jmpi = 1;
1679
1680 if (brw->gen >= 5)
1681 jmpi = 2;
1682
1683 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1684 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1685
1686 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1687 }
1688
1689
1690
1691 /* To integrate with the above, it makes sense that the comparison
1692 * instruction should populate the flag register. It might be simpler
1693 * just to use the flag reg for most WM tasks?
1694 */
/**
 * Emit a CMP instruction with the given conditional modifier, updating the
 * flag register.  If the destination is the null architecture register,
 * subsequent instructions default to normal predication on the computed
 * flag (and a thread switch is forced on gen7, per the workaround below).
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1736
/* Issue a 'wait' instruction on notification register n1; the host can
 * program MMIO to wake the thread up.
 */
1739 void brw_WAIT (struct brw_compile *p)
1740 {
1741 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1742 struct brw_reg src = brw_notification_1_reg();
1743
1744 brw_set_dest(p, insn, src);
1745 brw_set_src0(p, insn, src);
1746 brw_set_src1(p, insn, brw_null_reg());
1747 insn->header.execution_size = 0; /* must */
1748 insn->header.predicate_control = 0;
1749 insn->header.compression_control = 0;
1750 }
1751
1752
1753 /***********************************************************************
1754 * Helpers for the various SEND message types:
1755 */
1756
1757 /** Extended math function, float[8].
1758 */
/**
 * Emit a one-source extended math operation.
 *
 * On gen6+ this is a native MATH opcode and the msg_reg_nr, data_type and
 * precision parameters are ignored.  On earlier generations it is a SEND
 * to the math function: msg_reg_nr selects the message register and
 * data_type/precision go into the message descriptor.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       unsigned msg_reg_nr,
	       struct brw_reg src,
	       unsigned data_type,
	       unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer-divide functions take integer sources; everything else is
       * float-only.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1821
1822 /** Extended math function, float[8].
1823 */
/**
 * Emit a two-source extended math operation (gen6+ native MATH opcode
 * only -- both paths here use the MATH encoding, unlike brw_math()).
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer-divide functions take integer sources; everything else is
    * float-only.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1871
1872
1873 /**
1874 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1875 * using a constant offset per channel.
1876 *
1877 * The offset must be aligned to oword size (16 bytes). Used for
1878 * register spilling.
1879 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the header's global offset in owords (16 bytes); earlier
    * gens take it in bytes, so the caller's byte offset is used unscaled.
    */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF of payload is two owords; mlen includes one extra MRF for the
    * message header.
    *
    * NOTE(review): only 1 vs. "not 1" is distinguished here — num_regs > 2
    * would still emit a 4-oword write.  Presumably callers spill at most
    * 2 registers; verify against the spill code.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* SEND may not be emitted compressed; drop to uncompressed and widen
       * the header view to keep the commit-write destination consistent.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      /* For SEND this field is the message register number, not a
       * conditional modifier.
       */
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1981
1982
1983 /**
1984 * Read a block of owords (half a GRF each) from the scratch buffer
1985 * using a constant index per channel.
1986 *
1987 * Offset must be aligned to oword size (16 bytes). Used for register
1988 * spilling.
1989 */
1990 void
1991 brw_oword_block_read_scratch(struct brw_compile *p,
1992 struct brw_reg dest,
1993 struct brw_reg mrf,
1994 int num_regs,
1995 unsigned offset)
1996 {
1997 struct brw_context *brw = p->brw;
1998 uint32_t msg_control;
1999 int rlen;
2000
2001 if (brw->gen >= 6)
2002 offset /= 16;
2003
2004 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2005 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2006
2007 if (num_regs == 1) {
2008 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2009 rlen = 1;
2010 } else {
2011 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2012 rlen = 2;
2013 }
2014
2015 {
2016 brw_push_insn_state(p);
2017 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2018 brw_set_mask_control(p, BRW_MASK_DISABLE);
2019
2020 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2021
2022 /* set message header global offset field (reg 0, element 2) */
2023 brw_MOV(p,
2024 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2025 mrf.nr,
2026 2), BRW_REGISTER_TYPE_UD),
2027 brw_imm_ud(offset));
2028
2029 brw_pop_insn_state(p);
2030 }
2031
2032 {
2033 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2034
2035 assert(insn->header.predicate_control == 0);
2036 insn->header.compression_control = BRW_COMPRESSION_NONE;
2037 insn->header.destreg__conditionalmod = mrf.nr;
2038
2039 brw_set_dest(p, insn, dest); /* UW? */
2040 if (brw->gen >= 6) {
2041 brw_set_src0(p, insn, mrf);
2042 } else {
2043 brw_set_src0(p, insn, brw_null_reg());
2044 }
2045
2046 brw_set_dp_read_message(p,
2047 insn,
2048 255, /* binding table index (255=stateless) */
2049 msg_control,
2050 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2051 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2052 1, /* msg_length */
2053 true, /* header_present */
2054 rlen);
2055 }
2056 }
2057
2058 void
2059 gen7_block_read_scratch(struct brw_compile *p,
2060 struct brw_reg dest,
2061 int num_regs,
2062 unsigned offset)
2063 {
2064 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2065
2066 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2067
2068 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
2069 insn->header.compression_control = BRW_COMPRESSION_NONE;
2070
2071 brw_set_dest(p, insn, dest);
2072
2073 /* The HW requires that the header is present; this is to get the g0.5
2074 * scratch offset.
2075 */
2076 bool header_present = true;
2077 brw_set_src0(p, insn, brw_vec8_grf(0, 0));
2078
2079 brw_set_message_descriptor(p, insn,
2080 GEN7_SFID_DATAPORT_DATA_CACHE,
2081 1, /* mlen: just g0 */
2082 num_regs,
2083 header_present,
2084 false);
2085
2086 insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;
2087
2088 assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
2089 insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;
2090
2091 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2092 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2093 * is 32 bytes, which happens to be the size of a register.
2094 */
2095 offset /= REG_SIZE;
2096 assert(offset < (1 << 12));
2097 insn->bits3.ud |= offset;
2098 }
2099
2100 /**
2101 * Read a float[4] vector from the data port Data Cache (const buffer).
2102 * Location (in buffer) should be a multiple of 16.
2103 * Used for fetching shader constants.
2104 */
2105 void brw_oword_block_read(struct brw_compile *p,
2106 struct brw_reg dest,
2107 struct brw_reg mrf,
2108 uint32_t offset,
2109 uint32_t bind_table_index)
2110 {
2111 struct brw_context *brw = p->brw;
2112
2113 /* On newer hardware, offset is in units of owords. */
2114 if (brw->gen >= 6)
2115 offset /= 16;
2116
2117 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2118
2119 brw_push_insn_state(p);
2120 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2121 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2122 brw_set_mask_control(p, BRW_MASK_DISABLE);
2123
2124 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2125
2126 /* set message header global offset field (reg 0, element 2) */
2127 brw_MOV(p,
2128 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2129 mrf.nr,
2130 2), BRW_REGISTER_TYPE_UD),
2131 brw_imm_ud(offset));
2132
2133 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2134 insn->header.destreg__conditionalmod = mrf.nr;
2135
2136 /* cast dest to a uword[8] vector */
2137 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2138
2139 brw_set_dest(p, insn, dest);
2140 if (brw->gen >= 6) {
2141 brw_set_src0(p, insn, mrf);
2142 } else {
2143 brw_set_src0(p, insn, brw_null_reg());
2144 }
2145
2146 brw_set_dp_read_message(p,
2147 insn,
2148 bind_table_index,
2149 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2150 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2151 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2152 1, /* msg_length */
2153 true, /* header_present */
2154 1); /* response_length (1 reg, 2 owords!) */
2155
2156 brw_pop_insn_state(p);
2157 }
2158
2159
2160 void brw_fb_WRITE(struct brw_compile *p,
2161 int dispatch_width,
2162 unsigned msg_reg_nr,
2163 struct brw_reg src0,
2164 unsigned msg_control,
2165 unsigned binding_table_index,
2166 unsigned msg_length,
2167 unsigned response_length,
2168 bool eot,
2169 bool header_present)
2170 {
2171 struct brw_context *brw = p->brw;
2172 struct brw_instruction *insn;
2173 unsigned msg_type;
2174 struct brw_reg dest;
2175
2176 if (dispatch_width == 16)
2177 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2178 else
2179 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2180
2181 if (brw->gen >= 6) {
2182 insn = next_insn(p, BRW_OPCODE_SENDC);
2183 } else {
2184 insn = next_insn(p, BRW_OPCODE_SEND);
2185 }
2186 /* The execution mask is ignored for render target writes. */
2187 insn->header.predicate_control = 0;
2188 insn->header.compression_control = BRW_COMPRESSION_NONE;
2189
2190 if (brw->gen >= 6) {
2191 /* headerless version, just submit color payload */
2192 src0 = brw_message_reg(msg_reg_nr);
2193
2194 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2195 } else {
2196 insn->header.destreg__conditionalmod = msg_reg_nr;
2197
2198 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2199 }
2200
2201 brw_set_dest(p, insn, dest);
2202 brw_set_src0(p, insn, src0);
2203 brw_set_dp_write_message(p,
2204 insn,
2205 binding_table_index,
2206 msg_control,
2207 msg_type,
2208 msg_length,
2209 header_present,
2210 eot, /* last render target write */
2211 response_length,
2212 eot,
2213 0 /* send_commit_msg */);
2214 }
2215
2216
2217 /**
2218 * Texture sample instruction.
2219 * Note: the msg_type plus msg_length values determine exactly what kind
2220 * of sampling operation is performed. See volume 4, page 161 of docs.
2221 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		unsigned msg_reg_nr,
		struct brw_reg src0,
		unsigned binding_table_index,
		unsigned sampler,
		unsigned msg_type,
		unsigned response_length,
		unsigned msg_length,
		unsigned header_present,
		unsigned simd_mode,
		unsigned return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* NOTE(review): msg_reg_nr is unsigned, so this compares against
    * UINT_MAX; -1 presumably acts as a "no implied move" sentinel from the
    * caller — confirm against call sites.
    */
   if (msg_reg_nr != -1)
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send). The hardware behavior is undefined if this instruction is
    *     set as compressed. However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
      insn->header.compression_control = BRW_COMPRESSION_NONE;

   /* Pre-Gen6, the message register number goes in the instruction header;
    * for SEND this field is not a conditional modifier.
    */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}
2274
2275 /* All these variables are pretty confusing - we might be better off
2276 * using bitmasks and macros for this, in the old style. Or perhaps
2277 * just having the caller instantiate the fields in dword3 itself.
2278 */
2279 void brw_urb_WRITE(struct brw_compile *p,
2280 struct brw_reg dest,
2281 unsigned msg_reg_nr,
2282 struct brw_reg src0,
2283 enum brw_urb_write_flags flags,
2284 unsigned msg_length,
2285 unsigned response_length,
2286 unsigned offset,
2287 unsigned swizzle)
2288 {
2289 struct brw_context *brw = p->brw;
2290 struct brw_instruction *insn;
2291
2292 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2293
2294 if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
2295 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2296 brw_push_insn_state(p);
2297 brw_set_access_mode(p, BRW_ALIGN_1);
2298 brw_set_mask_control(p, BRW_MASK_DISABLE);
2299 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2300 BRW_REGISTER_TYPE_UD),
2301 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2302 brw_imm_ud(0xff00));
2303 brw_pop_insn_state(p);
2304 }
2305
2306 insn = next_insn(p, BRW_OPCODE_SEND);
2307
2308 assert(msg_length < BRW_MAX_MRF);
2309
2310 brw_set_dest(p, insn, dest);
2311 brw_set_src0(p, insn, src0);
2312 brw_set_src1(p, insn, brw_imm_d(0));
2313
2314 if (brw->gen < 6)
2315 insn->header.destreg__conditionalmod = msg_reg_nr;
2316
2317 brw_set_urb_message(p,
2318 insn,
2319 flags,
2320 msg_length,
2321 response_length,
2322 offset,
2323 swizzle);
2324 }
2325
2326 static int
2327 next_ip(struct brw_compile *p, int ip)
2328 {
2329 struct brw_instruction *insn = (void *)p->store + ip;
2330
2331 if (insn->header.cmpt_control)
2332 return ip + 8;
2333 else
2334 return ip + 16;
2335 }
2336
2337 static int
2338 brw_find_next_block_end(struct brw_compile *p, int start)
2339 {
2340 int ip;
2341 void *store = p->store;
2342
2343 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2344 struct brw_instruction *insn = store + ip;
2345
2346 switch (insn->header.opcode) {
2347 case BRW_OPCODE_ENDIF:
2348 case BRW_OPCODE_ELSE:
2349 case BRW_OPCODE_WHILE:
2350 case BRW_OPCODE_HALT:
2351 return ip;
2352 }
2353 }
2354
2355 return 0;
2356 }
2357
2358 /* There is no DO instruction on gen6, so to find the end of the loop
2359 * we have to see if the loop is jumping back before our start
2360 * instruction.
2361 */
2362 static int
2363 brw_find_loop_end(struct brw_compile *p, int start)
2364 {
2365 struct brw_context *brw = p->brw;
2366 int ip;
2367 int scale = 8;
2368 void *store = p->store;
2369
2370 /* Always start after the instruction (such as a WHILE) we're trying to fix
2371 * up.
2372 */
2373 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2374 struct brw_instruction *insn = store + ip;
2375
2376 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2377 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
2378 : insn->bits3.break_cont.jip;
2379 if (ip + jip * scale <= start)
2380 return ip;
2381 }
2382 }
2383 assert(!"not reached");
2384 return start;
2385 }
2386
2387 /* After program generation, go back and update the UIP and JIP of
2388 * BREAK, CONT, and HALT instructions to their correct locations.
2389 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   /* JIP/UIP distances are stored in units of 8 bytes. */
   int scale = 8;
   void *store = p->store;

   /* Pre-Gen6 uses a different branch mechanism; nothing to fix up. */
   if (brw->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
	 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
	 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
		insn->header.opcode != BRW_OPCODE_CONTINUE &&
		insn->header.opcode != BRW_OPCODE_HALT);
	 continue;
      }

      /* Offset of the nearest enclosing block terminator (ENDIF/ELSE/
       * WHILE/HALT), or 0 when none follows.
       */
      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip +
	     (brw->gen == 6 ? 16 : 0)) / scale;
	 break;
      case BRW_OPCODE_CONTINUE:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip) / scale;

	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;

      case BRW_OPCODE_ENDIF:
	 /* No following block terminator: jump to the next instruction
	  * (2 * 8 bytes = 16, one full-size instruction — presumably the
	  * fall-through case).
	  */
	 if (block_end_ip == 0)
	    insn->bits3.break_cont.jip = 2;
	 else
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 break;

      case BRW_OPCODE_HALT:
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same. In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
	 if (block_end_ip == 0) {
	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
	 } else {
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 }
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}
2462
2463 void brw_ff_sync(struct brw_compile *p,
2464 struct brw_reg dest,
2465 unsigned msg_reg_nr,
2466 struct brw_reg src0,
2467 bool allocate,
2468 unsigned response_length,
2469 bool eot)
2470 {
2471 struct brw_context *brw = p->brw;
2472 struct brw_instruction *insn;
2473
2474 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2475
2476 insn = next_insn(p, BRW_OPCODE_SEND);
2477 brw_set_dest(p, insn, dest);
2478 brw_set_src0(p, insn, src0);
2479 brw_set_src1(p, insn, brw_imm_d(0));
2480
2481 if (brw->gen < 6)
2482 insn->header.destreg__conditionalmod = msg_reg_nr;
2483
2484 brw_set_ff_sync_message(p,
2485 insn,
2486 allocate,
2487 response_length,
2488 eot);
2489 }
2490
2491 /**
2492 * Emit the SEND instruction necessary to generate stream output data on Gen6
2493 * (for transform feedback).
2494 *
2495 * If send_commit_msg is true, this is the last piece of stream output data
2496 * from this thread, so send the data as a committed write. According to the
2497 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2498 *
2499 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2500 * writes are complete by sending the final write as a committed write."
2501 */
2502 void
2503 brw_svb_write(struct brw_compile *p,
2504 struct brw_reg dest,
2505 unsigned msg_reg_nr,
2506 struct brw_reg src0,
2507 unsigned binding_table_index,
2508 bool send_commit_msg)
2509 {
2510 struct brw_instruction *insn;
2511
2512 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2513
2514 insn = next_insn(p, BRW_OPCODE_SEND);
2515 brw_set_dest(p, insn, dest);
2516 brw_set_src0(p, insn, src0);
2517 brw_set_src1(p, insn, brw_imm_d(0));
2518 brw_set_dp_write_message(p, insn,
2519 binding_table_index,
2520 0, /* msg_control: ignored */
2521 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2522 1, /* msg_length */
2523 true, /* header_present */
2524 0, /* last_render_target: ignored */
2525 send_commit_msg, /* response_length */
2526 0, /* end_of_thread */
2527 send_commit_msg); /* send_commit_msg */
2528 }
2529
static void
brw_set_dp_untyped_atomic_message(struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned atomic_op,
                                  unsigned bind_table_index,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  bool header_present)
{
   /* Fill in the SEND message descriptor for an untyped atomic operation.
    * Haswell moved these messages to data cache port 1 with its own message
    * type encodings; Ivybridge uses the original data cache SFID.
    */
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);


      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* Bit 12 selects SIMD8 mode; left clear the message runs SIMD16. */
         if (insn->header.execution_size != BRW_EXECUTE_16)
            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */

         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         /* Align16 uses the SIMD4x2 variant of the message. */
         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }

   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;

      if (insn->header.execution_size != BRW_EXECUTE_16)
         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
   }

   if (response_length)
      insn->bits3.ud |= 1 << 13; /* Return data expected */

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
   /* The atomic operation code occupies descriptor bits [11:8]. */
   insn->bits3.ud |= atomic_op << 8;
}
2573
2574 void
2575 brw_untyped_atomic(struct brw_compile *p,
2576 struct brw_reg dest,
2577 struct brw_reg mrf,
2578 unsigned atomic_op,
2579 unsigned bind_table_index,
2580 unsigned msg_length,
2581 unsigned response_length) {
2582 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
2583
2584 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2585 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2586 brw_set_src1(p, insn, brw_imm_d(0));
2587 brw_set_dp_untyped_atomic_message(
2588 p, insn, atomic_op, bind_table_index, msg_length, response_length,
2589 insn->header.access_mode == BRW_ALIGN_1);
2590 }
2591
static void
brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
                                        struct brw_instruction *insn,
                                        unsigned bind_table_index,
                                        unsigned msg_length,
                                        unsigned response_length,
                                        bool header_present)
{
   /* Fill in the SEND message descriptor for an untyped surface read.
    * num_channels is derived from the response length: each enabled channel
    * returns one register per 8 channels of dispatch.
    */
   const unsigned dispatch_width =
      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
   const unsigned num_channels = response_length / (dispatch_width / 8);

   /* Haswell moved these messages to data cache port 1. */
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
   }

   /* SIMD mode lives in descriptor bits [13:12]; only meaningful in
    * Align1 mode.
    */
   if (insn->header.access_mode == BRW_ALIGN_1) {
      if (dispatch_width == 16)
         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
      else
         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
   }

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;

   /* Set mask of 32-bit channels to drop. */
   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
}
2630
2631 void
2632 brw_untyped_surface_read(struct brw_compile *p,
2633 struct brw_reg dest,
2634 struct brw_reg mrf,
2635 unsigned bind_table_index,
2636 unsigned msg_length,
2637 unsigned response_length)
2638 {
2639 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2640
2641 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2642 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2643 brw_set_dp_untyped_surface_read_message(
2644 p, insn, bind_table_index, msg_length, response_length,
2645 insn->header.access_mode == BRW_ALIGN_1);
2646 }
2647
2648 /**
2649 * This instruction is generated as a single-channel align1 instruction by
2650 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2651 *
2652 * We can't use the typed atomic op in the FS because that has the execution
2653 * mask ANDed with the pixel mask, but we just want to write the one dword for
2654 * all the pixels.
2655 *
2656 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2657 * one u32. So we use the same untyped atomic write message as the pixel
2658 * shader.
2659 *
2660 * The untyped atomic operation requires a BUFFER surface type with RAW
2661 * format, and is only accessible through the legacy DATA_CACHE dataport
2662 * messages.
2663 */
2664 void brw_shader_time_add(struct brw_compile *p,
2665 struct brw_reg payload,
2666 uint32_t surf_index)
2667 {
2668 struct brw_context *brw = p->brw;
2669 assert(brw->gen >= 7);
2670
2671 brw_push_insn_state(p);
2672 brw_set_access_mode(p, BRW_ALIGN_1);
2673 brw_set_mask_control(p, BRW_MASK_DISABLE);
2674 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
2675 brw_pop_insn_state(p);
2676
2677 /* We use brw_vec1_reg and unmasked because we want to increment the given
2678 * offset only once.
2679 */
2680 brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
2681 BRW_ARF_NULL, 0));
2682 brw_set_src0(p, send, brw_vec1_reg(payload.file,
2683 payload.nr, 0));
2684 brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
2685 2 /* message length */,
2686 0 /* response length */,
2687 false /* header present */);
2688 }