cce87522f8b74f15a7f7526f39679519d7727808
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102
/* Encode the destination operand of \p insn from \p dest.
 *
 * Handles both direct and register-indirect addressing, and both Align1
 * and Align16 access modes, which use different bitfield layouts.  Also
 * sets the instruction's execution size from the destination width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/immediate destinations must name one of the 128 registers;
    * ARF and MRF numbers use their own encodings and are exempt.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On Gen7 MRFs are faked on top of GRFs; remap before encoding. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* Horizontal stride 0 is not a valid destination stride;
	  * promote it to 1.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16 subreg numbers are in units of 16 bytes. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 assert(dest.dw1.bits.writemask != 0 ||
		dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
161
162 extern int reg_type_size[];
163
164 static void
165 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
166 {
167 int hstride_for_reg[] = {0, 1, 2, 4};
168 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
169 int width_for_reg[] = {1, 2, 4, 8, 16};
170 int execsize_for_reg[] = {1, 2, 4, 8, 16};
171 int width, hstride, vstride, execsize;
172
173 if (reg.file == BRW_IMMEDIATE_VALUE) {
174 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
175 * mean the destination has to be 128-bit aligned and the
176 * destination horiz stride has to be a word.
177 */
178 if (reg.type == BRW_REGISTER_TYPE_V) {
179 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
180 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
181 }
182
183 return;
184 }
185
186 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
187 reg.file == BRW_ARF_NULL)
188 return;
189
190 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
191 hstride = hstride_for_reg[reg.hstride];
192
193 if (reg.vstride == 0xf) {
194 vstride = -1;
195 } else {
196 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
197 vstride = vstride_for_reg[reg.vstride];
198 }
199
200 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
201 width = width_for_reg[reg.width];
202
203 assert(insn->header.execution_size >= 0 &&
204 insn->header.execution_size < Elements(execsize_for_reg));
205 execsize = execsize_for_reg[insn->header.execution_size];
206
207 /* Restrictions from 3.3.10: Register Region Restrictions. */
208 /* 3. */
209 assert(execsize >= width);
210
211 /* 4. */
212 if (execsize == width && hstride != 0) {
213 assert(vstride == -1 || vstride == width * hstride);
214 }
215
216 /* 5. */
217 if (execsize == width && hstride == 0) {
218 /* no restriction on vstride. */
219 }
220
221 /* 6. */
222 if (width == 1) {
223 assert(hstride == 0);
224 }
225
226 /* 7. */
227 if (execsize == 1 && width == 1) {
228 assert(hstride == 0);
229 assert(vstride == 0);
230 }
231
232 /* 8. */
233 if (vstride == 0 && hstride == 0) {
234 assert(width == 1);
235 }
236
237 /* 10. Check destination issues. */
238 }
239
240 void
241 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
242 struct brw_reg reg)
243 {
244 struct brw_context *brw = p->brw;
245
246 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
247 assert(reg.nr < 128);
248
249 gen7_convert_mrf_to_grf(p, &reg);
250
251 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
252 insn->header.opcode == BRW_OPCODE_SENDC)) {
253 /* Any source modifiers or regions will be ignored, since this just
254 * identifies the MRF/GRF to start reading the message contents from.
255 * Check for some likely failures.
256 */
257 assert(!reg.negate);
258 assert(!reg.abs);
259 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
260 }
261
262 validate_reg(insn, reg);
263
264 insn->bits1.da1.src0_reg_file = reg.file;
265 insn->bits1.da1.src0_reg_type = reg.type;
266 insn->bits2.da1.src0_abs = reg.abs;
267 insn->bits2.da1.src0_negate = reg.negate;
268 insn->bits2.da1.src0_address_mode = reg.address_mode;
269
270 if (reg.file == BRW_IMMEDIATE_VALUE) {
271 insn->bits3.ud = reg.dw1.ud;
272
273 /* Required to set some fields in src1 as well:
274 */
275 insn->bits1.da1.src1_reg_file = 0; /* arf */
276 insn->bits1.da1.src1_reg_type = reg.type;
277 }
278 else
279 {
280 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
281 if (insn->header.access_mode == BRW_ALIGN_1) {
282 insn->bits2.da1.src0_subreg_nr = reg.subnr;
283 insn->bits2.da1.src0_reg_nr = reg.nr;
284 }
285 else {
286 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
287 insn->bits2.da16.src0_reg_nr = reg.nr;
288 }
289 }
290 else {
291 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
292
293 if (insn->header.access_mode == BRW_ALIGN_1) {
294 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
295 }
296 else {
297 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
298 }
299 }
300
301 if (insn->header.access_mode == BRW_ALIGN_1) {
302 if (reg.width == BRW_WIDTH_1 &&
303 insn->header.execution_size == BRW_EXECUTE_1) {
304 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
305 insn->bits2.da1.src0_width = BRW_WIDTH_1;
306 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
307 }
308 else {
309 insn->bits2.da1.src0_horiz_stride = reg.hstride;
310 insn->bits2.da1.src0_width = reg.width;
311 insn->bits2.da1.src0_vert_stride = reg.vstride;
312 }
313 }
314 else {
315 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
316 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
317 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
318 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
319
320 /* This is an oddity of the fact we're using the same
321 * descriptions for registers in align_16 as align_1:
322 */
323 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
324 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
325 else
326 insn->bits2.da16.src0_vert_stride = reg.vstride;
327 }
328 }
329 }
330
331
332 void brw_set_src1(struct brw_compile *p,
333 struct brw_instruction *insn,
334 struct brw_reg reg)
335 {
336 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
337
338 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
339 assert(reg.nr < 128);
340
341 gen7_convert_mrf_to_grf(p, &reg);
342
343 validate_reg(insn, reg);
344
345 insn->bits1.da1.src1_reg_file = reg.file;
346 insn->bits1.da1.src1_reg_type = reg.type;
347 insn->bits3.da1.src1_abs = reg.abs;
348 insn->bits3.da1.src1_negate = reg.negate;
349
350 /* Only src1 can be immediate in two-argument instructions.
351 */
352 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
353
354 if (reg.file == BRW_IMMEDIATE_VALUE) {
355 insn->bits3.ud = reg.dw1.ud;
356 }
357 else {
358 /* This is a hardware restriction, which may or may not be lifted
359 * in the future:
360 */
361 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
362 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
363
364 if (insn->header.access_mode == BRW_ALIGN_1) {
365 insn->bits3.da1.src1_subreg_nr = reg.subnr;
366 insn->bits3.da1.src1_reg_nr = reg.nr;
367 }
368 else {
369 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
370 insn->bits3.da16.src1_reg_nr = reg.nr;
371 }
372
373 if (insn->header.access_mode == BRW_ALIGN_1) {
374 if (reg.width == BRW_WIDTH_1 &&
375 insn->header.execution_size == BRW_EXECUTE_1) {
376 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
377 insn->bits3.da1.src1_width = BRW_WIDTH_1;
378 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
379 }
380 else {
381 insn->bits3.da1.src1_horiz_stride = reg.hstride;
382 insn->bits3.da1.src1_width = reg.width;
383 insn->bits3.da1.src1_vert_stride = reg.vstride;
384 }
385 }
386 else {
387 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
388 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
389 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
390 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
391
392 /* This is an oddity of the fact we're using the same
393 * descriptions for registers in align_16 as align_1:
394 */
395 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
396 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
397 else
398 insn->bits3.da16.src1_vert_stride = reg.vstride;
399 }
400 }
401 }
402
403 /**
404 * Set the Message Descriptor and Extended Message Descriptor fields
405 * for SEND messages.
406 *
407 * \note This zeroes out the Function Control bits, so it must be called
408 * \b before filling out any message-specific data. Callers can
409 * choose not to fill in irrelevant bits; they will be zero.
410 */
411 static void
412 brw_set_message_descriptor(struct brw_compile *p,
413 struct brw_instruction *inst,
414 enum brw_message_target sfid,
415 unsigned msg_length,
416 unsigned response_length,
417 bool header_present,
418 bool end_of_thread)
419 {
420 struct brw_context *brw = p->brw;
421
422 brw_set_src1(p, inst, brw_imm_d(0));
423
424 if (brw->gen >= 5) {
425 inst->bits3.generic_gen5.header_present = header_present;
426 inst->bits3.generic_gen5.response_length = response_length;
427 inst->bits3.generic_gen5.msg_length = msg_length;
428 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
429
430 if (brw->gen >= 6) {
431 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
432 inst->header.destreg__conditionalmod = sfid;
433 } else {
434 /* Set Extended Message Descriptor (ex_desc) */
435 inst->bits2.send_gen5.sfid = sfid;
436 inst->bits2.send_gen5.end_of_thread = end_of_thread;
437 }
438 } else {
439 inst->bits3.generic.response_length = response_length;
440 inst->bits3.generic.msg_length = msg_length;
441 inst->bits3.generic.msg_target = sfid;
442 inst->bits3.generic.end_of_thread = end_of_thread;
443 }
444 }
445
446 static void brw_set_math_message( struct brw_compile *p,
447 struct brw_instruction *insn,
448 GLuint function,
449 GLuint integer_type,
450 bool low_precision,
451 GLuint dataType )
452 {
453 struct brw_context *brw = p->brw;
454 unsigned msg_length;
455 unsigned response_length;
456
457 /* Infer message length from the function */
458 switch (function) {
459 case BRW_MATH_FUNCTION_POW:
460 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
461 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
462 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
463 msg_length = 2;
464 break;
465 default:
466 msg_length = 1;
467 break;
468 }
469
470 /* Infer response length from the function */
471 switch (function) {
472 case BRW_MATH_FUNCTION_SINCOS:
473 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
474 response_length = 2;
475 break;
476 default:
477 response_length = 1;
478 break;
479 }
480
481
482 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
483 msg_length, response_length, false, false);
484 if (brw->gen == 5) {
485 insn->bits3.math_gen5.function = function;
486 insn->bits3.math_gen5.int_type = integer_type;
487 insn->bits3.math_gen5.precision = low_precision;
488 insn->bits3.math_gen5.saturate = insn->header.saturate;
489 insn->bits3.math_gen5.data_type = dataType;
490 insn->bits3.math_gen5.snapshot = 0;
491 } else {
492 insn->bits3.math.function = function;
493 insn->bits3.math.int_type = integer_type;
494 insn->bits3.math.precision = low_precision;
495 insn->bits3.math.saturate = insn->header.saturate;
496 insn->bits3.math.data_type = dataType;
497 }
498 insn->header.saturate = 0;
499 }
500
501
502 static void brw_set_ff_sync_message(struct brw_compile *p,
503 struct brw_instruction *insn,
504 bool allocate,
505 GLuint response_length,
506 bool end_of_thread)
507 {
508 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
509 1, response_length, true, end_of_thread);
510 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
511 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
512 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
513 insn->bits3.urb_gen5.allocate = allocate;
514 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
515 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
516 }
517
518 static void brw_set_urb_message( struct brw_compile *p,
519 struct brw_instruction *insn,
520 enum brw_urb_write_flags flags,
521 GLuint msg_length,
522 GLuint response_length,
523 GLuint offset,
524 GLuint swizzle_control )
525 {
526 struct brw_context *brw = p->brw;
527
528 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
529 msg_length, response_length, true,
530 flags & BRW_URB_WRITE_EOT);
531 if (brw->gen == 7) {
532 if (flags & BRW_URB_WRITE_OWORD) {
533 assert(msg_length == 2); /* header + one OWORD of data */
534 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
535 } else {
536 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
537 }
538 insn->bits3.urb_gen7.offset = offset;
539 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
540 insn->bits3.urb_gen7.swizzle_control = swizzle_control;
541 insn->bits3.urb_gen7.per_slot_offset =
542 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
543 insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
544 } else if (brw->gen >= 5) {
545 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
546 insn->bits3.urb_gen5.offset = offset;
547 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
548 insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
549 insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
550 insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
551 } else {
552 insn->bits3.urb.opcode = 0; /* ? */
553 insn->bits3.urb.offset = offset;
554 insn->bits3.urb.swizzle_control = swizzle_control;
555 insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
556 insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
557 insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
558 }
559 }
560
561 void
562 brw_set_dp_write_message(struct brw_compile *p,
563 struct brw_instruction *insn,
564 GLuint binding_table_index,
565 GLuint msg_control,
566 GLuint msg_type,
567 GLuint msg_length,
568 bool header_present,
569 GLuint last_render_target,
570 GLuint response_length,
571 GLuint end_of_thread,
572 GLuint send_commit_msg)
573 {
574 struct brw_context *brw = p->brw;
575 unsigned sfid;
576
577 if (brw->gen >= 7) {
578 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
579 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
580 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
581 else
582 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
583 } else if (brw->gen == 6) {
584 /* Use the render cache for all write messages. */
585 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
586 } else {
587 sfid = BRW_SFID_DATAPORT_WRITE;
588 }
589
590 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
591 header_present, end_of_thread);
592
593 if (brw->gen >= 7) {
594 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
595 insn->bits3.gen7_dp.msg_control = msg_control;
596 insn->bits3.gen7_dp.last_render_target = last_render_target;
597 insn->bits3.gen7_dp.msg_type = msg_type;
598 } else if (brw->gen == 6) {
599 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
600 insn->bits3.gen6_dp.msg_control = msg_control;
601 insn->bits3.gen6_dp.last_render_target = last_render_target;
602 insn->bits3.gen6_dp.msg_type = msg_type;
603 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
604 } else if (brw->gen == 5) {
605 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
606 insn->bits3.dp_write_gen5.msg_control = msg_control;
607 insn->bits3.dp_write_gen5.last_render_target = last_render_target;
608 insn->bits3.dp_write_gen5.msg_type = msg_type;
609 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
610 } else {
611 insn->bits3.dp_write.binding_table_index = binding_table_index;
612 insn->bits3.dp_write.msg_control = msg_control;
613 insn->bits3.dp_write.last_render_target = last_render_target;
614 insn->bits3.dp_write.msg_type = msg_type;
615 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
616 }
617 }
618
619 void
620 brw_set_dp_read_message(struct brw_compile *p,
621 struct brw_instruction *insn,
622 GLuint binding_table_index,
623 GLuint msg_control,
624 GLuint msg_type,
625 GLuint target_cache,
626 GLuint msg_length,
627 bool header_present,
628 GLuint response_length)
629 {
630 struct brw_context *brw = p->brw;
631 unsigned sfid;
632
633 if (brw->gen >= 7) {
634 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
635 } else if (brw->gen == 6) {
636 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
637 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
638 else
639 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
640 } else {
641 sfid = BRW_SFID_DATAPORT_READ;
642 }
643
644 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
645 header_present, false);
646
647 if (brw->gen >= 7) {
648 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
649 insn->bits3.gen7_dp.msg_control = msg_control;
650 insn->bits3.gen7_dp.last_render_target = 0;
651 insn->bits3.gen7_dp.msg_type = msg_type;
652 } else if (brw->gen == 6) {
653 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
654 insn->bits3.gen6_dp.msg_control = msg_control;
655 insn->bits3.gen6_dp.last_render_target = 0;
656 insn->bits3.gen6_dp.msg_type = msg_type;
657 insn->bits3.gen6_dp.send_commit_msg = 0;
658 } else if (brw->gen == 5) {
659 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
660 insn->bits3.dp_read_gen5.msg_control = msg_control;
661 insn->bits3.dp_read_gen5.msg_type = msg_type;
662 insn->bits3.dp_read_gen5.target_cache = target_cache;
663 } else if (brw->is_g4x) {
664 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
665 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
666 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
667 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
668 } else {
669 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
670 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
671 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
672 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
673 }
674 }
675
676 void
677 brw_set_sampler_message(struct brw_compile *p,
678 struct brw_instruction *insn,
679 GLuint binding_table_index,
680 GLuint sampler,
681 GLuint msg_type,
682 GLuint response_length,
683 GLuint msg_length,
684 GLuint header_present,
685 GLuint simd_mode,
686 GLuint return_format)
687 {
688 struct brw_context *brw = p->brw;
689
690 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
691 response_length, header_present, false);
692
693 if (brw->gen >= 7) {
694 insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
695 insn->bits3.sampler_gen7.sampler = sampler;
696 insn->bits3.sampler_gen7.msg_type = msg_type;
697 insn->bits3.sampler_gen7.simd_mode = simd_mode;
698 } else if (brw->gen >= 5) {
699 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
700 insn->bits3.sampler_gen5.sampler = sampler;
701 insn->bits3.sampler_gen5.msg_type = msg_type;
702 insn->bits3.sampler_gen5.simd_mode = simd_mode;
703 } else if (brw->is_g4x) {
704 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
705 insn->bits3.sampler_g4x.sampler = sampler;
706 insn->bits3.sampler_g4x.msg_type = msg_type;
707 } else {
708 insn->bits3.sampler.binding_table_index = binding_table_index;
709 insn->bits3.sampler.sampler = sampler;
710 insn->bits3.sampler.msg_type = msg_type;
711 insn->bits3.sampler.return_format = return_format;
712 }
713 }
714
715
716 #define next_insn brw_next_insn
717 struct brw_instruction *
718 brw_next_insn(struct brw_compile *p, GLuint opcode)
719 {
720 struct brw_instruction *insn;
721
722 if (p->nr_insn + 1 > p->store_size) {
723 if (0)
724 printf("incresing the store size to %d\n", p->store_size << 1);
725 p->store_size <<= 1;
726 p->store = reralloc(p->mem_ctx, p->store,
727 struct brw_instruction, p->store_size);
728 if (!p->store)
729 assert(!"realloc eu store memeory failed");
730 }
731
732 p->next_insn_offset += 16;
733 insn = &p->store[p->nr_insn++];
734 memcpy(insn, p->current, sizeof(*insn));
735
736 /* Reset this one-shot flag:
737 */
738
739 if (p->current->header.destreg__conditionalmod) {
740 p->current->header.destreg__conditionalmod = 0;
741 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
742 }
743
744 insn->header.opcode = opcode;
745 return insn;
746 }
747
748 static struct brw_instruction *brw_alu1( struct brw_compile *p,
749 GLuint opcode,
750 struct brw_reg dest,
751 struct brw_reg src )
752 {
753 struct brw_instruction *insn = next_insn(p, opcode);
754 brw_set_dest(p, insn, dest);
755 brw_set_src0(p, insn, src);
756 return insn;
757 }
758
759 static struct brw_instruction *brw_alu2(struct brw_compile *p,
760 GLuint opcode,
761 struct brw_reg dest,
762 struct brw_reg src0,
763 struct brw_reg src1 )
764 {
765 struct brw_instruction *insn = next_insn(p, opcode);
766 brw_set_dest(p, insn, dest);
767 brw_set_src0(p, insn, src0);
768 brw_set_src1(p, insn, src1);
769 return insn;
770 }
771
772 static int
773 get_3src_subreg_nr(struct brw_reg reg)
774 {
775 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
776 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
777 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
778 } else {
779 return reg.subnr / 4;
780 }
781 }
782
/* Emit a three-source ALU instruction (MAD/LRP/BFE/BFI2).
 *
 * 3-src instructions use a distinct, more restrictive encoding: Align16
 * only, direct addressing only, GRF sources (MRF allowed for dest), and
 * a single shared source type field derived here from dest.type.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* dest_reg_file is a single bit: 0 = GRF, 1 = MRF. */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   /* Subreg number is in 16-byte units in Align16 mode. */
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl = 1 replicates a scalar source across all channels. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   /* src1's subreg number is split across the bits2/bits3 dword boundary. */
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
866
867
868 /***********************************************************************
869 * Convenience routines.
870 */
/* Generate a public emitter brw_<OP> for a one-source instruction. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Generate a public emitter brw_<OP> for a two-source instruction. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Generate a public emitter brw_<OP> for a three-source instruction. */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Like ALU3, but asserts that all four operands are float-typed. */
#define ALU3F(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   assert(dest.type == BRW_REGISTER_TYPE_F);		\
   assert(src0.type == BRW_REGISTER_TYPE_F);		\
   assert(src1.type == BRW_REGISTER_TYPE_F);		\
   assert(src2.type == BRW_REGISTER_TYPE_F);		\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}


/* Instantiate the public emitters declared in brw_eu.h. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)

ROUND(RNDZ)
ROUND(RNDE)
972
973
974 struct brw_instruction *brw_ADD(struct brw_compile *p,
975 struct brw_reg dest,
976 struct brw_reg src0,
977 struct brw_reg src1)
978 {
979 /* 6.2.2: add */
980 if (src0.type == BRW_REGISTER_TYPE_F ||
981 (src0.file == BRW_IMMEDIATE_VALUE &&
982 src0.type == BRW_REGISTER_TYPE_VF)) {
983 assert(src1.type != BRW_REGISTER_TYPE_UD);
984 assert(src1.type != BRW_REGISTER_TYPE_D);
985 }
986
987 if (src1.type == BRW_REGISTER_TYPE_F ||
988 (src1.file == BRW_IMMEDIATE_VALUE &&
989 src1.type == BRW_REGISTER_TYPE_VF)) {
990 assert(src0.type != BRW_REGISTER_TYPE_UD);
991 assert(src0.type != BRW_REGISTER_TYPE_D);
992 }
993
994 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
995 }
996
997 struct brw_instruction *brw_AVG(struct brw_compile *p,
998 struct brw_reg dest,
999 struct brw_reg src0,
1000 struct brw_reg src1)
1001 {
1002 assert(dest.type == src0.type);
1003 assert(src0.type == src1.type);
1004 switch (src0.type) {
1005 case BRW_REGISTER_TYPE_B:
1006 case BRW_REGISTER_TYPE_UB:
1007 case BRW_REGISTER_TYPE_W:
1008 case BRW_REGISTER_TYPE_UW:
1009 case BRW_REGISTER_TYPE_D:
1010 case BRW_REGISTER_TYPE_UD:
1011 break;
1012 default:
1013 assert(!"Bad type for brw_AVG");
1014 }
1015
1016 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1017 }
1018
1019 struct brw_instruction *brw_MUL(struct brw_compile *p,
1020 struct brw_reg dest,
1021 struct brw_reg src0,
1022 struct brw_reg src1)
1023 {
1024 /* 6.32.38: mul */
1025 if (src0.type == BRW_REGISTER_TYPE_D ||
1026 src0.type == BRW_REGISTER_TYPE_UD ||
1027 src1.type == BRW_REGISTER_TYPE_D ||
1028 src1.type == BRW_REGISTER_TYPE_UD) {
1029 assert(dest.type != BRW_REGISTER_TYPE_F);
1030 }
1031
1032 if (src0.type == BRW_REGISTER_TYPE_F ||
1033 (src0.file == BRW_IMMEDIATE_VALUE &&
1034 src0.type == BRW_REGISTER_TYPE_VF)) {
1035 assert(src1.type != BRW_REGISTER_TYPE_UD);
1036 assert(src1.type != BRW_REGISTER_TYPE_D);
1037 }
1038
1039 if (src1.type == BRW_REGISTER_TYPE_F ||
1040 (src1.file == BRW_IMMEDIATE_VALUE &&
1041 src1.type == BRW_REGISTER_TYPE_VF)) {
1042 assert(src0.type != BRW_REGISTER_TYPE_UD);
1043 assert(src0.type != BRW_REGISTER_TYPE_D);
1044 }
1045
1046 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1047 src0.nr != BRW_ARF_ACCUMULATOR);
1048 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1049 src1.nr != BRW_ARF_ACCUMULATOR);
1050
1051 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1052 }
1053
1054
1055 void brw_NOP(struct brw_compile *p)
1056 {
1057 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1058 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1059 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1060 brw_set_src1(p, insn, brw_imm_ud(0x0));
1061 }
1062
1063
1064
1065
1066
1067 /***********************************************************************
1068 * Comparisons, if/else/endif
1069 */
1070
1071 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1072 struct brw_reg dest,
1073 struct brw_reg src0,
1074 struct brw_reg src1)
1075 {
1076 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1077
1078 insn->header.execution_size = 1;
1079 insn->header.compression_control = BRW_COMPRESSION_NONE;
1080 insn->header.mask_control = BRW_MASK_DISABLE;
1081
1082 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1083
1084 return insn;
1085 }
1086
1087 static void
1088 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1089 {
1090 p->if_stack[p->if_stack_depth] = inst - p->store;
1091
1092 p->if_stack_depth++;
1093 if (p->if_stack_array_size <= p->if_stack_depth) {
1094 p->if_stack_array_size *= 2;
1095 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1096 p->if_stack_array_size);
1097 }
1098 }
1099
1100 static struct brw_instruction *
1101 pop_if_stack(struct brw_compile *p)
1102 {
1103 p->if_stack_depth--;
1104 return &p->store[p->if_stack[p->if_stack_depth]];
1105 }
1106
1107 static void
1108 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1109 {
1110 if (p->loop_stack_array_size < p->loop_stack_depth) {
1111 p->loop_stack_array_size *= 2;
1112 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1113 p->loop_stack_array_size);
1114 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1115 p->loop_stack_array_size);
1116 }
1117
1118 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1119 p->loop_stack_depth++;
1120 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1121 }
1122
1123 static struct brw_instruction *
1124 get_inner_do_insn(struct brw_compile *p)
1125 {
1126 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1127 }
1128
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack). Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off. If the stack is now empty, normal execution resumes.
 *
 * Jump targets (jump_count / JIP / UIP on gen6+) are left zero here and
 * filled in later by patch_IF_ELSE() once the ENDIF location is known.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      /* Pre-gen6 IF operates on the instruction pointer. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6 encodes the jump count in dest; placeholder of 0 for now. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7 uses JIP/UIP fields; placeholders of 0 for now. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumed the predicate; clear it for following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1182
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 *
 * The comparison (src0 <conditional> src1) is folded into the IF itself,
 * so no separate CMP is needed.  The jump count is left zero and patched
 * later by patch_IF_ELSE().
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1214
1215 /**
1216 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1217 */
1218 static void
1219 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1220 struct brw_instruction *if_inst,
1221 struct brw_instruction *else_inst)
1222 {
1223 /* The next instruction (where the ENDIF would be, if it existed) */
1224 struct brw_instruction *next_inst = &p->store[p->nr_insn];
1225
1226 assert(p->single_program_flow);
1227 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1228 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1229 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1230
1231 /* Convert IF to an ADD instruction that moves the instruction pointer
1232 * to the first instruction of the ELSE block. If there is no ELSE
1233 * block, point to where ENDIF would be. Reverse the predicate.
1234 *
1235 * There's no need to execute an ENDIF since we don't need to do any
1236 * stack operations, and if we're currently executing, we just want to
1237 * continue normally.
1238 */
1239 if_inst->header.opcode = BRW_OPCODE_ADD;
1240 if_inst->header.predicate_inverse = 1;
1241
1242 if (else_inst != NULL) {
1243 /* Convert ELSE to an ADD instruction that points where the ENDIF
1244 * would be.
1245 */
1246 else_inst->header.opcode = BRW_OPCODE_ADD;
1247
1248 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1249 else_inst->bits3.ud = (next_inst - else_inst) * 16;
1250 } else {
1251 if_inst->bits3.ud = (next_inst - if_inst) * 16;
1252 }
1253 }
1254
1255 /**
1256 * Patch IF and ELSE instructions with appropriate jump targets.
1257 */
1258 static void
1259 patch_IF_ELSE(struct brw_compile *p,
1260 struct brw_instruction *if_inst,
1261 struct brw_instruction *else_inst,
1262 struct brw_instruction *endif_inst)
1263 {
1264 struct brw_context *brw = p->brw;
1265
1266 /* We shouldn't be patching IF and ELSE instructions in single program flow
1267 * mode when gen < 6, because in single program flow mode on those
1268 * platforms, we convert flow control instructions to conditional ADDs that
1269 * operate on IP (see brw_ENDIF).
1270 *
1271 * However, on Gen6, writing to IP doesn't work in single program flow mode
1272 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1273 * not be updated by non-flow control instructions."). And on later
1274 * platforms, there is no significant benefit to converting control flow
1275 * instructions to conditional ADDs. So we do patch IF and ELSE
1276 * instructions in single program flow mode on those platforms.
1277 */
1278 if (brw->gen < 6)
1279 assert(!p->single_program_flow);
1280
1281 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1282 assert(endif_inst != NULL);
1283 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1284
1285 unsigned br = 1;
1286 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1287 * requires 2 chunks.
1288 */
1289 if (brw->gen >= 5)
1290 br = 2;
1291
1292 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1293 endif_inst->header.execution_size = if_inst->header.execution_size;
1294
1295 if (else_inst == NULL) {
1296 /* Patch IF -> ENDIF */
1297 if (brw->gen < 6) {
1298 /* Turn it into an IFF, which means no mask stack operations for
1299 * all-false and jumping past the ENDIF.
1300 */
1301 if_inst->header.opcode = BRW_OPCODE_IFF;
1302 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1303 if_inst->bits3.if_else.pop_count = 0;
1304 if_inst->bits3.if_else.pad0 = 0;
1305 } else if (brw->gen == 6) {
1306 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1307 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1308 } else {
1309 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1310 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1311 }
1312 } else {
1313 else_inst->header.execution_size = if_inst->header.execution_size;
1314
1315 /* Patch IF -> ELSE */
1316 if (brw->gen < 6) {
1317 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1318 if_inst->bits3.if_else.pop_count = 0;
1319 if_inst->bits3.if_else.pad0 = 0;
1320 } else if (brw->gen == 6) {
1321 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1322 }
1323
1324 /* Patch ELSE -> ENDIF */
1325 if (brw->gen < 6) {
1326 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1327 * matching ENDIF.
1328 */
1329 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1330 else_inst->bits3.if_else.pop_count = 1;
1331 else_inst->bits3.if_else.pad0 = 0;
1332 } else if (brw->gen == 6) {
1333 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1334 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1335 } else {
1336 /* The IF instruction's JIP should point just past the ELSE */
1337 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1338 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1339 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1340 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1341 }
1342 }
1343 }
1344
/* Emit an ELSE instruction for the innermost open IF.
 *
 * Jump targets are left zero here and filled in by patch_IF_ELSE() from
 * brw_ENDIF().  The ELSE is pushed onto the if-stack so brw_ENDIF() can
 * find it.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      /* Pre-gen6 ELSE operates on the instruction pointer. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6 encodes the jump count in dest; placeholder for now. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7 uses JIP/UIP fields; placeholders of 0 for now. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1377
/* Close the innermost IF/ELSE block.
 *
 * Pops the IF (and optional ELSE) off the if-stack, emits an ENDIF where
 * one is needed, and patches the jump targets of the whole construct via
 * patch_IF_ELSE().  On gen4/5 in SPF mode no ENDIF is emitted at all; the
 * IF/ELSE are rewritten into predicated ADDs on IP instead.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of the instruction
    * store memory (p->store), so call it first, before converting the
    * stacked indices back into instruction pointers.
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1456
/* Emit a BREAK out of the innermost loop.
 *
 * On gen6+ the jump offsets are filled in later (brw_set_uip_jip /
 * brw_patch_break_cont).  Pre-gen6, the BREAK also pops the mask-stack
 * entries pushed by IFs nested inside the current loop.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop one mask-stack entry per IF open inside the current loop. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1479
1480 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1481 {
1482 struct brw_instruction *insn;
1483
1484 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1485 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1486 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1487 brw_set_dest(p, insn, brw_ip_reg());
1488 brw_set_src0(p, insn, brw_ip_reg());
1489 brw_set_src1(p, insn, brw_imm_d(0x0));
1490
1491 insn->header.compression_control = BRW_COMPRESSION_NONE;
1492 insn->header.execution_size = BRW_EXECUTE_8;
1493 return insn;
1494 }
1495
/* Emit a CONTINUE for the innermost loop (pre-gen6 variant).
 *
 * The jump count is left zero and patched by brw_patch_break_cont() when
 * the matching WHILE is emitted.  Like BREAK, it pops the mask-stack
 * entries of IFs nested inside the current loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   /* Pop one mask-stack entry per IF open inside the current loop. */
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1510
/* Emit a HALT instruction (gen6+).
 *
 * UIP and JIP live in src1 and are patched later by the caller (the
 * immediate 0 here is just a placeholder).
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1528
/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 *
 * Returns the first instruction of the loop body (on gen6+ / SPF no
 * actual DO is emitted -- the returned pointer is simply where the body
 * will start).  In all cases the loop start is recorded on the loop
 * stack for brw_WHILE()/brw_patch_break_cont().
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction needed; just remember where the loop starts. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1572
1573 /**
1574 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1575 * instruction here.
1576 *
1577 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1578 * nesting, since it can always just point to the end of the block/current loop.
1579 */
1580 static void
1581 brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
1582 {
1583 struct brw_context *brw = p->brw;
1584 struct brw_instruction *do_inst = get_inner_do_insn(p);
1585 struct brw_instruction *inst;
1586 int br = (brw->gen == 5) ? 2 : 1;
1587
1588 for (inst = while_inst - 1; inst != do_inst; inst--) {
1589 /* If the jump count is != 0, that means that this instruction has already
1590 * been patched because it's part of a loop inside of the one we're
1591 * patching.
1592 */
1593 if (inst->header.opcode == BRW_OPCODE_BREAK &&
1594 inst->bits3.if_else.jump_count == 0) {
1595 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
1596 } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
1597 inst->bits3.if_else.jump_count == 0) {
1598 inst->bits3.if_else.jump_count = br * (while_inst - inst);
1599 }
1600 }
1601 }
1602
/* Emit the WHILE that closes the innermost brw_DO() loop.
 *
 * Per generation: gen7 uses a negative JIP back to the loop start; gen6
 * uses a negative jump count in dest; pre-gen6 either emits an ADD on IP
 * (single-program-flow) or a real WHILE pointing just past the DO, and
 * then back-patches any BREAK/CONT in the body.  Pops the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   /* Jump offsets count 64-bit chunks from gen5 on: 2 per instruction. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards jump: do_insn precedes insn, so JIP is negative. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: a scalar backwards ADD on IP replaces the WHILE. */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* Jump to the instruction following the DO. */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1666
1667
1668 /* FORWARD JUMPS:
1669 */
1670 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1671 {
1672 struct brw_context *brw = p->brw;
1673 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1674 GLuint jmpi = 1;
1675
1676 if (brw->gen >= 5)
1677 jmpi = 2;
1678
1679 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1680 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1681
1682 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1683 }
1684
1685
1686
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 *
 * Emits CMP with the given conditional modifier.  If dest is the null
 * register, only the flags are written, and subsequent instructions
 * default to normal predication until the caller changes it.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1732
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread.
 *
 * dest and src0 are both the notification register n1; execution size,
 * predication and compression must all be zero for WAIT.
 */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1747
1748
1749 /***********************************************************************
1750 * Helpers for the various SEND message types:
1751 */
1752
/** Extended math function, float[8].
 *
 * On gen6+ this is a native MATH opcode (the conditional-modifier field
 * encodes the function, FC[3:0]); msg_reg_nr, data_type and precision are
 * ignored there.  Pre-gen6, it is a SEND to the shared math unit using
 * message register msg_reg_nr.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* INT DIV variants take integer sources; everything else is float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1817
/** Extended math function, float[8].
 *
 * Two-source variant of brw_math() for gen6+ only (POW and the INT DIV
 * functions take two sources).  The function number goes in the
 * conditional-modifier field, as in brw_math().
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* INT DIV variants take integer sources; everything else is float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1867
1868
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * \param mrf      message register used to build the header (retyped to UD)
 * \param num_regs number of GRFs to write (1 or 2)
 * \param offset   byte offset into the scratch buffer (oword-aligned)
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the header's offset field is in owords, not bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen counts the header register plus the payload GRFs: one GRF is
    * 2 owords, two GRFs are 4 owords.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      /* The SEND is forced uncompressed; the header source is widened to
       * vec16 in that case — presumably to match the execution size the
       * surrounding code was using.  NOTE(review): confirm against the
       * compressed-instruction region rules.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      /* For SEND this field carries the message register number. */
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
1977
1978
1979 /**
1980 * Read a block of owords (half a GRF each) from the scratch buffer
1981 * using a constant index per channel.
1982 *
1983 * Offset must be aligned to oword size (16 bytes). Used for register
1984 * spilling.
1985 */
1986 void
1987 brw_oword_block_read_scratch(struct brw_compile *p,
1988 struct brw_reg dest,
1989 struct brw_reg mrf,
1990 int num_regs,
1991 GLuint offset)
1992 {
1993 struct brw_context *brw = p->brw;
1994 uint32_t msg_control;
1995 int rlen;
1996
1997 if (brw->gen >= 6)
1998 offset /= 16;
1999
2000 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2001 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2002
2003 if (num_regs == 1) {
2004 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2005 rlen = 1;
2006 } else {
2007 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2008 rlen = 2;
2009 }
2010
2011 {
2012 brw_push_insn_state(p);
2013 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2014 brw_set_mask_control(p, BRW_MASK_DISABLE);
2015
2016 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2017
2018 /* set message header global offset field (reg 0, element 2) */
2019 brw_MOV(p,
2020 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2021 mrf.nr,
2022 2), BRW_REGISTER_TYPE_UD),
2023 brw_imm_ud(offset));
2024
2025 brw_pop_insn_state(p);
2026 }
2027
2028 {
2029 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2030
2031 assert(insn->header.predicate_control == 0);
2032 insn->header.compression_control = BRW_COMPRESSION_NONE;
2033 insn->header.destreg__conditionalmod = mrf.nr;
2034
2035 brw_set_dest(p, insn, dest); /* UW? */
2036 if (brw->gen >= 6) {
2037 brw_set_src0(p, insn, mrf);
2038 } else {
2039 brw_set_src0(p, insn, brw_null_reg());
2040 }
2041
2042 brw_set_dp_read_message(p,
2043 insn,
2044 255, /* binding table index (255=stateless) */
2045 msg_control,
2046 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2047 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2048 1, /* msg_length */
2049 true, /* header_present */
2050 rlen);
2051 }
2052 }
2053
2054 /**
2055 * Read a float[4] vector from the data port Data Cache (const buffer).
2056 * Location (in buffer) should be a multiple of 16.
2057 * Used for fetching shader constants.
2058 */
2059 void brw_oword_block_read(struct brw_compile *p,
2060 struct brw_reg dest,
2061 struct brw_reg mrf,
2062 uint32_t offset,
2063 uint32_t bind_table_index)
2064 {
2065 struct brw_context *brw = p->brw;
2066
2067 /* On newer hardware, offset is in units of owords. */
2068 if (brw->gen >= 6)
2069 offset /= 16;
2070
2071 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2072
2073 brw_push_insn_state(p);
2074 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2075 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2076 brw_set_mask_control(p, BRW_MASK_DISABLE);
2077
2078 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2079
2080 /* set message header global offset field (reg 0, element 2) */
2081 brw_MOV(p,
2082 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2083 mrf.nr,
2084 2), BRW_REGISTER_TYPE_UD),
2085 brw_imm_ud(offset));
2086
2087 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2088 insn->header.destreg__conditionalmod = mrf.nr;
2089
2090 /* cast dest to a uword[8] vector */
2091 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2092
2093 brw_set_dest(p, insn, dest);
2094 if (brw->gen >= 6) {
2095 brw_set_src0(p, insn, mrf);
2096 } else {
2097 brw_set_src0(p, insn, brw_null_reg());
2098 }
2099
2100 brw_set_dp_read_message(p,
2101 insn,
2102 bind_table_index,
2103 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2104 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2105 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2106 1, /* msg_length */
2107 true, /* header_present */
2108 1); /* response_length (1 reg, 2 owords!) */
2109
2110 brw_pop_insn_state(p);
2111 }
2112
2113
2114 void brw_fb_WRITE(struct brw_compile *p,
2115 int dispatch_width,
2116 GLuint msg_reg_nr,
2117 struct brw_reg src0,
2118 GLuint msg_control,
2119 GLuint binding_table_index,
2120 GLuint msg_length,
2121 GLuint response_length,
2122 bool eot,
2123 bool header_present)
2124 {
2125 struct brw_context *brw = p->brw;
2126 struct brw_instruction *insn;
2127 GLuint msg_type;
2128 struct brw_reg dest;
2129
2130 if (dispatch_width == 16)
2131 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2132 else
2133 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2134
2135 if (brw->gen >= 6) {
2136 insn = next_insn(p, BRW_OPCODE_SENDC);
2137 } else {
2138 insn = next_insn(p, BRW_OPCODE_SEND);
2139 }
2140 /* The execution mask is ignored for render target writes. */
2141 insn->header.predicate_control = 0;
2142 insn->header.compression_control = BRW_COMPRESSION_NONE;
2143
2144 if (brw->gen >= 6) {
2145 /* headerless version, just submit color payload */
2146 src0 = brw_message_reg(msg_reg_nr);
2147
2148 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2149 } else {
2150 insn->header.destreg__conditionalmod = msg_reg_nr;
2151
2152 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2153 }
2154
2155 brw_set_dest(p, insn, dest);
2156 brw_set_src0(p, insn, src0);
2157 brw_set_dp_write_message(p,
2158 insn,
2159 binding_table_index,
2160 msg_control,
2161 msg_type,
2162 msg_length,
2163 header_present,
2164 eot, /* last render target write */
2165 response_length,
2166 eot,
2167 0 /* send_commit_msg */);
2168 }
2169
2170
2171 /**
2172 * Texture sample instruction.
2173 * Note: the msg_type plus msg_length values determine exactly what kind
2174 * of sampling operation is performed. See volume 4, page 161 of docs.
2175 */
2176 void brw_SAMPLE(struct brw_compile *p,
2177 struct brw_reg dest,
2178 GLuint msg_reg_nr,
2179 struct brw_reg src0,
2180 GLuint binding_table_index,
2181 GLuint sampler,
2182 GLuint msg_type,
2183 GLuint response_length,
2184 GLuint msg_length,
2185 GLuint header_present,
2186 GLuint simd_mode,
2187 GLuint return_format)
2188 {
2189 struct brw_context *brw = p->brw;
2190 struct brw_instruction *insn;
2191
2192 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2193
2194 insn = next_insn(p, BRW_OPCODE_SEND);
2195 insn->header.predicate_control = 0; /* XXX */
2196 insn->header.compression_control = BRW_COMPRESSION_NONE;
2197 if (brw->gen < 6)
2198 insn->header.destreg__conditionalmod = msg_reg_nr;
2199
2200 brw_set_dest(p, insn, dest);
2201 brw_set_src0(p, insn, src0);
2202 brw_set_sampler_message(p, insn,
2203 binding_table_index,
2204 sampler,
2205 msg_type,
2206 response_length,
2207 msg_length,
2208 header_present,
2209 simd_mode,
2210 return_format);
2211 }
2212
/* Emit a URB write message.
 *
 * All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   GLuint msg_length,
                   GLuint response_length,
                   GLuint offset,
                   GLuint swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * OR 0xff00 into m<msg_reg_nr>.5, starting from the value in g0.5,
       * with the execution mask disabled so it always runs.
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
                retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
                brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   /* The whole payload must fit in the message register file. */
   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 SEND carries the message register number in destreg. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
                       insn,
                       flags,
                       msg_length,
                       response_length,
                       offset,
                       swizzle);
}
2263
2264 static int
2265 next_ip(struct brw_compile *p, int ip)
2266 {
2267 struct brw_instruction *insn = (void *)p->store + ip;
2268
2269 if (insn->header.cmpt_control)
2270 return ip + 8;
2271 else
2272 return ip + 16;
2273 }
2274
2275 static int
2276 brw_find_next_block_end(struct brw_compile *p, int start)
2277 {
2278 int ip;
2279 void *store = p->store;
2280
2281 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2282 struct brw_instruction *insn = store + ip;
2283
2284 switch (insn->header.opcode) {
2285 case BRW_OPCODE_ENDIF:
2286 case BRW_OPCODE_ELSE:
2287 case BRW_OPCODE_WHILE:
2288 case BRW_OPCODE_HALT:
2289 return ip;
2290 }
2291 }
2292
2293 return 0;
2294 }
2295
/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 *
 * Returns the offset of the WHILE that closes the loop containing
 * 'start'.  Asserts (and returns 'start') if none is found.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct brw_context *brw = p->brw;
   int ip;
   /* Branch distances in the instruction are in units of 8 bytes. */
   int scale = 8;
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
         /* Gen6 stores the backward jump in jump_count; gen7+ in JIP. */
         int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
                                 : insn->bits3.break_cont.jip;
         /* A WHILE whose (negative) jump lands at or before 'start'
          * is the one that closes the loop we're inside of.
          */
         if (ip + jip * scale <= start)
            return ip;
      }
   }
   assert(!"not reached");
   return start;
}
2324
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 *
 * This pass only applies to the gen6+ JIP/UIP branch encodings; it is
 * a no-op on earlier generations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   /* JIP/UIP fields are stored in units of 8 bytes. */
   int scale = 8;
   void *store = p->store;

   if (brw->gen < 6)
      return;

   /* Walk every instruction in the program. */
   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
         assert(insn->header.opcode != BRW_OPCODE_BREAK &&
                insn->header.opcode != BRW_OPCODE_CONTINUE &&
                insn->header.opcode != BRW_OPCODE_HALT);
         continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip +
             (brw->gen == 6 ? 16 : 0)) / scale;
         break;
      case BRW_OPCODE_CONTINUE:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip) / scale;

         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;

      case BRW_OPCODE_ENDIF:
         /* With no later block end, jump to the next instruction:
          * 2 * scale == 16 bytes, one full-size instruction.
          */
         if (block_end_ip == 0)
            insn->bits3.break_cont.jip = 2;
         else
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         break;

      case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         if (block_end_ip == 0) {
            insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
         } else {
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         }
         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;
      }
   }
}
2400
2401 void brw_ff_sync(struct brw_compile *p,
2402 struct brw_reg dest,
2403 GLuint msg_reg_nr,
2404 struct brw_reg src0,
2405 bool allocate,
2406 GLuint response_length,
2407 bool eot)
2408 {
2409 struct brw_context *brw = p->brw;
2410 struct brw_instruction *insn;
2411
2412 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2413
2414 insn = next_insn(p, BRW_OPCODE_SEND);
2415 brw_set_dest(p, insn, dest);
2416 brw_set_src0(p, insn, src0);
2417 brw_set_src1(p, insn, brw_imm_d(0));
2418
2419 if (brw->gen < 6)
2420 insn->header.destreg__conditionalmod = msg_reg_nr;
2421
2422 brw_set_ff_sync_message(p,
2423 insn,
2424 allocate,
2425 response_length,
2426 eot);
2427 }
2428
2429 /**
2430 * Emit the SEND instruction necessary to generate stream output data on Gen6
2431 * (for transform feedback).
2432 *
2433 * If send_commit_msg is true, this is the last piece of stream output data
2434 * from this thread, so send the data as a committed write. According to the
2435 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2436 *
2437 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2438 * writes are complete by sending the final write as a committed write."
2439 */
2440 void
2441 brw_svb_write(struct brw_compile *p,
2442 struct brw_reg dest,
2443 GLuint msg_reg_nr,
2444 struct brw_reg src0,
2445 GLuint binding_table_index,
2446 bool send_commit_msg)
2447 {
2448 struct brw_instruction *insn;
2449
2450 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2451
2452 insn = next_insn(p, BRW_OPCODE_SEND);
2453 brw_set_dest(p, insn, dest);
2454 brw_set_src0(p, insn, src0);
2455 brw_set_src1(p, insn, brw_imm_d(0));
2456 brw_set_dp_write_message(p, insn,
2457 binding_table_index,
2458 0, /* msg_control: ignored */
2459 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2460 1, /* msg_length */
2461 true, /* header_present */
2462 0, /* last_render_target: ignored */
2463 send_commit_msg, /* response_length */
2464 0, /* end_of_thread */
2465 send_commit_msg); /* send_commit_msg */
2466 }
2467
/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   /* The untyped atomic messages used below only exist on gen7+. */
   assert(brw->gen >= 7);

   /* Emit the SEND itself in align1 mode with the execution mask disabled. */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));

   /* Haswell uses a different SFID and message type encoding for the
    * untyped atomic than Ivybridge.
    */
   uint32_t sfid, msg_type;
   if (brw->is_haswell) {
      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   bool header_present = false;
   bool eot = false;
   uint32_t mlen = 2; /* offset, value */
   uint32_t rlen = 0;
   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);

   /* Hand-pack the untyped-atomic-specific descriptor fields. */
   send->bits3.ud |= msg_type << 14;
   send->bits3.ud |= 0 << 13; /* no return data */
   send->bits3.ud |= 1 << 12; /* SIMD8 mode */
   send->bits3.ud |= BRW_AOP_ADD << 8; /* atomic operation: ADD */
   send->bits3.ud |= surf_index << 0; /* surface binding table index */
}