i965/gs: Add a case to brwNewProgram() for geometry shaders.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct brw_context *brw = p->brw;
   /* Pre-Sandybridge hardware performs the move to the MRF implicitly;
    * nothing to do.
    */
   if (brw->gen < 6)
      return;

   /* The source is already a message register; SEND can use it directly. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      /* Emit an uncompressed, force-writemask-all MOV of the source into the
       * requested message register.  Insn state is saved/restored so the
       * mask/compression overrides don't leak into subsequent instructions.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   /* Redirect the caller's source operand to the message register. */
   *src = brw_message_reg(msg_reg_nr);
}
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102
/* Encode the destination operand of @insn from @dest.  Handles both direct
 * and register-indirect addressing, and both align1 and align16 access modes,
 * whose destination fields have different layouts.  Also sets the
 * instruction's execution size from the destination width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/IMM register numbers must fit in the 128-register file; ARF and MRF
    * encodings use other number spaces, so they are exempt.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On Gen7 the MRF is emulated with the top of the GRF. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A horizontal stride of 0 is not encodable for destinations;
	  * promote it to stride 1.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Same stride-0 promotion as the direct align1 case above. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
159
160 extern int reg_type_size[];
161
162 static void
163 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
164 {
165 int hstride_for_reg[] = {0, 1, 2, 4};
166 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
167 int width_for_reg[] = {1, 2, 4, 8, 16};
168 int execsize_for_reg[] = {1, 2, 4, 8, 16};
169 int width, hstride, vstride, execsize;
170
171 if (reg.file == BRW_IMMEDIATE_VALUE) {
172 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
173 * mean the destination has to be 128-bit aligned and the
174 * destination horiz stride has to be a word.
175 */
176 if (reg.type == BRW_REGISTER_TYPE_V) {
177 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
178 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
179 }
180
181 return;
182 }
183
184 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
185 reg.file == BRW_ARF_NULL)
186 return;
187
188 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
189 hstride = hstride_for_reg[reg.hstride];
190
191 if (reg.vstride == 0xf) {
192 vstride = -1;
193 } else {
194 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
195 vstride = vstride_for_reg[reg.vstride];
196 }
197
198 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
199 width = width_for_reg[reg.width];
200
201 assert(insn->header.execution_size >= 0 &&
202 insn->header.execution_size < Elements(execsize_for_reg));
203 execsize = execsize_for_reg[insn->header.execution_size];
204
205 /* Restrictions from 3.3.10: Register Region Restrictions. */
206 /* 3. */
207 assert(execsize >= width);
208
209 /* 4. */
210 if (execsize == width && hstride != 0) {
211 assert(vstride == -1 || vstride == width * hstride);
212 }
213
214 /* 5. */
215 if (execsize == width && hstride == 0) {
216 /* no restriction on vstride. */
217 }
218
219 /* 6. */
220 if (width == 1) {
221 assert(hstride == 0);
222 }
223
224 /* 7. */
225 if (execsize == 1 && width == 1) {
226 assert(hstride == 0);
227 assert(vstride == 0);
228 }
229
230 /* 8. */
231 if (vstride == 0 && hstride == 0) {
232 assert(width == 1);
233 }
234
235 /* 10. Check destination issues. */
236 }
237
238 void
239 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
240 struct brw_reg reg)
241 {
242 struct brw_context *brw = p->brw;
243
244 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
245 assert(reg.nr < 128);
246
247 gen7_convert_mrf_to_grf(p, &reg);
248
249 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
250 insn->header.opcode == BRW_OPCODE_SENDC)) {
251 /* Any source modifiers or regions will be ignored, since this just
252 * identifies the MRF/GRF to start reading the message contents from.
253 * Check for some likely failures.
254 */
255 assert(!reg.negate);
256 assert(!reg.abs);
257 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
258 }
259
260 validate_reg(insn, reg);
261
262 insn->bits1.da1.src0_reg_file = reg.file;
263 insn->bits1.da1.src0_reg_type = reg.type;
264 insn->bits2.da1.src0_abs = reg.abs;
265 insn->bits2.da1.src0_negate = reg.negate;
266 insn->bits2.da1.src0_address_mode = reg.address_mode;
267
268 if (reg.file == BRW_IMMEDIATE_VALUE) {
269 insn->bits3.ud = reg.dw1.ud;
270
271 /* Required to set some fields in src1 as well:
272 */
273 insn->bits1.da1.src1_reg_file = 0; /* arf */
274 insn->bits1.da1.src1_reg_type = reg.type;
275 }
276 else
277 {
278 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
279 if (insn->header.access_mode == BRW_ALIGN_1) {
280 insn->bits2.da1.src0_subreg_nr = reg.subnr;
281 insn->bits2.da1.src0_reg_nr = reg.nr;
282 }
283 else {
284 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
285 insn->bits2.da16.src0_reg_nr = reg.nr;
286 }
287 }
288 else {
289 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
290
291 if (insn->header.access_mode == BRW_ALIGN_1) {
292 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
293 }
294 else {
295 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
296 }
297 }
298
299 if (insn->header.access_mode == BRW_ALIGN_1) {
300 if (reg.width == BRW_WIDTH_1 &&
301 insn->header.execution_size == BRW_EXECUTE_1) {
302 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
303 insn->bits2.da1.src0_width = BRW_WIDTH_1;
304 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
305 }
306 else {
307 insn->bits2.da1.src0_horiz_stride = reg.hstride;
308 insn->bits2.da1.src0_width = reg.width;
309 insn->bits2.da1.src0_vert_stride = reg.vstride;
310 }
311 }
312 else {
313 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
314 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
315 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
316 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
317
318 /* This is an oddity of the fact we're using the same
319 * descriptions for registers in align_16 as align_1:
320 */
321 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
322 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
323 else
324 insn->bits2.da16.src0_vert_stride = reg.vstride;
325 }
326 }
327 }
328
329
330 void brw_set_src1(struct brw_compile *p,
331 struct brw_instruction *insn,
332 struct brw_reg reg)
333 {
334 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
335
336 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
337 assert(reg.nr < 128);
338
339 gen7_convert_mrf_to_grf(p, &reg);
340
341 validate_reg(insn, reg);
342
343 insn->bits1.da1.src1_reg_file = reg.file;
344 insn->bits1.da1.src1_reg_type = reg.type;
345 insn->bits3.da1.src1_abs = reg.abs;
346 insn->bits3.da1.src1_negate = reg.negate;
347
348 /* Only src1 can be immediate in two-argument instructions.
349 */
350 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
351
352 if (reg.file == BRW_IMMEDIATE_VALUE) {
353 insn->bits3.ud = reg.dw1.ud;
354 }
355 else {
356 /* This is a hardware restriction, which may or may not be lifted
357 * in the future:
358 */
359 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
360 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
361
362 if (insn->header.access_mode == BRW_ALIGN_1) {
363 insn->bits3.da1.src1_subreg_nr = reg.subnr;
364 insn->bits3.da1.src1_reg_nr = reg.nr;
365 }
366 else {
367 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
368 insn->bits3.da16.src1_reg_nr = reg.nr;
369 }
370
371 if (insn->header.access_mode == BRW_ALIGN_1) {
372 if (reg.width == BRW_WIDTH_1 &&
373 insn->header.execution_size == BRW_EXECUTE_1) {
374 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
375 insn->bits3.da1.src1_width = BRW_WIDTH_1;
376 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
377 }
378 else {
379 insn->bits3.da1.src1_horiz_stride = reg.hstride;
380 insn->bits3.da1.src1_width = reg.width;
381 insn->bits3.da1.src1_vert_stride = reg.vstride;
382 }
383 }
384 else {
385 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
386 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
387 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
388 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
389
390 /* This is an oddity of the fact we're using the same
391 * descriptions for registers in align_16 as align_1:
392 */
393 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
394 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
395 else
396 insn->bits3.da16.src1_vert_stride = reg.vstride;
397 }
398 }
399 }
400
401 /**
402 * Set the Message Descriptor and Extended Message Descriptor fields
403 * for SEND messages.
404 *
405 * \note This zeroes out the Function Control bits, so it must be called
406 * \b before filling out any message-specific data. Callers can
407 * choose not to fill in irrelevant bits; they will be zero.
408 */
409 static void
410 brw_set_message_descriptor(struct brw_compile *p,
411 struct brw_instruction *inst,
412 enum brw_message_target sfid,
413 unsigned msg_length,
414 unsigned response_length,
415 bool header_present,
416 bool end_of_thread)
417 {
418 struct brw_context *brw = p->brw;
419
420 brw_set_src1(p, inst, brw_imm_d(0));
421
422 if (brw->gen >= 5) {
423 inst->bits3.generic_gen5.header_present = header_present;
424 inst->bits3.generic_gen5.response_length = response_length;
425 inst->bits3.generic_gen5.msg_length = msg_length;
426 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
427
428 if (brw->gen >= 6) {
429 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
430 inst->header.destreg__conditionalmod = sfid;
431 } else {
432 /* Set Extended Message Descriptor (ex_desc) */
433 inst->bits2.send_gen5.sfid = sfid;
434 inst->bits2.send_gen5.end_of_thread = end_of_thread;
435 }
436 } else {
437 inst->bits3.generic.response_length = response_length;
438 inst->bits3.generic.msg_length = msg_length;
439 inst->bits3.generic.msg_target = sfid;
440 inst->bits3.generic.end_of_thread = end_of_thread;
441 }
442 }
443
444 static void brw_set_math_message( struct brw_compile *p,
445 struct brw_instruction *insn,
446 GLuint function,
447 GLuint integer_type,
448 bool low_precision,
449 GLuint dataType )
450 {
451 struct brw_context *brw = p->brw;
452 unsigned msg_length;
453 unsigned response_length;
454
455 /* Infer message length from the function */
456 switch (function) {
457 case BRW_MATH_FUNCTION_POW:
458 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
459 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
460 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
461 msg_length = 2;
462 break;
463 default:
464 msg_length = 1;
465 break;
466 }
467
468 /* Infer response length from the function */
469 switch (function) {
470 case BRW_MATH_FUNCTION_SINCOS:
471 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
472 response_length = 2;
473 break;
474 default:
475 response_length = 1;
476 break;
477 }
478
479
480 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
481 msg_length, response_length, false, false);
482 if (brw->gen == 5) {
483 insn->bits3.math_gen5.function = function;
484 insn->bits3.math_gen5.int_type = integer_type;
485 insn->bits3.math_gen5.precision = low_precision;
486 insn->bits3.math_gen5.saturate = insn->header.saturate;
487 insn->bits3.math_gen5.data_type = dataType;
488 insn->bits3.math_gen5.snapshot = 0;
489 } else {
490 insn->bits3.math.function = function;
491 insn->bits3.math.int_type = integer_type;
492 insn->bits3.math.precision = low_precision;
493 insn->bits3.math.saturate = insn->header.saturate;
494 insn->bits3.math.data_type = dataType;
495 }
496 insn->header.saturate = 0;
497 }
498
499
/* Fill in the descriptor for a Gen5 URB FF_SYNC message (msg_length 1,
 * header present).  Fields unused by FF_SYNC are cleared explicitly.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
515
/* Fill in the descriptor for a URB write message, using the per-generation
 * descriptor layout (gen7 / gen5-6 / pre-gen5).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (brw->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes have no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
555
/* Fill in the descriptor for a data-port write message, selecting the
 * shared-function ID (cache) appropriate for the hardware generation.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   /* Per-generation function-control field layouts. */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
613
/* Fill in the descriptor for a data-port read message, selecting the
 * shared-function ID (cache) appropriate for the hardware generation.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			bool header_present,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* On Gen6 the SFID encodes the target cache directly. */
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, false);

   /* Per-generation function-control field layouts. */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
   }
}
670
/* Fill in the descriptor for a sampler message, using the per-generation
 * function-control layout (gen7 / gen5-6 / G4x / original).
 */
void
brw_set_sampler_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint sampler,
			GLuint msg_type,
			GLuint response_length,
			GLuint msg_length,
			GLuint header_present,
			GLuint simd_mode,
			GLuint return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      /* return_format only exists in the original (pre-G4x) layout. */
      insn->bits3.sampler.return_format = return_format;
   }
}
709
710
711 #define next_insn brw_next_insn
712 struct brw_instruction *
713 brw_next_insn(struct brw_compile *p, GLuint opcode)
714 {
715 struct brw_instruction *insn;
716
717 if (p->nr_insn + 1 > p->store_size) {
718 if (0)
719 printf("incresing the store size to %d\n", p->store_size << 1);
720 p->store_size <<= 1;
721 p->store = reralloc(p->mem_ctx, p->store,
722 struct brw_instruction, p->store_size);
723 if (!p->store)
724 assert(!"realloc eu store memeory failed");
725 }
726
727 p->next_insn_offset += 16;
728 insn = &p->store[p->nr_insn++];
729 memcpy(insn, p->current, sizeof(*insn));
730
731 /* Reset this one-shot flag:
732 */
733
734 if (p->current->header.destreg__conditionalmod) {
735 p->current->header.destreg__conditionalmod = 0;
736 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
737 }
738
739 insn->header.opcode = opcode;
740 return insn;
741 }
742
743 static struct brw_instruction *brw_alu1( struct brw_compile *p,
744 GLuint opcode,
745 struct brw_reg dest,
746 struct brw_reg src )
747 {
748 struct brw_instruction *insn = next_insn(p, opcode);
749 brw_set_dest(p, insn, dest);
750 brw_set_src0(p, insn, src);
751 return insn;
752 }
753
754 static struct brw_instruction *brw_alu2(struct brw_compile *p,
755 GLuint opcode,
756 struct brw_reg dest,
757 struct brw_reg src0,
758 struct brw_reg src1 )
759 {
760 struct brw_instruction *insn = next_insn(p, opcode);
761 brw_set_dest(p, insn, dest);
762 brw_set_src0(p, insn, src0);
763 brw_set_src1(p, insn, src1);
764 return insn;
765 }
766
767 static int
768 get_3src_subreg_nr(struct brw_reg reg)
769 {
770 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
771 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
772 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
773 } else {
774 return reg.subnr / 4;
775 }
776 }
777
/* Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2).  3-src
 * instructions use a distinct, more restrictive encoding: align16 only,
 * GRF sources, and a shared source data type.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   /* On Gen7 the MRF is emulated with the top of the GRF. */
   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   /* Destination: GRF or MRF only, direct addressing, F/D/UD type. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
	  dest.type == BRW_REGISTER_TYPE_D ||
	  dest.type == BRW_REGISTER_TYPE_UD);
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   /* Source 0: GRF, direct addressing. */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl replicates a scalar (vstride 0) source across channels. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   /* Source 1: same restrictions; its subreg number is split across two
    * instruction words.
    */
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   /* Source 2: same restrictions as source 0. */
   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
	 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
	 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
	 break;
      case BRW_REGISTER_TYPE_D:
	 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
	 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
	 break;
      case BRW_REGISTER_TYPE_UD:
	 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
	 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
	 break;
      }
   }

   return insn;
}
861
862
/***********************************************************************
 * Convenience routines.
 */
/* Define brw_<OP> emitting a one-source ALU instruction. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Define brw_<OP> emitting a two-source ALU instruction. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Define brw_<OP> emitting a three-source ALU instruction. */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Like ALU3, but asserting that all four operands are float-typed. */
#define ALU3F(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
				 struct brw_reg dest,	\
				 struct brw_reg src0,	\
				 struct brw_reg src1,	\
				 struct brw_reg src2)	\
{							\
   assert(dest.type == BRW_REGISTER_TYPE_F);		\
   assert(src0.type == BRW_REGISTER_TYPE_F);		\
   assert(src1.type == BRW_REGISTER_TYPE_F);		\
   assert(src2.type == BRW_REGISTER_TYPE_F);		\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}


/* Instantiate the public emitters for each opcode. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)

ROUND(RNDZ)
ROUND(RNDE)
969
970
971 struct brw_instruction *brw_ADD(struct brw_compile *p,
972 struct brw_reg dest,
973 struct brw_reg src0,
974 struct brw_reg src1)
975 {
976 /* 6.2.2: add */
977 if (src0.type == BRW_REGISTER_TYPE_F ||
978 (src0.file == BRW_IMMEDIATE_VALUE &&
979 src0.type == BRW_REGISTER_TYPE_VF)) {
980 assert(src1.type != BRW_REGISTER_TYPE_UD);
981 assert(src1.type != BRW_REGISTER_TYPE_D);
982 }
983
984 if (src1.type == BRW_REGISTER_TYPE_F ||
985 (src1.file == BRW_IMMEDIATE_VALUE &&
986 src1.type == BRW_REGISTER_TYPE_VF)) {
987 assert(src0.type != BRW_REGISTER_TYPE_UD);
988 assert(src0.type != BRW_REGISTER_TYPE_D);
989 }
990
991 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
992 }
993
994 struct brw_instruction *brw_AVG(struct brw_compile *p,
995 struct brw_reg dest,
996 struct brw_reg src0,
997 struct brw_reg src1)
998 {
999 assert(dest.type == src0.type);
1000 assert(src0.type == src1.type);
1001 switch (src0.type) {
1002 case BRW_REGISTER_TYPE_B:
1003 case BRW_REGISTER_TYPE_UB:
1004 case BRW_REGISTER_TYPE_W:
1005 case BRW_REGISTER_TYPE_UW:
1006 case BRW_REGISTER_TYPE_D:
1007 case BRW_REGISTER_TYPE_UD:
1008 break;
1009 default:
1010 assert(!"Bad type for brw_AVG");
1011 }
1012
1013 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1014 }
1015
1016 struct brw_instruction *brw_MUL(struct brw_compile *p,
1017 struct brw_reg dest,
1018 struct brw_reg src0,
1019 struct brw_reg src1)
1020 {
1021 /* 6.32.38: mul */
1022 if (src0.type == BRW_REGISTER_TYPE_D ||
1023 src0.type == BRW_REGISTER_TYPE_UD ||
1024 src1.type == BRW_REGISTER_TYPE_D ||
1025 src1.type == BRW_REGISTER_TYPE_UD) {
1026 assert(dest.type != BRW_REGISTER_TYPE_F);
1027 }
1028
1029 if (src0.type == BRW_REGISTER_TYPE_F ||
1030 (src0.file == BRW_IMMEDIATE_VALUE &&
1031 src0.type == BRW_REGISTER_TYPE_VF)) {
1032 assert(src1.type != BRW_REGISTER_TYPE_UD);
1033 assert(src1.type != BRW_REGISTER_TYPE_D);
1034 }
1035
1036 if (src1.type == BRW_REGISTER_TYPE_F ||
1037 (src1.file == BRW_IMMEDIATE_VALUE &&
1038 src1.type == BRW_REGISTER_TYPE_VF)) {
1039 assert(src0.type != BRW_REGISTER_TYPE_UD);
1040 assert(src0.type != BRW_REGISTER_TYPE_D);
1041 }
1042
1043 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1044 src0.nr != BRW_ARF_ACCUMULATOR);
1045 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1046 src1.nr != BRW_ARF_ACCUMULATOR);
1047
1048 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1049 }
1050
1051
1052 void brw_NOP(struct brw_compile *p)
1053 {
1054 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1055 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1056 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1057 brw_set_src1(p, insn, brw_imm_ud(0x0));
1058 }
1059
1060
1061
1062
1063
1064 /***********************************************************************
1065 * Comparisons, if/else/endif
1066 */
1067
/**
 * Emit a JMPI (jump indexed) instruction.
 *
 * JMPI is a scalar operation: execution size 1, no compression, and the
 * execution mask disabled.  After emission the default predication state
 * is reset to NONE so following instructions are not accidentally
 * predicated.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1083
1084 static void
1085 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1086 {
1087 p->if_stack[p->if_stack_depth] = inst - p->store;
1088
1089 p->if_stack_depth++;
1090 if (p->if_stack_array_size <= p->if_stack_depth) {
1091 p->if_stack_array_size *= 2;
1092 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1093 p->if_stack_array_size);
1094 }
1095 }
1096
1097 static struct brw_instruction *
1098 pop_if_stack(struct brw_compile *p)
1099 {
1100 p->if_stack_depth--;
1101 return &p->store[p->if_stack[p->if_stack_depth]];
1102 }
1103
1104 static void
1105 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1106 {
1107 if (p->loop_stack_array_size < p->loop_stack_depth) {
1108 p->loop_stack_array_size *= 2;
1109 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1110 p->loop_stack_array_size);
1111 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1112 p->loop_stack_array_size);
1113 }
1114
1115 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1116 p->loop_stack_depth++;
1117 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1118 }
1119
1120 static struct brw_instruction *
1121 get_inner_do_insn(struct brw_compile *p)
1122 {
1123 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1124 }
1125
1126 /* EU takes the value from the flag register and pushes it onto some
1127 * sort of a stack (presumably merging with any flag value already on
1128 * the stack). Within an if block, the flags at the top of the stack
1129 * control execution on each channel of the unit, eg. on each of the
1130 * 16 pixel values in our wm programs.
1131 *
1132 * When the matching 'else' instruction is reached (presumably by
1133 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1135 *
1136 * When the matching 'endif' instruction is reached, the flags are
1137 * popped off. If the stack is now empty, normal execution resumes.
1138 */
/**
 * Emit an IF instruction.  The jump-target fields are left zero here and
 * are patched later by patch_IF_ELSE(), which brw_ENDIF() invokes once
 * the location of the matching ENDIF is known.
 *
 * The operand encoding differs by generation: pre-gen6 operates on the
 * IP register, gen6 stores a jump count in an immediate-word dest, and
 * gen7+ uses the JIP/UIP fields in bits3.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Don't let the predication leak into subsequent instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1179
1180 /* This function is only used for gen6-style IF instructions with an
1181 * embedded comparison (conditional modifier). It is not used on gen7.
1182 */
1183 struct brw_instruction *
1184 gen6_IF(struct brw_compile *p, uint32_t conditional,
1185 struct brw_reg src0, struct brw_reg src1)
1186 {
1187 struct brw_instruction *insn;
1188
1189 insn = next_insn(p, BRW_OPCODE_IF);
1190
1191 brw_set_dest(p, insn, brw_imm_w(0));
1192 if (p->compressed) {
1193 insn->header.execution_size = BRW_EXECUTE_16;
1194 } else {
1195 insn->header.execution_size = BRW_EXECUTE_8;
1196 }
1197 insn->bits1.branch_gen6.jump_count = 0;
1198 brw_set_src0(p, insn, src0);
1199 brw_set_src1(p, insn, src1);
1200
1201 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1202 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1203 insn->header.destreg__conditionalmod = conditional;
1204
1205 if (!p->single_program_flow)
1206 insn->header.thread_control = BRW_THREAD_SWITCH;
1207
1208 push_if_stack(p, insn);
1209 return insn;
1210 }
1211
1212 /**
1213 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1214 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* The * 16 scales an instruction-count delta to a byte offset for
       * the IP-relative ADD (each instruction is 128 bits = 16 bytes).
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1251
1252 /**
1253 * Patch IF and ELSE instructions with appropriate jump targets.
1254 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1341
/**
 * Emit an ELSE instruction and push it on the if-stack so brw_ENDIF()
 * can find it and patch its jump fields (the targets are left zero here;
 * see patch_IF_ELSE).  Per-generation operand encodings mirror brw_IF().
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1374
/**
 * Close an IF/ELSE block: pop the matching IF (and optional ELSE) off
 * the if-stack, emit the ENDIF (unless it is elided in pre-gen6 SPF
 * mode), and patch all the jump targets via patch_IF_ELSE().
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1453
/**
 * Emit a BREAK out of the innermost loop.
 *
 * Pre-gen6, the BREAK must pop the mask stack once for each IF level
 * currently open inside the loop (pop_count); its jump count is left
 * zero and patched later by brw_patch_break_cont().  On gen6+ the jump
 * targets are filled in after the fact (see the comment above brw_DO
 * referring to brw_set_uip_jip()).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1476
1477 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1478 {
1479 struct brw_instruction *insn;
1480
1481 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1482 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1483 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1484 brw_set_dest(p, insn, brw_ip_reg());
1485 brw_set_src0(p, insn, brw_ip_reg());
1486 brw_set_src1(p, insn, brw_imm_d(0x0));
1487
1488 insn->header.compression_control = BRW_COMPRESSION_NONE;
1489 insn->header.execution_size = BRW_EXECUTE_8;
1490 return insn;
1491 }
1492
/**
 * Emit a pre-gen6 CONTINUE.  pop_count pops the mask stack for each IF
 * level open inside the loop; the jump count is left zero and patched by
 * brw_patch_break_cont() when the loop's WHILE is emitted.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1507
/**
 * Emit a gen6+ HALT.  src1 carries the UIP and JIP fields, which are
 * updated later by the caller.
 *
 * NOTE(review): the compressed path sets only execution_size and leaves
 * compression_control at the current default — confirm that is intended.
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1525
1526 /* DO/WHILE loop:
1527 *
1528 * The DO/WHILE is just an unterminated loop -- break or continue are
1529 * used for control within the loop. We have a few ways they can be
1530 * done.
1531 *
1532 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1533 * jip and no DO instruction.
1534 *
1535 * For non-uniform control flow pre-gen6, there's a DO instruction to
1536 * push the mask, and a WHILE to jump back, and BREAK to get out and
1537 * pop the mask.
1538 *
1539 * For gen6, there's no more mask stack, so no need for DO. WHILE
1540 * just points back to the first instruction of the loop.
1541 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction is emitted; just remember where the loop body
       * starts so brw_WHILE() can jump back to it.
       */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1569
1570 /**
1571 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1572 * instruction here.
1573 *
1574 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1575 * nesting, since it can always just point to the end of the block/current loop.
1576 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Jump counts are in 64-bit chunks; gen5's 128-bit instructions need 2
    * chunks per instruction (see the matching comment in patch_IF_ELSE).
    */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the DO, fixing up every BREAK and
    * CONTINUE emitted inside this loop.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1599
/**
 * Emit the WHILE that closes the innermost loop and pop the loop stack.
 *
 * gen7+: WHILE carries a JIP pointing back to the loop start.
 * gen6: a jump count in the immediate-word destination points back.
 * Pre-gen6: either a conditional ADD to IP (single-program-flow) or a
 * real WHILE whose jump count points back past the DO; pre-gen6
 * BREAK/CONTINUE inside the loop are patched here via
 * brw_patch_break_cont().
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 /* * 16 converts the instruction delta to a byte offset for IP. */
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1663
1664
1665 /* FORWARD JUMPS:
1666 */
1667 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1668 {
1669 struct brw_context *brw = p->brw;
1670 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1671 GLuint jmpi = 1;
1672
1673 if (brw->gen >= 5)
1674 jmpi = 2;
1675
1676 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1677 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1678
1679 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1680 }
1681
1682
1683
1684 /* To integrate with the above, it makes sense that the comparison
1685 * instruction should populate the flag register. It might be simpler
1686 * just to use the flag reg for most WM tasks?
1687 */
/**
 * Emit a CMP that writes the per-channel comparison result of src0 and
 * src1 (under \p conditional) into the flag register and \p dest.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1729
1730 /* Issue 'wait' instruction for n1, host could program MMIO
1731 to wake up thread. */
1732 void brw_WAIT (struct brw_compile *p)
1733 {
1734 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1735 struct brw_reg src = brw_notification_1_reg();
1736
1737 brw_set_dest(p, insn, src);
1738 brw_set_src0(p, insn, src);
1739 brw_set_src1(p, insn, brw_null_reg());
1740 insn->header.execution_size = 0; /* must */
1741 insn->header.predicate_control = 0;
1742 insn->header.compression_control = 0;
1743 }
1744
1745
1746 /***********************************************************************
1747 * Helpers for the various SEND message types:
1748 */
1749
1750 /** Extended math function, float[8].
1751 */
/**
 * Emit an extended math operation.
 *
 * On gen6+ this is a native MATH instruction (the function select lives
 * in the CondModifier/ThreadCtrl bits); on earlier gens it is a SEND to
 * the shared math unit, with \p msg_reg_nr, \p data_type and
 * \p precision describing the message.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* The integer-divide functions take integer sources; everything
       * else operates on floats.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1814
1815 /** Extended math function, float[8].
1816 */
/**
 * Emit a two-source extended math operation (gen6+ native MATH only;
 * there is no pre-gen6 SEND fallback here, unlike brw_math()).
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer-divide variants take integer sources; all others floats. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1864
1865
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the header's global offset in owords, not bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF holds two owords; mlen counts the header plus the payload
    * registers.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* The SEND itself is always uncompressed; if we were emitting
       * compressed code, widen the header source to match the
       * execution size that was in effect.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      /* Pre-gen6, the message register number is encoded in this field. */
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1974
1975
1976 /**
1977 * Read a block of owords (half a GRF each) from the scratch buffer
1978 * using a constant index per channel.
1979 *
1980 * Offset must be aligned to oword size (16 bytes). Used for register
1981 * spilling.
1982 */
1983 void
1984 brw_oword_block_read_scratch(struct brw_compile *p,
1985 struct brw_reg dest,
1986 struct brw_reg mrf,
1987 int num_regs,
1988 GLuint offset)
1989 {
1990 struct brw_context *brw = p->brw;
1991 uint32_t msg_control;
1992 int rlen;
1993
1994 if (brw->gen >= 6)
1995 offset /= 16;
1996
1997 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1998 dest = retype(dest, BRW_REGISTER_TYPE_UW);
1999
2000 if (num_regs == 1) {
2001 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2002 rlen = 1;
2003 } else {
2004 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2005 rlen = 2;
2006 }
2007
2008 {
2009 brw_push_insn_state(p);
2010 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2011 brw_set_mask_control(p, BRW_MASK_DISABLE);
2012
2013 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2014
2015 /* set message header global offset field (reg 0, element 2) */
2016 brw_MOV(p,
2017 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2018 mrf.nr,
2019 2), BRW_REGISTER_TYPE_UD),
2020 brw_imm_ud(offset));
2021
2022 brw_pop_insn_state(p);
2023 }
2024
2025 {
2026 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2027
2028 assert(insn->header.predicate_control == 0);
2029 insn->header.compression_control = BRW_COMPRESSION_NONE;
2030 insn->header.destreg__conditionalmod = mrf.nr;
2031
2032 brw_set_dest(p, insn, dest); /* UW? */
2033 if (brw->gen >= 6) {
2034 brw_set_src0(p, insn, mrf);
2035 } else {
2036 brw_set_src0(p, insn, brw_null_reg());
2037 }
2038
2039 brw_set_dp_read_message(p,
2040 insn,
2041 255, /* binding table index (255=stateless) */
2042 msg_control,
2043 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2044 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2045 1, /* msg_length */
2046 true, /* header_present */
2047 rlen);
2048 }
2049 }
2050
2051 /**
2052 * Read a float[4] vector from the data port Data Cache (const buffer).
2053 * Location (in buffer) should be a multiple of 16.
2054 * Used for fetching shader constants.
2055 */
2056 void brw_oword_block_read(struct brw_compile *p,
2057 struct brw_reg dest,
2058 struct brw_reg mrf,
2059 uint32_t offset,
2060 uint32_t bind_table_index)
2061 {
2062 struct brw_context *brw = p->brw;
2063
2064 /* On newer hardware, offset is in units of owords. */
2065 if (brw->gen >= 6)
2066 offset /= 16;
2067
2068 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2069
2070 brw_push_insn_state(p);
2071 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2072 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2073 brw_set_mask_control(p, BRW_MASK_DISABLE);
2074
2075 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2076
2077 /* set message header global offset field (reg 0, element 2) */
2078 brw_MOV(p,
2079 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2080 mrf.nr,
2081 2), BRW_REGISTER_TYPE_UD),
2082 brw_imm_ud(offset));
2083
2084 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2085 insn->header.destreg__conditionalmod = mrf.nr;
2086
2087 /* cast dest to a uword[8] vector */
2088 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2089
2090 brw_set_dest(p, insn, dest);
2091 if (brw->gen >= 6) {
2092 brw_set_src0(p, insn, mrf);
2093 } else {
2094 brw_set_src0(p, insn, brw_null_reg());
2095 }
2096
2097 brw_set_dp_read_message(p,
2098 insn,
2099 bind_table_index,
2100 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2101 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2102 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2103 1, /* msg_length */
2104 true, /* header_present */
2105 1); /* response_length (1 reg, 2 owords!) */
2106
2107 brw_pop_insn_state(p);
2108 }
2109
2110
2111 void brw_fb_WRITE(struct brw_compile *p,
2112 int dispatch_width,
2113 GLuint msg_reg_nr,
2114 struct brw_reg src0,
2115 GLuint msg_control,
2116 GLuint binding_table_index,
2117 GLuint msg_length,
2118 GLuint response_length,
2119 bool eot,
2120 bool header_present)
2121 {
2122 struct brw_context *brw = p->brw;
2123 struct brw_instruction *insn;
2124 GLuint msg_type;
2125 struct brw_reg dest;
2126
2127 if (dispatch_width == 16)
2128 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2129 else
2130 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2131
2132 if (brw->gen >= 6) {
2133 insn = next_insn(p, BRW_OPCODE_SENDC);
2134 } else {
2135 insn = next_insn(p, BRW_OPCODE_SEND);
2136 }
2137 /* The execution mask is ignored for render target writes. */
2138 insn->header.predicate_control = 0;
2139 insn->header.compression_control = BRW_COMPRESSION_NONE;
2140
2141 if (brw->gen >= 6) {
2142 /* headerless version, just submit color payload */
2143 src0 = brw_message_reg(msg_reg_nr);
2144
2145 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2146 } else {
2147 insn->header.destreg__conditionalmod = msg_reg_nr;
2148
2149 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2150 }
2151
2152 brw_set_dest(p, insn, dest);
2153 brw_set_src0(p, insn, src0);
2154 brw_set_dp_write_message(p,
2155 insn,
2156 binding_table_index,
2157 msg_control,
2158 msg_type,
2159 msg_length,
2160 header_present,
2161 eot, /* last render target write */
2162 response_length,
2163 eot,
2164 0 /* send_commit_msg */);
2165 }
2166
2167
2168 /**
2169 * Texture sample instruction.
2170 * Note: the msg_type plus msg_length values determine exactly what kind
2171 * of sampling operation is performed. See volume 4, page 161 of docs.
2172 */
2173 void brw_SAMPLE(struct brw_compile *p,
2174 struct brw_reg dest,
2175 GLuint msg_reg_nr,
2176 struct brw_reg src0,
2177 GLuint binding_table_index,
2178 GLuint sampler,
2179 GLuint msg_type,
2180 GLuint response_length,
2181 GLuint msg_length,
2182 GLuint header_present,
2183 GLuint simd_mode,
2184 GLuint return_format)
2185 {
2186 struct brw_context *brw = p->brw;
2187 struct brw_instruction *insn;
2188
2189 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2190
2191 insn = next_insn(p, BRW_OPCODE_SEND);
2192 insn->header.predicate_control = 0; /* XXX */
2193 insn->header.compression_control = BRW_COMPRESSION_NONE;
2194 if (brw->gen < 6)
2195 insn->header.destreg__conditionalmod = msg_reg_nr;
2196
2197 brw_set_dest(p, insn, dest);
2198 brw_set_src0(p, insn, src0);
2199 brw_set_sampler_message(p, insn,
2200 binding_table_index,
2201 sampler,
2202 msg_type,
2203 response_length,
2204 msg_length,
2205 header_present,
2206 simd_mode,
2207 return_format);
2208 }
2209
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* NOTE(review): takes &src0, so this may rewrite src0 to reference
    * the message register on newer gens — see its definition.
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * m0.5 = g0.5 | 0xff00.
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	     retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
	     brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6, the message register number is encoded in the destreg
    * field of the send instruction.
    */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2266
2267 static int
2268 next_ip(struct brw_compile *p, int ip)
2269 {
2270 struct brw_instruction *insn = (void *)p->store + ip;
2271
2272 if (insn->header.cmpt_control)
2273 return ip + 8;
2274 else
2275 return ip + 16;
2276 }
2277
2278 static int
2279 brw_find_next_block_end(struct brw_compile *p, int start)
2280 {
2281 int ip;
2282 void *store = p->store;
2283
2284 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2285 struct brw_instruction *insn = store + ip;
2286
2287 switch (insn->header.opcode) {
2288 case BRW_OPCODE_ENDIF:
2289 case BRW_OPCODE_ELSE:
2290 case BRW_OPCODE_WHILE:
2291 case BRW_OPCODE_HALT:
2292 return ip;
2293 }
2294 }
2295
2296 return 0;
2297 }
2298
2299 /* There is no DO instruction on gen6, so to find the end of the loop
2300 * we have to see if the loop is jumping back before our start
2301 * instruction.
2302 */
2303 static int
2304 brw_find_loop_end(struct brw_compile *p, int start)
2305 {
2306 struct brw_context *brw = p->brw;
2307 int ip;
2308 int scale = 8;
2309 void *store = p->store;
2310
2311 /* Always start after the instruction (such as a WHILE) we're trying to fix
2312 * up.
2313 */
2314 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2315 struct brw_instruction *insn = store + ip;
2316
2317 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2318 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
2319 : insn->bits3.break_cont.jip;
2320 if (ip + jip * scale <= start)
2321 return ip;
2322 }
2323 }
2324 assert(!"not reached");
2325 return start;
2326 }
2327
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   int scale = 8;   /* JIP/UIP are encoded in units of 8 bytes */
   void *store = p->store;

   /* Nothing to do before gen6. */
   if (brw->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
	 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
	 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
		insn->header.opcode != BRW_OPCODE_CONTINUE &&
		insn->header.opcode != BRW_OPCODE_HALT);
	 continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip +
	     (brw->gen == 6 ? 16 : 0)) / scale;
	 break;
      case BRW_OPCODE_CONTINUE:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip) / scale;

	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;

      case BRW_OPCODE_ENDIF:
	 /* No block end found: jip = 2 (i.e. 16 bytes, one full-size
	  * instruction forward).
	  */
	 if (block_end_ip == 0)
	    insn->bits3.break_cont.jip = 2;
	 else
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 break;

      case BRW_OPCODE_HALT:
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same. In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
	 if (block_end_ip == 0) {
	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
	 } else {
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 }
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}
2403
2404 void brw_ff_sync(struct brw_compile *p,
2405 struct brw_reg dest,
2406 GLuint msg_reg_nr,
2407 struct brw_reg src0,
2408 bool allocate,
2409 GLuint response_length,
2410 bool eot)
2411 {
2412 struct brw_context *brw = p->brw;
2413 struct brw_instruction *insn;
2414
2415 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2416
2417 insn = next_insn(p, BRW_OPCODE_SEND);
2418 brw_set_dest(p, insn, dest);
2419 brw_set_src0(p, insn, src0);
2420 brw_set_src1(p, insn, brw_imm_d(0));
2421
2422 if (brw->gen < 6)
2423 insn->header.destreg__conditionalmod = msg_reg_nr;
2424
2425 brw_set_ff_sync_message(p,
2426 insn,
2427 allocate,
2428 response_length,
2429 eot);
2430 }
2431
2432 /**
2433 * Emit the SEND instruction necessary to generate stream output data on Gen6
2434 * (for transform feedback).
2435 *
2436 * If send_commit_msg is true, this is the last piece of stream output data
2437 * from this thread, so send the data as a committed write. According to the
2438 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2439 *
2440 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2441 * writes are complete by sending the final write as a committed write."
2442 */
2443 void
2444 brw_svb_write(struct brw_compile *p,
2445 struct brw_reg dest,
2446 GLuint msg_reg_nr,
2447 struct brw_reg src0,
2448 GLuint binding_table_index,
2449 bool send_commit_msg)
2450 {
2451 struct brw_instruction *insn;
2452
2453 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2454
2455 insn = next_insn(p, BRW_OPCODE_SEND);
2456 brw_set_dest(p, insn, dest);
2457 brw_set_src0(p, insn, src0);
2458 brw_set_src1(p, insn, brw_imm_d(0));
2459 brw_set_dp_write_message(p, insn,
2460 binding_table_index,
2461 0, /* msg_control: ignored */
2462 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2463 1, /* msg_length */
2464 true, /* header_present */
2465 0, /* last_render_target: ignored */
2466 send_commit_msg, /* response_length */
2467 0, /* end_of_thread */
2468 send_commit_msg); /* send_commit_msg */
2469 }
2470
/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
			 struct brw_reg payload,
			 uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   assert(brw->gen >= 7);

   /* Emit the SEND itself with ALIGN1 access and masking disabled. */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
				      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
				      payload.nr, 0));

   /* Haswell routes untyped atomics through data cache port 1 and uses
    * its own message-type encoding.
    */
   uint32_t sfid, msg_type;
   if (brw->is_haswell) {
      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   bool header_present = false;
   bool eot = false;
   uint32_t mlen = 2; /* offset, value */
   uint32_t rlen = 0;
   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);

   /* Pack the remaining untyped-atomic descriptor fields by hand. */
   send->bits3.ud |= msg_type << 14;
   send->bits3.ud |= 0 << 13; /* no return data */
   send->bits3.ud |= 1 << 12; /* SIMD8 mode */
   send->bits3.ud |= BRW_AOP_ADD << 8; /* atomic operation: ADD */
   send->bits3.ud |= surf_index << 0; /* surface index */
}