i965: Remove never used RSR and RSL opcodes.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102
/**
 * Encode \p dest as the destination operand of \p insn.
 *
 * The destination encoding differs along two axes: addressing mode
 * (direct vs. register-indirect) and access mode (Align1 vs. Align16),
 * so exactly one of the da1/da16/ia1/ia16 union layouts is filled in.
 * As a side effect, the instruction's execution size is derived from
 * the destination width (see guess_execution_size()).
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* Register numbers must fit in 7 bits; ARF and MRF destinations are
    * exempt from this check.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On gen7, MRFs are emulated with the top of the GRF space. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A stride-0 destination is quietly promoted to stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16 subregister numbers are in units of 16 bytes. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Same stride-0 -> stride-1 promotion as the direct case above. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
159
160 extern int reg_type_size[];
161
162 static void
163 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
164 {
165 int hstride_for_reg[] = {0, 1, 2, 4};
166 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
167 int width_for_reg[] = {1, 2, 4, 8, 16};
168 int execsize_for_reg[] = {1, 2, 4, 8, 16};
169 int width, hstride, vstride, execsize;
170
171 if (reg.file == BRW_IMMEDIATE_VALUE) {
172 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
173 * mean the destination has to be 128-bit aligned and the
174 * destination horiz stride has to be a word.
175 */
176 if (reg.type == BRW_REGISTER_TYPE_V) {
177 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
178 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
179 }
180
181 return;
182 }
183
184 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
185 reg.file == BRW_ARF_NULL)
186 return;
187
188 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
189 hstride = hstride_for_reg[reg.hstride];
190
191 if (reg.vstride == 0xf) {
192 vstride = -1;
193 } else {
194 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
195 vstride = vstride_for_reg[reg.vstride];
196 }
197
198 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
199 width = width_for_reg[reg.width];
200
201 assert(insn->header.execution_size >= 0 &&
202 insn->header.execution_size < Elements(execsize_for_reg));
203 execsize = execsize_for_reg[insn->header.execution_size];
204
205 /* Restrictions from 3.3.10: Register Region Restrictions. */
206 /* 3. */
207 assert(execsize >= width);
208
209 /* 4. */
210 if (execsize == width && hstride != 0) {
211 assert(vstride == -1 || vstride == width * hstride);
212 }
213
214 /* 5. */
215 if (execsize == width && hstride == 0) {
216 /* no restriction on vstride. */
217 }
218
219 /* 6. */
220 if (width == 1) {
221 assert(hstride == 0);
222 }
223
224 /* 7. */
225 if (execsize == 1 && width == 1) {
226 assert(hstride == 0);
227 assert(vstride == 0);
228 }
229
230 /* 8. */
231 if (vstride == 0 && hstride == 0) {
232 assert(width == 1);
233 }
234
235 /* 10. Check destination issues. */
236 }
237
238 void
239 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
240 struct brw_reg reg)
241 {
242 struct brw_context *brw = p->brw;
243
244 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
245 assert(reg.nr < 128);
246
247 gen7_convert_mrf_to_grf(p, &reg);
248
249 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
250 insn->header.opcode == BRW_OPCODE_SENDC)) {
251 /* Any source modifiers or regions will be ignored, since this just
252 * identifies the MRF/GRF to start reading the message contents from.
253 * Check for some likely failures.
254 */
255 assert(!reg.negate);
256 assert(!reg.abs);
257 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
258 }
259
260 validate_reg(insn, reg);
261
262 insn->bits1.da1.src0_reg_file = reg.file;
263 insn->bits1.da1.src0_reg_type = reg.type;
264 insn->bits2.da1.src0_abs = reg.abs;
265 insn->bits2.da1.src0_negate = reg.negate;
266 insn->bits2.da1.src0_address_mode = reg.address_mode;
267
268 if (reg.file == BRW_IMMEDIATE_VALUE) {
269 insn->bits3.ud = reg.dw1.ud;
270
271 /* Required to set some fields in src1 as well:
272 */
273 insn->bits1.da1.src1_reg_file = 0; /* arf */
274 insn->bits1.da1.src1_reg_type = reg.type;
275 }
276 else
277 {
278 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
279 if (insn->header.access_mode == BRW_ALIGN_1) {
280 insn->bits2.da1.src0_subreg_nr = reg.subnr;
281 insn->bits2.da1.src0_reg_nr = reg.nr;
282 }
283 else {
284 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
285 insn->bits2.da16.src0_reg_nr = reg.nr;
286 }
287 }
288 else {
289 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
290
291 if (insn->header.access_mode == BRW_ALIGN_1) {
292 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
293 }
294 else {
295 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
296 }
297 }
298
299 if (insn->header.access_mode == BRW_ALIGN_1) {
300 if (reg.width == BRW_WIDTH_1 &&
301 insn->header.execution_size == BRW_EXECUTE_1) {
302 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
303 insn->bits2.da1.src0_width = BRW_WIDTH_1;
304 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
305 }
306 else {
307 insn->bits2.da1.src0_horiz_stride = reg.hstride;
308 insn->bits2.da1.src0_width = reg.width;
309 insn->bits2.da1.src0_vert_stride = reg.vstride;
310 }
311 }
312 else {
313 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
314 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
315 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
316 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
317
318 /* This is an oddity of the fact we're using the same
319 * descriptions for registers in align_16 as align_1:
320 */
321 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
322 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
323 else
324 insn->bits2.da16.src0_vert_stride = reg.vstride;
325 }
326 }
327 }
328
329
330 void brw_set_src1(struct brw_compile *p,
331 struct brw_instruction *insn,
332 struct brw_reg reg)
333 {
334 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
335
336 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
337 assert(reg.nr < 128);
338
339 gen7_convert_mrf_to_grf(p, &reg);
340
341 validate_reg(insn, reg);
342
343 insn->bits1.da1.src1_reg_file = reg.file;
344 insn->bits1.da1.src1_reg_type = reg.type;
345 insn->bits3.da1.src1_abs = reg.abs;
346 insn->bits3.da1.src1_negate = reg.negate;
347
348 /* Only src1 can be immediate in two-argument instructions.
349 */
350 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
351
352 if (reg.file == BRW_IMMEDIATE_VALUE) {
353 insn->bits3.ud = reg.dw1.ud;
354 }
355 else {
356 /* This is a hardware restriction, which may or may not be lifted
357 * in the future:
358 */
359 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
360 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
361
362 if (insn->header.access_mode == BRW_ALIGN_1) {
363 insn->bits3.da1.src1_subreg_nr = reg.subnr;
364 insn->bits3.da1.src1_reg_nr = reg.nr;
365 }
366 else {
367 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
368 insn->bits3.da16.src1_reg_nr = reg.nr;
369 }
370
371 if (insn->header.access_mode == BRW_ALIGN_1) {
372 if (reg.width == BRW_WIDTH_1 &&
373 insn->header.execution_size == BRW_EXECUTE_1) {
374 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
375 insn->bits3.da1.src1_width = BRW_WIDTH_1;
376 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
377 }
378 else {
379 insn->bits3.da1.src1_horiz_stride = reg.hstride;
380 insn->bits3.da1.src1_width = reg.width;
381 insn->bits3.da1.src1_vert_stride = reg.vstride;
382 }
383 }
384 else {
385 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
386 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
387 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
388 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
389
390 /* This is an oddity of the fact we're using the same
391 * descriptions for registers in align_16 as align_1:
392 */
393 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
394 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
395 else
396 insn->bits3.da16.src1_vert_stride = reg.vstride;
397 }
398 }
399 }
400
401 /**
402 * Set the Message Descriptor and Extended Message Descriptor fields
403 * for SEND messages.
404 *
405 * \note This zeroes out the Function Control bits, so it must be called
406 * \b before filling out any message-specific data. Callers can
407 * choose not to fill in irrelevant bits; they will be zero.
408 */
409 static void
410 brw_set_message_descriptor(struct brw_compile *p,
411 struct brw_instruction *inst,
412 enum brw_message_target sfid,
413 unsigned msg_length,
414 unsigned response_length,
415 bool header_present,
416 bool end_of_thread)
417 {
418 struct brw_context *brw = p->brw;
419
420 brw_set_src1(p, inst, brw_imm_d(0));
421
422 if (brw->gen >= 5) {
423 inst->bits3.generic_gen5.header_present = header_present;
424 inst->bits3.generic_gen5.response_length = response_length;
425 inst->bits3.generic_gen5.msg_length = msg_length;
426 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
427
428 if (brw->gen >= 6) {
429 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
430 inst->header.destreg__conditionalmod = sfid;
431 } else {
432 /* Set Extended Message Descriptor (ex_desc) */
433 inst->bits2.send_gen5.sfid = sfid;
434 inst->bits2.send_gen5.end_of_thread = end_of_thread;
435 }
436 } else {
437 inst->bits3.generic.response_length = response_length;
438 inst->bits3.generic.msg_length = msg_length;
439 inst->bits3.generic.msg_target = sfid;
440 inst->bits3.generic.end_of_thread = end_of_thread;
441 }
442 }
443
444 static void brw_set_math_message( struct brw_compile *p,
445 struct brw_instruction *insn,
446 GLuint function,
447 GLuint integer_type,
448 bool low_precision,
449 GLuint dataType )
450 {
451 struct brw_context *brw = p->brw;
452 unsigned msg_length;
453 unsigned response_length;
454
455 /* Infer message length from the function */
456 switch (function) {
457 case BRW_MATH_FUNCTION_POW:
458 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
459 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
460 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
461 msg_length = 2;
462 break;
463 default:
464 msg_length = 1;
465 break;
466 }
467
468 /* Infer response length from the function */
469 switch (function) {
470 case BRW_MATH_FUNCTION_SINCOS:
471 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
472 response_length = 2;
473 break;
474 default:
475 response_length = 1;
476 break;
477 }
478
479
480 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
481 msg_length, response_length, false, false);
482 if (brw->gen == 5) {
483 insn->bits3.math_gen5.function = function;
484 insn->bits3.math_gen5.int_type = integer_type;
485 insn->bits3.math_gen5.precision = low_precision;
486 insn->bits3.math_gen5.saturate = insn->header.saturate;
487 insn->bits3.math_gen5.data_type = dataType;
488 insn->bits3.math_gen5.snapshot = 0;
489 } else {
490 insn->bits3.math.function = function;
491 insn->bits3.math.int_type = integer_type;
492 insn->bits3.math.precision = low_precision;
493 insn->bits3.math.saturate = insn->header.saturate;
494 insn->bits3.math.data_type = dataType;
495 }
496 insn->header.saturate = 0;
497 }
498
499
/* Fill out the descriptor for an FF_SYNC URB message.  FF_SYNC always
 * sends a single register with a header; most urb_gen5 descriptor
 * fields do not apply and are zeroed.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
515
/* Fill out the descriptor for a URB write message.  \p flags selects
 * EOT/allocate/used/complete/per-slot-offset behavior; the descriptor
 * layout differs per generation (urb_gen7, urb_gen5 and pre-gen5 urb
 * union members).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 enum brw_urb_write_flags flags,
				 GLuint msg_length,
				 GLuint response_length,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true,
			      flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Transposed writes are not supported by the gen7 encoding below. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
	 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      /* Note the inverted sense: the UNUSED flag clears the "used" bit. */
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
553
/**
 * Fill out the descriptor for a data port write message.
 *
 * Chooses the shared function (SFID) by generation: gen7 routes render
 * target writes to the render cache and everything else to the data
 * cache, gen6 sends all writes to the render cache, and pre-gen6 uses
 * the dedicated dataport-write SFID.  The remaining fields are packed
 * into the generation-specific descriptor layout.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
611
/**
 * Fill out the descriptor for a data port read message.
 *
 * Chooses the SFID by generation: gen7 always uses the data cache,
 * gen6 selects render vs. sampler cache based on \p target_cache, and
 * pre-gen6 uses the dedicated dataport-read SFID.  Descriptor field
 * packing differs per generation (and g4x has its own layout).
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			bool header_present,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
668
/**
 * Fill out the descriptor for a sampler message.
 *
 * The descriptor layout varies by generation (gen7, gen5/6, g4x and
 * original gen4 each use a different bits3 union member); note that
 * \p return_format is only encoded on original gen4, and \p simd_mode
 * only from gen5 onwards.
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint sampler,
                        GLuint msg_type,
                        GLuint response_length,
                        GLuint msg_length,
                        GLuint header_present,
                        GLuint simd_mode,
                        GLuint return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
707
708
709 #define next_insn brw_next_insn
710 struct brw_instruction *
711 brw_next_insn(struct brw_compile *p, GLuint opcode)
712 {
713 struct brw_instruction *insn;
714
715 if (p->nr_insn + 1 > p->store_size) {
716 if (0)
717 printf("incresing the store size to %d\n", p->store_size << 1);
718 p->store_size <<= 1;
719 p->store = reralloc(p->mem_ctx, p->store,
720 struct brw_instruction, p->store_size);
721 if (!p->store)
722 assert(!"realloc eu store memeory failed");
723 }
724
725 p->next_insn_offset += 16;
726 insn = &p->store[p->nr_insn++];
727 memcpy(insn, p->current, sizeof(*insn));
728
729 /* Reset this one-shot flag:
730 */
731
732 if (p->current->header.destreg__conditionalmod) {
733 p->current->header.destreg__conditionalmod = 0;
734 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
735 }
736
737 insn->header.opcode = opcode;
738 return insn;
739 }
740
741 static struct brw_instruction *brw_alu1( struct brw_compile *p,
742 GLuint opcode,
743 struct brw_reg dest,
744 struct brw_reg src )
745 {
746 struct brw_instruction *insn = next_insn(p, opcode);
747 brw_set_dest(p, insn, dest);
748 brw_set_src0(p, insn, src);
749 return insn;
750 }
751
752 static struct brw_instruction *brw_alu2(struct brw_compile *p,
753 GLuint opcode,
754 struct brw_reg dest,
755 struct brw_reg src0,
756 struct brw_reg src1 )
757 {
758 struct brw_instruction *insn = next_insn(p, opcode);
759 brw_set_dest(p, insn, dest);
760 brw_set_src0(p, insn, src0);
761 brw_set_src1(p, insn, src1);
762 return insn;
763 }
764
765 static int
766 get_3src_subreg_nr(struct brw_reg reg)
767 {
768 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
769 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
770 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
771 } else {
772 return reg.subnr / 4;
773 }
774 }
775
/* Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2, ...).
 * 3-source instructions use a dedicated da3src encoding that only
 * supports Align16 mode, direct addressing, GRF sources, and a
 * GRF/MRF destination of type F, D or UD.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* dest_reg_file is a single bit: 0 = GRF, 1 = MRF. */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl replicates a scalar source across all channels. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   /* src1's subregister number straddles the bits2/bits3 boundary. */
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
859
860
861 /***********************************************************************
862 * Convenience routines.
863 */
864 #define ALU1(OP) \
865 struct brw_instruction *brw_##OP(struct brw_compile *p, \
866 struct brw_reg dest, \
867 struct brw_reg src0) \
868 { \
869 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
870 }
871
872 #define ALU2(OP) \
873 struct brw_instruction *brw_##OP(struct brw_compile *p, \
874 struct brw_reg dest, \
875 struct brw_reg src0, \
876 struct brw_reg src1) \
877 { \
878 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
879 }
880
881 #define ALU3(OP) \
882 struct brw_instruction *brw_##OP(struct brw_compile *p, \
883 struct brw_reg dest, \
884 struct brw_reg src0, \
885 struct brw_reg src1, \
886 struct brw_reg src2) \
887 { \
888 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
889 }
890
891 #define ALU3F(OP) \
892 struct brw_instruction *brw_##OP(struct brw_compile *p, \
893 struct brw_reg dest, \
894 struct brw_reg src0, \
895 struct brw_reg src1, \
896 struct brw_reg src2) \
897 { \
898 assert(dest.type == BRW_REGISTER_TYPE_F); \
899 assert(src0.type == BRW_REGISTER_TYPE_F); \
900 assert(src1.type == BRW_REGISTER_TYPE_F); \
901 assert(src2.type == BRW_REGISTER_TYPE_F); \
902 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
903 }
904
905 /* Rounding operations (other than RNDD) require two instructions - the first
906 * stores a rounded value (possibly the wrong way) in the dest register, but
907 * also sets a per-channel "increment bit" in the flag register. A predicated
908 * add of 1.0 fixes dest to contain the desired result.
909 *
910 * Sandybridge and later appear to round correctly without an ADD.
911 */
912 #define ROUND(OP) \
913 void brw_##OP(struct brw_compile *p, \
914 struct brw_reg dest, \
915 struct brw_reg src) \
916 { \
917 struct brw_instruction *rnd, *add; \
918 rnd = next_insn(p, BRW_OPCODE_##OP); \
919 brw_set_dest(p, rnd, dest); \
920 brw_set_src0(p, rnd, src); \
921 \
922 if (p->brw->gen < 6) { \
923 /* turn on round-increments */ \
924 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
925 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
926 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
927 } \
928 }
929
930
931 ALU1(MOV)
932 ALU2(SEL)
933 ALU1(NOT)
934 ALU2(AND)
935 ALU2(OR)
936 ALU2(XOR)
937 ALU2(SHR)
938 ALU2(SHL)
939 ALU2(ASR)
940 ALU1(F32TO16)
941 ALU1(F16TO32)
942 ALU1(FRC)
943 ALU1(RNDD)
944 ALU2(MAC)
945 ALU2(MACH)
946 ALU1(LZD)
947 ALU2(DP4)
948 ALU2(DPH)
949 ALU2(DP3)
950 ALU2(DP2)
951 ALU2(LINE)
952 ALU2(PLN)
953 ALU3F(MAD)
954 ALU3F(LRP)
955 ALU1(BFREV)
956 ALU3(BFE)
957 ALU2(BFI1)
958 ALU3(BFI2)
959 ALU1(FBH)
960 ALU1(FBL)
961 ALU1(CBIT)
962
963 ROUND(RNDZ)
964 ROUND(RNDE)
965
966
967 struct brw_instruction *brw_ADD(struct brw_compile *p,
968 struct brw_reg dest,
969 struct brw_reg src0,
970 struct brw_reg src1)
971 {
972 /* 6.2.2: add */
973 if (src0.type == BRW_REGISTER_TYPE_F ||
974 (src0.file == BRW_IMMEDIATE_VALUE &&
975 src0.type == BRW_REGISTER_TYPE_VF)) {
976 assert(src1.type != BRW_REGISTER_TYPE_UD);
977 assert(src1.type != BRW_REGISTER_TYPE_D);
978 }
979
980 if (src1.type == BRW_REGISTER_TYPE_F ||
981 (src1.file == BRW_IMMEDIATE_VALUE &&
982 src1.type == BRW_REGISTER_TYPE_VF)) {
983 assert(src0.type != BRW_REGISTER_TYPE_UD);
984 assert(src0.type != BRW_REGISTER_TYPE_D);
985 }
986
987 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
988 }
989
990 struct brw_instruction *brw_AVG(struct brw_compile *p,
991 struct brw_reg dest,
992 struct brw_reg src0,
993 struct brw_reg src1)
994 {
995 assert(dest.type == src0.type);
996 assert(src0.type == src1.type);
997 switch (src0.type) {
998 case BRW_REGISTER_TYPE_B:
999 case BRW_REGISTER_TYPE_UB:
1000 case BRW_REGISTER_TYPE_W:
1001 case BRW_REGISTER_TYPE_UW:
1002 case BRW_REGISTER_TYPE_D:
1003 case BRW_REGISTER_TYPE_UD:
1004 break;
1005 default:
1006 assert(!"Bad type for brw_AVG");
1007 }
1008
1009 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1010 }
1011
1012 struct brw_instruction *brw_MUL(struct brw_compile *p,
1013 struct brw_reg dest,
1014 struct brw_reg src0,
1015 struct brw_reg src1)
1016 {
1017 /* 6.32.38: mul */
1018 if (src0.type == BRW_REGISTER_TYPE_D ||
1019 src0.type == BRW_REGISTER_TYPE_UD ||
1020 src1.type == BRW_REGISTER_TYPE_D ||
1021 src1.type == BRW_REGISTER_TYPE_UD) {
1022 assert(dest.type != BRW_REGISTER_TYPE_F);
1023 }
1024
1025 if (src0.type == BRW_REGISTER_TYPE_F ||
1026 (src0.file == BRW_IMMEDIATE_VALUE &&
1027 src0.type == BRW_REGISTER_TYPE_VF)) {
1028 assert(src1.type != BRW_REGISTER_TYPE_UD);
1029 assert(src1.type != BRW_REGISTER_TYPE_D);
1030 }
1031
1032 if (src1.type == BRW_REGISTER_TYPE_F ||
1033 (src1.file == BRW_IMMEDIATE_VALUE &&
1034 src1.type == BRW_REGISTER_TYPE_VF)) {
1035 assert(src0.type != BRW_REGISTER_TYPE_UD);
1036 assert(src0.type != BRW_REGISTER_TYPE_D);
1037 }
1038
1039 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1040 src0.nr != BRW_ARF_ACCUMULATOR);
1041 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1042 src1.nr != BRW_ARF_ACCUMULATOR);
1043
1044 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1045 }
1046
1047
1048 void brw_NOP(struct brw_compile *p)
1049 {
1050 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1051 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1052 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1053 brw_set_src1(p, insn, brw_imm_ud(0x0));
1054 }
1055
1056
1057
1058
1059
1060 /***********************************************************************
1061 * Comparisons, if/else/endif
1062 */
1063
/* Emit a JMPI (jump-indexed) instruction.  The jump target offset is patched
 * later (see brw_land_fwd_jump).  JMPI is inherently scalar, so execution is
 * forced to one channel with masking disabled.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Any pending predicate applied to the jump itself; don't let it leak
    * into subsequently emitted instructions.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1079
/* Record an IF/ELSE instruction on the if-stack so the matching brw_ENDIF
 * can find and patch it later.  Store offsets rather than pointers because
 * p->store may be reallocated as instructions are emitted.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   /* Grow after the write: the invariant is array_size > if_stack_depth,
    * which guarantees room for the next push.
    */
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1092
1093 static struct brw_instruction *
1094 pop_if_stack(struct brw_compile *p)
1095 {
1096 p->if_stack_depth--;
1097 return &p->store[p->if_stack[p->if_stack_depth]];
1098 }
1099
1100 static void
1101 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1102 {
1103 if (p->loop_stack_array_size < p->loop_stack_depth) {
1104 p->loop_stack_array_size *= 2;
1105 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1106 p->loop_stack_array_size);
1107 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1108 p->loop_stack_array_size);
1109 }
1110
1111 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1112 p->loop_stack_depth++;
1113 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1114 }
1115
1116 static struct brw_instruction *
1117 get_inner_do_insn(struct brw_compile *p)
1118 {
1119 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1120 }
1121
1122 /* EU takes the value from the flag register and pushes it onto some
1123 * sort of a stack (presumably merging with any flag value already on
1124 * the stack). Within an if block, the flags at the top of the stack
1125 * control execution on each channel of the unit, eg. on each of the
1126 * 16 pixel values in our wm programs.
1127 *
1128 * When the matching 'else' instruction is reached (presumably by
1129 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1131 *
1132 * When the matching 'endif' instruction is reached, the flags are
1133 * popped off. If the stack is now empty, normal execution resumes.
1134 */
/* Emit an IF instruction with the generation-appropriate operand encoding.
 * The branch targets are left zero here and patched by patch_IF_ELSE() when
 * the matching ENDIF is emitted.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      /* Pre-gen6: IF operates on IP with a jump count in src1. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6: the branch offset lives in the destination immediate. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7+: branch offsets are the JIP/UIP fields. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   /* Flow control forces a thread switch unless we're in SPF mode. */
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The predicate was consumed by the IF itself; later instructions should
    * not inherit it.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1175
1176 /* This function is only used for gen6-style IF instructions with an
1177 * embedded comparison (conditional modifier). It is not used on gen7.
1178 */
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 *
 * The comparison (src0 <conditional> src1) is folded into the IF itself, so
 * no predicate is used; the jump count is patched later by patch_IF_ELSE().
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* On an IF, the destreg/condmod field carries the embedded comparison. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1207
1208 /**
1209 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1210 */
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 *
 * Since execution is uniform in SPF mode, the IF/ELSE become plain jumps
 * implemented as predicated ADDs to IP.  The distances are in bytes:
 * 16 bytes per 128-bit instruction.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1247
1248 /**
1249 * Patch IF and ELSE instructions with appropriate jump targets.
1250 */
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 *
 * Called from brw_ENDIF once the extent of the conditional is known.  All
 * distances are in "jump units" (br): one 64-bit chunk before gen5, two
 * chunks (one full instruction) from gen5 on.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7+: with no ELSE, both the taken and not-taken paths land on
	  * the ENDIF.
	  */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1337
/* Emit an ELSE instruction.  Like brw_IF, the branch targets are left zero
 * and patched by patch_IF_ELSE() when the matching ENDIF is emitted; the
 * ELSE is pushed on the if-stack so brw_ENDIF can find it.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      /* Pre-gen6: ELSE operates on IP with a jump count in src1. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6: jump count lives in the destination immediate. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: branch offsets are the JIP/UIP fields. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1370
/* Close the innermost IF (and optional ELSE): either emit a real ENDIF and
 * patch the IF/ELSE jump targets, or (pre-gen6 SPF mode) rewrite the IF/ELSE
 * into ADDs on IP and emit nothing.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1449
/* Emit a BREAK.  The jump target is patched later: by brw_patch_break_cont()
 * (pre-gen6) or by the caller's brw_set_uip_jip pass (gen6+).  Pre-gen6 also
 * records how many IF levels must be popped to leave the loop.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop the mask stack entries of any IFs open inside this loop. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1472
1473 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1474 {
1475 struct brw_instruction *insn;
1476
1477 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1478 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1479 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1480 brw_set_dest(p, insn, brw_ip_reg());
1481 brw_set_src0(p, insn, brw_ip_reg());
1482 brw_set_src1(p, insn, brw_imm_d(0x0));
1483
1484 insn->header.compression_control = BRW_COMPRESSION_NONE;
1485 insn->header.execution_size = BRW_EXECUTE_8;
1486 return insn;
1487 }
1488
/* Emit a pre-gen6 CONTINUE.  The jump count is patched later by
 * brw_patch_break_cont(); pop_count records how many IF levels must be
 * popped to get back to the top of the loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1503
/* Emit a gen6+ HALT.  The UIP/JIP targets (packed in src1) are zero here and
 * updated later by the caller once the landing point is known.
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1521
1522 /* DO/WHILE loop:
1523 *
1524 * The DO/WHILE is just an unterminated loop -- break or continue are
1525 * used for control within the loop. We have a few ways they can be
1526 * done.
1527 *
1528 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1529 * jip and no DO instruction.
1530 *
1531 * For non-uniform control flow pre-gen6, there's a DO instruction to
1532 * push the mask, and a WHILE to jump back, and BREAK to get out and
1533 * pop the mask.
1534 *
1535 * For gen6, there's no more mask stack, so no need for DO. WHILE
1536 * just points back to the first instruction of the loop.
1537 */
/* Open a DO/WHILE loop.  On gen6+ (or in SPF mode) no DO instruction exists;
 * the position of the next instruction is simply recorded on the loop stack
 * so the WHILE can branch back to it.  Pre-gen6 emits a real DO that pushes
 * the execution mask.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1565
1566 /**
1567 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1568 * instruction here.
1569 *
1570 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1571 * nesting, since it can always just point to the end of the block/current loop.
1572 */
/* Walk the body of the innermost loop (between its DO and the given WHILE)
 * and fill in the jump counts of any not-yet-patched BREAK and CONTINUE
 * instructions.  BREAK jumps just past the WHILE; CONTINUE jumps to the
 * WHILE itself.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Gen5 counts jumps in 64-bit chunks (2 per instruction). */
   int br = (brw->gen == 5) ? 2 : 1;

   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1595
/* Close the innermost loop with a WHILE (or, pre-gen6 SPF mode, a plain ADD
 * to IP).  The backward branch distance to the loop start is computed here;
 * pre-gen6 this also patches the loop body's BREAK/CONT instructions.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   /* Gen5+ counts jumps in 64-bit chunks: 2 per 128-bit instruction. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      /* Gen7+: backward distance goes in the JIP field. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      /* Gen6: backward distance goes in the destination immediate. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF mode: an unconditional ADD to IP replaces the WHILE. */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   /* Don't let a pending predicate leak into subsequent instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1659
1660
1661 /* FORWARD JUMPS:
1662 */
/* Patch a previously emitted JMPI (at store index jmp_insn_idx) so that it
 * jumps to the current end of the program.  The distance is measured in
 * jump units: 64-bit chunks pre-gen5, whole instructions (2 chunks) after.
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   GLuint jmpi = 1;

   if (brw->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* -1: the distance is relative to the instruction after the JMPI. */
   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}
1677
1678
1679
1680 /* To integrate with the above, it makes sense that the comparison
1681 * instruction should populate the flag register. It might be simpler
1682 * just to use the flag reg for most WM tasks?
1683 */
/* Emit a CMP instruction (src0 <conditional> src1), writing the per-channel
 * result into the flag register and, as a side effect of a null destination,
 * enabling predication for subsequently emitted instructions.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
	  dest.nr == BRW_ARF_NULL) {
	 insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1725
1726 /* Issue 'wait' instruction for n1, host could program MMIO
1727 to wake up thread. */
/* Emit a WAIT on notification register n1.  The thread sleeps until the host
 * (or another agent) signals it, e.g. via MMIO.  WAIT requires scalar,
 * unpredicated, uncompressed execution.
 */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both dest and src0. */
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1740
1741
1742 /***********************************************************************
1743 * Helpers for the various SEND message types:
1744 */
1745
1746 /** Extended math function, float[8].
1747 */
/** Extended math function, float[8].
 *
 * On gen6+ MATH is a native instruction; the function selector is packed
 * into the CondModifier/ThreadCtrl bits.  On earlier hardware, math is a
 * SEND message to the shared math box, with msg_reg_nr selecting the message
 * register and data_type/precision describing the payload.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer division takes integer sources; everything else is float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1810
1811 /** Extended math function, float[8].
1812 */
/** Extended math function, float[8].
 *
 * Two-source variant of brw_math() for the gen6+ native MATH instruction
 * (e.g. POW, INT_DIV); not available on the pre-gen6 SEND-based path.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer division takes integer sources; everything else is float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1860
1861
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On Gen6+, the offset field of the message header is in units of
    * owords rather than bytes.
    */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen counts the header MRF plus the data MRFs (one GRF holds two
    * owords).
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* The SEND itself must be uncompressed; widen the header source
       * to SIMD16 to match if we inherited compressed state.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      /* For SEND, this field carries the message register number. */
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1970
1971
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* On Gen6+, the offset field of the message header is in units of
    * owords rather than bytes.
    */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* rlen is the number of response GRFs (one GRF holds two owords). */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: copy g0, then overwrite the
    * global offset field (g0.2 equivalent) with our scratch offset.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      /* For SEND, this field carries the message register number. */
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      true, /* header_present */
			      rlen);
   }
}
2046
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Header setup must be unpredicated, uncompressed and unmasked so
    * the MOVs below execute regardless of channel enables.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   /* For SEND, this field carries the message register number. */
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (brw->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   true, /* header_present */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
2105
2106
2107 void brw_fb_WRITE(struct brw_compile *p,
2108 int dispatch_width,
2109 GLuint msg_reg_nr,
2110 struct brw_reg src0,
2111 GLuint msg_control,
2112 GLuint binding_table_index,
2113 GLuint msg_length,
2114 GLuint response_length,
2115 bool eot,
2116 bool header_present)
2117 {
2118 struct brw_context *brw = p->brw;
2119 struct brw_instruction *insn;
2120 GLuint msg_type;
2121 struct brw_reg dest;
2122
2123 if (dispatch_width == 16)
2124 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2125 else
2126 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2127
2128 if (brw->gen >= 6) {
2129 insn = next_insn(p, BRW_OPCODE_SENDC);
2130 } else {
2131 insn = next_insn(p, BRW_OPCODE_SEND);
2132 }
2133 /* The execution mask is ignored for render target writes. */
2134 insn->header.predicate_control = 0;
2135 insn->header.compression_control = BRW_COMPRESSION_NONE;
2136
2137 if (brw->gen >= 6) {
2138 /* headerless version, just submit color payload */
2139 src0 = brw_message_reg(msg_reg_nr);
2140
2141 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2142 } else {
2143 insn->header.destreg__conditionalmod = msg_reg_nr;
2144
2145 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2146 }
2147
2148 brw_set_dest(p, insn, dest);
2149 brw_set_src0(p, insn, src0);
2150 brw_set_dp_write_message(p,
2151 insn,
2152 binding_table_index,
2153 msg_control,
2154 msg_type,
2155 msg_length,
2156 header_present,
2157 eot, /* last render target write */
2158 response_length,
2159 eot,
2160 0 /* send_commit_msg */);
2161 }
2162
2163
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLuint header_present,
		GLuint simd_mode,
		GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* On Gen6+, copy src0 into the message register if needed. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   /* Before Gen6, the message register number lives in the SEND header. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
			   binding_table_index,
			   sampler,
			   msg_type,
			   response_length,
			   msg_length,
			   header_present,
			   simd_mode,
			   return_format);
}
2205
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   enum brw_urb_write_flags flags,
		   GLuint msg_length,
		   GLuint response_length,
		   GLuint offset,
		   GLuint swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* On Gen6+, copy src0 into the message register if needed. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * OR 0xff00 from g0.5 into byte 5 of the message header before
       * emitting the SEND itself.
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
	        brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Before Gen6, the message register number lives in the SEND header. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       flags,
		       msg_length,
		       response_length,
		       offset,
		       swizzle);
}
2256
2257 static int
2258 next_ip(struct brw_compile *p, int ip)
2259 {
2260 struct brw_instruction *insn = (void *)p->store + ip;
2261
2262 if (insn->header.cmpt_control)
2263 return ip + 8;
2264 else
2265 return ip + 16;
2266 }
2267
2268 static int
2269 brw_find_next_block_end(struct brw_compile *p, int start)
2270 {
2271 int ip;
2272 void *store = p->store;
2273
2274 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2275 struct brw_instruction *insn = store + ip;
2276
2277 switch (insn->header.opcode) {
2278 case BRW_OPCODE_ENDIF:
2279 case BRW_OPCODE_ELSE:
2280 case BRW_OPCODE_WHILE:
2281 case BRW_OPCODE_HALT:
2282 return ip;
2283 }
2284 }
2285
2286 return 0;
2287 }
2288
2289 /* There is no DO instruction on gen6, so to find the end of the loop
2290 * we have to see if the loop is jumping back before our start
2291 * instruction.
2292 */
2293 static int
2294 brw_find_loop_end(struct brw_compile *p, int start)
2295 {
2296 struct brw_context *brw = p->brw;
2297 int ip;
2298 int scale = 8;
2299 void *store = p->store;
2300
2301 /* Always start after the instruction (such as a WHILE) we're trying to fix
2302 * up.
2303 */
2304 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2305 struct brw_instruction *insn = store + ip;
2306
2307 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2308 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
2309 : insn->bits3.break_cont.jip;
2310 if (ip + jip * scale <= start)
2311 return ip;
2312 }
2313 }
2314 assert(!"not reached");
2315 return start;
2316 }
2317
2318 /* After program generation, go back and update the UIP and JIP of
2319 * BREAK, CONT, and HALT instructions to their correct locations.
2320 */
2321 void
2322 brw_set_uip_jip(struct brw_compile *p)
2323 {
2324 struct brw_context *brw = p->brw;
2325 int ip;
2326 int scale = 8;
2327 void *store = p->store;
2328
2329 if (brw->gen < 6)
2330 return;
2331
2332 for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2333 struct brw_instruction *insn = store + ip;
2334
2335 if (insn->header.cmpt_control) {
2336 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2337 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
2338 insn->header.opcode != BRW_OPCODE_CONTINUE &&
2339 insn->header.opcode != BRW_OPCODE_HALT);
2340 continue;
2341 }
2342
2343 int block_end_ip = brw_find_next_block_end(p, ip);
2344 switch (insn->header.opcode) {
2345 case BRW_OPCODE_BREAK:
2346 assert(block_end_ip != 0);
2347 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2348 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2349 insn->bits3.break_cont.uip =
2350 (brw_find_loop_end(p, ip) - ip +
2351 (brw->gen == 6 ? 16 : 0)) / scale;
2352 break;
2353 case BRW_OPCODE_CONTINUE:
2354 assert(block_end_ip != 0);
2355 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2356 insn->bits3.break_cont.uip =
2357 (brw_find_loop_end(p, ip) - ip) / scale;
2358
2359 assert(insn->bits3.break_cont.uip != 0);
2360 assert(insn->bits3.break_cont.jip != 0);
2361 break;
2362
2363 case BRW_OPCODE_ENDIF:
2364 if (block_end_ip == 0)
2365 insn->bits3.break_cont.jip = 2;
2366 else
2367 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2368 break;
2369
2370 case BRW_OPCODE_HALT:
2371 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2372 *
2373 * "In case of the halt instruction not inside any conditional
2374 * code block, the value of <JIP> and <UIP> should be the
2375 * same. In case of the halt instruction inside conditional code
2376 * block, the <UIP> should be the end of the program, and the
2377 * <JIP> should be end of the most inner conditional code block."
2378 *
2379 * The uip will have already been set by whoever set up the
2380 * instruction.
2381 */
2382 if (block_end_ip == 0) {
2383 insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
2384 } else {
2385 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2386 }
2387 assert(insn->bits3.break_cont.uip != 0);
2388 assert(insn->bits3.break_cont.jip != 0);
2389 break;
2390 }
2391 }
2392 }
2393
/* Emit a SEND carrying an FF_SYNC message (fixed-function URB
 * synchronization).  The message payload comes from \p src0 via
 * \p msg_reg_nr.
 */
void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate,
		 GLuint response_length,
		 bool eot)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* On Gen6+, copy src0 into the message register if needed. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Before Gen6, the message register number lives in the SEND header. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}
2421
/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write.  According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *    writes are complete by sending the final write as a committed write."
 */
void
brw_svb_write(struct brw_compile *p,
	      struct brw_reg dest,
	      GLuint msg_reg_nr,
	      struct brw_reg src0,
	      GLuint binding_table_index,
	      bool send_commit_msg)
{
   struct brw_instruction *insn;

   /* On Gen6+, copy src0 into the message register if needed. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));
   brw_set_dp_write_message(p, insn,
			    binding_table_index,
			    0, /* msg_control: ignored */
			    GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
			    1, /* msg_length */
			    true, /* header_present */
			    0, /* last_render_target: ignored */
			    send_commit_msg, /* response_length */
			    0, /* end_of_thread */
			    send_commit_msg); /* send_commit_msg */
}
2460
/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
			 struct brw_reg payload,
			 uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   /* The data cache atomic message used below only exists on Gen7+. */
   assert(brw->gen >= 7);

   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
				      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
				      payload.nr, 0));

   /* Haswell moved the untyped atomics to data cache port 1 with a new
    * message type encoding.
    */
   uint32_t sfid, msg_type;
   if (brw->is_haswell) {
      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   bool header_present = false;
   bool eot = false;
   uint32_t mlen = 2; /* offset, value */
   uint32_t rlen = 0;
   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);

   /* Pack the remaining message descriptor bits by hand. */
   send->bits3.ud |= msg_type << 14;
   send->bits3.ud |= 0 << 13; /* no return data */
   send->bits3.ud |= 1 << 12; /* SIMD8 mode */
   send->bits3.ud |= BRW_AOP_ADD << 8;
   send->bits3.ud |= surf_index << 0;
}