i965/gen7: Move SOL stage disable to gen7_sol_state.c
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   /* Pre-gen6 hardware performs the implied move itself; nothing to do. */
   if (intel->gen < 6)
      return;

   /* Operand is already a message register; no move needed. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      /* Emit an uncompressed MOV with the execution mask disabled so the
       * whole message register is written regardless of channel enables.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   /* Point the caller's source at the chosen message register (for the
    * null register, no copy was emitted — it is redirected directly).
    */
   *src = brw_message_reg(msg_reg_nr);
}
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 struct intel_context *intel = &p->brw->intel;
88 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
89 reg->file = BRW_GENERAL_REGISTER_FILE;
90 reg->nr += 111;
91 }
92 }
93
94
/**
 * Encode @dest as the destination operand of @insn (bits1).
 *
 * MRF destinations are first rewritten to their GRF equivalents on gen7.
 * The subregister/stride/writemask fields are packed differently for
 * direct vs. indirect addressing and for align1 vs. align16 access mode,
 * hence the four cases below.  Also derives the execution size from the
 * destination width (see guess_execution_size).
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A zero horizontal stride is not used for destinations here;
	  * promote it to a stride of one before encoding.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16 addresses subregisters in 16-byte units. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
148
149 extern int reg_type_size[];
150
151 static void
152 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
153 {
154 int hstride_for_reg[] = {0, 1, 2, 4};
155 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
156 int width_for_reg[] = {1, 2, 4, 8, 16};
157 int execsize_for_reg[] = {1, 2, 4, 8, 16};
158 int width, hstride, vstride, execsize;
159
160 if (reg.file == BRW_IMMEDIATE_VALUE) {
161 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
162 * mean the destination has to be 128-bit aligned and the
163 * destination horiz stride has to be a word.
164 */
165 if (reg.type == BRW_REGISTER_TYPE_V) {
166 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
167 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
168 }
169
170 return;
171 }
172
173 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
174 reg.file == BRW_ARF_NULL)
175 return;
176
177 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
178 hstride = hstride_for_reg[reg.hstride];
179
180 if (reg.vstride == 0xf) {
181 vstride = -1;
182 } else {
183 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
184 vstride = vstride_for_reg[reg.vstride];
185 }
186
187 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
188 width = width_for_reg[reg.width];
189
190 assert(insn->header.execution_size >= 0 &&
191 insn->header.execution_size < Elements(execsize_for_reg));
192 execsize = execsize_for_reg[insn->header.execution_size];
193
194 /* Restrictions from 3.3.10: Register Region Restrictions. */
195 /* 3. */
196 assert(execsize >= width);
197
198 /* 4. */
199 if (execsize == width && hstride != 0) {
200 assert(vstride == -1 || vstride == width * hstride);
201 }
202
203 /* 5. */
204 if (execsize == width && hstride == 0) {
205 /* no restriction on vstride. */
206 }
207
208 /* 6. */
209 if (width == 1) {
210 assert(hstride == 0);
211 }
212
213 /* 7. */
214 if (execsize == 1 && width == 1) {
215 assert(hstride == 0);
216 assert(vstride == 0);
217 }
218
219 /* 8. */
220 if (vstride == 0 && hstride == 0) {
221 assert(width == 1);
222 }
223
224 /* 10. Check destination issues. */
225 }
226
227 void
228 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
229 struct brw_reg reg)
230 {
231 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
232 assert(reg.nr < 128);
233
234 gen7_convert_mrf_to_grf(p, &reg);
235
236 validate_reg(insn, reg);
237
238 insn->bits1.da1.src0_reg_file = reg.file;
239 insn->bits1.da1.src0_reg_type = reg.type;
240 insn->bits2.da1.src0_abs = reg.abs;
241 insn->bits2.da1.src0_negate = reg.negate;
242 insn->bits2.da1.src0_address_mode = reg.address_mode;
243
244 if (reg.file == BRW_IMMEDIATE_VALUE) {
245 insn->bits3.ud = reg.dw1.ud;
246
247 /* Required to set some fields in src1 as well:
248 */
249 insn->bits1.da1.src1_reg_file = 0; /* arf */
250 insn->bits1.da1.src1_reg_type = reg.type;
251 }
252 else
253 {
254 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
255 if (insn->header.access_mode == BRW_ALIGN_1) {
256 insn->bits2.da1.src0_subreg_nr = reg.subnr;
257 insn->bits2.da1.src0_reg_nr = reg.nr;
258 }
259 else {
260 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
261 insn->bits2.da16.src0_reg_nr = reg.nr;
262 }
263 }
264 else {
265 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
266
267 if (insn->header.access_mode == BRW_ALIGN_1) {
268 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
269 }
270 else {
271 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
272 }
273 }
274
275 if (insn->header.access_mode == BRW_ALIGN_1) {
276 if (reg.width == BRW_WIDTH_1 &&
277 insn->header.execution_size == BRW_EXECUTE_1) {
278 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
279 insn->bits2.da1.src0_width = BRW_WIDTH_1;
280 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
281 }
282 else {
283 insn->bits2.da1.src0_horiz_stride = reg.hstride;
284 insn->bits2.da1.src0_width = reg.width;
285 insn->bits2.da1.src0_vert_stride = reg.vstride;
286 }
287 }
288 else {
289 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
293
294 /* This is an oddity of the fact we're using the same
295 * descriptions for registers in align_16 as align_1:
296 */
297 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
299 else
300 insn->bits2.da16.src0_vert_stride = reg.vstride;
301 }
302 }
303 }
304
305
/**
 * Encode @reg as the second source operand of @insn (bits3).
 *
 * src1 shares its instruction dwords with the immediate value and the
 * SEND message descriptor, so it may be an immediate, can never be an
 * MRF, and supports only direct addressing.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16 addresses subregisters in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar source in a scalar instruction uses the canonical
	  * <0;1,0> region encoding.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
375
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* Clear the descriptor dwords (src1 aliases the message descriptor). */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Gen4 packs everything, including the target, into bits3. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
418
/* Fill in the SEND descriptor for a message to the extended math shared
 * unit.  Message and response lengths are inferred from the requested
 * function: POW and the integer-division variants take two source
 * operands, and SINCOS / quotient-and-remainder return two results.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint function,
				  GLuint integer_type,
				  bool low_precision,
				  bool saturate,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
			      msg_length, response_length, false, false);
   /* Gen5 uses a different math descriptor layout than gen4. */
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
   }
}
473
474
/* Fill in the URB descriptor for an FF_SYNC message (gen5 URB opcode 1).
 * The message always has a one-register payload with a header; most of
 * the gen5 URB fields are unused by FF_SYNC and are zeroed explicitly.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
490
/* Fill in the SEND descriptor for a URB write message, using the
 * descriptor layout appropriate to the hardware generation (gen7,
 * gen5/6, or gen4).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 has no transpose swizzle mode for URB writes. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
531
/**
 * Fill in the SEND descriptor for a data port write message.
 *
 * Picks the shared function ID by generation (gen7 routes render-target
 * writes to the render cache and everything else to the data cache;
 * gen6 uses the render cache for all writes), then fills the
 * generation-specific descriptor fields.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
590
/**
 * Fill in the SEND descriptor for a data port read message.
 *
 * Picks the shared function ID by generation (gen7 always uses the data
 * cache; gen6 chooses between the render and sampler caches based on
 * @target_cache), then fills the generation-specific descriptor fields.
 * Read messages always carry a header and are never end-of-thread here.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      true, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
647
/* Fill in the SEND descriptor for a sampler message, using the
 * generation-specific descriptor layout (gen7, gen5/6, g4x, or gen4;
 * only gen4 encodes the return format).
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLuint header_present,
                                    GLuint simd_mode,
                                    GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
686
687
#define next_insn brw_next_insn
/**
 * Allocate the next instruction slot in the store and set its opcode.
 *
 * The new instruction is seeded with a copy of the current default
 * instruction state (p->current), so callers inherit the prevailing
 * execution size, predication, etc.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   /* The store is a fixed-size array; overflow is a compiler bug. */
   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
710
711 static struct brw_instruction *brw_alu1( struct brw_compile *p,
712 GLuint opcode,
713 struct brw_reg dest,
714 struct brw_reg src )
715 {
716 struct brw_instruction *insn = next_insn(p, opcode);
717 brw_set_dest(p, insn, dest);
718 brw_set_src0(p, insn, src);
719 return insn;
720 }
721
722 static struct brw_instruction *brw_alu2(struct brw_compile *p,
723 GLuint opcode,
724 struct brw_reg dest,
725 struct brw_reg src0,
726 struct brw_reg src1 )
727 {
728 struct brw_instruction *insn = next_insn(p, opcode);
729 brw_set_dest(p, insn, dest);
730 brw_set_src0(p, insn, src0);
731 brw_set_src1(p, insn, src1);
732 return insn;
733 }
734
735
736 /***********************************************************************
737 * Convenience routines.
738 */
/* Expand to the public emitter for a one-source ALU instruction. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Expand to the public emitter for a two-source ALU instruction. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


ROUND(RNDZ)
ROUND(RNDE)
808
809
/* Emit an ADD, asserting the documented restrictions on mixing float
 * sources with dword sources (see the "6.2.2: add" reference below).
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      /* A float source may not be paired with a dword source. */
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
832
/* Emit a MUL, asserting the documented restrictions on operand types
 * and on accumulator sources (see the "6.32.38: mul" reference below).
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      /* Integer sources may not produce a float destination. */
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      /* A float source may not be paired with a dword source. */
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   /* The accumulator is not a legal MUL source. */
   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
867
868
869 void brw_NOP(struct brw_compile *p)
870 {
871 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
872 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
873 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
874 brw_set_src1(p, insn, brw_imm_ud(0x0));
875 }
876
877
878
879
880
881 /***********************************************************************
882 * Comparisons, if/else/endif
883 */
884
/* Emit a JMPI (jump-indexed) instruction.  JMPI executes as a single
 * channel with the execution mask disabled and uncompressed.  The
 * default predication state is cleared afterwards so that any pending
 * predicate applies only to this jump.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* One-shot: subsequent instructions revert to unpredicated. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
900
/* Record an IF instruction on the if stack so the matching ELSE/ENDIF
 * can locate it later, growing the stack when it fills up.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst;

   p->if_stack_depth++;
   /* Grow after the push so the next call always has a free slot. */
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
			     p->if_stack_array_size);
   }
}
913
914 static void
915 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
916 {
917 if (p->loop_stack_array_size < p->loop_stack_depth) {
918 p->loop_stack_array_size *= 2;
919 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
920 p->loop_stack_array_size);
921 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
922 p->loop_stack_array_size);
923 }
924
925 p->loop_stack[p->loop_stack_depth] = inst - p->store;
926 p->loop_stack_depth++;
927 p->if_depth_in_loop[p->loop_stack_depth] = 0;
928 }
929
930 static struct brw_instruction *
931 get_inner_do_insn(struct brw_compile *p)
932 {
933 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
934 }
935
936 /* EU takes the value from the flag register and pushes it onto some
937 * sort of a stack (presumably merging with any flag value already on
938 * the stack). Within an if block, the flags at the top of the stack
939 * control execution on each channel of the unit, eg. on each of the
940 * 16 pixel values in our wm programs.
941 *
942 * When the matching 'else' instruction is reached (presumably by
943 * countdown of the instruction count patched in by our ELSE/ENDIF
944 * functions), the relevent flags are inverted.
945 *
946 * When the matching 'endif' instruction is reached, the flags are
947 * popped off. If the stack is now empty, normal execution resumes.
948 */
/**
 * Emit an IF instruction and push it on the if stack.
 *
 * The operand encoding varies by generation: pre-gen6 uses IP-relative
 * operands with an immediate jump target (patched later by ELSE/ENDIF);
 * gen6 stores the jump count in bits1; gen7 uses the JIP/UIP fields in
 * bits3 (also patched later).
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* JIP/UIP start at 0 and are patched when ELSE/ENDIF are emitted. */
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication was a one-shot default; clear it for what follows. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
989
990 /* This function is only used for gen6-style IF instructions with an
991 * embedded comparison (conditional modifier). It is not used on gen7.
992 */
993 struct brw_instruction *
994 gen6_IF(struct brw_compile *p, uint32_t conditional,
995 struct brw_reg src0, struct brw_reg src1)
996 {
997 struct brw_instruction *insn;
998
999 insn = next_insn(p, BRW_OPCODE_IF);
1000
1001 brw_set_dest(p, insn, brw_imm_w(0));
1002 if (p->compressed) {
1003 insn->header.execution_size = BRW_EXECUTE_16;
1004 } else {
1005 insn->header.execution_size = BRW_EXECUTE_8;
1006 }
1007 insn->bits1.branch_gen6.jump_count = 0;
1008 brw_set_src0(p, insn, src0);
1009 brw_set_src1(p, insn, src1);
1010
1011 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1012 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1013 insn->header.destreg__conditionalmod = conditional;
1014
1015 if (!p->single_program_flow)
1016 insn->header.thread_control = BRW_THREAD_SWITCH;
1017
1018 push_if_stack(p, insn);
1019 return insn;
1020 }
1021
1022 /**
1023 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1024 */
1025 static void
1026 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1027 struct brw_instruction *if_inst,
1028 struct brw_instruction *else_inst)
1029 {
1030 /* The next instruction (where the ENDIF would be, if it existed) */
1031 struct brw_instruction *next_inst = &p->store[p->nr_insn];
1032
1033 assert(p->single_program_flow);
1034 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1035 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1036 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1037
1038 /* Convert IF to an ADD instruction that moves the instruction pointer
1039 * to the first instruction of the ELSE block. If there is no ELSE
1040 * block, point to where ENDIF would be. Reverse the predicate.
1041 *
1042 * There's no need to execute an ENDIF since we don't need to do any
1043 * stack operations, and if we're currently executing, we just want to
1044 * continue normally.
1045 */
1046 if_inst->header.opcode = BRW_OPCODE_ADD;
1047 if_inst->header.predicate_inverse = 1;
1048
1049 if (else_inst != NULL) {
1050 /* Convert ELSE to an ADD instruction that points where the ENDIF
1051 * would be.
1052 */
1053 else_inst->header.opcode = BRW_OPCODE_ADD;
1054
1055 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1056 else_inst->bits3.ud = (next_inst - else_inst) * 16;
1057 } else {
1058 if_inst->bits3.ud = (next_inst - if_inst) * 16;
1059 }
1060 }
1061
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 *
 * Called from brw_ENDIF once the location of the ENDIF is known.
 * \param if_inst    the IF to patch (required)
 * \param else_inst  the matching ELSE, or NULL if there is none
 * \param endif_inst the just-emitted ENDIF (required)
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (intel->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7: both branch offsets target the ENDIF directly. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1151
/* Emit an ELSE instruction and push it on the if-stack so that
 * brw_ENDIF can patch its jump target later.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      /* Pre-gen6: operates on IP; jump count patched in patch_IF_ELSE(). */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7: JIP/UIP start at zero and are patched at ENDIF time. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1184
/* Close the innermost IF/ELSE block: pop the IF (and optional ELSE)
 * from the if-stack, emit an ENDIF where needed, and patch the jump
 * targets of the popped instructions.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (intel->gen < 6 && p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1252
1253 struct brw_instruction *brw_BREAK(struct brw_compile *p)
1254 {
1255 struct intel_context *intel = &p->brw->intel;
1256 struct brw_instruction *insn;
1257
1258 insn = next_insn(p, BRW_OPCODE_BREAK);
1259 if (intel->gen >= 6) {
1260 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1261 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1262 brw_set_src1(p, insn, brw_imm_d(0x0));
1263 } else {
1264 brw_set_dest(p, insn, brw_ip_reg());
1265 brw_set_src0(p, insn, brw_ip_reg());
1266 brw_set_src1(p, insn, brw_imm_d(0x0));
1267 insn->bits3.if_else.pad0 = 0;
1268 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1269 }
1270 insn->header.compression_control = BRW_COMPRESSION_NONE;
1271 insn->header.execution_size = BRW_EXECUTE_8;
1272
1273 return insn;
1274 }
1275
1276 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1277 {
1278 struct brw_instruction *insn;
1279
1280 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1281 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1282 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1283 brw_set_dest(p, insn, brw_ip_reg());
1284 brw_set_src0(p, insn, brw_ip_reg());
1285 brw_set_src1(p, insn, brw_imm_d(0x0));
1286
1287 insn->header.compression_control = BRW_COMPRESSION_NONE;
1288 insn->header.execution_size = BRW_EXECUTE_8;
1289 return insn;
1290 }
1291
1292 struct brw_instruction *brw_CONT(struct brw_compile *p)
1293 {
1294 struct brw_instruction *insn;
1295 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1296 brw_set_dest(p, insn, brw_ip_reg());
1297 brw_set_src0(p, insn, brw_ip_reg());
1298 brw_set_src1(p, insn, brw_imm_d(0x0));
1299 insn->header.compression_control = BRW_COMPRESSION_NONE;
1300 insn->header.execution_size = BRW_EXECUTE_8;
1301 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1302 insn->bits3.if_else.pad0 = 0;
1303 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1304 return insn;
1305 }
1306
1307 /* DO/WHILE loop:
1308 *
1309 * The DO/WHILE is just an unterminated loop -- break or continue are
1310 * used for control within the loop. We have a few ways they can be
1311 * done.
1312 *
1313 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1314 * jip and no DO instruction.
1315 *
1316 * For non-uniform control flow pre-gen6, there's a DO instruction to
1317 * push the mask, and a WHILE to jump back, and BREAK to get out and
1318 * pop the mask.
1319 *
1320 * For gen6, there's no more mask stack, so no need for DO. WHILE
1321 * just points back to the first instruction of the loop.
1322 */
/* Open a loop.  Returns the instruction the matching WHILE should jump
 * back to; on gen6+ (and in SPF mode) no actual DO instruction is
 * emitted and the returned pointer is simply the next slot in the store.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No DO needed; just remember where the loop body begins. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1350
1351 /**
1352 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1353 * instruction here.
1354 *
1355 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1356 * nesting, since it can always just point to the end of the block/current loop.
1357 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Jump counts are in 64-bit chunks on gen5 (two per instruction). */
   int br = (intel->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, fixing up any
    * BREAK/CONTINUE that has not been patched yet.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 /* BREAK jumps just past the WHILE (out of the loop). */
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 /* CONTINUE jumps to the WHILE itself (next iteration test). */
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1380
/* Close the innermost loop opened by brw_DO with a WHILE (or, in
 * pre-gen6 SPF mode, an ADD to IP), and pop the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn, *do_insn;
   /* Jump counts are in 64-bit chunks on gen5+ (two per instruction). */
   GLuint br = 1;

   do_insn = get_inner_do_insn(p);

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backward jump to the top of the loop body. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF mode: express the backward branch as an ADD to IP. */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 /* Fix up any BREAK/CONTINUE inside the loop now that the WHILE
	  * location is known.
	  */
	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1442
1443
1444 /* FORWARD JUMPS:
1445 */
1446 void brw_land_fwd_jump(struct brw_compile *p,
1447 struct brw_instruction *jmp_insn)
1448 {
1449 struct intel_context *intel = &p->brw->intel;
1450 struct brw_instruction *landing = &p->store[p->nr_insn];
1451 GLuint jmpi = 1;
1452
1453 if (intel->gen >= 5)
1454 jmpi = 2;
1455
1456 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1457 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1458
1459 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1460 }
1461
1462
1463
1464 /* To integrate with the above, it makes sense that the comparison
1465 * instruction should populate the flag register. It might be simpler
1466 * just to use the flag reg for most WM tasks?
1467 */
/* Emit a CMP with the given conditional modifier, writing the result to
 * dest and updating the flag register.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      /* dest is an architecture register with nr 0 — presumably the null
       * register, i.e. the CMP is being used only for its flag write.
       */
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
1494
/* Issue a 'wait' instruction on notification register n1; the host can
 * program MMIO to wake the thread up.
 */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1509
1510
1511 /***********************************************************************
1512 * Helpers for the various SEND message types:
1513 */
1514
1515 /** Extended math function, float[8].
1516 */
/* Emit an extended-math operation: a native MATH instruction on gen6+,
 * or a SEND to the shared math function unit on earlier generations.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer-divide variants require non-float sources; everything
       * else requires float.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1581
1582 /** Extended math function, float[8].
1583 */
/** Extended math function with two sources (gen6+ only).
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   /* Silence unused-variable warnings when asserts compile out (NDEBUG). */
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer-divide variants require non-float sources; everything else
    * requires float.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1634
1635 /**
1636 * Extended math function, float[16].
1637 * Use 2 send instructions.
1638 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      /* Gen6+: a single native MATH instruction covers all 16 channels. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* Pre-gen6: issue two SENDs, one per 8-channel half.
    *
    * First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: the upper half, one MRF and one GRF further on.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1706
1707
1708 /**
1709 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1710 * using a constant offset per channel.
1711 *
1712 * The offset must be aligned to oword size (16 bytes). Used for
1713 * register spilling.
1714 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1816
1817
1818 /**
1819 * Read a block of owords (half a GRF each) from the scratch buffer
1820 * using a constant index per channel.
1821 *
1822 * Offset must be aligned to oword size (16 bytes). Used for register
1823 * spilling.
1824 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 with g0.2 set to the offset. */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1891
1892 /**
1893 * Read a float[4] vector from the data port Data Cache (const buffer).
1894 * Location (in buffer) should be a multiple of 16.
1895 * Used for fetching shader constants.
1896 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: g0 with the global offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1949
1950 /**
1951 * Read a set of dwords from the data port Data Cache (const buffer).
1952 *
1953 * Location (in buffer) appears as UD offsets in the register after
1954 * the provided mrf header reg.
1955 */
1956 void brw_dword_scattered_read(struct brw_compile *p,
1957 struct brw_reg dest,
1958 struct brw_reg mrf,
1959 uint32_t bind_table_index)
1960 {
1961 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1962
1963 brw_push_insn_state(p);
1964 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1965 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1966 brw_set_mask_control(p, BRW_MASK_DISABLE);
1967 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1968 brw_pop_insn_state(p);
1969
1970 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1971 insn->header.destreg__conditionalmod = mrf.nr;
1972
1973 /* cast dest to a uword[8] vector */
1974 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1975
1976 brw_set_dest(p, insn, dest);
1977 brw_set_src0(p, insn, brw_null_reg());
1978
1979 brw_set_dp_read_message(p,
1980 insn,
1981 bind_table_index,
1982 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1983 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1984 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1985 2, /* msg_length */
1986 1); /* response_length */
1987 }
1988
1989
1990
1991 /**
1992 * Read float[4] constant(s) from VS constant buffer.
1993 * For relative addressing, two float[4] constants will be read into 'dest'.
1994 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1995 */
1996 void brw_dp_READ_4_vs(struct brw_compile *p,
1997 struct brw_reg dest,
1998 GLuint location,
1999 GLuint bind_table_index)
2000 {
2001 struct intel_context *intel = &p->brw->intel;
2002 struct brw_instruction *insn;
2003 GLuint msg_reg_nr = 1;
2004
2005 if (intel->gen >= 6)
2006 location /= 16;
2007
2008 /* Setup MRF[1] with location/offset into const buffer */
2009 brw_push_insn_state(p);
2010 brw_set_access_mode(p, BRW_ALIGN_1);
2011 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2012 brw_set_mask_control(p, BRW_MASK_DISABLE);
2013 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2014 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
2015 BRW_REGISTER_TYPE_UD),
2016 brw_imm_ud(location));
2017 brw_pop_insn_state(p);
2018
2019 insn = next_insn(p, BRW_OPCODE_SEND);
2020
2021 insn->header.predicate_control = BRW_PREDICATE_NONE;
2022 insn->header.compression_control = BRW_COMPRESSION_NONE;
2023 insn->header.destreg__conditionalmod = msg_reg_nr;
2024 insn->header.mask_control = BRW_MASK_DISABLE;
2025
2026 brw_set_dest(p, insn, dest);
2027 if (intel->gen >= 6) {
2028 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
2029 } else {
2030 brw_set_src0(p, insn, brw_null_reg());
2031 }
2032
2033 brw_set_dp_read_message(p,
2034 insn,
2035 bind_table_index,
2036 0,
2037 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2038 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2039 1, /* msg_length */
2040 1); /* response_length (1 Oword) */
2041 }
2042
2043 /**
2044 * Read a float[4] constant per vertex from VS constant buffer, with
2045 * relative addressing.
2046 */
2047 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
2048 struct brw_reg dest,
2049 struct brw_reg addr_reg,
2050 GLuint offset,
2051 GLuint bind_table_index)
2052 {
2053 struct intel_context *intel = &p->brw->intel;
2054 struct brw_reg src = brw_vec8_grf(0, 0);
2055 int msg_type;
2056
2057 /* Setup MRF[1] with offset into const buffer */
2058 brw_push_insn_state(p);
2059 brw_set_access_mode(p, BRW_ALIGN_1);
2060 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2061 brw_set_mask_control(p, BRW_MASK_DISABLE);
2062 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2063
2064 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2065 * fields ignored.
2066 */
2067 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
2068 addr_reg, brw_imm_d(offset));
2069 brw_pop_insn_state(p);
2070
2071 gen6_resolve_implied_move(p, &src, 0);
2072 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2073
2074 insn->header.predicate_control = BRW_PREDICATE_NONE;
2075 insn->header.compression_control = BRW_COMPRESSION_NONE;
2076 insn->header.destreg__conditionalmod = 0;
2077 insn->header.mask_control = BRW_MASK_DISABLE;
2078
2079 brw_set_dest(p, insn, dest);
2080 brw_set_src0(p, insn, src);
2081
2082 if (intel->gen >= 6)
2083 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2084 else if (intel->gen == 5 || intel->is_g4x)
2085 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2086 else
2087 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2088
2089 brw_set_dp_read_message(p,
2090 insn,
2091 bind_table_index,
2092 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2093 msg_type,
2094 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2095 2, /* msg_length */
2096 1); /* response_length */
2097 }
2098
2099
2100
2101 void brw_fb_WRITE(struct brw_compile *p,
2102 int dispatch_width,
2103 GLuint msg_reg_nr,
2104 struct brw_reg src0,
2105 GLuint binding_table_index,
2106 GLuint msg_length,
2107 GLuint response_length,
2108 bool eot,
2109 bool header_present)
2110 {
2111 struct intel_context *intel = &p->brw->intel;
2112 struct brw_instruction *insn;
2113 GLuint msg_control, msg_type;
2114 struct brw_reg dest;
2115
2116 if (dispatch_width == 16)
2117 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2118 else
2119 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2120
2121 if (intel->gen >= 6 && binding_table_index == 0) {
2122 insn = next_insn(p, BRW_OPCODE_SENDC);
2123 } else {
2124 insn = next_insn(p, BRW_OPCODE_SEND);
2125 }
2126 /* The execution mask is ignored for render target writes. */
2127 insn->header.predicate_control = 0;
2128 insn->header.compression_control = BRW_COMPRESSION_NONE;
2129
2130 if (intel->gen >= 6) {
2131 /* headerless version, just submit color payload */
2132 src0 = brw_message_reg(msg_reg_nr);
2133
2134 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2135 } else {
2136 insn->header.destreg__conditionalmod = msg_reg_nr;
2137
2138 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2139 }
2140
2141 if (dispatch_width == 16)
2142 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
2143 else
2144 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
2145
2146 brw_set_dest(p, insn, dest);
2147 brw_set_src0(p, insn, src0);
2148 brw_set_dp_write_message(p,
2149 insn,
2150 binding_table_index,
2151 msg_control,
2152 msg_type,
2153 msg_length,
2154 header_present,
2155 1, /* last render target write */
2156 response_length,
2157 eot,
2158 0 /* send_commit_msg */);
2159 }
2160
2161
2162 /**
2163 * Texture sample instruction.
2164 * Note: the msg_type plus msg_length values determine exactly what kind
2165 * of sampling operation is performed. See volume 4, page 161 of docs.
2166 */
2167 void brw_SAMPLE(struct brw_compile *p,
2168 struct brw_reg dest,
2169 GLuint msg_reg_nr,
2170 struct brw_reg src0,
2171 GLuint binding_table_index,
2172 GLuint sampler,
2173 GLuint writemask,
2174 GLuint msg_type,
2175 GLuint response_length,
2176 GLuint msg_length,
2177 GLuint header_present,
2178 GLuint simd_mode,
2179 GLuint return_format)
2180 {
2181 struct intel_context *intel = &p->brw->intel;
2182 bool need_stall = 0;
2183
2184 if (writemask == 0) {
2185 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2186 return;
2187 }
2188
2189 /* Hardware doesn't do destination dependency checking on send
2190 * instructions properly. Add a workaround which generates the
2191 * dependency by other means. In practice it seems like this bug
2192 * only crops up for texture samples, and only where registers are
2193 * written by the send and then written again later without being
2194 * read in between. Luckily for us, we already track that
2195 * information and use it to modify the writemask for the
2196 * instruction, so that is a guide for whether a workaround is
2197 * needed.
2198 */
2199 if (writemask != WRITEMASK_XYZW) {
2200 GLuint dst_offset = 0;
2201 GLuint i, newmask = 0, len = 0;
2202
2203 for (i = 0; i < 4; i++) {
2204 if (writemask & (1<<i))
2205 break;
2206 dst_offset += 2;
2207 }
2208 for (; i < 4; i++) {
2209 if (!(writemask & (1<<i)))
2210 break;
2211 newmask |= 1<<i;
2212 len++;
2213 }
2214
2215 if (newmask != writemask) {
2216 need_stall = 1;
2217 /* printf("need stall %x %x\n", newmask , writemask); */
2218 }
2219 else {
2220 bool dispatch_16 = false;
2221
2222 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2223
2224 guess_execution_size(p, p->current, dest);
2225 if (p->current->header.execution_size == BRW_EXECUTE_16)
2226 dispatch_16 = true;
2227
2228 newmask = ~newmask & WRITEMASK_XYZW;
2229
2230 brw_push_insn_state(p);
2231
2232 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2233 brw_set_mask_control(p, BRW_MASK_DISABLE);
2234
2235 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2236 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2237 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2238
2239 brw_pop_insn_state(p);
2240
2241 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2242 dest = offset(dest, dst_offset);
2243
2244 /* For 16-wide dispatch, masked channels are skipped in the
2245 * response. For 8-wide, masked channels still take up slots,
2246 * and are just not written to.
2247 */
2248 if (dispatch_16)
2249 response_length = len * 2;
2250 }
2251 }
2252
2253 {
2254 struct brw_instruction *insn;
2255
2256 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2257
2258 insn = next_insn(p, BRW_OPCODE_SEND);
2259 insn->header.predicate_control = 0; /* XXX */
2260 insn->header.compression_control = BRW_COMPRESSION_NONE;
2261 if (intel->gen < 6)
2262 insn->header.destreg__conditionalmod = msg_reg_nr;
2263
2264 brw_set_dest(p, insn, dest);
2265 brw_set_src0(p, insn, src0);
2266 brw_set_sampler_message(p, insn,
2267 binding_table_index,
2268 sampler,
2269 msg_type,
2270 response_length,
2271 msg_length,
2272 header_present,
2273 simd_mode,
2274 return_format);
2275 }
2276
2277 if (need_stall) {
2278 struct brw_reg reg = vec8(offset(dest, response_length-1));
2279
2280 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2281 */
2282 brw_push_insn_state(p);
2283 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2284 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2285 retype(reg, BRW_REGISTER_TYPE_UD));
2286 brw_pop_insn_state(p);
2287 }
2288
2289 }
2290
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * OR 0xff00 into header dword 5, starting from the r0.5 value.
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6, the message register number is encoded in the
    * destreg/conditionalmod field of the instruction.
    */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2346
2347 static int
2348 brw_find_next_block_end(struct brw_compile *p, int start)
2349 {
2350 int ip;
2351
2352 for (ip = start + 1; ip < p->nr_insn; ip++) {
2353 struct brw_instruction *insn = &p->store[ip];
2354
2355 switch (insn->header.opcode) {
2356 case BRW_OPCODE_ENDIF:
2357 case BRW_OPCODE_ELSE:
2358 case BRW_OPCODE_WHILE:
2359 return ip;
2360 }
2361 }
2362 assert(!"not reached");
2363 return start + 1;
2364 }
2365
2366 /* There is no DO instruction on gen6, so to find the end of the loop
2367 * we have to see if the loop is jumping back before our start
2368 * instruction.
2369 */
2370 static int
2371 brw_find_loop_end(struct brw_compile *p, int start)
2372 {
2373 struct intel_context *intel = &p->brw->intel;
2374 int ip;
2375 int br = 2;
2376
2377 for (ip = start + 1; ip < p->nr_insn; ip++) {
2378 struct brw_instruction *insn = &p->store[ip];
2379
2380 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2381 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2382 : insn->bits3.break_cont.jip;
2383 if (ip + jip / br <= start)
2384 return ip;
2385 }
2386 }
2387 assert(!"not reached");
2388 return start + 1;
2389 }
2390
2391 /* After program generation, go back and update the UIP and JIP of
2392 * BREAK and CONT instructions to their correct locations.
2393 */
2394 void
2395 brw_set_uip_jip(struct brw_compile *p)
2396 {
2397 struct intel_context *intel = &p->brw->intel;
2398 int ip;
2399 int br = 2;
2400
2401 if (intel->gen < 6)
2402 return;
2403
2404 for (ip = 0; ip < p->nr_insn; ip++) {
2405 struct brw_instruction *insn = &p->store[ip];
2406
2407 switch (insn->header.opcode) {
2408 case BRW_OPCODE_BREAK:
2409 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2410 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2411 insn->bits3.break_cont.uip =
2412 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2413 break;
2414 case BRW_OPCODE_CONTINUE:
2415 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2416 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2417
2418 assert(insn->bits3.break_cont.uip != 0);
2419 assert(insn->bits3.break_cont.jip != 0);
2420 break;
2421 }
2422 }
2423 }
2424
2425 void brw_ff_sync(struct brw_compile *p,
2426 struct brw_reg dest,
2427 GLuint msg_reg_nr,
2428 struct brw_reg src0,
2429 bool allocate,
2430 GLuint response_length,
2431 bool eot)
2432 {
2433 struct intel_context *intel = &p->brw->intel;
2434 struct brw_instruction *insn;
2435
2436 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2437
2438 insn = next_insn(p, BRW_OPCODE_SEND);
2439 brw_set_dest(p, insn, dest);
2440 brw_set_src0(p, insn, src0);
2441 brw_set_src1(p, insn, brw_imm_d(0));
2442
2443 if (intel->gen < 6)
2444 insn->header.destreg__conditionalmod = msg_reg_nr;
2445
2446 brw_set_ff_sync_message(p,
2447 insn,
2448 allocate,
2449 response_length,
2450 eot);
2451 }
2452
2453 /**
2454 * Emit the SEND instruction necessary to generate stream output data on Gen6
2455 * (for transform feedback).
2456 *
2457 * If send_commit_msg is true, this is the last piece of stream output data
2458 * from this thread, so send the data as a committed write. According to the
2459 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2460 *
2461 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2462 * writes are complete by sending the final write as a committed write."
2463 */
2464 void
2465 brw_svb_write(struct brw_compile *p,
2466 struct brw_reg dest,
2467 GLuint msg_reg_nr,
2468 struct brw_reg src0,
2469 GLuint binding_table_index,
2470 bool send_commit_msg)
2471 {
2472 struct brw_instruction *insn;
2473
2474 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2475
2476 insn = next_insn(p, BRW_OPCODE_SEND);
2477 brw_set_dest(p, insn, dest);
2478 brw_set_src0(p, insn, src0);
2479 brw_set_src1(p, insn, brw_imm_d(0));
2480 brw_set_dp_write_message(p, insn,
2481 binding_table_index,
2482 0, /* msg_control: ignored */
2483 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2484 1, /* msg_length */
2485 true, /* header_present */
2486 0, /* last_render_target: ignored */
2487 send_commit_msg, /* response_length */
2488 0, /* end_of_thread */
2489 send_commit_msg); /* send_commit_msg */
2490 }