i965: call next_insn() before referencing an instruction by index
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
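/* E.g. an 8-wide register in a compressed (SIMD16) program yields
 * BRW_EXECUTE_16; otherwise the register's own width is used directly.
 */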
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
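/* A minimal usage sketch (with hypothetical caller-side names `dest`,
 * `src` and `msg_reg_nr`): resolve the implied move first, then emit
 * the SEND.
 *
 *    gen6_resolve_implied_move(p, &src, msg_reg_nr);
 *    insn = brw_next_insn(p, BRW_OPCODE_SEND);
 *    brw_set_dest(p, insn, dest);
 *    brw_set_src0(p, insn, src);
 *    ... fill in the message descriptor ...
 */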
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 struct intel_context *intel = &p->brw->intel;
88 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
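      /* Gen7 has no separate MRF file; message payloads are built in the
       * top GRFs instead, so m(n) is remapped to g(n + 111), e.g. m1
       * becomes g112.
       */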
89 reg->file = BRW_GENERAL_REGISTER_FILE;
90 reg->nr += 111;
91 }
92 }
93
94
95 void
96 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
97 struct brw_reg dest)
98 {
99 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
100 dest.file != BRW_MESSAGE_REGISTER_FILE)
101 assert(dest.nr < 128);
102
103 gen7_convert_mrf_to_grf(p, &dest);
104
105 insn->bits1.da1.dest_reg_file = dest.file;
106 insn->bits1.da1.dest_reg_type = dest.type;
107 insn->bits1.da1.dest_address_mode = dest.address_mode;
108
109 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
110 insn->bits1.da1.dest_reg_nr = dest.nr;
111
112 if (insn->header.access_mode == BRW_ALIGN_1) {
113 insn->bits1.da1.dest_subreg_nr = dest.subnr;
114 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
115 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
116 insn->bits1.da1.dest_horiz_stride = dest.hstride;
117 }
118 else {
119 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
120 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
 121                /* even though it's ignored in da16, it still needs to be set to '01' */
122 insn->bits1.da16.dest_horiz_stride = 1;
123 }
124 }
125 else {
126 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
127
128 /* These are different sizes in align1 vs align16:
129 */
130 if (insn->header.access_mode == BRW_ALIGN_1) {
131 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
132 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
133 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
134 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
135 }
136 else {
137 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
 138                /* even though it's ignored in da16, it still needs to be set to '01' */
139 insn->bits1.ia16.dest_horiz_stride = 1;
140 }
141 }
142
143 /* NEW: Set the execution size based on dest.width and
144 * insn->compression_control:
145 */
146 guess_execution_size(p, insn, dest);
147 }
148
149 extern int reg_type_size[];
150
151 static void
152 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
153 {
154 int hstride_for_reg[] = {0, 1, 2, 4};
155 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
156 int width_for_reg[] = {1, 2, 4, 8, 16};
157 int execsize_for_reg[] = {1, 2, 4, 8, 16};
158 int width, hstride, vstride, execsize;
159
160 if (reg.file == BRW_IMMEDIATE_VALUE) {
161 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
162 * mean the destination has to be 128-bit aligned and the
163 * destination horiz stride has to be a word.
164 */
165 if (reg.type == BRW_REGISTER_TYPE_V) {
166 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
167 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
168 }
169
170 return;
171 }
172
173 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
 174        reg.nr == BRW_ARF_NULL)
175 return;
176
177 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
178 hstride = hstride_for_reg[reg.hstride];
179
180 if (reg.vstride == 0xf) {
181 vstride = -1;
182 } else {
183 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
184 vstride = vstride_for_reg[reg.vstride];
185 }
186
187 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
188 width = width_for_reg[reg.width];
189
190 assert(insn->header.execution_size >= 0 &&
191 insn->header.execution_size < Elements(execsize_for_reg));
192 execsize = execsize_for_reg[insn->header.execution_size];
193
194 /* Restrictions from 3.3.10: Register Region Restrictions. */
195 /* 3. */
196 assert(execsize >= width);
197
198 /* 4. */
199 if (execsize == width && hstride != 0) {
200 assert(vstride == -1 || vstride == width * hstride);
201 }
202
203 /* 5. */
204 if (execsize == width && hstride == 0) {
205 /* no restriction on vstride. */
206 }
207
208 /* 6. */
209 if (width == 1) {
210 assert(hstride == 0);
211 }
212
213 /* 7. */
214 if (execsize == 1 && width == 1) {
215 assert(hstride == 0);
216 assert(vstride == 0);
217 }
218
219 /* 8. */
220 if (vstride == 0 && hstride == 0) {
221 assert(width == 1);
222 }
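    /* Worked example (sketch): an execsize-8 access to an <8;8,1> region
     * (vstride 8, width 8, hstride 1) satisfies rule 4 above, since
     * vstride == width * hstride == 8; a scalar <0;1,0> region instead
     * satisfies rules 6-8 (width 1, hstride 0, vstride 0).
     */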
223
224 /* 10. Check destination issues. */
225 }
226
227 void
228 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
229 struct brw_reg reg)
230 {
 231    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
232 assert(reg.nr < 128);
233
234 gen7_convert_mrf_to_grf(p, &reg);
235
236 validate_reg(insn, reg);
237
238 insn->bits1.da1.src0_reg_file = reg.file;
239 insn->bits1.da1.src0_reg_type = reg.type;
240 insn->bits2.da1.src0_abs = reg.abs;
241 insn->bits2.da1.src0_negate = reg.negate;
242 insn->bits2.da1.src0_address_mode = reg.address_mode;
243
244 if (reg.file == BRW_IMMEDIATE_VALUE) {
245 insn->bits3.ud = reg.dw1.ud;
246
247 /* Required to set some fields in src1 as well:
248 */
249 insn->bits1.da1.src1_reg_file = 0; /* arf */
250 insn->bits1.da1.src1_reg_type = reg.type;
251 }
252 else
253 {
254 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
255 if (insn->header.access_mode == BRW_ALIGN_1) {
256 insn->bits2.da1.src0_subreg_nr = reg.subnr;
257 insn->bits2.da1.src0_reg_nr = reg.nr;
258 }
259 else {
260 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
261 insn->bits2.da16.src0_reg_nr = reg.nr;
262 }
263 }
264 else {
265 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
266
267 if (insn->header.access_mode == BRW_ALIGN_1) {
268 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
269 }
270 else {
271 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
272 }
273 }
274
275 if (insn->header.access_mode == BRW_ALIGN_1) {
276 if (reg.width == BRW_WIDTH_1 &&
277 insn->header.execution_size == BRW_EXECUTE_1) {
278 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
279 insn->bits2.da1.src0_width = BRW_WIDTH_1;
280 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
281 }
282 else {
283 insn->bits2.da1.src0_horiz_stride = reg.hstride;
284 insn->bits2.da1.src0_width = reg.width;
285 insn->bits2.da1.src0_vert_stride = reg.vstride;
286 }
287 }
288 else {
289 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
293
 294           /* This is an oddity of the fact that we use the same register
 295            * descriptions for align_16 as for align_1:
 296            */
297 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
299 else
300 insn->bits2.da16.src0_vert_stride = reg.vstride;
301 }
302 }
303 }
304
305
306 void brw_set_src1(struct brw_compile *p,
307 struct brw_instruction *insn,
308 struct brw_reg reg)
309 {
310 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
311
312 assert(reg.nr < 128);
313
314 gen7_convert_mrf_to_grf(p, &reg);
315
316 validate_reg(insn, reg);
317
318 insn->bits1.da1.src1_reg_file = reg.file;
319 insn->bits1.da1.src1_reg_type = reg.type;
320 insn->bits3.da1.src1_abs = reg.abs;
321 insn->bits3.da1.src1_negate = reg.negate;
322
323 /* Only src1 can be immediate in two-argument instructions.
324 */
325 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
326
327 if (reg.file == BRW_IMMEDIATE_VALUE) {
328 insn->bits3.ud = reg.dw1.ud;
329 }
330 else {
331 /* This is a hardware restriction, which may or may not be lifted
332 * in the future:
333 */
334 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
335 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
336
337 if (insn->header.access_mode == BRW_ALIGN_1) {
338 insn->bits3.da1.src1_subreg_nr = reg.subnr;
339 insn->bits3.da1.src1_reg_nr = reg.nr;
340 }
341 else {
342 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
343 insn->bits3.da16.src1_reg_nr = reg.nr;
344 }
345
346 if (insn->header.access_mode == BRW_ALIGN_1) {
347 if (reg.width == BRW_WIDTH_1 &&
348 insn->header.execution_size == BRW_EXECUTE_1) {
349 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
350 insn->bits3.da1.src1_width = BRW_WIDTH_1;
351 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
352 }
353 else {
354 insn->bits3.da1.src1_horiz_stride = reg.hstride;
355 insn->bits3.da1.src1_width = reg.width;
356 insn->bits3.da1.src1_vert_stride = reg.vstride;
357 }
358 }
359 else {
360 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
361 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
362 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
363 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
364
 365           /* This is an oddity of the fact that we use the same register
 366            * descriptions for align_16 as for align_1:
 367            */
368 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
369 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
370 else
371 insn->bits3.da16.src1_vert_stride = reg.vstride;
372 }
373 }
374 }
375
376 /**
377 * Set the Message Descriptor and Extended Message Descriptor fields
378 * for SEND messages.
379 *
380 * \note This zeroes out the Function Control bits, so it must be called
381 * \b before filling out any message-specific data. Callers can
382 * choose not to fill in irrelevant bits; they will be zero.
383 */
384 static void
385 brw_set_message_descriptor(struct brw_compile *p,
386 struct brw_instruction *inst,
387 enum brw_message_target sfid,
388 unsigned msg_length,
389 unsigned response_length,
390 bool header_present,
391 bool end_of_thread)
392 {
393 struct intel_context *intel = &p->brw->intel;
394
395 brw_set_src1(p, inst, brw_imm_d(0));
396
397 if (intel->gen >= 5) {
398 inst->bits3.generic_gen5.header_present = header_present;
399 inst->bits3.generic_gen5.response_length = response_length;
400 inst->bits3.generic_gen5.msg_length = msg_length;
401 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
402
403 if (intel->gen >= 6) {
404 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
405 inst->header.destreg__conditionalmod = sfid;
406 } else {
407 /* Set Extended Message Descriptor (ex_desc) */
408 inst->bits2.send_gen5.sfid = sfid;
409 inst->bits2.send_gen5.end_of_thread = end_of_thread;
410 }
411 } else {
412 inst->bits3.generic.response_length = response_length;
413 inst->bits3.generic.msg_length = msg_length;
414 inst->bits3.generic.msg_target = sfid;
415 inst->bits3.generic.end_of_thread = end_of_thread;
416 }
417 }
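/* Callers follow the pattern sketched below: set the descriptor first
 * (which zeroes the Function Control bits via brw_set_src1), then fill
 * in the SFID-specific fields, as brw_set_ff_sync_message() does:
 *
 *    brw_set_message_descriptor(p, insn, BRW_SFID_URB,
 *                               1, response_length, true, end_of_thread);
 *    insn->bits3.urb_gen5.opcode = 1;
 */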
418
419 static void brw_set_math_message( struct brw_compile *p,
420 struct brw_instruction *insn,
421 GLuint function,
422 GLuint integer_type,
423 bool low_precision,
424 bool saturate,
425 GLuint dataType )
426 {
427 struct brw_context *brw = p->brw;
428 struct intel_context *intel = &brw->intel;
429 unsigned msg_length;
430 unsigned response_length;
431
432 /* Infer message length from the function */
433 switch (function) {
434 case BRW_MATH_FUNCTION_POW:
435 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
436 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
437 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
438 msg_length = 2;
439 break;
440 default:
441 msg_length = 1;
442 break;
443 }
444
445 /* Infer response length from the function */
446 switch (function) {
447 case BRW_MATH_FUNCTION_SINCOS:
448 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
449 response_length = 2;
450 break;
451 default:
452 response_length = 1;
453 break;
454 }
455
456 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
457 msg_length, response_length, false, false);
458 if (intel->gen == 5) {
459 insn->bits3.math_gen5.function = function;
460 insn->bits3.math_gen5.int_type = integer_type;
461 insn->bits3.math_gen5.precision = low_precision;
462 insn->bits3.math_gen5.saturate = saturate;
463 insn->bits3.math_gen5.data_type = dataType;
464 insn->bits3.math_gen5.snapshot = 0;
465 } else {
466 insn->bits3.math.function = function;
467 insn->bits3.math.int_type = integer_type;
468 insn->bits3.math.precision = low_precision;
469 insn->bits3.math.saturate = saturate;
470 insn->bits3.math.data_type = dataType;
471 }
472 }
473
474
475 static void brw_set_ff_sync_message(struct brw_compile *p,
476 struct brw_instruction *insn,
477 bool allocate,
478 GLuint response_length,
479 bool end_of_thread)
480 {
481 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
482 1, response_length, true, end_of_thread);
483 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
484 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
485 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
486 insn->bits3.urb_gen5.allocate = allocate;
487 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
488 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
489 }
490
491 static void brw_set_urb_message( struct brw_compile *p,
492 struct brw_instruction *insn,
493 bool allocate,
494 bool used,
495 GLuint msg_length,
496 GLuint response_length,
497 bool end_of_thread,
498 bool complete,
499 GLuint offset,
500 GLuint swizzle_control )
501 {
502 struct brw_context *brw = p->brw;
503 struct intel_context *intel = &brw->intel;
504
505 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
506 msg_length, response_length, true, end_of_thread);
507 if (intel->gen == 7) {
508 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
509 insn->bits3.urb_gen7.offset = offset;
510 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
511 insn->bits3.urb_gen7.swizzle_control = swizzle_control;
512 /* per_slot_offset = 0 makes it ignore offsets in message header */
513 insn->bits3.urb_gen7.per_slot_offset = 0;
514 insn->bits3.urb_gen7.complete = complete;
515 } else if (intel->gen >= 5) {
516 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
517 insn->bits3.urb_gen5.offset = offset;
518 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
519 insn->bits3.urb_gen5.allocate = allocate;
520 insn->bits3.urb_gen5.used = used; /* ? */
521 insn->bits3.urb_gen5.complete = complete;
522 } else {
523 insn->bits3.urb.opcode = 0; /* ? */
524 insn->bits3.urb.offset = offset;
525 insn->bits3.urb.swizzle_control = swizzle_control;
526 insn->bits3.urb.allocate = allocate;
527 insn->bits3.urb.used = used; /* ? */
528 insn->bits3.urb.complete = complete;
529 }
530 }
531
532 void
533 brw_set_dp_write_message(struct brw_compile *p,
534 struct brw_instruction *insn,
535 GLuint binding_table_index,
536 GLuint msg_control,
537 GLuint msg_type,
538 GLuint msg_length,
539 bool header_present,
540 GLuint last_render_target,
541 GLuint response_length,
542 GLuint end_of_thread,
543 GLuint send_commit_msg)
544 {
545 struct brw_context *brw = p->brw;
546 struct intel_context *intel = &brw->intel;
547 unsigned sfid;
548
549 if (intel->gen >= 7) {
550 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
551 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
552 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
553 else
554 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
555 } else if (intel->gen == 6) {
556 /* Use the render cache for all write messages. */
557 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
558 } else {
559 sfid = BRW_SFID_DATAPORT_WRITE;
560 }
561
562 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
563 header_present, end_of_thread);
564
565 if (intel->gen >= 7) {
566 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
567 insn->bits3.gen7_dp.msg_control = msg_control;
568 insn->bits3.gen7_dp.last_render_target = last_render_target;
569 insn->bits3.gen7_dp.msg_type = msg_type;
570 } else if (intel->gen == 6) {
571 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
572 insn->bits3.gen6_dp.msg_control = msg_control;
573 insn->bits3.gen6_dp.last_render_target = last_render_target;
574 insn->bits3.gen6_dp.msg_type = msg_type;
575 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
576 } else if (intel->gen == 5) {
577 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
578 insn->bits3.dp_write_gen5.msg_control = msg_control;
579 insn->bits3.dp_write_gen5.last_render_target = last_render_target;
580 insn->bits3.dp_write_gen5.msg_type = msg_type;
581 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
582 } else {
583 insn->bits3.dp_write.binding_table_index = binding_table_index;
584 insn->bits3.dp_write.msg_control = msg_control;
585 insn->bits3.dp_write.last_render_target = last_render_target;
586 insn->bits3.dp_write.msg_type = msg_type;
587 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
588 }
589 }
590
591 void
592 brw_set_dp_read_message(struct brw_compile *p,
593 struct brw_instruction *insn,
594 GLuint binding_table_index,
595 GLuint msg_control,
596 GLuint msg_type,
597 GLuint target_cache,
598 GLuint msg_length,
599 GLuint response_length)
600 {
601 struct brw_context *brw = p->brw;
602 struct intel_context *intel = &brw->intel;
603 unsigned sfid;
604
605 if (intel->gen >= 7) {
606 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
607 } else if (intel->gen == 6) {
608 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
609 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
610 else
611 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
612 } else {
613 sfid = BRW_SFID_DATAPORT_READ;
614 }
615
616 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
617 true, false);
618
619 if (intel->gen >= 7) {
620 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
621 insn->bits3.gen7_dp.msg_control = msg_control;
622 insn->bits3.gen7_dp.last_render_target = 0;
623 insn->bits3.gen7_dp.msg_type = msg_type;
624 } else if (intel->gen == 6) {
625 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
626 insn->bits3.gen6_dp.msg_control = msg_control;
627 insn->bits3.gen6_dp.last_render_target = 0;
628 insn->bits3.gen6_dp.msg_type = msg_type;
629 insn->bits3.gen6_dp.send_commit_msg = 0;
630 } else if (intel->gen == 5) {
631 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
632 insn->bits3.dp_read_gen5.msg_control = msg_control;
633 insn->bits3.dp_read_gen5.msg_type = msg_type;
634 insn->bits3.dp_read_gen5.target_cache = target_cache;
635 } else if (intel->is_g4x) {
636 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
637 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
638 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
639 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
640 } else {
641 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
642 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
643 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
644 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
645 }
646 }
647
648 static void brw_set_sampler_message(struct brw_compile *p,
649 struct brw_instruction *insn,
650 GLuint binding_table_index,
651 GLuint sampler,
652 GLuint msg_type,
653 GLuint response_length,
654 GLuint msg_length,
655 GLuint header_present,
656 GLuint simd_mode,
657 GLuint return_format)
658 {
659 struct brw_context *brw = p->brw;
660 struct intel_context *intel = &brw->intel;
661
662 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
663 response_length, header_present, false);
664
665 if (intel->gen >= 7) {
666 insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
667 insn->bits3.sampler_gen7.sampler = sampler;
668 insn->bits3.sampler_gen7.msg_type = msg_type;
669 insn->bits3.sampler_gen7.simd_mode = simd_mode;
670 } else if (intel->gen >= 5) {
671 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
672 insn->bits3.sampler_gen5.sampler = sampler;
673 insn->bits3.sampler_gen5.msg_type = msg_type;
674 insn->bits3.sampler_gen5.simd_mode = simd_mode;
675 } else if (intel->is_g4x) {
676 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
677 insn->bits3.sampler_g4x.sampler = sampler;
678 insn->bits3.sampler_g4x.msg_type = msg_type;
679 } else {
680 insn->bits3.sampler.binding_table_index = binding_table_index;
681 insn->bits3.sampler.sampler = sampler;
682 insn->bits3.sampler.msg_type = msg_type;
683 insn->bits3.sampler.return_format = return_format;
684 }
685 }
686
687
688 #define next_insn brw_next_insn
689 struct brw_instruction *
690 brw_next_insn(struct brw_compile *p, GLuint opcode)
691 {
692 struct brw_instruction *insn;
693
694 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
695
696 insn = &p->store[p->nr_insn++];
697 memcpy(insn, p->current, sizeof(*insn));
698
699 /* Reset this one-shot flag:
700 */
701
702 if (p->current->header.destreg__conditionalmod) {
703 p->current->header.destreg__conditionalmod = 0;
704 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
705 }
706
707 insn->header.opcode = opcode;
708 return insn;
709 }
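/* Note that every emitted instruction starts life as a copy of p->current,
 * the default-state template maintained by brw_push_insn_state() /
 * brw_pop_insn_state() and the brw_set_*() state helpers; only the opcode
 * is filled in here.
 */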
710
711 static struct brw_instruction *brw_alu1( struct brw_compile *p,
712 GLuint opcode,
713 struct brw_reg dest,
714 struct brw_reg src )
715 {
716 struct brw_instruction *insn = next_insn(p, opcode);
717 brw_set_dest(p, insn, dest);
718 brw_set_src0(p, insn, src);
719 return insn;
720 }
721
722 static struct brw_instruction *brw_alu2(struct brw_compile *p,
723 GLuint opcode,
724 struct brw_reg dest,
725 struct brw_reg src0,
726 struct brw_reg src1 )
727 {
728 struct brw_instruction *insn = next_insn(p, opcode);
729 brw_set_dest(p, insn, dest);
730 brw_set_src0(p, insn, src0);
731 brw_set_src1(p, insn, src1);
732 return insn;
733 }
734
735
736 /***********************************************************************
737 * Convenience routines.
738 */
739 #define ALU1(OP) \
740 struct brw_instruction *brw_##OP(struct brw_compile *p, \
741 struct brw_reg dest, \
742 struct brw_reg src0) \
743 { \
744 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
745 }
746
747 #define ALU2(OP) \
748 struct brw_instruction *brw_##OP(struct brw_compile *p, \
749 struct brw_reg dest, \
750 struct brw_reg src0, \
751 struct brw_reg src1) \
752 { \
753 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
754 }
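/* For instance, ALU2(SEL) below expands to:
 *
 *    struct brw_instruction *brw_SEL(struct brw_compile *p,
 *                                    struct brw_reg dest,
 *                                    struct brw_reg src0,
 *                                    struct brw_reg src1)
 *    {
 *       return brw_alu2(p, BRW_OPCODE_SEL, dest, src0, src1);
 *    }
 */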
755
756 /* Rounding operations (other than RNDD) require two instructions - the first
757 * stores a rounded value (possibly the wrong way) in the dest register, but
758 * also sets a per-channel "increment bit" in the flag register. A predicated
759 * add of 1.0 fixes dest to contain the desired result.
760 *
761 * Sandybridge and later appear to round correctly without an ADD.
762 */
763 #define ROUND(OP) \
764 void brw_##OP(struct brw_compile *p, \
765 struct brw_reg dest, \
766 struct brw_reg src) \
767 { \
768 struct brw_instruction *rnd, *add; \
769 rnd = next_insn(p, BRW_OPCODE_##OP); \
770 brw_set_dest(p, rnd, dest); \
771 brw_set_src0(p, rnd, src); \
772 \
773 if (p->brw->intel.gen < 6) { \
774 /* turn on round-increments */ \
775 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
776 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
777 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
778 } \
779 }
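/* On pre-gen6 hardware, ROUND(RNDZ) therefore emits, in effect:
 *
 *    rndz.r dest, src          // sets per-channel increment bits in f0
 *    (+f0) add dest, dest, 1.0F
 *
 * while on gen6+ only the rounding instruction itself is needed.
 */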
780
781
782 ALU1(MOV)
783 ALU2(SEL)
784 ALU1(NOT)
785 ALU2(AND)
786 ALU2(OR)
787 ALU2(XOR)
788 ALU2(SHR)
789 ALU2(SHL)
790 ALU2(RSR)
791 ALU2(RSL)
792 ALU2(ASR)
793 ALU1(FRC)
794 ALU1(RNDD)
795 ALU2(MAC)
796 ALU2(MACH)
797 ALU1(LZD)
798 ALU2(DP4)
799 ALU2(DPH)
800 ALU2(DP3)
801 ALU2(DP2)
802 ALU2(LINE)
803 ALU2(PLN)
804
805
806 ROUND(RNDZ)
807 ROUND(RNDE)
808
809
810 struct brw_instruction *brw_ADD(struct brw_compile *p,
811 struct brw_reg dest,
812 struct brw_reg src0,
813 struct brw_reg src1)
814 {
815 /* 6.2.2: add */
816 if (src0.type == BRW_REGISTER_TYPE_F ||
817 (src0.file == BRW_IMMEDIATE_VALUE &&
818 src0.type == BRW_REGISTER_TYPE_VF)) {
819 assert(src1.type != BRW_REGISTER_TYPE_UD);
820 assert(src1.type != BRW_REGISTER_TYPE_D);
821 }
822
823 if (src1.type == BRW_REGISTER_TYPE_F ||
824 (src1.file == BRW_IMMEDIATE_VALUE &&
825 src1.type == BRW_REGISTER_TYPE_VF)) {
826 assert(src0.type != BRW_REGISTER_TYPE_UD);
827 assert(src0.type != BRW_REGISTER_TYPE_D);
828 }
829
830 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
831 }
832
833 struct brw_instruction *brw_MUL(struct brw_compile *p,
834 struct brw_reg dest,
835 struct brw_reg src0,
836 struct brw_reg src1)
837 {
838 /* 6.32.38: mul */
839 if (src0.type == BRW_REGISTER_TYPE_D ||
840 src0.type == BRW_REGISTER_TYPE_UD ||
841 src1.type == BRW_REGISTER_TYPE_D ||
842 src1.type == BRW_REGISTER_TYPE_UD) {
843 assert(dest.type != BRW_REGISTER_TYPE_F);
844 }
845
846 if (src0.type == BRW_REGISTER_TYPE_F ||
847 (src0.file == BRW_IMMEDIATE_VALUE &&
848 src0.type == BRW_REGISTER_TYPE_VF)) {
849 assert(src1.type != BRW_REGISTER_TYPE_UD);
850 assert(src1.type != BRW_REGISTER_TYPE_D);
851 }
852
853 if (src1.type == BRW_REGISTER_TYPE_F ||
854 (src1.file == BRW_IMMEDIATE_VALUE &&
855 src1.type == BRW_REGISTER_TYPE_VF)) {
856 assert(src0.type != BRW_REGISTER_TYPE_UD);
857 assert(src0.type != BRW_REGISTER_TYPE_D);
858 }
859
860 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
861 src0.nr != BRW_ARF_ACCUMULATOR);
862 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
863 src1.nr != BRW_ARF_ACCUMULATOR);
864
865 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
866 }
867
868
869 void brw_NOP(struct brw_compile *p)
870 {
871 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
872 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
873 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
874 brw_set_src1(p, insn, brw_imm_ud(0x0));
875 }
876
877
878
879
880
881 /***********************************************************************
882 * Comparisons, if/else/endif
883 */
884
885 struct brw_instruction *brw_JMPI(struct brw_compile *p,
886 struct brw_reg dest,
887 struct brw_reg src0,
888 struct brw_reg src1)
889 {
890 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
891
892 insn->header.execution_size = 1;
893 insn->header.compression_control = BRW_COMPRESSION_NONE;
894 insn->header.mask_control = BRW_MASK_DISABLE;
895
896 p->current->header.predicate_control = BRW_PREDICATE_NONE;
897
898 return insn;
899 }
900
901 static void
902 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
903 {
904 p->if_stack[p->if_stack_depth] = inst - p->store;
905
906 p->if_stack_depth++;
907 if (p->if_stack_array_size <= p->if_stack_depth) {
908 p->if_stack_array_size *= 2;
909 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
910 p->if_stack_array_size);
911 }
912 }
913
914 static struct brw_instruction *
915 pop_if_stack(struct brw_compile *p)
916 {
917 p->if_stack_depth--;
918 return &p->store[p->if_stack[p->if_stack_depth]];
919 }
920
921 static void
922 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
923 {
924 if (p->loop_stack_array_size < p->loop_stack_depth) {
925 p->loop_stack_array_size *= 2;
926 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
927 p->loop_stack_array_size);
928 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
929 p->loop_stack_array_size);
930 }
931
932 p->loop_stack[p->loop_stack_depth] = inst - p->store;
933 p->loop_stack_depth++;
934 p->if_depth_in_loop[p->loop_stack_depth] = 0;
935 }
936
937 static struct brw_instruction *
938 get_inner_do_insn(struct brw_compile *p)
939 {
940 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
941 }
942
943 /* EU takes the value from the flag register and pushes it onto some
944 * sort of a stack (presumably merging with any flag value already on
945 * the stack). Within an if block, the flags at the top of the stack
 946  * control execution on each channel of the unit, e.g. on each of the
947 * 16 pixel values in our wm programs.
948 *
949 * When the matching 'else' instruction is reached (presumably by
950 * countdown of the instruction count patched in by our ELSE/ENDIF
 951  * functions), the relevant flags are inverted.
952 *
953 * When the matching 'endif' instruction is reached, the flags are
954 * popped off. If the stack is now empty, normal execution resumes.
955 */
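/* A typical usage sketch for an 8-wide program (with hypothetical
 * operands `x` and `y`):
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, x, y);
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ... "then" block ...
 *    brw_ELSE(p);
 *    ... "else" block ...
 *    brw_ENDIF(p);
 */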
956 struct brw_instruction *
957 brw_IF(struct brw_compile *p, GLuint execute_size)
958 {
959 struct intel_context *intel = &p->brw->intel;
960 struct brw_instruction *insn;
961
962 insn = next_insn(p, BRW_OPCODE_IF);
963
964 /* Override the defaults for this instruction:
965 */
966 if (intel->gen < 6) {
967 brw_set_dest(p, insn, brw_ip_reg());
968 brw_set_src0(p, insn, brw_ip_reg());
969 brw_set_src1(p, insn, brw_imm_d(0x0));
970 } else if (intel->gen == 6) {
971 brw_set_dest(p, insn, brw_imm_w(0));
972 insn->bits1.branch_gen6.jump_count = 0;
973 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
974 brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
975 } else {
976 brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
977 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
978 brw_set_src1(p, insn, brw_imm_ud(0));
979 insn->bits3.break_cont.jip = 0;
980 insn->bits3.break_cont.uip = 0;
981 }
982
983 insn->header.execution_size = execute_size;
984 insn->header.compression_control = BRW_COMPRESSION_NONE;
985 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
986 insn->header.mask_control = BRW_MASK_ENABLE;
987 if (!p->single_program_flow)
988 insn->header.thread_control = BRW_THREAD_SWITCH;
989
990 p->current->header.predicate_control = BRW_PREDICATE_NONE;
991
992 push_if_stack(p, insn);
993 p->if_depth_in_loop[p->loop_stack_depth]++;
994 return insn;
995 }
996
997 /* This function is only used for gen6-style IF instructions with an
998 * embedded comparison (conditional modifier). It is not used on gen7.
999 */
1000 struct brw_instruction *
1001 gen6_IF(struct brw_compile *p, uint32_t conditional,
1002 struct brw_reg src0, struct brw_reg src1)
1003 {
1004 struct brw_instruction *insn;
1005
1006 insn = next_insn(p, BRW_OPCODE_IF);
1007
1008 brw_set_dest(p, insn, brw_imm_w(0));
1009 if (p->compressed) {
1010 insn->header.execution_size = BRW_EXECUTE_16;
1011 } else {
1012 insn->header.execution_size = BRW_EXECUTE_8;
1013 }
1014 insn->bits1.branch_gen6.jump_count = 0;
1015 brw_set_src0(p, insn, src0);
1016 brw_set_src1(p, insn, src1);
1017
1018 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1019 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1020 insn->header.destreg__conditionalmod = conditional;
1021
1022 if (!p->single_program_flow)
1023 insn->header.thread_control = BRW_THREAD_SWITCH;
1024
1025 push_if_stack(p, insn);
1026 return insn;
1027 }
1028
1029 /**
1030 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1031 */
1032 static void
1033 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1034 struct brw_instruction *if_inst,
1035 struct brw_instruction *else_inst)
1036 {
1037 /* The next instruction (where the ENDIF would be, if it existed) */
1038 struct brw_instruction *next_inst = &p->store[p->nr_insn];
1039
1040 assert(p->single_program_flow);
1041 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1042 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1043 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1044
1045 /* Convert IF to an ADD instruction that moves the instruction pointer
1046 * to the first instruction of the ELSE block. If there is no ELSE
1047 * block, point to where ENDIF would be. Reverse the predicate.
1048 *
1049 * There's no need to execute an ENDIF since we don't need to do any
1050 * stack operations, and if we're currently executing, we just want to
1051 * continue normally.
1052 */
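   /* The ADDs operate on IP in bytes; each native instruction is 128 bits
    * (16 bytes), hence the multiplications by 16 below.
    */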
1053 if_inst->header.opcode = BRW_OPCODE_ADD;
1054 if_inst->header.predicate_inverse = 1;
1055
1056 if (else_inst != NULL) {
1057 /* Convert ELSE to an ADD instruction that points where the ENDIF
1058 * would be.
1059 */
1060 else_inst->header.opcode = BRW_OPCODE_ADD;
1061
1062 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1063 else_inst->bits3.ud = (next_inst - else_inst) * 16;
1064 } else {
1065 if_inst->bits3.ud = (next_inst - if_inst) * 16;
1066 }
1067 }
1068
1069 /**
1070 * Patch IF and ELSE instructions with appropriate jump targets.
1071 */
1072 static void
1073 patch_IF_ELSE(struct brw_compile *p,
1074 struct brw_instruction *if_inst,
1075 struct brw_instruction *else_inst,
1076 struct brw_instruction *endif_inst)
1077 {
1078 struct intel_context *intel = &p->brw->intel;
1079
1080 /* We shouldn't be patching IF and ELSE instructions in single program flow
1081 * mode when gen < 6, because in single program flow mode on those
1082 * platforms, we convert flow control instructions to conditional ADDs that
1083 * operate on IP (see brw_ENDIF).
1084 *
1085 * However, on Gen6, writing to IP doesn't work in single program flow mode
1086 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1087 * not be updated by non-flow control instructions."). And on later
1088 * platforms, there is no significant benefit to converting control flow
1089 * instructions to conditional ADDs. So we do patch IF and ELSE
1090 * instructions in single program flow mode on those platforms.
1091 */
1092 if (intel->gen < 6)
1093 assert(!p->single_program_flow);
1094
1095 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1096 assert(endif_inst != NULL);
1097 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1098
1099 unsigned br = 1;
1100    /* The jump count is in units of 64-bit data chunks, so one 128-bit
1101     * instruction requires 2 chunks.
1102 */
1103 if (intel->gen >= 5)
1104 br = 2;
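   /* E.g. on gen5, where each 128-bit instruction spans two 64-bit chunks,
    * a jump over N instructions is encoded as 2 * N.
    */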
1105
1106 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1107 endif_inst->header.execution_size = if_inst->header.execution_size;
1108
1109 if (else_inst == NULL) {
1110 /* Patch IF -> ENDIF */
1111 if (intel->gen < 6) {
1112 /* Turn it into an IFF, which means no mask stack operations for
1113 * all-false and jumping past the ENDIF.
1114 */
1115 if_inst->header.opcode = BRW_OPCODE_IFF;
1116 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1117 if_inst->bits3.if_else.pop_count = 0;
1118 if_inst->bits3.if_else.pad0 = 0;
1119 } else if (intel->gen == 6) {
1120 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1121 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1122 } else {
1123 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1124 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1125 }
1126 } else {
1127 else_inst->header.execution_size = if_inst->header.execution_size;
1128
1129 /* Patch IF -> ELSE */
1130 if (intel->gen < 6) {
1131 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1132 if_inst->bits3.if_else.pop_count = 0;
1133 if_inst->bits3.if_else.pad0 = 0;
1134 } else if (intel->gen == 6) {
1135 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1136 }
1137
1138 /* Patch ELSE -> ENDIF */
1139 if (intel->gen < 6) {
1140 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1141 * matching ENDIF.
1142 */
1143 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1144 else_inst->bits3.if_else.pop_count = 1;
1145 else_inst->bits3.if_else.pad0 = 0;
1146 } else if (intel->gen == 6) {
1147 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1148 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1149 } else {
1150 /* The IF instruction's JIP should point just past the ELSE */
1151 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1152 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1153 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1154 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1155 }
1156 }
1157 }
1158
1159 void
1160 brw_ELSE(struct brw_compile *p)
1161 {
1162 struct intel_context *intel = &p->brw->intel;
1163 struct brw_instruction *insn;
1164
1165 insn = next_insn(p, BRW_OPCODE_ELSE);
1166
1167 if (intel->gen < 6) {
1168 brw_set_dest(p, insn, brw_ip_reg());
1169 brw_set_src0(p, insn, brw_ip_reg());
1170 brw_set_src1(p, insn, brw_imm_d(0x0));
1171 } else if (intel->gen == 6) {
1172 brw_set_dest(p, insn, brw_imm_w(0));
1173 insn->bits1.branch_gen6.jump_count = 0;
1174 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1175 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1176 } else {
1177 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1178 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1179 brw_set_src1(p, insn, brw_imm_ud(0));
1180 insn->bits3.break_cont.jip = 0;
1181 insn->bits3.break_cont.uip = 0;
1182 }
1183
1184 insn->header.compression_control = BRW_COMPRESSION_NONE;
1185 insn->header.mask_control = BRW_MASK_ENABLE;
1186 if (!p->single_program_flow)
1187 insn->header.thread_control = BRW_THREAD_SWITCH;
1188
1189 push_if_stack(p, insn);
1190 }
1191
1192 void
1193 brw_ENDIF(struct brw_compile *p)
1194 {
1195 struct intel_context *intel = &p->brw->intel;
1196 struct brw_instruction *insn;
1197 struct brw_instruction *else_inst = NULL;
1198 struct brw_instruction *if_inst = NULL;
1199 struct brw_instruction *tmp;
1200 bool emit_endif = true;
1201
1202 /* In single program flow mode, we can express IF and ELSE instructions
1203 * equivalently as ADD instructions that operate on IP. On platforms prior
1204 * to Gen6, flow control instructions cause an implied thread switch, so
1205 * this is a significant savings.
1206 *
1207 * However, on Gen6, writing to IP doesn't work in single program flow mode
1208 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1209 * not be updated by non-flow control instructions."). And on later
1210 * platforms, there is no significant benefit to converting control flow
1211 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1212 * Gen5.
1213 */
1214 if (intel->gen < 6 && p->single_program_flow)
1215 emit_endif = false;
1216
1217 /*
1218     * A single next_insn() may change the base address of the instruction
1219     * store memory (p->store), so call it first, before referencing an
1220     * instruction in the store by index.
1221 */
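   /* I.e., never write (a sketch of the broken ordering this guards
    * against):
    *
    *    tmp = pop_if_stack(p);            // pointer into p->store
    *    insn = next_insn(p, ...);         // may move p->store
    *
    * since `tmp` could now dangle; the ENDIF is emitted first instead.
    */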
1222 if (emit_endif)
1223 insn = next_insn(p, BRW_OPCODE_ENDIF);
1224
1225 /* Pop the IF and (optional) ELSE instructions from the stack */
1226 p->if_depth_in_loop[p->loop_stack_depth]--;
1227 tmp = pop_if_stack(p);
1228 if (tmp->header.opcode == BRW_OPCODE_ELSE) {
1229 else_inst = tmp;
1230 tmp = pop_if_stack(p);
1231 }
1232 if_inst = tmp;
1233
1234 if (!emit_endif) {
1235 /* ENDIF is useless; don't bother emitting it. */
1236 convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1237 return;
1238 }
1239
1240 if (intel->gen < 6) {
1241 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1242 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1243 brw_set_src1(p, insn, brw_imm_d(0x0));
1244 } else if (intel->gen == 6) {
1245 brw_set_dest(p, insn, brw_imm_w(0));
1246 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1247 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1248 } else {
1249 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1250 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1251 brw_set_src1(p, insn, brw_imm_ud(0));
1252 }
1253
1254 insn->header.compression_control = BRW_COMPRESSION_NONE;
1255 insn->header.mask_control = BRW_MASK_ENABLE;
1256 insn->header.thread_control = BRW_THREAD_SWITCH;
1257
1258 /* Also pop item off the stack in the endif instruction: */
1259 if (intel->gen < 6) {
1260 insn->bits3.if_else.jump_count = 0;
1261 insn->bits3.if_else.pop_count = 1;
1262 insn->bits3.if_else.pad0 = 0;
1263 } else if (intel->gen == 6) {
1264 insn->bits1.branch_gen6.jump_count = 2;
1265 } else {
1266 insn->bits3.break_cont.jip = 2;
1267 }
1268 patch_IF_ELSE(p, if_inst, else_inst, insn);
1269 }
1270
1271 struct brw_instruction *brw_BREAK(struct brw_compile *p)
1272 {
1273 struct intel_context *intel = &p->brw->intel;
1274 struct brw_instruction *insn;
1275
1276 insn = next_insn(p, BRW_OPCODE_BREAK);
1277 if (intel->gen >= 6) {
1278 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1279 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1280 brw_set_src1(p, insn, brw_imm_d(0x0));
1281 } else {
1282 brw_set_dest(p, insn, brw_ip_reg());
1283 brw_set_src0(p, insn, brw_ip_reg());
1284 brw_set_src1(p, insn, brw_imm_d(0x0));
1285 insn->bits3.if_else.pad0 = 0;
1286 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1287 }
1288 insn->header.compression_control = BRW_COMPRESSION_NONE;
1289 insn->header.execution_size = BRW_EXECUTE_8;
1290
1291 return insn;
1292 }
1293
1294 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1295 {
1296 struct brw_instruction *insn;
1297
1298 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1299 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1300 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1301 brw_set_dest(p, insn, brw_ip_reg());
1302 brw_set_src0(p, insn, brw_ip_reg());
1303 brw_set_src1(p, insn, brw_imm_d(0x0));
1304
1305 insn->header.compression_control = BRW_COMPRESSION_NONE;
1306 insn->header.execution_size = BRW_EXECUTE_8;
1307 return insn;
1308 }
1309
1310 struct brw_instruction *brw_CONT(struct brw_compile *p)
1311 {
1312 struct brw_instruction *insn;
1313 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1314 brw_set_dest(p, insn, brw_ip_reg());
1315 brw_set_src0(p, insn, brw_ip_reg());
1316 brw_set_src1(p, insn, brw_imm_d(0x0));
1317 insn->header.compression_control = BRW_COMPRESSION_NONE;
1318 insn->header.execution_size = BRW_EXECUTE_8;
1319 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1320 insn->bits3.if_else.pad0 = 0;
1321 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1322 return insn;
1323 }
1324
1325 /* DO/WHILE loop:
1326 *
1327 * The DO/WHILE is just an unterminated loop -- break or continue are
1328 * used for control within the loop. We have a few ways they can be
1329 * done.
1330 *
1331 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1332 * jip and no DO instruction.
1333 *
1334 * For non-uniform control flow pre-gen6, there's a DO instruction to
1335 * push the mask, and a WHILE to jump back, and BREAK to get out and
1336 * pop the mask.
1337 *
1338 * For gen6, there's no more mask stack, so no need for DO. WHILE
1339 * just points back to the first instruction of the loop.
1340 */
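/* Usage sketch for a non-uniform loop (gen6+ simply skips the DO
 * instruction, as described above):
 *
 *    brw_DO(p, BRW_EXECUTE_8);
 *    ... loop body, using brw_BREAK() / brw_CONT() as needed ...
 *    brw_WHILE(p);
 */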
1341 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1342 {
1343 struct intel_context *intel = &p->brw->intel;
1344
1345 if (intel->gen >= 6 || p->single_program_flow) {
1346 push_loop_stack(p, &p->store[p->nr_insn]);
1347 return &p->store[p->nr_insn];
1348 } else {
1349 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1350
1351 push_loop_stack(p, insn);
1352
1353 /* Override the defaults for this instruction:
1354 */
1355 brw_set_dest(p, insn, brw_null_reg());
1356 brw_set_src0(p, insn, brw_null_reg());
1357 brw_set_src1(p, insn, brw_null_reg());
1358
1359 insn->header.compression_control = BRW_COMPRESSION_NONE;
1360 insn->header.execution_size = execute_size;
1361 insn->header.predicate_control = BRW_PREDICATE_NONE;
1362 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1363 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1364
1365 return insn;
1366 }
1367 }
1368
1369 /**
1370 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1371 * instruction here.
1372 *
1373 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1374 * nesting, since it can always just point to the end of the block/current loop.
1375 */
1376 static void
1377 brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
1378 {
1379 struct intel_context *intel = &p->brw->intel;
1380 struct brw_instruction *do_inst = get_inner_do_insn(p);
1381 struct brw_instruction *inst;
1382 int br = (intel->gen == 5) ? 2 : 1;
1383
1384 for (inst = while_inst - 1; inst != do_inst; inst--) {
1385 /* If the jump count is != 0, that means that this instruction has already
1386 * been patched because it's part of a loop inside of the one we're
1387 * patching.
1388 */
1389 if (inst->header.opcode == BRW_OPCODE_BREAK &&
1390 inst->bits3.if_else.jump_count == 0) {
1391 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
1392 } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
1393 inst->bits3.if_else.jump_count == 0) {
1394 inst->bits3.if_else.jump_count = br * (while_inst - inst);
1395 }
1396 }
1397 }
1398
1399 struct brw_instruction *brw_WHILE(struct brw_compile *p)
1400 {
1401 struct intel_context *intel = &p->brw->intel;
1402 struct brw_instruction *insn, *do_insn;
1403 GLuint br = 1;
1404
1405 if (intel->gen >= 5)
1406 br = 2;
1407
1408 if (intel->gen >= 7) {
1409 insn = next_insn(p, BRW_OPCODE_WHILE);
1410 do_insn = get_inner_do_insn(p);
1411
1412 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1413 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1414 brw_set_src1(p, insn, brw_imm_ud(0));
1415 insn->bits3.break_cont.jip = br * (do_insn - insn);
1416
1417 insn->header.execution_size = BRW_EXECUTE_8;
1418 } else if (intel->gen == 6) {
1419 insn = next_insn(p, BRW_OPCODE_WHILE);
1420 do_insn = get_inner_do_insn(p);
1421
1422 brw_set_dest(p, insn, brw_imm_w(0));
1423 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1424 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1425 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1426
1427 insn->header.execution_size = BRW_EXECUTE_8;
1428 } else {
1429 if (p->single_program_flow) {
1430 insn = next_insn(p, BRW_OPCODE_ADD);
1431 do_insn = get_inner_do_insn(p);
1432
1433 brw_set_dest(p, insn, brw_ip_reg());
1434 brw_set_src0(p, insn, brw_ip_reg());
1435 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1436 insn->header.execution_size = BRW_EXECUTE_1;
1437 } else {
1438 insn = next_insn(p, BRW_OPCODE_WHILE);
1439 do_insn = get_inner_do_insn(p);
1440
1441 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1442
1443 brw_set_dest(p, insn, brw_ip_reg());
1444 brw_set_src0(p, insn, brw_ip_reg());
1445 brw_set_src1(p, insn, brw_imm_d(0));
1446
1447 insn->header.execution_size = do_insn->header.execution_size;
1448 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1449 insn->bits3.if_else.pop_count = 0;
1450 insn->bits3.if_else.pad0 = 0;
1451
1452 brw_patch_break_cont(p, insn);
1453 }
1454 }
1455 insn->header.compression_control = BRW_COMPRESSION_NONE;
1456 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1457
1458 p->loop_stack_depth--;
1459
1460 return insn;
1461 }
1462
1463
1464 /* FORWARD JUMPS:
1465 */
1466 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1467 {
1468 struct intel_context *intel = &p->brw->intel;
1469 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1470 GLuint jmpi = 1;
1471
1472 if (intel->gen >= 5)
1473 jmpi = 2;
1474
1475 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1476 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1477
1478 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1479 }
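/* Sketch: record the index of a JMPI, emit the code to be skipped, then
 * land the jump at the current end of the stream:
 *
 *    int jmp_idx = p->nr_insn;
 *    brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
 *    ... instructions to skip ...
 *    brw_land_fwd_jump(p, jmp_idx);
 */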
1480
1481
1482
1483 /* To integrate with the above, it makes sense that the comparison
1484 * instruction should populate the flag register. It might be simpler
1485 * just to use the flag reg for most WM tasks?
1486 */
1487 void brw_CMP(struct brw_compile *p,
1488 struct brw_reg dest,
1489 GLuint conditional,
1490 struct brw_reg src0,
1491 struct brw_reg src1)
1492 {
1493 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1494
1495 insn->header.destreg__conditionalmod = conditional;
1496 brw_set_dest(p, insn, dest);
1497 brw_set_src0(p, insn, src0);
1498 brw_set_src1(p, insn, src1);
1499
1500 /* guess_execution_size(insn, src0); */
1501
1502
1503 /* Make it so that future instructions will use the computed flag
1504 * value until brw_set_predicate_control_flag_value() is called
1505 * again.
1506 */
1507 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1508 dest.nr == 0) {
1509 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1510 p->flag_value = 0xff;
1511 }
1512 }
1513
1514 /* Issue a 'wait' instruction on notification register n1; the host can
1515  * program MMIO to wake up the thread. */
1516 void brw_WAIT (struct brw_compile *p)
1517 {
1518 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1519 struct brw_reg src = brw_notification_1_reg();
1520
1521 brw_set_dest(p, insn, src);
1522 brw_set_src0(p, insn, src);
1523 brw_set_src1(p, insn, brw_null_reg());
1524    insn->header.execution_size = 0; /* must be BRW_EXECUTE_1 */
1525 insn->header.predicate_control = 0;
1526 insn->header.compression_control = 0;
1527 }
1528
1529
1530 /***********************************************************************
1531 * Helpers for the various SEND message types:
1532 */
1533
1534 /** Extended math function, float[8].
1535 */
1536 void brw_math( struct brw_compile *p,
1537 struct brw_reg dest,
1538 GLuint function,
1539 GLuint saturate,
1540 GLuint msg_reg_nr,
1541 struct brw_reg src,
1542 GLuint data_type,
1543 GLuint precision )
1544 {
1545 struct intel_context *intel = &p->brw->intel;
1546
1547 if (intel->gen >= 6) {
1548 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1549
1550 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1551 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1552
1553 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1554 if (intel->gen == 6)
1555 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1556
1557 /* Source modifiers are ignored for extended math instructions on Gen6. */
1558 if (intel->gen == 6) {
1559 assert(!src.negate);
1560 assert(!src.abs);
1561 }
1562
1563 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1564 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1565 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1566 assert(src.type != BRW_REGISTER_TYPE_F);
1567 } else {
1568 assert(src.type == BRW_REGISTER_TYPE_F);
1569 }
1570
1571 /* Math is the same ISA format as other opcodes, except that CondModifier
1572 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1573 */
1574 insn->header.destreg__conditionalmod = function;
1575 insn->header.saturate = saturate;
1576
1577 brw_set_dest(p, insn, dest);
1578 brw_set_src0(p, insn, src);
1579 brw_set_src1(p, insn, brw_null_reg());
1580 } else {
1581 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1582
1583 /* Example code doesn't set predicate_control for send
1584 * instructions.
1585 */
1586 insn->header.predicate_control = 0;
1587 insn->header.destreg__conditionalmod = msg_reg_nr;
1588
1589 brw_set_dest(p, insn, dest);
1590 brw_set_src0(p, insn, src);
1591 brw_set_math_message(p,
1592 insn,
1593 function,
1594 src.type == BRW_REGISTER_TYPE_D,
1595 precision,
1596 saturate,
1597 data_type);
1598 }
1599 }
1600
1601 /** Extended math function, float[8].
1602 */
1603 void brw_math2(struct brw_compile *p,
1604 struct brw_reg dest,
1605 GLuint function,
1606 struct brw_reg src0,
1607 struct brw_reg src1)
1608 {
1609 struct intel_context *intel = &p->brw->intel;
1610 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1611
1612 assert(intel->gen >= 6);
1613 (void) intel;
1614
1615
1616 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1617 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1618 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1619
1620 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1621 if (intel->gen == 6) {
1622 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1623 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1624 }
1625
1626 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1627 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1628 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1629 assert(src0.type != BRW_REGISTER_TYPE_F);
1630 assert(src1.type != BRW_REGISTER_TYPE_F);
1631 } else {
1632 assert(src0.type == BRW_REGISTER_TYPE_F);
1633 assert(src1.type == BRW_REGISTER_TYPE_F);
1634 }
1635
1636 /* Source modifiers are ignored for extended math instructions on Gen6. */
1637 if (intel->gen == 6) {
1638 assert(!src0.negate);
1639 assert(!src0.abs);
1640 assert(!src1.negate);
1641 assert(!src1.abs);
1642 }
1643
1644 /* Math is the same ISA format as other opcodes, except that CondModifier
1645 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1646 */
1647 insn->header.destreg__conditionalmod = function;
1648
1649 brw_set_dest(p, insn, dest);
1650 brw_set_src0(p, insn, src0);
1651 brw_set_src1(p, insn, src1);
1652 }

/**
 * Extended math function, float[16].
 * Uses two SEND instructions on hardware before Gen6.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint saturate,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
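
/* On pre-Gen6 hardware the code above expands to two SIMD8 SENDs, along the
 * lines of (register numbers hypothetical, disassembly schematic):
 *
 *    send (8) g4<1>UW m2 math          { Align1 }
 *    send (8) g5<1>UW m3 math          { Align1 SecHalf }
 *
 * The second SEND is marked BRW_COMPRESSION_2NDHALF and uses msg_reg_nr+1
 * and offset(dest, 1), so the upper eight channels get their own message
 * register and destination register.
 */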


/**
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes). Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header. This is g0, with g0.2 filled with
    * the offset. We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection. Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
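
/* Spill sketch (illustrative; the MRF/GRF numbers and the scratch offset
 * are made up). The data to be written is assumed to go in the message
 * register that follows the header:
 *
 *    brw_MOV(p, retype(brw_message_reg(15), BRW_REGISTER_TYPE_UD),
 *            retype(brw_vec8_grf(12, 0), BRW_REGISTER_TYPE_UD));
 *    brw_oword_block_write_scratch(p, brw_message_reg(14), 1, 64);
 *
 * With num_regs == 1 this emits an mlen 2 message: m14 carries the header
 * and m15 carries the single GRF of payload.
 */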


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes). Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest); /* UW? */
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              rlen);
   }
}
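
/* Unspill sketch (illustrative; the numbers mirror the spill example above):
 *
 *    brw_oword_block_read_scratch(p, brw_vec8_grf(12, 0),
 *                                 brw_message_reg(14), 1, 64);
 *
 * reads the two owords written at scratch offset 64 back into g12.
 */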

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
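
/* Constant-fetch sketch (illustrative; the destination GRF and the binding
 * table index of 0 are hypothetical):
 *
 *    brw_oword_block_read(p, brw_vec8_grf(2, 0), brw_message_reg(1), 16, 0);
 *
 * This fetches the float[4] at byte offset 16 of surface 0 into the lower
 * half of g2.
 */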

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
                              struct brw_reg dest,
                              struct brw_reg mrf,
                              uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
                           BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* msg_length */
                           1); /* response_length */
}
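
/* The caller places the per-channel dword offsets in the register after the
 * header, which is why msg_length is 2. A sketch (all register and surface
 * numbers hypothetical; offsets_reg is assumed to be a UD-typed GRF already
 * holding the offsets):
 *
 *    brw_MOV(p, retype(brw_message_reg(2), BRW_REGISTER_TYPE_UD),
 *            offsets_reg);
 *    brw_dword_scattered_read(p, brw_vec8_grf(4, 0), brw_message_reg(1), 0);
 */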


/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   if (intel->gen >= 6)
      location /= 16;

   /* Set up MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
                     BRW_REGISTER_TYPE_UD),
           brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           0,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           1); /* response_length (1 Oword) */
}
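
/* Usage sketch (illustrative; the destination GRF and binding table index
 * are hypothetical):
 *
 *    brw_dp_READ_4_vs(p, brw_vec8_grf(2, 0), 32, 0);
 *
 * reads one float[4] constant at location 32 of surface 0 into the lower
 * half of g2.
 */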

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
                               struct brw_reg dest,
                               struct brw_reg addr_reg,
                               GLuint offset,
                               GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Set up MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
           addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* msg_length */
                           1); /* response_length */
}


void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            1, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}
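
/* SIMD16 render target write sketch (illustrative; the message register,
 * surface index and message length are hypothetical). A headerless Gen6+
 * write of four color components, ending the thread:
 *
 *    brw_fb_WRITE(p, 16, 2, brw_message_reg(2), 0, 8, 0, true, false);
 *
 * The four components of a SIMD16 color occupy eight message registers,
 * hence msg_length == 8, with no response expected.
 */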


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed. See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
                struct brw_reg src0,
                GLuint binding_table_index,
                GLuint sampler,
                GLuint writemask,
                GLuint msg_type,
                GLuint response_length,
                GLuint msg_length,
                GLuint header_present,
                GLuint simd_mode,
                GLuint return_format)
{
   struct intel_context *intel = &p->brw->intel;
   bool need_stall = false;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly. Add a workaround which generates the
    * dependency by other means. In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between. Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      for (i = 0; i < 4; i++) {
         if (writemask & (1<<i))
            break;
         dst_offset += 2;
      }
      for (; i < 4; i++) {
         if (!(writemask & (1<<i)))
            break;
         newmask |= 1<<i;
         len++;
      }

      if (newmask != writemask) {
         need_stall = true;
         /* printf("need stall %x %x\n", newmask , writemask); */
      } else {
         bool dispatch_16 = false;

         struct brw_reg m1 = brw_message_reg(msg_reg_nr);

         guess_execution_size(p, p->current, dest);
         if (p->current->header.execution_size == BRW_EXECUTE_16)
            dispatch_16 = true;

         newmask = ~newmask & WRITEMASK_XYZW;

         brw_push_insn_state(p);

         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_set_mask_control(p, BRW_MASK_DISABLE);

         brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

         brw_pop_insn_state(p);

         src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
         dest = offset(dest, dst_offset);

         /* For 16-wide dispatch, masked channels are skipped in the
          * response. For 8-wide, masked channels still take up slots,
          * and are just not written to.
          */
         if (dispatch_16)
            response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
         insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
                              binding_table_index,
                              sampler,
                              msg_type,
                              response_length,
                              msg_length,
                              header_present,
                              simd_mode,
                              return_format);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
              retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
}
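
/* Worked example of the writemask handling above: writemask = WRITEMASK_YZ
 * (0x6). The first loop skips the unwritten X channel, leaving
 * dst_offset == 2; the second collects Y and Z, so newmask == 0x6 and
 * len == 2. Since newmask matches writemask, no stall is needed: the
 * message header channel-disable field is set to ~newmask (X and W), the
 * destination is advanced by dst_offset, and for 16-wide dispatch the
 * response length shrinks to len * 2 == 4 registers.
 */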

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style. Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
                   bool allocate,
                   bool used,
                   GLuint msg_length,
                   GLuint response_length,
                   bool eot,
                   bool writes_complete,
                   GLuint offset,
                   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
                       insn,
                       allocate,
                       used,
                       msg_length,
                       response_length,
                       eot,
                       writes_complete,
                       offset,
                       swizzle);
}

static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
   int ip;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
         return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* There is no DO instruction on gen6, so to find the end of the loop
 * we scan forward for a WHILE whose backward jump lands at or before
 * our start instruction.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
         int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
                                   : insn->bits3.break_cont.jip;
         if (ip + jip / br <= start)
            return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;

   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
         insn->bits3.break_cont.uip =
            br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
         break;
      case BRW_OPCODE_CONTINUE:
         insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
         insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;
      }
   }
}
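
/* Worked example (instruction indices hypothetical): a BREAK at ip 10 whose
 * enclosing block ends at ip 14 and whose loop's WHILE sits at ip 20 gets
 * jip = 2 * (14 - 10) = 8. On Gen6 its uip = 2 * (20 - 10 + 1) = 22, since
 * Gen6 UIP must point just past the WHILE; on Gen7 it would be
 * 2 * (20 - 10) = 20.
 */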

void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 GLuint msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 GLuint response_length,
                 bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
                           insn,
                           allocate,
                           response_length,
                           eot);
}

/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write. According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *    writes are complete by sending the final write as a committed write."
 */
void
brw_svb_write(struct brw_compile *p,
              struct brw_reg dest,
              GLuint msg_reg_nr,
              struct brw_reg src0,
              GLuint binding_table_index,
              bool send_commit_msg)
{
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));
   brw_set_dp_write_message(p, insn,
                            binding_table_index,
                            0, /* msg_control: ignored */
                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
                            1, /* msg_length */
                            true, /* header_present */
                            0, /* last_render_target: ignored */
                            send_commit_msg, /* response_length */
                            0, /* end_of_thread */
                            send_commit_msg); /* send_commit_msg */
}
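
/* Stream-output sketch (illustrative; the registers and the surface index
 * are hypothetical). Intermediate vertex data is written uncommitted; the
 * final write sets send_commit_msg so the thread can safely end afterwards:
 *
 *    brw_svb_write(p, brw_null_reg(), 1, brw_vec8_grf(4, 0), 0, false);
 *    brw_svb_write(p, brw_vec8_grf(5, 0), 1, brw_vec8_grf(4, 0), 0, true);
 *
 * The committed write returns one register (the write commit), so it needs
 * a real destination.
 */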