i965: Silence gcc warning from resizing EU store changes.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;

   /* Pre-Gen6 hardware performs the move implicitly; nothing to do. */
   if (intel->gen < 6)
      return;

   /* Already in a message register; no move needed. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   /* Copy the operand into the message register.  The null register is
    * skipped (there is nothing to copy), but *src is still rewritten below
    * so the SEND references the MRF.
    */
   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      /* Emit the MOV with WE_all and no compression so it is not affected
       * by the current predication/compression state; restore state after.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   /* Point the caller's source at the message register we populated. */
   *src = brw_message_reg(msg_reg_nr);
}
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 struct intel_context *intel = &p->brw->intel;
88 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
89 reg->file = BRW_GENERAL_REGISTER_FILE;
90 reg->nr += 111;
91 }
92 }
93
94
/**
 * Encode @dest as the destination operand of @insn (bits1) and derive the
 * instruction's execution size from the destination register width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/MRF register numbers must fit the 7-bit encoding; ARF and MRF
    * numbers carry extra high bits and are exempt from this check.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A horizontal stride of 0 is not valid for a destination;
	  * substitute a stride of 1.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: subreg number is in 16-byte units; writemask applies. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
148
149 extern int reg_type_size[];
150
151 static void
152 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
153 {
154 int hstride_for_reg[] = {0, 1, 2, 4};
155 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
156 int width_for_reg[] = {1, 2, 4, 8, 16};
157 int execsize_for_reg[] = {1, 2, 4, 8, 16};
158 int width, hstride, vstride, execsize;
159
160 if (reg.file == BRW_IMMEDIATE_VALUE) {
161 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
162 * mean the destination has to be 128-bit aligned and the
163 * destination horiz stride has to be a word.
164 */
165 if (reg.type == BRW_REGISTER_TYPE_V) {
166 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
167 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
168 }
169
170 return;
171 }
172
173 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
174 reg.file == BRW_ARF_NULL)
175 return;
176
177 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
178 hstride = hstride_for_reg[reg.hstride];
179
180 if (reg.vstride == 0xf) {
181 vstride = -1;
182 } else {
183 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
184 vstride = vstride_for_reg[reg.vstride];
185 }
186
187 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
188 width = width_for_reg[reg.width];
189
190 assert(insn->header.execution_size >= 0 &&
191 insn->header.execution_size < Elements(execsize_for_reg));
192 execsize = execsize_for_reg[insn->header.execution_size];
193
194 /* Restrictions from 3.3.10: Register Region Restrictions. */
195 /* 3. */
196 assert(execsize >= width);
197
198 /* 4. */
199 if (execsize == width && hstride != 0) {
200 assert(vstride == -1 || vstride == width * hstride);
201 }
202
203 /* 5. */
204 if (execsize == width && hstride == 0) {
205 /* no restriction on vstride. */
206 }
207
208 /* 6. */
209 if (width == 1) {
210 assert(hstride == 0);
211 }
212
213 /* 7. */
214 if (execsize == 1 && width == 1) {
215 assert(hstride == 0);
216 assert(vstride == 0);
217 }
218
219 /* 8. */
220 if (vstride == 0 && hstride == 0) {
221 assert(width == 1);
222 }
223
224 /* 10. Check destination issues. */
225 }
226
227 void
228 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
229 struct brw_reg reg)
230 {
231 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
232 assert(reg.nr < 128);
233
234 gen7_convert_mrf_to_grf(p, &reg);
235
236 validate_reg(insn, reg);
237
238 insn->bits1.da1.src0_reg_file = reg.file;
239 insn->bits1.da1.src0_reg_type = reg.type;
240 insn->bits2.da1.src0_abs = reg.abs;
241 insn->bits2.da1.src0_negate = reg.negate;
242 insn->bits2.da1.src0_address_mode = reg.address_mode;
243
244 if (reg.file == BRW_IMMEDIATE_VALUE) {
245 insn->bits3.ud = reg.dw1.ud;
246
247 /* Required to set some fields in src1 as well:
248 */
249 insn->bits1.da1.src1_reg_file = 0; /* arf */
250 insn->bits1.da1.src1_reg_type = reg.type;
251 }
252 else
253 {
254 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
255 if (insn->header.access_mode == BRW_ALIGN_1) {
256 insn->bits2.da1.src0_subreg_nr = reg.subnr;
257 insn->bits2.da1.src0_reg_nr = reg.nr;
258 }
259 else {
260 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
261 insn->bits2.da16.src0_reg_nr = reg.nr;
262 }
263 }
264 else {
265 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
266
267 if (insn->header.access_mode == BRW_ALIGN_1) {
268 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
269 }
270 else {
271 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
272 }
273 }
274
275 if (insn->header.access_mode == BRW_ALIGN_1) {
276 if (reg.width == BRW_WIDTH_1 &&
277 insn->header.execution_size == BRW_EXECUTE_1) {
278 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
279 insn->bits2.da1.src0_width = BRW_WIDTH_1;
280 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
281 }
282 else {
283 insn->bits2.da1.src0_horiz_stride = reg.hstride;
284 insn->bits2.da1.src0_width = reg.width;
285 insn->bits2.da1.src0_vert_stride = reg.vstride;
286 }
287 }
288 else {
289 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
293
294 /* This is an oddity of the fact we're using the same
295 * descriptions for registers in align_16 as align_1:
296 */
297 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
299 else
300 insn->bits2.da16.src0_vert_stride = reg.vstride;
301 }
302 }
303 }
304
305
/**
 * Encode @reg as source operand 1 of @insn (bits3).  MRF sources are not
 * allowed here; src1 may be an immediate, in which case it consumes the
 * whole bits3 dword.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16: subreg number is in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar source in a scalar instruction uses the canonical
	  * <0,1,0> region regardless of what the caller supplied.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
375
376 /**
377 * Set the Message Descriptor and Extended Message Descriptor fields
378 * for SEND messages.
379 *
380 * \note This zeroes out the Function Control bits, so it must be called
381 * \b before filling out any message-specific data. Callers can
382 * choose not to fill in irrelevant bits; they will be zero.
383 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* Zero the whole descriptor dword (src1) before setting fields. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Pre-Gen5 layout packs everything into the basic descriptor. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
418
419 static void brw_set_math_message( struct brw_compile *p,
420 struct brw_instruction *insn,
421 GLuint function,
422 GLuint integer_type,
423 bool low_precision,
424 bool saturate,
425 GLuint dataType )
426 {
427 struct brw_context *brw = p->brw;
428 struct intel_context *intel = &brw->intel;
429 unsigned msg_length;
430 unsigned response_length;
431
432 /* Infer message length from the function */
433 switch (function) {
434 case BRW_MATH_FUNCTION_POW:
435 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
436 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
437 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
438 msg_length = 2;
439 break;
440 default:
441 msg_length = 1;
442 break;
443 }
444
445 /* Infer response length from the function */
446 switch (function) {
447 case BRW_MATH_FUNCTION_SINCOS:
448 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
449 response_length = 2;
450 break;
451 default:
452 response_length = 1;
453 break;
454 }
455
456 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
457 msg_length, response_length, false, false);
458 if (intel->gen == 5) {
459 insn->bits3.math_gen5.function = function;
460 insn->bits3.math_gen5.int_type = integer_type;
461 insn->bits3.math_gen5.precision = low_precision;
462 insn->bits3.math_gen5.saturate = saturate;
463 insn->bits3.math_gen5.data_type = dataType;
464 insn->bits3.math_gen5.snapshot = 0;
465 } else {
466 insn->bits3.math.function = function;
467 insn->bits3.math.int_type = integer_type;
468 insn->bits3.math.precision = low_precision;
469 insn->bits3.math.saturate = saturate;
470 insn->bits3.math.data_type = dataType;
471 }
472 }
473
474
475 static void brw_set_ff_sync_message(struct brw_compile *p,
476 struct brw_instruction *insn,
477 bool allocate,
478 GLuint response_length,
479 bool end_of_thread)
480 {
481 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
482 1, response_length, true, end_of_thread);
483 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
484 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
485 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
486 insn->bits3.urb_gen5.allocate = allocate;
487 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
488 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
489 }
490
/* Fill in the message descriptor for a URB write, using the field layout
 * appropriate to the hardware generation.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
531
/**
 * Fill in the message descriptor for a data-port write, picking the
 * shared function (SFID) and field layout for the hardware generation.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   /* Per-generation descriptor field layouts: */
   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
590
/**
 * Fill in the message descriptor for a data-port read, picking the
 * shared function (SFID) and field layout for the hardware generation.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* On Gen6 the cache selection is expressed through the SFID. */
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      true, false);

   /* Per-generation descriptor field layouts: */
   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
647
/* Fill in the message descriptor for a sampler SEND, using the field
 * layout for the hardware generation.  return_format is only encoded on
 * original Gen4; later generations dropped the field.
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLuint header_present,
                                    GLuint simd_mode,
                                    GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
686
687
688 #define next_insn brw_next_insn
689 struct brw_instruction *
690 brw_next_insn(struct brw_compile *p, GLuint opcode)
691 {
692 struct brw_instruction *insn;
693
694 if (p->nr_insn + 1 > p->store_size) {
695 if (0)
696 printf("incresing the store size to %d\n", p->store_size << 1);
697 p->store_size <<= 1;
698 p->store = reralloc(p->mem_ctx, p->store,
699 struct brw_instruction, p->store_size);
700 if (!p->store)
701 assert(!"realloc eu store memeory failed");
702 }
703
704 insn = &p->store[p->nr_insn++];
705 memcpy(insn, p->current, sizeof(*insn));
706
707 /* Reset this one-shot flag:
708 */
709
710 if (p->current->header.destreg__conditionalmod) {
711 p->current->header.destreg__conditionalmod = 0;
712 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
713 }
714
715 insn->header.opcode = opcode;
716 return insn;
717 }
718
719 static struct brw_instruction *brw_alu1( struct brw_compile *p,
720 GLuint opcode,
721 struct brw_reg dest,
722 struct brw_reg src )
723 {
724 struct brw_instruction *insn = next_insn(p, opcode);
725 brw_set_dest(p, insn, dest);
726 brw_set_src0(p, insn, src);
727 return insn;
728 }
729
730 static struct brw_instruction *brw_alu2(struct brw_compile *p,
731 GLuint opcode,
732 struct brw_reg dest,
733 struct brw_reg src0,
734 struct brw_reg src1 )
735 {
736 struct brw_instruction *insn = next_insn(p, opcode);
737 brw_set_dest(p, insn, dest);
738 brw_set_src0(p, insn, src0);
739 brw_set_src1(p, insn, src1);
740 return insn;
741 }
742
743
/***********************************************************************
 * Convenience routines.
 */

/* Generate a public brw_<OP> wrapper for a one-source ALU opcode. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Generate a public brw_<OP> wrapper for a two-source ALU opcode. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
788
789
/* Instantiate the public brw_<OP> emitters via the ALU1/ALU2 macros. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


/* Two-instruction rounding ops (see the ROUND macro). */
ROUND(RNDZ)
ROUND(RNDE)
816
817
818 struct brw_instruction *brw_ADD(struct brw_compile *p,
819 struct brw_reg dest,
820 struct brw_reg src0,
821 struct brw_reg src1)
822 {
823 /* 6.2.2: add */
824 if (src0.type == BRW_REGISTER_TYPE_F ||
825 (src0.file == BRW_IMMEDIATE_VALUE &&
826 src0.type == BRW_REGISTER_TYPE_VF)) {
827 assert(src1.type != BRW_REGISTER_TYPE_UD);
828 assert(src1.type != BRW_REGISTER_TYPE_D);
829 }
830
831 if (src1.type == BRW_REGISTER_TYPE_F ||
832 (src1.file == BRW_IMMEDIATE_VALUE &&
833 src1.type == BRW_REGISTER_TYPE_VF)) {
834 assert(src0.type != BRW_REGISTER_TYPE_UD);
835 assert(src0.type != BRW_REGISTER_TYPE_D);
836 }
837
838 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
839 }
840
841 struct brw_instruction *brw_MUL(struct brw_compile *p,
842 struct brw_reg dest,
843 struct brw_reg src0,
844 struct brw_reg src1)
845 {
846 /* 6.32.38: mul */
847 if (src0.type == BRW_REGISTER_TYPE_D ||
848 src0.type == BRW_REGISTER_TYPE_UD ||
849 src1.type == BRW_REGISTER_TYPE_D ||
850 src1.type == BRW_REGISTER_TYPE_UD) {
851 assert(dest.type != BRW_REGISTER_TYPE_F);
852 }
853
854 if (src0.type == BRW_REGISTER_TYPE_F ||
855 (src0.file == BRW_IMMEDIATE_VALUE &&
856 src0.type == BRW_REGISTER_TYPE_VF)) {
857 assert(src1.type != BRW_REGISTER_TYPE_UD);
858 assert(src1.type != BRW_REGISTER_TYPE_D);
859 }
860
861 if (src1.type == BRW_REGISTER_TYPE_F ||
862 (src1.file == BRW_IMMEDIATE_VALUE &&
863 src1.type == BRW_REGISTER_TYPE_VF)) {
864 assert(src0.type != BRW_REGISTER_TYPE_UD);
865 assert(src0.type != BRW_REGISTER_TYPE_D);
866 }
867
868 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
869 src0.nr != BRW_ARF_ACCUMULATOR);
870 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
871 src1.nr != BRW_ARF_ACCUMULATOR);
872
873 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
874 }
875
876
877 void brw_NOP(struct brw_compile *p)
878 {
879 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
880 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
881 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
882 brw_set_src1(p, insn, brw_imm_ud(0x0));
883 }
884
885
886
887
888
889 /***********************************************************************
890 * Comparisons, if/else/endif
891 */
892
/* Emit a JMPI (jump indexed) instruction.  JMPI is always scalar
 * (execution size 1), uncompressed, and unmasked; any one-shot
 * predication in the default instruction state is cleared afterwards.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* The predicate applied to the JMPI itself; reset the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
908
/* Push the offset of an IF/ELSE instruction onto the if-stack.  Offsets
 * into p->store (not pointers) are recorded because the store may be
 * reallocated.  The stack is grown *after* the write: the check below
 * guarantees capacity for the next push, so the write at the current
 * depth is always in bounds.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
921
922 static struct brw_instruction *
923 pop_if_stack(struct brw_compile *p)
924 {
925 p->if_stack_depth--;
926 return &p->store[p->if_stack[p->if_stack_depth]];
927 }
928
929 static void
930 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
931 {
932 if (p->loop_stack_array_size < p->loop_stack_depth) {
933 p->loop_stack_array_size *= 2;
934 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
935 p->loop_stack_array_size);
936 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
937 p->loop_stack_array_size);
938 }
939
940 p->loop_stack[p->loop_stack_depth] = inst - p->store;
941 p->loop_stack_depth++;
942 p->if_depth_in_loop[p->loop_stack_depth] = 0;
943 }
944
945 static struct brw_instruction *
946 get_inner_do_insn(struct brw_compile *p)
947 {
948 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
949 }
950
951 /* EU takes the value from the flag register and pushes it onto some
952 * sort of a stack (presumably merging with any flag value already on
953 * the stack). Within an if block, the flags at the top of the stack
954 * control execution on each channel of the unit, eg. on each of the
955 * 16 pixel values in our wm programs.
956 *
957 * When the matching 'else' instruction is reached (presumably by
958 * countdown of the instruction count patched in by our ELSE/ENDIF
959 * functions), the relevent flags are inverted.
960 *
961 * When the matching 'endif' instruction is reached, the flags are
962 * popped off. If the stack is now empty, normal execution resumes.
963 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-Gen6: IF takes IP operands and a jump count patched in later
       * by the matching ELSE/ENDIF.
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump count lives in its own field; operands are null. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7: JIP/UIP branch offsets, patched in later. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication was consumed by the IF itself; reset the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   /* Remember this IF so the matching ELSE/ENDIF can patch it. */
   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1004
1005 /* This function is only used for gen6-style IF instructions with an
1006 * embedded comparison (conditional modifier). It is not used on gen7.
1007 */
1008 struct brw_instruction *
1009 gen6_IF(struct brw_compile *p, uint32_t conditional,
1010 struct brw_reg src0, struct brw_reg src1)
1011 {
1012 struct brw_instruction *insn;
1013
1014 insn = next_insn(p, BRW_OPCODE_IF);
1015
1016 brw_set_dest(p, insn, brw_imm_w(0));
1017 if (p->compressed) {
1018 insn->header.execution_size = BRW_EXECUTE_16;
1019 } else {
1020 insn->header.execution_size = BRW_EXECUTE_8;
1021 }
1022 insn->bits1.branch_gen6.jump_count = 0;
1023 brw_set_src0(p, insn, src0);
1024 brw_set_src1(p, insn, src1);
1025
1026 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1027 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1028 insn->header.destreg__conditionalmod = conditional;
1029
1030 if (!p->single_program_flow)
1031 insn->header.thread_control = BRW_THREAD_SWITCH;
1032
1033 push_if_stack(p, insn);
1034 return insn;
1035 }
1036
1037 /**
1038 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1039 */
1040 static void
1041 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1042 struct brw_instruction *if_inst,
1043 struct brw_instruction *else_inst)
1044 {
1045 /* The next instruction (where the ENDIF would be, if it existed) */
1046 struct brw_instruction *next_inst = &p->store[p->nr_insn];
1047
1048 assert(p->single_program_flow);
1049 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1050 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1051 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1052
1053 /* Convert IF to an ADD instruction that moves the instruction pointer
1054 * to the first instruction of the ELSE block. If there is no ELSE
1055 * block, point to where ENDIF would be. Reverse the predicate.
1056 *
1057 * There's no need to execute an ENDIF since we don't need to do any
1058 * stack operations, and if we're currently executing, we just want to
1059 * continue normally.
1060 */
1061 if_inst->header.opcode = BRW_OPCODE_ADD;
1062 if_inst->header.predicate_inverse = 1;
1063
1064 if (else_inst != NULL) {
1065 /* Convert ELSE to an ADD instruction that points where the ENDIF
1066 * would be.
1067 */
1068 else_inst->header.opcode = BRW_OPCODE_ADD;
1069
1070 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1071 else_inst->bits3.ud = (next_inst - else_inst) * 16;
1072 } else {
1073 if_inst->bits3.ud = (next_inst - if_inst) * 16;
1074 }
1075 }
1076
1077 /**
1078 * Patch IF and ELSE instructions with appropriate jump targets.
1079 */
1080 static void
1081 patch_IF_ELSE(struct brw_compile *p,
1082 struct brw_instruction *if_inst,
1083 struct brw_instruction *else_inst,
1084 struct brw_instruction *endif_inst)
1085 {
1086 struct intel_context *intel = &p->brw->intel;
1087
1088 /* We shouldn't be patching IF and ELSE instructions in single program flow
1089 * mode when gen < 6, because in single program flow mode on those
1090 * platforms, we convert flow control instructions to conditional ADDs that
1091 * operate on IP (see brw_ENDIF).
1092 *
1093 * However, on Gen6, writing to IP doesn't work in single program flow mode
1094 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1095 * not be updated by non-flow control instructions."). And on later
1096 * platforms, there is no significant benefit to converting control flow
1097 * instructions to conditional ADDs. So we do patch IF and ELSE
1098 * instructions in single program flow mode on those platforms.
1099 */
1100 if (intel->gen < 6)
1101 assert(!p->single_program_flow);
1102
1103 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1104 assert(endif_inst != NULL);
1105 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1106
1107 unsigned br = 1;
1108 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1109 * requires 2 chunks.
1110 */
1111 if (intel->gen >= 5)
1112 br = 2;
1113
1114 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1115 endif_inst->header.execution_size = if_inst->header.execution_size;
1116
1117 if (else_inst == NULL) {
1118 /* Patch IF -> ENDIF */
1119 if (intel->gen < 6) {
1120 /* Turn it into an IFF, which means no mask stack operations for
1121 * all-false and jumping past the ENDIF.
1122 */
1123 if_inst->header.opcode = BRW_OPCODE_IFF;
1124 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1125 if_inst->bits3.if_else.pop_count = 0;
1126 if_inst->bits3.if_else.pad0 = 0;
1127 } else if (intel->gen == 6) {
1128 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1129 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1130 } else {
1131 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1132 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1133 }
1134 } else {
1135 else_inst->header.execution_size = if_inst->header.execution_size;
1136
1137 /* Patch IF -> ELSE */
1138 if (intel->gen < 6) {
1139 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1140 if_inst->bits3.if_else.pop_count = 0;
1141 if_inst->bits3.if_else.pad0 = 0;
1142 } else if (intel->gen == 6) {
1143 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1144 }
1145
1146 /* Patch ELSE -> ENDIF */
1147 if (intel->gen < 6) {
1148 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1149 * matching ENDIF.
1150 */
1151 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1152 else_inst->bits3.if_else.pop_count = 1;
1153 else_inst->bits3.if_else.pad0 = 0;
1154 } else if (intel->gen == 6) {
1155 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1156 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1157 } else {
1158 /* The IF instruction's JIP should point just past the ELSE */
1159 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1160 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1161 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1162 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1163 }
1164 }
1165 }
1166
1167 void
1168 brw_ELSE(struct brw_compile *p)
1169 {
1170 struct intel_context *intel = &p->brw->intel;
1171 struct brw_instruction *insn;
1172
1173 insn = next_insn(p, BRW_OPCODE_ELSE);
1174
1175 if (intel->gen < 6) {
1176 brw_set_dest(p, insn, brw_ip_reg());
1177 brw_set_src0(p, insn, brw_ip_reg());
1178 brw_set_src1(p, insn, brw_imm_d(0x0));
1179 } else if (intel->gen == 6) {
1180 brw_set_dest(p, insn, brw_imm_w(0));
1181 insn->bits1.branch_gen6.jump_count = 0;
1182 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1183 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1184 } else {
1185 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1186 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1187 brw_set_src1(p, insn, brw_imm_ud(0));
1188 insn->bits3.break_cont.jip = 0;
1189 insn->bits3.break_cont.uip = 0;
1190 }
1191
1192 insn->header.compression_control = BRW_COMPRESSION_NONE;
1193 insn->header.mask_control = BRW_MASK_ENABLE;
1194 if (!p->single_program_flow)
1195 insn->header.thread_control = BRW_THREAD_SWITCH;
1196
1197 push_if_stack(p, insn);
1198 }
1199
/* Close the innermost open IF (and optional ELSE), then patch the whole
 * construct's jump targets via patch_IF_ELSE().  In SPF mode on gen4/5,
 * no ENDIF is emitted at all and the IF/ELSE are rewritten as ADDs.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (intel->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation operand encodings for the ENDIF itself. */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1278
1279 struct brw_instruction *brw_BREAK(struct brw_compile *p)
1280 {
1281 struct intel_context *intel = &p->brw->intel;
1282 struct brw_instruction *insn;
1283
1284 insn = next_insn(p, BRW_OPCODE_BREAK);
1285 if (intel->gen >= 6) {
1286 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1287 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1288 brw_set_src1(p, insn, brw_imm_d(0x0));
1289 } else {
1290 brw_set_dest(p, insn, brw_ip_reg());
1291 brw_set_src0(p, insn, brw_ip_reg());
1292 brw_set_src1(p, insn, brw_imm_d(0x0));
1293 insn->bits3.if_else.pad0 = 0;
1294 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1295 }
1296 insn->header.compression_control = BRW_COMPRESSION_NONE;
1297 insn->header.execution_size = BRW_EXECUTE_8;
1298
1299 return insn;
1300 }
1301
1302 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1303 {
1304 struct brw_instruction *insn;
1305
1306 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1307 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1308 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1309 brw_set_dest(p, insn, brw_ip_reg());
1310 brw_set_src0(p, insn, brw_ip_reg());
1311 brw_set_src1(p, insn, brw_imm_d(0x0));
1312
1313 insn->header.compression_control = BRW_COMPRESSION_NONE;
1314 insn->header.execution_size = BRW_EXECUTE_8;
1315 return insn;
1316 }
1317
1318 struct brw_instruction *brw_CONT(struct brw_compile *p)
1319 {
1320 struct brw_instruction *insn;
1321 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1322 brw_set_dest(p, insn, brw_ip_reg());
1323 brw_set_src0(p, insn, brw_ip_reg());
1324 brw_set_src1(p, insn, brw_imm_d(0x0));
1325 insn->header.compression_control = BRW_COMPRESSION_NONE;
1326 insn->header.execution_size = BRW_EXECUTE_8;
1327 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1328 insn->bits3.if_else.pad0 = 0;
1329 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1330 return insn;
1331 }
1332
1333 /* DO/WHILE loop:
1334 *
1335 * The DO/WHILE is just an unterminated loop -- break or continue are
1336 * used for control within the loop. We have a few ways they can be
1337 * done.
1338 *
1339 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1340 * jip and no DO instruction.
1341 *
1342 * For non-uniform control flow pre-gen6, there's a DO instruction to
1343 * push the mask, and a WHILE to jump back, and BREAK to get out and
1344 * pop the mask.
1345 *
1346 * For gen6, there's no more mask stack, so no need for DO. WHILE
1347 * just points back to the first instruction of the loop.
1348 */
1349 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1350 {
1351 struct intel_context *intel = &p->brw->intel;
1352
1353 if (intel->gen >= 6 || p->single_program_flow) {
1354 push_loop_stack(p, &p->store[p->nr_insn]);
1355 return &p->store[p->nr_insn];
1356 } else {
1357 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1358
1359 push_loop_stack(p, insn);
1360
1361 /* Override the defaults for this instruction:
1362 */
1363 brw_set_dest(p, insn, brw_null_reg());
1364 brw_set_src0(p, insn, brw_null_reg());
1365 brw_set_src1(p, insn, brw_null_reg());
1366
1367 insn->header.compression_control = BRW_COMPRESSION_NONE;
1368 insn->header.execution_size = execute_size;
1369 insn->header.predicate_control = BRW_PREDICATE_NONE;
1370 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1371 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1372
1373 return insn;
1374 }
1375 }
1376
1377 /**
1378 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1379 * instruction here.
1380 *
1381 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1382 * nesting, since it can always just point to the end of the block/current loop.
1383 */
1384 static void
1385 brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
1386 {
1387 struct intel_context *intel = &p->brw->intel;
1388 struct brw_instruction *do_inst = get_inner_do_insn(p);
1389 struct brw_instruction *inst;
1390 int br = (intel->gen == 5) ? 2 : 1;
1391
1392 for (inst = while_inst - 1; inst != do_inst; inst--) {
1393 /* If the jump count is != 0, that means that this instruction has already
1394 * been patched because it's part of a loop inside of the one we're
1395 * patching.
1396 */
1397 if (inst->header.opcode == BRW_OPCODE_BREAK &&
1398 inst->bits3.if_else.jump_count == 0) {
1399 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
1400 } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
1401 inst->bits3.if_else.jump_count == 0) {
1402 inst->bits3.if_else.jump_count = br * (while_inst - inst);
1403 }
1404 }
1405 }
1406
/* Close the innermost loop with a WHILE (or, in pre-gen6 SPF mode, an
 * ADD on IP), point it back at the loop top, and on pre-gen6 patch any
 * pending BREAK/CONT instructions inside the loop body.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn, *do_insn;
   /* Gen5+ counts jumps in 64-bit chunks, two per 128-bit instruction. */
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   /* In each path below, next_insn() is called before get_inner_do_insn()
    * because next_insn() may reallocate p->store, invalidating any
    * previously-fetched instruction pointer.
    */
   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Negative JIP: jump backwards to the loop top. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* SPF mode: a plain backwards ADD on IP replaces the WHILE. */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         /* Point pending BREAK/CONT instructions at this WHILE. */
         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1470
1471
1472 /* FORWARD JUMPS:
1473 */
1474 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1475 {
1476 struct intel_context *intel = &p->brw->intel;
1477 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1478 GLuint jmpi = 1;
1479
1480 if (intel->gen >= 5)
1481 jmpi = 2;
1482
1483 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1484 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1485
1486 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1487 }
1488
1489
1490
1491 /* To integrate with the above, it makes sense that the comparison
1492 * instruction should populate the flag register. It might be simpler
1493 * just to use the flag reg for most WM tasks?
1494 */
1495 void brw_CMP(struct brw_compile *p,
1496 struct brw_reg dest,
1497 GLuint conditional,
1498 struct brw_reg src0,
1499 struct brw_reg src1)
1500 {
1501 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1502
1503 insn->header.destreg__conditionalmod = conditional;
1504 brw_set_dest(p, insn, dest);
1505 brw_set_src0(p, insn, src0);
1506 brw_set_src1(p, insn, src1);
1507
1508 /* guess_execution_size(insn, src0); */
1509
1510
1511 /* Make it so that future instructions will use the computed flag
1512 * value until brw_set_predicate_control_flag_value() is called
1513 * again.
1514 */
1515 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1516 dest.nr == 0) {
1517 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1518 p->flag_value = 0xff;
1519 }
1520 }
1521
1522 /* Issue 'wait' instruction for n1, host could program MMIO
1523 to wake up thread. */
1524 void brw_WAIT (struct brw_compile *p)
1525 {
1526 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1527 struct brw_reg src = brw_notification_1_reg();
1528
1529 brw_set_dest(p, insn, src);
1530 brw_set_src0(p, insn, src);
1531 brw_set_src1(p, insn, brw_null_reg());
1532 insn->header.execution_size = 0; /* must */
1533 insn->header.predicate_control = 0;
1534 insn->header.compression_control = 0;
1535 }
1536
1537
1538 /***********************************************************************
1539 * Helpers for the various SEND message types:
1540 */
1541
/** Extended math function, float[8].
 *
 * On gen6+ this emits a native MATH instruction; on older gens it emits
 * a SEND to the shared math unit (EU extended-math message).
 *
 * \param function    one of the BRW_MATH_FUNCTION_* opcodes
 * \param msg_reg_nr  message register (pre-gen6 SEND path only)
 * \param data_type   message data format (pre-gen6 SEND path only)
 * \param precision   math precision (pre-gen6 SEND path only)
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* INT DIV variants take integer sources; everything else is float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1608
/** Extended math function, float[8].
 *
 * Two-source variant (e.g. POW, INT DIV); gen6+ only, since only the
 * native MATH instruction accepts a second source.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   /* Keep intel referenced when asserts compile out (NDEBUG), to avoid
    * an unused-variable warning.
    */
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* INT DIV variants take integer sources; everything else is float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1661
1662 /**
1663 * Extended math function, float[16].
1664 * Use 2 send instructions.
1665 */
1666 void brw_math_16( struct brw_compile *p,
1667 struct brw_reg dest,
1668 GLuint function,
1669 GLuint saturate,
1670 GLuint msg_reg_nr,
1671 struct brw_reg src,
1672 GLuint precision )
1673 {
1674 struct intel_context *intel = &p->brw->intel;
1675 struct brw_instruction *insn;
1676
1677 if (intel->gen >= 6) {
1678 insn = next_insn(p, BRW_OPCODE_MATH);
1679
1680 /* Math is the same ISA format as other opcodes, except that CondModifier
1681 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1682 */
1683 insn->header.destreg__conditionalmod = function;
1684 insn->header.saturate = saturate;
1685
1686 /* Source modifiers are ignored for extended math instructions. */
1687 assert(!src.negate);
1688 assert(!src.abs);
1689
1690 brw_set_dest(p, insn, dest);
1691 brw_set_src0(p, insn, src);
1692 brw_set_src1(p, insn, brw_null_reg());
1693 return;
1694 }
1695
1696 /* First instruction:
1697 */
1698 brw_push_insn_state(p);
1699 brw_set_predicate_control_flag_value(p, 0xff);
1700 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1701
1702 insn = next_insn(p, BRW_OPCODE_SEND);
1703 insn->header.destreg__conditionalmod = msg_reg_nr;
1704
1705 brw_set_dest(p, insn, dest);
1706 brw_set_src0(p, insn, src);
1707 brw_set_math_message(p,
1708 insn,
1709 function,
1710 BRW_MATH_INTEGER_UNSIGNED,
1711 precision,
1712 saturate,
1713 BRW_MATH_DATA_VECTOR);
1714
1715 /* Second instruction:
1716 */
1717 insn = next_insn(p, BRW_OPCODE_SEND);
1718 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1719 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1720
1721 brw_set_dest(p, insn, offset(dest,1));
1722 brw_set_src0(p, insn, src);
1723 brw_set_math_message(p,
1724 insn,
1725 function,
1726 BRW_MATH_INTEGER_UNSIGNED,
1727 precision,
1728 saturate,
1729 BRW_MATH_DATA_VECTOR);
1730
1731 brw_pop_insn_state(p);
1732 }
1733
1734
1735 /**
1736 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1737 * using a constant offset per channel.
1738 *
1739 * The offset must be aligned to oword size (16 bytes). Used for
1740 * register spilling.
1741 */
1742 void brw_oword_block_write_scratch(struct brw_compile *p,
1743 struct brw_reg mrf,
1744 int num_regs,
1745 GLuint offset)
1746 {
1747 struct intel_context *intel = &p->brw->intel;
1748 uint32_t msg_control, msg_type;
1749 int mlen;
1750
1751 if (intel->gen >= 6)
1752 offset /= 16;
1753
1754 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1755
1756 if (num_regs == 1) {
1757 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1758 mlen = 2;
1759 } else {
1760 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1761 mlen = 3;
1762 }
1763
1764 /* Set up the message header. This is g0, with g0.2 filled with
1765 * the offset. We don't want to leave our offset around in g0 or
1766 * it'll screw up texture samples, so set it up inside the message
1767 * reg.
1768 */
1769 {
1770 brw_push_insn_state(p);
1771 brw_set_mask_control(p, BRW_MASK_DISABLE);
1772 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1773
1774 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1775
1776 /* set message header global offset field (reg 0, element 2) */
1777 brw_MOV(p,
1778 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1779 mrf.nr,
1780 2), BRW_REGISTER_TYPE_UD),
1781 brw_imm_ud(offset));
1782
1783 brw_pop_insn_state(p);
1784 }
1785
1786 {
1787 struct brw_reg dest;
1788 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1789 int send_commit_msg;
1790 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1791 BRW_REGISTER_TYPE_UW);
1792
1793 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1794 insn->header.compression_control = BRW_COMPRESSION_NONE;
1795 src_header = vec16(src_header);
1796 }
1797 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1798 insn->header.destreg__conditionalmod = mrf.nr;
1799
1800 /* Until gen6, writes followed by reads from the same location
1801 * are not guaranteed to be ordered unless write_commit is set.
1802 * If set, then a no-op write is issued to the destination
1803 * register to set a dependency, and a read from the destination
1804 * can be used to ensure the ordering.
1805 *
1806 * For gen6, only writes between different threads need ordering
1807 * protection. Our use of DP writes is all about register
1808 * spilling within a thread.
1809 */
1810 if (intel->gen >= 6) {
1811 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1812 send_commit_msg = 0;
1813 } else {
1814 dest = src_header;
1815 send_commit_msg = 1;
1816 }
1817
1818 brw_set_dest(p, insn, dest);
1819 if (intel->gen >= 6) {
1820 brw_set_src0(p, insn, mrf);
1821 } else {
1822 brw_set_src0(p, insn, brw_null_reg());
1823 }
1824
1825 if (intel->gen >= 6)
1826 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1827 else
1828 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1829
1830 brw_set_dp_write_message(p,
1831 insn,
1832 255, /* binding table index (255=stateless) */
1833 msg_control,
1834 msg_type,
1835 mlen,
1836 true, /* header_present */
1837 0, /* not a render target */
1838 send_commit_msg, /* response_length */
1839 0, /* eot */
1840 send_commit_msg);
1841 }
1842 }
1843
1844
1845 /**
1846 * Read a block of owords (half a GRF each) from the scratch buffer
1847 * using a constant index per channel.
1848 *
1849 * Offset must be aligned to oword size (16 bytes). Used for register
1850 * spilling.
1851 */
1852 void
1853 brw_oword_block_read_scratch(struct brw_compile *p,
1854 struct brw_reg dest,
1855 struct brw_reg mrf,
1856 int num_regs,
1857 GLuint offset)
1858 {
1859 struct intel_context *intel = &p->brw->intel;
1860 uint32_t msg_control;
1861 int rlen;
1862
1863 if (intel->gen >= 6)
1864 offset /= 16;
1865
1866 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1867 dest = retype(dest, BRW_REGISTER_TYPE_UW);
1868
1869 if (num_regs == 1) {
1870 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1871 rlen = 1;
1872 } else {
1873 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1874 rlen = 2;
1875 }
1876
1877 {
1878 brw_push_insn_state(p);
1879 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1880 brw_set_mask_control(p, BRW_MASK_DISABLE);
1881
1882 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1883
1884 /* set message header global offset field (reg 0, element 2) */
1885 brw_MOV(p,
1886 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1887 mrf.nr,
1888 2), BRW_REGISTER_TYPE_UD),
1889 brw_imm_ud(offset));
1890
1891 brw_pop_insn_state(p);
1892 }
1893
1894 {
1895 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1896
1897 assert(insn->header.predicate_control == 0);
1898 insn->header.compression_control = BRW_COMPRESSION_NONE;
1899 insn->header.destreg__conditionalmod = mrf.nr;
1900
1901 brw_set_dest(p, insn, dest); /* UW? */
1902 if (intel->gen >= 6) {
1903 brw_set_src0(p, insn, mrf);
1904 } else {
1905 brw_set_src0(p, insn, brw_null_reg());
1906 }
1907
1908 brw_set_dp_read_message(p,
1909 insn,
1910 255, /* binding table index (255=stateless) */
1911 msg_control,
1912 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1913 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1914 1, /* msg_length */
1915 rlen);
1916 }
1917 }
1918
1919 /**
1920 * Read a float[4] vector from the data port Data Cache (const buffer).
1921 * Location (in buffer) should be a multiple of 16.
1922 * Used for fetching shader constants.
1923 */
1924 void brw_oword_block_read(struct brw_compile *p,
1925 struct brw_reg dest,
1926 struct brw_reg mrf,
1927 uint32_t offset,
1928 uint32_t bind_table_index)
1929 {
1930 struct intel_context *intel = &p->brw->intel;
1931
1932 /* On newer hardware, offset is in units of owords. */
1933 if (intel->gen >= 6)
1934 offset /= 16;
1935
1936 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1937
1938 brw_push_insn_state(p);
1939 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1940 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1941 brw_set_mask_control(p, BRW_MASK_DISABLE);
1942
1943 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1944
1945 /* set message header global offset field (reg 0, element 2) */
1946 brw_MOV(p,
1947 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1948 mrf.nr,
1949 2), BRW_REGISTER_TYPE_UD),
1950 brw_imm_ud(offset));
1951
1952 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1953 insn->header.destreg__conditionalmod = mrf.nr;
1954
1955 /* cast dest to a uword[8] vector */
1956 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1957
1958 brw_set_dest(p, insn, dest);
1959 if (intel->gen >= 6) {
1960 brw_set_src0(p, insn, mrf);
1961 } else {
1962 brw_set_src0(p, insn, brw_null_reg());
1963 }
1964
1965 brw_set_dp_read_message(p,
1966 insn,
1967 bind_table_index,
1968 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1969 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1970 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1971 1, /* msg_length */
1972 1); /* response_length (1 reg, 2 owords!) */
1973
1974 brw_pop_insn_state(p);
1975 }
1976
1977 /**
1978 * Read a set of dwords from the data port Data Cache (const buffer).
1979 *
1980 * Location (in buffer) appears as UD offsets in the register after
1981 * the provided mrf header reg.
1982 */
1983 void brw_dword_scattered_read(struct brw_compile *p,
1984 struct brw_reg dest,
1985 struct brw_reg mrf,
1986 uint32_t bind_table_index)
1987 {
1988 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1989
1990 brw_push_insn_state(p);
1991 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1992 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1993 brw_set_mask_control(p, BRW_MASK_DISABLE);
1994 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1995 brw_pop_insn_state(p);
1996
1997 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1998 insn->header.destreg__conditionalmod = mrf.nr;
1999
2000 /* cast dest to a uword[8] vector */
2001 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2002
2003 brw_set_dest(p, insn, dest);
2004 brw_set_src0(p, insn, brw_null_reg());
2005
2006 brw_set_dp_read_message(p,
2007 insn,
2008 bind_table_index,
2009 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
2010 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
2011 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2012 2, /* msg_length */
2013 1); /* response_length */
2014 }
2015
2016
2017
2018 /**
2019 * Read float[4] constant(s) from VS constant buffer.
2020 * For relative addressing, two float[4] constants will be read into 'dest'.
2021 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
2022 */
2023 void brw_dp_READ_4_vs(struct brw_compile *p,
2024 struct brw_reg dest,
2025 GLuint location,
2026 GLuint bind_table_index)
2027 {
2028 struct intel_context *intel = &p->brw->intel;
2029 struct brw_instruction *insn;
2030 GLuint msg_reg_nr = 1;
2031
2032 if (intel->gen >= 6)
2033 location /= 16;
2034
2035 /* Setup MRF[1] with location/offset into const buffer */
2036 brw_push_insn_state(p);
2037 brw_set_access_mode(p, BRW_ALIGN_1);
2038 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2039 brw_set_mask_control(p, BRW_MASK_DISABLE);
2040 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2041 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
2042 BRW_REGISTER_TYPE_UD),
2043 brw_imm_ud(location));
2044 brw_pop_insn_state(p);
2045
2046 insn = next_insn(p, BRW_OPCODE_SEND);
2047
2048 insn->header.predicate_control = BRW_PREDICATE_NONE;
2049 insn->header.compression_control = BRW_COMPRESSION_NONE;
2050 insn->header.destreg__conditionalmod = msg_reg_nr;
2051 insn->header.mask_control = BRW_MASK_DISABLE;
2052
2053 brw_set_dest(p, insn, dest);
2054 if (intel->gen >= 6) {
2055 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
2056 } else {
2057 brw_set_src0(p, insn, brw_null_reg());
2058 }
2059
2060 brw_set_dp_read_message(p,
2061 insn,
2062 bind_table_index,
2063 0,
2064 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2065 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2066 1, /* msg_length */
2067 1); /* response_length (1 Oword) */
2068 }
2069
2070 /**
2071 * Read a float[4] constant per vertex from VS constant buffer, with
2072 * relative addressing.
2073 */
2074 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
2075 struct brw_reg dest,
2076 struct brw_reg addr_reg,
2077 GLuint offset,
2078 GLuint bind_table_index)
2079 {
2080 struct intel_context *intel = &p->brw->intel;
2081 struct brw_reg src = brw_vec8_grf(0, 0);
2082 int msg_type;
2083
2084 /* Setup MRF[1] with offset into const buffer */
2085 brw_push_insn_state(p);
2086 brw_set_access_mode(p, BRW_ALIGN_1);
2087 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2088 brw_set_mask_control(p, BRW_MASK_DISABLE);
2089 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2090
2091 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2092 * fields ignored.
2093 */
2094 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
2095 addr_reg, brw_imm_d(offset));
2096 brw_pop_insn_state(p);
2097
2098 gen6_resolve_implied_move(p, &src, 0);
2099 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2100
2101 insn->header.predicate_control = BRW_PREDICATE_NONE;
2102 insn->header.compression_control = BRW_COMPRESSION_NONE;
2103 insn->header.destreg__conditionalmod = 0;
2104 insn->header.mask_control = BRW_MASK_DISABLE;
2105
2106 brw_set_dest(p, insn, dest);
2107 brw_set_src0(p, insn, src);
2108
2109 if (intel->gen >= 6)
2110 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2111 else if (intel->gen == 5 || intel->is_g4x)
2112 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2113 else
2114 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2115
2116 brw_set_dp_read_message(p,
2117 insn,
2118 bind_table_index,
2119 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2120 msg_type,
2121 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2122 2, /* msg_length */
2123 1); /* response_length */
2124 }
2125
2126
2127
/**
 * Emit a render target write (framebuffer write) message.
 *
 * \param dispatch_width       8 or 16; selects message control and the null
 *                             destination width
 * \param msg_reg_nr           first MRF of the color payload
 * \param src0                 send source (replaced by the MRF on gen6+)
 * \param binding_table_index  render target binding table entry
 * \param msg_length           payload length in registers
 * \param response_length      usually 0 for RT writes
 * \param eot                  terminate the thread after this write
 * \param header_present       whether the payload starts with a header
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   /* RT writes have no real destination; use a null reg of matching width. */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* NOTE(review): SENDC is used for binding table entry 0 on gen6+ —
    * presumably for render-target dependency checking; confirm against the
    * PRM before relying on this.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      /* Pre-gen6: payload MRF is implied via destreg__conditionalmod. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* last render target write */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2187
2188
2189 /**
2190 * Texture sample instruction.
2191 * Note: the msg_type plus msg_length values determine exactly what kind
2192 * of sampling operation is performed. See volume 4, page 161 of docs.
2193 */
2194 void brw_SAMPLE(struct brw_compile *p,
2195 struct brw_reg dest,
2196 GLuint msg_reg_nr,
2197 struct brw_reg src0,
2198 GLuint binding_table_index,
2199 GLuint sampler,
2200 GLuint writemask,
2201 GLuint msg_type,
2202 GLuint response_length,
2203 GLuint msg_length,
2204 GLuint header_present,
2205 GLuint simd_mode,
2206 GLuint return_format)
2207 {
2208 struct intel_context *intel = &p->brw->intel;
2209 bool need_stall = 0;
2210
2211 if (writemask == 0) {
2212 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2213 return;
2214 }
2215
2216 /* Hardware doesn't do destination dependency checking on send
2217 * instructions properly. Add a workaround which generates the
2218 * dependency by other means. In practice it seems like this bug
2219 * only crops up for texture samples, and only where registers are
2220 * written by the send and then written again later without being
2221 * read in between. Luckily for us, we already track that
2222 * information and use it to modify the writemask for the
2223 * instruction, so that is a guide for whether a workaround is
2224 * needed.
2225 */
2226 if (writemask != WRITEMASK_XYZW) {
2227 GLuint dst_offset = 0;
2228 GLuint i, newmask = 0, len = 0;
2229
2230 for (i = 0; i < 4; i++) {
2231 if (writemask & (1<<i))
2232 break;
2233 dst_offset += 2;
2234 }
2235 for (; i < 4; i++) {
2236 if (!(writemask & (1<<i)))
2237 break;
2238 newmask |= 1<<i;
2239 len++;
2240 }
2241
2242 if (newmask != writemask) {
2243 need_stall = 1;
2244 /* printf("need stall %x %x\n", newmask , writemask); */
2245 }
2246 else {
2247 bool dispatch_16 = false;
2248
2249 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2250
2251 guess_execution_size(p, p->current, dest);
2252 if (p->current->header.execution_size == BRW_EXECUTE_16)
2253 dispatch_16 = true;
2254
2255 newmask = ~newmask & WRITEMASK_XYZW;
2256
2257 brw_push_insn_state(p);
2258
2259 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2260 brw_set_mask_control(p, BRW_MASK_DISABLE);
2261
2262 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2263 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2264 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2265
2266 brw_pop_insn_state(p);
2267
2268 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2269 dest = offset(dest, dst_offset);
2270
2271 /* For 16-wide dispatch, masked channels are skipped in the
2272 * response. For 8-wide, masked channels still take up slots,
2273 * and are just not written to.
2274 */
2275 if (dispatch_16)
2276 response_length = len * 2;
2277 }
2278 }
2279
2280 {
2281 struct brw_instruction *insn;
2282
2283 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2284
2285 insn = next_insn(p, BRW_OPCODE_SEND);
2286 insn->header.predicate_control = 0; /* XXX */
2287 insn->header.compression_control = BRW_COMPRESSION_NONE;
2288 if (intel->gen < 6)
2289 insn->header.destreg__conditionalmod = msg_reg_nr;
2290
2291 brw_set_dest(p, insn, dest);
2292 brw_set_src0(p, insn, src0);
2293 brw_set_sampler_message(p, insn,
2294 binding_table_index,
2295 sampler,
2296 msg_type,
2297 response_length,
2298 msg_length,
2299 header_present,
2300 simd_mode,
2301 return_format);
2302 }
2303
2304 if (need_stall) {
2305 struct brw_reg reg = vec8(offset(dest, response_length-1));
2306
2307 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2308 */
2309 brw_push_insn_state(p);
2310 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2311 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2312 retype(reg, BRW_REGISTER_TYPE_UD));
2313 brw_pop_insn_state(p);
2314 }
2315
2316 }
2317
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
/**
 * Emit a URB write message.
 *
 * \param msg_reg_nr first MRF of the payload
 * \param src0       send source (replaced by the MRF on gen6+ via
 *                   gen6_resolve_implied_move)
 * \param allocate / used / writes_complete / offset / swizzle
 *                   forwarded verbatim to brw_set_urb_message
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      /* OR 0xff00 into header dword 5 (g0.5). */
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   /* The payload must fit in the message register file. */
   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      /* Pre-gen6: payload MRF is implied via destreg__conditionalmod. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2373
2374 static int
2375 brw_find_next_block_end(struct brw_compile *p, int start)
2376 {
2377 int ip;
2378
2379 for (ip = start + 1; ip < p->nr_insn; ip++) {
2380 struct brw_instruction *insn = &p->store[ip];
2381
2382 switch (insn->header.opcode) {
2383 case BRW_OPCODE_ENDIF:
2384 case BRW_OPCODE_ELSE:
2385 case BRW_OPCODE_WHILE:
2386 return ip;
2387 }
2388 }
2389 assert(!"not reached");
2390 return start + 1;
2391 }
2392
2393 /* There is no DO instruction on gen6, so to find the end of the loop
2394 * we have to see if the loop is jumping back before our start
2395 * instruction.
2396 */
2397 static int
2398 brw_find_loop_end(struct brw_compile *p, int start)
2399 {
2400 struct intel_context *intel = &p->brw->intel;
2401 int ip;
2402 int br = 2;
2403
2404 for (ip = start + 1; ip < p->nr_insn; ip++) {
2405 struct brw_instruction *insn = &p->store[ip];
2406
2407 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2408 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2409 : insn->bits3.break_cont.jip;
2410 if (ip + jip / br <= start)
2411 return ip;
2412 }
2413 }
2414 assert(!"not reached");
2415 return start + 1;
2416 }
2417
/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 *
 * Only needed on gen6+; earlier gens don't use UIP/JIP (hence the early
 * return).  Branch distances are stored scaled by br (2 units per
 * instruction).
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;

   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 /* JIP: distance to the end of the enclosing block. */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
	 break;
      case BRW_OPCODE_CONTINUE:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

	 /* Both targets are forward, so the encoded distances must be
	  * non-zero.
	  */
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}
2451
2452 void brw_ff_sync(struct brw_compile *p,
2453 struct brw_reg dest,
2454 GLuint msg_reg_nr,
2455 struct brw_reg src0,
2456 bool allocate,
2457 GLuint response_length,
2458 bool eot)
2459 {
2460 struct intel_context *intel = &p->brw->intel;
2461 struct brw_instruction *insn;
2462
2463 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2464
2465 insn = next_insn(p, BRW_OPCODE_SEND);
2466 brw_set_dest(p, insn, dest);
2467 brw_set_src0(p, insn, src0);
2468 brw_set_src1(p, insn, brw_imm_d(0));
2469
2470 if (intel->gen < 6)
2471 insn->header.destreg__conditionalmod = msg_reg_nr;
2472
2473 brw_set_ff_sync_message(p,
2474 insn,
2475 allocate,
2476 response_length,
2477 eot);
2478 }
2479
2480 /**
2481 * Emit the SEND instruction necessary to generate stream output data on Gen6
2482 * (for transform feedback).
2483 *
2484 * If send_commit_msg is true, this is the last piece of stream output data
2485 * from this thread, so send the data as a committed write. According to the
2486 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2487 *
2488 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2489 * writes are complete by sending the final write as a committed write."
2490 */
2491 void
2492 brw_svb_write(struct brw_compile *p,
2493 struct brw_reg dest,
2494 GLuint msg_reg_nr,
2495 struct brw_reg src0,
2496 GLuint binding_table_index,
2497 bool send_commit_msg)
2498 {
2499 struct brw_instruction *insn;
2500
2501 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2502
2503 insn = next_insn(p, BRW_OPCODE_SEND);
2504 brw_set_dest(p, insn, dest);
2505 brw_set_src0(p, insn, src0);
2506 brw_set_src1(p, insn, brw_imm_d(0));
2507 brw_set_dp_write_message(p, insn,
2508 binding_table_index,
2509 0, /* msg_control: ignored */
2510 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2511 1, /* msg_length */
2512 true, /* header_present */
2513 0, /* last_render_target: ignored */
2514 send_commit_msg, /* response_length */
2515 0, /* end_of_thread */
2516 send_commit_msg); /* send_commit_msg */
2517 }