intel: Convert from GLboolean to 'bool' from stdbool.h.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71 brw_push_insn_state(p);
72 brw_set_mask_control(p, BRW_MASK_DISABLE);
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75 retype(*src, BRW_REGISTER_TYPE_UD));
76 brw_pop_insn_state(p);
77 }
78 *src = brw_message_reg(msg_reg_nr);
79 }
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84 struct intel_context *intel = &p->brw->intel;
85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86 reg->file = BRW_GENERAL_REGISTER_FILE;
87 reg->nr += 111;
88 }
89 }
90
91
/**
 * Encode \p dest as the destination operand of \p insn, and derive the
 * instruction's execution size from the destination width.
 *
 * Handles all four encoding variants: direct vs. register-indirect
 * addressing, each in align1 and align16 access modes.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/MRF register numbers must fit in the 7-bit field; ARF and MRF
    * encodings carry extra bits in .nr, so they are exempt.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A destination horizontal stride of 0 is illegal; promote to 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* align16: subregister is in 16-byte units and a writemask applies. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      /* Register-indirect: address comes from a0 plus an immediate offset. */
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* A destination horizontal stride of 0 is illegal; promote to 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode \p reg as source operand 1 of \p insn.
 *
 * Mirrors brw_set_src0(), except that src1 may be an immediate but never
 * an MRF, and indirect addressing is not supported by the hardware.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* align16: subregister number is in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* Use a <0;1,0> scalar region for EXECUTE_1 instructions. */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
373
374
/**
 * Fill in the message descriptor (bits3) for a math-unit SEND.
 *
 * Message and response lengths are inferred from \p function: POW and
 * the integer-division variants take two source operands, and SINCOS /
 * INT_DIV_QUOTIENT_AND_REMAINDER produce two result registers.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint function,
				  GLuint integer_type,
				  bool low_precision,
				  bool saturate,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   /* src1 carries the message descriptor; start from a zero immediate. */
   brw_set_src1(p, insn, brw_imm_d(0));
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      /* On gen5 the shared-function ID lives in bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
438
439
/**
 * Fill in the URB message descriptor for a gen5+ FF_SYNC message
 * (fixed-function thread synchronization before URB writes).
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* src1 carries the message descriptor; start from a zero immediate. */
   brw_set_src1(p, insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   /* Gen6 moved the SFID into the destreg/conditionalmod header field. */
   if (intel->gen >= 6) {
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
467
/**
 * Fill in the message descriptor for a URB write, dispatching on the
 * per-generation descriptor layout (gen7, gen5-6, and gen4/g4x).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* src1 carries the message descriptor; start from a zero immediate. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7's URB_WRITE_HWORD has no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
      insn->bits3.urb_gen7.header_present = 1;
      insn->bits3.urb_gen7.response_length = response_length;
      insn->bits3.urb_gen7.msg_length = msg_length;
      insn->bits3.urb_gen7.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.	Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
530
/**
 * Fill in the message descriptor for a data-port write, dispatching on
 * the per-generation descriptor layout (gen7, gen6, gen5, gen4/g4x).
 *
 * On gen6+ the render cache is always targeted for writes.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint pixel_scoreboard_clear,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* src1 carries the message descriptor; start from a zero immediate. */
   brw_set_src1(p, insn, brw_imm_ud(0));

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = header_present;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
      insn->bits3.gen6_dp.header_present = header_present;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      /* On gen5 the shared-function ID lives in bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
597
/**
 * Fill in the message descriptor for a data-port read, dispatching on
 * the per-generation descriptor layout (gen7, gen6, gen5, g4x, gen4).
 *
 * \p target_cache selects which cache services the read on pre-gen6
 * hardware; on gen6 it is mapped onto a message-target function instead.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* src1 carries the message descriptor; start from a zero immediate. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = 1;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
   } else if (intel->gen == 6) {
      uint32_t target_function;

      /* Gen6 has no data cache target; route to sampler or render cache. */
      if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
	 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
      else
	 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;

      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
      insn->bits3.gen6_dp.header_present = 1;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = target_function;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      /* On gen5 the shared-function ID lives in bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
674
/**
 * Fill in the message descriptor for a sampler-unit SEND, dispatching on
 * the per-generation descriptor layout (gen7, gen5-6, g4x, gen4).
 *
 * \p eot must be 0: sampler messages never terminate a thread here.
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    bool eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   /* src1 carries the message descriptor; start from a zero immediate. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
      insn->bits3.sampler_gen7.header_present = header_present;
      insn->bits3.sampler_gen7.response_length = response_length;
      insn->bits3.sampler_gen7.msg_length = msg_length;
      insn->bits3.sampler_gen7.end_of_thread = eot;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      /* Gen6 moved the SFID into the header; gen5 keeps it in bits2. */
      if (intel->gen >= 6)
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
	 insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
735
736
#define next_insn brw_next_insn
/**
 * Allocate the next instruction slot, seeded from the template in
 * p->current (the current emission state), and set its opcode.
 *
 * Also clears one-shot state on the template: a pending conditional-mod
 * (and the predicate it implies for the following instruction) applies
 * only to this single emitted instruction.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
759
760 static struct brw_instruction *brw_alu1( struct brw_compile *p,
761 GLuint opcode,
762 struct brw_reg dest,
763 struct brw_reg src )
764 {
765 struct brw_instruction *insn = next_insn(p, opcode);
766 brw_set_dest(p, insn, dest);
767 brw_set_src0(p, insn, src);
768 return insn;
769 }
770
771 static struct brw_instruction *brw_alu2(struct brw_compile *p,
772 GLuint opcode,
773 struct brw_reg dest,
774 struct brw_reg src0,
775 struct brw_reg src1 )
776 {
777 struct brw_instruction *insn = next_insn(p, opcode);
778 brw_set_dest(p, insn, dest);
779 brw_set_src0(p, insn, src0);
780 brw_set_src1(p, insn, src1);
781 return insn;
782 }
783
784
785 /***********************************************************************
786 * Convenience routines.
787 */
/* Generate a public brw_<OP>() wrapper around brw_alu1() for a
 * one-source opcode.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

/* Generate a public brw_<OP>() wrapper around brw_alu2() for a
 * two-source opcode.
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
804
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
/* Generate a public brw_<OP>() emitting the round + predicated-fixup
 * sequence described above (fixup only on pre-gen6 hardware).
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
829
830
/* Instantiate the simple one- and two-source ALU emitters. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


/* Instantiate the two-instruction rounding emitters. */
ROUND(RNDZ)
ROUND(RNDE)
857
858
859 struct brw_instruction *brw_ADD(struct brw_compile *p,
860 struct brw_reg dest,
861 struct brw_reg src0,
862 struct brw_reg src1)
863 {
864 /* 6.2.2: add */
865 if (src0.type == BRW_REGISTER_TYPE_F ||
866 (src0.file == BRW_IMMEDIATE_VALUE &&
867 src0.type == BRW_REGISTER_TYPE_VF)) {
868 assert(src1.type != BRW_REGISTER_TYPE_UD);
869 assert(src1.type != BRW_REGISTER_TYPE_D);
870 }
871
872 if (src1.type == BRW_REGISTER_TYPE_F ||
873 (src1.file == BRW_IMMEDIATE_VALUE &&
874 src1.type == BRW_REGISTER_TYPE_VF)) {
875 assert(src0.type != BRW_REGISTER_TYPE_UD);
876 assert(src0.type != BRW_REGISTER_TYPE_D);
877 }
878
879 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
880 }
881
882 struct brw_instruction *brw_MUL(struct brw_compile *p,
883 struct brw_reg dest,
884 struct brw_reg src0,
885 struct brw_reg src1)
886 {
887 /* 6.32.38: mul */
888 if (src0.type == BRW_REGISTER_TYPE_D ||
889 src0.type == BRW_REGISTER_TYPE_UD ||
890 src1.type == BRW_REGISTER_TYPE_D ||
891 src1.type == BRW_REGISTER_TYPE_UD) {
892 assert(dest.type != BRW_REGISTER_TYPE_F);
893 }
894
895 if (src0.type == BRW_REGISTER_TYPE_F ||
896 (src0.file == BRW_IMMEDIATE_VALUE &&
897 src0.type == BRW_REGISTER_TYPE_VF)) {
898 assert(src1.type != BRW_REGISTER_TYPE_UD);
899 assert(src1.type != BRW_REGISTER_TYPE_D);
900 }
901
902 if (src1.type == BRW_REGISTER_TYPE_F ||
903 (src1.file == BRW_IMMEDIATE_VALUE &&
904 src1.type == BRW_REGISTER_TYPE_VF)) {
905 assert(src0.type != BRW_REGISTER_TYPE_UD);
906 assert(src0.type != BRW_REGISTER_TYPE_D);
907 }
908
909 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
910 src0.nr != BRW_ARF_ACCUMULATOR);
911 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
912 src1.nr != BRW_ARF_ACCUMULATOR);
913
914 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
915 }
916
917
918 void brw_NOP(struct brw_compile *p)
919 {
920 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
921 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
922 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
923 brw_set_src1(p, insn, brw_imm_ud(0x0));
924 }
925
926
927
928
929
930 /***********************************************************************
931 * Comparisons, if/else/endif
932 */
933
/**
 * Emit a JMPI (jump indexed) instruction.  The jump offset lives in src1;
 * forward jumps get their offset patched afterwards by
 * brw_land_fwd_jump().
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   /* JMPI is a scalar control-flow operation: single-channel execution,
    * uncompressed, and not subject to the channel enables.
    */
   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Clear any predicate from the default instruction state so that
    * subsequent instructions are not accidentally predicated.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
949
950 static void
951 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
952 {
953 p->if_stack[p->if_stack_depth] = inst;
954
955 p->if_stack_depth++;
956 if (p->if_stack_array_size <= p->if_stack_depth) {
957 p->if_stack_array_size *= 2;
958 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
959 p->if_stack_array_size);
960 }
961 }
962
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-gen6: IF jumps via the instruction pointer; the jump count
       * (src1 immediate) is filled in later by patch_IF_ELSE().
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: the branch offset lives in the gen6 jump_count field and
       * is patched later; the operands themselves are null.
       */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: branch offsets are the JIP/UIP fields, patched later. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Don't let the predicate leak into following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}
1015
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* dest is a dummy immediate; the jump offset is patched in later by
    * patch_IF_ELSE() via the gen6 jump_count field.
    */
   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The embedded comparison: conditional modifier applied to src0/src1. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1047
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 *
 * With only one channel executing there is no need for mask-stack
 * operations, so the branches become plain instruction-pointer
 * adjustments.  Offsets are in bytes: each instruction is 16 bytes.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Byte offsets: skip to just past the ELSE / up to the ENDIF spot. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1087
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 *
 * Called from brw_ENDIF() once the location of the ENDIF is known.  The
 * encoding of the jump targets differs per generation: pre-gen6 uses
 * jump_count/pop_count in bits3, gen6 uses the branch_gen6 jump_count,
 * and gen7+ uses the JIP/UIP pair.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7+: both the unconditional (UIP) and conditional (JIP)
	  * targets are the ENDIF when there is no ELSE.
	  */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 /* Pop the mask entry the IF pushed. */
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1163
/**
 * Emit an ELSE instruction and push it on the if-stack; its jump targets
 * are filled in later by patch_IF_ELSE() from brw_ENDIF().
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* Per-generation operand encoding, mirroring brw_IF(). */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1196
/**
 * Emit an ENDIF, closing the innermost IF (and optional ELSE), and patch
 * all pending jump targets.
 *
 * Assumes a matching brw_IF()/gen6_IF() was previously pushed; in SPF
 * mode no ENDIF is emitted and the IF/ELSE are rewritten as ADDs.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Per-generation operand encoding. */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1251
/**
 * Emit a BREAK instruction to exit the innermost loop.
 *
 * \param pop_count  pre-gen6 only: number of mask-stack entries to pop
 *                   (ignored on gen6+, where there is no mask stack).
 * The jump targets are patched by the loop-emission code elsewhere.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = pop_count;
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1274
1275 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1276 struct brw_instruction *do_insn)
1277 {
1278 struct brw_instruction *insn;
1279
1280 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1281 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1282 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1283 brw_set_dest(p, insn, brw_ip_reg());
1284 brw_set_src0(p, insn, brw_ip_reg());
1285 brw_set_src1(p, insn, brw_imm_d(0x0));
1286
1287 insn->header.compression_control = BRW_COMPRESSION_NONE;
1288 insn->header.execution_size = BRW_EXECUTE_8;
1289 return insn;
1290 }
1291
/**
 * Emit a pre-gen6 CONTINUE instruction, jumping back toward the loop head.
 *
 * \param pop_count  number of mask-stack entries to pop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}
1306
/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No DO instruction is emitted; return the address of the next
       * instruction so brw_WHILE() knows where the loop starts.
       */
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1347
1348
1349
/**
 * Emit the WHILE instruction closing a loop opened by brw_DO().
 *
 * \param do_insn  the instruction returned by brw_DO() (the loop head on
 *                 gen6+/SPF, or the DO instruction pre-gen6).
 * The backward jump distance encoding varies per generation.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* Jump counts are in 64-bit chunks from gen5 on, so one 128-bit
    * instruction counts as 2.
    */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Negative JIP: jump back to the loop head. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: the WHILE degenerates to a plain IP
	  * adjustment (16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* Jump back to just after the DO instruction. */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1406
1407
1408 /* FORWARD JUMPS:
1409 */
1410 void brw_land_fwd_jump(struct brw_compile *p,
1411 struct brw_instruction *jmp_insn)
1412 {
1413 struct intel_context *intel = &p->brw->intel;
1414 struct brw_instruction *landing = &p->store[p->nr_insn];
1415 GLuint jmpi = 1;
1416
1417 if (intel->gen >= 5)
1418 jmpi = 2;
1419
1420 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1421 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1422
1423 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1424 }
1425
1426
1427
1428 /* To integrate with the above, it makes sense that the comparison
1429 * instruction should populate the flag register. It might be simpler
1430 * just to use the flag reg for most WM tasks?
1431 */
1432 void brw_CMP(struct brw_compile *p,
1433 struct brw_reg dest,
1434 GLuint conditional,
1435 struct brw_reg src0,
1436 struct brw_reg src1)
1437 {
1438 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1439
1440 insn->header.destreg__conditionalmod = conditional;
1441 brw_set_dest(p, insn, dest);
1442 brw_set_src0(p, insn, src0);
1443 brw_set_src1(p, insn, src1);
1444
1445 /* guess_execution_size(insn, src0); */
1446
1447
1448 /* Make it so that future instructions will use the computed flag
1449 * value until brw_set_predicate_control_flag_value() is called
1450 * again.
1451 */
1452 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1453 dest.nr == 0) {
1454 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1455 p->flag_value = 0xff;
1456 }
1457 }
1458
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* Both operands are the notification register n1. */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1473
1474
1475 /***********************************************************************
1476 * Helpers for the various SEND message types:
1477 */
1478
/** Extended math function, float[8].
 *
 * On gen6+ this is a native MATH instruction; before that it is a SEND
 * to the shared math unit.
 *
 * \param function    BRW_MATH_FUNCTION_* opcode for the math box.
 * \param saturate    whether to saturate the result.
 * \param msg_reg_nr  message register (pre-gen6 SEND path only).
 * \param data_type   pre-gen6 message data type (scalar/vector).
 * \param precision   pre-gen6 precision selector.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* The gen6+ MATH instruction only operates on packed GRFs. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      /* Integer-divide variants take integer sources; everything else
       * takes float.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1542
/** Extended math function, float[8].
 *
 * Two-source variant of the gen6+ native MATH instruction (e.g. POW,
 * integer divide).  Gen6+ only.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   /* The MATH instruction only operates on packed GRFs. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   /* Integer-divide variants take integer sources; everything else
    * takes float.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1591
/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * Pre-gen6, the math SEND handles 8 channels at a time, so a SIMD16
 * operation is split into two SENDs (second half flagged with
 * BRW_COMPRESSION_2NDHALF).  On gen6+ a single MATH instruction
 * suffices.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   /* Second half writes one register past the first half's dest. */
   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1663
1664
1665 /**
1666 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1667 * using a constant offset per channel.
1668 *
1669 * The offset must be aligned to oword size (16 bytes). Used for
1670 * register spilling.
1671 */
1672 void brw_oword_block_write_scratch(struct brw_compile *p,
1673 struct brw_reg mrf,
1674 int num_regs,
1675 GLuint offset)
1676 {
1677 struct intel_context *intel = &p->brw->intel;
1678 uint32_t msg_control, msg_type;
1679 int mlen;
1680
1681 if (intel->gen >= 6)
1682 offset /= 16;
1683
1684 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1685
1686 if (num_regs == 1) {
1687 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1688 mlen = 2;
1689 } else {
1690 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1691 mlen = 3;
1692 }
1693
1694 /* Set up the message header. This is g0, with g0.2 filled with
1695 * the offset. We don't want to leave our offset around in g0 or
1696 * it'll screw up texture samples, so set it up inside the message
1697 * reg.
1698 */
1699 {
1700 brw_push_insn_state(p);
1701 brw_set_mask_control(p, BRW_MASK_DISABLE);
1702 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1703
1704 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1705
1706 /* set message header global offset field (reg 0, element 2) */
1707 brw_MOV(p,
1708 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1709 mrf.nr,
1710 2), BRW_REGISTER_TYPE_UD),
1711 brw_imm_ud(offset));
1712
1713 brw_pop_insn_state(p);
1714 }
1715
1716 {
1717 struct brw_reg dest;
1718 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1719 int send_commit_msg;
1720 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1721 BRW_REGISTER_TYPE_UW);
1722
1723 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1724 insn->header.compression_control = BRW_COMPRESSION_NONE;
1725 src_header = vec16(src_header);
1726 }
1727 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1728 insn->header.destreg__conditionalmod = mrf.nr;
1729
1730 /* Until gen6, writes followed by reads from the same location
1731 * are not guaranteed to be ordered unless write_commit is set.
1732 * If set, then a no-op write is issued to the destination
1733 * register to set a dependency, and a read from the destination
1734 * can be used to ensure the ordering.
1735 *
1736 * For gen6, only writes between different threads need ordering
1737 * protection. Our use of DP writes is all about register
1738 * spilling within a thread.
1739 */
1740 if (intel->gen >= 6) {
1741 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1742 send_commit_msg = 0;
1743 } else {
1744 dest = src_header;
1745 send_commit_msg = 1;
1746 }
1747
1748 brw_set_dest(p, insn, dest);
1749 if (intel->gen >= 6) {
1750 brw_set_src0(p, insn, mrf);
1751 } else {
1752 brw_set_src0(p, insn, brw_null_reg());
1753 }
1754
1755 if (intel->gen >= 6)
1756 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1757 else
1758 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1759
1760 brw_set_dp_write_message(p,
1761 insn,
1762 255, /* binding table index (255=stateless) */
1763 msg_control,
1764 msg_type,
1765 mlen,
1766 true, /* header_present */
1767 0, /* pixel scoreboard */
1768 send_commit_msg, /* response_length */
1769 0, /* eot */
1770 send_commit_msg);
1771 }
1772 }
1773
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * \param dest      destination GRF(s) for the read data.
 * \param mrf       message register used for the header.
 * \param num_regs  number of GRFs to read (1 or 2).
 * \param offset    byte offset into the scratch buffer.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message offset is in owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: a copy of g0 with the scratch
    * offset written into element 2.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1848
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 *
 * \param mrf               message register used for the header.
 * \param offset            byte offset into the buffer (owords on gen6+).
 * \param bind_table_index  surface binding table index of the buffer.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: g0 with the offset written into element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1906
1907 /**
1908 * Read a set of dwords from the data port Data Cache (const buffer).
1909 *
1910 * Location (in buffer) appears as UD offsets in the register after
1911 * the provided mrf header reg.
1912 */
1913 void brw_dword_scattered_read(struct brw_compile *p,
1914 struct brw_reg dest,
1915 struct brw_reg mrf,
1916 uint32_t bind_table_index)
1917 {
1918 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1919
1920 brw_push_insn_state(p);
1921 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1922 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1923 brw_set_mask_control(p, BRW_MASK_DISABLE);
1924 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1925 brw_pop_insn_state(p);
1926
1927 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1928 insn->header.destreg__conditionalmod = mrf.nr;
1929
1930 /* cast dest to a uword[8] vector */
1931 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1932
1933 brw_set_dest(p, insn, dest);
1934 brw_set_src0(p, insn, brw_null_reg());
1935
1936 brw_set_dp_read_message(p,
1937 insn,
1938 bind_table_index,
1939 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1940 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1941 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1942 2, /* msg_length */
1943 1); /* response_length */
1944 }
1945
1946
1947
1948 /**
1949 * Read float[4] constant(s) from VS constant buffer.
1950 * For relative addressing, two float[4] constants will be read into 'dest'.
1951 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1952 */
1953 void brw_dp_READ_4_vs(struct brw_compile *p,
1954 struct brw_reg dest,
1955 GLuint location,
1956 GLuint bind_table_index)
1957 {
1958 struct intel_context *intel = &p->brw->intel;
1959 struct brw_instruction *insn;
1960 GLuint msg_reg_nr = 1;
1961
1962 if (intel->gen >= 6)
1963 location /= 16;
1964
1965 /* Setup MRF[1] with location/offset into const buffer */
1966 brw_push_insn_state(p);
1967 brw_set_access_mode(p, BRW_ALIGN_1);
1968 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1969 brw_set_mask_control(p, BRW_MASK_DISABLE);
1970 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1971 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
1972 BRW_REGISTER_TYPE_UD),
1973 brw_imm_ud(location));
1974 brw_pop_insn_state(p);
1975
1976 insn = next_insn(p, BRW_OPCODE_SEND);
1977
1978 insn->header.predicate_control = BRW_PREDICATE_NONE;
1979 insn->header.compression_control = BRW_COMPRESSION_NONE;
1980 insn->header.destreg__conditionalmod = msg_reg_nr;
1981 insn->header.mask_control = BRW_MASK_DISABLE;
1982
1983 brw_set_dest(p, insn, dest);
1984 if (intel->gen >= 6) {
1985 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1986 } else {
1987 brw_set_src0(p, insn, brw_null_reg());
1988 }
1989
1990 brw_set_dp_read_message(p,
1991 insn,
1992 bind_table_index,
1993 0,
1994 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1995 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1996 1, /* msg_length */
1997 1); /* response_length (1 Oword) */
1998 }
1999
2000 /**
2001 * Read a float[4] constant per vertex from VS constant buffer, with
2002 * relative addressing.
2003 */
2004 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
2005 struct brw_reg dest,
2006 struct brw_reg addr_reg,
2007 GLuint offset,
2008 GLuint bind_table_index)
2009 {
2010 struct intel_context *intel = &p->brw->intel;
2011 struct brw_reg src = brw_vec8_grf(0, 0);
2012 int msg_type;
2013
2014 /* Setup MRF[1] with offset into const buffer */
2015 brw_push_insn_state(p);
2016 brw_set_access_mode(p, BRW_ALIGN_1);
2017 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2018 brw_set_mask_control(p, BRW_MASK_DISABLE);
2019 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2020
2021 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2022 * fields ignored.
2023 */
2024 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
2025 addr_reg, brw_imm_d(offset));
2026 brw_pop_insn_state(p);
2027
2028 gen6_resolve_implied_move(p, &src, 0);
2029 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2030
2031 insn->header.predicate_control = BRW_PREDICATE_NONE;
2032 insn->header.compression_control = BRW_COMPRESSION_NONE;
2033 insn->header.destreg__conditionalmod = 0;
2034 insn->header.mask_control = BRW_MASK_DISABLE;
2035
2036 brw_set_dest(p, insn, dest);
2037 brw_set_src0(p, insn, src);
2038
2039 if (intel->gen >= 6)
2040 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2041 else if (intel->gen == 5 || intel->is_g4x)
2042 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2043 else
2044 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2045
2046 brw_set_dp_read_message(p,
2047 insn,
2048 bind_table_index,
2049 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2050 msg_type,
2051 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2052 2, /* msg_length */
2053 1); /* response_length */
2054 }
2055
2056
2057
/**
 * Emit a render target write (framebuffer write) SEND.
 *
 * \param dispatch_width  selects the SIMD16 vs SIMD8 message variant.
 * \param eot             forwarded as the message's End-Of-Thread bit.
 * \param header_present  whether the payload starts with a message header.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  bool eot,
		  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* Gen6+ uses SENDC for writes to binding table index 0.
    * NOTE(review): presumably the dependency-checked render target
    * case — confirm against the PRM.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2117
2118
2119 /**
2120 * Texture sample instruction.
2121 * Note: the msg_type plus msg_length values determine exactly what kind
2122 * of sampling operation is performed. See volume 4, page 161 of docs.
2123 */
2124 void brw_SAMPLE(struct brw_compile *p,
2125 struct brw_reg dest,
2126 GLuint msg_reg_nr,
2127 struct brw_reg src0,
2128 GLuint binding_table_index,
2129 GLuint sampler,
2130 GLuint writemask,
2131 GLuint msg_type,
2132 GLuint response_length,
2133 GLuint msg_length,
2134 bool eot,
2135 GLuint header_present,
2136 GLuint simd_mode)
2137 {
2138 struct intel_context *intel = &p->brw->intel;
2139 bool need_stall = 0;
2140
2141 if (writemask == 0) {
2142 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2143 return;
2144 }
2145
2146 /* Hardware doesn't do destination dependency checking on send
2147 * instructions properly. Add a workaround which generates the
2148 * dependency by other means. In practice it seems like this bug
2149 * only crops up for texture samples, and only where registers are
2150 * written by the send and then written again later without being
2151 * read in between. Luckily for us, we already track that
2152 * information and use it to modify the writemask for the
2153 * instruction, so that is a guide for whether a workaround is
2154 * needed.
2155 */
2156 if (writemask != WRITEMASK_XYZW) {
2157 GLuint dst_offset = 0;
2158 GLuint i, newmask = 0, len = 0;
2159
2160 for (i = 0; i < 4; i++) {
2161 if (writemask & (1<<i))
2162 break;
2163 dst_offset += 2;
2164 }
2165 for (; i < 4; i++) {
2166 if (!(writemask & (1<<i)))
2167 break;
2168 newmask |= 1<<i;
2169 len++;
2170 }
2171
2172 if (newmask != writemask) {
2173 need_stall = 1;
2174 /* printf("need stall %x %x\n", newmask , writemask); */
2175 }
2176 else {
2177 bool dispatch_16 = false;
2178
2179 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2180
2181 guess_execution_size(p, p->current, dest);
2182 if (p->current->header.execution_size == BRW_EXECUTE_16)
2183 dispatch_16 = true;
2184
2185 newmask = ~newmask & WRITEMASK_XYZW;
2186
2187 brw_push_insn_state(p);
2188
2189 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2190 brw_set_mask_control(p, BRW_MASK_DISABLE);
2191
2192 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2193 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2194 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2195
2196 brw_pop_insn_state(p);
2197
2198 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2199 dest = offset(dest, dst_offset);
2200
2201 /* For 16-wide dispatch, masked channels are skipped in the
2202 * response. For 8-wide, masked channels still take up slots,
2203 * and are just not written to.
2204 */
2205 if (dispatch_16)
2206 response_length = len * 2;
2207 }
2208 }
2209
2210 {
2211 struct brw_instruction *insn;
2212
2213 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2214
2215 insn = next_insn(p, BRW_OPCODE_SEND);
2216 insn->header.predicate_control = 0; /* XXX */
2217 insn->header.compression_control = BRW_COMPRESSION_NONE;
2218 if (intel->gen < 6)
2219 insn->header.destreg__conditionalmod = msg_reg_nr;
2220
2221 brw_set_dest(p, insn, dest);
2222 brw_set_src0(p, insn, src0);
2223 brw_set_sampler_message(p, insn,
2224 binding_table_index,
2225 sampler,
2226 msg_type,
2227 response_length,
2228 msg_length,
2229 eot,
2230 header_present,
2231 simd_mode);
2232 }
2233
2234 if (need_stall) {
2235 struct brw_reg reg = vec8(offset(dest, response_length-1));
2236
2237 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2238 */
2239 brw_push_insn_state(p);
2240 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2241 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2242 retype(reg, BRW_REGISTER_TYPE_UD));
2243 brw_pop_insn_state(p);
2244 }
2245
2246 }
2247
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Resolve the implied move of src0 into the MRF for gen6+
    * (see gen6_resolve_implied_move).
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 SENDs name the source MRF in destreg__conditionalmod. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2303
2304 static int
2305 brw_find_next_block_end(struct brw_compile *p, int start)
2306 {
2307 int ip;
2308
2309 for (ip = start + 1; ip < p->nr_insn; ip++) {
2310 struct brw_instruction *insn = &p->store[ip];
2311
2312 switch (insn->header.opcode) {
2313 case BRW_OPCODE_ENDIF:
2314 case BRW_OPCODE_ELSE:
2315 case BRW_OPCODE_WHILE:
2316 return ip;
2317 }
2318 }
2319 assert(!"not reached");
2320 return start + 1;
2321 }
2322
2323 /* There is no DO instruction on gen6, so to find the end of the loop
2324 * we have to see if the loop is jumping back before our start
2325 * instruction.
2326 */
2327 static int
2328 brw_find_loop_end(struct brw_compile *p, int start)
2329 {
2330 struct intel_context *intel = &p->brw->intel;
2331 int ip;
2332 int br = 2;
2333
2334 for (ip = start + 1; ip < p->nr_insn; ip++) {
2335 struct brw_instruction *insn = &p->store[ip];
2336
2337 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2338 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2339 : insn->bits3.break_cont.jip;
2340 if (ip + jip / br <= start)
2341 return ip;
2342 }
2343 }
2344 assert(!"not reached");
2345 return start + 1;
2346 }
2347
2348 /* After program generation, go back and update the UIP and JIP of
2349 * BREAK and CONT instructions to their correct locations.
2350 */
2351 void
2352 brw_set_uip_jip(struct brw_compile *p)
2353 {
2354 struct intel_context *intel = &p->brw->intel;
2355 int ip;
2356 int br = 2;
2357
2358 if (intel->gen < 6)
2359 return;
2360
2361 for (ip = 0; ip < p->nr_insn; ip++) {
2362 struct brw_instruction *insn = &p->store[ip];
2363
2364 switch (insn->header.opcode) {
2365 case BRW_OPCODE_BREAK:
2366 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2367 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2368 insn->bits3.break_cont.uip =
2369 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2370 break;
2371 case BRW_OPCODE_CONTINUE:
2372 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2373 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2374
2375 assert(insn->bits3.break_cont.uip != 0);
2376 assert(insn->bits3.break_cont.jip != 0);
2377 break;
2378 }
2379 }
2380 }
2381
/**
 * Emit a SEND carrying an FF_SYNC URB message
 * (see brw_set_ff_sync_message for the descriptor fields).
 */
void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate,
		 GLuint response_length,
		 bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Resolve the implied move of src0 into the MRF for gen6+
    * (see gen6_resolve_implied_move).
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 SENDs name the source MRF in destreg__conditionalmod. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}