src/mesa/drivers/dri/i965/brw_eu_emit.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"

#include "glsl/ralloc.h"

/***********************************************************************
 * Internal helper for constructing instructions
 */

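/* Infer an execution size for an instruction from the width of its
 * destination register: in compressed (SIMD16) mode, an 8-wide register
 * still implies a 16-channel execution size.
 */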
static void guess_execution_size(struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 struct brw_reg reg)
{
   if (reg.width == BRW_WIDTH_8 && p->compressed)
      insn->header.execution_size = BRW_EXECUTE_16;
   else
      insn->header.execution_size = reg.width; /* note - definitions are compatible */
}


/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case. This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
                          struct brw_reg *src,
                          GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   if (intel->gen < 6)
      return;

   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}
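/* A minimal usage sketch (not part of the original file; the GRF and MRF
 * numbers are hypothetical):
 *
 *    struct brw_reg payload = brw_vec8_grf(2, 0);
 *    gen6_resolve_implied_move(p, &payload, 1);
 *    // On gen6+, payload now refers to m1 and the data has been MOVed
 *    // there; on earlier gens the call is a no-op and the SEND itself
 *    // performs the implied move.
 */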

static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
   /* From the BSpec / ISA Reference / send - [DevIVB+]:
    * "The send with EOT should use register space R112-R127 for <src>. This is
    * to enable loading of a new thread into the same slot while the message
    * with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   struct intel_context *intel = &p->brw->intel;
   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GEN7_MRF_HACK_START;
   }
}


void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
          * Although Dst.HorzStride is a don't care for Align16, HW needs
          * this to be programmed as "01".
          */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}

extern int reg_type_size[];

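/* Sanity-check a register region against the region restrictions listed in
 * the PRM ("Register Region Restrictions"). The *_for_reg tables decode the
 * encoded hstride/vstride/width fields into element counts.
 */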
static void
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
{
   int hstride_for_reg[] = {0, 1, 2, 4};
   int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
   int width_for_reg[] = {1, 2, 4, 8, 16};
   int execsize_for_reg[] = {1, 2, 4, 8, 16};
   int width, hstride, vstride, execsize;

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      /* 3.3.6: Region Parameters. Restriction: Immediate vectors
       * mean the destination has to be 128-bit aligned and the
       * destination horiz stride has to be a word.
       */
      if (reg.type == BRW_REGISTER_TYPE_V) {
         assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
                reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
      }

      return;
   }

   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       reg.nr == BRW_ARF_NULL)
      return;

   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
   hstride = hstride_for_reg[reg.hstride];

   if (reg.vstride == 0xf) {
      vstride = -1;
   } else {
      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
      vstride = vstride_for_reg[reg.vstride];
   }

   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
   width = width_for_reg[reg.width];

   assert(insn->header.execution_size >= 0 &&
          insn->header.execution_size < Elements(execsize_for_reg));
   execsize = execsize_for_reg[insn->header.execution_size];

   /* Restrictions from 3.3.10: Register Region Restrictions. */
   /* 3. */
   assert(execsize >= width);

   /* 4. */
   if (execsize == width && hstride != 0) {
      assert(vstride == -1 || vstride == width * hstride);
   }

   /* 5. */
   if (execsize == width && hstride == 0) {
      /* no restriction on vstride. */
   }

   /* 6. */
   if (width == 1) {
      assert(hstride == 0);
   }

   /* 7. */
   if (execsize == 1 && width == 1) {
      assert(hstride == 0);
      assert(vstride == 0);
   }

   /* 8. */
   if (vstride == 0 && hstride == 0) {
      assert(width == 1);
   }

   /* 10. Check destination issues. */
}

void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg reg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   if (intel->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
                           insn->header.opcode == BRW_OPCODE_SENDC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   validate_reg(insn, reg);

   insn->bits1.da1.src0_reg_file = reg.file;
   insn->bits1.da1.src0_reg_type = reg.type;
   insn->bits2.da1.src0_abs = reg.abs;
   insn->bits2.da1.src0_negate = reg.negate;
   insn->bits2.da1.src0_address_mode = reg.address_mode;

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;

      /* Required to set some fields in src1 as well:
       */
      insn->bits1.da1.src1_reg_file = 0; /* arf */
      insn->bits1.da1.src1_reg_type = reg.type;
   }
   else
   {
      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
         if (insn->header.access_mode == BRW_ALIGN_1) {
            insn->bits2.da1.src0_subreg_nr = reg.subnr;
            insn->bits2.da1.src0_reg_nr = reg.nr;
         }
         else {
            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
            insn->bits2.da16.src0_reg_nr = reg.nr;
         }
      }
      else {
         insn->bits2.ia1.src0_subreg_nr = reg.subnr;

         if (insn->header.access_mode == BRW_ALIGN_1) {
            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
         }
         else {
            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
         }
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits2.da1.src0_width = BRW_WIDTH_1;
            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits2.da1.src0_horiz_stride = reg.hstride;
            insn->bits2.da1.src0_width = reg.width;
            insn->bits2.da1.src0_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits2.da16.src0_vert_stride = reg.vstride;
      }
   }
}


void brw_set_src1(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits3.da1.src1_subreg_nr = reg.subnr;
         insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
         insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
         insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits3.da1.src1_width = BRW_WIDTH_1;
            insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits3.da1.src1_horiz_stride = reg.hstride;
            insn->bits3.da1.src1_width = reg.width;
            insn->bits3.da1.src1_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}

/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data. Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
                           struct brw_instruction *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
         inst->header.destreg__conditionalmod = sfid;
      } else {
         /* Set Extended Message Descriptor (ex_desc) */
         inst->bits2.send_gen5.sfid = sfid;
         inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}

static void brw_set_math_message( struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  GLuint function,
                                  GLuint integer_type,
                                  bool low_precision,
                                  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }


   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
                              msg_length, response_length, false, false);
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }
   insn->header.saturate = 0;
}


static void brw_set_ff_sync_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    bool allocate,
                                    GLuint response_length,
                                    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}

static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 bool allocate,
                                 bool used,
                                 GLuint msg_length,
                                 GLuint response_length,
                                 bool end_of_thread,
                                 bool complete,
                                 GLuint offset,
                                 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used; /* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used; /* ? */
      insn->bits3.urb.complete = complete;
   }
}

void
brw_set_dp_write_message(struct brw_compile *p,
                         struct brw_instruction *insn,
                         GLuint binding_table_index,
                         GLuint msg_control,
                         GLuint msg_type,
                         GLuint msg_length,
                         bool header_present,
                         GLuint last_render_target,
                         GLuint response_length,
                         GLuint end_of_thread,
                         GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}

void
brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint msg_control,
                        GLuint msg_type,
                        GLuint target_cache,
                        GLuint msg_length,
                        bool header_present,
                        GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
   }
}

void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint sampler,
                        GLuint msg_type,
                        GLuint response_length,
                        GLuint msg_length,
                        GLuint header_present,
                        GLuint simd_mode,
                        GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}

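/* Allocate the next instruction slot in the program store (growing the
 * store if needed), initialize it from the current default instruction
 * state in p->current, and tag it with the given opcode.
 */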
#define next_insn brw_next_insn
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   if (p->nr_insn + 1 > p->store_size) {
      if (0)
         printf("increasing the store size to %d\n", p->store_size << 1);
      p->store_size <<= 1;
      p->store = reralloc(p->mem_ctx, p->store,
                          struct brw_instruction, p->store_size);
      if (!p->store)
         assert(!"realloc eu store memory failed");
   }

   p->next_insn_offset += 16;
   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}

static struct brw_instruction *brw_alu1( struct brw_compile *p,
                                         GLuint opcode,
                                         struct brw_reg dest,
                                         struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static struct brw_instruction *brw_alu2(struct brw_compile *p,
                                        GLuint opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

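/* Three-source instructions encode subregister numbers in units of dwords
 * rather than bytes. For a scalar (vstride 0) source, also fold the
 * replicated swizzle channel into the subregister number.
 */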
static int
get_3src_subreg_nr(struct brw_reg reg)
{
   if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
   } else {
      return reg.subnr / 4;
   }
}

static struct brw_instruction *brw_alu3(struct brw_compile *p,
                                        GLuint opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1,
                                        struct brw_reg src2)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (intel->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types. The MAD and LRP emitters ensure
       * that all four types are float. The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}


/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
                                 struct brw_reg dest,         \
                                 struct brw_reg src0)         \
{                                                             \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);           \
}

#define ALU2(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
                                 struct brw_reg dest,         \
                                 struct brw_reg src0,         \
                                 struct brw_reg src1)         \
{                                                             \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);     \
}

#define ALU3(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
                                 struct brw_reg dest,         \
                                 struct brw_reg src0,         \
                                 struct brw_reg src1,         \
                                 struct brw_reg src2)         \
{                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

#define ALU3F(OP)                                             \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
                                 struct brw_reg dest,         \
                                 struct brw_reg src0,         \
                                 struct brw_reg src1,         \
                                 struct brw_reg src2)         \
{                                                             \
   assert(dest.type == BRW_REGISTER_TYPE_F);                  \
   assert(src0.type == BRW_REGISTER_TYPE_F);                  \
   assert(src1.type == BRW_REGISTER_TYPE_F);                  \
   assert(src2.type == BRW_REGISTER_TYPE_F);                  \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register. A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                             \
void brw_##OP(struct brw_compile *p,                          \
              struct brw_reg dest,                            \
              struct brw_reg src)                             \
{                                                             \
   struct brw_instruction *rnd, *add;                         \
   rnd = next_insn(p, BRW_OPCODE_##OP);                       \
   brw_set_dest(p, rnd, dest);                                \
   brw_set_src0(p, rnd, src);                                 \
                                                              \
   if (p->brw->intel.gen < 6) {                               \
      /* turn on round-increments */                          \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));          \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;   \
   }                                                          \
}


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)

ROUND(RNDZ)
ROUND(RNDE)


struct brw_instruction *brw_ADD(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

struct brw_instruction *brw_AVG(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      assert(!"Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

struct brw_instruction *brw_MUL(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}


void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, insn, brw_imm_ud(0x0));
}



/***********************************************************************
 * Comparisons, if/else/endif
 */

struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}

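/* The if_stack holds the indices (not pointers, since p->store may be
 * reallocated) of the IF/ELSE instructions that still need their jump
 * targets patched by brw_ENDIF().
 */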
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static struct brw_instruction *
pop_if_stack(struct brw_compile *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   /* Grow before writing: both loop_stack[loop_stack_depth] and
    * if_depth_in_loop[loop_stack_depth + 1] are about to be written.
    */
   if (p->loop_stack_array_size <= p->loop_stack_depth + 1) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
                                     p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
   p->if_depth_in_loop[p->loop_stack_depth] = 0;
}

static struct brw_instruction *
get_inner_do_insn(struct brw_compile *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack). Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off. If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
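
/* A minimal usage sketch (not from the original source; the condition setup
 * via CMP is a hypothetical example):
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, x, y);
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ...then-block instructions...
 *    brw_ELSE(p);
 *    ...else-block instructions...
 *    brw_ENDIF(p);    // patches the IF/ELSE jump targets
 */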

/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier). It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (intel->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* The jump count is in units of 64-bit chunks, so one 128-bit
    * instruction requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}

void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (intel->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of the instruction
    * store memory (p->store), so call it first before referencing the
    * instruction store pointer from an index.
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}

struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}

struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}

struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop. We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO. WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
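
/* A minimal usage sketch (not from the original source):
 *
 *    brw_DO(p, BRW_EXECUTE_8);
 *    ...loop body, using brw_BREAK(p) / brw_CONT(p) as needed...
 *    brw_WHILE(p);    // jumps back; pre-gen6 also patches BREAK/CONT
 */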
1575
1576 /**
1577 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1578 * instruction here.
1579 *
1580 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1581 * nesting, since it can always just point to the end of the block/current loop.
1582 */
1583 static void
1584 brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
1585 {
1586 struct intel_context *intel = &p->brw->intel;
1587 struct brw_instruction *do_inst = get_inner_do_insn(p);
1588 struct brw_instruction *inst;
1589 int br = (intel->gen == 5) ? 2 : 1;
1590
1591 for (inst = while_inst - 1; inst != do_inst; inst--) {
1592 /* If the jump count is != 0, that means that this instruction has already
1593 * been patched because it's part of a loop inside of the one we're
1594 * patching.
1595 */
1596 if (inst->header.opcode == BRW_OPCODE_BREAK &&
1597 inst->bits3.if_else.jump_count == 0) {
1598 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
1599 } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
1600 inst->bits3.if_else.jump_count == 0) {
1601 inst->bits3.if_else.jump_count = br * (while_inst - inst);
1602 }
1603 }
1604 }
1605
1606 struct brw_instruction *brw_WHILE(struct brw_compile *p)
1607 {
1608 struct intel_context *intel = &p->brw->intel;
1609 struct brw_instruction *insn, *do_insn;
1610 GLuint br = 1;
1611
1612 if (intel->gen >= 5)
1613 br = 2;
1614
1615 if (intel->gen >= 7) {
1616 insn = next_insn(p, BRW_OPCODE_WHILE);
1617 do_insn = get_inner_do_insn(p);
1618
1619 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1620 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1621 brw_set_src1(p, insn, brw_imm_ud(0));
1622 insn->bits3.break_cont.jip = br * (do_insn - insn);
1623
1624 insn->header.execution_size = BRW_EXECUTE_8;
1625 } else if (intel->gen == 6) {
1626 insn = next_insn(p, BRW_OPCODE_WHILE);
1627 do_insn = get_inner_do_insn(p);
1628
1629 brw_set_dest(p, insn, brw_imm_w(0));
1630 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1631 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1632 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1633
1634 insn->header.execution_size = BRW_EXECUTE_8;
1635 } else {
1636 if (p->single_program_flow) {
1637 insn = next_insn(p, BRW_OPCODE_ADD);
1638 do_insn = get_inner_do_insn(p);
1639
1640 brw_set_dest(p, insn, brw_ip_reg());
1641 brw_set_src0(p, insn, brw_ip_reg());
1642 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1643 insn->header.execution_size = BRW_EXECUTE_1;
1644 } else {
1645 insn = next_insn(p, BRW_OPCODE_WHILE);
1646 do_insn = get_inner_do_insn(p);
1647
1648 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1649
1650 brw_set_dest(p, insn, brw_ip_reg());
1651 brw_set_src0(p, insn, brw_ip_reg());
1652 brw_set_src1(p, insn, brw_imm_d(0));
1653
1654 insn->header.execution_size = do_insn->header.execution_size;
1655 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1656 insn->bits3.if_else.pop_count = 0;
1657 insn->bits3.if_else.pad0 = 0;
1658
1659 brw_patch_break_cont(p, insn);
1660 }
1661 }
1662 insn->header.compression_control = BRW_COMPRESSION_NONE;
1663 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1664
1665 p->loop_stack_depth--;
1666
1667 return insn;
1668 }
1669
1670
1671 /* FORWARD JUMPS:
1672 */
1673 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1674 {
1675 struct intel_context *intel = &p->brw->intel;
1676 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1677 GLuint jmpi = 1;
1678
1679 if (intel->gen >= 5)
1680 jmpi = 2;
1681
1682 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1683 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1684
1685 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1686 }
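
/* A hedged sketch of the caller protocol (illustrative, not a verbatim
 * caller): record the index of a JMPI emitted with a zero immediate, emit
 * the instructions to be skipped, then patch the distance:
 *
 *    int jmp = p->nr_insn;
 *    brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
 *    ... emit instructions to be skipped ...
 *    brw_land_fwd_jump(p, jmp);
 */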
1687
1688
1689
1690 /* To integrate with the jump instructions above, the comparison
1691  * instruction should populate the flag register.  It might be simpler
1692  * still to use the flag register for most WM tasks.
1693  */
1694 void brw_CMP(struct brw_compile *p,
1695 struct brw_reg dest,
1696 GLuint conditional,
1697 struct brw_reg src0,
1698 struct brw_reg src1)
1699 {
1700 struct intel_context *intel = &p->brw->intel;
1701 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1702
1703 insn->header.destreg__conditionalmod = conditional;
1704 brw_set_dest(p, insn, dest);
1705 brw_set_src0(p, insn, src0);
1706 brw_set_src1(p, insn, src1);
1707
1711 /* Make it so that future instructions will use the computed flag
1712 * value until brw_set_predicate_control_flag_value() is called
1713 * again.
1714 */
1715 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1716 dest.nr == 0) {
1717 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1718 p->flag_value = 0xff;
1719 }
1720
1721 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1722 * page says:
1723 * "Any CMP instruction with a null destination must use a {switch}."
1724 *
1725 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1726 * mentioned on their work-arounds pages.
1727 */
1728 if (intel->gen == 7) {
1729 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1730 dest.nr == BRW_ARF_NULL) {
1731 insn->header.thread_control = BRW_THREAD_SWITCH;
1732 }
1733 }
1734 }
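
/* A hypothetical sketch of the resulting idiom: compare into the null
 * register to set f0, then let following instructions predicate on it
 * (brw_CMP enables BRW_PREDICATE_NORMAL itself when dest is null):
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, x, y);
 *    brw_SEL(p, dst, x, y);                     predicated on f0
 *    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 */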
1735
1736 /* Issue a 'wait' instruction on notification register n1; the host can
1737  * program MMIO to set the notification and wake the thread. */
1738 void brw_WAIT (struct brw_compile *p)
1739 {
1740 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1741 struct brw_reg src = brw_notification_1_reg();
1742
1743 brw_set_dest(p, insn, src);
1744 brw_set_src0(p, insn, src);
1745 brw_set_src1(p, insn, brw_null_reg());
1746    insn->header.execution_size = 0; /* must be BRW_EXECUTE_1 (one channel) */
1747 insn->header.predicate_control = 0;
1748 insn->header.compression_control = 0;
1749 }
1750
1751
1752 /***********************************************************************
1753 * Helpers for the various SEND message types:
1754 */
1755
1756 /** Extended math function, float[8].
1757 */
1758 void brw_math( struct brw_compile *p,
1759 struct brw_reg dest,
1760 GLuint function,
1761 GLuint msg_reg_nr,
1762 struct brw_reg src,
1763 GLuint data_type,
1764 GLuint precision )
1765 {
1766 struct intel_context *intel = &p->brw->intel;
1767
1768 if (intel->gen >= 6) {
1769 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1770
1771 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1772 (intel->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1773 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1774
1775 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1776 if (intel->gen == 6)
1777 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1778
1779 /* Source modifiers are ignored for extended math instructions on Gen6. */
1780 if (intel->gen == 6) {
1781 assert(!src.negate);
1782 assert(!src.abs);
1783 }
1784
1785 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1786 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1787 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1788 assert(src.type != BRW_REGISTER_TYPE_F);
1789 } else {
1790 assert(src.type == BRW_REGISTER_TYPE_F);
1791 }
1792
1793 /* Math is the same ISA format as other opcodes, except that CondModifier
1794 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1795 */
1796 insn->header.destreg__conditionalmod = function;
1797
1798 brw_set_dest(p, insn, dest);
1799 brw_set_src0(p, insn, src);
1800 brw_set_src1(p, insn, brw_null_reg());
1801 } else {
1802 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1803
1804 /* Example code doesn't set predicate_control for send
1805 * instructions.
1806 */
1807 insn->header.predicate_control = 0;
1808 insn->header.destreg__conditionalmod = msg_reg_nr;
1809
1810 brw_set_dest(p, insn, dest);
1811 brw_set_src0(p, insn, src);
1812 brw_set_math_message(p,
1813 insn,
1814 function,
1815 src.type == BRW_REGISTER_TYPE_D,
1816 precision,
1817 data_type);
1818 }
1819 }
1820
1821 /** Extended math function, float[8].
1822 */
1823 void brw_math2(struct brw_compile *p,
1824 struct brw_reg dest,
1825 GLuint function,
1826 struct brw_reg src0,
1827 struct brw_reg src1)
1828 {
1829 struct intel_context *intel = &p->brw->intel;
1830 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1831
1832 assert(intel->gen >= 6);
1833 (void) intel;
1834
1836 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1837 (intel->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1838 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1839 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1840
1841 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1842 if (intel->gen == 6) {
1843 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1844 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1845 }
1846
1847 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1848 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1849 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1850 assert(src0.type != BRW_REGISTER_TYPE_F);
1851 assert(src1.type != BRW_REGISTER_TYPE_F);
1852 } else {
1853 assert(src0.type == BRW_REGISTER_TYPE_F);
1854 assert(src1.type == BRW_REGISTER_TYPE_F);
1855 }
1856
1857 /* Source modifiers are ignored for extended math instructions on Gen6. */
1858 if (intel->gen == 6) {
1859 assert(!src0.negate);
1860 assert(!src0.abs);
1861 assert(!src1.negate);
1862 assert(!src1.abs);
1863 }
1864
1865 /* Math is the same ISA format as other opcodes, except that CondModifier
1866 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1867 */
1868 insn->header.destreg__conditionalmod = function;
1869
1870 brw_set_dest(p, insn, dest);
1871 brw_set_src0(p, insn, src0);
1872 brw_set_src1(p, insn, src1);
1873 }
1874
1875
1876 /**
1877  * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1878  * using a single constant offset carried in the message header.
1879 *
1880 * The offset must be aligned to oword size (16 bytes). Used for
1881 * register spilling.
1882 */
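/* A sketch of the message this helper assembles, assuming the caller hands
 * in a free MRF and has already placed the data to spill in m(mrf.nr + 1)
 * onward: m(mrf.nr) becomes a copy of g0 with the scratch offset written
 * into element 2, followed by one or two registers of data (mlen 2 or 3).
 */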
1883 void brw_oword_block_write_scratch(struct brw_compile *p,
1884 struct brw_reg mrf,
1885 int num_regs,
1886 GLuint offset)
1887 {
1888 struct intel_context *intel = &p->brw->intel;
1889 uint32_t msg_control, msg_type;
1890 int mlen;
1891
1892 if (intel->gen >= 6)
1893 offset /= 16;
1894
1895 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1896
1897 if (num_regs == 1) {
1898 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1899 mlen = 2;
1900 } else {
1901 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1902 mlen = 3;
1903 }
1904
1905 /* Set up the message header. This is g0, with g0.2 filled with
1906 * the offset. We don't want to leave our offset around in g0 or
1907 * it'll screw up texture samples, so set it up inside the message
1908 * reg.
1909 */
1910 {
1911 brw_push_insn_state(p);
1912 brw_set_mask_control(p, BRW_MASK_DISABLE);
1913 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1914
1915 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1916
1917 /* set message header global offset field (reg 0, element 2) */
1918 brw_MOV(p,
1919 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1920 mrf.nr,
1921 2), BRW_REGISTER_TYPE_UD),
1922 brw_imm_ud(offset));
1923
1924 brw_pop_insn_state(p);
1925 }
1926
1927 {
1928 struct brw_reg dest;
1929 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1930 int send_commit_msg;
1931 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1932 BRW_REGISTER_TYPE_UW);
1933
1934 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1935 insn->header.compression_control = BRW_COMPRESSION_NONE;
1936 src_header = vec16(src_header);
1937 }
1938 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1939 insn->header.destreg__conditionalmod = mrf.nr;
1940
1941 /* Until gen6, writes followed by reads from the same location
1942 * are not guaranteed to be ordered unless write_commit is set.
1943 * If set, then a no-op write is issued to the destination
1944 * register to set a dependency, and a read from the destination
1945 * can be used to ensure the ordering.
1946 *
1947 * For gen6, only writes between different threads need ordering
1948 * protection. Our use of DP writes is all about register
1949 * spilling within a thread.
1950 */
1951 if (intel->gen >= 6) {
1952 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1953 send_commit_msg = 0;
1954 } else {
1955 dest = src_header;
1956 send_commit_msg = 1;
1957 }
1958
1959 brw_set_dest(p, insn, dest);
1960 if (intel->gen >= 6) {
1961 brw_set_src0(p, insn, mrf);
1962 } else {
1963 brw_set_src0(p, insn, brw_null_reg());
1964 }
1965
1966 if (intel->gen >= 6)
1967 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1968 else
1969 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1970
1971 brw_set_dp_write_message(p,
1972 insn,
1973 255, /* binding table index (255=stateless) */
1974 msg_control,
1975 msg_type,
1976 mlen,
1977 true, /* header_present */
1978 0, /* not a render target */
1979 send_commit_msg, /* response_length */
1980 0, /* eot */
1981 send_commit_msg);
1982 }
1983 }
1984
1985
1986 /**
1987  * Read a block of OWORDs (half a GRF each) from the scratch buffer,
1988  * using a single constant offset carried in the message header.
1989 *
1990 * Offset must be aligned to oword size (16 bytes). Used for register
1991 * spilling.
1992 */
1993 void
1994 brw_oword_block_read_scratch(struct brw_compile *p,
1995 struct brw_reg dest,
1996 struct brw_reg mrf,
1997 int num_regs,
1998 GLuint offset)
1999 {
2000 struct intel_context *intel = &p->brw->intel;
2001 uint32_t msg_control;
2002 int rlen;
2003
2004 if (intel->gen >= 6)
2005 offset /= 16;
2006
2007 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2008 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2009
2010 if (num_regs == 1) {
2011 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2012 rlen = 1;
2013 } else {
2014 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2015 rlen = 2;
2016 }
2017
2018 {
2019 brw_push_insn_state(p);
2020 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2021 brw_set_mask_control(p, BRW_MASK_DISABLE);
2022
2023 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2024
2025 /* set message header global offset field (reg 0, element 2) */
2026 brw_MOV(p,
2027 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2028 mrf.nr,
2029 2), BRW_REGISTER_TYPE_UD),
2030 brw_imm_ud(offset));
2031
2032 brw_pop_insn_state(p);
2033 }
2034
2035 {
2036 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2037
2038 assert(insn->header.predicate_control == 0);
2039 insn->header.compression_control = BRW_COMPRESSION_NONE;
2040 insn->header.destreg__conditionalmod = mrf.nr;
2041
2042       brw_set_dest(p, insn, dest); /* dest was retyped to UW above */
2043 if (intel->gen >= 6) {
2044 brw_set_src0(p, insn, mrf);
2045 } else {
2046 brw_set_src0(p, insn, brw_null_reg());
2047 }
2048
2049 brw_set_dp_read_message(p,
2050 insn,
2051 255, /* binding table index (255=stateless) */
2052 msg_control,
2053 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2054 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2055 1, /* msg_length */
2056 true, /* header_present */
2057 rlen);
2058 }
2059 }
2060
2061 /**
2062 * Read a float[4] vector from the data port Data Cache (const buffer).
2063 * Location (in buffer) should be a multiple of 16.
2064 * Used for fetching shader constants.
2065 */
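/* A hypothetical call reading one constant vector from binding table slot 3
 * at byte offset 32 into g4, staging the header through m2:
 *
 *    brw_oword_block_read(p, brw_vec8_grf(4, 0), brw_message_reg(2), 32, 3);
 */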
2066 void brw_oword_block_read(struct brw_compile *p,
2067 struct brw_reg dest,
2068 struct brw_reg mrf,
2069 uint32_t offset,
2070 uint32_t bind_table_index)
2071 {
2072 struct intel_context *intel = &p->brw->intel;
2073
2074    /* On gen6+, the offset is specified in OWORDs (16 bytes), not bytes. */
2075 if (intel->gen >= 6)
2076 offset /= 16;
2077
2078 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2079
2080 brw_push_insn_state(p);
2081 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2082 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2083 brw_set_mask_control(p, BRW_MASK_DISABLE);
2084
2085 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2086
2087 /* set message header global offset field (reg 0, element 2) */
2088 brw_MOV(p,
2089 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2090 mrf.nr,
2091 2), BRW_REGISTER_TYPE_UD),
2092 brw_imm_ud(offset));
2093
2094 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2095 insn->header.destreg__conditionalmod = mrf.nr;
2096
2097 /* cast dest to a uword[8] vector */
2098 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2099
2100 brw_set_dest(p, insn, dest);
2101 if (intel->gen >= 6) {
2102 brw_set_src0(p, insn, mrf);
2103 } else {
2104 brw_set_src0(p, insn, brw_null_reg());
2105 }
2106
2107 brw_set_dp_read_message(p,
2108 insn,
2109 bind_table_index,
2110 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2111 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2112 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2113 1, /* msg_length */
2114 true, /* header_present */
2115 1); /* response_length (1 reg, 2 owords!) */
2116
2117 brw_pop_insn_state(p);
2118 }
2119
2120
2121 void brw_fb_WRITE(struct brw_compile *p,
2122 int dispatch_width,
2123 GLuint msg_reg_nr,
2124 struct brw_reg src0,
2125 GLuint msg_control,
2126 GLuint binding_table_index,
2127 GLuint msg_length,
2128 GLuint response_length,
2129 bool eot,
2130 bool header_present)
2131 {
2132 struct intel_context *intel = &p->brw->intel;
2133 struct brw_instruction *insn;
2134 GLuint msg_type;
2135 struct brw_reg dest;
2136
2137 if (dispatch_width == 16)
2138 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2139 else
2140 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2141
2142 if (intel->gen >= 6) {
2143 insn = next_insn(p, BRW_OPCODE_SENDC);
2144 } else {
2145 insn = next_insn(p, BRW_OPCODE_SEND);
2146 }
2147 /* The execution mask is ignored for render target writes. */
2148 insn->header.predicate_control = 0;
2149 insn->header.compression_control = BRW_COMPRESSION_NONE;
2150
2151 if (intel->gen >= 6) {
2152 /* headerless version, just submit color payload */
2153 src0 = brw_message_reg(msg_reg_nr);
2154
2155 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2156 } else {
2157 insn->header.destreg__conditionalmod = msg_reg_nr;
2158
2159 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2160 }
2161
2162 brw_set_dest(p, insn, dest);
2163 brw_set_src0(p, insn, src0);
2164 brw_set_dp_write_message(p,
2165 insn,
2166 binding_table_index,
2167 msg_control,
2168 msg_type,
2169 msg_length,
2170 header_present,
2171 eot, /* last render target write */
2172 response_length,
2173 eot,
2174 0 /* send_commit_msg */);
2175 }
2176
2177
2178 /**
2179 * Texture sample instruction.
2180 * Note: the msg_type plus msg_length values determine exactly what kind
2181 * of sampling operation is performed. See volume 4, page 161 of docs.
2182 */
2183 void brw_SAMPLE(struct brw_compile *p,
2184 struct brw_reg dest,
2185 GLuint msg_reg_nr,
2186 struct brw_reg src0,
2187 GLuint binding_table_index,
2188 GLuint sampler,
2189 GLuint msg_type,
2190 GLuint response_length,
2191 GLuint msg_length,
2192 GLuint header_present,
2193 GLuint simd_mode,
2194 GLuint return_format)
2195 {
2196 struct intel_context *intel = &p->brw->intel;
2197 struct brw_instruction *insn;
2198
2199 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2200
2201 insn = next_insn(p, BRW_OPCODE_SEND);
2202 insn->header.predicate_control = 0; /* XXX */
2203 insn->header.compression_control = BRW_COMPRESSION_NONE;
2204 if (intel->gen < 6)
2205 insn->header.destreg__conditionalmod = msg_reg_nr;
2206
2207 brw_set_dest(p, insn, dest);
2208 brw_set_src0(p, insn, src0);
2209 brw_set_sampler_message(p, insn,
2210 binding_table_index,
2211 sampler,
2212 msg_type,
2213 response_length,
2214 msg_length,
2215 header_present,
2216 simd_mode,
2217 return_format);
2218 }
2219
2220 /* All these variables are pretty confusing - we might be better off
2221 * using bitmasks and macros for this, in the old style. Or perhaps
2222 * just having the caller instantiate the fields in dword3 itself.
2223 */
2224 void brw_urb_WRITE(struct brw_compile *p,
2225 struct brw_reg dest,
2226 GLuint msg_reg_nr,
2227 struct brw_reg src0,
2228 bool allocate,
2229 bool used,
2230 GLuint msg_length,
2231 GLuint response_length,
2232 bool eot,
2233 bool writes_complete,
2234 GLuint offset,
2235 GLuint swizzle)
2236 {
2237 struct intel_context *intel = &p->brw->intel;
2238 struct brw_instruction *insn;
2239
2240 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2241
2242 if (intel->gen == 7) {
2243 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2244 brw_push_insn_state(p);
2245 brw_set_access_mode(p, BRW_ALIGN_1);
2246 brw_set_mask_control(p, BRW_MASK_DISABLE);
2247 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2248 BRW_REGISTER_TYPE_UD),
2249 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2250 brw_imm_ud(0xff00));
2251 brw_pop_insn_state(p);
2252 }
2253
2254 insn = next_insn(p, BRW_OPCODE_SEND);
2255
2256 assert(msg_length < BRW_MAX_MRF);
2257
2258 brw_set_dest(p, insn, dest);
2259 brw_set_src0(p, insn, src0);
2260 brw_set_src1(p, insn, brw_imm_d(0));
2261
2262 if (intel->gen < 6)
2263 insn->header.destreg__conditionalmod = msg_reg_nr;
2264
2265 brw_set_urb_message(p,
2266 insn,
2267 allocate,
2268 used,
2269 msg_length,
2270 response_length,
2271 eot,
2272 writes_complete,
2273 offset,
2274 swizzle);
2275 }
2276
2277 static int
2278 next_ip(struct brw_compile *p, int ip)
2279 {
2280 struct brw_instruction *insn = (void *)p->store + ip;
2281
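   /* Native instructions occupy 16 bytes; compacted ones occupy 8. */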
2282 if (insn->header.cmpt_control)
2283 return ip + 8;
2284 else
2285 return ip + 16;
2286 }
2287
2288 static int
2289 brw_find_next_block_end(struct brw_compile *p, int start)
2290 {
2291 int ip;
2292 void *store = p->store;
2293
2294 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2295 struct brw_instruction *insn = store + ip;
2296
2297 switch (insn->header.opcode) {
2298 case BRW_OPCODE_ENDIF:
2299 case BRW_OPCODE_ELSE:
2300 case BRW_OPCODE_WHILE:
2301 case BRW_OPCODE_HALT:
2302 return ip;
2303 }
2304 }
2305
2306 return 0;
2307 }
2308
2309 /* There is no DO instruction on gen6, so to find the end of the loop
2310 * we have to see if the loop is jumping back before our start
2311 * instruction.
2312 */
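/* For example (hypothetical offsets): with start at 0x40, a WHILE at ip 0x80
 * whose jip is -10 (in units of scale == 8 bytes) branches back to
 * 0x80 + (-10 * 8) = 0x30 <= 0x40, so that WHILE closes our loop.
 */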
2313 static int
2314 brw_find_loop_end(struct brw_compile *p, int start)
2315 {
2316 struct intel_context *intel = &p->brw->intel;
2317 int ip;
2318 int scale = 8;
2319 void *store = p->store;
2320
2321 /* Always start after the instruction (such as a WHILE) we're trying to fix
2322 * up.
2323 */
2324 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2325 struct brw_instruction *insn = store + ip;
2326
2327 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2328 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2329 : insn->bits3.break_cont.jip;
2330 if (ip + jip * scale <= start)
2331 return ip;
2332 }
2333 }
2334 assert(!"not reached");
2335 return start;
2336 }
2337
2338 /* After program generation, go back and update the UIP and JIP of
2339 * BREAK, CONT, and HALT instructions to their correct locations.
2340 */
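/* Note that ip here is a byte offset into p->store, while the encoded JIP
 * and UIP fields are in units of scale == 8 bytes.
 */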
2341 void
2342 brw_set_uip_jip(struct brw_compile *p)
2343 {
2344 struct intel_context *intel = &p->brw->intel;
2345 int ip;
2346 int scale = 8;
2347 void *store = p->store;
2348
2349 if (intel->gen < 6)
2350 return;
2351
2352 for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2353 struct brw_instruction *insn = store + ip;
2354
2355 if (insn->header.cmpt_control) {
2356 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2357 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
2358 insn->header.opcode != BRW_OPCODE_CONTINUE &&
2359 insn->header.opcode != BRW_OPCODE_HALT);
2360 continue;
2361 }
2362
2363 int block_end_ip = brw_find_next_block_end(p, ip);
2364 switch (insn->header.opcode) {
2365 case BRW_OPCODE_BREAK:
2366 assert(block_end_ip != 0);
2367 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2368 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2369 insn->bits3.break_cont.uip =
2370 (brw_find_loop_end(p, ip) - ip +
2371 (intel->gen == 6 ? 16 : 0)) / scale;
2372 break;
2373 case BRW_OPCODE_CONTINUE:
2374 assert(block_end_ip != 0);
2375 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2376 insn->bits3.break_cont.uip =
2377 (brw_find_loop_end(p, ip) - ip) / scale;
2378
2379 assert(insn->bits3.break_cont.uip != 0);
2380 assert(insn->bits3.break_cont.jip != 0);
2381 break;
2382
2383 case BRW_OPCODE_ENDIF:
2384 if (block_end_ip == 0)
2385 insn->bits3.break_cont.jip = 2;
2386 else
2387 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2388 break;
2389
2390 case BRW_OPCODE_HALT:
2391 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2392 *
2393 * "In case of the halt instruction not inside any conditional
2394 * code block, the value of <JIP> and <UIP> should be the
2395 * same. In case of the halt instruction inside conditional code
2396 * block, the <UIP> should be the end of the program, and the
2397 * <JIP> should be end of the most inner conditional code block."
2398 *
2399 * The uip will have already been set by whoever set up the
2400 * instruction.
2401 */
2402 if (block_end_ip == 0) {
2403 insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
2404 } else {
2405 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2406 }
2407 assert(insn->bits3.break_cont.uip != 0);
2408 assert(insn->bits3.break_cont.jip != 0);
2409 break;
2410 }
2411 }
2412 }
2413
2414 void brw_ff_sync(struct brw_compile *p,
2415 struct brw_reg dest,
2416 GLuint msg_reg_nr,
2417 struct brw_reg src0,
2418 bool allocate,
2419 GLuint response_length,
2420 bool eot)
2421 {
2422 struct intel_context *intel = &p->brw->intel;
2423 struct brw_instruction *insn;
2424
2425 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2426
2427 insn = next_insn(p, BRW_OPCODE_SEND);
2428 brw_set_dest(p, insn, dest);
2429 brw_set_src0(p, insn, src0);
2430 brw_set_src1(p, insn, brw_imm_d(0));
2431
2432 if (intel->gen < 6)
2433 insn->header.destreg__conditionalmod = msg_reg_nr;
2434
2435 brw_set_ff_sync_message(p,
2436 insn,
2437 allocate,
2438 response_length,
2439 eot);
2440 }
2441
2442 /**
2443 * Emit the SEND instruction necessary to generate stream output data on Gen6
2444 * (for transform feedback).
2445 *
2446 * If send_commit_msg is true, this is the last piece of stream output data
2447 * from this thread, so send the data as a committed write. According to the
2448 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2449 *
2450 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2451 * writes are complete by sending the final write as a committed write."
2452 */
2453 void
2454 brw_svb_write(struct brw_compile *p,
2455 struct brw_reg dest,
2456 GLuint msg_reg_nr,
2457 struct brw_reg src0,
2458 GLuint binding_table_index,
2459 bool send_commit_msg)
2460 {
2461 struct brw_instruction *insn;
2462
2463 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2464
2465 insn = next_insn(p, BRW_OPCODE_SEND);
2466 brw_set_dest(p, insn, dest);
2467 brw_set_src0(p, insn, src0);
2468 brw_set_src1(p, insn, brw_imm_d(0));
2469 brw_set_dp_write_message(p, insn,
2470 binding_table_index,
2471 0, /* msg_control: ignored */
2472 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2473 1, /* msg_length */
2474 true, /* header_present */
2475 0, /* last_render_target: ignored */
2476 send_commit_msg, /* response_length */
2477 0, /* end_of_thread */
2478 send_commit_msg); /* send_commit_msg */
2479 }
2480
2481 /**
2482 * This instruction is generated as a single-channel align1 instruction by
2483 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2484 *
2485 * We can't use the typed atomic op in the FS because that has the execution
2486 * mask ANDed with the pixel mask, but we just want to write the one dword for
2487 * all the pixels.
2488 *
2489  * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
2490 * one u32. So we use the same untyped atomic write message as the pixel
2491 * shader.
2492 *
2493 * The untyped atomic operation requires a BUFFER surface type with RAW
2494 * format, and is only accessible through the legacy DATA_CACHE dataport
2495 * messages.
2496 */
2497 void brw_shader_time_add(struct brw_compile *p,
2498 struct brw_reg payload,
2499 uint32_t surf_index)
2500 {
2501 struct intel_context *intel = &p->brw->intel;
2502 assert(intel->gen >= 7);
2503
2504 brw_push_insn_state(p);
2505 brw_set_access_mode(p, BRW_ALIGN_1);
2506 brw_set_mask_control(p, BRW_MASK_DISABLE);
2507 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
2508 brw_pop_insn_state(p);
2509
2510 /* We use brw_vec1_reg and unmasked because we want to increment the given
2511 * offset only once.
2512 */
2513 brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
2514 BRW_ARF_NULL, 0));
2515 brw_set_src0(p, send, brw_vec1_reg(payload.file,
2516 payload.nr, 0));
2517
2518 uint32_t sfid, msg_type;
2519 if (intel->is_haswell) {
2520 sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
2521 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
2522 } else {
2523 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
2524 msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
2525 }
2526
2527 bool header_present = false;
2528 bool eot = false;
2529 uint32_t mlen = 2; /* offset, value */
2530 uint32_t rlen = 0;
2531 brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);
2532
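   /* Pack the function-control half of the descriptor by hand: message type
    * at bit 14, return-data enable at bit 13, the SIMD8-mode flag at bit 12,
    * the atomic operation in bits 11:8, and the surface (binding table)
    * index in bits 7:0.
    */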
2533 send->bits3.ud |= msg_type << 14;
2534 send->bits3.ud |= 0 << 13; /* no return data */
2535 send->bits3.ud |= 1 << 12; /* SIMD8 mode */
2536 send->bits3.ud |= BRW_AOP_ADD << 8;
2537 send->bits3.ud |= surf_index << 0;
2538 }