s/Tungsten Graphics/VMware/
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 unsigned msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
/**
 * Convert a brw_reg_type enumeration value into the hardware representation.
 *
 * The hardware encoding may depend on whether the value is an immediate:
 * the same logical type maps to different bit patterns for immediates vs.
 * register operands, and some types are only legal in one form.
 */
unsigned
brw_reg_type_to_hw_type(const struct brw_context *brw,
                        enum brw_reg_type type, unsigned file)
{
   if (file == BRW_IMMEDIATE_VALUE) {
      /* Immediate encodings; -1 marks types that have no immediate form
       * (byte types), which the assert below rejects.
       */
      const static int imm_hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UB] = -1,
         [BRW_REGISTER_TYPE_B]  = -1,
         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(imm_hw_types));
      assert(imm_hw_types[type] != -1);
      /* Types at/after DF in the enum use Gen8-only immediate encodings. */
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
      return imm_hw_types[type];
   } else {
      /* Non-immediate registers; -1 marks immediate-only vector types. */
      const static int hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UV] = -1,
         [BRW_REGISTER_TYPE_VF] = -1,
         [BRW_REGISTER_TYPE_V]  = -1,
         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(hw_types));
      assert(hw_types[type] != -1);
      /* DF registers require Gen7+; HF registers require Gen8+. */
      assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
      return hw_types[type];
   }
}
157
/**
 * Encode @dest as the destination operand of @insn.
 *
 * Handles all four encoding variants (direct/indirect x align1/align16)
 * and finishes by deriving the instruction's execution size from the
 * destination width.  Note @dest is taken by value, so the hstride
 * adjustments below do not affect the caller.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg dest)
{
   /* GRF/IMM register numbers are 7 bits in the encoding. */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type =
      brw_reg_type_to_hw_type(p->brw, dest.type, dest.file);
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         /* A zero horizontal stride is not a valid destination region;
          * silently promote it to 1.
          */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
         /* Align16 subregister numbers are in 16-byte units. */
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE) {
            assert(dest.dw1.bits.writemask != 0);
         }
         /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
          *    Although Dst.HorzStride is a don't care for Align16, HW needs
          *    this to be programmed as "01".
          */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
219
220 extern int reg_type_size[];
221
222 static void
223 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
224 {
225 int hstride_for_reg[] = {0, 1, 2, 4};
226 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg[] = {1, 2, 4, 8, 16};
229 int width, hstride, vstride, execsize;
230
231 if (reg.file == BRW_IMMEDIATE_VALUE) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
235 */
236 if (reg.type == BRW_REGISTER_TYPE_V) {
237 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
238 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
239 }
240
241 return;
242 }
243
244 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
245 reg.file == BRW_ARF_NULL)
246 return;
247
248 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
249 hstride = hstride_for_reg[reg.hstride];
250
251 if (reg.vstride == 0xf) {
252 vstride = -1;
253 } else {
254 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
255 vstride = vstride_for_reg[reg.vstride];
256 }
257
258 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
259 width = width_for_reg[reg.width];
260
261 assert(insn->header.execution_size >= 0 &&
262 insn->header.execution_size < Elements(execsize_for_reg));
263 execsize = execsize_for_reg[insn->header.execution_size];
264
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
266 /* 3. */
267 assert(execsize >= width);
268
269 /* 4. */
270 if (execsize == width && hstride != 0) {
271 assert(vstride == -1 || vstride == width * hstride);
272 }
273
274 /* 5. */
275 if (execsize == width && hstride == 0) {
276 /* no restriction on vstride. */
277 }
278
279 /* 6. */
280 if (width == 1) {
281 assert(hstride == 0);
282 }
283
284 /* 7. */
285 if (execsize == 1 && width == 1) {
286 assert(hstride == 0);
287 assert(vstride == 0);
288 }
289
290 /* 8. */
291 if (vstride == 0 && hstride == 0) {
292 assert(width == 1);
293 }
294
295 /* 10. Check destination issues. */
296 }
297
298 void
299 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
300 struct brw_reg reg)
301 {
302 struct brw_context *brw = p->brw;
303
304 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
305 assert(reg.nr < 128);
306
307 gen7_convert_mrf_to_grf(p, &reg);
308
309 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
310 insn->header.opcode == BRW_OPCODE_SENDC)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
314 */
315 assert(!reg.negate);
316 assert(!reg.abs);
317 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
318 }
319
320 validate_reg(insn, reg);
321
322 insn->bits1.da1.src0_reg_file = reg.file;
323 insn->bits1.da1.src0_reg_type =
324 brw_reg_type_to_hw_type(brw, reg.type, reg.file);
325 insn->bits2.da1.src0_abs = reg.abs;
326 insn->bits2.da1.src0_negate = reg.negate;
327 insn->bits2.da1.src0_address_mode = reg.address_mode;
328
329 if (reg.file == BRW_IMMEDIATE_VALUE) {
330 insn->bits3.ud = reg.dw1.ud;
331
332 /* Required to set some fields in src1 as well:
333 */
334 insn->bits1.da1.src1_reg_file = 0; /* arf */
335 insn->bits1.da1.src1_reg_type = insn->bits1.da1.src0_reg_type;
336 }
337 else
338 {
339 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
340 if (insn->header.access_mode == BRW_ALIGN_1) {
341 insn->bits2.da1.src0_subreg_nr = reg.subnr;
342 insn->bits2.da1.src0_reg_nr = reg.nr;
343 }
344 else {
345 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
346 insn->bits2.da16.src0_reg_nr = reg.nr;
347 }
348 }
349 else {
350 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
351
352 if (insn->header.access_mode == BRW_ALIGN_1) {
353 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
354 }
355 else {
356 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
357 }
358 }
359
360 if (insn->header.access_mode == BRW_ALIGN_1) {
361 if (reg.width == BRW_WIDTH_1 &&
362 insn->header.execution_size == BRW_EXECUTE_1) {
363 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
364 insn->bits2.da1.src0_width = BRW_WIDTH_1;
365 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
366 }
367 else {
368 insn->bits2.da1.src0_horiz_stride = reg.hstride;
369 insn->bits2.da1.src0_width = reg.width;
370 insn->bits2.da1.src0_vert_stride = reg.vstride;
371 }
372 }
373 else {
374 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
375 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
376 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
377 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
378
379 /* This is an oddity of the fact we're using the same
380 * descriptions for registers in align_16 as align_1:
381 */
382 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
383 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
384 else
385 insn->bits2.da16.src0_vert_stride = reg.vstride;
386 }
387 }
388 }
389
390
391 void brw_set_src1(struct brw_compile *p,
392 struct brw_instruction *insn,
393 struct brw_reg reg)
394 {
395 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
396
397 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
398 assert(reg.nr < 128);
399
400 gen7_convert_mrf_to_grf(p, &reg);
401
402 validate_reg(insn, reg);
403
404 insn->bits1.da1.src1_reg_file = reg.file;
405 insn->bits1.da1.src1_reg_type =
406 brw_reg_type_to_hw_type(p->brw, reg.type, reg.file);
407 insn->bits3.da1.src1_abs = reg.abs;
408 insn->bits3.da1.src1_negate = reg.negate;
409
410 /* Only src1 can be immediate in two-argument instructions.
411 */
412 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
413
414 if (reg.file == BRW_IMMEDIATE_VALUE) {
415 insn->bits3.ud = reg.dw1.ud;
416 }
417 else {
418 /* This is a hardware restriction, which may or may not be lifted
419 * in the future:
420 */
421 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
422 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
423
424 if (insn->header.access_mode == BRW_ALIGN_1) {
425 insn->bits3.da1.src1_subreg_nr = reg.subnr;
426 insn->bits3.da1.src1_reg_nr = reg.nr;
427 }
428 else {
429 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
430 insn->bits3.da16.src1_reg_nr = reg.nr;
431 }
432
433 if (insn->header.access_mode == BRW_ALIGN_1) {
434 if (reg.width == BRW_WIDTH_1 &&
435 insn->header.execution_size == BRW_EXECUTE_1) {
436 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
437 insn->bits3.da1.src1_width = BRW_WIDTH_1;
438 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
439 }
440 else {
441 insn->bits3.da1.src1_horiz_stride = reg.hstride;
442 insn->bits3.da1.src1_width = reg.width;
443 insn->bits3.da1.src1_vert_stride = reg.vstride;
444 }
445 }
446 else {
447 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
448 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
449 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
450 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
451
452 /* This is an oddity of the fact we're using the same
453 * descriptions for registers in align_16 as align_1:
454 */
455 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
456 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
457 else
458 insn->bits3.da16.src1_vert_stride = reg.vstride;
459 }
460 }
461 }
462
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * The descriptor layout differs by generation: Gen4 packs everything into
 * bits3; Gen5 splits the SFID into an extended descriptor in bits2; Gen6+
 * moves the SFID into the instruction header.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
                           struct brw_instruction *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
{
   struct brw_context *brw = p->brw;

   /* Clears bits3 (via an immediate 0 in the src1 slot) so every
    * descriptor field starts from zero.
    */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (brw->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (brw->gen >= 6) {
         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
         inst->header.destreg__conditionalmod = sfid;
      } else {
         /* Set Extended Message Descriptor (ex_desc) */
         inst->bits2.send_gen5.sfid = sfid;
         inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
505
506 static void brw_set_math_message( struct brw_compile *p,
507 struct brw_instruction *insn,
508 unsigned function,
509 unsigned integer_type,
510 bool low_precision,
511 unsigned dataType )
512 {
513 struct brw_context *brw = p->brw;
514 unsigned msg_length;
515 unsigned response_length;
516
517 /* Infer message length from the function */
518 switch (function) {
519 case BRW_MATH_FUNCTION_POW:
520 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
521 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
522 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
523 msg_length = 2;
524 break;
525 default:
526 msg_length = 1;
527 break;
528 }
529
530 /* Infer response length from the function */
531 switch (function) {
532 case BRW_MATH_FUNCTION_SINCOS:
533 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
534 response_length = 2;
535 break;
536 default:
537 response_length = 1;
538 break;
539 }
540
541
542 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
543 msg_length, response_length, false, false);
544 if (brw->gen == 5) {
545 insn->bits3.math_gen5.function = function;
546 insn->bits3.math_gen5.int_type = integer_type;
547 insn->bits3.math_gen5.precision = low_precision;
548 insn->bits3.math_gen5.saturate = insn->header.saturate;
549 insn->bits3.math_gen5.data_type = dataType;
550 insn->bits3.math_gen5.snapshot = 0;
551 } else {
552 insn->bits3.math.function = function;
553 insn->bits3.math.int_type = integer_type;
554 insn->bits3.math.precision = low_precision;
555 insn->bits3.math.saturate = insn->header.saturate;
556 insn->bits3.math.data_type = dataType;
557 }
558 insn->header.saturate = 0;
559 }
560
561
/**
 * Fill in the URB message descriptor for a Gen5 FF_SYNC message
 * (fixed-function thread synchronization); always one message register
 * with a header present.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    bool allocate,
                                    unsigned response_length,
                                    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
577
/**
 * Fill in the message descriptor for a URB write, selecting the
 * generation-specific encoding (Gen7, Gen5/6, or Gen4) and translating
 * the BRW_URB_WRITE_* @flags into the descriptor bits.
 */
static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true,
                              flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      if (flags & BRW_URB_WRITE_OWORD) {
         assert(msg_length == 2); /* header + one OWORD of data */
         insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
      } else {
         insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
      }
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes have no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
         flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      /* 'used' is inverted: the UNUSED flag clears it. */
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
620
/**
 * Fill in the message descriptor for a data port write.
 *
 * Picks the shared function (SFID) by generation — Gen7 routes render
 * target writes to the render cache and everything else to the data
 * cache; Gen6 sends all writes to the render cache — then encodes the
 * generation-specific descriptor fields.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
                         struct brw_instruction *insn,
                         unsigned binding_table_index,
                         unsigned msg_control,
                         unsigned msg_type,
                         unsigned msg_length,
                         bool header_present,
                         unsigned last_render_target,
                         unsigned response_length,
                         unsigned end_of_thread,
                         unsigned send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);

   /* Descriptor field layout varies per generation. */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
678
/**
 * Fill in the message descriptor for a data port read.
 *
 * Selects the SFID by generation (Gen7: data cache; Gen6: render or
 * sampler cache depending on @target_cache; earlier: the dedicated read
 * port) and encodes the generation-specific descriptor fields.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned msg_control,
                        unsigned msg_type,
                        unsigned target_cache,
                        unsigned msg_length,
                        bool header_present,
                        unsigned response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   /* Reads never set end-of-thread. */
   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, false);

   /* Descriptor field layout varies per generation; the trailing comments
    * give the bit positions of each field.
    */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
   }
}
735
/**
 * Fill in the message descriptor for a sampler message, using the
 * generation-specific field layout.  @return_format is only encoded on
 * the original Gen4 layout; later generations drop the field.
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
774
775
776 #define next_insn brw_next_insn
777 struct brw_instruction *
778 brw_next_insn(struct brw_compile *p, unsigned opcode)
779 {
780 struct brw_instruction *insn;
781
782 if (p->nr_insn + 1 > p->store_size) {
783 if (0)
784 printf("incresing the store size to %d\n", p->store_size << 1);
785 p->store_size <<= 1;
786 p->store = reralloc(p->mem_ctx, p->store,
787 struct brw_instruction, p->store_size);
788 if (!p->store)
789 assert(!"realloc eu store memeory failed");
790 }
791
792 p->next_insn_offset += 16;
793 insn = &p->store[p->nr_insn++];
794 memcpy(insn, p->current, sizeof(*insn));
795
796 /* Reset this one-shot flag:
797 */
798
799 if (p->current->header.destreg__conditionalmod) {
800 p->current->header.destreg__conditionalmod = 0;
801 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
802 }
803
804 insn->header.opcode = opcode;
805 return insn;
806 }
807
808 static struct brw_instruction *brw_alu1( struct brw_compile *p,
809 unsigned opcode,
810 struct brw_reg dest,
811 struct brw_reg src )
812 {
813 struct brw_instruction *insn = next_insn(p, opcode);
814 brw_set_dest(p, insn, dest);
815 brw_set_src0(p, insn, src);
816 return insn;
817 }
818
819 static struct brw_instruction *brw_alu2(struct brw_compile *p,
820 unsigned opcode,
821 struct brw_reg dest,
822 struct brw_reg src0,
823 struct brw_reg src1 )
824 {
825 struct brw_instruction *insn = next_insn(p, opcode);
826 brw_set_dest(p, insn, dest);
827 brw_set_src0(p, insn, src0);
828 brw_set_src1(p, insn, src1);
829 return insn;
830 }
831
832 static int
833 get_3src_subreg_nr(struct brw_reg reg)
834 {
835 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
836 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
837 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
838 } else {
839 return reg.subnr / 4;
840 }
841 }
842
/**
 * Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2, ...).
 *
 * 3-src instructions use a dedicated, more restrictive encoding:
 * align16 only, GRF sources with direct addressing, and a shared
 * source/destination type field (Gen7+).
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
                                        unsigned opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1,
                                        struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* dest_reg_file is a single bit: 0 = GRF, 1 = MRF. */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   /* Destination subregister number is in 16-byte units. */
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   /* src0: GRF, direct addressing only.  rep_ctrl replicates a scalar
    * source (vstride 0) across all channels.
    */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   /* src1: same restrictions; its subregister number is split across two
    * instruction words.
    */
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types. The MAD and LRP emitters ensure
       * that all four types are float. The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      /* No default case needed: the assert above limits dest.type to
       * F, D, or UD.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
926
927
928 /***********************************************************************
929 * Convenience routines.
930 */
/* ALU1(OP): defines the convenience wrapper brw_##OP() that emits a
 * single-source ALU instruction via brw_alu1().
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}
938
/* ALU2(OP): defines the convenience wrapper brw_##OP() that emits a
 * two-source ALU instruction via brw_alu2().
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
947
/* ALU3(OP): defines the convenience wrapper brw_##OP() that emits a
 * three-source ALU instruction via brw_alu3().
 */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
957
/* ALU3F(OP): like ALU3(OP), but asserts that the destination and all three
 * sources are float-typed (used for MAD and LRP, which require float).
 */
#define ALU3F(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   assert(dest.type == BRW_REGISTER_TYPE_F);		\
   assert(src0.type == BRW_REGISTER_TYPE_F);		\
   assert(src1.type == BRW_REGISTER_TYPE_F);		\
   assert(src2.type == BRW_REGISTER_TYPE_F);		\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
971
972 /* Rounding operations (other than RNDD) require two instructions - the first
973 * stores a rounded value (possibly the wrong way) in the dest register, but
974 * also sets a per-channel "increment bit" in the flag register. A predicated
975 * add of 1.0 fixes dest to contain the desired result.
976 *
977 * Sandybridge and later appear to round correctly without an ADD.
978 */
/* ROUND(OP): defines brw_##OP(), which emits the rounding instruction and,
 * pre-gen6, the predicated fix-up ADD described in the comment above.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
996
997
/* Instantiate the convenience emitters using the macros defined above. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)
1034
1035
1036 struct brw_instruction *brw_ADD(struct brw_compile *p,
1037 struct brw_reg dest,
1038 struct brw_reg src0,
1039 struct brw_reg src1)
1040 {
1041 /* 6.2.2: add */
1042 if (src0.type == BRW_REGISTER_TYPE_F ||
1043 (src0.file == BRW_IMMEDIATE_VALUE &&
1044 src0.type == BRW_REGISTER_TYPE_VF)) {
1045 assert(src1.type != BRW_REGISTER_TYPE_UD);
1046 assert(src1.type != BRW_REGISTER_TYPE_D);
1047 }
1048
1049 if (src1.type == BRW_REGISTER_TYPE_F ||
1050 (src1.file == BRW_IMMEDIATE_VALUE &&
1051 src1.type == BRW_REGISTER_TYPE_VF)) {
1052 assert(src0.type != BRW_REGISTER_TYPE_UD);
1053 assert(src0.type != BRW_REGISTER_TYPE_D);
1054 }
1055
1056 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1057 }
1058
1059 struct brw_instruction *brw_AVG(struct brw_compile *p,
1060 struct brw_reg dest,
1061 struct brw_reg src0,
1062 struct brw_reg src1)
1063 {
1064 assert(dest.type == src0.type);
1065 assert(src0.type == src1.type);
1066 switch (src0.type) {
1067 case BRW_REGISTER_TYPE_B:
1068 case BRW_REGISTER_TYPE_UB:
1069 case BRW_REGISTER_TYPE_W:
1070 case BRW_REGISTER_TYPE_UW:
1071 case BRW_REGISTER_TYPE_D:
1072 case BRW_REGISTER_TYPE_UD:
1073 break;
1074 default:
1075 assert(!"Bad type for brw_AVG");
1076 }
1077
1078 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1079 }
1080
1081 struct brw_instruction *brw_MUL(struct brw_compile *p,
1082 struct brw_reg dest,
1083 struct brw_reg src0,
1084 struct brw_reg src1)
1085 {
1086 /* 6.32.38: mul */
1087 if (src0.type == BRW_REGISTER_TYPE_D ||
1088 src0.type == BRW_REGISTER_TYPE_UD ||
1089 src1.type == BRW_REGISTER_TYPE_D ||
1090 src1.type == BRW_REGISTER_TYPE_UD) {
1091 assert(dest.type != BRW_REGISTER_TYPE_F);
1092 }
1093
1094 if (src0.type == BRW_REGISTER_TYPE_F ||
1095 (src0.file == BRW_IMMEDIATE_VALUE &&
1096 src0.type == BRW_REGISTER_TYPE_VF)) {
1097 assert(src1.type != BRW_REGISTER_TYPE_UD);
1098 assert(src1.type != BRW_REGISTER_TYPE_D);
1099 }
1100
1101 if (src1.type == BRW_REGISTER_TYPE_F ||
1102 (src1.file == BRW_IMMEDIATE_VALUE &&
1103 src1.type == BRW_REGISTER_TYPE_VF)) {
1104 assert(src0.type != BRW_REGISTER_TYPE_UD);
1105 assert(src0.type != BRW_REGISTER_TYPE_D);
1106 }
1107
1108 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1109 src0.nr != BRW_ARF_ACCUMULATOR);
1110 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1111 src1.nr != BRW_ARF_ACCUMULATOR);
1112
1113 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1114 }
1115
1116
1117 void brw_NOP(struct brw_compile *p)
1118 {
1119 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1120 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1121 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1122 brw_set_src1(p, insn, brw_imm_ud(0x0));
1123 }
1124
1125
1126
1127
1128
1129 /***********************************************************************
1130 * Comparisons, if/else/endif
1131 */
1132
1133 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1134 struct brw_reg dest,
1135 struct brw_reg src0,
1136 struct brw_reg src1)
1137 {
1138 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1139
1140 insn->header.execution_size = 1;
1141 insn->header.compression_control = BRW_COMPRESSION_NONE;
1142 insn->header.mask_control = BRW_MASK_DISABLE;
1143
1144 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1145
1146 return insn;
1147 }
1148
1149 static void
1150 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1151 {
1152 p->if_stack[p->if_stack_depth] = inst - p->store;
1153
1154 p->if_stack_depth++;
1155 if (p->if_stack_array_size <= p->if_stack_depth) {
1156 p->if_stack_array_size *= 2;
1157 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1158 p->if_stack_array_size);
1159 }
1160 }
1161
1162 static struct brw_instruction *
1163 pop_if_stack(struct brw_compile *p)
1164 {
1165 p->if_stack_depth--;
1166 return &p->store[p->if_stack[p->if_stack_depth]];
1167 }
1168
1169 static void
1170 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1171 {
1172 if (p->loop_stack_array_size < p->loop_stack_depth) {
1173 p->loop_stack_array_size *= 2;
1174 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1175 p->loop_stack_array_size);
1176 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1177 p->loop_stack_array_size);
1178 }
1179
1180 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1181 p->loop_stack_depth++;
1182 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1183 }
1184
1185 static struct brw_instruction *
1186 get_inner_do_insn(struct brw_compile *p)
1187 {
1188 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1189 }
1190
1191 /* EU takes the value from the flag register and pushes it onto some
1192 * sort of a stack (presumably merging with any flag value already on
1193 * the stack). Within an if block, the flags at the top of the stack
1194 * control execution on each channel of the unit, eg. on each of the
1195 * 16 pixel values in our wm programs.
1196 *
1197 * When the matching 'else' instruction is reached (presumably by
1198 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1200 *
1201 * When the matching 'endif' instruction is reached, the flags are
1202 * popped off. If the stack is now empty, normal execution resumes.
1203 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      /* Pre-gen6: IF operates on IP; the jump count in src1 is patched
       * later by patch_IF_ELSE().
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6 carries the (to-be-patched) jump count in bits1. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7+: JIP/UIP live in bits3; both are patched by patch_IF_ELSE(). */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumed the predicate; don't predicate following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   /* Remember this IF so brw_ELSE/brw_ENDIF can patch its jump targets. */
   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1244
1245 /* This function is only used for gen6-style IF instructions with an
1246 * embedded comparison (conditional modifier). It is not used on gen7.
1247 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   /* The jump count is filled in later by patch_IF_ELSE(). */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The comparison is fused into the IF itself via the conditional
    * modifier field, instead of a separate CMP + predicated IF.
    */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1276
1277 /**
1278 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1279 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* ADDs to IP are in bytes; each instruction is 16 bytes. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1316
1317 /**
1318 * Patch IF and ELSE instructions with appropriate jump targets.
1319 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         /* Gen7+: with no ELSE, JIP and UIP both target the ENDIF. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1406
/* Emit an ELSE instruction for the innermost open IF and push it on the
 * if-stack; its jump targets are patched later by brw_ENDIF().
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      /* Pre-gen6: ELSE operates on IP; jump count patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: JIP/UIP in bits3, patched by patch_IF_ELSE(). */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1439
/* Close the innermost open IF/ELSE block: pop the matching instruction(s)
 * off the if-stack, emit an ENDIF (unless SPF mode on gen4/5 lets us turn
 * the whole construct into conditional ADDs), and patch the jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1518
/* Emit a BREAK.  Pre-gen6 the jump count is patched later by
 * brw_patch_break_cont(); gen6+ targets are filled in by brw_set_uip_jip().
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop the mask stack entries of every IF we are inside of. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1541
1542 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1543 {
1544 struct brw_instruction *insn;
1545
1546 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1547 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1548 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1549 brw_set_dest(p, insn, brw_ip_reg());
1550 brw_set_src0(p, insn, brw_ip_reg());
1551 brw_set_src1(p, insn, brw_imm_d(0x0));
1552
1553 insn->header.compression_control = BRW_COMPRESSION_NONE;
1554 insn->header.execution_size = BRW_EXECUTE_8;
1555 return insn;
1556 }
1557
/* Emit a pre-gen6 CONTINUE; the jump count is patched later by
 * brw_patch_break_cont().
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   /* Pop the mask stack entries of every IF we are inside of. */
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1572
/* Emit a HALT (gen6+); UIP and JIP in src1 are filled in later. */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   /* NOTE(review): the compressed path leaves compression_control at its
    * default instead of setting it explicitly — presumably intentional,
    * but worth confirming against the defaults set by next_insn().
    */
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1590
1591 /* DO/WHILE loop:
1592 *
1593 * The DO/WHILE is just an unterminated loop -- break or continue are
1594 * used for control within the loop. We have a few ways they can be
1595 * done.
1596 *
1597 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1598 * jip and no DO instruction.
1599 *
1600 * For non-uniform control flow pre-gen6, there's a DO instruction to
1601 * push the mask, and a WHILE to jump back, and BREAK to get out and
1602 * pop the mask.
1603 *
1604 * For gen6, there's no more mask stack, so no need for DO. WHILE
1605 * just points back to the first instruction of the loop.
1606 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction is emitted; just remember where the loop body
       * starts so brw_WHILE() can jump back to it.
       */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1634
1635 /**
1636 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1637 * instruction here.
1638 *
1639 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1640 * nesting, since it can always just point to the end of the block/current loop.
1641 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Gen5 counts jumps in 64-bit chunks (2 per instruction). */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk the loop body backwards from the WHILE to its DO. */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 /* BREAK jumps to just past the WHILE (out of the loop). */
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 /* CONTINUE jumps to the WHILE itself (next iteration test). */
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1664
/* Emit the WHILE that closes the innermost loop opened by brw_DO(), with
 * per-generation encodings, and pop the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   /* Jump counts are in 64-bit chunks from gen5 on (2 per instruction). */
   unsigned br = 1;

   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards jump to the top of the loop body. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: the WHILE degenerates to a scalar ADD on IP (in bytes;
	  * 16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 /* Point the loop's pending BREAK/CONT instructions at this WHILE. */
	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1728
1729
1730 /* FORWARD JUMPS:
1731 */
1732 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1733 {
1734 struct brw_context *brw = p->brw;
1735 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1736 unsigned jmpi = 1;
1737
1738 if (brw->gen >= 5)
1739 jmpi = 2;
1740
1741 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1742 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1743
1744 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1745 }
1746
1747
1748
1749 /* To integrate with the above, it makes sense that the comparison
1750 * instruction should populate the flag register. It might be simpler
1751 * just to use the flag reg for most WM tasks?
1752 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1794
/* Issue a 'wait' instruction on notification register n1; the host can
   write MMIO to wake the thread back up. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* WAIT uses the notification register as both dest and src0. */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1809
1810
1811 /***********************************************************************
1812 * Helpers for the various SEND message types:
1813 */
1814
1815 /** Extended math function, float[8].
1816 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       unsigned msg_reg_nr,
	       struct brw_reg src,
	       unsigned data_type,
	       unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      /* Gen6+: math is a regular instruction, not a message to the
       * extended-math shared function.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer-division functions take integer sources; everything else
       * takes float.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      /* Pre-gen6: math goes through a SEND to the shared math unit. */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1879
1880 /** Extended math function, float[8].
1881 */
1882 void brw_math2(struct brw_compile *p,
1883 struct brw_reg dest,
1884 unsigned function,
1885 struct brw_reg src0,
1886 struct brw_reg src1)
1887 {
1888 struct brw_context *brw = p->brw;
1889 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1890
1891 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1892 (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1893 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1894 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1895
1896 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1897 if (brw->gen == 6) {
1898 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1899 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1900 }
1901
1902 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1903 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1904 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1905 assert(src0.type != BRW_REGISTER_TYPE_F);
1906 assert(src1.type != BRW_REGISTER_TYPE_F);
1907 } else {
1908 assert(src0.type == BRW_REGISTER_TYPE_F);
1909 assert(src1.type == BRW_REGISTER_TYPE_F);
1910 }
1911
1912 /* Source modifiers are ignored for extended math instructions on Gen6. */
1913 if (brw->gen == 6) {
1914 assert(!src0.negate);
1915 assert(!src0.abs);
1916 assert(!src1.negate);
1917 assert(!src1.abs);
1918 }
1919
1920 /* Math is the same ISA format as other opcodes, except that CondModifier
1921 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1922 */
1923 insn->header.destreg__conditionalmod = function;
1924
1925 brw_set_dest(p, insn, dest);
1926 brw_set_src0(p, insn, src0);
1927 brw_set_src1(p, insn, src1);
1928 }
1929
1930
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* From Gen6 on, the message header offset is in OWORD (16-byte) units. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One register is 2 OWORDs, two registers are 4; the message length
    * includes one extra register for the header.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* SEND can't be compressed (see the PRM quote in brw_SAMPLE); clear
       * compression and widen the header source to vec16 instead.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* Gen6+ takes the payload from the MRF source; pre-Gen6 encodes the
       * message register number in destreg__conditionalmod above.
       */
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
2039
2040
2041 /**
2042 * Read a block of owords (half a GRF each) from the scratch buffer
2043 * using a constant index per channel.
2044 *
2045 * Offset must be aligned to oword size (16 bytes). Used for register
2046 * spilling.
2047 */
2048 void
2049 brw_oword_block_read_scratch(struct brw_compile *p,
2050 struct brw_reg dest,
2051 struct brw_reg mrf,
2052 int num_regs,
2053 unsigned offset)
2054 {
2055 struct brw_context *brw = p->brw;
2056 uint32_t msg_control;
2057 int rlen;
2058
2059 if (brw->gen >= 6)
2060 offset /= 16;
2061
2062 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2063 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2064
2065 if (num_regs == 1) {
2066 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2067 rlen = 1;
2068 } else {
2069 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2070 rlen = 2;
2071 }
2072
2073 {
2074 brw_push_insn_state(p);
2075 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2076 brw_set_mask_control(p, BRW_MASK_DISABLE);
2077
2078 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2079
2080 /* set message header global offset field (reg 0, element 2) */
2081 brw_MOV(p,
2082 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2083 mrf.nr,
2084 2), BRW_REGISTER_TYPE_UD),
2085 brw_imm_ud(offset));
2086
2087 brw_pop_insn_state(p);
2088 }
2089
2090 {
2091 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2092
2093 assert(insn->header.predicate_control == 0);
2094 insn->header.compression_control = BRW_COMPRESSION_NONE;
2095 insn->header.destreg__conditionalmod = mrf.nr;
2096
2097 brw_set_dest(p, insn, dest); /* UW? */
2098 if (brw->gen >= 6) {
2099 brw_set_src0(p, insn, mrf);
2100 } else {
2101 brw_set_src0(p, insn, brw_null_reg());
2102 }
2103
2104 brw_set_dp_read_message(p,
2105 insn,
2106 255, /* binding table index (255=stateless) */
2107 msg_control,
2108 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2109 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2110 1, /* msg_length */
2111 true, /* header_present */
2112 rlen);
2113 }
2114 }
2115
2116 void
2117 gen7_block_read_scratch(struct brw_compile *p,
2118 struct brw_reg dest,
2119 int num_regs,
2120 unsigned offset)
2121 {
2122 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2123
2124 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2125
2126 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
2127 insn->header.compression_control = BRW_COMPRESSION_NONE;
2128
2129 brw_set_dest(p, insn, dest);
2130
2131 /* The HW requires that the header is present; this is to get the g0.5
2132 * scratch offset.
2133 */
2134 bool header_present = true;
2135 brw_set_src0(p, insn, brw_vec8_grf(0, 0));
2136
2137 brw_set_message_descriptor(p, insn,
2138 GEN7_SFID_DATAPORT_DATA_CACHE,
2139 1, /* mlen: just g0 */
2140 num_regs,
2141 header_present,
2142 false);
2143
2144 insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;
2145
2146 assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
2147 insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;
2148
2149 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2150 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2151 * is 32 bytes, which happens to be the size of a register.
2152 */
2153 offset /= REG_SIZE;
2154 assert(offset < (1 << 12));
2155 insn->bits3.ud |= offset;
2156 }
2157
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Build the message header in the MRF: a copy of g0 with the read
    * offset written into element 2.
    */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   /* Gen6+ takes the payload from the MRF source; pre-Gen6 encodes the
    * message register number in destreg__conditionalmod above.
    */
   if (brw->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   true, /* header_present */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
2216
2217
2218 void brw_fb_WRITE(struct brw_compile *p,
2219 int dispatch_width,
2220 unsigned msg_reg_nr,
2221 struct brw_reg src0,
2222 unsigned msg_control,
2223 unsigned binding_table_index,
2224 unsigned msg_length,
2225 unsigned response_length,
2226 bool eot,
2227 bool header_present)
2228 {
2229 struct brw_context *brw = p->brw;
2230 struct brw_instruction *insn;
2231 unsigned msg_type;
2232 struct brw_reg dest;
2233
2234 if (dispatch_width == 16)
2235 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2236 else
2237 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2238
2239 if (brw->gen >= 6) {
2240 insn = next_insn(p, BRW_OPCODE_SENDC);
2241 } else {
2242 insn = next_insn(p, BRW_OPCODE_SEND);
2243 }
2244 /* The execution mask is ignored for render target writes. */
2245 insn->header.predicate_control = 0;
2246 insn->header.compression_control = BRW_COMPRESSION_NONE;
2247
2248 if (brw->gen >= 6) {
2249 /* headerless version, just submit color payload */
2250 src0 = brw_message_reg(msg_reg_nr);
2251
2252 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2253 } else {
2254 insn->header.destreg__conditionalmod = msg_reg_nr;
2255
2256 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2257 }
2258
2259 brw_set_dest(p, insn, dest);
2260 brw_set_src0(p, insn, src0);
2261 brw_set_dp_write_message(p,
2262 insn,
2263 binding_table_index,
2264 msg_control,
2265 msg_type,
2266 msg_length,
2267 header_present,
2268 eot, /* last render target write */
2269 response_length,
2270 eot,
2271 0 /* send_commit_msg */);
2272 }
2273
2274
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		unsigned msg_reg_nr,
		struct brw_reg src0,
		unsigned binding_table_index,
		unsigned sampler,
		unsigned msg_type,
		unsigned response_length,
		unsigned msg_length,
		unsigned header_present,
		unsigned simd_mode,
		unsigned return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* NOTE(review): msg_reg_nr is unsigned, so this comparison relies on -1
    * converting to UINT_MAX; -1 appears to mean "no implied move" — confirm
    * against callers.
    */
   if (msg_reg_nr != -1)
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send). The hardware behavior is undefined if this instruction is
    *     set as compressed. However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
      insn->header.compression_control = BRW_COMPRESSION_NONE;

   /* Pre-Gen6 SENDs encode the message register number in the
    * instruction's destreg field.
    */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}
2332
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   unsigned msg_reg_nr,
		   struct brw_reg src0,
		   enum brw_urb_write_flags flags,
		   unsigned msg_length,
		   unsigned response_length,
		   unsigned offset,
		   unsigned swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-Gen6 SENDs encode the message register number in the
    * instruction's destreg field.
    */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       flags,
		       msg_length,
		       response_length,
		       offset,
		       swizzle);
}
2383
2384 static int
2385 next_ip(struct brw_compile *p, int ip)
2386 {
2387 struct brw_instruction *insn = (void *)p->store + ip;
2388
2389 if (insn->header.cmpt_control)
2390 return ip + 8;
2391 else
2392 return ip + 16;
2393 }
2394
2395 static int
2396 brw_find_next_block_end(struct brw_compile *p, int start)
2397 {
2398 int ip;
2399 void *store = p->store;
2400
2401 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2402 struct brw_instruction *insn = store + ip;
2403
2404 switch (insn->header.opcode) {
2405 case BRW_OPCODE_ENDIF:
2406 case BRW_OPCODE_ELSE:
2407 case BRW_OPCODE_WHILE:
2408 case BRW_OPCODE_HALT:
2409 return ip;
2410 }
2411 }
2412
2413 return 0;
2414 }
2415
/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 *
 * Returns the byte offset of the matching WHILE; asserts (and returns
 * @start) if none is found.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct brw_context *brw = p->brw;
   int ip;
   int scale = 8;   /* jump offsets are scaled by 8 bytes */
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* Gen6 stores the backward jump in jump_count; Gen7+ uses JIP. */
	 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
				 : insn->bits3.break_cont.jip;
	 /* A WHILE whose (negative) jump target lands at or before @start
	  * closes the loop containing @start.
	  */
	 if (ip + jip * scale <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start;
}
2444
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 *
 * JIP points at the end of the enclosing block; UIP at the loop end
 * (BREAK/CONTINUE) or, for HALT, wherever the emitter already put it.
 * No-op before Gen6.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   int scale = 8;   /* JIP/UIP are expressed in 8-byte units */
   void *store = p->store;

   if (brw->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
	 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
	 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
		insn->header.opcode != BRW_OPCODE_CONTINUE &&
		insn->header.opcode != BRW_OPCODE_HALT);
	 continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
         insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip +
	     (brw->gen == 6 ? 16 : 0)) / scale;
	 break;
      case BRW_OPCODE_CONTINUE:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip) / scale;

         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
	 break;

      case BRW_OPCODE_ENDIF:
	 /* With no further block end, jump to the next instruction
	  * (2 * 8 bytes = one full-size instruction).
	  */
         if (block_end_ip == 0)
            insn->bits3.break_cont.jip = 2;
         else
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 break;

      case BRW_OPCODE_HALT:
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same. In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
	 if (block_end_ip == 0) {
	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
	 } else {
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 }
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}
2520
2521 void brw_ff_sync(struct brw_compile *p,
2522 struct brw_reg dest,
2523 unsigned msg_reg_nr,
2524 struct brw_reg src0,
2525 bool allocate,
2526 unsigned response_length,
2527 bool eot)
2528 {
2529 struct brw_context *brw = p->brw;
2530 struct brw_instruction *insn;
2531
2532 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2533
2534 insn = next_insn(p, BRW_OPCODE_SEND);
2535 brw_set_dest(p, insn, dest);
2536 brw_set_src0(p, insn, src0);
2537 brw_set_src1(p, insn, brw_imm_d(0));
2538
2539 if (brw->gen < 6)
2540 insn->header.destreg__conditionalmod = msg_reg_nr;
2541
2542 brw_set_ff_sync_message(p,
2543 insn,
2544 allocate,
2545 response_length,
2546 eot);
2547 }
2548
2549 /**
2550 * Emit the SEND instruction necessary to generate stream output data on Gen6
2551 * (for transform feedback).
2552 *
2553 * If send_commit_msg is true, this is the last piece of stream output data
2554 * from this thread, so send the data as a committed write. According to the
2555 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2556 *
2557 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2558 * writes are complete by sending the final write as a committed write."
2559 */
2560 void
2561 brw_svb_write(struct brw_compile *p,
2562 struct brw_reg dest,
2563 unsigned msg_reg_nr,
2564 struct brw_reg src0,
2565 unsigned binding_table_index,
2566 bool send_commit_msg)
2567 {
2568 struct brw_instruction *insn;
2569
2570 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2571
2572 insn = next_insn(p, BRW_OPCODE_SEND);
2573 brw_set_dest(p, insn, dest);
2574 brw_set_src0(p, insn, src0);
2575 brw_set_src1(p, insn, brw_imm_d(0));
2576 brw_set_dp_write_message(p, insn,
2577 binding_table_index,
2578 0, /* msg_control: ignored */
2579 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2580 1, /* msg_length */
2581 true, /* header_present */
2582 0, /* last_render_target: ignored */
2583 send_commit_msg, /* response_length */
2584 0, /* end_of_thread */
2585 send_commit_msg); /* send_commit_msg */
2586 }
2587
/* Fill in the message descriptor for an untyped atomic operation.
 *
 * Haswell routes these through data cache port 1 and distinguishes an
 * align1 (SIMD8/16) encoding from a SIMD4x2 (align16) one; Ivybridge
 * only has the legacy data cache encoding.
 */
static void
brw_set_dp_untyped_atomic_message(struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned atomic_op,
                                  unsigned bind_table_index,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  bool header_present)
{
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);


      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* Anything narrower than SIMD16 is sent as a SIMD8 message. */
         if (insn->header.execution_size != BRW_EXECUTE_16)
            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */

         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }

   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;

      /* Anything narrower than SIMD16 is sent as a SIMD8 message. */
      if (insn->header.execution_size != BRW_EXECUTE_16)
         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
   }

   if (response_length)
      insn->bits3.ud |= 1 << 13; /* Return data expected */

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
   insn->bits3.ud |= atomic_op << 8; /* atomic operation selector */
}
2631
2632 void
2633 brw_untyped_atomic(struct brw_compile *p,
2634 struct brw_reg dest,
2635 struct brw_reg mrf,
2636 unsigned atomic_op,
2637 unsigned bind_table_index,
2638 unsigned msg_length,
2639 unsigned response_length) {
2640 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
2641
2642 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2643 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2644 brw_set_src1(p, insn, brw_imm_d(0));
2645 brw_set_dp_untyped_atomic_message(
2646 p, insn, atomic_op, bind_table_index, msg_length, response_length,
2647 insn->header.access_mode == BRW_ALIGN_1);
2648 }
2649
/* Fill in the message descriptor for an untyped surface read.
 *
 * Haswell routes these through data cache port 1; Ivybridge uses the
 * legacy data cache encoding.
 */
static void
brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
                                        struct brw_instruction *insn,
                                        unsigned bind_table_index,
                                        unsigned msg_length,
                                        unsigned response_length,
                                        bool header_present)
{
   /* Infer the SIMD width from the instruction's execution size, and from
    * that, how many 32-bit channels the response covers.
    */
   const unsigned dispatch_width =
      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
   const unsigned num_channels = response_length / (dispatch_width / 8);

   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
   }

   if (insn->header.access_mode == BRW_ALIGN_1) {
      if (dispatch_width == 16)
         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
      else
         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
   }

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;

   /* Set mask of 32-bit channels to drop. */
   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
}
2688
2689 void
2690 brw_untyped_surface_read(struct brw_compile *p,
2691 struct brw_reg dest,
2692 struct brw_reg mrf,
2693 unsigned bind_table_index,
2694 unsigned msg_length,
2695 unsigned response_length)
2696 {
2697 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2698
2699 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2700 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2701 brw_set_dp_untyped_surface_read_message(
2702 p, insn, bind_table_index, msg_length, response_length,
2703 insn->header.access_mode == BRW_ALIGN_1);
2704 }
2705
/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   /* Untyped atomics are a Gen7+ dataport feature. */
   assert(brw->gen >= 7);

   /* Emit the SEND itself under align1 / mask-disable, restoring the
    * caller's instruction state afterwards.
    */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));
   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
                                     2 /* message length */,
                                     0 /* response length */,
                                     false /* header present */);
}