63e7bdeb206c9af985f4dd10602dc8e0a06abd75
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 unsigned msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102 /**
103 * Convert a brw_reg_type enumeration value into the hardware representation.
104 *
105 * The hardware encoding may depend on whether the value is an immediate.
106 */
107 unsigned
108 brw_reg_type_to_hw_type(const struct brw_context *brw,
109 enum brw_reg_type type, unsigned file)
110 {
111 if (file == BRW_IMMEDIATE_VALUE) {
112 const static int imm_hw_types[] = {
113 [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
114 [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D,
115 [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
116 [BRW_REGISTER_TYPE_W] = BRW_HW_REG_TYPE_W,
117 [BRW_REGISTER_TYPE_F] = BRW_HW_REG_TYPE_F,
118 [BRW_REGISTER_TYPE_UB] = -1,
119 [BRW_REGISTER_TYPE_B] = -1,
120 [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
121 [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
122 [BRW_REGISTER_TYPE_V] = BRW_HW_REG_IMM_TYPE_V,
123 [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
124 [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
125 [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
126 [BRW_REGISTER_TYPE_Q] = GEN8_HW_REG_TYPE_Q,
127 };
128 assert(type < ARRAY_SIZE(imm_hw_types));
129 assert(imm_hw_types[type] != -1);
130 assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
131 return imm_hw_types[type];
132 } else {
133 /* Non-immediate registers */
134 const static int hw_types[] = {
135 [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
136 [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D,
137 [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
138 [BRW_REGISTER_TYPE_W] = BRW_HW_REG_TYPE_W,
139 [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
140 [BRW_REGISTER_TYPE_B] = BRW_HW_REG_NON_IMM_TYPE_B,
141 [BRW_REGISTER_TYPE_F] = BRW_HW_REG_TYPE_F,
142 [BRW_REGISTER_TYPE_UV] = -1,
143 [BRW_REGISTER_TYPE_VF] = -1,
144 [BRW_REGISTER_TYPE_V] = -1,
145 [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
146 [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
147 [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
148 [BRW_REGISTER_TYPE_Q] = GEN8_HW_REG_TYPE_Q,
149 };
150 assert(type < ARRAY_SIZE(hw_types));
151 assert(hw_types[type] != -1);
152 assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
153 assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
154 return hw_types[type];
155 }
156 }
157
158 void
159 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
160 struct brw_reg dest)
161 {
162 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
163 dest.file != BRW_MESSAGE_REGISTER_FILE)
164 assert(dest.nr < 128);
165
166 gen7_convert_mrf_to_grf(p, &dest);
167
168 insn->bits1.da1.dest_reg_file = dest.file;
169 insn->bits1.da1.dest_reg_type =
170 brw_reg_type_to_hw_type(p->brw, dest.type, dest.file);
171 insn->bits1.da1.dest_address_mode = dest.address_mode;
172
173 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
174 insn->bits1.da1.dest_reg_nr = dest.nr;
175
176 if (insn->header.access_mode == BRW_ALIGN_1) {
177 insn->bits1.da1.dest_subreg_nr = dest.subnr;
178 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
179 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
180 insn->bits1.da1.dest_horiz_stride = dest.hstride;
181 } else {
182 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
183 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
184 if (dest.file == BRW_GENERAL_REGISTER_FILE ||
185 dest.file == BRW_MESSAGE_REGISTER_FILE) {
186 assert(dest.dw1.bits.writemask != 0);
187 }
188 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
189 * Although Dst.HorzStride is a don't care for Align16, HW needs
190 * this to be programmed as "01".
191 */
192 insn->bits1.da16.dest_horiz_stride = 1;
193 }
194 } else {
195 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
196
197 /* These are different sizes in align1 vs align16:
198 */
199 if (insn->header.access_mode == BRW_ALIGN_1) {
200 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
201 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
202 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
203 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
204 } else {
205 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
206 /* even ignored in da16, still need to set as '01' */
207 insn->bits1.ia16.dest_horiz_stride = 1;
208 }
209 }
210
211 /* NEW: Set the execution size based on dest.width and
212 * insn->compression_control:
213 */
214 guess_execution_size(p, insn, dest);
215 }
216
217 extern int reg_type_size[];
218
219 static void
220 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
221 {
222 int hstride_for_reg[] = {0, 1, 2, 4};
223 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
224 int width_for_reg[] = {1, 2, 4, 8, 16};
225 int execsize_for_reg[] = {1, 2, 4, 8, 16};
226 int width, hstride, vstride, execsize;
227
228 if (reg.file == BRW_IMMEDIATE_VALUE) {
229 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
230 * mean the destination has to be 128-bit aligned and the
231 * destination horiz stride has to be a word.
232 */
233 if (reg.type == BRW_REGISTER_TYPE_V) {
234 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
235 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
236 }
237
238 return;
239 }
240
241 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
242 reg.file == BRW_ARF_NULL)
243 return;
244
245 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
246 hstride = hstride_for_reg[reg.hstride];
247
248 if (reg.vstride == 0xf) {
249 vstride = -1;
250 } else {
251 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
252 vstride = vstride_for_reg[reg.vstride];
253 }
254
255 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
256 width = width_for_reg[reg.width];
257
258 assert(insn->header.execution_size >= 0 &&
259 insn->header.execution_size < Elements(execsize_for_reg));
260 execsize = execsize_for_reg[insn->header.execution_size];
261
262 /* Restrictions from 3.3.10: Register Region Restrictions. */
263 /* 3. */
264 assert(execsize >= width);
265
266 /* 4. */
267 if (execsize == width && hstride != 0) {
268 assert(vstride == -1 || vstride == width * hstride);
269 }
270
271 /* 5. */
272 if (execsize == width && hstride == 0) {
273 /* no restriction on vstride. */
274 }
275
276 /* 6. */
277 if (width == 1) {
278 assert(hstride == 0);
279 }
280
281 /* 7. */
282 if (execsize == 1 && width == 1) {
283 assert(hstride == 0);
284 assert(vstride == 0);
285 }
286
287 /* 8. */
288 if (vstride == 0 && hstride == 0) {
289 assert(width == 1);
290 }
291
292 /* 10. Check destination issues. */
293 }
294
/**
 * Report whether a 32-bit immediate fits the compacted instruction format.
 *
 * The compacted form keeps the low 12 bits as-is and replicates a single
 * bit through the top 20 bits, so the value is representable only when
 * those top 20 bits are all zero or all one.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   const unsigned top_bits = imm & ~0xfffu;

   if (top_bits == 0)
      return true;

   return top_bits == 0xfffff000;
}
304
305 void
306 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
307 struct brw_reg reg)
308 {
309 struct brw_context *brw = p->brw;
310
311 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
312 assert(reg.nr < 128);
313
314 gen7_convert_mrf_to_grf(p, &reg);
315
316 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
317 insn->header.opcode == BRW_OPCODE_SENDC)) {
318 /* Any source modifiers or regions will be ignored, since this just
319 * identifies the MRF/GRF to start reading the message contents from.
320 * Check for some likely failures.
321 */
322 assert(!reg.negate);
323 assert(!reg.abs);
324 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
325 }
326
327 validate_reg(insn, reg);
328
329 insn->bits1.da1.src0_reg_file = reg.file;
330 insn->bits1.da1.src0_reg_type =
331 brw_reg_type_to_hw_type(brw, reg.type, reg.file);
332 insn->bits2.da1.src0_abs = reg.abs;
333 insn->bits2.da1.src0_negate = reg.negate;
334 insn->bits2.da1.src0_address_mode = reg.address_mode;
335
336 if (reg.file == BRW_IMMEDIATE_VALUE) {
337 insn->bits3.ud = reg.dw1.ud;
338
339 /* The Bspec's section titled "Non-present Operands" claims that if src0
340 * is an immediate that src1's type must be the same as that of src0.
341 *
342 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
343 * that do not follow this rule. E.g., from the IVB/HSW table:
344 *
345 * DataTypeIndex 18-Bit Mapping Mapped Meaning
346 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
347 *
348 * And from the SNB table:
349 *
350 * DataTypeIndex 18-Bit Mapping Mapped Meaning
351 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
352 *
353 * Neither of these cause warnings from the simulator when used,
354 * compacted or otherwise. In fact, all compaction mappings that have an
355 * immediate in src0 use a:ud for src1.
356 *
357 * The GM45 instruction compaction tables do not contain mapped meanings
358 * so it's not clear whether it has the restriction. We'll assume it was
359 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
360 */
361 insn->bits1.da1.src1_reg_file = 0; /* arf */
362 if (brw->gen < 6) {
363 insn->bits1.da1.src1_reg_type = insn->bits1.da1.src0_reg_type;
364 } else {
365 insn->bits1.da1.src1_reg_type = BRW_HW_REG_TYPE_UD;
366 }
367
368 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
369 * for immediate values. Presumably the hardware engineers realized
370 * that the only useful floating-point value that could be represented
371 * in this format is 0.0, which can also be represented as a VF-typed
372 * immediate, so they gave us the previously mentioned mapping on IVB+.
373 *
374 * Strangely, we do have a mapping for imm:f in src1, so we don't need
375 * to do this there.
376 *
377 * If we see a 0.0:F, change the type to VF so that it can be compacted.
378 */
379 if (insn->bits3.ud == 0x0 &&
380 insn->bits1.da1.src0_reg_type == BRW_HW_REG_TYPE_F) {
381 insn->bits1.da1.src0_reg_type = BRW_HW_REG_IMM_TYPE_VF;
382 }
383
384 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
385 * set the types to :UD so the instruction can be compacted.
386 */
387 if (is_compactable_immediate(insn->bits3.ud) &&
388 insn->header.destreg__conditionalmod == BRW_CONDITIONAL_NONE &&
389 insn->bits1.da1.src0_reg_type == BRW_HW_REG_TYPE_D &&
390 insn->bits1.da1.dest_reg_type == BRW_HW_REG_TYPE_D) {
391 insn->bits1.da1.src0_reg_type = BRW_HW_REG_TYPE_UD;
392 insn->bits1.da1.dest_reg_type = BRW_HW_REG_TYPE_UD;
393 }
394 } else {
395 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
396 if (insn->header.access_mode == BRW_ALIGN_1) {
397 insn->bits2.da1.src0_subreg_nr = reg.subnr;
398 insn->bits2.da1.src0_reg_nr = reg.nr;
399 } else {
400 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
401 insn->bits2.da16.src0_reg_nr = reg.nr;
402 }
403 } else {
404 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
405
406 if (insn->header.access_mode == BRW_ALIGN_1) {
407 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
408 } else {
409 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
410 }
411 }
412
413 if (insn->header.access_mode == BRW_ALIGN_1) {
414 if (reg.width == BRW_WIDTH_1 &&
415 insn->header.execution_size == BRW_EXECUTE_1) {
416 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
417 insn->bits2.da1.src0_width = BRW_WIDTH_1;
418 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
419 } else {
420 insn->bits2.da1.src0_horiz_stride = reg.hstride;
421 insn->bits2.da1.src0_width = reg.width;
422 insn->bits2.da1.src0_vert_stride = reg.vstride;
423 }
424 } else {
425 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
426 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
427 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
428 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
429
430 /* This is an oddity of the fact we're using the same
431 * descriptions for registers in align_16 as align_1:
432 */
433 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
434 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
435 else
436 insn->bits2.da16.src0_vert_stride = reg.vstride;
437 }
438 }
439 }
440
441
442 void
443 brw_set_src1(struct brw_compile *p,
444 struct brw_instruction *insn,
445 struct brw_reg reg)
446 {
447 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
448
449 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
450 assert(reg.nr < 128);
451
452 gen7_convert_mrf_to_grf(p, &reg);
453
454 validate_reg(insn, reg);
455
456 insn->bits1.da1.src1_reg_file = reg.file;
457 insn->bits1.da1.src1_reg_type =
458 brw_reg_type_to_hw_type(p->brw, reg.type, reg.file);
459 insn->bits3.da1.src1_abs = reg.abs;
460 insn->bits3.da1.src1_negate = reg.negate;
461
462 /* Only src1 can be immediate in two-argument instructions.
463 */
464 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
465
466 if (reg.file == BRW_IMMEDIATE_VALUE) {
467 insn->bits3.ud = reg.dw1.ud;
468 } else {
469 /* This is a hardware restriction, which may or may not be lifted
470 * in the future:
471 */
472 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
473 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
474
475 if (insn->header.access_mode == BRW_ALIGN_1) {
476 insn->bits3.da1.src1_subreg_nr = reg.subnr;
477 insn->bits3.da1.src1_reg_nr = reg.nr;
478 } else {
479 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
480 insn->bits3.da16.src1_reg_nr = reg.nr;
481 }
482
483 if (insn->header.access_mode == BRW_ALIGN_1) {
484 if (reg.width == BRW_WIDTH_1 &&
485 insn->header.execution_size == BRW_EXECUTE_1) {
486 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
487 insn->bits3.da1.src1_width = BRW_WIDTH_1;
488 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
489 } else {
490 insn->bits3.da1.src1_horiz_stride = reg.hstride;
491 insn->bits3.da1.src1_width = reg.width;
492 insn->bits3.da1.src1_vert_stride = reg.vstride;
493 }
494 } else {
495 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
496 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
497 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
498 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
499
500 /* This is an oddity of the fact we're using the same
501 * descriptions for registers in align_16 as align_1:
502 */
503 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
504 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
505 else
506 insn->bits3.da16.src1_vert_stride = reg.vstride;
507 }
508 }
509 }
510
511 /**
512 * Set the Message Descriptor and Extended Message Descriptor fields
513 * for SEND messages.
514 *
515 * \note This zeroes out the Function Control bits, so it must be called
516 * \b before filling out any message-specific data. Callers can
517 * choose not to fill in irrelevant bits; they will be zero.
518 */
519 static void
520 brw_set_message_descriptor(struct brw_compile *p,
521 struct brw_instruction *inst,
522 enum brw_message_target sfid,
523 unsigned msg_length,
524 unsigned response_length,
525 bool header_present,
526 bool end_of_thread)
527 {
528 struct brw_context *brw = p->brw;
529
530 brw_set_src1(p, inst, brw_imm_d(0));
531
532 if (brw->gen >= 5) {
533 inst->bits3.generic_gen5.header_present = header_present;
534 inst->bits3.generic_gen5.response_length = response_length;
535 inst->bits3.generic_gen5.msg_length = msg_length;
536 inst->bits3.generic_gen5.end_of_thread = end_of_thread;
537
538 if (brw->gen >= 6) {
539 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
540 inst->header.destreg__conditionalmod = sfid;
541 } else {
542 /* Set Extended Message Descriptor (ex_desc) */
543 inst->bits2.send_gen5.sfid = sfid;
544 inst->bits2.send_gen5.end_of_thread = end_of_thread;
545 }
546 } else {
547 inst->bits3.generic.response_length = response_length;
548 inst->bits3.generic.msg_length = msg_length;
549 inst->bits3.generic.msg_target = sfid;
550 inst->bits3.generic.end_of_thread = end_of_thread;
551 }
552 }
553
554 static void brw_set_math_message( struct brw_compile *p,
555 struct brw_instruction *insn,
556 unsigned function,
557 unsigned integer_type,
558 bool low_precision,
559 unsigned dataType )
560 {
561 struct brw_context *brw = p->brw;
562 unsigned msg_length;
563 unsigned response_length;
564
565 /* Infer message length from the function */
566 switch (function) {
567 case BRW_MATH_FUNCTION_POW:
568 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
569 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
570 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
571 msg_length = 2;
572 break;
573 default:
574 msg_length = 1;
575 break;
576 }
577
578 /* Infer response length from the function */
579 switch (function) {
580 case BRW_MATH_FUNCTION_SINCOS:
581 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
582 response_length = 2;
583 break;
584 default:
585 response_length = 1;
586 break;
587 }
588
589
590 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
591 msg_length, response_length, false, false);
592 if (brw->gen == 5) {
593 insn->bits3.math_gen5.function = function;
594 insn->bits3.math_gen5.int_type = integer_type;
595 insn->bits3.math_gen5.precision = low_precision;
596 insn->bits3.math_gen5.saturate = insn->header.saturate;
597 insn->bits3.math_gen5.data_type = dataType;
598 insn->bits3.math_gen5.snapshot = 0;
599 } else {
600 insn->bits3.math.function = function;
601 insn->bits3.math.int_type = integer_type;
602 insn->bits3.math.precision = low_precision;
603 insn->bits3.math.saturate = insn->header.saturate;
604 insn->bits3.math.data_type = dataType;
605 }
606 insn->header.saturate = 0;
607 }
608
609
610 static void brw_set_ff_sync_message(struct brw_compile *p,
611 struct brw_instruction *insn,
612 bool allocate,
613 unsigned response_length,
614 bool end_of_thread)
615 {
616 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
617 1, response_length, true, end_of_thread);
618 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
619 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
620 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
621 insn->bits3.urb_gen5.allocate = allocate;
622 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
623 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
624 }
625
626 static void brw_set_urb_message( struct brw_compile *p,
627 struct brw_instruction *insn,
628 enum brw_urb_write_flags flags,
629 unsigned msg_length,
630 unsigned response_length,
631 unsigned offset,
632 unsigned swizzle_control )
633 {
634 struct brw_context *brw = p->brw;
635
636 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
637 msg_length, response_length, true,
638 flags & BRW_URB_WRITE_EOT);
639 if (brw->gen == 7) {
640 if (flags & BRW_URB_WRITE_OWORD) {
641 assert(msg_length == 2); /* header + one OWORD of data */
642 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
643 } else {
644 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
645 }
646 insn->bits3.urb_gen7.offset = offset;
647 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
648 insn->bits3.urb_gen7.swizzle_control = swizzle_control;
649 insn->bits3.urb_gen7.per_slot_offset =
650 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
651 insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
652 } else if (brw->gen >= 5) {
653 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
654 insn->bits3.urb_gen5.offset = offset;
655 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
656 insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
657 insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
658 insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
659 } else {
660 insn->bits3.urb.opcode = 0; /* ? */
661 insn->bits3.urb.offset = offset;
662 insn->bits3.urb.swizzle_control = swizzle_control;
663 insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
664 insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
665 insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
666 }
667 }
668
669 void
670 brw_set_dp_write_message(struct brw_compile *p,
671 struct brw_instruction *insn,
672 unsigned binding_table_index,
673 unsigned msg_control,
674 unsigned msg_type,
675 unsigned msg_length,
676 bool header_present,
677 unsigned last_render_target,
678 unsigned response_length,
679 unsigned end_of_thread,
680 unsigned send_commit_msg)
681 {
682 struct brw_context *brw = p->brw;
683 unsigned sfid;
684
685 if (brw->gen >= 7) {
686 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
687 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
688 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
689 else
690 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
691 } else if (brw->gen == 6) {
692 /* Use the render cache for all write messages. */
693 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
694 } else {
695 sfid = BRW_SFID_DATAPORT_WRITE;
696 }
697
698 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
699 header_present, end_of_thread);
700
701 if (brw->gen >= 7) {
702 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
703 insn->bits3.gen7_dp.msg_control = msg_control;
704 insn->bits3.gen7_dp.last_render_target = last_render_target;
705 insn->bits3.gen7_dp.msg_type = msg_type;
706 } else if (brw->gen == 6) {
707 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
708 insn->bits3.gen6_dp.msg_control = msg_control;
709 insn->bits3.gen6_dp.last_render_target = last_render_target;
710 insn->bits3.gen6_dp.msg_type = msg_type;
711 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
712 } else if (brw->gen == 5) {
713 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
714 insn->bits3.dp_write_gen5.msg_control = msg_control;
715 insn->bits3.dp_write_gen5.last_render_target = last_render_target;
716 insn->bits3.dp_write_gen5.msg_type = msg_type;
717 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
718 } else {
719 insn->bits3.dp_write.binding_table_index = binding_table_index;
720 insn->bits3.dp_write.msg_control = msg_control;
721 insn->bits3.dp_write.last_render_target = last_render_target;
722 insn->bits3.dp_write.msg_type = msg_type;
723 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
724 }
725 }
726
727 void
728 brw_set_dp_read_message(struct brw_compile *p,
729 struct brw_instruction *insn,
730 unsigned binding_table_index,
731 unsigned msg_control,
732 unsigned msg_type,
733 unsigned target_cache,
734 unsigned msg_length,
735 bool header_present,
736 unsigned response_length)
737 {
738 struct brw_context *brw = p->brw;
739 unsigned sfid;
740
741 if (brw->gen >= 7) {
742 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
743 } else if (brw->gen == 6) {
744 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
745 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
746 else
747 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
748 } else {
749 sfid = BRW_SFID_DATAPORT_READ;
750 }
751
752 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
753 header_present, false);
754
755 if (brw->gen >= 7) {
756 insn->bits3.gen7_dp.binding_table_index = binding_table_index;
757 insn->bits3.gen7_dp.msg_control = msg_control;
758 insn->bits3.gen7_dp.last_render_target = 0;
759 insn->bits3.gen7_dp.msg_type = msg_type;
760 } else if (brw->gen == 6) {
761 insn->bits3.gen6_dp.binding_table_index = binding_table_index;
762 insn->bits3.gen6_dp.msg_control = msg_control;
763 insn->bits3.gen6_dp.last_render_target = 0;
764 insn->bits3.gen6_dp.msg_type = msg_type;
765 insn->bits3.gen6_dp.send_commit_msg = 0;
766 } else if (brw->gen == 5) {
767 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
768 insn->bits3.dp_read_gen5.msg_control = msg_control;
769 insn->bits3.dp_read_gen5.msg_type = msg_type;
770 insn->bits3.dp_read_gen5.target_cache = target_cache;
771 } else if (brw->is_g4x) {
772 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
773 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
774 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
775 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
776 } else {
777 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
778 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
779 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
780 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
781 }
782 }
783
784 void
785 brw_set_sampler_message(struct brw_compile *p,
786 struct brw_instruction *insn,
787 unsigned binding_table_index,
788 unsigned sampler,
789 unsigned msg_type,
790 unsigned response_length,
791 unsigned msg_length,
792 unsigned header_present,
793 unsigned simd_mode,
794 unsigned return_format)
795 {
796 struct brw_context *brw = p->brw;
797
798 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
799 response_length, header_present, false);
800
801 if (brw->gen >= 7) {
802 insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
803 insn->bits3.sampler_gen7.sampler = sampler;
804 insn->bits3.sampler_gen7.msg_type = msg_type;
805 insn->bits3.sampler_gen7.simd_mode = simd_mode;
806 } else if (brw->gen >= 5) {
807 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
808 insn->bits3.sampler_gen5.sampler = sampler;
809 insn->bits3.sampler_gen5.msg_type = msg_type;
810 insn->bits3.sampler_gen5.simd_mode = simd_mode;
811 } else if (brw->is_g4x) {
812 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
813 insn->bits3.sampler_g4x.sampler = sampler;
814 insn->bits3.sampler_g4x.msg_type = msg_type;
815 } else {
816 insn->bits3.sampler.binding_table_index = binding_table_index;
817 insn->bits3.sampler.sampler = sampler;
818 insn->bits3.sampler.msg_type = msg_type;
819 insn->bits3.sampler.return_format = return_format;
820 }
821 }
822
823
824 #define next_insn brw_next_insn
825 struct brw_instruction *
826 brw_next_insn(struct brw_compile *p, unsigned opcode)
827 {
828 struct brw_instruction *insn;
829
830 if (p->nr_insn + 1 > p->store_size) {
831 p->store_size <<= 1;
832 p->store = reralloc(p->mem_ctx, p->store,
833 struct brw_instruction, p->store_size);
834 }
835
836 p->next_insn_offset += 16;
837 insn = &p->store[p->nr_insn++];
838 memcpy(insn, p->current, sizeof(*insn));
839
840 insn->header.opcode = opcode;
841 return insn;
842 }
843
844 static struct brw_instruction *brw_alu1( struct brw_compile *p,
845 unsigned opcode,
846 struct brw_reg dest,
847 struct brw_reg src )
848 {
849 struct brw_instruction *insn = next_insn(p, opcode);
850 brw_set_dest(p, insn, dest);
851 brw_set_src0(p, insn, src);
852 return insn;
853 }
854
855 static struct brw_instruction *brw_alu2(struct brw_compile *p,
856 unsigned opcode,
857 struct brw_reg dest,
858 struct brw_reg src0,
859 struct brw_reg src1 )
860 {
861 struct brw_instruction *insn = next_insn(p, opcode);
862 brw_set_dest(p, insn, dest);
863 brw_set_src0(p, insn, src0);
864 brw_set_src1(p, insn, src1);
865 return insn;
866 }
867
868 static int
869 get_3src_subreg_nr(struct brw_reg reg)
870 {
871 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
872 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
873 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
874 } else {
875 return reg.subnr / 4;
876 }
877 }
878
879 static struct brw_instruction *brw_alu3(struct brw_compile *p,
880 unsigned opcode,
881 struct brw_reg dest,
882 struct brw_reg src0,
883 struct brw_reg src1,
884 struct brw_reg src2)
885 {
886 struct brw_context *brw = p->brw;
887 struct brw_instruction *insn = next_insn(p, opcode);
888
889 gen7_convert_mrf_to_grf(p, &dest);
890
891 assert(insn->header.access_mode == BRW_ALIGN_16);
892
893 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
894 dest.file == BRW_MESSAGE_REGISTER_FILE);
895 assert(dest.nr < 128);
896 assert(dest.address_mode == BRW_ADDRESS_DIRECT);
897 assert(dest.type == BRW_REGISTER_TYPE_F ||
898 dest.type == BRW_REGISTER_TYPE_D ||
899 dest.type == BRW_REGISTER_TYPE_UD);
900 insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
901 insn->bits1.da3src.dest_reg_nr = dest.nr;
902 insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
903 insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
904 guess_execution_size(p, insn, dest);
905
906 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
907 assert(src0.address_mode == BRW_ADDRESS_DIRECT);
908 assert(src0.nr < 128);
909 insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
910 insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
911 insn->bits2.da3src.src0_reg_nr = src0.nr;
912 insn->bits1.da3src.src0_abs = src0.abs;
913 insn->bits1.da3src.src0_negate = src0.negate;
914 insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;
915
916 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
917 assert(src1.address_mode == BRW_ADDRESS_DIRECT);
918 assert(src1.nr < 128);
919 insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
920 insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
921 insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
922 insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
923 insn->bits3.da3src.src1_reg_nr = src1.nr;
924 insn->bits1.da3src.src1_abs = src1.abs;
925 insn->bits1.da3src.src1_negate = src1.negate;
926
927 assert(src2.file == BRW_GENERAL_REGISTER_FILE);
928 assert(src2.address_mode == BRW_ADDRESS_DIRECT);
929 assert(src2.nr < 128);
930 insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
931 insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
932 insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
933 insn->bits3.da3src.src2_reg_nr = src2.nr;
934 insn->bits1.da3src.src2_abs = src2.abs;
935 insn->bits1.da3src.src2_negate = src2.negate;
936
937 if (brw->gen >= 7) {
938 /* Set both the source and destination types based on dest.type,
939 * ignoring the source register types. The MAD and LRP emitters ensure
940 * that all four types are float. The BFE and BFI2 emitters, however,
941 * may send us mixed D and UD types and want us to ignore that and use
942 * the destination type.
943 */
944 switch (dest.type) {
945 case BRW_REGISTER_TYPE_F:
946 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
947 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
948 break;
949 case BRW_REGISTER_TYPE_D:
950 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
951 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
952 break;
953 case BRW_REGISTER_TYPE_UD:
954 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
955 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
956 break;
957 }
958 }
959
960 return insn;
961 }
962
963
964 /***********************************************************************
965 * Convenience routines.
966 */
/* Define brw_<OP>(p, dest, src0): a one-source emitter that forwards to
 * brw_alu1() with BRW_OPCODE_<OP>.
 */
#define ALU1(OP)                                                     \
struct brw_instruction *                                             \
brw_##OP(struct brw_compile *p,                                      \
         struct brw_reg dest,                                        \
         struct brw_reg src0)                                        \
{                                                                    \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);                  \
}
974
/* Define brw_<OP>(p, dest, src0, src1): a two-source emitter that forwards
 * to brw_alu2() with BRW_OPCODE_<OP>.
 */
#define ALU2(OP)                                                     \
struct brw_instruction *                                             \
brw_##OP(struct brw_compile *p,                                      \
         struct brw_reg dest,                                        \
         struct brw_reg src0,                                        \
         struct brw_reg src1)                                        \
{                                                                    \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);            \
}
983
/* Define brw_<OP>(p, dest, src0, src1, src2): a three-source emitter that
 * forwards to brw_alu3() with BRW_OPCODE_<OP>.  No operand type checks are
 * made here.
 */
#define ALU3(OP)                                                     \
struct brw_instruction *                                             \
brw_##OP(struct brw_compile *p,                                      \
         struct brw_reg dest,                                        \
         struct brw_reg src0,                                        \
         struct brw_reg src1,                                        \
         struct brw_reg src2)                                        \
{                                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);      \
}
993
/* Float-only variant of ALU3: the generated brw_<OP>() asserts that the
 * destination and all three sources are of type F before forwarding to
 * brw_alu3().
 */
#define ALU3F(OP)                                                    \
struct brw_instruction *                                             \
brw_##OP(struct brw_compile *p,                                      \
         struct brw_reg dest,                                        \
         struct brw_reg src0,                                        \
         struct brw_reg src1,                                        \
         struct brw_reg src2)                                        \
{                                                                    \
   assert(dest.type == BRW_REGISTER_TYPE_F);                         \
   assert(src0.type == BRW_REGISTER_TYPE_F);                         \
   assert(src1.type == BRW_REGISTER_TYPE_F);                         \
   assert(src2.type == BRW_REGISTER_TYPE_F);                         \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);      \
}
1007
/* Rounding operations other than RNDD take two instructions before gen6.
 * The first stores a rounded value in dest (possibly rounded the wrong way)
 * and also sets a per-channel "increment bit" in the flag register.  A
 * predicated ADD of 1.0 then fixes up dest so it holds the desired result.
 *
 * Sandybridge and later appear to round correctly without the ADD, so only
 * the rounding instruction itself is emitted there.
 */
#define ROUND(OP)                                                          \
void                                                                       \
brw_##OP(struct brw_compile *p,                                            \
         struct brw_reg dest,                                              \
         struct brw_reg src)                                               \
{                                                                          \
   struct brw_instruction *rnd = next_insn(p, BRW_OPCODE_##OP);            \
                                                                           \
   brw_set_dest(p, rnd, dest);                                             \
   brw_set_src0(p, rnd, src);                                              \
                                                                           \
   if (p->brw->gen >= 6)                                                   \
      return;                                                              \
                                                                           \
   /* turn on round-increments */                                          \
   rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;                \
                                                                           \
   struct brw_instruction *add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));  \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;                   \
}
1032
1033
1034 ALU1(MOV)
1035 ALU2(SEL)
1036 ALU1(NOT)
1037 ALU2(AND)
1038 ALU2(OR)
1039 ALU2(XOR)
1040 ALU2(SHR)
1041 ALU2(SHL)
1042 ALU2(ASR)
1043 ALU1(F32TO16)
1044 ALU1(F16TO32)
1045 ALU1(FRC)
1046 ALU1(RNDD)
1047 ALU2(MAC)
1048 ALU2(MACH)
1049 ALU1(LZD)
1050 ALU2(DP4)
1051 ALU2(DPH)
1052 ALU2(DP3)
1053 ALU2(DP2)
1054 ALU2(LINE)
1055 ALU2(PLN)
1056 ALU3F(MAD)
1057 ALU3F(LRP)
1058 ALU1(BFREV)
1059 ALU3(BFE)
1060 ALU2(BFI1)
1061 ALU3(BFI2)
1062 ALU1(FBH)
1063 ALU1(FBL)
1064 ALU1(CBIT)
1065 ALU2(ADDC)
1066 ALU2(SUBB)
1067
1068 ROUND(RNDZ)
1069 ROUND(RNDE)
1070
1071
1072 struct brw_instruction *brw_ADD(struct brw_compile *p,
1073 struct brw_reg dest,
1074 struct brw_reg src0,
1075 struct brw_reg src1)
1076 {
1077 /* 6.2.2: add */
1078 if (src0.type == BRW_REGISTER_TYPE_F ||
1079 (src0.file == BRW_IMMEDIATE_VALUE &&
1080 src0.type == BRW_REGISTER_TYPE_VF)) {
1081 assert(src1.type != BRW_REGISTER_TYPE_UD);
1082 assert(src1.type != BRW_REGISTER_TYPE_D);
1083 }
1084
1085 if (src1.type == BRW_REGISTER_TYPE_F ||
1086 (src1.file == BRW_IMMEDIATE_VALUE &&
1087 src1.type == BRW_REGISTER_TYPE_VF)) {
1088 assert(src0.type != BRW_REGISTER_TYPE_UD);
1089 assert(src0.type != BRW_REGISTER_TYPE_D);
1090 }
1091
1092 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1093 }
1094
1095 struct brw_instruction *brw_AVG(struct brw_compile *p,
1096 struct brw_reg dest,
1097 struct brw_reg src0,
1098 struct brw_reg src1)
1099 {
1100 assert(dest.type == src0.type);
1101 assert(src0.type == src1.type);
1102 switch (src0.type) {
1103 case BRW_REGISTER_TYPE_B:
1104 case BRW_REGISTER_TYPE_UB:
1105 case BRW_REGISTER_TYPE_W:
1106 case BRW_REGISTER_TYPE_UW:
1107 case BRW_REGISTER_TYPE_D:
1108 case BRW_REGISTER_TYPE_UD:
1109 break;
1110 default:
1111 assert(!"Bad type for brw_AVG");
1112 }
1113
1114 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1115 }
1116
1117 struct brw_instruction *brw_MUL(struct brw_compile *p,
1118 struct brw_reg dest,
1119 struct brw_reg src0,
1120 struct brw_reg src1)
1121 {
1122 /* 6.32.38: mul */
1123 if (src0.type == BRW_REGISTER_TYPE_D ||
1124 src0.type == BRW_REGISTER_TYPE_UD ||
1125 src1.type == BRW_REGISTER_TYPE_D ||
1126 src1.type == BRW_REGISTER_TYPE_UD) {
1127 assert(dest.type != BRW_REGISTER_TYPE_F);
1128 }
1129
1130 if (src0.type == BRW_REGISTER_TYPE_F ||
1131 (src0.file == BRW_IMMEDIATE_VALUE &&
1132 src0.type == BRW_REGISTER_TYPE_VF)) {
1133 assert(src1.type != BRW_REGISTER_TYPE_UD);
1134 assert(src1.type != BRW_REGISTER_TYPE_D);
1135 }
1136
1137 if (src1.type == BRW_REGISTER_TYPE_F ||
1138 (src1.file == BRW_IMMEDIATE_VALUE &&
1139 src1.type == BRW_REGISTER_TYPE_VF)) {
1140 assert(src0.type != BRW_REGISTER_TYPE_UD);
1141 assert(src0.type != BRW_REGISTER_TYPE_D);
1142 }
1143
1144 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1145 src0.nr != BRW_ARF_ACCUMULATOR);
1146 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1147 src1.nr != BRW_ARF_ACCUMULATOR);
1148
1149 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1150 }
1151
1152
1153 void brw_NOP(struct brw_compile *p)
1154 {
1155 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1156 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1157 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1158 brw_set_src1(p, insn, brw_imm_ud(0x0));
1159 }
1160
1161
1162
1163
1164
1165 /***********************************************************************
1166 * Comparisons, if/else/endif
1167 */
1168
1169 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1170 struct brw_reg index,
1171 unsigned predicate_control)
1172 {
1173 struct brw_reg ip = brw_ip_reg();
1174 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1175
1176 insn->header.execution_size = 1;
1177 insn->header.compression_control = BRW_COMPRESSION_NONE;
1178 insn->header.mask_control = BRW_MASK_DISABLE;
1179 insn->header.predicate_control = predicate_control;
1180
1181 return insn;
1182 }
1183
1184 static void
1185 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1186 {
1187 p->if_stack[p->if_stack_depth] = inst - p->store;
1188
1189 p->if_stack_depth++;
1190 if (p->if_stack_array_size <= p->if_stack_depth) {
1191 p->if_stack_array_size *= 2;
1192 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1193 p->if_stack_array_size);
1194 }
1195 }
1196
1197 static struct brw_instruction *
1198 pop_if_stack(struct brw_compile *p)
1199 {
1200 p->if_stack_depth--;
1201 return &p->store[p->if_stack[p->if_stack_depth]];
1202 }
1203
1204 static void
1205 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1206 {
1207 if (p->loop_stack_array_size < p->loop_stack_depth) {
1208 p->loop_stack_array_size *= 2;
1209 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1210 p->loop_stack_array_size);
1211 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1212 p->loop_stack_array_size);
1213 }
1214
1215 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1216 p->loop_stack_depth++;
1217 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1218 }
1219
1220 static struct brw_instruction *
1221 get_inner_do_insn(struct brw_compile *p)
1222 {
1223 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1224 }
1225
1226 /* EU takes the value from the flag register and pushes it onto some
1227 * sort of a stack (presumably merging with any flag value already on
1228 * the stack). Within an if block, the flags at the top of the stack
1229 * control execution on each channel of the unit, eg. on each of the
1230 * 16 pixel values in our wm programs.
1231 *
1232 * When the matching 'else' instruction is reached (presumably by
1233 * countdown of the instruction count patched in by our ELSE/ENDIF
1234 * functions), the relevent flags are inverted.
1235 *
1236 * When the matching 'endif' instruction is reached, the flags are
1237 * popped off. If the stack is now empty, normal execution resumes.
1238 */
1239 struct brw_instruction *
1240 brw_IF(struct brw_compile *p, unsigned execute_size)
1241 {
1242 struct brw_context *brw = p->brw;
1243 struct brw_instruction *insn;
1244
1245 insn = next_insn(p, BRW_OPCODE_IF);
1246
1247 /* Override the defaults for this instruction:
1248 */
1249 if (brw->gen < 6) {
1250 brw_set_dest(p, insn, brw_ip_reg());
1251 brw_set_src0(p, insn, brw_ip_reg());
1252 brw_set_src1(p, insn, brw_imm_d(0x0));
1253 } else if (brw->gen == 6) {
1254 brw_set_dest(p, insn, brw_imm_w(0));
1255 insn->bits1.branch_gen6.jump_count = 0;
1256 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1257 brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1258 } else {
1259 brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1260 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1261 brw_set_src1(p, insn, brw_imm_ud(0));
1262 insn->bits3.break_cont.jip = 0;
1263 insn->bits3.break_cont.uip = 0;
1264 }
1265
1266 insn->header.execution_size = execute_size;
1267 insn->header.compression_control = BRW_COMPRESSION_NONE;
1268 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
1269 insn->header.mask_control = BRW_MASK_ENABLE;
1270 if (!p->single_program_flow && brw->gen < 6)
1271 insn->header.thread_control = BRW_THREAD_SWITCH;
1272
1273 push_if_stack(p, insn);
1274 p->if_depth_in_loop[p->loop_stack_depth]++;
1275 return insn;
1276 }
1277
1278 /* This function is only used for gen6-style IF instructions with an
1279 * embedded comparison (conditional modifier). It is not used on gen7.
1280 */
1281 struct brw_instruction *
1282 gen6_IF(struct brw_compile *p, uint32_t conditional,
1283 struct brw_reg src0, struct brw_reg src1)
1284 {
1285 struct brw_instruction *insn;
1286
1287 insn = next_insn(p, BRW_OPCODE_IF);
1288
1289 brw_set_dest(p, insn, brw_imm_w(0));
1290 if (p->compressed) {
1291 insn->header.execution_size = BRW_EXECUTE_16;
1292 } else {
1293 insn->header.execution_size = BRW_EXECUTE_8;
1294 }
1295 insn->bits1.branch_gen6.jump_count = 0;
1296 brw_set_src0(p, insn, src0);
1297 brw_set_src1(p, insn, src1);
1298
1299 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1300 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1301 insn->header.destreg__conditionalmod = conditional;
1302
1303 push_if_stack(p, insn);
1304 return insn;
1305 }
1306
1307 /**
1308 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1309 */
1310 static void
1311 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1312 struct brw_instruction *if_inst,
1313 struct brw_instruction *else_inst)
1314 {
1315 /* The next instruction (where the ENDIF would be, if it existed) */
1316 struct brw_instruction *next_inst = &p->store[p->nr_insn];
1317
1318 assert(p->single_program_flow);
1319 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1320 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1321 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1322
1323 /* Convert IF to an ADD instruction that moves the instruction pointer
1324 * to the first instruction of the ELSE block. If there is no ELSE
1325 * block, point to where ENDIF would be. Reverse the predicate.
1326 *
1327 * There's no need to execute an ENDIF since we don't need to do any
1328 * stack operations, and if we're currently executing, we just want to
1329 * continue normally.
1330 */
1331 if_inst->header.opcode = BRW_OPCODE_ADD;
1332 if_inst->header.predicate_inverse = 1;
1333
1334 if (else_inst != NULL) {
1335 /* Convert ELSE to an ADD instruction that points where the ENDIF
1336 * would be.
1337 */
1338 else_inst->header.opcode = BRW_OPCODE_ADD;
1339
1340 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1341 else_inst->bits3.ud = (next_inst - else_inst) * 16;
1342 } else {
1343 if_inst->bits3.ud = (next_inst - if_inst) * 16;
1344 }
1345 }
1346
1347 /**
1348 * Patch IF and ELSE instructions with appropriate jump targets.
1349 */
1350 static void
1351 patch_IF_ELSE(struct brw_compile *p,
1352 struct brw_instruction *if_inst,
1353 struct brw_instruction *else_inst,
1354 struct brw_instruction *endif_inst)
1355 {
1356 struct brw_context *brw = p->brw;
1357
1358 /* We shouldn't be patching IF and ELSE instructions in single program flow
1359 * mode when gen < 6, because in single program flow mode on those
1360 * platforms, we convert flow control instructions to conditional ADDs that
1361 * operate on IP (see brw_ENDIF).
1362 *
1363 * However, on Gen6, writing to IP doesn't work in single program flow mode
1364 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1365 * not be updated by non-flow control instructions."). And on later
1366 * platforms, there is no significant benefit to converting control flow
1367 * instructions to conditional ADDs. So we do patch IF and ELSE
1368 * instructions in single program flow mode on those platforms.
1369 */
1370 if (brw->gen < 6)
1371 assert(!p->single_program_flow);
1372
1373 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1374 assert(endif_inst != NULL);
1375 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1376
1377 unsigned br = 1;
1378 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1379 * requires 2 chunks.
1380 */
1381 if (brw->gen >= 5)
1382 br = 2;
1383
1384 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1385 endif_inst->header.execution_size = if_inst->header.execution_size;
1386
1387 if (else_inst == NULL) {
1388 /* Patch IF -> ENDIF */
1389 if (brw->gen < 6) {
1390 /* Turn it into an IFF, which means no mask stack operations for
1391 * all-false and jumping past the ENDIF.
1392 */
1393 if_inst->header.opcode = BRW_OPCODE_IFF;
1394 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1395 if_inst->bits3.if_else.pop_count = 0;
1396 if_inst->bits3.if_else.pad0 = 0;
1397 } else if (brw->gen == 6) {
1398 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1399 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1400 } else {
1401 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1402 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1403 }
1404 } else {
1405 else_inst->header.execution_size = if_inst->header.execution_size;
1406
1407 /* Patch IF -> ELSE */
1408 if (brw->gen < 6) {
1409 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1410 if_inst->bits3.if_else.pop_count = 0;
1411 if_inst->bits3.if_else.pad0 = 0;
1412 } else if (brw->gen == 6) {
1413 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1414 }
1415
1416 /* Patch ELSE -> ENDIF */
1417 if (brw->gen < 6) {
1418 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1419 * matching ENDIF.
1420 */
1421 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1422 else_inst->bits3.if_else.pop_count = 1;
1423 else_inst->bits3.if_else.pad0 = 0;
1424 } else if (brw->gen == 6) {
1425 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1426 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1427 } else {
1428 /* The IF instruction's JIP should point just past the ELSE */
1429 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1430 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1431 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1432 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1433 }
1434 }
1435 }
1436
1437 void
1438 brw_ELSE(struct brw_compile *p)
1439 {
1440 struct brw_context *brw = p->brw;
1441 struct brw_instruction *insn;
1442
1443 insn = next_insn(p, BRW_OPCODE_ELSE);
1444
1445 if (brw->gen < 6) {
1446 brw_set_dest(p, insn, brw_ip_reg());
1447 brw_set_src0(p, insn, brw_ip_reg());
1448 brw_set_src1(p, insn, brw_imm_d(0x0));
1449 } else if (brw->gen == 6) {
1450 brw_set_dest(p, insn, brw_imm_w(0));
1451 insn->bits1.branch_gen6.jump_count = 0;
1452 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1453 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1454 } else {
1455 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1456 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1457 brw_set_src1(p, insn, brw_imm_ud(0));
1458 insn->bits3.break_cont.jip = 0;
1459 insn->bits3.break_cont.uip = 0;
1460 }
1461
1462 insn->header.compression_control = BRW_COMPRESSION_NONE;
1463 insn->header.mask_control = BRW_MASK_ENABLE;
1464 if (!p->single_program_flow && brw->gen < 6)
1465 insn->header.thread_control = BRW_THREAD_SWITCH;
1466
1467 push_if_stack(p, insn);
1468 }
1469
1470 void
1471 brw_ENDIF(struct brw_compile *p)
1472 {
1473 struct brw_context *brw = p->brw;
1474 struct brw_instruction *insn = NULL;
1475 struct brw_instruction *else_inst = NULL;
1476 struct brw_instruction *if_inst = NULL;
1477 struct brw_instruction *tmp;
1478 bool emit_endif = true;
1479
1480 /* In single program flow mode, we can express IF and ELSE instructions
1481 * equivalently as ADD instructions that operate on IP. On platforms prior
1482 * to Gen6, flow control instructions cause an implied thread switch, so
1483 * this is a significant savings.
1484 *
1485 * However, on Gen6, writing to IP doesn't work in single program flow mode
1486 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1487 * not be updated by non-flow control instructions."). And on later
1488 * platforms, there is no significant benefit to converting control flow
1489 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1490 * Gen5.
1491 */
1492 if (brw->gen < 6 && p->single_program_flow)
1493 emit_endif = false;
1494
1495 /*
1496 * A single next_insn() may change the base adress of instruction store
1497 * memory(p->store), so call it first before referencing the instruction
1498 * store pointer from an index
1499 */
1500 if (emit_endif)
1501 insn = next_insn(p, BRW_OPCODE_ENDIF);
1502
1503 /* Pop the IF and (optional) ELSE instructions from the stack */
1504 p->if_depth_in_loop[p->loop_stack_depth]--;
1505 tmp = pop_if_stack(p);
1506 if (tmp->header.opcode == BRW_OPCODE_ELSE) {
1507 else_inst = tmp;
1508 tmp = pop_if_stack(p);
1509 }
1510 if_inst = tmp;
1511
1512 if (!emit_endif) {
1513 /* ENDIF is useless; don't bother emitting it. */
1514 convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1515 return;
1516 }
1517
1518 if (brw->gen < 6) {
1519 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1520 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1521 brw_set_src1(p, insn, brw_imm_d(0x0));
1522 } else if (brw->gen == 6) {
1523 brw_set_dest(p, insn, brw_imm_w(0));
1524 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1525 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1526 } else {
1527 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1528 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1529 brw_set_src1(p, insn, brw_imm_ud(0));
1530 }
1531
1532 insn->header.compression_control = BRW_COMPRESSION_NONE;
1533 insn->header.mask_control = BRW_MASK_ENABLE;
1534 if (brw->gen < 6)
1535 insn->header.thread_control = BRW_THREAD_SWITCH;
1536
1537 /* Also pop item off the stack in the endif instruction: */
1538 if (brw->gen < 6) {
1539 insn->bits3.if_else.jump_count = 0;
1540 insn->bits3.if_else.pop_count = 1;
1541 insn->bits3.if_else.pad0 = 0;
1542 } else if (brw->gen == 6) {
1543 insn->bits1.branch_gen6.jump_count = 2;
1544 } else {
1545 insn->bits3.break_cont.jip = 2;
1546 }
1547 patch_IF_ELSE(p, if_inst, else_inst, insn);
1548 }
1549
1550 struct brw_instruction *brw_BREAK(struct brw_compile *p)
1551 {
1552 struct brw_context *brw = p->brw;
1553 struct brw_instruction *insn;
1554
1555 insn = next_insn(p, BRW_OPCODE_BREAK);
1556 if (brw->gen >= 6) {
1557 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1558 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1559 brw_set_src1(p, insn, brw_imm_d(0x0));
1560 } else {
1561 brw_set_dest(p, insn, brw_ip_reg());
1562 brw_set_src0(p, insn, brw_ip_reg());
1563 brw_set_src1(p, insn, brw_imm_d(0x0));
1564 insn->bits3.if_else.pad0 = 0;
1565 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1566 }
1567 insn->header.compression_control = BRW_COMPRESSION_NONE;
1568 insn->header.execution_size = BRW_EXECUTE_8;
1569
1570 return insn;
1571 }
1572
1573 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1574 {
1575 struct brw_instruction *insn;
1576
1577 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1578 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1579 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1580 brw_set_dest(p, insn, brw_ip_reg());
1581 brw_set_src0(p, insn, brw_ip_reg());
1582 brw_set_src1(p, insn, brw_imm_d(0x0));
1583
1584 insn->header.compression_control = BRW_COMPRESSION_NONE;
1585 insn->header.execution_size = BRW_EXECUTE_8;
1586 return insn;
1587 }
1588
1589 struct brw_instruction *brw_CONT(struct brw_compile *p)
1590 {
1591 struct brw_instruction *insn;
1592 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1593 brw_set_dest(p, insn, brw_ip_reg());
1594 brw_set_src0(p, insn, brw_ip_reg());
1595 brw_set_src1(p, insn, brw_imm_d(0x0));
1596 insn->header.compression_control = BRW_COMPRESSION_NONE;
1597 insn->header.execution_size = BRW_EXECUTE_8;
1598 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1599 insn->bits3.if_else.pad0 = 0;
1600 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1601 return insn;
1602 }
1603
1604 struct brw_instruction *gen6_HALT(struct brw_compile *p)
1605 {
1606 struct brw_instruction *insn;
1607
1608 insn = next_insn(p, BRW_OPCODE_HALT);
1609 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1610 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1611 brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1612
1613 if (p->compressed) {
1614 insn->header.execution_size = BRW_EXECUTE_16;
1615 } else {
1616 insn->header.compression_control = BRW_COMPRESSION_NONE;
1617 insn->header.execution_size = BRW_EXECUTE_8;
1618 }
1619 return insn;
1620 }
1621
1622 /* DO/WHILE loop:
1623 *
1624 * The DO/WHILE is just an unterminated loop -- break or continue are
1625 * used for control within the loop. We have a few ways they can be
1626 * done.
1627 *
1628 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1629 * jip and no DO instruction.
1630 *
1631 * For non-uniform control flow pre-gen6, there's a DO instruction to
1632 * push the mask, and a WHILE to jump back, and BREAK to get out and
1633 * pop the mask.
1634 *
1635 * For gen6, there's no more mask stack, so no need for DO. WHILE
1636 * just points back to the first instruction of the loop.
1637 */
1638 struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
1639 {
1640 struct brw_context *brw = p->brw;
1641
1642 if (brw->gen >= 6 || p->single_program_flow) {
1643 push_loop_stack(p, &p->store[p->nr_insn]);
1644 return &p->store[p->nr_insn];
1645 } else {
1646 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1647
1648 push_loop_stack(p, insn);
1649
1650 /* Override the defaults for this instruction:
1651 */
1652 brw_set_dest(p, insn, brw_null_reg());
1653 brw_set_src0(p, insn, brw_null_reg());
1654 brw_set_src1(p, insn, brw_null_reg());
1655
1656 insn->header.compression_control = BRW_COMPRESSION_NONE;
1657 insn->header.execution_size = execute_size;
1658 insn->header.predicate_control = BRW_PREDICATE_NONE;
1659 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1660 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1661
1662 return insn;
1663 }
1664 }
1665
1666 /**
1667 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1668 * instruction here.
1669 *
1670 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1671 * nesting, since it can always just point to the end of the block/current loop.
1672 */
1673 static void
1674 brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
1675 {
1676 struct brw_context *brw = p->brw;
1677 struct brw_instruction *do_inst = get_inner_do_insn(p);
1678 struct brw_instruction *inst;
1679 int br = (brw->gen == 5) ? 2 : 1;
1680
1681 for (inst = while_inst - 1; inst != do_inst; inst--) {
1682 /* If the jump count is != 0, that means that this instruction has already
1683 * been patched because it's part of a loop inside of the one we're
1684 * patching.
1685 */
1686 if (inst->header.opcode == BRW_OPCODE_BREAK &&
1687 inst->bits3.if_else.jump_count == 0) {
1688 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
1689 } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
1690 inst->bits3.if_else.jump_count == 0) {
1691 inst->bits3.if_else.jump_count = br * (while_inst - inst);
1692 }
1693 }
1694 }
1695
1696 struct brw_instruction *brw_WHILE(struct brw_compile *p)
1697 {
1698 struct brw_context *brw = p->brw;
1699 struct brw_instruction *insn, *do_insn;
1700 unsigned br = 1;
1701
1702 if (brw->gen >= 5)
1703 br = 2;
1704
1705 if (brw->gen >= 7) {
1706 insn = next_insn(p, BRW_OPCODE_WHILE);
1707 do_insn = get_inner_do_insn(p);
1708
1709 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1710 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1711 brw_set_src1(p, insn, brw_imm_ud(0));
1712 insn->bits3.break_cont.jip = br * (do_insn - insn);
1713
1714 insn->header.execution_size = BRW_EXECUTE_8;
1715 } else if (brw->gen == 6) {
1716 insn = next_insn(p, BRW_OPCODE_WHILE);
1717 do_insn = get_inner_do_insn(p);
1718
1719 brw_set_dest(p, insn, brw_imm_w(0));
1720 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1721 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1722 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1723
1724 insn->header.execution_size = BRW_EXECUTE_8;
1725 } else {
1726 if (p->single_program_flow) {
1727 insn = next_insn(p, BRW_OPCODE_ADD);
1728 do_insn = get_inner_do_insn(p);
1729
1730 brw_set_dest(p, insn, brw_ip_reg());
1731 brw_set_src0(p, insn, brw_ip_reg());
1732 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1733 insn->header.execution_size = BRW_EXECUTE_1;
1734 } else {
1735 insn = next_insn(p, BRW_OPCODE_WHILE);
1736 do_insn = get_inner_do_insn(p);
1737
1738 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1739
1740 brw_set_dest(p, insn, brw_ip_reg());
1741 brw_set_src0(p, insn, brw_ip_reg());
1742 brw_set_src1(p, insn, brw_imm_d(0));
1743
1744 insn->header.execution_size = do_insn->header.execution_size;
1745 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1746 insn->bits3.if_else.pop_count = 0;
1747 insn->bits3.if_else.pad0 = 0;
1748
1749 brw_patch_break_cont(p, insn);
1750 }
1751 }
1752 insn->header.compression_control = BRW_COMPRESSION_NONE;
1753
1754 p->loop_stack_depth--;
1755
1756 return insn;
1757 }
1758
1759 /* FORWARD JUMPS:
1760 */
1761 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1762 {
1763 struct brw_context *brw = p->brw;
1764 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1765 unsigned jmpi = 1;
1766
1767 if (brw->gen >= 5)
1768 jmpi = 2;
1769
1770 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1771 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1772
1773 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1774 }
1775
1776 /* To integrate with the above, it makes sense that the comparison
1777 * instruction should populate the flag register. It might be simpler
1778 * just to use the flag reg for most WM tasks?
1779 */
1780 void brw_CMP(struct brw_compile *p,
1781 struct brw_reg dest,
1782 unsigned conditional,
1783 struct brw_reg src0,
1784 struct brw_reg src1)
1785 {
1786 struct brw_context *brw = p->brw;
1787 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1788
1789 insn->header.destreg__conditionalmod = conditional;
1790 brw_set_dest(p, insn, dest);
1791 brw_set_src0(p, insn, src0);
1792 brw_set_src1(p, insn, src1);
1793
1794 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1795 * page says:
1796 * "Any CMP instruction with a null destination must use a {switch}."
1797 *
1798 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1799 * mentioned on their work-arounds pages.
1800 */
1801 if (brw->gen == 7) {
1802 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1803 dest.nr == BRW_ARF_NULL) {
1804 insn->header.thread_control = BRW_THREAD_SWITCH;
1805 }
1806 }
1807 }
1808
1809 /* Issue 'wait' instruction for n1, host could program MMIO
1810 to wake up thread. */
1811 void brw_WAIT (struct brw_compile *p)
1812 {
1813 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1814 struct brw_reg src = brw_notification_1_reg();
1815
1816 brw_set_dest(p, insn, src);
1817 brw_set_src0(p, insn, src);
1818 brw_set_src1(p, insn, brw_null_reg());
1819 insn->header.execution_size = 0; /* must */
1820 insn->header.predicate_control = 0;
1821 insn->header.compression_control = 0;
1822 }
1823
1824
1825 /***********************************************************************
1826 * Helpers for the various SEND message types:
1827 */
1828
1829 /** Extended math function, float[8].
1830 */
1831 void brw_math( struct brw_compile *p,
1832 struct brw_reg dest,
1833 unsigned function,
1834 unsigned msg_reg_nr,
1835 struct brw_reg src,
1836 unsigned data_type,
1837 unsigned precision )
1838 {
1839 struct brw_context *brw = p->brw;
1840 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1841
1842 assert(brw->gen < 6);
1843
1844 /* Example code doesn't set predicate_control for send
1845 * instructions.
1846 */
1847 insn->header.predicate_control = 0;
1848 insn->header.destreg__conditionalmod = msg_reg_nr;
1849
1850 brw_set_dest(p, insn, dest);
1851 brw_set_src0(p, insn, src);
1852 brw_set_math_message(p,
1853 insn,
1854 function,
1855 src.type == BRW_REGISTER_TYPE_D,
1856 precision,
1857 data_type);
1858 }
1859
1860 void gen6_math(struct brw_compile *p,
1861 struct brw_reg dest,
1862 unsigned function,
1863 struct brw_reg src0,
1864 struct brw_reg src1)
1865 {
1866 struct brw_context *brw = p->brw;
1867 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1868
1869 assert(brw->gen >= 6);
1870
1871 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1872 (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1873 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1874
1875 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1876 if (brw->gen == 6) {
1877 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1878 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1879 }
1880
1881 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1882 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1883 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1884 assert(src0.type != BRW_REGISTER_TYPE_F);
1885 assert(src1.type != BRW_REGISTER_TYPE_F);
1886 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1887 } else {
1888 assert(src0.type == BRW_REGISTER_TYPE_F);
1889 assert(src1.type == BRW_REGISTER_TYPE_F);
1890 if (function == BRW_MATH_FUNCTION_POW) {
1891 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1892 } else {
1893 assert(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1894 src1.nr == BRW_ARF_NULL);
1895 }
1896 }
1897
1898 /* Source modifiers are ignored for extended math instructions on Gen6. */
1899 if (brw->gen == 6) {
1900 assert(!src0.negate);
1901 assert(!src0.abs);
1902 assert(!src1.negate);
1903 assert(!src1.abs);
1904 }
1905
1906 /* Math is the same ISA format as other opcodes, except that CondModifier
1907 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1908 */
1909 insn->header.destreg__conditionalmod = function;
1910
1911 brw_set_dest(p, insn, dest);
1912 brw_set_src0(p, insn, src0);
1913 brw_set_src1(p, insn, src1);
1914 }
1915
1916
1917 /**
1918 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1919 * using a constant offset per channel.
1920 *
1921 * The offset must be aligned to oword size (16 bytes). Used for
1922 * register spilling.
1923 */
1924 void brw_oword_block_write_scratch(struct brw_compile *p,
1925 struct brw_reg mrf,
1926 int num_regs,
1927 unsigned offset)
1928 {
1929 struct brw_context *brw = p->brw;
1930 uint32_t msg_control, msg_type;
1931 int mlen;
1932
1933 if (brw->gen >= 6)
1934 offset /= 16;
1935
1936 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1937
1938 if (num_regs == 1) {
1939 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1940 mlen = 2;
1941 } else {
1942 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1943 mlen = 3;
1944 }
1945
1946 /* Set up the message header. This is g0, with g0.2 filled with
1947 * the offset. We don't want to leave our offset around in g0 or
1948 * it'll screw up texture samples, so set it up inside the message
1949 * reg.
1950 */
1951 {
1952 brw_push_insn_state(p);
1953 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
1954 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
1955
1956 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1957
1958 /* set message header global offset field (reg 0, element 2) */
1959 brw_MOV(p,
1960 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1961 mrf.nr,
1962 2), BRW_REGISTER_TYPE_UD),
1963 brw_imm_ud(offset));
1964
1965 brw_pop_insn_state(p);
1966 }
1967
1968 {
1969 struct brw_reg dest;
1970 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1971 int send_commit_msg;
1972 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1973 BRW_REGISTER_TYPE_UW);
1974
1975 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1976 insn->header.compression_control = BRW_COMPRESSION_NONE;
1977 src_header = vec16(src_header);
1978 }
1979 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1980 insn->header.destreg__conditionalmod = mrf.nr;
1981
1982 /* Until gen6, writes followed by reads from the same location
1983 * are not guaranteed to be ordered unless write_commit is set.
1984 * If set, then a no-op write is issued to the destination
1985 * register to set a dependency, and a read from the destination
1986 * can be used to ensure the ordering.
1987 *
1988 * For gen6, only writes between different threads need ordering
1989 * protection. Our use of DP writes is all about register
1990 * spilling within a thread.
1991 */
1992 if (brw->gen >= 6) {
1993 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1994 send_commit_msg = 0;
1995 } else {
1996 dest = src_header;
1997 send_commit_msg = 1;
1998 }
1999
2000 brw_set_dest(p, insn, dest);
2001 if (brw->gen >= 6) {
2002 brw_set_src0(p, insn, mrf);
2003 } else {
2004 brw_set_src0(p, insn, brw_null_reg());
2005 }
2006
2007 if (brw->gen >= 6)
2008 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2009 else
2010 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2011
2012 brw_set_dp_write_message(p,
2013 insn,
2014 255, /* binding table index (255=stateless) */
2015 msg_control,
2016 msg_type,
2017 mlen,
2018 true, /* header_present */
2019 0, /* not a render target */
2020 send_commit_msg, /* response_length */
2021 0, /* eot */
2022 send_commit_msg);
2023 }
2024 }
2025
2026
2027 /**
2028 * Read a block of owords (half a GRF each) from the scratch buffer
2029 * using a constant index per channel.
2030 *
2031 * Offset must be aligned to oword size (16 bytes). Used for register
2032 * spilling.
2033 */
2034 void
2035 brw_oword_block_read_scratch(struct brw_compile *p,
2036 struct brw_reg dest,
2037 struct brw_reg mrf,
2038 int num_regs,
2039 unsigned offset)
2040 {
2041 struct brw_context *brw = p->brw;
2042 uint32_t msg_control;
2043 int rlen;
2044
2045 if (brw->gen >= 6)
2046 offset /= 16;
2047
2048 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2049 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2050
2051 if (num_regs == 1) {
2052 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2053 rlen = 1;
2054 } else {
2055 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2056 rlen = 2;
2057 }
2058
2059 {
2060 brw_push_insn_state(p);
2061 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2062 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2063
2064 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2065
2066 /* set message header global offset field (reg 0, element 2) */
2067 brw_MOV(p,
2068 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2069 mrf.nr,
2070 2), BRW_REGISTER_TYPE_UD),
2071 brw_imm_ud(offset));
2072
2073 brw_pop_insn_state(p);
2074 }
2075
2076 {
2077 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2078
2079 assert(insn->header.predicate_control == 0);
2080 insn->header.compression_control = BRW_COMPRESSION_NONE;
2081 insn->header.destreg__conditionalmod = mrf.nr;
2082
2083 brw_set_dest(p, insn, dest); /* UW? */
2084 if (brw->gen >= 6) {
2085 brw_set_src0(p, insn, mrf);
2086 } else {
2087 brw_set_src0(p, insn, brw_null_reg());
2088 }
2089
2090 brw_set_dp_read_message(p,
2091 insn,
2092 255, /* binding table index (255=stateless) */
2093 msg_control,
2094 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2095 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2096 1, /* msg_length */
2097 true, /* header_present */
2098 rlen);
2099 }
2100 }
2101
2102 void
2103 gen7_block_read_scratch(struct brw_compile *p,
2104 struct brw_reg dest,
2105 int num_regs,
2106 unsigned offset)
2107 {
2108 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2109
2110 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2111
2112 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
2113 insn->header.compression_control = BRW_COMPRESSION_NONE;
2114
2115 brw_set_dest(p, insn, dest);
2116
2117 /* The HW requires that the header is present; this is to get the g0.5
2118 * scratch offset.
2119 */
2120 bool header_present = true;
2121 brw_set_src0(p, insn, brw_vec8_grf(0, 0));
2122
2123 brw_set_message_descriptor(p, insn,
2124 GEN7_SFID_DATAPORT_DATA_CACHE,
2125 1, /* mlen: just g0 */
2126 num_regs,
2127 header_present,
2128 false);
2129
2130 insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;
2131
2132 assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
2133 insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;
2134
2135 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2136 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2137 * is 32 bytes, which happens to be the size of a register.
2138 */
2139 offset /= REG_SIZE;
2140 assert(offset < (1 << 12));
2141 insn->bits3.ud |= offset;
2142 }
2143
2144 /**
2145 * Read a float[4] vector from the data port Data Cache (const buffer).
2146 * Location (in buffer) should be a multiple of 16.
2147 * Used for fetching shader constants.
2148 */
2149 void brw_oword_block_read(struct brw_compile *p,
2150 struct brw_reg dest,
2151 struct brw_reg mrf,
2152 uint32_t offset,
2153 uint32_t bind_table_index)
2154 {
2155 struct brw_context *brw = p->brw;
2156
2157 /* On newer hardware, offset is in units of owords. */
2158 if (brw->gen >= 6)
2159 offset /= 16;
2160
2161 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2162
2163 brw_push_insn_state(p);
2164 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2165 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2166 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2167
2168 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2169
2170 /* set message header global offset field (reg 0, element 2) */
2171 brw_MOV(p,
2172 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2173 mrf.nr,
2174 2), BRW_REGISTER_TYPE_UD),
2175 brw_imm_ud(offset));
2176
2177 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2178 insn->header.destreg__conditionalmod = mrf.nr;
2179
2180 /* cast dest to a uword[8] vector */
2181 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2182
2183 brw_set_dest(p, insn, dest);
2184 if (brw->gen >= 6) {
2185 brw_set_src0(p, insn, mrf);
2186 } else {
2187 brw_set_src0(p, insn, brw_null_reg());
2188 }
2189
2190 brw_set_dp_read_message(p,
2191 insn,
2192 bind_table_index,
2193 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2194 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2195 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2196 1, /* msg_length */
2197 true, /* header_present */
2198 1); /* response_length (1 reg, 2 owords!) */
2199
2200 brw_pop_insn_state(p);
2201 }
2202
2203
2204 void brw_fb_WRITE(struct brw_compile *p,
2205 int dispatch_width,
2206 unsigned msg_reg_nr,
2207 struct brw_reg src0,
2208 unsigned msg_control,
2209 unsigned binding_table_index,
2210 unsigned msg_length,
2211 unsigned response_length,
2212 bool eot,
2213 bool header_present)
2214 {
2215 struct brw_context *brw = p->brw;
2216 struct brw_instruction *insn;
2217 unsigned msg_type;
2218 struct brw_reg dest;
2219
2220 if (dispatch_width == 16)
2221 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2222 else
2223 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2224
2225 if (brw->gen >= 6) {
2226 insn = next_insn(p, BRW_OPCODE_SENDC);
2227 } else {
2228 insn = next_insn(p, BRW_OPCODE_SEND);
2229 }
2230 insn->header.compression_control = BRW_COMPRESSION_NONE;
2231
2232 if (brw->gen >= 6) {
2233 /* headerless version, just submit color payload */
2234 src0 = brw_message_reg(msg_reg_nr);
2235
2236 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2237 } else {
2238 insn->header.destreg__conditionalmod = msg_reg_nr;
2239
2240 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2241 }
2242
2243 brw_set_dest(p, insn, dest);
2244 brw_set_src0(p, insn, src0);
2245 brw_set_dp_write_message(p,
2246 insn,
2247 binding_table_index,
2248 msg_control,
2249 msg_type,
2250 msg_length,
2251 header_present,
2252 eot, /* last render target write */
2253 response_length,
2254 eot,
2255 0 /* send_commit_msg */);
2256 }
2257
2258
2259 /**
2260 * Texture sample instruction.
2261 * Note: the msg_type plus msg_length values determine exactly what kind
2262 * of sampling operation is performed. See volume 4, page 161 of docs.
2263 */
2264 void brw_SAMPLE(struct brw_compile *p,
2265 struct brw_reg dest,
2266 unsigned msg_reg_nr,
2267 struct brw_reg src0,
2268 unsigned binding_table_index,
2269 unsigned sampler,
2270 unsigned msg_type,
2271 unsigned response_length,
2272 unsigned msg_length,
2273 unsigned header_present,
2274 unsigned simd_mode,
2275 unsigned return_format)
2276 {
2277 struct brw_context *brw = p->brw;
2278 struct brw_instruction *insn;
2279
2280 if (msg_reg_nr != -1)
2281 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2282
2283 insn = next_insn(p, BRW_OPCODE_SEND);
2284 insn->header.predicate_control = 0; /* XXX */
2285
2286 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2287 *
2288 * "Instruction compression is not allowed for this instruction (that
2289 * is, send). The hardware behavior is undefined if this instruction is
2290 * set as compressed. However, compress control can be set to "SecHalf"
2291 * to affect the EMask generation."
2292 *
2293 * No similar wording is found in later PRMs, but there are examples
2294 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2295 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2296 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2297 */
2298 if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
2299 insn->header.compression_control = BRW_COMPRESSION_NONE;
2300
2301 if (brw->gen < 6)
2302 insn->header.destreg__conditionalmod = msg_reg_nr;
2303
2304 brw_set_dest(p, insn, dest);
2305 brw_set_src0(p, insn, src0);
2306 brw_set_sampler_message(p, insn,
2307 binding_table_index,
2308 sampler,
2309 msg_type,
2310 response_length,
2311 msg_length,
2312 header_present,
2313 simd_mode,
2314 return_format);
2315 }
2316
2317 /* All these variables are pretty confusing - we might be better off
2318 * using bitmasks and macros for this, in the old style. Or perhaps
2319 * just having the caller instantiate the fields in dword3 itself.
2320 */
2321 void brw_urb_WRITE(struct brw_compile *p,
2322 struct brw_reg dest,
2323 unsigned msg_reg_nr,
2324 struct brw_reg src0,
2325 enum brw_urb_write_flags flags,
2326 unsigned msg_length,
2327 unsigned response_length,
2328 unsigned offset,
2329 unsigned swizzle)
2330 {
2331 struct brw_context *brw = p->brw;
2332 struct brw_instruction *insn;
2333
2334 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2335
2336 if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
2337 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2338 brw_push_insn_state(p);
2339 brw_set_default_access_mode(p, BRW_ALIGN_1);
2340 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2341 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2342 BRW_REGISTER_TYPE_UD),
2343 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2344 brw_imm_ud(0xff00));
2345 brw_pop_insn_state(p);
2346 }
2347
2348 insn = next_insn(p, BRW_OPCODE_SEND);
2349
2350 assert(msg_length < BRW_MAX_MRF);
2351
2352 brw_set_dest(p, insn, dest);
2353 brw_set_src0(p, insn, src0);
2354 brw_set_src1(p, insn, brw_imm_d(0));
2355
2356 if (brw->gen < 6)
2357 insn->header.destreg__conditionalmod = msg_reg_nr;
2358
2359 brw_set_urb_message(p,
2360 insn,
2361 flags,
2362 msg_length,
2363 response_length,
2364 offset,
2365 swizzle);
2366 }
2367
2368 static int
2369 brw_find_next_block_end(struct brw_compile *p, int start_offset)
2370 {
2371 int offset;
2372 void *store = p->store;
2373
2374 for (offset = next_offset(store, start_offset); offset < p->next_insn_offset;
2375 offset = next_offset(store, offset)) {
2376 struct brw_instruction *insn = store + offset;
2377
2378 switch (insn->header.opcode) {
2379 case BRW_OPCODE_ENDIF:
2380 case BRW_OPCODE_ELSE:
2381 case BRW_OPCODE_WHILE:
2382 case BRW_OPCODE_HALT:
2383 return offset;
2384 }
2385 }
2386
2387 return 0;
2388 }
2389
2390 /* There is no DO instruction on gen6, so to find the end of the loop
2391 * we have to see if the loop is jumping back before our start
2392 * instruction.
2393 */
2394 static int
2395 brw_find_loop_end(struct brw_compile *p, int start_offset)
2396 {
2397 struct brw_context *brw = p->brw;
2398 int offset;
2399 int scale = 8;
2400 void *store = p->store;
2401
2402 /* Always start after the instruction (such as a WHILE) we're trying to fix
2403 * up.
2404 */
2405 for (offset = next_offset(store, start_offset); offset < p->next_insn_offset;
2406 offset = next_offset(store, offset)) {
2407 struct brw_instruction *insn = store + offset;
2408
2409 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2410 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
2411 : insn->bits3.break_cont.jip;
2412 if (offset + jip * scale <= start_offset)
2413 return offset;
2414 }
2415 }
2416 assert(!"not reached");
2417 return start_offset;
2418 }
2419
2420 /* After program generation, go back and update the UIP and JIP of
2421 * BREAK, CONT, and HALT instructions to their correct locations.
2422 */
2423 void
2424 brw_set_uip_jip(struct brw_compile *p)
2425 {
2426 struct brw_context *brw = p->brw;
2427 int offset;
2428 int scale = 8;
2429 void *store = p->store;
2430
2431 if (brw->gen < 6)
2432 return;
2433
2434 for (offset = 0; offset < p->next_insn_offset;
2435 offset = next_offset(store, offset)) {
2436 struct brw_instruction *insn = store + offset;
2437
2438 if (insn->header.cmpt_control) {
2439 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2440 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
2441 insn->header.opcode != BRW_OPCODE_CONTINUE &&
2442 insn->header.opcode != BRW_OPCODE_HALT);
2443 continue;
2444 }
2445
2446 int block_end_offset = brw_find_next_block_end(p, offset);
2447 switch (insn->header.opcode) {
2448 case BRW_OPCODE_BREAK:
2449 assert(block_end_offset != 0);
2450 insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
2451 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2452 insn->bits3.break_cont.uip =
2453 (brw_find_loop_end(p, offset) - offset +
2454 (brw->gen == 6 ? 16 : 0)) / scale;
2455 break;
2456 case BRW_OPCODE_CONTINUE:
2457 assert(block_end_offset != 0);
2458 insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
2459 insn->bits3.break_cont.uip =
2460 (brw_find_loop_end(p, offset) - offset) / scale;
2461
2462 assert(insn->bits3.break_cont.uip != 0);
2463 assert(insn->bits3.break_cont.jip != 0);
2464 break;
2465
2466 case BRW_OPCODE_ENDIF:
2467 if (block_end_offset == 0)
2468 insn->bits3.break_cont.jip = 2;
2469 else
2470 insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
2471 break;
2472
2473 case BRW_OPCODE_HALT:
2474 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2475 *
2476 * "In case of the halt instruction not inside any conditional
2477 * code block, the value of <JIP> and <UIP> should be the
2478 * same. In case of the halt instruction inside conditional code
2479 * block, the <UIP> should be the end of the program, and the
2480 * <JIP> should be end of the most inner conditional code block."
2481 *
2482 * The uip will have already been set by whoever set up the
2483 * instruction.
2484 */
2485 if (block_end_offset == 0) {
2486 insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
2487 } else {
2488 insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
2489 }
2490 assert(insn->bits3.break_cont.uip != 0);
2491 assert(insn->bits3.break_cont.jip != 0);
2492 break;
2493 }
2494 }
2495 }
2496
2497 void brw_ff_sync(struct brw_compile *p,
2498 struct brw_reg dest,
2499 unsigned msg_reg_nr,
2500 struct brw_reg src0,
2501 bool allocate,
2502 unsigned response_length,
2503 bool eot)
2504 {
2505 struct brw_context *brw = p->brw;
2506 struct brw_instruction *insn;
2507
2508 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2509
2510 insn = next_insn(p, BRW_OPCODE_SEND);
2511 brw_set_dest(p, insn, dest);
2512 brw_set_src0(p, insn, src0);
2513 brw_set_src1(p, insn, brw_imm_d(0));
2514
2515 if (brw->gen < 6)
2516 insn->header.destreg__conditionalmod = msg_reg_nr;
2517
2518 brw_set_ff_sync_message(p,
2519 insn,
2520 allocate,
2521 response_length,
2522 eot);
2523 }
2524
2525 /**
2526 * Emit the SEND instruction necessary to generate stream output data on Gen6
2527 * (for transform feedback).
2528 *
2529 * If send_commit_msg is true, this is the last piece of stream output data
2530 * from this thread, so send the data as a committed write. According to the
2531 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2532 *
2533 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2534 * writes are complete by sending the final write as a committed write."
2535 */
2536 void
2537 brw_svb_write(struct brw_compile *p,
2538 struct brw_reg dest,
2539 unsigned msg_reg_nr,
2540 struct brw_reg src0,
2541 unsigned binding_table_index,
2542 bool send_commit_msg)
2543 {
2544 struct brw_instruction *insn;
2545
2546 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2547
2548 insn = next_insn(p, BRW_OPCODE_SEND);
2549 brw_set_dest(p, insn, dest);
2550 brw_set_src0(p, insn, src0);
2551 brw_set_src1(p, insn, brw_imm_d(0));
2552 brw_set_dp_write_message(p, insn,
2553 binding_table_index,
2554 0, /* msg_control: ignored */
2555 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2556 1, /* msg_length */
2557 true, /* header_present */
2558 0, /* last_render_target: ignored */
2559 send_commit_msg, /* response_length */
2560 0, /* end_of_thread */
2561 send_commit_msg); /* send_commit_msg */
2562 }
2563
2564 static void
2565 brw_set_dp_untyped_atomic_message(struct brw_compile *p,
2566 struct brw_instruction *insn,
2567 unsigned atomic_op,
2568 unsigned bind_table_index,
2569 unsigned msg_length,
2570 unsigned response_length,
2571 bool header_present)
2572 {
2573 if (p->brw->is_haswell) {
2574 brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
2575 msg_length, response_length,
2576 header_present, false);
2577
2578
2579 if (insn->header.access_mode == BRW_ALIGN_1) {
2580 if (insn->header.execution_size != BRW_EXECUTE_16)
2581 insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
2582
2583 insn->bits3.gen7_dp.msg_type =
2584 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
2585 } else {
2586 insn->bits3.gen7_dp.msg_type =
2587 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
2588 }
2589 } else {
2590 brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
2591 msg_length, response_length,
2592 header_present, false);
2593
2594 insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
2595
2596 if (insn->header.execution_size != BRW_EXECUTE_16)
2597 insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
2598 }
2599
2600 if (response_length)
2601 insn->bits3.ud |= 1 << 13; /* Return data expected */
2602
2603 insn->bits3.gen7_dp.binding_table_index = bind_table_index;
2604 insn->bits3.ud |= atomic_op << 8;
2605 }
2606
2607 void
2608 brw_untyped_atomic(struct brw_compile *p,
2609 struct brw_reg dest,
2610 struct brw_reg mrf,
2611 unsigned atomic_op,
2612 unsigned bind_table_index,
2613 unsigned msg_length,
2614 unsigned response_length) {
2615 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
2616
2617 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2618 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2619 brw_set_src1(p, insn, brw_imm_d(0));
2620 brw_set_dp_untyped_atomic_message(
2621 p, insn, atomic_op, bind_table_index, msg_length, response_length,
2622 insn->header.access_mode == BRW_ALIGN_1);
2623 }
2624
2625 static void
2626 brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
2627 struct brw_instruction *insn,
2628 unsigned bind_table_index,
2629 unsigned msg_length,
2630 unsigned response_length,
2631 bool header_present)
2632 {
2633 const unsigned dispatch_width =
2634 (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
2635 const unsigned num_channels = response_length / (dispatch_width / 8);
2636
2637 if (p->brw->is_haswell) {
2638 brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
2639 msg_length, response_length,
2640 header_present, false);
2641
2642 insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
2643 } else {
2644 brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
2645 msg_length, response_length,
2646 header_present, false);
2647
2648 insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
2649 }
2650
2651 if (insn->header.access_mode == BRW_ALIGN_1) {
2652 if (dispatch_width == 16)
2653 insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
2654 else
2655 insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
2656 }
2657
2658 insn->bits3.gen7_dp.binding_table_index = bind_table_index;
2659
2660 /* Set mask of 32-bit channels to drop. */
2661 insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
2662 }
2663
2664 void
2665 brw_untyped_surface_read(struct brw_compile *p,
2666 struct brw_reg dest,
2667 struct brw_reg mrf,
2668 unsigned bind_table_index,
2669 unsigned msg_length,
2670 unsigned response_length)
2671 {
2672 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2673
2674 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2675 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2676 brw_set_dp_untyped_surface_read_message(
2677 p, insn, bind_table_index, msg_length, response_length,
2678 insn->header.access_mode == BRW_ALIGN_1);
2679 }
2680
2681 /**
2682 * This instruction is generated as a single-channel align1 instruction by
2683 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2684 *
2685 * We can't use the typed atomic op in the FS because that has the execution
2686 * mask ANDed with the pixel mask, but we just want to write the one dword for
2687 * all the pixels.
2688 *
2689 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2690 * one u32. So we use the same untyped atomic write message as the pixel
2691 * shader.
2692 *
2693 * The untyped atomic operation requires a BUFFER surface type with RAW
2694 * format, and is only accessible through the legacy DATA_CACHE dataport
2695 * messages.
2696 */
2697 void brw_shader_time_add(struct brw_compile *p,
2698 struct brw_reg payload,
2699 uint32_t surf_index)
2700 {
2701 struct brw_context *brw = p->brw;
2702 assert(brw->gen >= 7);
2703
2704 brw_push_insn_state(p);
2705 brw_set_default_access_mode(p, BRW_ALIGN_1);
2706 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2707 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
2708 brw_pop_insn_state(p);
2709
2710 /* We use brw_vec1_reg and unmasked because we want to increment the given
2711 * offset only once.
2712 */
2713 brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
2714 BRW_ARF_NULL, 0));
2715 brw_set_src0(p, send, brw_vec1_reg(payload.file,
2716 payload.nr, 0));
2717 brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
2718 2 /* message length */,
2719 0 /* response length */,
2720 false /* header present */);
2721 }