i965: Add HiZ operation state to brw_context
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71 brw_push_insn_state(p);
72 brw_set_mask_control(p, BRW_MASK_DISABLE);
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75 retype(*src, BRW_REGISTER_TYPE_UD));
76 brw_pop_insn_state(p);
77 }
78 *src = brw_message_reg(msg_reg_nr);
79 }
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84 struct intel_context *intel = &p->brw->intel;
85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86 reg->file = BRW_GENERAL_REGISTER_FILE;
87 reg->nr += 111;
88 }
89 }
90
91
/**
 * Encode the destination operand of \p insn from \p dest.
 *
 * Fills the file/type/address-mode fields, then the subregister,
 * register number, stride, and (in align16 mode) writemask fields,
 * which live in different union layouts for direct vs. indirect
 * addressing.  Finally infers the instruction's execution size from
 * the destination width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* Register numbers are only bounds-checked for GRF/immediate files;
    * ARF and MRF numbers have their own encodings.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* gen7 has no MRFs; remap to the top of the GRF space. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* Stride 0 is not a legal destination horizontal stride;
	  * encode it as stride 1.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Same stride-0 -> stride-1 fixup as the direct case. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode the source-1 operand of \p insn from \p reg.
 *
 * Mirrors brw_set_src0() but with the hardware restrictions specific
 * to src1: no MRF file, direct addressing only for register operands,
 * and (since src1's encoding space doubles as the immediate field)
 * src0 must not already be an immediate.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar region (width 1, exec size 1) is encoded with all
	  * strides zero; otherwise pass the region through.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* Zero src1 (the descriptor dword) before setting individual fields. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Pre-gen5 layout packs everything into the descriptor itself. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
415
416 static void brw_set_math_message( struct brw_compile *p,
417 struct brw_instruction *insn,
418 GLuint function,
419 GLuint integer_type,
420 bool low_precision,
421 bool saturate,
422 GLuint dataType )
423 {
424 struct brw_context *brw = p->brw;
425 struct intel_context *intel = &brw->intel;
426 unsigned msg_length;
427 unsigned response_length;
428
429 /* Infer message length from the function */
430 switch (function) {
431 case BRW_MATH_FUNCTION_POW:
432 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
433 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
434 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
435 msg_length = 2;
436 break;
437 default:
438 msg_length = 1;
439 break;
440 }
441
442 /* Infer response length from the function */
443 switch (function) {
444 case BRW_MATH_FUNCTION_SINCOS:
445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
446 response_length = 2;
447 break;
448 default:
449 response_length = 1;
450 break;
451 }
452
453 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
454 msg_length, response_length, false, false);
455 if (intel->gen == 5) {
456 insn->bits3.math_gen5.function = function;
457 insn->bits3.math_gen5.int_type = integer_type;
458 insn->bits3.math_gen5.precision = low_precision;
459 insn->bits3.math_gen5.saturate = saturate;
460 insn->bits3.math_gen5.data_type = dataType;
461 insn->bits3.math_gen5.snapshot = 0;
462 } else {
463 insn->bits3.math.function = function;
464 insn->bits3.math.int_type = integer_type;
465 insn->bits3.math.precision = low_precision;
466 insn->bits3.math.saturate = saturate;
467 insn->bits3.math.data_type = dataType;
468 }
469 }
470
471
/* Fill in the SEND descriptor for an URB FF_SYNC message (fixed-function
 * thread synchronization, gen5 URB layout).  Message length is always 1;
 * most of the URB fields are unused by FF_SYNC and set to zero.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
487
/* Fill in the SEND descriptor for an URB write message, using the
 * descriptor layout appropriate to the hardware generation (gen7,
 * gen5/6, or original gen4).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* gen7's URB layout has no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
528
/**
 * Fill in the SEND descriptor for a data-port write message.
 *
 * Chooses the shared function (SFID) by hardware generation — gen7
 * splits render-target writes (render cache) from other writes (data
 * cache); gen6 routes all writes through the render cache — then fills
 * the generation-specific descriptor layout.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
587
/**
 * Fill in the SEND descriptor for a data-port read message.
 *
 * Selects the SFID by generation (gen7: data cache; gen6: render or
 * sampler cache depending on \p target_cache; earlier: the dataport
 * read unit), then fills the generation-specific descriptor layout.
 * The message header is always marked present.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      true, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
644
/* Fill in the SEND descriptor for a sampler message, using the
 * descriptor layout for the current generation (gen7, gen5/6, g4x, or
 * original gen4, which additionally fixes the return format to float32).
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   }
}
682
683
684 #define next_insn brw_next_insn
685 struct brw_instruction *
686 brw_next_insn(struct brw_compile *p, GLuint opcode)
687 {
688 struct brw_instruction *insn;
689
690 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
691
692 insn = &p->store[p->nr_insn++];
693 memcpy(insn, p->current, sizeof(*insn));
694
695 /* Reset this one-shot flag:
696 */
697
698 if (p->current->header.destreg__conditionalmod) {
699 p->current->header.destreg__conditionalmod = 0;
700 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
701 }
702
703 insn->header.opcode = opcode;
704 return insn;
705 }
706
707 static struct brw_instruction *brw_alu1( struct brw_compile *p,
708 GLuint opcode,
709 struct brw_reg dest,
710 struct brw_reg src )
711 {
712 struct brw_instruction *insn = next_insn(p, opcode);
713 brw_set_dest(p, insn, dest);
714 brw_set_src0(p, insn, src);
715 return insn;
716 }
717
718 static struct brw_instruction *brw_alu2(struct brw_compile *p,
719 GLuint opcode,
720 struct brw_reg dest,
721 struct brw_reg src0,
722 struct brw_reg src1 )
723 {
724 struct brw_instruction *insn = next_insn(p, opcode);
725 brw_set_dest(p, insn, dest);
726 brw_set_src0(p, insn, src0);
727 brw_set_src1(p, insn, src1);
728 return insn;
729 }
730
731
/***********************************************************************
 * Convenience routines.
 */

/* Defines brw_<OP>(), a thin wrapper emitting a one-source ALU insn. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

/* Defines brw_<OP>(), a thin wrapper emitting a two-source ALU insn. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}


/* Instantiate the simple ALU instruction wrappers. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


/* Instantiate the two-instruction rounding wrappers. */
ROUND(RNDZ)
ROUND(RNDE)
804
805
806 struct brw_instruction *brw_ADD(struct brw_compile *p,
807 struct brw_reg dest,
808 struct brw_reg src0,
809 struct brw_reg src1)
810 {
811 /* 6.2.2: add */
812 if (src0.type == BRW_REGISTER_TYPE_F ||
813 (src0.file == BRW_IMMEDIATE_VALUE &&
814 src0.type == BRW_REGISTER_TYPE_VF)) {
815 assert(src1.type != BRW_REGISTER_TYPE_UD);
816 assert(src1.type != BRW_REGISTER_TYPE_D);
817 }
818
819 if (src1.type == BRW_REGISTER_TYPE_F ||
820 (src1.file == BRW_IMMEDIATE_VALUE &&
821 src1.type == BRW_REGISTER_TYPE_VF)) {
822 assert(src0.type != BRW_REGISTER_TYPE_UD);
823 assert(src0.type != BRW_REGISTER_TYPE_D);
824 }
825
826 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
827 }
828
829 struct brw_instruction *brw_MUL(struct brw_compile *p,
830 struct brw_reg dest,
831 struct brw_reg src0,
832 struct brw_reg src1)
833 {
834 /* 6.32.38: mul */
835 if (src0.type == BRW_REGISTER_TYPE_D ||
836 src0.type == BRW_REGISTER_TYPE_UD ||
837 src1.type == BRW_REGISTER_TYPE_D ||
838 src1.type == BRW_REGISTER_TYPE_UD) {
839 assert(dest.type != BRW_REGISTER_TYPE_F);
840 }
841
842 if (src0.type == BRW_REGISTER_TYPE_F ||
843 (src0.file == BRW_IMMEDIATE_VALUE &&
844 src0.type == BRW_REGISTER_TYPE_VF)) {
845 assert(src1.type != BRW_REGISTER_TYPE_UD);
846 assert(src1.type != BRW_REGISTER_TYPE_D);
847 }
848
849 if (src1.type == BRW_REGISTER_TYPE_F ||
850 (src1.file == BRW_IMMEDIATE_VALUE &&
851 src1.type == BRW_REGISTER_TYPE_VF)) {
852 assert(src0.type != BRW_REGISTER_TYPE_UD);
853 assert(src0.type != BRW_REGISTER_TYPE_D);
854 }
855
856 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
857 src0.nr != BRW_ARF_ACCUMULATOR);
858 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
859 src1.nr != BRW_ARF_ACCUMULATOR);
860
861 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
862 }
863
864
865 void brw_NOP(struct brw_compile *p)
866 {
867 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
868 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
869 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
870 brw_set_src1(p, insn, brw_imm_ud(0x0));
871 }
872
873
874
875
876
877 /***********************************************************************
878 * Comparisons, if/else/endif
879 */
880
881 struct brw_instruction *brw_JMPI(struct brw_compile *p,
882 struct brw_reg dest,
883 struct brw_reg src0,
884 struct brw_reg src1)
885 {
886 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
887
888 insn->header.execution_size = 1;
889 insn->header.compression_control = BRW_COMPRESSION_NONE;
890 insn->header.mask_control = BRW_MASK_DISABLE;
891
892 p->current->header.predicate_control = BRW_PREDICATE_NONE;
893
894 return insn;
895 }
896
897 static void
898 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
899 {
900 p->if_stack[p->if_stack_depth] = inst;
901
902 p->if_stack_depth++;
903 if (p->if_stack_array_size <= p->if_stack_depth) {
904 p->if_stack_array_size *= 2;
905 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
906 p->if_stack_array_size);
907 }
908 }
909
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).   Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevent flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.   If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction.  The operand layout
    * differs per generation: pre-gen6 uses IP-relative operands with an
    * immediate jump count; gen6 has a jump count in bits1; gen7 uses
    * JIP/UIP fields in bits3.
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;	/* patched by ELSE/ENDIF */
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;	/* patched by ELSE/ENDIF */
      insn->bits3.break_cont.uip = 0;	/* patched by ELSE/ENDIF */
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication is consumed by the IF itself; clear the one-shot
    * default so following instructions are not predicated.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   /* Remember this IF so the matching ELSE/ENDIF can patch it. */
   push_if_stack(p, insn);
   return insn;
}
962
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   /* Execution size follows the dispatch mode of the shader. */
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;	/* patched by ELSE/ENDIF */
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The condition of the IF is the embedded src0 <cond> src1 compare. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Remember this IF so the matching ELSE/ENDIF can patch it. */
   push_if_stack(p, insn);
   return insn;
}
994
995 /**
996 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
997 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* IP offsets are in bytes; the instruction-count difference is
       * scaled by 16 bytes per instruction.
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1034
1035 /**
1036 * Patch IF and ELSE instructions with appropriate jump targets.
1037 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   /* SPF mode converts IF/ELSE to ADDs instead (convert_IF_ELSE_to_ADD). */
   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7: both the jump and "all channels off" targets are ENDIF. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 /* ELSE pops the mask-stack entry that IF pushed. */
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1110
/**
 * Emit an ELSE instruction.  Its branch target fields are left zero
 * here and patched by patch_IF_ELSE() when the matching brw_ENDIF()
 * is emitted.  The ELSE is pushed on the if-stack so brw_ENDIF() can
 * find it.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      /* Pre-gen6: IP-relative branch, count patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: offset in bits1.branch_gen6.jump_count, patched later. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7: JIP/UIP in bits3, patched later. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1143
/**
 * Emit an ENDIF instruction and patch the IF (and optional ELSE) that
 * opened this control-flow block.
 *
 * Pops the matching IF/ELSE off the if-stack.  In single-program-flow
 * mode no ENDIF is emitted at all; the IF/ELSE are rewritten into
 * plain ADDs to the IP instead.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1198
/**
 * Emit a BREAK instruction for exiting a loop.
 *
 * \param pop_count  number of nested mask-stack entries to pop on the
 *                   way out (pre-gen6 only; gen6+ has no mask stack).
 *
 * The branch offsets are left zero here; the caller is expected to
 * patch them once the loop end is known.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      /* These writes must follow brw_set_src1(): the if_else fields
       * overlay the same instruction word as the immediate.
       */
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = pop_count;
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1221
1222 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1223 struct brw_instruction *do_insn)
1224 {
1225 struct brw_instruction *insn;
1226
1227 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1228 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1229 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1230 brw_set_dest(p, insn, brw_ip_reg());
1231 brw_set_src0(p, insn, brw_ip_reg());
1232 brw_set_src1(p, insn, brw_imm_d(0x0));
1233
1234 insn->header.compression_control = BRW_COMPRESSION_NONE;
1235 insn->header.execution_size = BRW_EXECUTE_8;
1236 return insn;
1237 }
1238
/**
 * Emit a pre-gen6 CONTINUE instruction.
 *
 * \param pop_count  number of mask-stack entries to pop before jumping
 *                   back to the loop head.
 *
 * The jump count is left zero and patched when the loop is closed.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   /* Must follow brw_set_src1(): if_else fields share the word with
    * the immediate.
    */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}
1253
1254 /* DO/WHILE loop:
1255 *
1256 * The DO/WHILE is just an unterminated loop -- break or continue are
1257 * used for control within the loop. We have a few ways they can be
1258 * done.
1259 *
1260 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1261 * jip and no DO instruction.
1262 *
1263 * For non-uniform control flow pre-gen6, there's a DO instruction to
1264 * push the mask, and a WHILE to jump back, and BREAK to get out and
1265 * pop the mask.
1266 *
1267 * For gen6, there's no more mask stack, so no need for DO. WHILE
1268 * just points back to the first instruction of the loop.
1269 */
/**
 * Open a DO/WHILE loop.
 *
 * On gen6+ (and in single-program-flow mode) no DO instruction exists;
 * we just return a pointer to where the next instruction will land so
 * brw_WHILE() can compute the backward jump.  Pre-gen6, an actual DO
 * is emitted to push the execution mask.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* NOTE: this points one past the last emitted instruction -- it is
       * only valid as a WHILE target, not as a real instruction.
       */
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1294
1295
1296
/**
 * Close a DO/WHILE loop: emit the WHILE and point it back at do_insn.
 *
 * \param do_insn  the DO instruction (pre-gen6) or the loop-head
 *                 placeholder returned by brw_DO() (gen6+/SPF).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* Jump counts are in 64-bit chunks from gen5 on: 2 per instruction. */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Negative JIP: jump backward to the loop head. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: the loop back-edge is just an ADD to
	  * the IP, in bytes (16 per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1353
1354
1355 /* FORWARD JUMPS:
1356 */
/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted JMPI so that it lands at the current
 * emission point.  JMPI offsets are relative to the instruction after
 * the jump, hence the "- 1"; the unit is 64-bit chunks on gen5+
 * (2 per instruction), instructions before that.
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
1372
1373
1374
1375 /* To integrate with the above, it makes sense that the comparison
1376 * instruction should populate the flag register. It might be simpler
1377 * just to use the flag reg for most WM tasks?
1378 */
/**
 * Emit a CMP instruction: compare src0 against src1 with the given
 * conditional modifier, writing per-channel results to dest and
 * updating the flag register.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      /* Destination is the null register: the CMP exists only for its
       * flag write, so enable predication on subsequent instructions.
       */
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
1405
1406 /* Issue 'wait' instruction for n1, host could program MMIO
1407 to wake up thread. */
1408 void brw_WAIT (struct brw_compile *p)
1409 {
1410 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1411 struct brw_reg src = brw_notification_1_reg();
1412
1413 brw_set_dest(p, insn, src);
1414 brw_set_src0(p, insn, src);
1415 brw_set_src1(p, insn, brw_null_reg());
1416 insn->header.execution_size = 0; /* must */
1417 insn->header.predicate_control = 0;
1418 insn->header.compression_control = 0;
1419 }
1420
1421
1422 /***********************************************************************
1423 * Helpers for the various SEND message types:
1424 */
1425
1426 /** Extended math function, float[8].
1427 */
/**
 * Emit an extended-math operation (INV, LOG, EXP, SQRT, INT_DIV, ...).
 *
 * Gen6+ has a native MATH opcode; earlier gens reach the shared math
 * unit through a SEND message.
 *
 * \param function     BRW_MATH_FUNCTION_* selector
 * \param saturate     saturate the result
 * \param msg_reg_nr   MRF used for the pre-gen6 SEND payload
 * \param data_type    pre-gen6 message data-type field
 * \param precision    pre-gen6 precision field
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer division takes integer sources; everything else is float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1492
1493 /** Extended math function, float[8].
1494 */
/**
 * Emit a two-source extended-math operation (POW, INT_DIV variants).
 * Gen6+ only: uses the native MATH opcode.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   /* intel is only read by assert()s below, which compile away with
    * NDEBUG; keep the compiler from warning about an unused variable.
    */
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer division takes integer sources; everything else is float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1545
1546 /**
1547 * Extended math function, float[16].
1548 * Use 2 send instructions.
1549 */
/**
 * SIMD16 extended math.  Gen6+ handles SIMD16 natively with one MATH
 * instruction; earlier gens must issue two SIMD8 SEND messages, the
 * second marked as the compressed second half.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: the upper half, one MRF and one dest reg on.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1617
1618
1619 /**
1620 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1621 * using a constant offset per channel.
1622 *
1623 * The offset must be aligned to oword size (16 bytes). Used for
1624 * register spilling.
1625 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen = header reg + num_regs of payload. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1727
1728
1729 /**
1730 * Read a block of owords (half a GRF each) from the scratch buffer
1731 * using a constant index per channel.
1732 *
1733 * Offset must be aligned to oword size (16 bytes). Used for register
1734 * spilling.
1735 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* rlen = number of GRFs returned by the read. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 copied over, with the
    * scratch offset in element 2 (same layout as the write path).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1802
1803 /**
1804 * Read a float[4] vector from the data port Data Cache (const buffer).
1805 * Location (in buffer) should be a multiple of 16.
1806 * Used for fetching shader constants.
1807 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Header setup must not be predicated/compressed/masked; restore
    * the caller's state afterwards via push/pop.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1860
1861 /**
1862 * Read a set of dwords from the data port Data Cache (const buffer).
1863 *
1864 * Location (in buffer) appears as UD offsets in the register after
1865 * the provided mrf header reg.
1866 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Copy g0 into the message header with state side-effects disabled;
    * the per-channel offsets are expected in the following MRF
    * (msg_length is 2).
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}
1899
1900
1901
1902 /**
1903 * Read float[4] constant(s) from VS constant buffer.
1904 * For relative addressing, two float[4] constants will be read into 'dest'.
1905 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1906 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* Gen6+ takes the location in owords rather than bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1953
1954 /**
1955 * Read a float[4] constant per vertex from VS constant buffer, with
1956 * relative addressing.
1957 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On gen6+ the header must travel in a GRF; this may emit a MOV and
    * redirect src accordingly.
    */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The dual-block-read message encoding differs per generation. */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}
2009
2010
2011
2012 void brw_fb_WRITE(struct brw_compile *p,
2013 int dispatch_width,
2014 GLuint msg_reg_nr,
2015 struct brw_reg src0,
2016 GLuint binding_table_index,
2017 GLuint msg_length,
2018 GLuint response_length,
2019 bool eot,
2020 bool header_present)
2021 {
2022 struct intel_context *intel = &p->brw->intel;
2023 struct brw_instruction *insn;
2024 GLuint msg_control, msg_type;
2025 struct brw_reg dest;
2026
2027 if (dispatch_width == 16)
2028 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2029 else
2030 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2031
2032 if (intel->gen >= 6 && binding_table_index == 0) {
2033 insn = next_insn(p, BRW_OPCODE_SENDC);
2034 } else {
2035 insn = next_insn(p, BRW_OPCODE_SEND);
2036 }
2037 /* The execution mask is ignored for render target writes. */
2038 insn->header.predicate_control = 0;
2039 insn->header.compression_control = BRW_COMPRESSION_NONE;
2040
2041 if (intel->gen >= 6) {
2042 /* headerless version, just submit color payload */
2043 src0 = brw_message_reg(msg_reg_nr);
2044
2045 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2046 } else {
2047 insn->header.destreg__conditionalmod = msg_reg_nr;
2048
2049 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2050 }
2051
2052 if (dispatch_width == 16)
2053 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
2054 else
2055 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
2056
2057 brw_set_dest(p, insn, dest);
2058 brw_set_src0(p, insn, src0);
2059 brw_set_dp_write_message(p,
2060 insn,
2061 binding_table_index,
2062 msg_control,
2063 msg_type,
2064 msg_length,
2065 header_present,
2066 1, /* last render target write */
2067 response_length,
2068 eot,
2069 0 /* send_commit_msg */);
2070 }
2071
2072
2073 /**
2074 * Texture sample instruction.
2075 * Note: the msg_type plus msg_length values determine exactly what kind
2076 * of sampling operation is performed. See volume 4, page 161 of docs.
2077 */
2078 void brw_SAMPLE(struct brw_compile *p,
2079 struct brw_reg dest,
2080 GLuint msg_reg_nr,
2081 struct brw_reg src0,
2082 GLuint binding_table_index,
2083 GLuint sampler,
2084 GLuint writemask,
2085 GLuint msg_type,
2086 GLuint response_length,
2087 GLuint msg_length,
2088 GLuint header_present,
2089 GLuint simd_mode)
2090 {
2091 struct intel_context *intel = &p->brw->intel;
2092 bool need_stall = 0;
2093
2094 if (writemask == 0) {
2095 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2096 return;
2097 }
2098
2099 /* Hardware doesn't do destination dependency checking on send
2100 * instructions properly. Add a workaround which generates the
2101 * dependency by other means. In practice it seems like this bug
2102 * only crops up for texture samples, and only where registers are
2103 * written by the send and then written again later without being
2104 * read in between. Luckily for us, we already track that
2105 * information and use it to modify the writemask for the
2106 * instruction, so that is a guide for whether a workaround is
2107 * needed.
2108 */
2109 if (writemask != WRITEMASK_XYZW) {
2110 GLuint dst_offset = 0;
2111 GLuint i, newmask = 0, len = 0;
2112
2113 for (i = 0; i < 4; i++) {
2114 if (writemask & (1<<i))
2115 break;
2116 dst_offset += 2;
2117 }
2118 for (; i < 4; i++) {
2119 if (!(writemask & (1<<i)))
2120 break;
2121 newmask |= 1<<i;
2122 len++;
2123 }
2124
2125 if (newmask != writemask) {
2126 need_stall = 1;
2127 /* printf("need stall %x %x\n", newmask , writemask); */
2128 }
2129 else {
2130 bool dispatch_16 = false;
2131
2132 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2133
2134 guess_execution_size(p, p->current, dest);
2135 if (p->current->header.execution_size == BRW_EXECUTE_16)
2136 dispatch_16 = true;
2137
2138 newmask = ~newmask & WRITEMASK_XYZW;
2139
2140 brw_push_insn_state(p);
2141
2142 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2143 brw_set_mask_control(p, BRW_MASK_DISABLE);
2144
2145 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2146 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2147 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2148
2149 brw_pop_insn_state(p);
2150
2151 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2152 dest = offset(dest, dst_offset);
2153
2154 /* For 16-wide dispatch, masked channels are skipped in the
2155 * response. For 8-wide, masked channels still take up slots,
2156 * and are just not written to.
2157 */
2158 if (dispatch_16)
2159 response_length = len * 2;
2160 }
2161 }
2162
2163 {
2164 struct brw_instruction *insn;
2165
2166 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2167
2168 insn = next_insn(p, BRW_OPCODE_SEND);
2169 insn->header.predicate_control = 0; /* XXX */
2170 insn->header.compression_control = BRW_COMPRESSION_NONE;
2171 if (intel->gen < 6)
2172 insn->header.destreg__conditionalmod = msg_reg_nr;
2173
2174 brw_set_dest(p, insn, dest);
2175 brw_set_src0(p, insn, src0);
2176 brw_set_sampler_message(p, insn,
2177 binding_table_index,
2178 sampler,
2179 msg_type,
2180 response_length,
2181 msg_length,
2182 header_present,
2183 simd_mode);
2184 }
2185
2186 if (need_stall) {
2187 struct brw_reg reg = vec8(offset(dest, response_length-1));
2188
2189 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2190 */
2191 brw_push_insn_state(p);
2192 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2193 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2194 retype(reg, BRW_REGISTER_TYPE_UD));
2195 brw_pop_insn_state(p);
2196 }
2197
2198 }
2199
2200 /* All these variables are pretty confusing - we might be better off
2201 * using bitmasks and macros for this, in the old style. Or perhaps
2202 * just having the caller instantiate the fields in dword3 itself.
2203 */
2204 void brw_urb_WRITE(struct brw_compile *p,
2205 struct brw_reg dest,
2206 GLuint msg_reg_nr,
2207 struct brw_reg src0,
2208 bool allocate,
2209 bool used,
2210 GLuint msg_length,
2211 GLuint response_length,
2212 bool eot,
2213 bool writes_complete,
2214 GLuint offset,
2215 GLuint swizzle)
2216 {
2217 struct intel_context *intel = &p->brw->intel;
2218 struct brw_instruction *insn;
2219
2220 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2221
2222 if (intel->gen == 7) {
2223 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2224 brw_push_insn_state(p);
2225 brw_set_access_mode(p, BRW_ALIGN_1);
2226 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2227 BRW_REGISTER_TYPE_UD),
2228 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2229 brw_imm_ud(0xff00));
2230 brw_pop_insn_state(p);
2231 }
2232
2233 insn = next_insn(p, BRW_OPCODE_SEND);
2234
2235 assert(msg_length < BRW_MAX_MRF);
2236
2237 brw_set_dest(p, insn, dest);
2238 brw_set_src0(p, insn, src0);
2239 brw_set_src1(p, insn, brw_imm_d(0));
2240
2241 if (intel->gen < 6)
2242 insn->header.destreg__conditionalmod = msg_reg_nr;
2243
2244 brw_set_urb_message(p,
2245 insn,
2246 allocate,
2247 used,
2248 msg_length,
2249 response_length,
2250 eot,
2251 writes_complete,
2252 offset,
2253 swizzle);
2254 }
2255
2256 static int
2257 brw_find_next_block_end(struct brw_compile *p, int start)
2258 {
2259 int ip;
2260
2261 for (ip = start + 1; ip < p->nr_insn; ip++) {
2262 struct brw_instruction *insn = &p->store[ip];
2263
2264 switch (insn->header.opcode) {
2265 case BRW_OPCODE_ENDIF:
2266 case BRW_OPCODE_ELSE:
2267 case BRW_OPCODE_WHILE:
2268 return ip;
2269 }
2270 }
2271 assert(!"not reached");
2272 return start + 1;
2273 }
2274
2275 /* There is no DO instruction on gen6, so to find the end of the loop
2276 * we have to see if the loop is jumping back before our start
2277 * instruction.
2278 */
2279 static int
2280 brw_find_loop_end(struct brw_compile *p, int start)
2281 {
2282 struct intel_context *intel = &p->brw->intel;
2283 int ip;
2284 int br = 2;
2285
2286 for (ip = start + 1; ip < p->nr_insn; ip++) {
2287 struct brw_instruction *insn = &p->store[ip];
2288
2289 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2290 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2291 : insn->bits3.break_cont.jip;
2292 if (ip + jip / br <= start)
2293 return ip;
2294 }
2295 }
2296 assert(!"not reached");
2297 return start + 1;
2298 }
2299
2300 /* After program generation, go back and update the UIP and JIP of
2301 * BREAK and CONT instructions to their correct locations.
2302 */
2303 void
2304 brw_set_uip_jip(struct brw_compile *p)
2305 {
2306 struct intel_context *intel = &p->brw->intel;
2307 int ip;
2308 int br = 2;
2309
2310 if (intel->gen < 6)
2311 return;
2312
2313 for (ip = 0; ip < p->nr_insn; ip++) {
2314 struct brw_instruction *insn = &p->store[ip];
2315
2316 switch (insn->header.opcode) {
2317 case BRW_OPCODE_BREAK:
2318 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2319 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2320 insn->bits3.break_cont.uip =
2321 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2322 break;
2323 case BRW_OPCODE_CONTINUE:
2324 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2325 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2326
2327 assert(insn->bits3.break_cont.uip != 0);
2328 assert(insn->bits3.break_cont.jip != 0);
2329 break;
2330 }
2331 }
2332 }
2333
2334 void brw_ff_sync(struct brw_compile *p,
2335 struct brw_reg dest,
2336 GLuint msg_reg_nr,
2337 struct brw_reg src0,
2338 bool allocate,
2339 GLuint response_length,
2340 bool eot)
2341 {
2342 struct intel_context *intel = &p->brw->intel;
2343 struct brw_instruction *insn;
2344
2345 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2346
2347 insn = next_insn(p, BRW_OPCODE_SEND);
2348 brw_set_dest(p, insn, dest);
2349 brw_set_src0(p, insn, src0);
2350 brw_set_src1(p, insn, brw_imm_d(0));
2351
2352 if (intel->gen < 6)
2353 insn->header.destreg__conditionalmod = msg_reg_nr;
2354
2355 brw_set_ff_sync_message(p,
2356 insn,
2357 allocate,
2358 response_length,
2359 eot);
2360 }