i965: Make some EU emit code for DP read/write messages non-static.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "../glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 static void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71 brw_push_insn_state(p);
72 brw_set_mask_control(p, BRW_MASK_DISABLE);
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75 retype(*src, BRW_REGISTER_TYPE_UD));
76 brw_pop_insn_state(p);
77 }
78 *src = brw_message_reg(msg_reg_nr);
79 }
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84 struct intel_context *intel = &p->brw->intel;
85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86 reg->file = BRW_GENERAL_REGISTER_FILE;
87 reg->nr += 111;
88 }
89 }
90
91
/**
 * Encode @dest as the destination operand of @insn, then derive the
 * instruction's execution size from the destination register width.
 *
 * Handles both direct and register-indirect addressing, in either
 * align1 or align16 access mode (the bitfield layouts differ per mode).
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* Gen7 has no MRFs; message registers become high GRFs. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A stride-0 destination is not encodable; promote to stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16 subregister numbers are in units of 16 bytes. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Same stride-0 promotion as the direct/align1 case above. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode @reg as the second source operand of @insn.
 *
 * src1 may not be a message register, and (per the assert below) only
 * direct addressing is supported for it.  An immediate src1 is legal
 * only when src0 is not also immediate, since both share the bits3
 * dword.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   /* Gen7 has no MRFs; message registers become high GRFs. */
   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16 subregister numbers are in units of 16 bytes. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar (exec size 1, width 1) region is encoded as <0;1,0>. */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
373
374
/* Fill in the SEND message descriptor for the extended-math function
 * unit.  Gen5 has its own descriptor layout (with the SFID stored in
 * bits2); other generations use the original math layout with the
 * message target in bits3.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 first; the descriptor fields below alias the same dword. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
414
415
/* Fill in the SEND descriptor for a URB FF_SYNC message (opcode 1 on
 * the URB shared function).  The message always carries a single
 * header phase; most gen5 URB descriptor fields are unused by FF_SYNC.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    GLboolean allocate,
				    GLuint response_length,
				    GLboolean end_of_thread)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 first; the descriptor fields below alias the same dword. */
   brw_set_src1(p, insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      /* On gen6+ the SFID lives in the destreg/condmod header field. */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
443
/* Fill in the SEND descriptor for a URB write.  Three descriptor
 * layouts are handled: gen7 (URB_WRITE_HWORD), gen5/gen6, and the
 * original pre-gen5 layout; the SFID location also differs (bits2 on
 * gen5, header condmod field on gen6+).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 first; the descriptor fields below alias the same dword. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes have no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
      insn->bits3.urb_gen7.header_present = 1;
      insn->bits3.urb_gen7.response_length = response_length;
      insn->bits3.urb_gen7.msg_length = msg_length;
      insn->bits3.urb_gen7.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.  Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
506
/**
 * Fill in the SEND descriptor for a data port write message.
 *
 * Four descriptor layouts are handled (gen7, gen6, gen5, pre-gen5).
 * On gen6+ the message always targets the render cache and the SFID is
 * placed in the header's destreg/condmod field; on gen5 it goes in
 * bits2, and earlier in bits3.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 GLboolean header_present,
			 GLuint pixel_scoreboard_clear,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 first; the descriptor fields below alias the same dword. */
   brw_set_src1(p, insn, brw_imm_ud(0));

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = header_present;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
      insn->bits3.gen6_dp.header_present = header_present;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
573
/**
 * Fill in the SEND descriptor for a data port read message.
 *
 * Five descriptor layouts are handled (gen7, gen6, gen5, g4x, original
 * gen4).  All reads here carry a message header and never terminate
 * the thread.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 first; the descriptor fields below alias the same dword. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      /* NOTE(review): gen7 ignores @target_cache and always routes reads
       * to the constant cache here — confirm this is intended for all
       * callers.
       */
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = 1;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
   } else if (intel->gen == 6) {
      uint32_t target_function;

      /* NOTE(review): data-cache reads are routed to the sampler cache
       * shared function on gen6; everything else goes to the render
       * cache — verify against the gen6 data port documentation.
       */
      if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
	 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
      else
	 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;

      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
      insn->bits3.gen6_dp.header_present = 1;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = target_function;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
650
/* Fill in the SEND descriptor for a sampler message.  Four layouts are
 * handled (gen7, gen5/gen6, g4x, original gen4); end-of-thread is never
 * allowed on sampler messages (asserted below).
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   /* Clear src1 first; the descriptor fields below alias the same dword. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
      insn->bits3.sampler_gen7.header_present = header_present;
      insn->bits3.sampler_gen7.response_length = response_length;
      insn->bits3.sampler_gen7.msg_length = msg_length;
      insn->bits3.sampler_gen7.end_of_thread = eot;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      /* The SFID moved into the header's condmod field on gen6. */
      if (intel->gen >= 6)
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
	 insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
711
712
#define next_insn brw_next_insn
/**
 * Append a new instruction to the program store, initialized from the
 * current default instruction state (p->current), and set its opcode.
 *
 * The conditional-mod / message-destreg field is treated as one-shot:
 * if the default state carried one, it (and predication) is reset on
 * p->current so it does not leak onto subsequent instructions.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
735
736 static struct brw_instruction *brw_alu1( struct brw_compile *p,
737 GLuint opcode,
738 struct brw_reg dest,
739 struct brw_reg src )
740 {
741 struct brw_instruction *insn = next_insn(p, opcode);
742 brw_set_dest(p, insn, dest);
743 brw_set_src0(p, insn, src);
744 return insn;
745 }
746
747 static struct brw_instruction *brw_alu2(struct brw_compile *p,
748 GLuint opcode,
749 struct brw_reg dest,
750 struct brw_reg src0,
751 struct brw_reg src1 )
752 {
753 struct brw_instruction *insn = next_insn(p, opcode);
754 brw_set_dest(p, insn, dest);
755 brw_set_src0(p, insn, src0);
756 brw_set_src1(p, insn, src1);
757 return insn;
758 }
759
760
761 /***********************************************************************
762 * Convenience routines.
763 */
/* Define brw_<OP>(): emit a one-source ALU instruction via brw_alu1(). */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

/* Define brw_<OP>(): emit a two-source ALU instruction via brw_alu2(). */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
805
806
/* Instantiate the public one- and two-source ALU emitters
 * (brw_MOV, brw_SEL, ..., brw_PLN) and the two-instruction
 * rounding emitters (brw_RNDZ, brw_RNDE).
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


ROUND(RNDZ)
ROUND(RNDE)
833
834
835 struct brw_instruction *brw_ADD(struct brw_compile *p,
836 struct brw_reg dest,
837 struct brw_reg src0,
838 struct brw_reg src1)
839 {
840 /* 6.2.2: add */
841 if (src0.type == BRW_REGISTER_TYPE_F ||
842 (src0.file == BRW_IMMEDIATE_VALUE &&
843 src0.type == BRW_REGISTER_TYPE_VF)) {
844 assert(src1.type != BRW_REGISTER_TYPE_UD);
845 assert(src1.type != BRW_REGISTER_TYPE_D);
846 }
847
848 if (src1.type == BRW_REGISTER_TYPE_F ||
849 (src1.file == BRW_IMMEDIATE_VALUE &&
850 src1.type == BRW_REGISTER_TYPE_VF)) {
851 assert(src0.type != BRW_REGISTER_TYPE_UD);
852 assert(src0.type != BRW_REGISTER_TYPE_D);
853 }
854
855 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
856 }
857
858 struct brw_instruction *brw_MUL(struct brw_compile *p,
859 struct brw_reg dest,
860 struct brw_reg src0,
861 struct brw_reg src1)
862 {
863 /* 6.32.38: mul */
864 if (src0.type == BRW_REGISTER_TYPE_D ||
865 src0.type == BRW_REGISTER_TYPE_UD ||
866 src1.type == BRW_REGISTER_TYPE_D ||
867 src1.type == BRW_REGISTER_TYPE_UD) {
868 assert(dest.type != BRW_REGISTER_TYPE_F);
869 }
870
871 if (src0.type == BRW_REGISTER_TYPE_F ||
872 (src0.file == BRW_IMMEDIATE_VALUE &&
873 src0.type == BRW_REGISTER_TYPE_VF)) {
874 assert(src1.type != BRW_REGISTER_TYPE_UD);
875 assert(src1.type != BRW_REGISTER_TYPE_D);
876 }
877
878 if (src1.type == BRW_REGISTER_TYPE_F ||
879 (src1.file == BRW_IMMEDIATE_VALUE &&
880 src1.type == BRW_REGISTER_TYPE_VF)) {
881 assert(src0.type != BRW_REGISTER_TYPE_UD);
882 assert(src0.type != BRW_REGISTER_TYPE_D);
883 }
884
885 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
886 src0.nr != BRW_ARF_ACCUMULATOR);
887 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
888 src1.nr != BRW_ARF_ACCUMULATOR);
889
890 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
891 }
892
893
894 void brw_NOP(struct brw_compile *p)
895 {
896 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
897 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
898 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
899 brw_set_src1(p, insn, brw_imm_ud(0x0));
900 }
901
902
903
904
905
906 /***********************************************************************
907 * Comparisons, if/else/endif
908 */
909
/* Emit a JMPI (jump indexed) instruction.
 *
 * JMPI operates on the instruction stream rather than per-channel data,
 * so it is forced to scalar, uncompressed, mask-disabled execution.
 * Predication is also cleared on the default state (p->current) so it
 * does not carry over to instructions emitted after the jump.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
925
926 static void
927 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
928 {
929 p->if_stack[p->if_stack_depth] = inst;
930
931 p->if_stack_depth++;
932 if (p->if_stack_array_size <= p->if_stack_depth) {
933 p->if_stack_array_size *= 2;
934 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
935 p->if_stack_array_size);
936 }
937 }
938
939 /* EU takes the value from the flag register and pushes it onto some
940 * sort of a stack (presumably merging with any flag value already on
941 * the stack). Within an if block, the flags at the top of the stack
942 * control execution on each channel of the unit, eg. on each of the
943 * 16 pixel values in our wm programs.
944 *
945 * When the matching 'else' instruction is reached (presumably by
946 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
948 *
949 * When the matching 'endif' instruction is reached, the flags are
950 * popped off. If the stack is now empty, normal execution resumes.
951 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-gen6: IF is IP-relative; the jump count in src1 is filled
       * in later by patch_IF_ELSE().
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6 keeps the jump count in the dest field (bits1); zero it
       * here and patch it when the matching ENDIF is emitted.
       */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7 uses JIP/UIP branch offsets in bits3; both patched later. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The predicate was consumed by the IF itself; clear it so it does
    * not leak onto subsequent instructions.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}
991
992 /* This function is only used for gen6-style IF instructions with an
993 * embedded comparison (conditional modifier). It is not used on gen7.
994 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* On gen6 the jump count lives in the dest field; zero it here and
    * let patch_IF_ELSE() fill it in at ENDIF time.
    */
   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The embedded comparison reuses the conditional-modifier field. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1023
1024 /**
1025 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1026 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* ADD-to-IP offsets are in bytes; each instruction is 16 bytes. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1063
1064 /**
1065 * Patch IF and ELSE instructions with appropriate jump targets.
1066 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   /* Called from brw_ENDIF() once the whole IF/(ELSE)/ENDIF sequence
    * has been emitted, so every jump distance is known.  Encodings
    * differ per generation: pre-gen6 uses IP-relative jump counts in
    * bits3, gen6 stores the count in bits1, gen7 uses JIP/UIP.
    */
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7: with no ELSE, both JIP and UIP target the ENDIF. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1139
void
brw_ELSE(struct brw_compile *p)
{
   /* Emit an ELSE and push it on the if-stack; its jump targets are
    * filled in by patch_IF_ELSE() when the matching ENDIF is emitted.
    */
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      /* Pre-gen6: IP-relative; the jump count is patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump count lives in the dest field (bits1). */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7: JIP/UIP offsets, patched later. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1172
void
brw_ENDIF(struct brw_compile *p)
{
   /* Close an IF/ELSE block: pop the pending IF (and optional ELSE)
    * off the if-stack, emit the ENDIF (unless in single-program-flow
    * mode, where the whole construct degenerates to ADDs), and patch
    * all jump targets now that the distances are known.
    */
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      /* Gen6/7 ENDIF jumps over itself (2 chunks = 1 instruction). */
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1227
1228 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1229 {
1230 struct intel_context *intel = &p->brw->intel;
1231 struct brw_instruction *insn;
1232
1233 insn = next_insn(p, BRW_OPCODE_BREAK);
1234 if (intel->gen >= 6) {
1235 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1236 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1237 brw_set_src1(p, insn, brw_imm_d(0x0));
1238 } else {
1239 brw_set_dest(p, insn, brw_ip_reg());
1240 brw_set_src0(p, insn, brw_ip_reg());
1241 brw_set_src1(p, insn, brw_imm_d(0x0));
1242 insn->bits3.if_else.pad0 = 0;
1243 insn->bits3.if_else.pop_count = pop_count;
1244 }
1245 insn->header.compression_control = BRW_COMPRESSION_NONE;
1246 insn->header.execution_size = BRW_EXECUTE_8;
1247
1248 return insn;
1249 }
1250
1251 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1252 struct brw_instruction *do_insn)
1253 {
1254 struct brw_instruction *insn;
1255
1256 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1257 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1258 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1259 brw_set_dest(p, insn, brw_ip_reg());
1260 brw_set_src0(p, insn, brw_ip_reg());
1261 brw_set_src1(p, insn, brw_imm_d(0x0));
1262
1263 insn->header.compression_control = BRW_COMPRESSION_NONE;
1264 insn->header.execution_size = BRW_EXECUTE_8;
1265 return insn;
1266 }
1267
1268 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1269 {
1270 struct brw_instruction *insn;
1271 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1272 brw_set_dest(p, insn, brw_ip_reg());
1273 brw_set_src0(p, insn, brw_ip_reg());
1274 brw_set_src1(p, insn, brw_imm_d(0x0));
1275 insn->header.compression_control = BRW_COMPRESSION_NONE;
1276 insn->header.execution_size = BRW_EXECUTE_8;
1277 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1278 insn->bits3.if_else.pad0 = 0;
1279 insn->bits3.if_else.pop_count = pop_count;
1280 return insn;
1281 }
1282
1283 /* DO/WHILE loop:
1284 *
1285 * The DO/WHILE is just an unterminated loop -- break or continue are
1286 * used for control within the loop. We have a few ways they can be
1287 * done.
1288 *
1289 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1290 * jip and no DO instruction.
1291 *
1292 * For non-uniform control flow pre-gen6, there's a DO instruction to
1293 * push the mask, and a WHILE to jump back, and BREAK to get out and
1294 * pop the mask.
1295 *
1296 * For gen6, there's no more mask stack, so no need for DO. WHILE
1297 * just points back to the first instruction of the loop.
1298 */
1299 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1300 {
1301 struct intel_context *intel = &p->brw->intel;
1302
1303 if (intel->gen >= 6 || p->single_program_flow) {
1304 return &p->store[p->nr_insn];
1305 } else {
1306 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1307
1308 /* Override the defaults for this instruction:
1309 */
1310 brw_set_dest(p, insn, brw_null_reg());
1311 brw_set_src0(p, insn, brw_null_reg());
1312 brw_set_src1(p, insn, brw_null_reg());
1313
1314 insn->header.compression_control = BRW_COMPRESSION_NONE;
1315 insn->header.execution_size = execute_size;
1316 insn->header.predicate_control = BRW_PREDICATE_NONE;
1317 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1318 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1319
1320 return insn;
1321 }
1322 }
1323
1324
1325
struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
   /* Close a DO/WHILE loop by emitting the backwards branch to
    * do_insn.  Offsets are in 64-bit chunks; on gen5+ each 128-bit
    * instruction is two chunks, hence br = 2.  Since do_insn precedes
    * this instruction, the computed offsets are negative.
    */
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      /* Gen7: backward branch offset goes in JIP. */
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else if (intel->gen == 6) {
      /* Gen6: the jump count lives in the dest field (bits1). */
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 /* SPF mode: the WHILE degenerates to a scalar ADD to the IP
	  * register (byte offsets, 16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 /* Pre-gen6: IP-relative WHILE jumping to just past the
	  * matching DO (hence the +1).
	  */
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1384
1385
1386 /* FORWARD JUMPS:
1387 */
1388 void brw_land_fwd_jump(struct brw_compile *p,
1389 struct brw_instruction *jmp_insn)
1390 {
1391 struct intel_context *intel = &p->brw->intel;
1392 struct brw_instruction *landing = &p->store[p->nr_insn];
1393 GLuint jmpi = 1;
1394
1395 if (intel->gen >= 5)
1396 jmpi = 2;
1397
1398 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1399 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1400
1401 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1402 }
1403
1404
1405
1406 /* To integrate with the above, it makes sense that the comparison
1407 * instruction should populate the flag register. It might be simpler
1408 * just to use the flag reg for most WM tasks?
1409 */
1410 void brw_CMP(struct brw_compile *p,
1411 struct brw_reg dest,
1412 GLuint conditional,
1413 struct brw_reg src0,
1414 struct brw_reg src1)
1415 {
1416 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1417
1418 insn->header.destreg__conditionalmod = conditional;
1419 brw_set_dest(p, insn, dest);
1420 brw_set_src0(p, insn, src0);
1421 brw_set_src1(p, insn, src1);
1422
1423 /* guess_execution_size(insn, src0); */
1424
1425
1426 /* Make it so that future instructions will use the computed flag
1427 * value until brw_set_predicate_control_flag_value() is called
1428 * again.
1429 */
1430 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1431 dest.nr == 0) {
1432 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1433 p->flag_value = 0xff;
1434 }
1435 }
1436
1437 /* Issue 'wait' instruction for n1, host could program MMIO
1438 to wake up thread. */
1439 void brw_WAIT (struct brw_compile *p)
1440 {
1441 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1442 struct brw_reg src = brw_notification_1_reg();
1443
1444 brw_set_dest(p, insn, src);
1445 brw_set_src0(p, insn, src);
1446 brw_set_src1(p, insn, brw_null_reg());
1447 insn->header.execution_size = 0; /* must */
1448 insn->header.predicate_control = 0;
1449 insn->header.compression_control = 0;
1450 }
1451
1452
1453 /***********************************************************************
1454 * Helpers for the various SEND message types:
1455 */
1456
1457 /** Extended math function, float[8].
1458 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      /* Gen6+: math is a native MATH opcode instead of a send to the
       * shared math unit; msg_reg_nr/data_type/precision are unused.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Operands must be packed GRFs. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      /* Everything except integer divide operates on floats. */
      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      /* Pre-gen6: math is a SEND to the shared math box.  POW needs a
       * second message register for its exponent; SINCOS writes two
       * result registers.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}
1519
1520 /** Extended math function, float[8].
1521 */
1522 void brw_math2(struct brw_compile *p,
1523 struct brw_reg dest,
1524 GLuint function,
1525 struct brw_reg src0,
1526 struct brw_reg src1)
1527 {
1528 struct intel_context *intel = &p->brw->intel;
1529 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1530
1531 assert(intel->gen >= 6);
1532 (void) intel;
1533
1534
1535 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1536 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1537 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1538
1539 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1540 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1541 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1542
1543 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1544 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1545 assert(src0.type == BRW_REGISTER_TYPE_F);
1546 assert(src1.type == BRW_REGISTER_TYPE_F);
1547 }
1548
1549 /* Source modifiers are ignored for extended math instructions. */
1550 assert(!src0.negate);
1551 assert(!src0.abs);
1552 assert(!src1.negate);
1553 assert(!src1.abs);
1554
1555 /* Math is the same ISA format as other opcodes, except that CondModifier
1556 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1557 */
1558 insn->header.destreg__conditionalmod = function;
1559
1560 brw_set_dest(p, insn, dest);
1561 brw_set_src0(p, insn, src0);
1562 brw_set_src1(p, insn, src1);
1563 }
1564
1565 /**
1566 * Extended math function, float[16].
1567 * Use 2 send instructions.
1568 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* Gen6+ handles 16-wide math with a single native MATH opcode,
       * so no send pair is needed.
       */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* Pre-gen6: the math unit is 8-wide, so a float[16] operation is
    * split into two sends, one per half.
    *
    * First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: the second half, using the next message
    * register and writing the next destination register.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1640
1641
1642 /**
1643 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1644 * using a constant offset per channel.
1645 *
1646 * The offset must be aligned to oword size (16 bytes). Used for
1647 * register spilling.
1648 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the scratch offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF is two owords; message length is header + payload regs. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* Gen6+ sends read the payload from the GRF named in src0;
       * earlier gens take it from the MRF named in the descriptor.
       */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1750
1751
1752 /**
1753 * Read a block of owords (half a GRF each) from the scratch buffer
1754 * using a constant index per channel.
1755 *
1756 * Offset must be aligned to oword size (16 bytes). Used for register
1757 * spilling.
1758 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* Gen6+ takes the scratch offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* One GRF is two owords; response length is the register count. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: copy of g0 with the scratch
    * offset written into element 2 (same scheme as the write path).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      /* Gen6+ sends take the payload from the GRF named in src0. */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1825
1826 /**
1827 * Read a float[4] vector from the data port Data Cache (const buffer).
1828 * Location (in buffer) should be a multiple of 16.
1829 * Used for fetching shader constants.
1830 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Header setup must not be predicated/compressed/masked; the
    * surrounding state is saved and restored around it.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   /* Gen6+ sends take the payload from the GRF named in src0. */
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1883
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 *
 * \param dest             register receiving the gathered dwords
 * \param mrf              header register; the register after it holds the
 *                         per-channel dword offsets (hence msg_length 2)
 * \param bind_table_index binding table entry of the buffer surface
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Copy r0 into the header MRF with predication/compression/masking
    * disabled so it is written for all channels.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   /* MRF number of the message payload for the implied move. */
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}
1922
1923
1924
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 *
 * \param location         offset into the constant buffer (converted to
 *                         owords for gen6+, matching brw_oword_block_read())
 * \param bind_table_index binding table entry of the constant buffer surface
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;	/* message header is always built in MRF 1 */

   /* On gen6+, the message takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      /* Pre-gen6 the payload comes implicitly from the MRF number in
       * destreg__conditionalmod (set above), so src0 is null.
       */
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1976
/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 *
 * \param addr_reg         register holding the per-vertex address value(s)
 * \param offset           constant offset added to addr_reg to form the
 *                         block offsets in M1
 * \param bind_table_index binding table entry of the constant buffer surface
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);	/* r0 header */
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* NOTE(review): presumably materializes the pre-gen6 implied
    * header move explicitly for gen6+ and updates src accordingly —
    * see gen6_resolve_implied_move() for the contract.
    */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The oword dual block read message type changed encodings across
    * generations; pick the one for this chipset.
    */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}
2032
2033
2034
/**
 * Emit a render target write message to the data port.
 *
 * \param dispatch_width       8 or 16; selects the SIMD8/SIMD16 message
 *                             control and the width of the null dest
 * \param msg_reg_nr           first MRF of the color payload
 * \param src0                 message source (replaced with the MRF on gen6+,
 *                             which submits the color payload headerless)
 * \param binding_table_index  render target's binding table entry
 * \param msg_length           payload size in registers
 * \param response_length      registers expected back from the data port
 * \param eot                  end-of-thread: terminate the shader after send
 * \param header_present       whether the payload begins with a header
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   /* Writes return no data we care about: use a null dest matching the
    * dispatch width.
    */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* NOTE(review): SENDC is used for render target 0 on gen6+ —
    * presumably to wait on cross-thread dependencies before the first
    * color write; confirm against the PRM.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      /* Pre-gen6: record the MRF number for the implied payload move. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2094
2095
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * \param writemask  channels actually written by the sample; used both to
 *                   trim the response and to drive the dependency
 *                   workaround below (a zero mask emits nothing)
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }
   
   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Count leading disabled channels; each advances dest by 2 regs. */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      /* A non-contiguous writemask can't be expressed via the message
       * header's channel mask, so fall back to the stall workaround
       * emitted after the send.
       */
      if (newmask != writemask) {
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: the header wants the channels to *disable*. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a header from r0 with the disable mask placed at bit 12
	  * of header element 2.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;
   
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length, 
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       *
       * Reading back the last register of the response forces the
       * destination dependency the hardware misses (see the workaround
       * comment above).
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   
}
2224
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 *
 * Emits a URB write send; the allocate/used/writes_complete/offset/
 * swizzle parameters are forwarded verbatim to brw_set_urb_message().
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* NOTE(review): presumably materializes the pre-gen6 implied MRF
    * move explicitly on gen6+ and rewrites src0 — see
    * gen6_resolve_implied_move().
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	     retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
	     brw_imm_ud(0xff00));
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6: record the MRF number for the implied payload move. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2277
2278 static int
2279 brw_find_next_block_end(struct brw_compile *p, int start)
2280 {
2281 int ip;
2282
2283 for (ip = start + 1; ip < p->nr_insn; ip++) {
2284 struct brw_instruction *insn = &p->store[ip];
2285
2286 switch (insn->header.opcode) {
2287 case BRW_OPCODE_ENDIF:
2288 case BRW_OPCODE_ELSE:
2289 case BRW_OPCODE_WHILE:
2290 return ip;
2291 }
2292 }
2293 assert(!"not reached");
2294 return start + 1;
2295 }
2296
2297 /* There is no DO instruction on gen6, so to find the end of the loop
2298 * we have to see if the loop is jumping back before our start
2299 * instruction.
2300 */
2301 static int
2302 brw_find_loop_end(struct brw_compile *p, int start)
2303 {
2304 struct intel_context *intel = &p->brw->intel;
2305 int ip;
2306 int br = 2;
2307
2308 for (ip = start + 1; ip < p->nr_insn; ip++) {
2309 struct brw_instruction *insn = &p->store[ip];
2310
2311 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2312 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2313 : insn->bits3.break_cont.jip;
2314 if (ip + jip / br < start)
2315 return ip;
2316 }
2317 }
2318 assert(!"not reached");
2319 return start + 1;
2320 }
2321
2322 /* After program generation, go back and update the UIP and JIP of
2323 * BREAK and CONT instructions to their correct locations.
2324 */
2325 void
2326 brw_set_uip_jip(struct brw_compile *p)
2327 {
2328 struct intel_context *intel = &p->brw->intel;
2329 int ip;
2330 int br = 2;
2331
2332 if (intel->gen < 6)
2333 return;
2334
2335 for (ip = 0; ip < p->nr_insn; ip++) {
2336 struct brw_instruction *insn = &p->store[ip];
2337
2338 switch (insn->header.opcode) {
2339 case BRW_OPCODE_BREAK:
2340 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2341 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2342 insn->bits3.break_cont.uip =
2343 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2344 break;
2345 case BRW_OPCODE_CONTINUE:
2346 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2347 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2348
2349 assert(insn->bits3.break_cont.uip != 0);
2350 assert(insn->bits3.break_cont.jip != 0);
2351 break;
2352 }
2353 }
2354 }
2355
/**
 * Emit an FF_SYNC send message.
 *
 * \param msg_reg_nr       MRF number of the message payload
 * \param src0             payload source (resolved to the MRF on gen6+)
 * \param allocate         forwarded to brw_set_ff_sync_message()
 * \param response_length  registers expected in the reply
 * \param eot              end-of-thread flag
 */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLuint response_length,
		   GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* NOTE(review): presumably materializes the pre-gen6 implied MRF
    * move explicitly on gen6+ — see gen6_resolve_implied_move().
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6: record the MRF number for the implied payload move. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}