i965: Make the old VS backend record pull constant references in pull_params[].
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 static void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71 brw_push_insn_state(p);
72 brw_set_mask_control(p, BRW_MASK_DISABLE);
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75 retype(*src, BRW_REGISTER_TYPE_UD));
76 brw_pop_insn_state(p);
77 }
78 *src = brw_message_reg(msg_reg_nr);
79 }
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84 struct intel_context *intel = &p->brw->intel;
85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86 reg->file = BRW_GENERAL_REGISTER_FILE;
87 reg->nr += 111;
88 }
89 }
90
91
/**
 * Encode \p dest as the destination operand of \p insn.
 *
 * Handles direct and register-indirect addressing in both align1 and
 * align16 access modes, and derives the instruction's execution size from
 * the destination width.  On gen7 an MRF destination is first remapped to
 * a GRF.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A stride-0 (scalar) destination is encoded as stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: the subregister number is in 16-byte units and the
	  * per-channel writemask applies.
	  */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* A stride-0 (scalar) destination is encoded as stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode \p reg as the second source operand of \p insn.
 *
 * src1 may be an immediate (which occupies all of bits3), but it can never
 * be an MRF and the hardware only supports direct addressing for it.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16: the subregister number is in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A <0,1,0> region broadcasts one channel when both the register
	  * width and the execution size are 1.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
373
374
/**
 * Fill in the SEND message descriptor for the math function unit.
 *
 * src1 is zeroed first because the descriptor shares its dword (bits3).
 * Ironlake (gen5) uses a different descriptor layout and also carries the
 * SFID/EOT in bits2; the older layout is used otherwise.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
414
415
/**
 * Fill in the SEND descriptor for an URB FF_SYNC message (gen5+ layout).
 *
 * Several descriptor fields are unused by FF_SYNC and are zeroed.  On gen6+
 * the SFID lives in the destreg/conditionalmod header field; on gen5 it is
 * in bits2 alongside a second copy of the EOT bit.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    GLboolean allocate,
				    GLuint response_length,
				    GLboolean end_of_thread)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   brw_set_src1(p, insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
443
/**
 * Fill in the SEND descriptor for an URB write message.
 *
 * Selects the descriptor layout for the hardware generation: gen7
 * (URB_WRITE_HWORD, no allocate/used bits), gen5-6 (urb_gen5 layout), or
 * the original gen4 layout.  src1 is zeroed first because the descriptor
 * is encoded in the same dword.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Transpose swizzling does not exist on gen7. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
      insn->bits3.urb_gen7.header_present = 1;
      insn->bits3.urb_gen7.response_length = response_length;
      insn->bits3.urb_gen7.msg_length = msg_length;
      insn->bits3.urb_gen7.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.  Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
506
/**
 * Fill in the SEND descriptor for a dataport write message.
 *
 * Selects the per-generation descriptor layout.  On gen6+ the target
 * function (render cache) is encoded in the destreg/conditionalmod header
 * field; on gen5 and earlier it lives in the descriptor itself.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 GLboolean header_present,
			 GLuint pixel_scoreboard_clear,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   brw_set_src1(p, insn, brw_imm_ud(0));

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = header_present;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
      insn->bits3.gen6_dp.header_present = header_present;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
573
/**
 * Fill in the SEND descriptor for a dataport read message.
 *
 * Selects the per-generation descriptor layout.  Note that on gen7 the
 * target_cache argument is not used: the constant cache target is always
 * programmed.  On gen6, data-cache requests are routed to the sampler
 * cache and everything else to the render cache.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = 1;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
   } else if (intel->gen == 6) {
      uint32_t target_function;

      if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
	 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
      else
	 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;

      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
      insn->bits3.gen6_dp.header_present = 1;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = target_function;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
650
/**
 * Fill in the SEND descriptor for a sampler message.
 *
 * Selects the per-generation descriptor layout (gen7, gen5-6, g4x, gen4).
 * EOT is never used from the sampler, hence the assert.
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
      insn->bits3.sampler_gen7.header_present = header_present;
      insn->bits3.sampler_gen7.response_length = response_length;
      insn->bits3.sampler_gen7.msg_length = msg_length;
      insn->bits3.sampler_gen7.end_of_thread = eot;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      /* On gen6+ the SFID goes in the destreg/condmod field; on gen5 it
       * lives in bits2.
       */
      if (intel->gen >= 6)
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
	 insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
711
712
713 #define next_insn brw_next_insn
/**
 * Append a new instruction to the program, initialized from the current
 * default instruction state (p->current), and set its opcode.
 *
 * The conditional-mod/flag-subreg default is a one-shot: if it was set for
 * this instruction, it is cleared afterwards and the default predication is
 * restored to BRW_PREDICATE_NORMAL for subsequent instructions.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
735
736 static struct brw_instruction *brw_alu1( struct brw_compile *p,
737 GLuint opcode,
738 struct brw_reg dest,
739 struct brw_reg src )
740 {
741 struct brw_instruction *insn = next_insn(p, opcode);
742 brw_set_dest(p, insn, dest);
743 brw_set_src0(p, insn, src);
744 return insn;
745 }
746
747 static struct brw_instruction *brw_alu2(struct brw_compile *p,
748 GLuint opcode,
749 struct brw_reg dest,
750 struct brw_reg src0,
751 struct brw_reg src1 )
752 {
753 struct brw_instruction *insn = next_insn(p, opcode);
754 brw_set_dest(p, insn, dest);
755 brw_set_src0(p, insn, src0);
756 brw_set_src1(p, insn, src1);
757 return insn;
758 }
759
760
761 /***********************************************************************
762 * Convenience routines.
763 */
/* Generate the public single-source emitter brw_<OP>(). */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Generate the public two-source emitter brw_<OP>(). */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
805
806
/* Instantiate the brw_MOV() .. brw_PLN() emitters from the ALU macros
 * above, and the two-instruction rounding variants from ROUND.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


ROUND(RNDZ)
ROUND(RNDE)
833
834
835 struct brw_instruction *brw_ADD(struct brw_compile *p,
836 struct brw_reg dest,
837 struct brw_reg src0,
838 struct brw_reg src1)
839 {
840 /* 6.2.2: add */
841 if (src0.type == BRW_REGISTER_TYPE_F ||
842 (src0.file == BRW_IMMEDIATE_VALUE &&
843 src0.type == BRW_REGISTER_TYPE_VF)) {
844 assert(src1.type != BRW_REGISTER_TYPE_UD);
845 assert(src1.type != BRW_REGISTER_TYPE_D);
846 }
847
848 if (src1.type == BRW_REGISTER_TYPE_F ||
849 (src1.file == BRW_IMMEDIATE_VALUE &&
850 src1.type == BRW_REGISTER_TYPE_VF)) {
851 assert(src0.type != BRW_REGISTER_TYPE_UD);
852 assert(src0.type != BRW_REGISTER_TYPE_D);
853 }
854
855 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
856 }
857
858 struct brw_instruction *brw_MUL(struct brw_compile *p,
859 struct brw_reg dest,
860 struct brw_reg src0,
861 struct brw_reg src1)
862 {
863 /* 6.32.38: mul */
864 if (src0.type == BRW_REGISTER_TYPE_D ||
865 src0.type == BRW_REGISTER_TYPE_UD ||
866 src1.type == BRW_REGISTER_TYPE_D ||
867 src1.type == BRW_REGISTER_TYPE_UD) {
868 assert(dest.type != BRW_REGISTER_TYPE_F);
869 }
870
871 if (src0.type == BRW_REGISTER_TYPE_F ||
872 (src0.file == BRW_IMMEDIATE_VALUE &&
873 src0.type == BRW_REGISTER_TYPE_VF)) {
874 assert(src1.type != BRW_REGISTER_TYPE_UD);
875 assert(src1.type != BRW_REGISTER_TYPE_D);
876 }
877
878 if (src1.type == BRW_REGISTER_TYPE_F ||
879 (src1.file == BRW_IMMEDIATE_VALUE &&
880 src1.type == BRW_REGISTER_TYPE_VF)) {
881 assert(src0.type != BRW_REGISTER_TYPE_UD);
882 assert(src0.type != BRW_REGISTER_TYPE_D);
883 }
884
885 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
886 src0.nr != BRW_ARF_ACCUMULATOR);
887 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
888 src1.nr != BRW_ARF_ACCUMULATOR);
889
890 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
891 }
892
893
894 void brw_NOP(struct brw_compile *p)
895 {
896 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
897 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
898 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
899 brw_set_src1(p, insn, brw_imm_ud(0x0));
900 }
901
902
903
904
905
906 /***********************************************************************
907 * Comparisons, if/else/endif
908 */
909
910 struct brw_instruction *brw_JMPI(struct brw_compile *p,
911 struct brw_reg dest,
912 struct brw_reg src0,
913 struct brw_reg src1)
914 {
915 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
916
917 insn->header.execution_size = 1;
918 insn->header.compression_control = BRW_COMPRESSION_NONE;
919 insn->header.mask_control = BRW_MASK_DISABLE;
920
921 p->current->header.predicate_control = BRW_PREDICATE_NONE;
922
923 return insn;
924 }
925
926 static void
927 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
928 {
929 p->if_stack[p->if_stack_depth] = inst;
930
931 p->if_stack_depth++;
932 if (p->if_stack_array_size <= p->if_stack_depth) {
933 p->if_stack_array_size *= 2;
934 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
935 p->if_stack_array_size);
936 }
937 }
938
939 /* EU takes the value from the flag register and pushes it onto some
940 * sort of a stack (presumably merging with any flag value already on
941 * the stack). Within an if block, the flags at the top of the stack
942 * control execution on each channel of the unit, eg. on each of the
943 * 16 pixel values in our wm programs.
944 *
945 * When the matching 'else' instruction is reached (presumably by
946 * countdown of the instruction count patched in by our ELSE/ENDIF
947 * functions), the relevent flags are inverted.
948 *
949 * When the matching 'endif' instruction is reached, the flags are
950 * popped off. If the stack is now empty, normal execution resumes.
951 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    *
    * The operand encoding differs per generation; the jump targets are
    * left zero here and filled in later by patch_IF_ELSE (called from
    * brw_ENDIF).
    */
   if (intel->gen < 6) {
      /* Pre-gen6: IF operates on the instruction pointer register. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump target is a jump count in bits1. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: jump targets are JIP/UIP offsets in bits3. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Don't let the IF's predication leak into following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}
991
992 /* This function is only used for gen6-style IF instructions with an
993 * embedded comparison (conditional modifier). It is not used on gen7.
994 */
995 struct brw_instruction *
996 gen6_IF(struct brw_compile *p, uint32_t conditional,
997 struct brw_reg src0, struct brw_reg src1)
998 {
999 struct brw_instruction *insn;
1000
1001 insn = next_insn(p, BRW_OPCODE_IF);
1002
1003 brw_set_dest(p, insn, brw_imm_w(0));
1004 if (p->compressed) {
1005 insn->header.execution_size = BRW_EXECUTE_16;
1006 } else {
1007 insn->header.execution_size = BRW_EXECUTE_8;
1008 }
1009 insn->bits1.branch_gen6.jump_count = 0;
1010 brw_set_src0(p, insn, src0);
1011 brw_set_src1(p, insn, src1);
1012
1013 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
1014 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1015 insn->header.destreg__conditionalmod = conditional;
1016
1017 if (!p->single_program_flow)
1018 insn->header.thread_control = BRW_THREAD_SWITCH;
1019
1020 push_if_stack(p, insn);
1021 return insn;
1022 }
1023
1024 /**
1025 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1026 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* The ADD immediates are byte deltas between instructions; each
       * instruction is 16 bytes (hence the "* 16").  The "+ 1" skips
       * past the converted ELSE so the IF branch lands on the first
       * instruction of the else-block.
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1063
1064 /**
1065 * Patch IF and ELSE instructions with appropriate jump targets.
1066 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   /* Fill in the jump targets of a completed IF/[ELSE]/ENDIF construct.
    * Called from brw_ENDIF once all three instructions exist; the
    * encoding of the targets differs per hardware generation.
    */
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* Gen7+: with no ELSE, both JIP and UIP point at the ENDIF. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1139
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* The operand encoding differs per generation; jump targets are left
    * zero and filled in later by patch_IF_ELSE (from brw_ENDIF).
    */
   if (intel->gen < 6) {
      /* Pre-gen6: ELSE operates on the instruction pointer register. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump target is a jump count in bits1. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: jump targets are JIP/UIP offsets in bits3. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Remember this ELSE so brw_ENDIF can pop and patch it. */
   push_if_stack(p, insn);
}
1172
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Operand encoding differs per generation, matching brw_IF/brw_ELSE. */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   /* Now that all three instructions exist, fill in the jump targets. */
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1227
1228 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1229 {
1230 struct intel_context *intel = &p->brw->intel;
1231 struct brw_instruction *insn;
1232
1233 insn = next_insn(p, BRW_OPCODE_BREAK);
1234 if (intel->gen >= 6) {
1235 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1236 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1237 brw_set_src1(p, insn, brw_imm_d(0x0));
1238 } else {
1239 brw_set_dest(p, insn, brw_ip_reg());
1240 brw_set_src0(p, insn, brw_ip_reg());
1241 brw_set_src1(p, insn, brw_imm_d(0x0));
1242 insn->bits3.if_else.pad0 = 0;
1243 insn->bits3.if_else.pop_count = pop_count;
1244 }
1245 insn->header.compression_control = BRW_COMPRESSION_NONE;
1246 insn->header.execution_size = BRW_EXECUTE_8;
1247
1248 return insn;
1249 }
1250
1251 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1252 struct brw_instruction *do_insn)
1253 {
1254 struct brw_instruction *insn;
1255
1256 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1257 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1258 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1259 brw_set_dest(p, insn, brw_ip_reg());
1260 brw_set_src0(p, insn, brw_ip_reg());
1261 brw_set_src1(p, insn, brw_imm_d(0x0));
1262
1263 insn->header.compression_control = BRW_COMPRESSION_NONE;
1264 insn->header.execution_size = BRW_EXECUTE_8;
1265 return insn;
1266 }
1267
1268 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1269 {
1270 struct brw_instruction *insn;
1271 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1272 brw_set_dest(p, insn, brw_ip_reg());
1273 brw_set_src0(p, insn, brw_ip_reg());
1274 brw_set_src1(p, insn, brw_imm_d(0x0));
1275 insn->header.compression_control = BRW_COMPRESSION_NONE;
1276 insn->header.execution_size = BRW_EXECUTE_8;
1277 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1278 insn->bits3.if_else.pad0 = 0;
1279 insn->bits3.if_else.pop_count = pop_count;
1280 return insn;
1281 }
1282
1283 /* DO/WHILE loop:
1284 *
1285 * The DO/WHILE is just an unterminated loop -- break or continue are
1286 * used for control within the loop. We have a few ways they can be
1287 * done.
1288 *
1289 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1290 * jip and no DO instruction.
1291 *
1292 * For non-uniform control flow pre-gen6, there's a DO instruction to
1293 * push the mask, and a WHILE to jump back, and BREAK to get out and
1294 * pop the mask.
1295 *
1296 * For gen6, there's no more mask stack, so no need for DO. WHILE
1297 * just points back to the first instruction of the loop.
1298 */
1299 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1300 {
1301 struct intel_context *intel = &p->brw->intel;
1302
1303 if (intel->gen >= 6 || p->single_program_flow) {
1304 return &p->store[p->nr_insn];
1305 } else {
1306 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1307
1308 /* Override the defaults for this instruction:
1309 */
1310 brw_set_dest(p, insn, brw_null_reg());
1311 brw_set_src0(p, insn, brw_null_reg());
1312 brw_set_src1(p, insn, brw_null_reg());
1313
1314 insn->header.compression_control = BRW_COMPRESSION_NONE;
1315 insn->header.execution_size = execute_size;
1316 insn->header.predicate_control = BRW_PREDICATE_NONE;
1317 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1318 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1319
1320 return insn;
1321 }
1322 }
1323
1324
1325
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* Jump counts are in 64-bit chunks from gen5 on (see patch_IF_ELSE),
    * so one 128-bit instruction counts as 2.
    */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      /* Gen7+: backward jump distance goes in the JIP field. */
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      /* Gen6: backward jump distance goes in the gen6 jump count. */
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: the loop back-edge is just a scalar ADD to the
	  * instruction pointer, in bytes (16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 /* Pre-gen6 non-SPF: a real WHILE paired with the DO that
	  * pushed the mask; it jumps just past the DO ("+ 1").
	  */
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1382
1383
1384 /* FORWARD JUMPS:
1385 */
1386 void brw_land_fwd_jump(struct brw_compile *p,
1387 struct brw_instruction *jmp_insn)
1388 {
1389 struct intel_context *intel = &p->brw->intel;
1390 struct brw_instruction *landing = &p->store[p->nr_insn];
1391 GLuint jmpi = 1;
1392
1393 if (intel->gen >= 5)
1394 jmpi = 2;
1395
1396 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1397 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1398
1399 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1400 }
1401
1402
1403
1404 /* To integrate with the above, it makes sense that the comparison
1405 * instruction should populate the flag register. It might be simpler
1406 * just to use the flag reg for most WM tasks?
1407 */
1408 void brw_CMP(struct brw_compile *p,
1409 struct brw_reg dest,
1410 GLuint conditional,
1411 struct brw_reg src0,
1412 struct brw_reg src1)
1413 {
1414 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1415
1416 insn->header.destreg__conditionalmod = conditional;
1417 brw_set_dest(p, insn, dest);
1418 brw_set_src0(p, insn, src0);
1419 brw_set_src1(p, insn, src1);
1420
1421 /* guess_execution_size(insn, src0); */
1422
1423
1424 /* Make it so that future instructions will use the computed flag
1425 * value until brw_set_predicate_control_flag_value() is called
1426 * again.
1427 */
1428 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1429 dest.nr == 0) {
1430 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1431 p->flag_value = 0xff;
1432 }
1433 }
1434
1435 /* Issue 'wait' instruction for n1, host could program MMIO
1436 to wake up thread. */
1437 void brw_WAIT (struct brw_compile *p)
1438 {
1439 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1440 struct brw_reg src = brw_notification_1_reg();
1441
1442 brw_set_dest(p, insn, src);
1443 brw_set_src0(p, insn, src);
1444 brw_set_src1(p, insn, brw_null_reg());
1445 insn->header.execution_size = 0; /* must */
1446 insn->header.predicate_control = 0;
1447 insn->header.compression_control = 0;
1448 }
1449
1450
1451 /***********************************************************************
1452 * Helpers for the various SEND message types:
1453 */
1454
1455 /** Extended math function, float[8].
1456 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      /* Gen6+: math is a native MATH opcode rather than a SEND to the
       * shared math unit; msg_reg_nr/data_type/precision are unused on
       * this path.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      /* Everything but the integer-divide functions takes a float src. */
      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      /* Pre-gen6: math goes through a SEND message.  POW needs a second
       * message register for its exponent; SINCOS returns two results.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}
1517
1518 /** Extended math function, float[8].
1519 */
1520 void brw_math2(struct brw_compile *p,
1521 struct brw_reg dest,
1522 GLuint function,
1523 struct brw_reg src0,
1524 struct brw_reg src1)
1525 {
1526 struct intel_context *intel = &p->brw->intel;
1527 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1528
1529 assert(intel->gen >= 6);
1530 (void) intel;
1531
1532
1533 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1534 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1535 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1536
1537 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1538 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1539 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1540
1541 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1542 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1543 assert(src0.type == BRW_REGISTER_TYPE_F);
1544 assert(src1.type == BRW_REGISTER_TYPE_F);
1545 }
1546
1547 /* Source modifiers are ignored for extended math instructions. */
1548 assert(!src0.negate);
1549 assert(!src0.abs);
1550 assert(!src1.negate);
1551 assert(!src1.abs);
1552
1553 /* Math is the same ISA format as other opcodes, except that CondModifier
1554 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1555 */
1556 insn->header.destreg__conditionalmod = function;
1557
1558 brw_set_dest(p, insn, dest);
1559 brw_set_src0(p, insn, src0);
1560 brw_set_src1(p, insn, src1);
1561 }
1562
1563 /**
1564 * Extended math function, float[16].
1565 * Use 2 send instructions.
1566 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW needs a second message register; SINCOS returns two results. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* Gen6+: a single native MATH instruction handles the full width;
       * msg_reg_nr and precision are unused on this path.
       */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* Pre-gen6: split the 16-wide operation into two SEND messages, one
    * per half.
    *
    * First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: the second half uses the 2NDHALF compression
    * control, the next message register, and writes one register past
    * the first half's destination.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1638
1639
1640 /**
1641 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1642 * using a constant offset per channel.
1643 *
1644 * The offset must be aligned to oword size (16 bytes). Used for
1645 * register spilling.
1646 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+, the offset field is in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF of payload is two owords; header + payload give the
    * message length.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* Gen6 sends take the header from src0; earlier gens implicitly
       * use the message register selected above.
       */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1748
1749
1750 /**
1751 * Read a block of owords (half a GRF each) from the scratch buffer
1752 * using a constant index per channel.
1753 *
1754 * Offset must be aligned to oword size (16 bytes). Used for register
1755 * spilling.
1756 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* On gen6+, the offset field is in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* One GRF of response is two owords. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header from g0 with the oword offset written
    * into element 2, as in brw_oword_block_write_scratch().
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      /* Gen6 sends take the header from src0; earlier gens implicitly
       * use the message register selected above.
       */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1823
1824 /**
1825 * Read a float[4] vector from the data port Data Cache (const buffer).
1826 * Location (in buffer) should be a multiple of 16.
1827 * Used for fetching shader constants.
1828 */
1829 void brw_oword_block_read(struct brw_compile *p,
1830 struct brw_reg dest,
1831 struct brw_reg mrf,
1832 uint32_t offset,
1833 uint32_t bind_table_index)
1834 {
1835 struct intel_context *intel = &p->brw->intel;
1836
1837 /* On newer hardware, offset is in units of owords. */
1838 if (intel->gen >= 6)
1839 offset /= 16;
1840
1841 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1842
1843 brw_push_insn_state(p);
1844 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1845 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1846 brw_set_mask_control(p, BRW_MASK_DISABLE);
1847
1848 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1849
1850 /* set message header global offset field (reg 0, element 2) */
1851 brw_MOV(p,
1852 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1853 mrf.nr,
1854 2), BRW_REGISTER_TYPE_UD),
1855 brw_imm_ud(offset));
1856
1857 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1858 insn->header.destreg__conditionalmod = mrf.nr;
1859
1860 /* cast dest to a uword[8] vector */
1861 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1862
1863 brw_set_dest(p, insn, dest);
1864 if (intel->gen >= 6) {
1865 brw_set_src0(p, insn, mrf);
1866 } else {
1867 brw_set_src0(p, insn, brw_null_reg());
1868 }
1869
1870 brw_set_dp_read_message(p,
1871 insn,
1872 bind_table_index,
1873 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1874 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1875 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1876 1, /* msg_length */
1877 1); /* response_length (1 reg, 2 owords!) */
1878
1879 brw_pop_insn_state(p);
1880 }
1881
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Build the message header (a copy of g0) unconditionally and
    * uncompressed, without disturbing the caller's instruction state.
    * The per-channel offsets in mrf+1 are filled in by the caller.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length: header + offsets reg */
			   1); /* response_length */
}
1920
1921
1922
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* On gen6+ the message takes the offset in units of owords rather
    * than bytes (matching brw_oword_block_read above).
    */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   /* The global offset lives in dword 2 of the message header. */
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   /* Gen6+ names the payload via src0; earlier gens use a null src0
    * and the destreg field set above.
    */
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1974
/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 *
 * The per-vertex block offsets come from \p addr_reg, biased by the
 * constant \p offset; results land in \p dest.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
                               struct brw_reg dest,
                               struct brw_reg addr_reg,
                               GLuint offset,
                               GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On gen6+, copy g0 into the MRF so the SEND has a real payload source. */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The OWord Dual Block Read message type encoding differs per
    * generation; pick the right one.
    */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length: header + offsets reg */
			   1); /* response_length */
}
2030
2031
2032
/**
 * Emit a render-target (framebuffer) write message.
 *
 * \param dispatch_width       8 or 16; selects the SIMD8/SIMD16 message control
 * \param msg_reg_nr           first MRF of the color payload
 * \param src0                 payload source (ignored/replaced on gen6+)
 * \param binding_table_index  render target surface index
 * \param eot                  true for the final (end-of-thread) write
 * \param header_present       whether the payload starts with a header reg
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   /* Writes have no return data; use an appropriately-sized null dest. */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* NOTE(review): SENDC (send-with-dependency-check) is used only for
    * RT 0 on gen6+ — presumably to order against prior scoreboard
    * activity; confirm against the PRM.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1,	/* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2092
2093
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Count leading disabled channels: each skipped channel shifts the
       * start of the written destination by 2 regs (SIMD16 layout).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      /* If the enabled channels are not one contiguous run, we can't
       * express them with a header mask, so fall back to a read-after-
       * write stall instruction after the sample (need_stall path below).
       */
      if (newmask != writemask) {
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: the header field encodes channels to *disable*. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a header (copy of g0) with the channel mask in dword 2. */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	 insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* A self-MOV of the last response reg forces the EU to wait for
       * the sampler result, generating the missing dependency.
       *
       *  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}
2222
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6+, copy g0 into the MRF so the SEND has a real payload src. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 the payload MRF is named via the destreg field. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2278
2279 static int
2280 brw_find_next_block_end(struct brw_compile *p, int start)
2281 {
2282 int ip;
2283
2284 for (ip = start + 1; ip < p->nr_insn; ip++) {
2285 struct brw_instruction *insn = &p->store[ip];
2286
2287 switch (insn->header.opcode) {
2288 case BRW_OPCODE_ENDIF:
2289 case BRW_OPCODE_ELSE:
2290 case BRW_OPCODE_WHILE:
2291 return ip;
2292 }
2293 }
2294 assert(!"not reached");
2295 return start + 1;
2296 }
2297
2298 /* There is no DO instruction on gen6, so to find the end of the loop
2299 * we have to see if the loop is jumping back before our start
2300 * instruction.
2301 */
2302 static int
2303 brw_find_loop_end(struct brw_compile *p, int start)
2304 {
2305 struct intel_context *intel = &p->brw->intel;
2306 int ip;
2307 int br = 2;
2308
2309 for (ip = start + 1; ip < p->nr_insn; ip++) {
2310 struct brw_instruction *insn = &p->store[ip];
2311
2312 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2313 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2314 : insn->bits3.break_cont.jip;
2315 if (ip + jip / br <= start)
2316 return ip;
2317 }
2318 }
2319 assert(!"not reached");
2320 return start + 1;
2321 }
2322
2323 /* After program generation, go back and update the UIP and JIP of
2324 * BREAK and CONT instructions to their correct locations.
2325 */
2326 void
2327 brw_set_uip_jip(struct brw_compile *p)
2328 {
2329 struct intel_context *intel = &p->brw->intel;
2330 int ip;
2331 int br = 2;
2332
2333 if (intel->gen < 6)
2334 return;
2335
2336 for (ip = 0; ip < p->nr_insn; ip++) {
2337 struct brw_instruction *insn = &p->store[ip];
2338
2339 switch (insn->header.opcode) {
2340 case BRW_OPCODE_BREAK:
2341 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2342 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2343 insn->bits3.break_cont.uip =
2344 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2345 break;
2346 case BRW_OPCODE_CONTINUE:
2347 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2348 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2349
2350 assert(insn->bits3.break_cont.uip != 0);
2351 assert(insn->bits3.break_cont.jip != 0);
2352 break;
2353 }
2354 }
2355 }
2356
/**
 * Emit an FF_SYNC URB message (used by fixed-function threads to
 * synchronize / allocate URB handles before writing output).
 *
 * \param allocate         request URB handle allocation
 * \param response_length  regs of returned data (e.g. the allocated handle)
 * \param eot              end-of-thread flag
 */
void brw_ff_sync(struct brw_compile *p,
	      struct brw_reg dest,
	      GLuint msg_reg_nr,
	      struct brw_reg src0,
	      GLboolean allocate,
	      GLuint response_length,
	      GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6+, copy g0 into the MRF so the SEND has a real payload src. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 the payload MRF is named via the destreg field. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}