i965: Attempt to un-muddle Gen6 data port message target defines.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size(struct brw_compile *p,
45 struct brw_instruction *insn,
46 struct brw_reg reg)
47 {
48 if (reg.width == BRW_WIDTH_8 && p->compressed)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 /**
56 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
57 * registers, implicitly moving the operand to a message register.
58 *
59 * On Sandybridge, this is no longer the case. This function performs the
60 * explicit move; it should be called before emitting a SEND instruction.
61 */
62 static void
63 gen6_resolve_implied_move(struct brw_compile *p,
64 struct brw_reg *src,
65 GLuint msg_reg_nr)
66 {
67 struct intel_context *intel = &p->brw->intel;
68 if (intel->gen != 6)
69 return;
70
71 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
72 brw_push_insn_state(p);
73 brw_set_mask_control(p, BRW_MASK_DISABLE);
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
76 retype(*src, BRW_REGISTER_TYPE_UD));
77 brw_pop_insn_state(p);
78 }
79 *src = brw_message_reg(msg_reg_nr);
80 }
81
82
83 static void brw_set_dest(struct brw_compile *p,
84 struct brw_instruction *insn,
85 struct brw_reg dest)
86 {
87 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
88 dest.file != BRW_MESSAGE_REGISTER_FILE)
89 assert(dest.nr < 128);
90
91 insn->bits1.da1.dest_reg_file = dest.file;
92 insn->bits1.da1.dest_reg_type = dest.type;
93 insn->bits1.da1.dest_address_mode = dest.address_mode;
94
95 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
96 insn->bits1.da1.dest_reg_nr = dest.nr;
97
98 if (insn->header.access_mode == BRW_ALIGN_1) {
99 insn->bits1.da1.dest_subreg_nr = dest.subnr;
100 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
101 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
102 insn->bits1.da1.dest_horiz_stride = dest.hstride;
103 }
104 else {
105 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
106 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
107 /* even ignored in da16, still need to set as '01' */
108 insn->bits1.da16.dest_horiz_stride = 1;
109 }
110 }
111 else {
112 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
113
114 /* These are different sizes in align1 vs align16:
115 */
116 if (insn->header.access_mode == BRW_ALIGN_1) {
117 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
118 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
119 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
120 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
121 }
122 else {
123 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
124 /* even ignored in da16, still need to set as '01' */
125 insn->bits1.ia16.dest_horiz_stride = 1;
126 }
127 }
128
129 /* NEW: Set the execution size based on dest.width and
130 * insn->compression_control:
131 */
132 guess_execution_size(p, insn, dest);
133 }
134
135 extern int reg_type_size[];
136
137 static void
138 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
139 {
140 int hstride_for_reg[] = {0, 1, 2, 4};
141 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
142 int width_for_reg[] = {1, 2, 4, 8, 16};
143 int execsize_for_reg[] = {1, 2, 4, 8, 16};
144 int width, hstride, vstride, execsize;
145
146 if (reg.file == BRW_IMMEDIATE_VALUE) {
147 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
148 * mean the destination has to be 128-bit aligned and the
149 * destination horiz stride has to be a word.
150 */
151 if (reg.type == BRW_REGISTER_TYPE_V) {
152 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
153 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
154 }
155
156 return;
157 }
158
159 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
160 reg.file == BRW_ARF_NULL)
161 return;
162
163 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
164 hstride = hstride_for_reg[reg.hstride];
165
166 if (reg.vstride == 0xf) {
167 vstride = -1;
168 } else {
169 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
170 vstride = vstride_for_reg[reg.vstride];
171 }
172
173 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
174 width = width_for_reg[reg.width];
175
176 assert(insn->header.execution_size >= 0 &&
177 insn->header.execution_size < Elements(execsize_for_reg));
178 execsize = execsize_for_reg[insn->header.execution_size];
179
180 /* Restrictions from 3.3.10: Register Region Restrictions. */
181 /* 3. */
182 assert(execsize >= width);
183
184 /* 4. */
185 if (execsize == width && hstride != 0) {
186 assert(vstride == -1 || vstride == width * hstride);
187 }
188
189 /* 5. */
190 if (execsize == width && hstride == 0) {
191 /* no restriction on vstride. */
192 }
193
194 /* 6. */
195 if (width == 1) {
196 assert(hstride == 0);
197 }
198
199 /* 7. */
200 if (execsize == 1 && width == 1) {
201 assert(hstride == 0);
202 assert(vstride == 0);
203 }
204
205 /* 8. */
206 if (vstride == 0 && hstride == 0) {
207 assert(width == 1);
208 }
209
210 /* 10. Check destination issues. */
211 }
212
213 static void brw_set_src0( struct brw_instruction *insn,
214 struct brw_reg reg )
215 {
216 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
217 assert(reg.nr < 128);
218
219 validate_reg(insn, reg);
220
221 insn->bits1.da1.src0_reg_file = reg.file;
222 insn->bits1.da1.src0_reg_type = reg.type;
223 insn->bits2.da1.src0_abs = reg.abs;
224 insn->bits2.da1.src0_negate = reg.negate;
225 insn->bits2.da1.src0_address_mode = reg.address_mode;
226
227 if (reg.file == BRW_IMMEDIATE_VALUE) {
228 insn->bits3.ud = reg.dw1.ud;
229
230 /* Required to set some fields in src1 as well:
231 */
232 insn->bits1.da1.src1_reg_file = 0; /* arf */
233 insn->bits1.da1.src1_reg_type = reg.type;
234 }
235 else
236 {
237 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
238 if (insn->header.access_mode == BRW_ALIGN_1) {
239 insn->bits2.da1.src0_subreg_nr = reg.subnr;
240 insn->bits2.da1.src0_reg_nr = reg.nr;
241 }
242 else {
243 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
244 insn->bits2.da16.src0_reg_nr = reg.nr;
245 }
246 }
247 else {
248 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
249
250 if (insn->header.access_mode == BRW_ALIGN_1) {
251 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
252 }
253 else {
254 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
255 }
256 }
257
258 if (insn->header.access_mode == BRW_ALIGN_1) {
259 if (reg.width == BRW_WIDTH_1 &&
260 insn->header.execution_size == BRW_EXECUTE_1) {
261 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
262 insn->bits2.da1.src0_width = BRW_WIDTH_1;
263 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
264 }
265 else {
266 insn->bits2.da1.src0_horiz_stride = reg.hstride;
267 insn->bits2.da1.src0_width = reg.width;
268 insn->bits2.da1.src0_vert_stride = reg.vstride;
269 }
270 }
271 else {
272 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
273 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
274 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
275 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
276
277 /* This is an oddity of the fact we're using the same
278 * descriptions for registers in align_16 as align_1:
279 */
280 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
281 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
282 else
283 insn->bits2.da16.src0_vert_stride = reg.vstride;
284 }
285 }
286 }
287
288
289 void brw_set_src1( struct brw_instruction *insn,
290 struct brw_reg reg )
291 {
292 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
293
294 assert(reg.nr < 128);
295
296 validate_reg(insn, reg);
297
298 insn->bits1.da1.src1_reg_file = reg.file;
299 insn->bits1.da1.src1_reg_type = reg.type;
300 insn->bits3.da1.src1_abs = reg.abs;
301 insn->bits3.da1.src1_negate = reg.negate;
302
303 /* Only src1 can be immediate in two-argument instructions.
304 */
305 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
306
307 if (reg.file == BRW_IMMEDIATE_VALUE) {
308 insn->bits3.ud = reg.dw1.ud;
309 }
310 else {
311 /* This is a hardware restriction, which may or may not be lifted
312 * in the future:
313 */
314 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
315 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
316
317 if (insn->header.access_mode == BRW_ALIGN_1) {
318 insn->bits3.da1.src1_subreg_nr = reg.subnr;
319 insn->bits3.da1.src1_reg_nr = reg.nr;
320 }
321 else {
322 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
323 insn->bits3.da16.src1_reg_nr = reg.nr;
324 }
325
326 if (insn->header.access_mode == BRW_ALIGN_1) {
327 if (reg.width == BRW_WIDTH_1 &&
328 insn->header.execution_size == BRW_EXECUTE_1) {
329 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
330 insn->bits3.da1.src1_width = BRW_WIDTH_1;
331 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
332 }
333 else {
334 insn->bits3.da1.src1_horiz_stride = reg.hstride;
335 insn->bits3.da1.src1_width = reg.width;
336 insn->bits3.da1.src1_vert_stride = reg.vstride;
337 }
338 }
339 else {
340 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
341 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
342 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
343 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
344
345 /* This is an oddity of the fact we're using the same
346 * descriptions for registers in align_16 as align_1:
347 */
348 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
349 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
350 else
351 insn->bits3.da16.src1_vert_stride = reg.vstride;
352 }
353 }
354 }
355
356
357
358 static void brw_set_math_message( struct brw_context *brw,
359 struct brw_instruction *insn,
360 GLuint msg_length,
361 GLuint response_length,
362 GLuint function,
363 GLuint integer_type,
364 GLboolean low_precision,
365 GLboolean saturate,
366 GLuint dataType )
367 {
368 struct intel_context *intel = &brw->intel;
369 brw_set_src1(insn, brw_imm_d(0));
370
371 if (intel->gen == 5) {
372 insn->bits3.math_gen5.function = function;
373 insn->bits3.math_gen5.int_type = integer_type;
374 insn->bits3.math_gen5.precision = low_precision;
375 insn->bits3.math_gen5.saturate = saturate;
376 insn->bits3.math_gen5.data_type = dataType;
377 insn->bits3.math_gen5.snapshot = 0;
378 insn->bits3.math_gen5.header_present = 0;
379 insn->bits3.math_gen5.response_length = response_length;
380 insn->bits3.math_gen5.msg_length = msg_length;
381 insn->bits3.math_gen5.end_of_thread = 0;
382 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
383 insn->bits2.send_gen5.end_of_thread = 0;
384 } else {
385 insn->bits3.math.function = function;
386 insn->bits3.math.int_type = integer_type;
387 insn->bits3.math.precision = low_precision;
388 insn->bits3.math.saturate = saturate;
389 insn->bits3.math.data_type = dataType;
390 insn->bits3.math.response_length = response_length;
391 insn->bits3.math.msg_length = msg_length;
392 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
393 insn->bits3.math.end_of_thread = 0;
394 }
395 }
396
397
398 static void brw_set_ff_sync_message(struct brw_context *brw,
399 struct brw_instruction *insn,
400 GLboolean allocate,
401 GLuint response_length,
402 GLboolean end_of_thread)
403 {
404 struct intel_context *intel = &brw->intel;
405 brw_set_src1(insn, brw_imm_d(0));
406
407 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
408 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
409 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
410 insn->bits3.urb_gen5.allocate = allocate;
411 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
412 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
413 insn->bits3.urb_gen5.header_present = 1;
414 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
415 insn->bits3.urb_gen5.msg_length = 1;
416 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
417 if (intel->gen >= 6) {
418 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
419 } else {
420 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
421 insn->bits2.send_gen5.end_of_thread = end_of_thread;
422 }
423 }
424
425 static void brw_set_urb_message( struct brw_context *brw,
426 struct brw_instruction *insn,
427 GLboolean allocate,
428 GLboolean used,
429 GLuint msg_length,
430 GLuint response_length,
431 GLboolean end_of_thread,
432 GLboolean complete,
433 GLuint offset,
434 GLuint swizzle_control )
435 {
436 struct intel_context *intel = &brw->intel;
437 brw_set_src1(insn, brw_imm_d(0));
438
439 if (intel->gen >= 5) {
440 insn->bits3.urb_gen5.opcode = 0; /* ? */
441 insn->bits3.urb_gen5.offset = offset;
442 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
443 insn->bits3.urb_gen5.allocate = allocate;
444 insn->bits3.urb_gen5.used = used; /* ? */
445 insn->bits3.urb_gen5.complete = complete;
446 insn->bits3.urb_gen5.header_present = 1;
447 insn->bits3.urb_gen5.response_length = response_length;
448 insn->bits3.urb_gen5.msg_length = msg_length;
449 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
450 if (intel->gen >= 6) {
451 /* For SNB, the SFID bits moved to the condmod bits, and
452 * EOT stayed in bits3 above. Does the EOT bit setting
453 * below on Ironlake even do anything?
454 */
455 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
456 } else {
457 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
458 insn->bits2.send_gen5.end_of_thread = end_of_thread;
459 }
460 } else {
461 insn->bits3.urb.opcode = 0; /* ? */
462 insn->bits3.urb.offset = offset;
463 insn->bits3.urb.swizzle_control = swizzle_control;
464 insn->bits3.urb.allocate = allocate;
465 insn->bits3.urb.used = used; /* ? */
466 insn->bits3.urb.complete = complete;
467 insn->bits3.urb.response_length = response_length;
468 insn->bits3.urb.msg_length = msg_length;
469 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
470 insn->bits3.urb.end_of_thread = end_of_thread;
471 }
472 }
473
474 static void brw_set_dp_write_message( struct brw_context *brw,
475 struct brw_instruction *insn,
476 GLuint binding_table_index,
477 GLuint msg_control,
478 GLuint msg_type,
479 GLuint msg_length,
480 GLboolean header_present,
481 GLuint pixel_scoreboard_clear,
482 GLuint response_length,
483 GLuint end_of_thread,
484 GLuint send_commit_msg)
485 {
486 struct intel_context *intel = &brw->intel;
487 brw_set_src1(insn, brw_imm_ud(0));
488
489 if (intel->gen >= 6) {
490 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
491 insn->bits3.dp_render_cache.msg_control = msg_control;
492 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
493 insn->bits3.dp_render_cache.msg_type = msg_type;
494 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
495 insn->bits3.dp_render_cache.header_present = header_present;
496 insn->bits3.dp_render_cache.response_length = response_length;
497 insn->bits3.dp_render_cache.msg_length = msg_length;
498 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
499
500 /* We always use the render cache for write messages */
501 insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
502 } else if (intel->gen == 5) {
503 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
504 insn->bits3.dp_write_gen5.msg_control = msg_control;
505 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
506 insn->bits3.dp_write_gen5.msg_type = msg_type;
507 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
508 insn->bits3.dp_write_gen5.header_present = header_present;
509 insn->bits3.dp_write_gen5.response_length = response_length;
510 insn->bits3.dp_write_gen5.msg_length = msg_length;
511 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
512 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
513 insn->bits2.send_gen5.end_of_thread = end_of_thread;
514 } else {
515 insn->bits3.dp_write.binding_table_index = binding_table_index;
516 insn->bits3.dp_write.msg_control = msg_control;
517 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
518 insn->bits3.dp_write.msg_type = msg_type;
519 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
520 insn->bits3.dp_write.response_length = response_length;
521 insn->bits3.dp_write.msg_length = msg_length;
522 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
523 insn->bits3.dp_write.end_of_thread = end_of_thread;
524 }
525 }
526
527 static void
528 brw_set_dp_read_message(struct brw_context *brw,
529 struct brw_instruction *insn,
530 GLuint binding_table_index,
531 GLuint msg_control,
532 GLuint msg_type,
533 GLuint target_cache,
534 GLuint msg_length,
535 GLuint response_length)
536 {
537 struct intel_context *intel = &brw->intel;
538 brw_set_src1(insn, brw_imm_d(0));
539
540 if (intel->gen >= 6) {
541 uint32_t target_function;
542
543 if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
544 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
545 else
546 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
547
548 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
549 insn->bits3.dp_render_cache.msg_control = msg_control;
550 insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
551 insn->bits3.dp_render_cache.msg_type = msg_type;
552 insn->bits3.dp_render_cache.send_commit_msg = 0;
553 insn->bits3.dp_render_cache.header_present = 1;
554 insn->bits3.dp_render_cache.response_length = response_length;
555 insn->bits3.dp_render_cache.msg_length = msg_length;
556 insn->bits3.dp_render_cache.end_of_thread = 0;
557 insn->header.destreg__conditionalmod = target_function;
558 } else if (intel->gen == 5) {
559 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
560 insn->bits3.dp_read_gen5.msg_control = msg_control;
561 insn->bits3.dp_read_gen5.msg_type = msg_type;
562 insn->bits3.dp_read_gen5.target_cache = target_cache;
563 insn->bits3.dp_read_gen5.header_present = 1;
564 insn->bits3.dp_read_gen5.response_length = response_length;
565 insn->bits3.dp_read_gen5.msg_length = msg_length;
566 insn->bits3.dp_read_gen5.pad1 = 0;
567 insn->bits3.dp_read_gen5.end_of_thread = 0;
568 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
569 insn->bits2.send_gen5.end_of_thread = 0;
570 } else if (intel->is_g4x) {
571 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
572 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
573 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
574 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
575 insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
576 insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/
577 insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
578 insn->bits3.dp_read_g4x.pad1 = 0;
579 insn->bits3.dp_read_g4x.end_of_thread = 0;
580 } else {
581 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
582 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
583 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
584 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
585 insn->bits3.dp_read.response_length = response_length; /*16:19*/
586 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
587 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
588 insn->bits3.dp_read.pad1 = 0; /*28:30*/
589 insn->bits3.dp_read.end_of_thread = 0; /*31*/
590 }
591 }
592
593 static void brw_set_sampler_message(struct brw_context *brw,
594 struct brw_instruction *insn,
595 GLuint binding_table_index,
596 GLuint sampler,
597 GLuint msg_type,
598 GLuint response_length,
599 GLuint msg_length,
600 GLboolean eot,
601 GLuint header_present,
602 GLuint simd_mode)
603 {
604 struct intel_context *intel = &brw->intel;
605 assert(eot == 0);
606 brw_set_src1(insn, brw_imm_d(0));
607
608 if (intel->gen >= 5) {
609 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
610 insn->bits3.sampler_gen5.sampler = sampler;
611 insn->bits3.sampler_gen5.msg_type = msg_type;
612 insn->bits3.sampler_gen5.simd_mode = simd_mode;
613 insn->bits3.sampler_gen5.header_present = header_present;
614 insn->bits3.sampler_gen5.response_length = response_length;
615 insn->bits3.sampler_gen5.msg_length = msg_length;
616 insn->bits3.sampler_gen5.end_of_thread = eot;
617 if (intel->gen >= 6)
618 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
619 else {
620 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
621 insn->bits2.send_gen5.end_of_thread = eot;
622 }
623 } else if (intel->is_g4x) {
624 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
625 insn->bits3.sampler_g4x.sampler = sampler;
626 insn->bits3.sampler_g4x.msg_type = msg_type;
627 insn->bits3.sampler_g4x.response_length = response_length;
628 insn->bits3.sampler_g4x.msg_length = msg_length;
629 insn->bits3.sampler_g4x.end_of_thread = eot;
630 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
631 } else {
632 insn->bits3.sampler.binding_table_index = binding_table_index;
633 insn->bits3.sampler.sampler = sampler;
634 insn->bits3.sampler.msg_type = msg_type;
635 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
636 insn->bits3.sampler.response_length = response_length;
637 insn->bits3.sampler.msg_length = msg_length;
638 insn->bits3.sampler.end_of_thread = eot;
639 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
640 }
641 }
642
643
644
645 static struct brw_instruction *next_insn( struct brw_compile *p,
646 GLuint opcode )
647 {
648 struct brw_instruction *insn;
649
650 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
651
652 insn = &p->store[p->nr_insn++];
653 memcpy(insn, p->current, sizeof(*insn));
654
655 /* Reset this one-shot flag:
656 */
657
658 if (p->current->header.destreg__conditionalmod) {
659 p->current->header.destreg__conditionalmod = 0;
660 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
661 }
662
663 insn->header.opcode = opcode;
664 return insn;
665 }
666
667
668 static struct brw_instruction *brw_alu1( struct brw_compile *p,
669 GLuint opcode,
670 struct brw_reg dest,
671 struct brw_reg src )
672 {
673 struct brw_instruction *insn = next_insn(p, opcode);
674 brw_set_dest(p, insn, dest);
675 brw_set_src0(insn, src);
676 return insn;
677 }
678
679 static struct brw_instruction *brw_alu2(struct brw_compile *p,
680 GLuint opcode,
681 struct brw_reg dest,
682 struct brw_reg src0,
683 struct brw_reg src1 )
684 {
685 struct brw_instruction *insn = next_insn(p, opcode);
686 brw_set_dest(p, insn, dest);
687 brw_set_src0(insn, src0);
688 brw_set_src1(insn, src1);
689 return insn;
690 }
691
692
693 /***********************************************************************
694 * Convenience routines.
695 */
696 #define ALU1(OP) \
697 struct brw_instruction *brw_##OP(struct brw_compile *p, \
698 struct brw_reg dest, \
699 struct brw_reg src0) \
700 { \
701 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
702 }
703
704 #define ALU2(OP) \
705 struct brw_instruction *brw_##OP(struct brw_compile *p, \
706 struct brw_reg dest, \
707 struct brw_reg src0, \
708 struct brw_reg src1) \
709 { \
710 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
711 }
712
713 /* Rounding operations (other than RNDD) require two instructions - the first
714 * stores a rounded value (possibly the wrong way) in the dest register, but
715 * also sets a per-channel "increment bit" in the flag register. A predicated
716 * add of 1.0 fixes dest to contain the desired result.
717 */
718 #define ROUND(OP) \
719 void brw_##OP(struct brw_compile *p, \
720 struct brw_reg dest, \
721 struct brw_reg src) \
722 { \
723 struct brw_instruction *rnd, *add; \
724 rnd = next_insn(p, BRW_OPCODE_##OP); \
725 brw_set_dest(p, rnd, dest); \
726 brw_set_src0(rnd, src); \
727 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
728 \
729 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
730 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
731 }
732
733
734 ALU1(MOV)
735 ALU2(SEL)
736 ALU1(NOT)
737 ALU2(AND)
738 ALU2(OR)
739 ALU2(XOR)
740 ALU2(SHR)
741 ALU2(SHL)
742 ALU2(RSR)
743 ALU2(RSL)
744 ALU2(ASR)
745 ALU1(FRC)
746 ALU1(RNDD)
747 ALU2(MAC)
748 ALU2(MACH)
749 ALU1(LZD)
750 ALU2(DP4)
751 ALU2(DPH)
752 ALU2(DP3)
753 ALU2(DP2)
754 ALU2(LINE)
755 ALU2(PLN)
756
757
758 ROUND(RNDZ)
759 ROUND(RNDE)
760
761
762 struct brw_instruction *brw_ADD(struct brw_compile *p,
763 struct brw_reg dest,
764 struct brw_reg src0,
765 struct brw_reg src1)
766 {
767 /* 6.2.2: add */
768 if (src0.type == BRW_REGISTER_TYPE_F ||
769 (src0.file == BRW_IMMEDIATE_VALUE &&
770 src0.type == BRW_REGISTER_TYPE_VF)) {
771 assert(src1.type != BRW_REGISTER_TYPE_UD);
772 assert(src1.type != BRW_REGISTER_TYPE_D);
773 }
774
775 if (src1.type == BRW_REGISTER_TYPE_F ||
776 (src1.file == BRW_IMMEDIATE_VALUE &&
777 src1.type == BRW_REGISTER_TYPE_VF)) {
778 assert(src0.type != BRW_REGISTER_TYPE_UD);
779 assert(src0.type != BRW_REGISTER_TYPE_D);
780 }
781
782 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
783 }
784
785 struct brw_instruction *brw_MUL(struct brw_compile *p,
786 struct brw_reg dest,
787 struct brw_reg src0,
788 struct brw_reg src1)
789 {
790 /* 6.32.38: mul */
791 if (src0.type == BRW_REGISTER_TYPE_D ||
792 src0.type == BRW_REGISTER_TYPE_UD ||
793 src1.type == BRW_REGISTER_TYPE_D ||
794 src1.type == BRW_REGISTER_TYPE_UD) {
795 assert(dest.type != BRW_REGISTER_TYPE_F);
796 }
797
798 if (src0.type == BRW_REGISTER_TYPE_F ||
799 (src0.file == BRW_IMMEDIATE_VALUE &&
800 src0.type == BRW_REGISTER_TYPE_VF)) {
801 assert(src1.type != BRW_REGISTER_TYPE_UD);
802 assert(src1.type != BRW_REGISTER_TYPE_D);
803 }
804
805 if (src1.type == BRW_REGISTER_TYPE_F ||
806 (src1.file == BRW_IMMEDIATE_VALUE &&
807 src1.type == BRW_REGISTER_TYPE_VF)) {
808 assert(src0.type != BRW_REGISTER_TYPE_UD);
809 assert(src0.type != BRW_REGISTER_TYPE_D);
810 }
811
812 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
813 src0.nr != BRW_ARF_ACCUMULATOR);
814 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
815 src1.nr != BRW_ARF_ACCUMULATOR);
816
817 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
818 }
819
820
821 void brw_NOP(struct brw_compile *p)
822 {
823 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
824 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
825 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
826 brw_set_src1(insn, brw_imm_ud(0x0));
827 }
828
829
830
831
832
833 /***********************************************************************
834 * Comparisons, if/else/endif
835 */
836
837 struct brw_instruction *brw_JMPI(struct brw_compile *p,
838 struct brw_reg dest,
839 struct brw_reg src0,
840 struct brw_reg src1)
841 {
842 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
843
844 insn->header.execution_size = 1;
845 insn->header.compression_control = BRW_COMPRESSION_NONE;
846 insn->header.mask_control = BRW_MASK_DISABLE;
847
848 p->current->header.predicate_control = BRW_PREDICATE_NONE;
849
850 return insn;
851 }
852
853 /* EU takes the value from the flag register and pushes it onto some
854 * sort of a stack (presumably merging with any flag value already on
855 * the stack). Within an if block, the flags at the top of the stack
856 * control execution on each channel of the unit, eg. on each of the
857 * 16 pixel values in our wm programs.
858 *
859 * When the matching 'else' instruction is reached (presumably by
860 * countdown of the instruction count patched in by our ELSE/ENDIF
861 * functions), the relevent flags are inverted.
862 *
863 * When the matching 'endif' instruction is reached, the flags are
864 * popped off. If the stack is now empty, normal execution resumes.
865 *
866 * No attempt is made to deal with stack overflow (14 elements?).
867 */
868 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
869 {
870 struct intel_context *intel = &p->brw->intel;
871 struct brw_instruction *insn;
872
873 if (p->single_program_flow) {
874 assert(execute_size == BRW_EXECUTE_1);
875
876 insn = next_insn(p, BRW_OPCODE_ADD);
877 insn->header.predicate_inverse = 1;
878 } else {
879 insn = next_insn(p, BRW_OPCODE_IF);
880 }
881
882 /* Override the defaults for this instruction:
883 */
884 if (intel->gen < 6) {
885 brw_set_dest(p, insn, brw_ip_reg());
886 brw_set_src0(insn, brw_ip_reg());
887 brw_set_src1(insn, brw_imm_d(0x0));
888 } else {
889 brw_set_dest(p, insn, brw_imm_w(0));
890 insn->bits1.branch_gen6.jump_count = 0;
891 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
892 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
893 }
894
895 insn->header.execution_size = execute_size;
896 insn->header.compression_control = BRW_COMPRESSION_NONE;
897 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
898 insn->header.mask_control = BRW_MASK_ENABLE;
899 if (!p->single_program_flow)
900 insn->header.thread_control = BRW_THREAD_SWITCH;
901
902 p->current->header.predicate_control = BRW_PREDICATE_NONE;
903
904 return insn;
905 }
906
907 struct brw_instruction *
908 gen6_IF(struct brw_compile *p, uint32_t conditional,
909 struct brw_reg src0, struct brw_reg src1)
910 {
911 struct brw_instruction *insn;
912
913 insn = next_insn(p, BRW_OPCODE_IF);
914
915 brw_set_dest(p, insn, brw_imm_w(0));
916 insn->header.execution_size = BRW_EXECUTE_8;
917 insn->bits1.branch_gen6.jump_count = 0;
918 brw_set_src0(insn, src0);
919 brw_set_src1(insn, src1);
920
921 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
922 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
923 insn->header.destreg__conditionalmod = conditional;
924
925 if (!p->single_program_flow)
926 insn->header.thread_control = BRW_THREAD_SWITCH;
927
928 return insn;
929 }
930
931 struct brw_instruction *brw_ELSE(struct brw_compile *p,
932 struct brw_instruction *if_insn)
933 {
934 struct intel_context *intel = &p->brw->intel;
935 struct brw_instruction *insn;
936 GLuint br = 1;
937
938 /* jump count is for 64bit data chunk each, so one 128bit
939 instruction requires 2 chunks. */
940 if (intel->gen >= 5)
941 br = 2;
942
943 if (p->single_program_flow) {
944 insn = next_insn(p, BRW_OPCODE_ADD);
945 } else {
946 insn = next_insn(p, BRW_OPCODE_ELSE);
947 }
948
949 if (intel->gen < 6) {
950 brw_set_dest(p, insn, brw_ip_reg());
951 brw_set_src0(insn, brw_ip_reg());
952 brw_set_src1(insn, brw_imm_d(0x0));
953 } else {
954 brw_set_dest(p, insn, brw_imm_w(0));
955 insn->bits1.branch_gen6.jump_count = 0;
956 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
957 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
958 }
959
960 insn->header.compression_control = BRW_COMPRESSION_NONE;
961 insn->header.execution_size = if_insn->header.execution_size;
962 insn->header.mask_control = BRW_MASK_ENABLE;
963 if (!p->single_program_flow)
964 insn->header.thread_control = BRW_THREAD_SWITCH;
965
966 /* Patch the if instruction to point at this instruction.
967 */
968 if (p->single_program_flow) {
969 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
970
971 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
972 } else {
973 assert(if_insn->header.opcode == BRW_OPCODE_IF);
974
975 if (intel->gen < 6) {
976 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
977 if_insn->bits3.if_else.pop_count = 0;
978 if_insn->bits3.if_else.pad0 = 0;
979 } else {
980 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
981 }
982 }
983
984 return insn;
985 }
986
987 void brw_ENDIF(struct brw_compile *p,
988 struct brw_instruction *patch_insn)
989 {
990 struct intel_context *intel = &p->brw->intel;
991 GLuint br = 1;
992
993 if (intel->gen >= 5)
994 br = 2;
995
996 if (p->single_program_flow) {
997 /* In single program flow mode, there's no need to execute an ENDIF,
998 * since we don't need to do any stack operations, and if we're executing
999 * currently, we want to just continue executing.
1000 */
1001 struct brw_instruction *next = &p->store[p->nr_insn];
1002
1003 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
1004
1005 patch_insn->bits3.ud = (next - patch_insn) * 16;
1006 } else {
1007 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
1008
1009 if (intel->gen < 6) {
1010 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1011 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1012 brw_set_src1(insn, brw_imm_d(0x0));
1013 } else {
1014 brw_set_dest(p, insn, brw_imm_w(0));
1015 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1016 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1017 }
1018
1019 insn->header.compression_control = BRW_COMPRESSION_NONE;
1020 insn->header.execution_size = patch_insn->header.execution_size;
1021 insn->header.mask_control = BRW_MASK_ENABLE;
1022 insn->header.thread_control = BRW_THREAD_SWITCH;
1023
1024 if (intel->gen < 6)
1025 assert(patch_insn->bits3.if_else.jump_count == 0);
1026 else
1027 assert(patch_insn->bits1.branch_gen6.jump_count == 0);
1028
1029 /* Patch the if or else instructions to point at this or the next
1030 * instruction respectively.
1031 */
1032 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
1033 if (intel->gen < 6) {
1034 /* Turn it into an IFF, which means no mask stack operations for
1035 * all-false and jumping past the ENDIF.
1036 */
1037 patch_insn->header.opcode = BRW_OPCODE_IFF;
1038 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
1039 patch_insn->bits3.if_else.pop_count = 0;
1040 patch_insn->bits3.if_else.pad0 = 0;
1041 } else {
1042 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1043 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
1044 }
1045 } else {
1046 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
1047 if (intel->gen < 6) {
1048 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1049 * matching ENDIF.
1050 */
1051 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
1052 patch_insn->bits3.if_else.pop_count = 1;
1053 patch_insn->bits3.if_else.pad0 = 0;
1054 } else {
1055 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1056 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
1057 }
1058 }
1059
1060 /* Also pop item off the stack in the endif instruction:
1061 */
1062 if (intel->gen < 6) {
1063 insn->bits3.if_else.jump_count = 0;
1064 insn->bits3.if_else.pop_count = 1;
1065 insn->bits3.if_else.pad0 = 0;
1066 } else {
1067 insn->bits1.branch_gen6.jump_count = 2;
1068 }
1069 }
1070 }
1071
1072 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1073 {
1074 struct intel_context *intel = &p->brw->intel;
1075 struct brw_instruction *insn;
1076
1077 insn = next_insn(p, BRW_OPCODE_BREAK);
1078 if (intel->gen >= 6) {
1079 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1080 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1081 brw_set_src1(insn, brw_imm_d(0x0));
1082 } else {
1083 brw_set_dest(p, insn, brw_ip_reg());
1084 brw_set_src0(insn, brw_ip_reg());
1085 brw_set_src1(insn, brw_imm_d(0x0));
1086 insn->bits3.if_else.pad0 = 0;
1087 insn->bits3.if_else.pop_count = pop_count;
1088 }
1089 insn->header.compression_control = BRW_COMPRESSION_NONE;
1090 insn->header.execution_size = BRW_EXECUTE_8;
1091
1092 return insn;
1093 }
1094
1095 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1096 struct brw_instruction *do_insn)
1097 {
1098 struct brw_instruction *insn;
1099 int br = 2;
1100
1101 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1102 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1103 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1104 brw_set_dest(p, insn, brw_ip_reg());
1105 brw_set_src0(insn, brw_ip_reg());
1106 brw_set_src1(insn, brw_imm_d(0x0));
1107
1108 insn->bits3.break_cont.uip = br * (do_insn - insn);
1109
1110 insn->header.compression_control = BRW_COMPRESSION_NONE;
1111 insn->header.execution_size = BRW_EXECUTE_8;
1112 return insn;
1113 }
1114
1115 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1116 {
1117 struct brw_instruction *insn;
1118 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1119 brw_set_dest(p, insn, brw_ip_reg());
1120 brw_set_src0(insn, brw_ip_reg());
1121 brw_set_src1(insn, brw_imm_d(0x0));
1122 insn->header.compression_control = BRW_COMPRESSION_NONE;
1123 insn->header.execution_size = BRW_EXECUTE_8;
1124 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1125 insn->bits3.if_else.pad0 = 0;
1126 insn->bits3.if_else.pop_count = pop_count;
1127 return insn;
1128 }
1129
1130 /* DO/WHILE loop:
1131 *
1132 * The DO/WHILE is just an unterminated loop -- break or continue are
1133 * used for control within the loop. We have a few ways they can be
1134 * done.
1135 *
1136 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1137 * jip and no DO instruction.
1138 *
1139 * For non-uniform control flow pre-gen6, there's a DO instruction to
1140 * push the mask, and a WHILE to jump back, and BREAK to get out and
1141 * pop the mask.
1142 *
1143 * For gen6, there's no more mask stack, so no need for DO. WHILE
1144 * just points back to the first instruction of the loop.
1145 */
1146 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1147 {
1148 struct intel_context *intel = &p->brw->intel;
1149
1150 if (intel->gen >= 6 || p->single_program_flow) {
1151 return &p->store[p->nr_insn];
1152 } else {
1153 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1154
1155 /* Override the defaults for this instruction:
1156 */
1157 brw_set_dest(p, insn, brw_null_reg());
1158 brw_set_src0(insn, brw_null_reg());
1159 brw_set_src1(insn, brw_null_reg());
1160
1161 insn->header.compression_control = BRW_COMPRESSION_NONE;
1162 insn->header.execution_size = execute_size;
1163 insn->header.predicate_control = BRW_PREDICATE_NONE;
1164 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1165 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1166
1167 return insn;
1168 }
1169 }
1170
1171
1172
1173 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1174 struct brw_instruction *do_insn)
1175 {
1176 struct intel_context *intel = &p->brw->intel;
1177 struct brw_instruction *insn;
1178 GLuint br = 1;
1179
1180 if (intel->gen >= 5)
1181 br = 2;
1182
1183 if (intel->gen >= 6) {
1184 insn = next_insn(p, BRW_OPCODE_WHILE);
1185
1186 brw_set_dest(p, insn, brw_imm_w(0));
1187 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1188 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1189 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1190
1191 insn->header.execution_size = do_insn->header.execution_size;
1192 assert(insn->header.execution_size == BRW_EXECUTE_8);
1193 } else {
1194 if (p->single_program_flow) {
1195 insn = next_insn(p, BRW_OPCODE_ADD);
1196
1197 brw_set_dest(p, insn, brw_ip_reg());
1198 brw_set_src0(insn, brw_ip_reg());
1199 brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
1200 insn->header.execution_size = BRW_EXECUTE_1;
1201 } else {
1202 insn = next_insn(p, BRW_OPCODE_WHILE);
1203
1204 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1205
1206 brw_set_dest(p, insn, brw_ip_reg());
1207 brw_set_src0(insn, brw_ip_reg());
1208 brw_set_src1(insn, brw_imm_d(0));
1209
1210 insn->header.execution_size = do_insn->header.execution_size;
1211 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1212 insn->bits3.if_else.pop_count = 0;
1213 insn->bits3.if_else.pad0 = 0;
1214 }
1215 }
1216 insn->header.compression_control = BRW_COMPRESSION_NONE;
1217 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1218
1219 return insn;
1220 }
1221
1222
1223 /* FORWARD JUMPS:
1224 */
1225 void brw_land_fwd_jump(struct brw_compile *p,
1226 struct brw_instruction *jmp_insn)
1227 {
1228 struct intel_context *intel = &p->brw->intel;
1229 struct brw_instruction *landing = &p->store[p->nr_insn];
1230 GLuint jmpi = 1;
1231
1232 if (intel->gen >= 5)
1233 jmpi = 2;
1234
1235 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1236 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1237
1238 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1239 }
1240
1241
1242
1243 /* To integrate with the above, it makes sense that the comparison
1244 * instruction should populate the flag register. It might be simpler
1245 * just to use the flag reg for most WM tasks?
1246 */
1247 void brw_CMP(struct brw_compile *p,
1248 struct brw_reg dest,
1249 GLuint conditional,
1250 struct brw_reg src0,
1251 struct brw_reg src1)
1252 {
1253 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1254
1255 insn->header.destreg__conditionalmod = conditional;
1256 brw_set_dest(p, insn, dest);
1257 brw_set_src0(insn, src0);
1258 brw_set_src1(insn, src1);
1259
1260 /* guess_execution_size(insn, src0); */
1261
1262
1263 /* Make it so that future instructions will use the computed flag
1264 * value until brw_set_predicate_control_flag_value() is called
1265 * again.
1266 */
1267 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1268 dest.nr == 0) {
1269 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1270 p->flag_value = 0xff;
1271 }
1272 }
1273
1274 /* Issue 'wait' instruction for n1, host could program MMIO
1275 to wake up thread. */
1276 void brw_WAIT (struct brw_compile *p)
1277 {
1278 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1279 struct brw_reg src = brw_notification_1_reg();
1280
1281 brw_set_dest(p, insn, src);
1282 brw_set_src0(insn, src);
1283 brw_set_src1(insn, brw_null_reg());
1284 insn->header.execution_size = 0; /* must */
1285 insn->header.predicate_control = 0;
1286 insn->header.compression_control = 0;
1287 }
1288
1289
1290 /***********************************************************************
1291 * Helpers for the various SEND message types:
1292 */
1293
1294 /** Extended math function, float[8].
1295 */
1296 void brw_math( struct brw_compile *p,
1297 struct brw_reg dest,
1298 GLuint function,
1299 GLuint saturate,
1300 GLuint msg_reg_nr,
1301 struct brw_reg src,
1302 GLuint data_type,
1303 GLuint precision )
1304 {
1305 struct intel_context *intel = &p->brw->intel;
1306
1307 if (intel->gen >= 6) {
1308 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1309
1310 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1311 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1312
1313 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1314 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1315
1316 /* Source modifiers are ignored for extended math instructions. */
1317 assert(!src.negate);
1318 assert(!src.abs);
1319
1320 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1321 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1322 assert(src.type == BRW_REGISTER_TYPE_F);
1323 }
1324
1325 /* Math is the same ISA format as other opcodes, except that CondModifier
1326 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1327 */
1328 insn->header.destreg__conditionalmod = function;
1329 insn->header.saturate = saturate;
1330
1331 brw_set_dest(p, insn, dest);
1332 brw_set_src0(insn, src);
1333 brw_set_src1(insn, brw_null_reg());
1334 } else {
1335 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1336 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1337 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1338 /* Example code doesn't set predicate_control for send
1339 * instructions.
1340 */
1341 insn->header.predicate_control = 0;
1342 insn->header.destreg__conditionalmod = msg_reg_nr;
1343
1344 brw_set_dest(p, insn, dest);
1345 brw_set_src0(insn, src);
1346 brw_set_math_message(p->brw,
1347 insn,
1348 msg_length, response_length,
1349 function,
1350 BRW_MATH_INTEGER_UNSIGNED,
1351 precision,
1352 saturate,
1353 data_type);
1354 }
1355 }
1356
1357 /** Extended math function, float[8].
1358 */
1359 void brw_math2(struct brw_compile *p,
1360 struct brw_reg dest,
1361 GLuint function,
1362 struct brw_reg src0,
1363 struct brw_reg src1)
1364 {
1365 struct intel_context *intel = &p->brw->intel;
1366 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1367
1368 assert(intel->gen >= 6);
1369 (void) intel;
1370
1371
1372 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1373 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1374 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1375
1376 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1377 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1378 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1379
1380 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1381 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1382 assert(src0.type == BRW_REGISTER_TYPE_F);
1383 assert(src1.type == BRW_REGISTER_TYPE_F);
1384 }
1385
1386 /* Source modifiers are ignored for extended math instructions. */
1387 assert(!src0.negate);
1388 assert(!src0.abs);
1389 assert(!src1.negate);
1390 assert(!src1.abs);
1391
1392 /* Math is the same ISA format as other opcodes, except that CondModifier
1393 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1394 */
1395 insn->header.destreg__conditionalmod = function;
1396
1397 brw_set_dest(p, insn, dest);
1398 brw_set_src0(insn, src0);
1399 brw_set_src1(insn, src1);
1400 }
1401
1402 /**
1403 * Extended math function, float[16].
1404 * Use 2 send instructions.
1405 */
1406 void brw_math_16( struct brw_compile *p,
1407 struct brw_reg dest,
1408 GLuint function,
1409 GLuint saturate,
1410 GLuint msg_reg_nr,
1411 struct brw_reg src,
1412 GLuint precision )
1413 {
1414 struct intel_context *intel = &p->brw->intel;
1415 struct brw_instruction *insn;
1416 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1417 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1418
1419 if (intel->gen >= 6) {
1420 insn = next_insn(p, BRW_OPCODE_MATH);
1421
1422 /* Math is the same ISA format as other opcodes, except that CondModifier
1423 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1424 */
1425 insn->header.destreg__conditionalmod = function;
1426 insn->header.saturate = saturate;
1427
1428 /* Source modifiers are ignored for extended math instructions. */
1429 assert(!src.negate);
1430 assert(!src.abs);
1431
1432 brw_set_dest(p, insn, dest);
1433 brw_set_src0(insn, src);
1434 brw_set_src1(insn, brw_null_reg());
1435 return;
1436 }
1437
1438 /* First instruction:
1439 */
1440 brw_push_insn_state(p);
1441 brw_set_predicate_control_flag_value(p, 0xff);
1442 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1443
1444 insn = next_insn(p, BRW_OPCODE_SEND);
1445 insn->header.destreg__conditionalmod = msg_reg_nr;
1446
1447 brw_set_dest(p, insn, dest);
1448 brw_set_src0(insn, src);
1449 brw_set_math_message(p->brw,
1450 insn,
1451 msg_length, response_length,
1452 function,
1453 BRW_MATH_INTEGER_UNSIGNED,
1454 precision,
1455 saturate,
1456 BRW_MATH_DATA_VECTOR);
1457
1458 /* Second instruction:
1459 */
1460 insn = next_insn(p, BRW_OPCODE_SEND);
1461 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1462 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1463
1464 brw_set_dest(p, insn, offset(dest,1));
1465 brw_set_src0(insn, src);
1466 brw_set_math_message(p->brw,
1467 insn,
1468 msg_length, response_length,
1469 function,
1470 BRW_MATH_INTEGER_UNSIGNED,
1471 precision,
1472 saturate,
1473 BRW_MATH_DATA_VECTOR);
1474
1475 brw_pop_insn_state(p);
1476 }
1477
1478
1479 /**
1480 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1481 * using a constant offset per channel.
1482 *
1483 * The offset must be aligned to oword size (16 bytes). Used for
1484 * register spilling.
1485 */
1486 void brw_oword_block_write_scratch(struct brw_compile *p,
1487 struct brw_reg mrf,
1488 int num_regs,
1489 GLuint offset)
1490 {
1491 struct intel_context *intel = &p->brw->intel;
1492 uint32_t msg_control, msg_type;
1493 int mlen;
1494
1495 if (intel->gen >= 6)
1496 offset /= 16;
1497
1498 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1499
1500 if (num_regs == 1) {
1501 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1502 mlen = 2;
1503 } else {
1504 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1505 mlen = 3;
1506 }
1507
1508 /* Set up the message header. This is g0, with g0.2 filled with
1509 * the offset. We don't want to leave our offset around in g0 or
1510 * it'll screw up texture samples, so set it up inside the message
1511 * reg.
1512 */
1513 {
1514 brw_push_insn_state(p);
1515 brw_set_mask_control(p, BRW_MASK_DISABLE);
1516 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1517
1518 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1519
1520 /* set message header global offset field (reg 0, element 2) */
1521 brw_MOV(p,
1522 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1523 mrf.nr,
1524 2), BRW_REGISTER_TYPE_UD),
1525 brw_imm_ud(offset));
1526
1527 brw_pop_insn_state(p);
1528 }
1529
1530 {
1531 struct brw_reg dest;
1532 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1533 int send_commit_msg;
1534 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1535 BRW_REGISTER_TYPE_UW);
1536
1537 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1538 insn->header.compression_control = BRW_COMPRESSION_NONE;
1539 src_header = vec16(src_header);
1540 }
1541 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1542 insn->header.destreg__conditionalmod = mrf.nr;
1543
1544 /* Until gen6, writes followed by reads from the same location
1545 * are not guaranteed to be ordered unless write_commit is set.
1546 * If set, then a no-op write is issued to the destination
1547 * register to set a dependency, and a read from the destination
1548 * can be used to ensure the ordering.
1549 *
1550 * For gen6, only writes between different threads need ordering
1551 * protection. Our use of DP writes is all about register
1552 * spilling within a thread.
1553 */
1554 if (intel->gen >= 6) {
1555 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1556 send_commit_msg = 0;
1557 } else {
1558 dest = src_header;
1559 send_commit_msg = 1;
1560 }
1561
1562 brw_set_dest(p, insn, dest);
1563 if (intel->gen >= 6) {
1564 brw_set_src0(insn, mrf);
1565 } else {
1566 brw_set_src0(insn, brw_null_reg());
1567 }
1568
1569 if (intel->gen >= 6)
1570 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1571 else
1572 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1573
1574 brw_set_dp_write_message(p->brw,
1575 insn,
1576 255, /* binding table index (255=stateless) */
1577 msg_control,
1578 msg_type,
1579 mlen,
1580 GL_TRUE, /* header_present */
1581 0, /* pixel scoreboard */
1582 send_commit_msg, /* response_length */
1583 0, /* eot */
1584 send_commit_msg);
1585 }
1586 }
1587
1588
1589 /**
1590 * Read a block of owords (half a GRF each) from the scratch buffer
1591 * using a constant index per channel.
1592 *
1593 * Offset must be aligned to oword size (16 bytes). Used for register
1594 * spilling.
1595 */
1596 void
1597 brw_oword_block_read_scratch(struct brw_compile *p,
1598 struct brw_reg dest,
1599 struct brw_reg mrf,
1600 int num_regs,
1601 GLuint offset)
1602 {
1603 struct intel_context *intel = &p->brw->intel;
1604 uint32_t msg_control;
1605 int rlen;
1606
1607 if (intel->gen >= 6)
1608 offset /= 16;
1609
1610 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1611 dest = retype(dest, BRW_REGISTER_TYPE_UW);
1612
1613 if (num_regs == 1) {
1614 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1615 rlen = 1;
1616 } else {
1617 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1618 rlen = 2;
1619 }
1620
1621 {
1622 brw_push_insn_state(p);
1623 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1624 brw_set_mask_control(p, BRW_MASK_DISABLE);
1625
1626 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1627
1628 /* set message header global offset field (reg 0, element 2) */
1629 brw_MOV(p,
1630 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1631 mrf.nr,
1632 2), BRW_REGISTER_TYPE_UD),
1633 brw_imm_ud(offset));
1634
1635 brw_pop_insn_state(p);
1636 }
1637
1638 {
1639 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1640
1641 assert(insn->header.predicate_control == 0);
1642 insn->header.compression_control = BRW_COMPRESSION_NONE;
1643 insn->header.destreg__conditionalmod = mrf.nr;
1644
1645 brw_set_dest(p, insn, dest); /* UW? */
1646 if (intel->gen >= 6) {
1647 brw_set_src0(insn, mrf);
1648 } else {
1649 brw_set_src0(insn, brw_null_reg());
1650 }
1651
1652 brw_set_dp_read_message(p->brw,
1653 insn,
1654 255, /* binding table index (255=stateless) */
1655 msg_control,
1656 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1657 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1658 1, /* msg_length */
1659 rlen);
1660 }
1661 }
1662
1663 /**
1664 * Read a float[4] vector from the data port Data Cache (const buffer).
1665 * Location (in buffer) should be a multiple of 16.
1666 * Used for fetching shader constants.
1667 */
1668 void brw_oword_block_read(struct brw_compile *p,
1669 struct brw_reg dest,
1670 struct brw_reg mrf,
1671 uint32_t offset,
1672 uint32_t bind_table_index)
1673 {
1674 struct intel_context *intel = &p->brw->intel;
1675
1676 /* On newer hardware, offset is in units of owords. */
1677 if (intel->gen >= 6)
1678 offset /= 16;
1679
1680 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1681
1682 brw_push_insn_state(p);
1683 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1684 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1685 brw_set_mask_control(p, BRW_MASK_DISABLE);
1686
1687 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1688
1689 /* set message header global offset field (reg 0, element 2) */
1690 brw_MOV(p,
1691 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1692 mrf.nr,
1693 2), BRW_REGISTER_TYPE_UD),
1694 brw_imm_ud(offset));
1695
1696 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1697 insn->header.destreg__conditionalmod = mrf.nr;
1698
1699 /* cast dest to a uword[8] vector */
1700 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1701
1702 brw_set_dest(p, insn, dest);
1703 if (intel->gen >= 6) {
1704 brw_set_src0(insn, mrf);
1705 } else {
1706 brw_set_src0(insn, brw_null_reg());
1707 }
1708
1709 brw_set_dp_read_message(p->brw,
1710 insn,
1711 bind_table_index,
1712 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1713 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1714 0, /* source cache = data cache */
1715 1, /* msg_length */
1716 1); /* response_length (1 reg, 2 owords!) */
1717
1718 brw_pop_insn_state(p);
1719 }
1720
1721 /**
1722 * Read a set of dwords from the data port Data Cache (const buffer).
1723 *
1724 * Location (in buffer) appears as UD offsets in the register after
1725 * the provided mrf header reg.
1726 */
1727 void brw_dword_scattered_read(struct brw_compile *p,
1728 struct brw_reg dest,
1729 struct brw_reg mrf,
1730 uint32_t bind_table_index)
1731 {
1732 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1733
1734 brw_push_insn_state(p);
1735 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1736 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1737 brw_set_mask_control(p, BRW_MASK_DISABLE);
1738 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1739 brw_pop_insn_state(p);
1740
1741 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1742 insn->header.destreg__conditionalmod = mrf.nr;
1743
1744 /* cast dest to a uword[8] vector */
1745 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1746
1747 brw_set_dest(p, insn, dest);
1748 brw_set_src0(insn, brw_null_reg());
1749
1750 brw_set_dp_read_message(p->brw,
1751 insn,
1752 bind_table_index,
1753 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1754 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1756 2, /* msg_length */
1757 1); /* response_length */
1758 }
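
/* Illustrative usage sketch (disabled): the eight per-channel dword
 * offsets must be placed in the MRF following the header before the
 * call, which is why msg_length is 2.  'offsets' and 'dword_bti' are
 * hypothetical names for this example.
 */
#if 0
   brw_MOV(p, retype(brw_message_reg(3), BRW_REGISTER_TYPE_UD),
           retype(offsets, BRW_REGISTER_TYPE_UD));
   brw_dword_scattered_read(p, brw_vec8_grf(6, 0),
                            brw_message_reg(2), dword_bti);
#endif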
1759
1760
1761
1762 /**
1763 * Read float[4] constant(s) from VS constant buffer.
1764 * For relative addressing, two float[4] constants will be read into 'dest'.
1765 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1766 */
1767 void brw_dp_READ_4_vs(struct brw_compile *p,
1768 struct brw_reg dest,
1769 GLuint location,
1770 GLuint bind_table_index)
1771 {
1772 struct intel_context *intel = &p->brw->intel;
1773 struct brw_instruction *insn;
1774 GLuint msg_reg_nr = 1;
1775
1776 if (intel->gen >= 6)
1777 location /= 16;
1778
   /* Set up MRF[1] with the location/offset into the const buffer */
1780 brw_push_insn_state(p);
1781 brw_set_access_mode(p, BRW_ALIGN_1);
1782 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1783 brw_set_mask_control(p, BRW_MASK_DISABLE);
1784 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1785 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
1786 BRW_REGISTER_TYPE_UD),
1787 brw_imm_ud(location));
1788 brw_pop_insn_state(p);
1789
1790 insn = next_insn(p, BRW_OPCODE_SEND);
1791
1792 insn->header.predicate_control = BRW_PREDICATE_NONE;
1793 insn->header.compression_control = BRW_COMPRESSION_NONE;
1794 insn->header.destreg__conditionalmod = msg_reg_nr;
1795 insn->header.mask_control = BRW_MASK_DISABLE;
1796
1797 brw_set_dest(p, insn, dest);
1798 if (intel->gen >= 6) {
1799 brw_set_src0(insn, brw_message_reg(msg_reg_nr));
1800 } else {
1801 brw_set_src0(insn, brw_null_reg());
1802 }
1803
1804 brw_set_dp_read_message(p->brw,
1805 insn,
1806 bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, /* msg_control */
1808 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1810 1, /* msg_length */
1811 1); /* response_length (1 Oword) */
1812 }
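
/* Illustrative usage sketch (disabled): fetch the float[4] constant at
 * byte offset 'location' (a multiple of 16) into the lower half of g3.
 * 'vs_const_bti' is a hypothetical binding-table slot.
 */
#if 0
   brw_dp_READ_4_vs(p, brw_vec8_grf(3, 0), location, vs_const_bti);
#endif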
1813
1814 /**
1815 * Read a float[4] constant per vertex from VS constant buffer, with
1816 * relative addressing.
1817 */
1818 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1819 struct brw_reg dest,
1820 struct brw_reg addr_reg,
1821 GLuint offset,
1822 GLuint bind_table_index)
1823 {
1824 struct intel_context *intel = &p->brw->intel;
1825 struct brw_reg src = brw_vec8_grf(0, 0);
1826 int msg_type;
1827
   /* Set up MRF[1] with the offset into the const buffer */
1829 brw_push_insn_state(p);
1830 brw_set_access_mode(p, BRW_ALIGN_1);
1831 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1832 brw_set_mask_control(p, BRW_MASK_DISABLE);
1833 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1834
1835 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1836 * fields ignored.
1837 */
1838 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
1839 addr_reg, brw_imm_d(offset));
1840 brw_pop_insn_state(p);
1841
1842 gen6_resolve_implied_move(p, &src, 0);
1843 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1844
1845 insn->header.predicate_control = BRW_PREDICATE_NONE;
1846 insn->header.compression_control = BRW_COMPRESSION_NONE;
1847 insn->header.destreg__conditionalmod = 0;
1848 insn->header.mask_control = BRW_MASK_DISABLE;
1849
1850 brw_set_dest(p, insn, dest);
1851 brw_set_src0(insn, src);
1852
1853 if (intel->gen == 6)
1854 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1855 else if (intel->gen == 5 || intel->is_g4x)
1856 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1857 else
1858 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1859
1860 brw_set_dp_read_message(p->brw,
1861 insn,
1862 bind_table_index,
1863 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1864 msg_type,
1865 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1866 2, /* msg_length */
1867 1); /* response_length */
1868 }
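
/* Illustrative usage sketch: with the per-vertex index already in
 * addr_reg, a call such as
 *
 *    brw_dp_READ_4_vs_relative(p, dest, addr_reg, 16, bti);
 *
 * adds 16 to the index, issues the dual-block read, and leaves two
 * float[4] constants in the two halves of 'dest'.  'bti' is a
 * hypothetical binding-table slot for the VS constant buffer.
 */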
1869
1870
1871
1872 void brw_fb_WRITE(struct brw_compile *p,
1873 int dispatch_width,
1874 GLuint msg_reg_nr,
1875 struct brw_reg src0,
1876 GLuint binding_table_index,
1877 GLuint msg_length,
1878 GLuint response_length,
1879 GLboolean eot,
1880 GLboolean header_present)
1881 {
1882 struct intel_context *intel = &p->brw->intel;
1883 struct brw_instruction *insn;
1884 GLuint msg_control, msg_type;
1885 struct brw_reg dest;
1886
1887 if (dispatch_width == 16)
1888 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1889 else
1890 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1891
1892 if (intel->gen >= 6 && binding_table_index == 0) {
1893 insn = next_insn(p, BRW_OPCODE_SENDC);
1894 } else {
1895 insn = next_insn(p, BRW_OPCODE_SEND);
1896 }
1897 /* The execution mask is ignored for render target writes. */
1898 insn->header.predicate_control = 0;
1899 insn->header.compression_control = BRW_COMPRESSION_NONE;
1900
1901 if (intel->gen >= 6) {
1902 /* headerless version, just submit color payload */
1903 src0 = brw_message_reg(msg_reg_nr);
1904
1905 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1906 } else {
1907 insn->header.destreg__conditionalmod = msg_reg_nr;
1908
1909 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1910 }
1911
1912 if (dispatch_width == 16)
1913 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1914 else
1915 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1916
1917 brw_set_dest(p, insn, dest);
1918 brw_set_src0(insn, src0);
1919 brw_set_dp_write_message(p->brw,
1920 insn,
1921 binding_table_index,
1922 msg_control,
1923 msg_type,
1924 msg_length,
1925 header_present,
1926 1, /* pixel scoreboard */
1927 response_length,
1928 eot,
1929 0 /* send_commit_msg */);
1930 }
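
/* Illustrative usage sketch (disabled): a pre-Gen6 SIMD8 render-target
 * write of four color registers following a two-register header at m0,
 * terminating the thread.  The counts are examples only.
 */
#if 0
   brw_fb_WRITE(p, 8 /* dispatch_width */, 0 /* msg_reg_nr */,
                brw_message_reg(0), 0 /* binding_table_index */,
                6 /* msg_length: 2 header + 4 color */,
                0 /* response_length */,
                GL_TRUE /* eot */, GL_TRUE /* header_present */);
#endif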
1931
1932
1933 /**
1934 * Texture sample instruction.
1935 * Note: the msg_type plus msg_length values determine exactly what kind
1936 * of sampling operation is performed. See volume 4, page 161 of docs.
1937 */
1938 void brw_SAMPLE(struct brw_compile *p,
1939 struct brw_reg dest,
1940 GLuint msg_reg_nr,
1941 struct brw_reg src0,
1942 GLuint binding_table_index,
1943 GLuint sampler,
1944 GLuint writemask,
1945 GLuint msg_type,
1946 GLuint response_length,
1947 GLuint msg_length,
1948 GLboolean eot,
1949 GLuint header_present,
1950 GLuint simd_mode)
1951 {
1952 struct intel_context *intel = &p->brw->intel;
1953 GLboolean need_stall = 0;
1954
1955 if (writemask == 0) {
1956 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1957 return;
1958 }
1959
1960 /* Hardware doesn't do destination dependency checking on send
1961 * instructions properly. Add a workaround which generates the
1962 * dependency by other means. In practice it seems like this bug
1963 * only crops up for texture samples, and only where registers are
1964 * written by the send and then written again later without being
1965 * read in between. Luckily for us, we already track that
1966 * information and use it to modify the writemask for the
1967 * instruction, so that is a guide for whether a workaround is
1968 * needed.
1969 */
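   /* Worked example (illustrative): writemask YZ is contiguous, so no
    * stall is needed: the first loop skips X (dst_offset == 2 response
    * registers, matching 16-wide layout), newmask == writemask, and the
    * channel-disable field in element 2 of the header (bits 15:12) is
    * set to ~YZ = XW.  writemask XZ has a hole, so newmask (just X) !=
    * writemask, and we fall back to the stalling workaround below.
    */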
1970 if (writemask != WRITEMASK_XYZW) {
1971 GLuint dst_offset = 0;
1972 GLuint i, newmask = 0, len = 0;
1973
1974 for (i = 0; i < 4; i++) {
1975 if (writemask & (1<<i))
1976 break;
1977 dst_offset += 2;
1978 }
1979 for (; i < 4; i++) {
1980 if (!(writemask & (1<<i)))
1981 break;
1982 newmask |= 1<<i;
1983 len++;
1984 }
1985
1986 if (newmask != writemask) {
1987 need_stall = 1;
1988 /* printf("need stall %x %x\n", newmask , writemask); */
1989 }
1990 else {
1991 GLboolean dispatch_16 = GL_FALSE;
1992
1993 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1994
1995 guess_execution_size(p, p->current, dest);
1996 if (p->current->header.execution_size == BRW_EXECUTE_16)
1997 dispatch_16 = GL_TRUE;
1998
1999 newmask = ~newmask & WRITEMASK_XYZW;
2000
2001 brw_push_insn_state(p);
2002
2003 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2004 brw_set_mask_control(p, BRW_MASK_DISABLE);
2005
2006 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2007 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2008 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2009
2010 brw_pop_insn_state(p);
2011
2012 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2013 dest = offset(dest, dst_offset);
2014
2015 /* For 16-wide dispatch, masked channels are skipped in the
2016 * response. For 8-wide, masked channels still take up slots,
2017 * and are just not written to.
2018 */
2019 if (dispatch_16)
2020 response_length = len * 2;
2021 }
2022 }
2023
2024 {
2025 struct brw_instruction *insn;
2026
2027 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2028
2029 insn = next_insn(p, BRW_OPCODE_SEND);
2030 insn->header.predicate_control = 0; /* XXX */
2031 insn->header.compression_control = BRW_COMPRESSION_NONE;
2032 if (intel->gen < 6)
2033 insn->header.destreg__conditionalmod = msg_reg_nr;
2034
2035 brw_set_dest(p, insn, dest);
2036 brw_set_src0(insn, src0);
2037 brw_set_sampler_message(p->brw, insn,
2038 binding_table_index,
2039 sampler,
2040 msg_type,
2041 response_length,
2042 msg_length,
2043 eot,
2044 header_present,
2045 simd_mode);
2046 }
2047
2048 if (need_stall) {
2049 struct brw_reg reg = vec8(offset(dest, response_length-1));
2050
2051 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2052 */
2053 brw_push_insn_state(p);
2054 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2055 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2056 retype(reg, BRW_REGISTER_TYPE_UD));
2057 brw_pop_insn_state(p);
2058 }
2059
2060 }
2061
2062 /* All these variables are pretty confusing - we might be better off
2063 * using bitmasks and macros for this, in the old style. Or perhaps
2064 * just having the caller instantiate the fields in dword3 itself.
2065 */
2066 void brw_urb_WRITE(struct brw_compile *p,
2067 struct brw_reg dest,
2068 GLuint msg_reg_nr,
2069 struct brw_reg src0,
2070 GLboolean allocate,
2071 GLboolean used,
2072 GLuint msg_length,
2073 GLuint response_length,
2074 GLboolean eot,
2075 GLboolean writes_complete,
2076 GLuint offset,
2077 GLuint swizzle)
2078 {
2079 struct intel_context *intel = &p->brw->intel;
2080 struct brw_instruction *insn;
2081
2082 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2083
2084 insn = next_insn(p, BRW_OPCODE_SEND);
2085
2086 assert(msg_length < BRW_MAX_MRF);
2087
2088 brw_set_dest(p, insn, dest);
2089 brw_set_src0(insn, src0);
2090 brw_set_src1(insn, brw_imm_d(0));
2091
2092 if (intel->gen < 6)
2093 insn->header.destreg__conditionalmod = msg_reg_nr;
2094
2095 brw_set_urb_message(p->brw,
2096 insn,
2097 allocate,
2098 used,
2099 msg_length,
2100 response_length,
2101 eot,
2102 writes_complete,
2103 offset,
2104 swizzle);
2105 }
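
/* Illustrative usage sketch (disabled): a thread-terminating URB write
 * of four payload registers starting at m0, with no response and the
 * interleaved swizzle.  The parameter values are examples only.
 */
#if 0
   brw_urb_WRITE(p, brw_null_reg(), 0 /* msg_reg_nr */, brw_vec8_grf(0, 0),
                 0 /* allocate */, 1 /* used */,
                 4 /* msg_length */, 0 /* response_length */,
                 1 /* eot */, 1 /* writes_complete */,
                 0 /* offset */, BRW_URB_SWIZZLE_INTERLEAVE);
#endif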
2106
2107 static int
2108 brw_find_next_block_end(struct brw_compile *p, int start)
2109 {
2110 int ip;
2111
2112 for (ip = start + 1; ip < p->nr_insn; ip++) {
2113 struct brw_instruction *insn = &p->store[ip];
2114
2115 switch (insn->header.opcode) {
2116 case BRW_OPCODE_ENDIF:
2117 case BRW_OPCODE_ELSE:
2118 case BRW_OPCODE_WHILE:
2119 return ip;
2120 }
2121 }
2122 assert(!"not reached");
2123 return start + 1;
2124 }
2125
2126 /* There is no DO instruction on gen6, so to find the end of the loop
2127 * we have to see if the loop is jumping back before our start
2128 * instruction.
2129 */
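/* Example: with br == 2 (jump counts are in units of 64-bit
 * half-instructions), a WHILE at ip == 20 with jump_count == -24 jumps
 * back to instruction 20 + (-24 / 2) = 8.  If our start ip is 10, the
 * target (8) precedes it, so that WHILE is the one that ends our loop.
 */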
2130 static int
2131 brw_find_loop_end(struct brw_compile *p, int start)
2132 {
2133 int ip;
2134 int br = 2;
2135
2136 for (ip = start + 1; ip < p->nr_insn; ip++) {
2137 struct brw_instruction *insn = &p->store[ip];
2138
2139 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2140 if (ip + insn->bits1.branch_gen6.jump_count / br < start)
2141 return ip;
2142 }
2143 }
2144 assert(!"not reached");
2145 return start + 1;
2146 }
2147
2148 /* After program generation, go back and update the UIP and JIP of
 * BREAK and CONTINUE instructions to their correct locations.
2150 */
2151 void
2152 brw_set_uip_jip(struct brw_compile *p)
2153 {
2154 struct intel_context *intel = &p->brw->intel;
2155 int ip;
2156 int br = 2;
2157
2158 if (intel->gen < 6)
2159 return;
2160
2161 for (ip = 0; ip < p->nr_insn; ip++) {
2162 struct brw_instruction *insn = &p->store[ip];
2163
2164 switch (insn->header.opcode) {
2165 case BRW_OPCODE_BREAK:
2166 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2167 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
2168 break;
2169 case BRW_OPCODE_CONTINUE:
2170 /* JIP is set at CONTINUE emit time, since that's when we
2171 * know where the start of the loop is.
2172 */
2173 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2174 assert(insn->bits3.break_cont.uip != 0);
2175 assert(insn->bits3.break_cont.jip != 0);
2176 break;
2177 }
2178 }
2179 }
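
/* Worked example (illustrative): a BREAK at ip 10, with the enclosing
 * block ending at an ENDIF at ip 12 and the loop's WHILE at ip 20,
 * gets jip = 2 * (12 - 10) = 4 and uip = 2 * (20 - 10 + 1) = 22, so
 * UIP points just past the WHILE (units are 64-bit half-instructions).
 */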
2180
2181 void brw_ff_sync(struct brw_compile *p,
2182 struct brw_reg dest,
2183 GLuint msg_reg_nr,
2184 struct brw_reg src0,
2185 GLboolean allocate,
2186 GLuint response_length,
2187 GLboolean eot)
2188 {
2189 struct intel_context *intel = &p->brw->intel;
2190 struct brw_instruction *insn;
2191
2192 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2193
2194 insn = next_insn(p, BRW_OPCODE_SEND);
2195 brw_set_dest(p, insn, dest);
2196 brw_set_src0(insn, src0);
2197 brw_set_src1(insn, brw_imm_d(0));
2198
2199 if (intel->gen < 6)
2200 insn->header.destreg__conditionalmod = msg_reg_nr;
2201
2202 brw_set_ff_sync_message(p->brw,
2203 insn,
2204 allocate,
2205 response_length,
2206 eot);
2207 }
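
/* Illustrative usage sketch (disabled): clip/GS-style code uses ff_sync
 * to allocate a URB handle into r0 before writing vertices, e.g.:
 */
#if 0
   brw_ff_sync(p, brw_vec8_grf(0, 0), 0 /* msg_reg_nr */,
               brw_vec8_grf(0, 0), 1 /* allocate */,
               1 /* response_length */, 0 /* eot */);
#endif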