i965: Refactor Sandybridge implied move handling.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size(struct brw_compile *p,
45 struct brw_instruction *insn,
46 struct brw_reg reg)
47 {
48 if (reg.width == BRW_WIDTH_8 && p->compressed)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 /**
56 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
57 * registers, implicitly moving the operand to a message register.
58 *
59 * On Sandybridge, this is no longer the case. This function performs the
60 * explicit move; it should be called before emitting a SEND instruction.
61 */
static void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   /* Only Sandybridge (gen 6) requires the explicit move. */
   if (intel->gen != 6)
      return;

   /* A null source carries no payload; nothing to move. */
   if (src->file == BRW_ARCHITECTURE_REGISTER_FILE && src->nr == BRW_ARF_NULL)
      return;

   /* Emit the MOV with mask and compression overridden so the payload
    * is copied unconditionally, then restore the caller's default
    * instruction state.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	   retype(*src, BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);
   /* Rewrite the caller's source to the message register just written. */
   *src = brw_message_reg(msg_reg_nr);
}
82
83
/**
 * Encode the destination operand (register file, type, address mode and
 * region) into bits1 of \p insn, then derive the execution size from
 * the destination width via guess_execution_size().
 */
static void brw_set_dest(struct brw_compile *p,
			 struct brw_instruction *insn,
			 struct brw_reg dest)
{
   /* Only bound-check register numbers for the ordinary files; ARF and
    * MRF numbers encode more than a plain register index.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A stride-0 (scalar) destination is encoded as stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: the subregister number is stored in 16-byte units
	  * and a writemask takes the place of a horizontal stride.
	  */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* A stride-0 (scalar) destination is encoded as stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
135
136 extern int reg_type_size[];
137
138 static void
139 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
140 {
141 int hstride_for_reg[] = {0, 1, 2, 4};
142 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
143 int width_for_reg[] = {1, 2, 4, 8, 16};
144 int execsize_for_reg[] = {1, 2, 4, 8, 16};
145 int width, hstride, vstride, execsize;
146
147 if (reg.file == BRW_IMMEDIATE_VALUE) {
148 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
149 * mean the destination has to be 128-bit aligned and the
150 * destination horiz stride has to be a word.
151 */
152 if (reg.type == BRW_REGISTER_TYPE_V) {
153 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
154 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
155 }
156
157 return;
158 }
159
160 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
161 reg.file == BRW_ARF_NULL)
162 return;
163
164 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
165 hstride = hstride_for_reg[reg.hstride];
166
167 if (reg.vstride == 0xf) {
168 vstride = -1;
169 } else {
170 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
171 vstride = vstride_for_reg[reg.vstride];
172 }
173
174 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
175 width = width_for_reg[reg.width];
176
177 assert(insn->header.execution_size >= 0 &&
178 insn->header.execution_size < Elements(execsize_for_reg));
179 execsize = execsize_for_reg[insn->header.execution_size];
180
181 /* Restrictions from 3.3.10: Register Region Restrictions. */
182 /* 3. */
183 assert(execsize >= width);
184
185 /* 4. */
186 if (execsize == width && hstride != 0) {
187 assert(vstride == -1 || vstride == width * hstride);
188 }
189
190 /* 5. */
191 if (execsize == width && hstride == 0) {
192 /* no restriction on vstride. */
193 }
194
195 /* 6. */
196 if (width == 1) {
197 assert(hstride == 0);
198 }
199
200 /* 7. */
201 if (execsize == 1 && width == 1) {
202 assert(hstride == 0);
203 assert(vstride == 0);
204 }
205
206 /* 8. */
207 if (vstride == 0 && hstride == 0) {
208 assert(width == 1);
209 }
210
211 /* 10. Check destination issues. */
212 }
213
214 static void brw_set_src0( struct brw_instruction *insn,
215 struct brw_reg reg )
216 {
217 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
218 assert(reg.nr < 128);
219
220 validate_reg(insn, reg);
221
222 insn->bits1.da1.src0_reg_file = reg.file;
223 insn->bits1.da1.src0_reg_type = reg.type;
224 insn->bits2.da1.src0_abs = reg.abs;
225 insn->bits2.da1.src0_negate = reg.negate;
226 insn->bits2.da1.src0_address_mode = reg.address_mode;
227
228 if (reg.file == BRW_IMMEDIATE_VALUE) {
229 insn->bits3.ud = reg.dw1.ud;
230
231 /* Required to set some fields in src1 as well:
232 */
233 insn->bits1.da1.src1_reg_file = 0; /* arf */
234 insn->bits1.da1.src1_reg_type = reg.type;
235 }
236 else
237 {
238 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
239 if (insn->header.access_mode == BRW_ALIGN_1) {
240 insn->bits2.da1.src0_subreg_nr = reg.subnr;
241 insn->bits2.da1.src0_reg_nr = reg.nr;
242 }
243 else {
244 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
245 insn->bits2.da16.src0_reg_nr = reg.nr;
246 }
247 }
248 else {
249 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
250
251 if (insn->header.access_mode == BRW_ALIGN_1) {
252 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
253 }
254 else {
255 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
256 }
257 }
258
259 if (insn->header.access_mode == BRW_ALIGN_1) {
260 if (reg.width == BRW_WIDTH_1 &&
261 insn->header.execution_size == BRW_EXECUTE_1) {
262 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
263 insn->bits2.da1.src0_width = BRW_WIDTH_1;
264 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
265 }
266 else {
267 insn->bits2.da1.src0_horiz_stride = reg.hstride;
268 insn->bits2.da1.src0_width = reg.width;
269 insn->bits2.da1.src0_vert_stride = reg.vstride;
270 }
271 }
272 else {
273 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
274 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
275 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
276 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
277
278 /* This is an oddity of the fact we're using the same
279 * descriptions for registers in align_16 as align_1:
280 */
281 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
282 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
283 else
284 insn->bits2.da16.src0_vert_stride = reg.vstride;
285 }
286 }
287 }
288
289
/**
 * Encode source operand 1 into bits1/bits3 of \p insn.
 *
 * src1 cannot come from the message register file, and (unlike src0)
 * must use direct addressing when it is not an immediate.
 */
void brw_set_src1( struct brw_instruction *insn,
		   struct brw_reg reg )
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16: subregister number is stored in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A width-1 source in a SIMD1 instruction is encoded as the
	  * scalar region <0;1;0>.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
356
357
358
/**
 * Fill in the message descriptor (bits3, and bits2 SFID on gen5) for a
 * SEND to the extended math unit.  The descriptor layout differs
 * between Ironlake (gen5) and earlier parts.
 */
static void brw_set_math_message( struct brw_context *brw,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   struct intel_context *intel = &brw->intel;
   /* src1 carries the descriptor bits; start from a zero immediate. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      /* Gen5 moved the SFID and EOT into bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
397
398
/**
 * Fill in the message descriptor for an URB FF_SYNC message
 * (gen5+ descriptor layout; fields unused by FF_SYNC are zeroed).
 */
static void brw_set_ff_sync_message(struct brw_context *brw,
				    struct brw_instruction *insn,
				    GLboolean allocate,
				    GLuint response_length,
				    GLboolean end_of_thread)
{
   struct intel_context *intel = &brw->intel;
   /* src1 carries the descriptor bits; start from a zero immediate. */
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      /* On gen6 the SFID lives in the destreg/conditionalmod field. */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
425
/**
 * Fill in the message descriptor for an URB write message.
 * Gen5+ and pre-gen5 use different descriptor layouts; on gen6 the
 * SFID additionally moves into the destreg/conditionalmod field.
 */
static void brw_set_urb_message( struct brw_context *brw,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct intel_context *intel = &brw->intel;
   /* src1 carries the descriptor bits; start from a zero immediate. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* ? */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.  Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
474
/**
 * Fill in the message descriptor for a data port write.
 * Three descriptor layouts are handled: gen6+ (render cache),
 * gen5, and pre-gen5.
 */
static void brw_set_dp_write_message( struct brw_context *brw,
				      struct brw_instruction *insn,
				      GLuint binding_table_index,
				      GLuint msg_control,
				      GLuint msg_type,
				      GLuint msg_length,
				      GLboolean header_present,
				      GLuint pixel_scoreboard_clear,
				      GLuint response_length,
				      GLuint end_of_thread,
				      GLuint send_commit_msg)
{
   struct intel_context *intel = &brw->intel;
   /* src1 carries the descriptor bits; start from a zero immediate. */
   brw_set_src1(insn, brw_imm_ud(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
      insn->bits3.dp_render_cache.header_present = header_present;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
528
/**
 * Fill in the message descriptor for a data port read.
 * Four descriptor layouts are handled: gen6+ (render cache), gen5,
 * G4x, and original gen4.  Reads never terminate the thread, so
 * end_of_thread is always 0 here.
 */
static void
brw_set_dp_read_message(struct brw_context *brw,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct intel_context *intel = &brw->intel;
   /* src1 carries the descriptor bits; start from a zero immediate. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = 0;
      insn->bits3.dp_render_cache.header_present = 1;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = 0;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
590
/**
 * Fill in the message descriptor for a sampler message.
 * Three descriptor layouts: gen5+ (with gen6 SFID relocation),
 * G4x, and original gen4.  Sampler messages never end the thread,
 * hence the assert on \p eot.
 */
static void brw_set_sampler_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   /* src1 carries the descriptor bits; start from a zero immediate. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      if (intel->gen >= 6)
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
	 insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
640
641
642
/**
 * Allocate the next instruction slot in the program store, seed it
 * from the current default instruction state, and set its opcode.
 */
static struct brw_instruction *next_insn( struct brw_compile *p,
					  GLuint opcode )
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */
   /* destreg__conditionalmod (and the accompanying predicate override)
    * applies only to the instruction being emitted now, so clear it
    * from the default state after copying.
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
664
665
666 static struct brw_instruction *brw_alu1( struct brw_compile *p,
667 GLuint opcode,
668 struct brw_reg dest,
669 struct brw_reg src )
670 {
671 struct brw_instruction *insn = next_insn(p, opcode);
672 brw_set_dest(p, insn, dest);
673 brw_set_src0(insn, src);
674 return insn;
675 }
676
677 static struct brw_instruction *brw_alu2(struct brw_compile *p,
678 GLuint opcode,
679 struct brw_reg dest,
680 struct brw_reg src0,
681 struct brw_reg src1 )
682 {
683 struct brw_instruction *insn = next_insn(p, opcode);
684 brw_set_dest(p, insn, dest);
685 brw_set_src0(insn, src0);
686 brw_set_src1(insn, src1);
687 return insn;
688 }
689
690
691 /***********************************************************************
692 * Convenience routines.
693 */
/* Define a public emitter brw_<OP> for a one-source instruction. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Define a public emitter brw_<OP> for a two-source instruction. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(rnd, src);						      \
   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */  \
									      \
   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
}

/* Instantiate the public ALU emitters. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


/* Two-instruction rounding emitters (see ROUND above). */
ROUND(RNDZ)
ROUND(RNDE)
758
759
760 struct brw_instruction *brw_ADD(struct brw_compile *p,
761 struct brw_reg dest,
762 struct brw_reg src0,
763 struct brw_reg src1)
764 {
765 /* 6.2.2: add */
766 if (src0.type == BRW_REGISTER_TYPE_F ||
767 (src0.file == BRW_IMMEDIATE_VALUE &&
768 src0.type == BRW_REGISTER_TYPE_VF)) {
769 assert(src1.type != BRW_REGISTER_TYPE_UD);
770 assert(src1.type != BRW_REGISTER_TYPE_D);
771 }
772
773 if (src1.type == BRW_REGISTER_TYPE_F ||
774 (src1.file == BRW_IMMEDIATE_VALUE &&
775 src1.type == BRW_REGISTER_TYPE_VF)) {
776 assert(src0.type != BRW_REGISTER_TYPE_UD);
777 assert(src0.type != BRW_REGISTER_TYPE_D);
778 }
779
780 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
781 }
782
/**
 * Emit a MUL, asserting the documented type restrictions (6.32.38: mul):
 * integer sources force a non-float destination, float sources may not
 * be paired with D/UD sources, and neither source may be the
 * accumulator.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
817
818
819 void brw_NOP(struct brw_compile *p)
820 {
821 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
822 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
823 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
824 brw_set_src1(insn, brw_imm_ud(0x0));
825 }
826
827
828
829
830
831 /***********************************************************************
832 * Comparisons, if/else/endif
833 */
834
/**
 * Emit a JMPI instruction: always scalar (execution size 1),
 * uncompressed, and with masking disabled.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
				 struct brw_reg dest,
				 struct brw_reg src0,
				 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Any pending predication applied to this jump only; clear it from
    * the default instruction state.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
850
851 /* EU takes the value from the flag register and pushes it onto some
852 * sort of a stack (presumably merging with any flag value already on
853 * the stack). Within an if block, the flags at the top of the stack
854 * control execution on each channel of the unit, eg. on each of the
855 * 16 pixel values in our wm programs.
856 *
857 * When the matching 'else' instruction is reached (presumably by
858 * countdown of the instruction count patched in by our ELSE/ENDIF
859 * functions), the relevent flags are inverted.
860 *
861 * When the matching 'endif' instruction is reached, the flags are
862 * popped off. If the stack is now empty, normal execution resumes.
863 *
864 * No attempt is made to deal with stack overflow (14 elements?).
865 */
/**
 * Emit an IF instruction (or, in single-program-flow mode, a predicated
 * ADD to the IP register that skips the "then" block).  The jump target
 * is left zero here and patched later by brw_ELSE()/brw_ENDIF().
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      assert(execute_size == BRW_EXECUTE_1);

      /* Branch by conditionally adding to IP rather than using real
       * control flow.
       */
      insn = next_insn(p, BRW_OPCODE_ADD);
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      /* Gen6 encodes the jump count in bits1 and uses null sources. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The predicate was consumed by this IF; reset the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
904
/**
 * Emit a Sandybridge IF that performs its own comparison of \p src0
 * and \p src1 using \p conditional, instead of consuming a previously
 * computed flag.  The jump count is left zero to be patched later.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   insn->header.execution_size = BRW_EXECUTE_8;
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The conditional modifier shares the destreg field on this opcode. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   return insn;
}
928
/**
 * Emit an ELSE instruction (or an IP-adjusting ADD in
 * single-program-flow mode) and patch the matching IF to jump here.
 * The ELSE's own jump target is patched later by brw_ENDIF().
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
				 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* jump count is for 64bit data chunk each, so one 128bit
      instruction requires 2 chunks. */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      /* Gen6 encodes the jump count in bits1 and uses null sources. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* IP offset in bytes; each instruction is 16 bytes. */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if (intel->gen < 6) {
	 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
	 if_insn->bits3.if_else.pop_count = 0;
	 if_insn->bits3.if_else.pad0 = 0;
      } else {
	 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
      }
   }

   return insn;
}
984
/* Close an IF/ELSE construct, emitting the ENDIF (when needed) and
 * patching the most recent unpatched IF or ELSE (`patch_insn`) to
 * point at it.
 */
void brw_ENDIF(struct brw_compile *p,
	       struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   GLuint br = 1;

   /* Jump counts on gen5+ are in 64-bit chunks: 2 per instruction. */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      /* Point the IF/ELSE's ADD-to-IP at the next instruction (bytes). */
      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      if (intel->gen < 6) {
	 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
	 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
	 brw_set_src1(insn, brw_imm_d(0x0));
      } else {
	 brw_set_dest(p, insn, brw_imm_w(0));
	 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
	 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The instruction we are patching must not have been patched yet. */
      if (intel->gen < 6)
	 assert(patch_insn->bits3.if_else.jump_count == 0);
      else
	 assert(patch_insn->bits1.branch_gen6.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 if (intel->gen < 6) {
	    /* Turn it into an IFF, which means no mask stack operations for
	     * all-false and jumping past the ENDIF.
	     */
	    patch_insn->header.opcode = BRW_OPCODE_IFF;
	    patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	    patch_insn->bits3.if_else.pop_count = 0;
	    patch_insn->bits3.if_else.pad0 = 0;
	 } else {
	    /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	    patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
	 }
      } else {
	 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
	 if (intel->gen < 6) {
	    /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	     * matching ENDIF.
	     */
	    patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	    patch_insn->bits3.if_else.pop_count = 1;
	    patch_insn->bits3.if_else.pad0 = 0;
	 } else {
	    /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	    patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
	 }
      }

      /* Also pop item off the stack in the endif instruction:
       */
      if (intel->gen < 6) {
	 insn->bits3.if_else.jump_count = 0;
	 insn->bits3.if_else.pop_count = 1;
	 insn->bits3.if_else.pad0 = 0;
      } else {
	 /* NOTE(review): jump count of 2 == one instruction forward on
	  * gen6 (64-bit chunks) -- presumably the ENDIF points at the
	  * following instruction; confirm against the PRM.
	  */
	 insn->bits1.branch_gen6.jump_count = 2;
      }
   }
}
1069
/* Emit a BREAK out of the innermost loop.
 *
 * Pre-gen6, `pop_count` is the number of mask-stack entries to pop
 * (one per enclosing IF/ELSE level being exited); gen6 has no mask
 * stack, so the count is unused there.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
      /* bits3 fields are written after the src1 immediate above. */
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = pop_count;
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1092
1093 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1094 struct brw_instruction *do_insn)
1095 {
1096 struct brw_instruction *insn;
1097 int br = 2;
1098
1099 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1100 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1101 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1102 brw_set_dest(p, insn, brw_ip_reg());
1103 brw_set_src0(insn, brw_ip_reg());
1104 brw_set_src1(insn, brw_imm_d(0x0));
1105
1106 insn->bits3.break_cont.uip = br * (do_insn - insn);
1107
1108 insn->header.compression_control = BRW_COMPRESSION_NONE;
1109 insn->header.execution_size = BRW_EXECUTE_8;
1110 return insn;
1111 }
1112
/* Emit a pre-gen6 CONTINUE.
 *
 * `pop_count` is the number of mask-stack entries to pop (one per
 * IF/ELSE level between the CONTINUE and the loop).
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   /* bits3 fields are written after the src1 immediate above. */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}
1127
1128 /* DO/WHILE loop:
1129 *
1130 * The DO/WHILE is just an unterminated loop -- break or continue are
1131 * used for control within the loop. We have a few ways they can be
1132 * done.
1133 *
1134 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1135 * jip and no DO instruction.
1136 *
1137 * For non-uniform control flow pre-gen6, there's a DO instruction to
1138 * push the mask, and a WHILE to jump back, and BREAK to get out and
1139 * pop the mask.
1140 *
1141 * For gen6, there's no more mask stack, so no need for DO. WHILE
1142 * just points back to the first instruction of the loop.
1143 */
/* Open a DO/WHILE loop (see the comment block above for the per-gen
 * strategy).
 *
 * On gen6 or in single-program-flow mode no DO instruction is
 * emitted; the returned pointer is the address of the next
 * instruction slot, which brw_WHILE()/gen6_CONT() use as the loop
 * head to jump back to.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No instruction emitted; this is just the loop-top marker. */
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1168
1169
1170
/* Emit the WHILE closing a brw_DO() loop, jumping back to `do_insn`.
 *
 * In single-program-flow mode this is a scalar ADD to IP with a
 * negative byte offset; otherwise a real WHILE whose backward jump
 * distance is encoded in instruction chunks (2 per instruction on
 * gen5+).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      /* Negative distance: do_insn precedes this WHILE. */
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 /* Backward jump in bytes (16 bytes per instruction). */
	 brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* Jump to the instruction after the DO. */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1219
1220
1221 /* FORWARD JUMPS:
1222 */
1223 void brw_land_fwd_jump(struct brw_compile *p,
1224 struct brw_instruction *jmp_insn)
1225 {
1226 struct intel_context *intel = &p->brw->intel;
1227 struct brw_instruction *landing = &p->store[p->nr_insn];
1228 GLuint jmpi = 1;
1229
1230 if (intel->gen >= 5)
1231 jmpi = 2;
1232
1233 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1234 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1235
1236 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1237 }
1238
1239
1240
1241 /* To integrate with the above, it makes sense that the comparison
1242 * instruction should populate the flag register. It might be simpler
1243 * just to use the flag reg for most WM tasks?
1244 */
1245 void brw_CMP(struct brw_compile *p,
1246 struct brw_reg dest,
1247 GLuint conditional,
1248 struct brw_reg src0,
1249 struct brw_reg src1)
1250 {
1251 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1252
1253 insn->header.destreg__conditionalmod = conditional;
1254 brw_set_dest(p, insn, dest);
1255 brw_set_src0(insn, src0);
1256 brw_set_src1(insn, src1);
1257
1258 /* guess_execution_size(insn, src0); */
1259
1260
1261 /* Make it so that future instructions will use the computed flag
1262 * value until brw_set_predicate_control_flag_value() is called
1263 * again.
1264 */
1265 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1266 dest.nr == 0) {
1267 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1268 p->flag_value = 0xff;
1269 }
1270 }
1271
1272 /* Issue 'wait' instruction for n1, host could program MMIO
1273 to wake up thread. */
1274 void brw_WAIT (struct brw_compile *p)
1275 {
1276 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1277 struct brw_reg src = brw_notification_1_reg();
1278
1279 brw_set_dest(p, insn, src);
1280 brw_set_src0(insn, src);
1281 brw_set_src1(insn, brw_null_reg());
1282 insn->header.execution_size = 0; /* must */
1283 insn->header.predicate_control = 0;
1284 insn->header.compression_control = 0;
1285 }
1286
1287
1288 /***********************************************************************
1289 * Helpers for the various SEND message types:
1290 */
1291
/** Extended math function, float[8].
 *
 * On gen6+ this emits the native MATH opcode; earlier generations use
 * a SEND to the extended math shared function, with the message
 * payload starting at `msg_reg_nr`.  `function` selects the operation
 * (BRW_MATH_FUNCTION_*); `data_type` and `precision` are only used by
 * the pre-gen6 message path.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Gen6 MATH operands must be packed GRF registers. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW takes a second operand register; SINCOS returns two results. */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}
1354
1355 /** Extended math function, float[8].
1356 */
1357 void brw_math2(struct brw_compile *p,
1358 struct brw_reg dest,
1359 GLuint function,
1360 struct brw_reg src0,
1361 struct brw_reg src1)
1362 {
1363 struct intel_context *intel = &p->brw->intel;
1364 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1365
1366 assert(intel->gen >= 6);
1367 (void) intel;
1368
1369
1370 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1371 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1372 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1373
1374 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1375 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1376 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1377
1378 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1379 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1380 assert(src0.type == BRW_REGISTER_TYPE_F);
1381 assert(src1.type == BRW_REGISTER_TYPE_F);
1382 }
1383
1384 /* Source modifiers are ignored for extended math instructions. */
1385 assert(!src0.negate);
1386 assert(!src0.abs);
1387 assert(!src1.negate);
1388 assert(!src1.abs);
1389
1390 /* Math is the same ISA format as other opcodes, except that CondModifier
1391 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1392 */
1393 insn->header.destreg__conditionalmod = function;
1394
1395 brw_set_dest(p, insn, dest);
1396 brw_set_src0(insn, src0);
1397 brw_set_src1(insn, src1);
1398 }
1399
/**
 * Extended math function, float[16].
 *
 * Pre-gen6 the math shared function only operates on 8 channels per
 * message, so the 16-wide case is lowered to two SEND instructions:
 * one for the first half and a BRW_COMPRESSION_2NDHALF send for the
 * second, using consecutive message registers.  On gen6+ a single
 * native MATH opcode suffices.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW takes a second operand register; SINCOS returns two results. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: second half of the 16-wide operation,
    * writing to the next register of the destination.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1475
1476
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * `mrf` holds the message header (the payload to be written follows
 * it); `num_regs` is 1 or 2 GRFs worth of data.  The write goes
 * through the stateless data port (binding table index 255).
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int mlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message length = 1 header reg + num_regs payload regs. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1573
1574
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * `mrf` holds the message header; `num_regs` (1 or 2) GRFs are read
 * into `dest` through the stateless data port (binding table 255).
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   uint32_t msg_control;
   int rlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: a copy of g0 with the
    * global offset stored in element 2.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      rlen);
   }
}
1640
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 *
 * `mrf` receives the message header; `offset` is in bytes (converted
 * to owords for gen6+).
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: copy of g0 with the offset stored in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(insn, mrf);
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1698
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 *
 * The message is two registers long: the g0-based header in `mrf`
 * plus the per-channel offsets already placed in the following MRF
 * by the caller.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message header: plain copy of g0. */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, brw_null_reg());

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}
1737
1738
1739
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 *
 * `location` is a byte offset into the constant buffer (converted to
 * owords for gen6+); the message header is built in MRF[1].
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* Gen6 expects the offset in oword units. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1791
/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 *
 * The per-vertex block offsets are computed into MRF[1] as
 * addr_reg + offset; the dual-block read message then fetches one
 * oword per vertex.  The message type constant varies per generation.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   /* g0 supplies the message header. */
   brw_set_src0(insn, brw_vec8_grf(0, 0));

   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}
1845
1846
1847
/**
 * Render target (framebuffer) write.
 *
 * Emits a SEND (or SENDC on gen6+ for render target 0) to the render
 * cache data port.  On gen6+ the message is headerless and the color
 * payload starts at `msg_reg_nr`; `eot` marks the final message of
 * the thread.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  struct brw_reg dest,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
       /* headerless version, just submit color payload */
       src0 = brw_message_reg(msg_reg_nr);

       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
1902
1903
/**
 * Texture sample instruction.
 *
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * \param writemask        channels actually written by the sample; used both
 *                         to skip dead sends (mask == 0) and to drive the
 *                         dependency workaround below.
 * \param response_length  may be shrunk here for 16-wide dispatch when the
 *                         writemask is a contiguous subset of XYZW.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   /* Nothing is written: emitting the send would be pointless. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading masked channels; each skipped channel advances the
       * destination by two registers (dst_offset is later applied with
       * offset(dest, dst_offset)).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows, and
       * count its length.
       */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      /* If the enabled channels are not one contiguous run, we can't
       * shrink the response; fall back to the stall workaround emitted
       * at the end of this function.
       */
      if (newmask != writemask) {
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: newmask now holds the channels to *disable* in the
	  * sampler message header (bits 12..15 of m1.2 below).
	  */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build the message header: copy r0, then set the channel-mask
	  * field in dword 2.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      /* Pre-gen6, the starting MRF number is encoded in the SEND itself. */
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   /* Non-contiguous writemask: force the dependency by emitting a dummy
    * self-move that reads the last register of the sampler response.
    */
   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}
2032
2033 /* All these variables are pretty confusing - we might be better off
2034 * using bitmasks and macros for this, in the old style. Or perhaps
2035 * just having the caller instantiate the fields in dword3 itself.
2036 */
2037 void brw_urb_WRITE(struct brw_compile *p,
2038 struct brw_reg dest,
2039 GLuint msg_reg_nr,
2040 struct brw_reg src0,
2041 GLboolean allocate,
2042 GLboolean used,
2043 GLuint msg_length,
2044 GLuint response_length,
2045 GLboolean eot,
2046 GLboolean writes_complete,
2047 GLuint offset,
2048 GLuint swizzle)
2049 {
2050 struct intel_context *intel = &p->brw->intel;
2051 struct brw_instruction *insn;
2052
2053 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2054
2055 insn = next_insn(p, BRW_OPCODE_SEND);
2056
2057 assert(msg_length < BRW_MAX_MRF);
2058
2059 brw_set_dest(p, insn, dest);
2060 brw_set_src0(insn, src0);
2061 brw_set_src1(insn, brw_imm_d(0));
2062
2063 if (intel->gen < 6)
2064 insn->header.destreg__conditionalmod = msg_reg_nr;
2065
2066 brw_set_urb_message(p->brw,
2067 insn,
2068 allocate,
2069 used,
2070 msg_length,
2071 response_length,
2072 eot,
2073 writes_complete,
2074 offset,
2075 swizzle);
2076 }
2077
2078 static int
2079 brw_find_next_block_end(struct brw_compile *p, int start)
2080 {
2081 int ip;
2082
2083 for (ip = start + 1; ip < p->nr_insn; ip++) {
2084 struct brw_instruction *insn = &p->store[ip];
2085
2086 switch (insn->header.opcode) {
2087 case BRW_OPCODE_ENDIF:
2088 case BRW_OPCODE_ELSE:
2089 case BRW_OPCODE_WHILE:
2090 return ip;
2091 }
2092 }
2093 assert(!"not reached");
2094 return start + 1;
2095 }
2096
2097 /* There is no DO instruction on gen6, so to find the end of the loop
2098 * we have to see if the loop is jumping back before our start
2099 * instruction.
2100 */
2101 static int
2102 brw_find_loop_end(struct brw_compile *p, int start)
2103 {
2104 int ip;
2105 int br = 2;
2106
2107 for (ip = start + 1; ip < p->nr_insn; ip++) {
2108 struct brw_instruction *insn = &p->store[ip];
2109
2110 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2111 if (ip + insn->bits1.branch_gen6.jump_count / br < start)
2112 return ip;
2113 }
2114 }
2115 assert(!"not reached");
2116 return start + 1;
2117 }
2118
2119 /* After program generation, go back and update the UIP and JIP of
2120 * BREAK and CONT instructions to their correct locations.
2121 */
2122 void
2123 brw_set_uip_jip(struct brw_compile *p)
2124 {
2125 struct intel_context *intel = &p->brw->intel;
2126 int ip;
2127 int br = 2;
2128
2129 if (intel->gen < 6)
2130 return;
2131
2132 for (ip = 0; ip < p->nr_insn; ip++) {
2133 struct brw_instruction *insn = &p->store[ip];
2134
2135 switch (insn->header.opcode) {
2136 case BRW_OPCODE_BREAK:
2137 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2138 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
2139 break;
2140 case BRW_OPCODE_CONTINUE:
2141 /* JIP is set at CONTINUE emit time, since that's when we
2142 * know where the start of the loop is.
2143 */
2144 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2145 assert(insn->bits3.break_cont.uip != 0);
2146 assert(insn->bits3.break_cont.jip != 0);
2147 break;
2148 }
2149 }
2150 }
2151
2152 void brw_ff_sync(struct brw_compile *p,
2153 struct brw_reg dest,
2154 GLuint msg_reg_nr,
2155 struct brw_reg src0,
2156 GLboolean allocate,
2157 GLuint response_length,
2158 GLboolean eot)
2159 {
2160 struct intel_context *intel = &p->brw->intel;
2161 struct brw_instruction *insn;
2162
2163 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2164
2165 insn = next_insn(p, BRW_OPCODE_SEND);
2166 brw_set_dest(p, insn, dest);
2167 brw_set_src0(insn, src0);
2168 brw_set_src1(insn, brw_imm_d(0));
2169
2170 if (intel->gen < 6)
2171 insn->header.destreg__conditionalmod = msg_reg_nr;
2172
2173 brw_set_ff_sync_message(p->brw,
2174 insn,
2175 allocate,
2176 response_length,
2177 eot);
2178 }