Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size(struct brw_compile *p,
45 struct brw_instruction *insn,
46 struct brw_reg reg)
47 {
48 if (reg.width == BRW_WIDTH_8 && p->compressed)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
/* Encode the destination operand of an instruction.
 *
 * Fills the destination fields of bits1 according to the addressing mode
 * (direct vs. register-indirect) and the access mode (align1 vs. align16),
 * then derives the execution size from the destination register width.
 * Note: the same dword is described by four overlapping union layouts
 * (da1/da16/ia1/ia16); exactly one applies per mode combination.
 */
static void brw_set_dest(struct brw_compile *p,
                         struct brw_instruction *insn,
                         struct brw_reg dest)
{
   /* ARF and MRF numbers live in separate namespaces; only the other
    * files are bounded by the 128-GRF limit.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         /* Horizontal stride 0 is illegal for a destination; quietly
          * promote it to 1.
          */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
         /* align16 addresses whole 16-byte oct-words. */
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* As above: stride 0 is not a valid destination stride. */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
106
107 extern int reg_type_size[];
108
109 static void
110 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
111 {
112 int hstride_for_reg[] = {0, 1, 2, 4};
113 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
114 int width_for_reg[] = {1, 2, 4, 8, 16};
115 int execsize_for_reg[] = {1, 2, 4, 8, 16};
116 int width, hstride, vstride, execsize;
117
118 if (reg.file == BRW_IMMEDIATE_VALUE) {
119 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
120 * mean the destination has to be 128-bit aligned and the
121 * destination horiz stride has to be a word.
122 */
123 if (reg.type == BRW_REGISTER_TYPE_V) {
124 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
125 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
126 }
127
128 return;
129 }
130
131 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
132 reg.file == BRW_ARF_NULL)
133 return;
134
135 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
136 hstride = hstride_for_reg[reg.hstride];
137
138 if (reg.vstride == 0xf) {
139 vstride = -1;
140 } else {
141 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
142 vstride = vstride_for_reg[reg.vstride];
143 }
144
145 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
146 width = width_for_reg[reg.width];
147
148 assert(insn->header.execution_size >= 0 &&
149 insn->header.execution_size < Elements(execsize_for_reg));
150 execsize = execsize_for_reg[insn->header.execution_size];
151
152 /* Restrictions from 3.3.10: Register Region Restrictions. */
153 /* 3. */
154 assert(execsize >= width);
155
156 /* 4. */
157 if (execsize == width && hstride != 0) {
158 assert(vstride == -1 || vstride == width * hstride);
159 }
160
161 /* 5. */
162 if (execsize == width && hstride == 0) {
163 /* no restriction on vstride. */
164 }
165
166 /* 6. */
167 if (width == 1) {
168 assert(hstride == 0);
169 }
170
171 /* 7. */
172 if (execsize == 1 && width == 1) {
173 assert(hstride == 0);
174 assert(vstride == 0);
175 }
176
177 /* 8. */
178 if (vstride == 0 && hstride == 0) {
179 assert(width == 1);
180 }
181
182 /* 10. Check destination issues. */
183 }
184
185 static void brw_set_src0( struct brw_instruction *insn,
186 struct brw_reg reg )
187 {
188 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
189 assert(reg.nr < 128);
190
191 validate_reg(insn, reg);
192
193 insn->bits1.da1.src0_reg_file = reg.file;
194 insn->bits1.da1.src0_reg_type = reg.type;
195 insn->bits2.da1.src0_abs = reg.abs;
196 insn->bits2.da1.src0_negate = reg.negate;
197 insn->bits2.da1.src0_address_mode = reg.address_mode;
198
199 if (reg.file == BRW_IMMEDIATE_VALUE) {
200 insn->bits3.ud = reg.dw1.ud;
201
202 /* Required to set some fields in src1 as well:
203 */
204 insn->bits1.da1.src1_reg_file = 0; /* arf */
205 insn->bits1.da1.src1_reg_type = reg.type;
206 }
207 else
208 {
209 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
210 if (insn->header.access_mode == BRW_ALIGN_1) {
211 insn->bits2.da1.src0_subreg_nr = reg.subnr;
212 insn->bits2.da1.src0_reg_nr = reg.nr;
213 }
214 else {
215 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
216 insn->bits2.da16.src0_reg_nr = reg.nr;
217 }
218 }
219 else {
220 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
221
222 if (insn->header.access_mode == BRW_ALIGN_1) {
223 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
224 }
225 else {
226 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
227 }
228 }
229
230 if (insn->header.access_mode == BRW_ALIGN_1) {
231 if (reg.width == BRW_WIDTH_1 &&
232 insn->header.execution_size == BRW_EXECUTE_1) {
233 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
234 insn->bits2.da1.src0_width = BRW_WIDTH_1;
235 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
236 }
237 else {
238 insn->bits2.da1.src0_horiz_stride = reg.hstride;
239 insn->bits2.da1.src0_width = reg.width;
240 insn->bits2.da1.src0_vert_stride = reg.vstride;
241 }
242 }
243 else {
244 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
245 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
246 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
247 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
248
249 /* This is an oddity of the fact we're using the same
250 * descriptions for registers in align_16 as align_1:
251 */
252 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
253 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
254 else
255 insn->bits2.da16.src0_vert_stride = reg.vstride;
256 }
257 }
258 }
259
260
261 void brw_set_src1( struct brw_instruction *insn,
262 struct brw_reg reg )
263 {
264 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
265
266 assert(reg.nr < 128);
267
268 validate_reg(insn, reg);
269
270 insn->bits1.da1.src1_reg_file = reg.file;
271 insn->bits1.da1.src1_reg_type = reg.type;
272 insn->bits3.da1.src1_abs = reg.abs;
273 insn->bits3.da1.src1_negate = reg.negate;
274
275 /* Only src1 can be immediate in two-argument instructions.
276 */
277 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
278
279 if (reg.file == BRW_IMMEDIATE_VALUE) {
280 insn->bits3.ud = reg.dw1.ud;
281 }
282 else {
283 /* This is a hardware restriction, which may or may not be lifted
284 * in the future:
285 */
286 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
287 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
288
289 if (insn->header.access_mode == BRW_ALIGN_1) {
290 insn->bits3.da1.src1_subreg_nr = reg.subnr;
291 insn->bits3.da1.src1_reg_nr = reg.nr;
292 }
293 else {
294 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
295 insn->bits3.da16.src1_reg_nr = reg.nr;
296 }
297
298 if (insn->header.access_mode == BRW_ALIGN_1) {
299 if (reg.width == BRW_WIDTH_1 &&
300 insn->header.execution_size == BRW_EXECUTE_1) {
301 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
302 insn->bits3.da1.src1_width = BRW_WIDTH_1;
303 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
304 }
305 else {
306 insn->bits3.da1.src1_horiz_stride = reg.hstride;
307 insn->bits3.da1.src1_width = reg.width;
308 insn->bits3.da1.src1_vert_stride = reg.vstride;
309 }
310 }
311 else {
312 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
313 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
314 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
315 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
316
317 /* This is an oddity of the fact we're using the same
318 * descriptions for registers in align_16 as align_1:
319 */
320 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
321 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
322 else
323 insn->bits3.da16.src1_vert_stride = reg.vstride;
324 }
325 }
326 }
327
328
329
/* Encode the message descriptor for an extended-math (SFID math) SEND.
 *
 * brw_set_src1() first stores an immediate 0 into bits3; the descriptor
 * fields written below then overwrite it, so the call order matters.
 * On Ironlake (gen5) the SFID moves out of bits3 into bits2.
 */
static void brw_set_math_message( struct brw_context *brw,
                                  struct brw_instruction *insn,
                                  GLuint msg_length,
                                  GLuint response_length,
                                  GLuint function,
                                  GLuint integer_type,
                                  GLboolean low_precision,
                                  GLboolean saturate,
                                  GLuint dataType )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
368
369
/* Encode the URB FF_SYNC message descriptor (gen5+ layout).
 *
 * FF_SYNC is used by the fixed-function synchronization handshake;
 * most URB descriptor fields are unused by it and cleared here.
 * brw_set_src1() seeds bits3 with 0 before the fields are written.
 */
static void brw_set_ff_sync_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLboolean allocate,
                                    GLuint response_length,
                                    GLboolean end_of_thread)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   /* On SNB the SFID lives in the destreg/condmod field; before that it
    * is in bits2 along with a duplicate EOT bit.
    */
   if (intel->gen >= 6) {
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
396
/* Encode a URB write message descriptor.
 *
 * Selects between the pre-gen5 and gen5+ descriptor layouts; on SNB
 * the SFID additionally moves into the destreg/condmod field.
 * brw_set_src1() seeds bits3 with 0 before the fields are written.
 */
static void brw_set_urb_message( struct brw_context *brw,
                                 struct brw_instruction *insn,
                                 GLboolean allocate,
                                 GLboolean used,
                                 GLuint msg_length,
                                 GLuint response_length,
                                 GLboolean end_of_thread,
                                 GLboolean complete,
                                 GLuint offset,
                                 GLuint swizzle_control )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* ? */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used; /* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
         /* For SNB, the SFID bits moved to the condmod bits, and
          * EOT stayed in bits3 above.  Does the EOT bit setting
          * below on Ironlake even do anything?
          */
         insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
         insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
         insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used; /* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
445
/* Encode a data-port (render cache) write message descriptor.
 *
 * Three generational layouts: gen6+ (dp_render_cache), gen5
 * (dp_write_gen5), and the original gen4 layout (dp_write).
 * brw_set_src1() seeds bits3 with 0 before the fields are written.
 */
static void brw_set_dp_write_message( struct brw_context *brw,
                                      struct brw_instruction *insn,
                                      GLuint binding_table_index,
                                      GLuint msg_control,
                                      GLuint msg_type,
                                      GLuint msg_length,
                                      GLboolean header_present,
                                      GLuint pixel_scoreboard_clear,
                                      GLuint response_length,
                                      GLuint end_of_thread,
                                      GLuint send_commit_msg)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_ud(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
      insn->bits3.dp_render_cache.header_present = header_present;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
499
/* Encode a data-port read message descriptor.
 *
 * Four generational layouts: gen6+ (dp_render_cache), gen5
 * (dp_read_gen5), G4x (dp_read_g4x, shifted field widths), and the
 * original gen4 layout (dp_read).  Reads never terminate the thread,
 * so end_of_thread is always 0.  brw_set_src1() seeds bits3 with 0
 * before the fields are written.
 */
static void
brw_set_dp_read_message(struct brw_context *brw,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint msg_control,
                        GLuint msg_type,
                        GLuint target_cache,
                        GLuint msg_length,
                        GLuint response_length)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = 0;
      insn->bits3.dp_render_cache.header_present = 1;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = 0;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
561
/* Encode a sampler message descriptor.
 *
 * Three generational layouts: gen5+ (sampler_gen5), G4x (sampler_g4x),
 * and the original gen4 layout (sampler).  Callers never terminate the
 * thread from a sample, hence the eot == 0 assertion.  brw_set_src1()
 * seeds bits3 with 0 before the fields are written.
 */
static void brw_set_sampler_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      /* On SNB the SFID lives in the destreg/condmod field. */
      if (intel->gen >= 6)
         insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
         insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
         insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      /* gen4 additionally encodes a fixed float32 return format. */
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
611
612
613
614 static struct brw_instruction *next_insn( struct brw_compile *p,
615 GLuint opcode )
616 {
617 struct brw_instruction *insn;
618
619 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
620
621 insn = &p->store[p->nr_insn++];
622 memcpy(insn, p->current, sizeof(*insn));
623
624 /* Reset this one-shot flag:
625 */
626
627 if (p->current->header.destreg__conditionalmod) {
628 p->current->header.destreg__conditionalmod = 0;
629 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
630 }
631
632 insn->header.opcode = opcode;
633 return insn;
634 }
635
636
637 static struct brw_instruction *brw_alu1( struct brw_compile *p,
638 GLuint opcode,
639 struct brw_reg dest,
640 struct brw_reg src )
641 {
642 struct brw_instruction *insn = next_insn(p, opcode);
643 brw_set_dest(p, insn, dest);
644 brw_set_src0(insn, src);
645 return insn;
646 }
647
648 static struct brw_instruction *brw_alu2(struct brw_compile *p,
649 GLuint opcode,
650 struct brw_reg dest,
651 struct brw_reg src0,
652 struct brw_reg src1 )
653 {
654 struct brw_instruction *insn = next_insn(p, opcode);
655 brw_set_dest(p, insn, dest);
656 brw_set_src0(insn, src0);
657 brw_set_src1(insn, src1);
658 return insn;
659 }
660
661
662 /***********************************************************************
663 * Convenience routines.
664 */
/* Generate the public wrapper for a one-source ALU opcode. */
#define ALU1(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
              struct brw_reg dest,                            \
              struct brw_reg src0)                            \
{                                                             \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);           \
}

/* Generate the public wrapper for a two-source ALU opcode. */
#define ALU2(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
              struct brw_reg dest,                            \
              struct brw_reg src0,                            \
              struct brw_reg src1)                            \
{                                                             \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);     \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 */
#define ROUND(OP)                                             \
void brw_##OP(struct brw_compile *p,                          \
              struct brw_reg dest,                            \
              struct brw_reg src)                             \
{                                                             \
   struct brw_instruction *rnd, *add;                         \
   rnd = next_insn(p, BRW_OPCODE_##OP);                       \
   brw_set_dest(p, rnd, dest);                                \
   brw_set_src0(rnd, src);                                    \
   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
                                                              \
   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));             \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;      \
}
701
702
/* Instantiate the public brw_MOV(), brw_SEL(), ... emitters declared in
 * brw_eu.h.  ADD and MUL are defined by hand below because they carry
 * extra operand-type assertions.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)

/* Two-instruction rounding sequences (see ROUND above). */
ROUND(RNDZ)
ROUND(RNDE)
729
730
731 struct brw_instruction *brw_ADD(struct brw_compile *p,
732 struct brw_reg dest,
733 struct brw_reg src0,
734 struct brw_reg src1)
735 {
736 /* 6.2.2: add */
737 if (src0.type == BRW_REGISTER_TYPE_F ||
738 (src0.file == BRW_IMMEDIATE_VALUE &&
739 src0.type == BRW_REGISTER_TYPE_VF)) {
740 assert(src1.type != BRW_REGISTER_TYPE_UD);
741 assert(src1.type != BRW_REGISTER_TYPE_D);
742 }
743
744 if (src1.type == BRW_REGISTER_TYPE_F ||
745 (src1.file == BRW_IMMEDIATE_VALUE &&
746 src1.type == BRW_REGISTER_TYPE_VF)) {
747 assert(src0.type != BRW_REGISTER_TYPE_UD);
748 assert(src0.type != BRW_REGISTER_TYPE_D);
749 }
750
751 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
752 }
753
754 struct brw_instruction *brw_MUL(struct brw_compile *p,
755 struct brw_reg dest,
756 struct brw_reg src0,
757 struct brw_reg src1)
758 {
759 /* 6.32.38: mul */
760 if (src0.type == BRW_REGISTER_TYPE_D ||
761 src0.type == BRW_REGISTER_TYPE_UD ||
762 src1.type == BRW_REGISTER_TYPE_D ||
763 src1.type == BRW_REGISTER_TYPE_UD) {
764 assert(dest.type != BRW_REGISTER_TYPE_F);
765 }
766
767 if (src0.type == BRW_REGISTER_TYPE_F ||
768 (src0.file == BRW_IMMEDIATE_VALUE &&
769 src0.type == BRW_REGISTER_TYPE_VF)) {
770 assert(src1.type != BRW_REGISTER_TYPE_UD);
771 assert(src1.type != BRW_REGISTER_TYPE_D);
772 }
773
774 if (src1.type == BRW_REGISTER_TYPE_F ||
775 (src1.file == BRW_IMMEDIATE_VALUE &&
776 src1.type == BRW_REGISTER_TYPE_VF)) {
777 assert(src0.type != BRW_REGISTER_TYPE_UD);
778 assert(src0.type != BRW_REGISTER_TYPE_D);
779 }
780
781 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
782 src0.nr != BRW_ARF_ACCUMULATOR);
783 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
784 src1.nr != BRW_ARF_ACCUMULATOR);
785
786 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
787 }
788
789
790 void brw_NOP(struct brw_compile *p)
791 {
792 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
793 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
794 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
795 brw_set_src1(insn, brw_imm_ud(0x0));
796 }
797
798
799
800
801
802 /***********************************************************************
803 * Comparisons, if/else/endif
804 */
805
/* Emit a JMPI (jump indexed) instruction.
 *
 * JMPI is inherently scalar: execution size 1, uncompressed, with the
 * execution mask disabled.  Also clears the sticky predicate default so
 * a predicated jump does not leak into subsequent instructions.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Reset the one-shot predication default for following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
821
822 /* EU takes the value from the flag register and pushes it onto some
823 * sort of a stack (presumably merging with any flag value already on
824 * the stack). Within an if block, the flags at the top of the stack
825 * control execution on each channel of the unit, eg. on each of the
826 * 16 pixel values in our wm programs.
827 *
828 * When the matching 'else' instruction is reached (presumably by
829 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
831 *
832 * When the matching 'endif' instruction is reached, the flags are
833 * popped off. If the stack is now empty, normal execution resumes.
834 *
835 * No attempt is made to deal with stack overflow (14 elements?).
836 */
/* Emit a predicated IF, with the jump target patched in later by
 * brw_ELSE()/brw_ENDIF().
 *
 * In single-program-flow mode (all channels take the same path) the IF
 * degenerates to a predicated ADD to the IP register, which must be
 * scalar (BRW_EXECUTE_1).  Pre-gen6 encodes the branch via IP-relative
 * operands; gen6 stores the jump count in bits1 with null sources.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      assert(execute_size == BRW_EXECUTE_1);

      insn = next_insn(p, BRW_OPCODE_ADD);
      /* Skip the jump when the predicate passes (inverse of IF). */
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Reset the one-shot predication default for following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
875
/* Emit a gen6-style IF that performs the comparison itself (conditional
 * modifier + two sources) instead of relying on a previously-set flag.
 * The jump count is patched later by brw_ELSE()/brw_ENDIF().
 */
struct brw_instruction *
brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
            struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   insn->header.execution_size = BRW_EXECUTE_8;
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   return insn;
}
899
/* Emit an ELSE and back-patch the matching IF to jump here.
 *
 * In single-program-flow mode both IF and ELSE are plain IP adds and
 * the patch is a byte offset; otherwise the IF's jump count is patched
 * in instruction-sized units (doubled on gen5+, where the count is in
 * 64-bit chunks).  The ELSE's own target is patched later by
 * brw_ENDIF().
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
                                 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* jump count is for 64bit data chunk each, so one 128bit
      instruction requires 2 chunks. */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* Byte offset past the ELSE; each instruction is 16 bytes. */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if (intel->gen < 6) {
         if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
         if_insn->bits3.if_else.pop_count = 0;
         if_insn->bits3.if_else.pad0 = 0;
      } else {
         if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
      }
   }

   return insn;
}
955
/**
 * Emit an ENDIF and patch the matching IF (or ELSE) to jump to it.
 *
 * \param patch_insn  the IF, or the ELSE if one was emitted; in
 *                    single-program-flow mode this is the ADD that was
 *                    emitted in place of the IF/ELSE.
 */
void brw_ENDIF(struct brw_compile *p,
	       struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   /* Jump counts are in 64-bit chunks: 2 per 128-bit instruction on gen5+. */
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      /* Point the IF/ELSE ADD at the next instruction (16 bytes each). */
      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      if (intel->gen < 6) {
         brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_set_src1(insn, brw_imm_d(0x0));
      } else {
         brw_set_dest(p, insn, brw_imm_w(0));
         brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The target must not have been patched already. */
      if (intel->gen < 6)
	 assert(patch_insn->bits3.if_else.jump_count == 0);
      else
	 assert(patch_insn->bits1.branch_gen6.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 if (intel->gen < 6) {
	    /* Turn it into an IFF, which means no mask stack operations for
	     * all-false and jumping past the ENDIF.
	     */
	    patch_insn->header.opcode = BRW_OPCODE_IFF;
	    patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	    patch_insn->bits3.if_else.pop_count = 0;
	    patch_insn->bits3.if_else.pad0 = 0;
	 } else {
	    /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	    patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
	 }
      } else {
	 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
	 if (intel->gen < 6) {
	    /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	     * matching ENDIF.
	     */
	    patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	    patch_insn->bits3.if_else.pop_count = 1;
	    patch_insn->bits3.if_else.pad0 = 0;
	 } else {
	    /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	    patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
	 }
      }

      /* Also pop item off the stack in the endif instruction:
       */
      if (intel->gen < 6) {
	 insn->bits3.if_else.jump_count = 0;
	 insn->bits3.if_else.pop_count = 1;
	 insn->bits3.if_else.pad0 = 0;
      } else {
	 insn->bits1.branch_gen6.jump_count = 2;
      }
   }
}
1040
1041 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1042 {
1043 struct intel_context *intel = &p->brw->intel;
1044 struct brw_instruction *insn;
1045
1046 insn = next_insn(p, BRW_OPCODE_BREAK);
1047 if (intel->gen >= 6) {
1048 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1049 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1050 brw_set_src1(insn, brw_imm_d(0x0));
1051 } else {
1052 brw_set_dest(p, insn, brw_ip_reg());
1053 brw_set_src0(insn, brw_ip_reg());
1054 brw_set_src1(insn, brw_imm_d(0x0));
1055 insn->bits3.if_else.pad0 = 0;
1056 insn->bits3.if_else.pop_count = pop_count;
1057 }
1058 insn->header.compression_control = BRW_COMPRESSION_NONE;
1059 insn->header.execution_size = BRW_EXECUTE_8;
1060
1061 return insn;
1062 }
1063
1064 struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
1065 struct brw_instruction *do_insn)
1066 {
1067 struct brw_instruction *insn;
1068 int br = 2;
1069
1070 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1071 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1072 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1073 brw_set_dest(p, insn, brw_ip_reg());
1074 brw_set_src0(insn, brw_ip_reg());
1075 brw_set_src1(insn, brw_imm_d(0x0));
1076
1077 insn->bits3.break_cont.uip = br * (do_insn - insn);
1078
1079 insn->header.compression_control = BRW_COMPRESSION_NONE;
1080 insn->header.execution_size = BRW_EXECUTE_8;
1081 return insn;
1082 }
1083
/**
 * Emit a pre-gen6 CONTINUE instruction.
 *
 * \param pop_count  number of mask-stack entries to pop before jumping.
 *
 * The jump target is left as 0 here; NOTE(review): presumably patched by
 * the loop-closing code in the caller — confirm against users of this
 * function.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}
1098
1099 /* DO/WHILE loop:
1100 *
1101 * The DO/WHILE is just an unterminated loop -- break or continue are
1102 * used for control within the loop. We have a few ways they can be
1103 * done.
1104 *
1105 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1106 * jip and no DO instruction.
1107 *
1108 * For non-uniform control flow pre-gen6, there's a DO instruction to
1109 * push the mask, and a WHILE to jump back, and BREAK to get out and
1110 * pop the mask.
1111 *
1112 * For gen6, there's no more mask stack, so no need for DO. WHILE
1113 * just points back to the first instruction of the loop.
1114 */
1115 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1116 {
1117 struct intel_context *intel = &p->brw->intel;
1118
1119 if (intel->gen >= 6 || p->single_program_flow) {
1120 return &p->store[p->nr_insn];
1121 } else {
1122 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1123
1124 /* Override the defaults for this instruction:
1125 */
1126 brw_set_dest(p, insn, brw_null_reg());
1127 brw_set_src0(insn, brw_null_reg());
1128 brw_set_src1(insn, brw_null_reg());
1129
1130 insn->header.compression_control = BRW_COMPRESSION_NONE;
1131 insn->header.execution_size = execute_size;
1132 insn->header.predicate_control = BRW_PREDICATE_NONE;
1133 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1134 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1135
1136 return insn;
1137 }
1138 }
1139
1140
1141
/**
 * Emit the WHILE closing a DO/WHILE loop, jumping back to do_insn.
 *
 * \param do_insn  return value of the matching brw_DO(): the DO
 *                 instruction pre-gen6, or the first instruction of the
 *                 loop body on gen6 / single-program-flow.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* Jump counts are in 64-bit chunks: 2 per 128-bit instruction on gen5+. */
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      /* Backward jump count to the top of the loop body. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: the WHILE is just an unconditional ADD
	  * to IP (byte offset, 16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* Jump back to just past the DO instruction. */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1190
1191
1192 /* FORWARD JUMPS:
1193 */
1194 void brw_land_fwd_jump(struct brw_compile *p,
1195 struct brw_instruction *jmp_insn)
1196 {
1197 struct intel_context *intel = &p->brw->intel;
1198 struct brw_instruction *landing = &p->store[p->nr_insn];
1199 GLuint jmpi = 1;
1200
1201 if (intel->gen >= 5)
1202 jmpi = 2;
1203
1204 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1205 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1206
1207 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1208 }
1209
1210
1211
1212 /* To integrate with the above, it makes sense that the comparison
1213 * instruction should populate the flag register. It might be simpler
1214 * just to use the flag reg for most WM tasks?
1215 */
/**
 * Emit a CMP of src0 against src1 with the given conditional modifier,
 * writing the per-channel result to dest and the flag register.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      /* dest is ARF nr 0 (the null register): the result is only wanted
       * in the flag register, so enable normal predication from here on.
       */
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
1242
1243 /* Issue 'wait' instruction for n1, host could program MMIO
1244 to wake up thread. */
1245 void brw_WAIT (struct brw_compile *p)
1246 {
1247 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1248 struct brw_reg src = brw_notification_1_reg();
1249
1250 brw_set_dest(p, insn, src);
1251 brw_set_src0(insn, src);
1252 brw_set_src1(insn, brw_null_reg());
1253 insn->header.execution_size = 0; /* must */
1254 insn->header.predicate_control = 0;
1255 insn->header.compression_control = 0;
1256 }
1257
1258
1259 /***********************************************************************
1260 * Helpers for the various SEND message types:
1261 */
1262
/** Extended math function, float[8].
 *
 * On gen6 math is a regular EU instruction (BRW_OPCODE_MATH); on earlier
 * generations a SEND message is issued to the math shared function.
 *
 * \param function    BRW_MATH_FUNCTION_* selector
 * \param saturate    saturate modifier for the destination
 * \param msg_reg_nr  message register (used pre-gen6 only)
 * \param data_type   message data layout (pre-gen6 only)
 * \param precision   full/partial precision (pre-gen6 only)
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Gen6 math operates on GRFs with unit stride only. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW takes an extra exponent payload register; SINCOS returns two. */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}
1325
/** Extended math function with two source operands, float[8].
 *
 * Gen6 only: two-source math (e.g. POW, integer division) is a regular
 * EU instruction on gen6; there is no pre-gen6 path here.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   /* Gen6 math operates on GRFs with unit stride only. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);
}
1370
/**
 * Extended math function, float[16].
 *
 * Pre-gen6 the math unit message is SIMD8 only, so two SEND instructions
 * are used: one for each half, the second marked COMPRESSION_2NDHALF and
 * using the next message register.  On gen6 a single MATH instruction
 * suffices.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW takes an extra exponent payload register; SINCOS returns two. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1446
1447
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * NOTE(review): only num_regs of 1 or 2 is handled (2 or 4 OWORDs);
 * anything else silently falls into the 2-register case.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int mlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message length = 1 header register + num_regs of payload. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1544
1545
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * NOTE(review): only num_regs of 1 or 2 is handled (2 or 4 OWORDs);
 * anything else silently falls into the 2-register case.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   uint32_t msg_control;
   int rlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* Response length = number of registers read back. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 with the scratch offset in
    * element 2 (see brw_oword_block_write_scratch).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      rlen);
   }
}
1611
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 *
 * \param offset  byte offset into the buffer (converted to oword units
 *                for gen6+).
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: copy of g0 with the offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   /* Gen6 has no implied move to the MRF; src0 names the message reg. */
   if (intel->gen >= 6) {
      brw_set_src0(insn, mrf);
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1669
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg (hence msg_length of 2: header plus
 * offsets).
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message header: plain copy of g0 into the MRF. */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, brw_null_reg());

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}
1708
1709
1710
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 *
 * \param location  byte offset into the constant buffer (converted to
 *                  oword units for gen6+).
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   /* Gen6 has no implied move to the MRF; src0 names the message reg. */
   if (intel->gen >= 6) {
      brw_set_src0(insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1762
/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 *
 * \param addr_reg  register holding the per-vertex index
 * \param offset    constant byte offset added to each index
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, brw_vec8_grf(0, 0));

   /* The oword dual block read message type moved between generations. */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}
1816
1817
1818
/**
 * Emit a render target write message.
 *
 * On gen6+ with binding table index 0, SENDC is used instead of SEND —
 * NOTE(review): presumably for the conditional-send dependency behavior;
 * confirm against the gen6 docs.
 *
 * \param dispatch_width  8 or 16; selects the SIMD8/SIMD16 msg_control.
 * \param eot             end-of-thread: terminates the thread after the write.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  struct brw_reg dest,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
       /* headerless version, just submit color payload */
       src0 = brw_message_reg(msg_reg_nr);

       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1,	/* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
1873
1874
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * If the writemask is not full XYZW, the sampler message header is
 * rewritten to mask channels, and for non-contiguous masks a dummy
 * dependency MOV ("stall") is emitted after the send to work around
 * missing destination dependency checking on SEND (see below).
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Find the first enabled channel; each skipped channel offsets the
       * destination by 2 registers.
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* Non-contiguous mask: fall back to the dependency-stall
	  * workaround after the send.
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* The header mask field disables channels, so invert. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a header (copy of g0) with the channel mask in m1.2. */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* Sandybridge doesn't have the implied move for SENDs,
       * and the first message register index comes from src0.
       */
      if (intel->gen >= 6) {
	 if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	     src0.nr != BRW_ARF_NULL) {
	    brw_push_insn_state(p);
	    brw_set_mask_control( p, BRW_MASK_DISABLE );
	    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	    brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0);
	    brw_pop_insn_state(p);
	 }
	 src0 = brw_message_reg(msg_reg_nr);
      }

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* Touch the last register of the response so the EU sees a real
       * dependency on the sampler result.
       *
       *  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}
2016
2017 /* All these variables are pretty confusing - we might be better off
2018 * using bitmasks and macros for this, in the old style. Or perhaps
2019 * just having the caller instantiate the fields in dword3 itself.
2020 */
/* Emit a SEND carrying a URB write message.
 *
 * \param p                compile/emit state
 * \param dest             destination register for the (optional) response
 * \param msg_reg_nr       first message register (MRF) of the payload
 * \param src0             payload source; on gen6+ it is first copied into
 *                         the message register since SENDs there have no
 *                         implied move
 * \param allocate/used/writes_complete/offset/swizzle
 *                         URB message descriptor fields, passed through to
 *                         brw_set_urb_message()
 * \param msg_length       payload length in registers (must fit in the MRF)
 * \param response_length  expected response length in registers
 * \param eot              end-of-thread flag
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
    */
   if (intel->gen >= 6) {
      /* Copy the payload into the MRF with masking disabled so all
       * channels are written regardless of execution mask state.
       */
      brw_push_insn_state(p);
      brw_set_mask_control( p, BRW_MASK_DISABLE );
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(src0, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
      src0 = brw_message_reg(msg_reg_nr);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6, this header field holds the message register number
    * for SEND instructions (rather than a conditional modifier).
    */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p->brw,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2071
2072 static int
2073 brw_find_next_block_end(struct brw_compile *p, int start)
2074 {
2075 int ip;
2076
2077 for (ip = start + 1; ip < p->nr_insn; ip++) {
2078 struct brw_instruction *insn = &p->store[ip];
2079
2080 switch (insn->header.opcode) {
2081 case BRW_OPCODE_ENDIF:
2082 case BRW_OPCODE_ELSE:
2083 case BRW_OPCODE_WHILE:
2084 return ip;
2085 }
2086 }
2087 assert(!"not reached");
2088 return start + 1;
2089 }
2090
2091 /* There is no DO instruction on gen6, so to find the end of the loop
2092 * we have to see if the loop is jumping back before our start
2093 * instruction.
2094 */
2095 static int
2096 brw_find_loop_end(struct brw_compile *p, int start)
2097 {
2098 int ip;
2099 int br = 2;
2100
2101 for (ip = start + 1; ip < p->nr_insn; ip++) {
2102 struct brw_instruction *insn = &p->store[ip];
2103
2104 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2105 if (ip + insn->bits1.branch_gen6.jump_count / br < start)
2106 return ip;
2107 }
2108 }
2109 assert(!"not reached");
2110 return start + 1;
2111 }
2112
2113 /* After program generation, go back and update the UIP and JIP of
2114 * BREAK and CONT instructions to their correct locations.
2115 */
2116 void
2117 brw_set_uip_jip(struct brw_compile *p)
2118 {
2119 struct intel_context *intel = &p->brw->intel;
2120 int ip;
2121 int br = 2;
2122
2123 if (intel->gen < 6)
2124 return;
2125
2126 for (ip = 0; ip < p->nr_insn; ip++) {
2127 struct brw_instruction *insn = &p->store[ip];
2128
2129 switch (insn->header.opcode) {
2130 case BRW_OPCODE_BREAK:
2131 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2132 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
2133 break;
2134 case BRW_OPCODE_CONTINUE:
2135 /* JIP is set at CONTINUE emit time, since that's when we
2136 * know where the start of the loop is.
2137 */
2138 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2139 assert(insn->bits3.break_cont.uip != 0);
2140 assert(insn->bits3.break_cont.jip != 0);
2141 break;
2142 }
2143 }
2144 }
2145
/* Emit a SEND carrying an FF_SYNC URB message (used by the geometry
 * stages to synchronize FF thread allocation).
 *
 * \param p                compile/emit state
 * \param dest             destination register for the response
 * \param msg_reg_nr       first message register (MRF) of the payload
 * \param src0             payload source; on gen6+ it is first copied into
 *                         the message register since SENDs there have no
 *                         implied move
 * \param allocate         URB allocate bit, passed to brw_set_ff_sync_message()
 * \param response_length  expected response length in registers
 * \param eot              end-of-thread flag
 */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLuint response_length,
		   GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
    */
   if (intel->gen >= 6) {
      /* Copy the payload into the MRF with masking disabled so all
       * channels are written regardless of execution mask state.
       */
      brw_push_insn_state(p);
      brw_set_mask_control( p, BRW_MASK_DISABLE );
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(src0, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
      src0 = brw_message_reg(msg_reg_nr);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6, this header field holds the message register number
    * for SEND instructions (rather than a conditional modifier).
    */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p->brw,
			   insn,
			   allocate,
			   response_length,
			   eot);
}