i965: Gen6 no longer has the IFF instruction; always use IF.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even ignored in da16, still need to set as '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even ignored in da16, still need to set as '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.file == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity of the fact we're using the same
249 * descriptions for registers in align_16 as align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity of the fact we're using the same
317 * descriptions for registers in align_16 as align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLboolean header_present,
452 GLuint pixel_scoreboard_clear,
453 GLuint response_length,
454 GLuint end_of_thread,
455 GLuint send_commit_msg)
456 {
457 struct intel_context *intel = &brw->intel;
458 brw_set_src1(insn, brw_imm_ud(0));
459
460 if (intel->gen >= 6) {
461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
462 insn->bits3.dp_render_cache.msg_control = msg_control;
463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
464 insn->bits3.dp_render_cache.msg_type = msg_type;
465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
466 insn->bits3.dp_render_cache.header_present = header_present;
467 insn->bits3.dp_render_cache.response_length = response_length;
468 insn->bits3.dp_render_cache.msg_length = msg_length;
469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
471 /* XXX really need below? */
472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
473 insn->bits2.send_gen5.end_of_thread = end_of_thread;
474 } else if (intel->gen == 5) {
475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
476 insn->bits3.dp_write_gen5.msg_control = msg_control;
477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
478 insn->bits3.dp_write_gen5.msg_type = msg_type;
479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
480 insn->bits3.dp_write_gen5.header_present = header_present;
481 insn->bits3.dp_write_gen5.response_length = response_length;
482 insn->bits3.dp_write_gen5.msg_length = msg_length;
483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
485 insn->bits2.send_gen5.end_of_thread = end_of_thread;
486 } else {
487 insn->bits3.dp_write.binding_table_index = binding_table_index;
488 insn->bits3.dp_write.msg_control = msg_control;
489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
490 insn->bits3.dp_write.msg_type = msg_type;
491 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
492 insn->bits3.dp_write.response_length = response_length;
493 insn->bits3.dp_write.msg_length = msg_length;
494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
495 insn->bits3.dp_write.end_of_thread = end_of_thread;
496 }
497 }
498
499 static void brw_set_dp_read_message( struct brw_context *brw,
500 struct brw_instruction *insn,
501 GLuint binding_table_index,
502 GLuint msg_control,
503 GLuint msg_type,
504 GLuint target_cache,
505 GLuint msg_length,
506 GLuint response_length,
507 GLuint end_of_thread )
508 {
509 struct intel_context *intel = &brw->intel;
510 brw_set_src1(insn, brw_imm_d(0));
511
512 if (intel->gen == 5) {
513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
514 insn->bits3.dp_read_gen5.msg_control = msg_control;
515 insn->bits3.dp_read_gen5.msg_type = msg_type;
516 insn->bits3.dp_read_gen5.target_cache = target_cache;
517 insn->bits3.dp_read_gen5.header_present = 1;
518 insn->bits3.dp_read_gen5.response_length = response_length;
519 insn->bits3.dp_read_gen5.msg_length = msg_length;
520 insn->bits3.dp_read_gen5.pad1 = 0;
521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
523 insn->bits2.send_gen5.end_of_thread = end_of_thread;
524 } else {
525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
529 insn->bits3.dp_read.response_length = response_length; /*16:19*/
530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
532 insn->bits3.dp_read.pad1 = 0; /*28:30*/
533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
534 }
535 }
536
537 static void brw_set_sampler_message(struct brw_context *brw,
538 struct brw_instruction *insn,
539 GLuint binding_table_index,
540 GLuint sampler,
541 GLuint msg_type,
542 GLuint response_length,
543 GLuint msg_length,
544 GLboolean eot,
545 GLuint header_present,
546 GLuint simd_mode)
547 {
548 struct intel_context *intel = &brw->intel;
549 assert(eot == 0);
550 brw_set_src1(insn, brw_imm_d(0));
551
552 if (intel->gen >= 5) {
553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
554 insn->bits3.sampler_gen5.sampler = sampler;
555 insn->bits3.sampler_gen5.msg_type = msg_type;
556 insn->bits3.sampler_gen5.simd_mode = simd_mode;
557 insn->bits3.sampler_gen5.header_present = header_present;
558 insn->bits3.sampler_gen5.response_length = response_length;
559 insn->bits3.sampler_gen5.msg_length = msg_length;
560 insn->bits3.sampler_gen5.end_of_thread = eot;
561 if (intel->gen >= 6)
562 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
563 else {
564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
565 insn->bits2.send_gen5.end_of_thread = eot;
566 }
567 } else if (intel->is_g4x) {
568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
569 insn->bits3.sampler_g4x.sampler = sampler;
570 insn->bits3.sampler_g4x.msg_type = msg_type;
571 insn->bits3.sampler_g4x.response_length = response_length;
572 insn->bits3.sampler_g4x.msg_length = msg_length;
573 insn->bits3.sampler_g4x.end_of_thread = eot;
574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
575 } else {
576 insn->bits3.sampler.binding_table_index = binding_table_index;
577 insn->bits3.sampler.sampler = sampler;
578 insn->bits3.sampler.msg_type = msg_type;
579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
580 insn->bits3.sampler.response_length = response_length;
581 insn->bits3.sampler.msg_length = msg_length;
582 insn->bits3.sampler.end_of_thread = eot;
583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
584 }
585 }
586
587
588
589 static struct brw_instruction *next_insn( struct brw_compile *p,
590 GLuint opcode )
591 {
592 struct brw_instruction *insn;
593
594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
595
596 insn = &p->store[p->nr_insn++];
597 memcpy(insn, p->current, sizeof(*insn));
598
599 /* Reset this one-shot flag:
600 */
601
602 if (p->current->header.destreg__conditionalmod) {
603 p->current->header.destreg__conditionalmod = 0;
604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
605 }
606
607 insn->header.opcode = opcode;
608 return insn;
609 }
610
611
612 static struct brw_instruction *brw_alu1( struct brw_compile *p,
613 GLuint opcode,
614 struct brw_reg dest,
615 struct brw_reg src )
616 {
617 struct brw_instruction *insn = next_insn(p, opcode);
618 brw_set_dest(insn, dest);
619 brw_set_src0(insn, src);
620 return insn;
621 }
622
623 static struct brw_instruction *brw_alu2(struct brw_compile *p,
624 GLuint opcode,
625 struct brw_reg dest,
626 struct brw_reg src0,
627 struct brw_reg src1 )
628 {
629 struct brw_instruction *insn = next_insn(p, opcode);
630 brw_set_dest(insn, dest);
631 brw_set_src0(insn, src0);
632 brw_set_src1(insn, src1);
633 return insn;
634 }
635
636
637 /***********************************************************************
638 * Convenience routines.
639 */
640 #define ALU1(OP) \
641 struct brw_instruction *brw_##OP(struct brw_compile *p, \
642 struct brw_reg dest, \
643 struct brw_reg src0) \
644 { \
645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
646 }
647
648 #define ALU2(OP) \
649 struct brw_instruction *brw_##OP(struct brw_compile *p, \
650 struct brw_reg dest, \
651 struct brw_reg src0, \
652 struct brw_reg src1) \
653 { \
654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
655 }
656
657
658 ALU1(MOV)
659 ALU2(SEL)
660 ALU1(NOT)
661 ALU2(AND)
662 ALU2(OR)
663 ALU2(XOR)
664 ALU2(SHR)
665 ALU2(SHL)
666 ALU2(RSR)
667 ALU2(RSL)
668 ALU2(ASR)
669 ALU1(FRC)
670 ALU1(RNDD)
671 ALU1(RNDZ)
672 ALU2(MAC)
673 ALU2(MACH)
674 ALU1(LZD)
675 ALU2(DP4)
676 ALU2(DPH)
677 ALU2(DP3)
678 ALU2(DP2)
679 ALU2(LINE)
680 ALU2(PLN)
681
682 struct brw_instruction *brw_ADD(struct brw_compile *p,
683 struct brw_reg dest,
684 struct brw_reg src0,
685 struct brw_reg src1)
686 {
687 /* 6.2.2: add */
688 if (src0.type == BRW_REGISTER_TYPE_F ||
689 (src0.file == BRW_IMMEDIATE_VALUE &&
690 src0.type == BRW_REGISTER_TYPE_VF)) {
691 assert(src1.type != BRW_REGISTER_TYPE_UD);
692 assert(src1.type != BRW_REGISTER_TYPE_D);
693 }
694
695 if (src1.type == BRW_REGISTER_TYPE_F ||
696 (src1.file == BRW_IMMEDIATE_VALUE &&
697 src1.type == BRW_REGISTER_TYPE_VF)) {
698 assert(src0.type != BRW_REGISTER_TYPE_UD);
699 assert(src0.type != BRW_REGISTER_TYPE_D);
700 }
701
702 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
703 }
704
705 struct brw_instruction *brw_MUL(struct brw_compile *p,
706 struct brw_reg dest,
707 struct brw_reg src0,
708 struct brw_reg src1)
709 {
710 /* 6.32.38: mul */
711 if (src0.type == BRW_REGISTER_TYPE_D ||
712 src0.type == BRW_REGISTER_TYPE_UD ||
713 src1.type == BRW_REGISTER_TYPE_D ||
714 src1.type == BRW_REGISTER_TYPE_UD) {
715 assert(dest.type != BRW_REGISTER_TYPE_F);
716 }
717
718 if (src0.type == BRW_REGISTER_TYPE_F ||
719 (src0.file == BRW_IMMEDIATE_VALUE &&
720 src0.type == BRW_REGISTER_TYPE_VF)) {
721 assert(src1.type != BRW_REGISTER_TYPE_UD);
722 assert(src1.type != BRW_REGISTER_TYPE_D);
723 }
724
725 if (src1.type == BRW_REGISTER_TYPE_F ||
726 (src1.file == BRW_IMMEDIATE_VALUE &&
727 src1.type == BRW_REGISTER_TYPE_VF)) {
728 assert(src0.type != BRW_REGISTER_TYPE_UD);
729 assert(src0.type != BRW_REGISTER_TYPE_D);
730 }
731
732 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
733 src0.nr != BRW_ARF_ACCUMULATOR);
734 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
735 src1.nr != BRW_ARF_ACCUMULATOR);
736
737 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
738 }
739
740
741 void brw_NOP(struct brw_compile *p)
742 {
743 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
744 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
745 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
746 brw_set_src1(insn, brw_imm_ud(0x0));
747 }
748
749
750
751
752
753 /***********************************************************************
754 * Comparisons, if/else/endif
755 */
756
757 struct brw_instruction *brw_JMPI(struct brw_compile *p,
758 struct brw_reg dest,
759 struct brw_reg src0,
760 struct brw_reg src1)
761 {
762 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
763
764 insn->header.execution_size = 1;
765 insn->header.compression_control = BRW_COMPRESSION_NONE;
766 insn->header.mask_control = BRW_MASK_DISABLE;
767
768 p->current->header.predicate_control = BRW_PREDICATE_NONE;
769
770 return insn;
771 }
772
773 /* EU takes the value from the flag register and pushes it onto some
774 * sort of a stack (presumably merging with any flag value already on
775 * the stack). Within an if block, the flags at the top of the stack
776 * control execution on each channel of the unit, eg. on each of the
777 * 16 pixel values in our wm programs.
778 *
779 * When the matching 'else' instruction is reached (presumably by
780 * countdown of the instruction count patched in by our ELSE/ENDIF
781 * functions), the relevent flags are inverted.
782 *
783 * When the matching 'endif' instruction is reached, the flags are
784 * popped off. If the stack is now empty, normal execution resumes.
785 *
786 * No attempt is made to deal with stack overflow (14 elements?).
787 */
788 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
789 {
790 struct brw_instruction *insn;
791
792 if (p->single_program_flow) {
793 assert(execute_size == BRW_EXECUTE_1);
794
795 insn = next_insn(p, BRW_OPCODE_ADD);
796 insn->header.predicate_inverse = 1;
797 } else {
798 insn = next_insn(p, BRW_OPCODE_IF);
799 }
800
801 /* Override the defaults for this instruction:
802 */
803 brw_set_dest(insn, brw_ip_reg());
804 brw_set_src0(insn, brw_ip_reg());
805 brw_set_src1(insn, brw_imm_d(0x0));
806
807 insn->header.execution_size = execute_size;
808 insn->header.compression_control = BRW_COMPRESSION_NONE;
809 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
810 insn->header.mask_control = BRW_MASK_ENABLE;
811 if (!p->single_program_flow)
812 insn->header.thread_control = BRW_THREAD_SWITCH;
813
814 p->current->header.predicate_control = BRW_PREDICATE_NONE;
815
816 return insn;
817 }
818
819
820 struct brw_instruction *brw_ELSE(struct brw_compile *p,
821 struct brw_instruction *if_insn)
822 {
823 struct intel_context *intel = &p->brw->intel;
824 struct brw_instruction *insn;
825 GLuint br = 1;
826
827 /* jump count is for 64bit data chunk each, so one 128bit
828 instruction requires 2 chunks. */
829 if (intel->gen >= 5)
830 br = 2;
831
832 if (p->single_program_flow) {
833 insn = next_insn(p, BRW_OPCODE_ADD);
834 } else {
835 insn = next_insn(p, BRW_OPCODE_ELSE);
836 }
837
838 brw_set_dest(insn, brw_ip_reg());
839 brw_set_src0(insn, brw_ip_reg());
840 brw_set_src1(insn, brw_imm_d(0x0));
841
842 insn->header.compression_control = BRW_COMPRESSION_NONE;
843 insn->header.execution_size = if_insn->header.execution_size;
844 insn->header.mask_control = BRW_MASK_ENABLE;
845 if (!p->single_program_flow)
846 insn->header.thread_control = BRW_THREAD_SWITCH;
847
848 /* Patch the if instruction to point at this instruction.
849 */
850 if (p->single_program_flow) {
851 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
852
853 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
854 } else {
855 assert(if_insn->header.opcode == BRW_OPCODE_IF);
856
857 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
858 if_insn->bits3.if_else.pop_count = 0;
859 if_insn->bits3.if_else.pad0 = 0;
860 }
861
862 return insn;
863 }
864
865 void brw_ENDIF(struct brw_compile *p,
866 struct brw_instruction *patch_insn)
867 {
868 struct intel_context *intel = &p->brw->intel;
869 GLuint br = 1;
870
871 if (intel->gen >= 5)
872 br = 2;
873
874 if (p->single_program_flow) {
875 /* In single program flow mode, there's no need to execute an ENDIF,
876 * since we don't need to do any stack operations, and if we're executing
877 * currently, we want to just continue executing.
878 */
879 struct brw_instruction *next = &p->store[p->nr_insn];
880
881 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
882
883 patch_insn->bits3.ud = (next - patch_insn) * 16;
884 } else {
885 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
886
887 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
888 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
889 brw_set_src1(insn, brw_imm_d(0x0));
890
891 insn->header.compression_control = BRW_COMPRESSION_NONE;
892 insn->header.execution_size = patch_insn->header.execution_size;
893 insn->header.mask_control = BRW_MASK_ENABLE;
894 insn->header.thread_control = BRW_THREAD_SWITCH;
895
896 assert(patch_insn->bits3.if_else.jump_count == 0);
897
898 /* Patch the if or else instructions to point at this or the next
899 * instruction respectively.
900 */
901 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
902 if (intel->gen < 6) {
903 /* Automagically turn it into an IFF:
904 */
905 patch_insn->header.opcode = BRW_OPCODE_IFF;
906 }
907 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
908 patch_insn->bits3.if_else.pop_count = 0;
909 patch_insn->bits3.if_else.pad0 = 0;
910 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
911 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
912 patch_insn->bits3.if_else.pop_count = 1;
913 patch_insn->bits3.if_else.pad0 = 0;
914 } else {
915 assert(0);
916 }
917
918 /* Also pop item off the stack in the endif instruction:
919 */
920 insn->bits3.if_else.jump_count = 0;
921 insn->bits3.if_else.pop_count = 1;
922 insn->bits3.if_else.pad0 = 0;
923 }
924 }
925
926 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
927 {
928 struct brw_instruction *insn;
929 insn = next_insn(p, BRW_OPCODE_BREAK);
930 brw_set_dest(insn, brw_ip_reg());
931 brw_set_src0(insn, brw_ip_reg());
932 brw_set_src1(insn, brw_imm_d(0x0));
933 insn->header.compression_control = BRW_COMPRESSION_NONE;
934 insn->header.execution_size = BRW_EXECUTE_8;
935 /* insn->header.mask_control = BRW_MASK_DISABLE; */
936 insn->bits3.if_else.pad0 = 0;
937 insn->bits3.if_else.pop_count = pop_count;
938 return insn;
939 }
940
941 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
942 {
943 struct brw_instruction *insn;
944 insn = next_insn(p, BRW_OPCODE_CONTINUE);
945 brw_set_dest(insn, brw_ip_reg());
946 brw_set_src0(insn, brw_ip_reg());
947 brw_set_src1(insn, brw_imm_d(0x0));
948 insn->header.compression_control = BRW_COMPRESSION_NONE;
949 insn->header.execution_size = BRW_EXECUTE_8;
950 /* insn->header.mask_control = BRW_MASK_DISABLE; */
951 insn->bits3.if_else.pad0 = 0;
952 insn->bits3.if_else.pop_count = pop_count;
953 return insn;
954 }
955
956 /* DO/WHILE loop:
957 */
958 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
959 {
960 if (p->single_program_flow) {
961 return &p->store[p->nr_insn];
962 } else {
963 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
964
965 /* Override the defaults for this instruction:
966 */
967 brw_set_dest(insn, brw_null_reg());
968 brw_set_src0(insn, brw_null_reg());
969 brw_set_src1(insn, brw_null_reg());
970
971 insn->header.compression_control = BRW_COMPRESSION_NONE;
972 insn->header.execution_size = execute_size;
973 insn->header.predicate_control = BRW_PREDICATE_NONE;
974 /* insn->header.mask_control = BRW_MASK_ENABLE; */
975 /* insn->header.mask_control = BRW_MASK_DISABLE; */
976
977 return insn;
978 }
979 }
980
981
982
983 struct brw_instruction *brw_WHILE(struct brw_compile *p,
984 struct brw_instruction *do_insn)
985 {
986 struct intel_context *intel = &p->brw->intel;
987 struct brw_instruction *insn;
988 GLuint br = 1;
989
990 if (intel->gen >= 5)
991 br = 2;
992
993 if (p->single_program_flow)
994 insn = next_insn(p, BRW_OPCODE_ADD);
995 else
996 insn = next_insn(p, BRW_OPCODE_WHILE);
997
998 brw_set_dest(insn, brw_ip_reg());
999 brw_set_src0(insn, brw_ip_reg());
1000 brw_set_src1(insn, brw_imm_d(0x0));
1001
1002 insn->header.compression_control = BRW_COMPRESSION_NONE;
1003
1004 if (p->single_program_flow) {
1005 insn->header.execution_size = BRW_EXECUTE_1;
1006
1007 insn->bits3.d = (do_insn - insn) * 16;
1008 } else {
1009 insn->header.execution_size = do_insn->header.execution_size;
1010
1011 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1012 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1013 insn->bits3.if_else.pop_count = 0;
1014 insn->bits3.if_else.pad0 = 0;
1015 }
1016
1017 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1018
1019 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1020 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1021 return insn;
1022 }
1023
1024
1025 /* FORWARD JUMPS:
1026 */
1027 void brw_land_fwd_jump(struct brw_compile *p,
1028 struct brw_instruction *jmp_insn)
1029 {
1030 struct intel_context *intel = &p->brw->intel;
1031 struct brw_instruction *landing = &p->store[p->nr_insn];
1032 GLuint jmpi = 1;
1033
1034 if (intel->gen >= 5)
1035 jmpi = 2;
1036
1037 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1038 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1039
1040 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1041 }
1042
1043
1044
1045 /* To integrate with the above, it makes sense that the comparison
1046 * instruction should populate the flag register. It might be simpler
1047 * just to use the flag reg for most WM tasks?
1048 */
1049 void brw_CMP(struct brw_compile *p,
1050 struct brw_reg dest,
1051 GLuint conditional,
1052 struct brw_reg src0,
1053 struct brw_reg src1)
1054 {
1055 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1056
1057 insn->header.destreg__conditionalmod = conditional;
1058 brw_set_dest(insn, dest);
1059 brw_set_src0(insn, src0);
1060 brw_set_src1(insn, src1);
1061
1062 /* guess_execution_size(insn, src0); */
1063
1064
1065 /* Make it so that future instructions will use the computed flag
1066 * value until brw_set_predicate_control_flag_value() is called
1067 * again.
1068 */
1069 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1070 dest.nr == 0) {
1071 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1072 p->flag_value = 0xff;
1073 }
1074 }
1075
1076 /* Issue 'wait' instruction for n1, host could program MMIO
1077 to wake up thread. */
1078 void brw_WAIT (struct brw_compile *p)
1079 {
1080 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1081 struct brw_reg src = brw_notification_1_reg();
1082
1083 brw_set_dest(insn, src);
1084 brw_set_src0(insn, src);
1085 brw_set_src1(insn, brw_null_reg());
1086 insn->header.execution_size = 0; /* must */
1087 insn->header.predicate_control = 0;
1088 insn->header.compression_control = 0;
1089 }
1090
1091
1092 /***********************************************************************
1093 * Helpers for the various SEND message types:
1094 */
1095
1096 /** Extended math function, float[8].
1097 */
1098 void brw_math( struct brw_compile *p,
1099 struct brw_reg dest,
1100 GLuint function,
1101 GLuint saturate,
1102 GLuint msg_reg_nr,
1103 struct brw_reg src,
1104 GLuint data_type,
1105 GLuint precision )
1106 {
1107 struct intel_context *intel = &p->brw->intel;
1108
1109 if (intel->gen >= 6) {
1110 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1111
1112 /* Math is the same ISA format as other opcodes, except that CondModifier
1113 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1114 */
1115 insn->header.destreg__conditionalmod = function;
1116
1117 brw_set_dest(insn, dest);
1118 brw_set_src0(insn, src);
1119 brw_set_src1(insn, brw_null_reg());
1120 } else {
1121 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1122 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1123 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1124 /* Example code doesn't set predicate_control for send
1125 * instructions.
1126 */
1127 insn->header.predicate_control = 0;
1128 insn->header.destreg__conditionalmod = msg_reg_nr;
1129
1130 brw_set_dest(insn, dest);
1131 brw_set_src0(insn, src);
1132 brw_set_math_message(p->brw,
1133 insn,
1134 msg_length, response_length,
1135 function,
1136 BRW_MATH_INTEGER_UNSIGNED,
1137 precision,
1138 saturate,
1139 data_type);
1140 }
1141 }
1142
1143 /** Extended math function, float[8].
1144 */
1145 void brw_math2(struct brw_compile *p,
1146 struct brw_reg dest,
1147 GLuint function,
1148 struct brw_reg src0,
1149 struct brw_reg src1)
1150 {
1151 struct intel_context *intel = &p->brw->intel;
1152 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1153
1154 assert(intel->gen >= 6);
1155
1156 /* Math is the same ISA format as other opcodes, except that CondModifier
1157 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1158 */
1159 insn->header.destreg__conditionalmod = function;
1160
1161 brw_set_dest(insn, dest);
1162 brw_set_src0(insn, src0);
1163 brw_set_src1(insn, src1);
1164 }
1165
1166 /**
1167 * Extended math function, float[16].
1168 * Use 2 send instructions.
1169 */
1170 void brw_math_16( struct brw_compile *p,
1171 struct brw_reg dest,
1172 GLuint function,
1173 GLuint saturate,
1174 GLuint msg_reg_nr,
1175 struct brw_reg src,
1176 GLuint precision )
1177 {
1178 struct intel_context *intel = &p->brw->intel;
1179 struct brw_instruction *insn;
1180 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1181 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1182
1183 if (intel->gen >= 6) {
1184 insn = next_insn(p, BRW_OPCODE_MATH);
1185
1186 /* Math is the same ISA format as other opcodes, except that CondModifier
1187 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1188 */
1189 insn->header.destreg__conditionalmod = function;
1190
1191 brw_set_dest(insn, dest);
1192 brw_set_src0(insn, src);
1193 brw_set_src1(insn, brw_null_reg());
1194 return;
1195 }
1196
1197 /* First instruction:
1198 */
1199 brw_push_insn_state(p);
1200 brw_set_predicate_control_flag_value(p, 0xff);
1201 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1202
1203 insn = next_insn(p, BRW_OPCODE_SEND);
1204 insn->header.destreg__conditionalmod = msg_reg_nr;
1205
1206 brw_set_dest(insn, dest);
1207 brw_set_src0(insn, src);
1208 brw_set_math_message(p->brw,
1209 insn,
1210 msg_length, response_length,
1211 function,
1212 BRW_MATH_INTEGER_UNSIGNED,
1213 precision,
1214 saturate,
1215 BRW_MATH_DATA_VECTOR);
1216
1217 /* Second instruction:
1218 */
1219 insn = next_insn(p, BRW_OPCODE_SEND);
1220 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1221 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1222
1223 brw_set_dest(insn, offset(dest,1));
1224 brw_set_src0(insn, src);
1225 brw_set_math_message(p->brw,
1226 insn,
1227 msg_length, response_length,
1228 function,
1229 BRW_MATH_INTEGER_UNSIGNED,
1230 precision,
1231 saturate,
1232 BRW_MATH_DATA_VECTOR);
1233
1234 brw_pop_insn_state(p);
1235 }
1236
1237
1238 /**
1239 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1240 * Scratch offset should be a multiple of 64.
1241 * Used for register spilling.
1242 */
1243 void brw_dp_WRITE_16( struct brw_compile *p,
1244 struct brw_reg src,
1245 GLuint scratch_offset )
1246 {
1247 struct intel_context *intel = &p->brw->intel;
1248 GLuint msg_reg_nr = 1;
1249 {
1250 brw_push_insn_state(p);
1251 brw_set_mask_control(p, BRW_MASK_DISABLE);
1252 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1253
1254 /* set message header global offset field (reg 0, element 2) */
1255 brw_MOV(p,
1256 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1257 brw_imm_d(scratch_offset));
1258
1259 brw_pop_insn_state(p);
1260 }
1261
1262 {
1263 GLuint msg_length = 3;
1264 struct brw_reg dest;
1265 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1266 int send_commit_msg;
1267
1268 insn->header.predicate_control = 0; /* XXX */
1269 insn->header.compression_control = BRW_COMPRESSION_NONE;
1270 insn->header.destreg__conditionalmod = msg_reg_nr;
1271
1272 /* Until gen6, writes followed by reads from the same location
1273 * are not guaranteed to be ordered unless write_commit is set.
1274 * If set, then a no-op write is issued to the destination
1275 * register to set a dependency, and a read from the destination
1276 * can be used to ensure the ordering.
1277 *
1278 * For gen6, only writes between different threads need ordering
1279 * protection. Our use of DP writes is all about register
1280 * spilling within a thread.
1281 */
1282 if (intel->gen >= 6) {
1283 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1284 send_commit_msg = 0;
1285 } else {
1286 dest = brw_uw16_grf(0, 0);
1287 send_commit_msg = 1;
1288 }
1289
1290 brw_set_dest(insn, dest);
1291 brw_set_src0(insn, src);
1292
1293 brw_set_dp_write_message(p->brw,
1294 insn,
1295 255, /* binding table index (255=stateless) */
1296 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1297 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1298 msg_length,
1299 GL_TRUE, /* header_present */
1300 0, /* pixel scoreboard */
1301 send_commit_msg, /* response_length */
1302 0, /* eot */
1303 send_commit_msg);
1304 }
1305 }
1306
1307
1308 /**
1309 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1310 * Scratch offset should be a multiple of 64.
1311 * Used for register spilling.
1312 */
1313 void brw_dp_READ_16( struct brw_compile *p,
1314 struct brw_reg dest,
1315 GLuint scratch_offset )
1316 {
1317 GLuint msg_reg_nr = 1;
1318 {
1319 brw_push_insn_state(p);
1320 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1321 brw_set_mask_control(p, BRW_MASK_DISABLE);
1322
1323 /* set message header global offset field (reg 0, element 2) */
1324 brw_MOV(p,
1325 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1326 brw_imm_d(scratch_offset));
1327
1328 brw_pop_insn_state(p);
1329 }
1330
1331 {
1332 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1333
1334 insn->header.predicate_control = 0; /* XXX */
1335 insn->header.compression_control = BRW_COMPRESSION_NONE;
1336 insn->header.destreg__conditionalmod = msg_reg_nr;
1337
1338 brw_set_dest(insn, dest); /* UW? */
1339 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1340
1341 brw_set_dp_read_message(p->brw,
1342 insn,
1343 255, /* binding table index (255=stateless) */
1344 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1345 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1346 1, /* target cache (render/scratch) */
1347 1, /* msg_length */
1348 2, /* response_length */
1349 0); /* eot */
1350 }
1351 }
1352
1353
1354 /**
1355 * Read a float[4] vector from the data port Data Cache (const buffer).
1356 * Location (in buffer) should be a multiple of 16.
1357 * Used for fetching shader constants.
1358 * If relAddr is true, we'll do an indirect fetch using the address register.
1359 */
1360 void brw_dp_READ_4( struct brw_compile *p,
1361 struct brw_reg dest,
1362 GLboolean relAddr,
1363 GLuint location,
1364 GLuint bind_table_index )
1365 {
1366 /* XXX: relAddr not implemented */
1367 GLuint msg_reg_nr = 1;
1368 {
1369 struct brw_reg b;
1370 brw_push_insn_state(p);
1371 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1372 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1373 brw_set_mask_control(p, BRW_MASK_DISABLE);
1374
1375 /* Setup MRF[1] with location/offset into const buffer */
1376 b = brw_message_reg(msg_reg_nr);
1377 b = retype(b, BRW_REGISTER_TYPE_UD);
1378 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1379 * when the docs say only dword[2] should be set. Hmmm. But it works.
1380 */
1381 brw_MOV(p, b, brw_imm_ud(location));
1382 brw_pop_insn_state(p);
1383 }
1384
1385 {
1386 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1387
1388 insn->header.predicate_control = BRW_PREDICATE_NONE;
1389 insn->header.compression_control = BRW_COMPRESSION_NONE;
1390 insn->header.destreg__conditionalmod = msg_reg_nr;
1391 insn->header.mask_control = BRW_MASK_DISABLE;
1392
1393 /* cast dest to a uword[8] vector */
1394 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1395
1396 brw_set_dest(insn, dest);
1397 brw_set_src0(insn, brw_null_reg());
1398
1399 brw_set_dp_read_message(p->brw,
1400 insn,
1401 bind_table_index,
1402 0, /* msg_control (0 means 1 Oword) */
1403 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1404 0, /* source cache = data cache */
1405 1, /* msg_length */
1406 1, /* response_length (1 Oword) */
1407 0); /* eot */
1408 }
1409 }
1410
1411
1412 /**
1413 * Read float[4] constant(s) from VS constant buffer.
1414 * For relative addressing, two float[4] constants will be read into 'dest'.
1415 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1416 */
1417 void brw_dp_READ_4_vs(struct brw_compile *p,
1418 struct brw_reg dest,
1419 GLuint location,
1420 GLuint bind_table_index)
1421 {
1422 struct brw_instruction *insn;
1423 GLuint msg_reg_nr = 1;
1424 struct brw_reg b;
1425
1426 /*
1427 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1428 location, msg_reg_nr);
1429 */
1430
1431 /* Setup MRF[1] with location/offset into const buffer */
1432 brw_push_insn_state(p);
1433 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1434 brw_set_mask_control(p, BRW_MASK_DISABLE);
1435 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1436
1437 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1438 * when the docs say only dword[2] should be set. Hmmm. But it works.
1439 */
1440 b = brw_message_reg(msg_reg_nr);
1441 b = retype(b, BRW_REGISTER_TYPE_UD);
1442 /*b = get_element_ud(b, 2);*/
1443 brw_MOV(p, b, brw_imm_ud(location));
1444
1445 brw_pop_insn_state(p);
1446
1447 insn = next_insn(p, BRW_OPCODE_SEND);
1448
1449 insn->header.predicate_control = BRW_PREDICATE_NONE;
1450 insn->header.compression_control = BRW_COMPRESSION_NONE;
1451 insn->header.destreg__conditionalmod = msg_reg_nr;
1452 insn->header.mask_control = BRW_MASK_DISABLE;
1453
1454 brw_set_dest(insn, dest);
1455 brw_set_src0(insn, brw_null_reg());
1456
1457 brw_set_dp_read_message(p->brw,
1458 insn,
1459 bind_table_index,
1460 0,
1461 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1462 0, /* source cache = data cache */
1463 1, /* msg_length */
1464 1, /* response_length (1 Oword) */
1465 0); /* eot */
1466 }
1467
1468 /**
1469 * Read a float[4] constant per vertex from VS constant buffer, with
1470 * relative addressing.
1471 */
1472 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1473 struct brw_reg dest,
1474 struct brw_reg addr_reg,
1475 GLuint offset,
1476 GLuint bind_table_index)
1477 {
1478 struct intel_context *intel = &p->brw->intel;
1479 int msg_type;
1480
1481 /* Setup MRF[1] with offset into const buffer */
1482 brw_push_insn_state(p);
1483 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1484 brw_set_mask_control(p, BRW_MASK_DISABLE);
1485 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1486
1487 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1488 * fields ignored.
1489 */
1490 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1491 addr_reg, brw_imm_d(offset));
1492 brw_pop_insn_state(p);
1493
1494 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1495
1496 insn->header.predicate_control = BRW_PREDICATE_NONE;
1497 insn->header.compression_control = BRW_COMPRESSION_NONE;
1498 insn->header.destreg__conditionalmod = 0;
1499 insn->header.mask_control = BRW_MASK_DISABLE;
1500
1501 brw_set_dest(insn, dest);
1502 brw_set_src0(insn, brw_vec8_grf(0, 0));
1503
1504 if (intel->gen == 6)
1505 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1506 else if (intel->gen == 5 || intel->is_g4x)
1507 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1508 else
1509 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1510
1511 brw_set_dp_read_message(p->brw,
1512 insn,
1513 bind_table_index,
1514 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1515 msg_type,
1516 0, /* source cache = data cache */
1517 2, /* msg_length */
1518 1, /* response_length */
1519 0); /* eot */
1520 }
1521
1522
1523
1524 void brw_fb_WRITE(struct brw_compile *p,
1525 int dispatch_width,
1526 struct brw_reg dest,
1527 GLuint msg_reg_nr,
1528 struct brw_reg src0,
1529 GLuint binding_table_index,
1530 GLuint msg_length,
1531 GLuint response_length,
1532 GLboolean eot)
1533 {
1534 struct intel_context *intel = &p->brw->intel;
1535 struct brw_instruction *insn;
1536 GLuint msg_control, msg_type;
1537 GLboolean header_present = GL_TRUE;
1538
1539 insn = next_insn(p, BRW_OPCODE_SEND);
1540 insn->header.predicate_control = 0; /* XXX */
1541 insn->header.compression_control = BRW_COMPRESSION_NONE;
1542
1543 if (intel->gen >= 6) {
1544 if (msg_length == 4)
1545 header_present = GL_FALSE;
1546
1547 /* headerless version, just submit color payload */
1548 src0 = brw_message_reg(msg_reg_nr);
1549
1550 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1551 } else {
1552 insn->header.destreg__conditionalmod = msg_reg_nr;
1553
1554 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1555 }
1556
1557 if (dispatch_width == 16)
1558 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1559 else
1560 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1561
1562 brw_set_dest(insn, dest);
1563 brw_set_src0(insn, src0);
1564 brw_set_dp_write_message(p->brw,
1565 insn,
1566 binding_table_index,
1567 msg_control,
1568 msg_type,
1569 msg_length,
1570 header_present,
1571 1, /* pixel scoreboard */
1572 response_length,
1573 eot,
1574 0 /* send_commit_msg */);
1575 }
1576
1577
1578 /**
1579 * Texture sample instruction.
1580 * Note: the msg_type plus msg_length values determine exactly what kind
1581 * of sampling operation is performed. See volume 4, page 161 of docs.
1582 */
1583 void brw_SAMPLE(struct brw_compile *p,
1584 struct brw_reg dest,
1585 GLuint msg_reg_nr,
1586 struct brw_reg src0,
1587 GLuint binding_table_index,
1588 GLuint sampler,
1589 GLuint writemask,
1590 GLuint msg_type,
1591 GLuint response_length,
1592 GLuint msg_length,
1593 GLboolean eot,
1594 GLuint header_present,
1595 GLuint simd_mode)
1596 {
1597 struct intel_context *intel = &p->brw->intel;
1598 GLboolean need_stall = 0;
1599
1600 if (writemask == 0) {
1601 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1602 return;
1603 }
1604
1605 /* Hardware doesn't do destination dependency checking on send
1606 * instructions properly. Add a workaround which generates the
1607 * dependency by other means. In practice it seems like this bug
1608 * only crops up for texture samples, and only where registers are
1609 * written by the send and then written again later without being
1610 * read in between. Luckily for us, we already track that
1611 * information and use it to modify the writemask for the
1612 * instruction, so that is a guide for whether a workaround is
1613 * needed.
1614 */
1615 if (writemask != WRITEMASK_XYZW) {
1616 GLuint dst_offset = 0;
1617 GLuint i, newmask = 0, len = 0;
1618
1619 for (i = 0; i < 4; i++) {
1620 if (writemask & (1<<i))
1621 break;
1622 dst_offset += 2;
1623 }
1624 for (; i < 4; i++) {
1625 if (!(writemask & (1<<i)))
1626 break;
1627 newmask |= 1<<i;
1628 len++;
1629 }
1630
1631 if (newmask != writemask) {
1632 need_stall = 1;
1633 /* printf("need stall %x %x\n", newmask , writemask); */
1634 }
1635 else {
1636 GLboolean dispatch_16 = GL_FALSE;
1637
1638 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1639
1640 guess_execution_size(p->current, dest);
1641 if (p->current->header.execution_size == BRW_EXECUTE_16)
1642 dispatch_16 = GL_TRUE;
1643
1644 newmask = ~newmask & WRITEMASK_XYZW;
1645
1646 brw_push_insn_state(p);
1647
1648 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1649 brw_set_mask_control(p, BRW_MASK_DISABLE);
1650
1651 brw_MOV(p, m1, brw_vec8_grf(0,0));
1652 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1653
1654 brw_pop_insn_state(p);
1655
1656 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1657 dest = offset(dest, dst_offset);
1658
1659 /* For 16-wide dispatch, masked channels are skipped in the
1660 * response. For 8-wide, masked channels still take up slots,
1661 * and are just not written to.
1662 */
1663 if (dispatch_16)
1664 response_length = len * 2;
1665 }
1666 }
1667
1668 {
1669 struct brw_instruction *insn;
1670
1671 /* Sandybridge doesn't have the implied move for SENDs,
1672 * and the first message register index comes from src0.
1673 */
1674 if (intel->gen >= 6) {
1675 brw_push_insn_state(p);
1676 brw_set_mask_control( p, BRW_MASK_DISABLE );
1677 /* m1 contains header? */
1678 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1679 brw_pop_insn_state(p);
1680 src0 = brw_message_reg(msg_reg_nr);
1681 }
1682
1683 insn = next_insn(p, BRW_OPCODE_SEND);
1684 insn->header.predicate_control = 0; /* XXX */
1685 insn->header.compression_control = BRW_COMPRESSION_NONE;
1686 if (intel->gen < 6)
1687 insn->header.destreg__conditionalmod = msg_reg_nr;
1688
1689 brw_set_dest(insn, dest);
1690 brw_set_src0(insn, src0);
1691 brw_set_sampler_message(p->brw, insn,
1692 binding_table_index,
1693 sampler,
1694 msg_type,
1695 response_length,
1696 msg_length,
1697 eot,
1698 header_present,
1699 simd_mode);
1700 }
1701
1702 if (need_stall) {
1703 struct brw_reg reg = vec8(offset(dest, response_length-1));
1704
1705 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1706 */
1707 brw_push_insn_state(p);
1708 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1709 brw_MOV(p, reg, reg);
1710 brw_pop_insn_state(p);
1711 }
1712
1713 }
1714
1715 /* All these variables are pretty confusing - we might be better off
1716 * using bitmasks and macros for this, in the old style. Or perhaps
1717 * just having the caller instantiate the fields in dword3 itself.
1718 */
1719 void brw_urb_WRITE(struct brw_compile *p,
1720 struct brw_reg dest,
1721 GLuint msg_reg_nr,
1722 struct brw_reg src0,
1723 GLboolean allocate,
1724 GLboolean used,
1725 GLuint msg_length,
1726 GLuint response_length,
1727 GLboolean eot,
1728 GLboolean writes_complete,
1729 GLuint offset,
1730 GLuint swizzle)
1731 {
1732 struct intel_context *intel = &p->brw->intel;
1733 struct brw_instruction *insn;
1734
1735 /* Sandybridge doesn't have the implied move for SENDs,
1736 * and the first message register index comes from src0.
1737 */
1738 if (intel->gen >= 6) {
1739 brw_push_insn_state(p);
1740 brw_set_mask_control( p, BRW_MASK_DISABLE );
1741 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1742 brw_pop_insn_state(p);
1743 src0 = brw_message_reg(msg_reg_nr);
1744 }
1745
1746 insn = next_insn(p, BRW_OPCODE_SEND);
1747
1748 assert(msg_length < BRW_MAX_MRF);
1749
1750 brw_set_dest(insn, dest);
1751 brw_set_src0(insn, src0);
1752 brw_set_src1(insn, brw_imm_d(0));
1753
1754 if (intel->gen < 6)
1755 insn->header.destreg__conditionalmod = msg_reg_nr;
1756
1757 brw_set_urb_message(p->brw,
1758 insn,
1759 allocate,
1760 used,
1761 msg_length,
1762 response_length,
1763 eot,
1764 writes_complete,
1765 offset,
1766 swizzle);
1767 }
1768
1769 void brw_ff_sync(struct brw_compile *p,
1770 struct brw_reg dest,
1771 GLuint msg_reg_nr,
1772 struct brw_reg src0,
1773 GLboolean allocate,
1774 GLuint response_length,
1775 GLboolean eot)
1776 {
1777 struct intel_context *intel = &p->brw->intel;
1778 struct brw_instruction *insn;
1779
1780 /* Sandybridge doesn't have the implied move for SENDs,
1781 * and the first message register index comes from src0.
1782 */
1783 if (intel->gen >= 6) {
1784 brw_push_insn_state(p);
1785 brw_set_mask_control( p, BRW_MASK_DISABLE );
1786 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1787 brw_pop_insn_state(p);
1788 src0 = brw_message_reg(msg_reg_nr);
1789 }
1790
1791 insn = next_insn(p, BRW_OPCODE_SEND);
1792 brw_set_dest(insn, dest);
1793 brw_set_src0(insn, src0);
1794 brw_set_src1(insn, brw_imm_d(0));
1795
1796 if (intel->gen < 6)
1797 insn->header.destreg__conditionalmod = msg_reg_nr;
1798
1799 brw_set_ff_sync_message(p->brw,
1800 insn,
1801 allocate,
1802 response_length,
1803 eot);
1804 }