i965: Fix sampler on sandybridge
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
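/* For illustration: under the rule above, a BRW_WIDTH_8 register emitted
 * while compression_control is BRW_COMPRESSION_COMPRESSED gets
 * execution_size = BRW_EXECUTE_16; in every other case the execution size
 * simply mirrors the width encoding (e.g. BRW_WIDTH_4 -> BRW_EXECUTE_4),
 * which is what the "definitions are compatible" note relies on.
 */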
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even though this field is ignored in Align16 mode, it still needs to be set to '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even though this field is ignored in Align16 mode, it still needs to be set to '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters.  Restriction: when an immediate vector
119 * is used, the destination has to be 128-bit aligned and the
120 * destination horizontal stride has to be equivalent to a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.nr == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
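/* Worked example for the restrictions checked above (illustrative): the
 * common <8;8,1>:f region with an execution size of 8 passes, since
 * execsize (8) >= width (8) and vstride (8) == width * hstride (8 * 1).
 * A scalar <0;1,0> region passes as well: rule 6 requires hstride 0 when
 * width is 1, and rule 8 requires width 1 when both strides are 0.
 */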
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity arising from the fact that we use the same
249 * register descriptions in align_16 as in align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity arising from the fact that we use the same
317 * register descriptions in align_16 as in align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLuint pixel_scoreboard_clear,
452 GLuint response_length,
453 GLuint end_of_thread,
454 GLuint send_commit_msg)
455 {
456 struct intel_context *intel = &brw->intel;
457 brw_set_src1(insn, brw_imm_ud(0));
458
459 if (intel->gen >= 6) {
460 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
461 insn->bits3.dp_render_cache.msg_control = msg_control;
462 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
463 insn->bits3.dp_render_cache.msg_type = msg_type;
464 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
465 insn->bits3.dp_render_cache.header_present = 0; /* XXX */
466 insn->bits3.dp_render_cache.response_length = response_length;
467 insn->bits3.dp_render_cache.msg_length = msg_length;
468 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
469 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
470 /* XXX really need below? */
471 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
472 insn->bits2.send_gen5.end_of_thread = end_of_thread;
473 } else if (intel->gen == 5) {
474 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
475 insn->bits3.dp_write_gen5.msg_control = msg_control;
476 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
477 insn->bits3.dp_write_gen5.msg_type = msg_type;
478 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
479 insn->bits3.dp_write_gen5.header_present = 1;
480 insn->bits3.dp_write_gen5.response_length = response_length;
481 insn->bits3.dp_write_gen5.msg_length = msg_length;
482 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
483 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
484 insn->bits2.send_gen5.end_of_thread = end_of_thread;
485 } else {
486 insn->bits3.dp_write.binding_table_index = binding_table_index;
487 insn->bits3.dp_write.msg_control = msg_control;
488 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
489 insn->bits3.dp_write.msg_type = msg_type;
490 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
491 insn->bits3.dp_write.response_length = response_length;
492 insn->bits3.dp_write.msg_length = msg_length;
493 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
494 insn->bits3.dp_write.end_of_thread = end_of_thread;
495 }
496 }
497
498 static void brw_set_dp_read_message( struct brw_context *brw,
499 struct brw_instruction *insn,
500 GLuint binding_table_index,
501 GLuint msg_control,
502 GLuint msg_type,
503 GLuint target_cache,
504 GLuint msg_length,
505 GLuint response_length,
506 GLuint end_of_thread )
507 {
508 struct intel_context *intel = &brw->intel;
509 brw_set_src1(insn, brw_imm_d(0));
510
511 if (intel->gen == 5) {
512 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
513 insn->bits3.dp_read_gen5.msg_control = msg_control;
514 insn->bits3.dp_read_gen5.msg_type = msg_type;
515 insn->bits3.dp_read_gen5.target_cache = target_cache;
516 insn->bits3.dp_read_gen5.header_present = 1;
517 insn->bits3.dp_read_gen5.response_length = response_length;
518 insn->bits3.dp_read_gen5.msg_length = msg_length;
519 insn->bits3.dp_read_gen5.pad1 = 0;
520 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
521 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
522 insn->bits2.send_gen5.end_of_thread = end_of_thread;
523 } else {
524 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
525 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
526 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
527 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
528 insn->bits3.dp_read.response_length = response_length; /*16:19*/
529 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
530 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
531 insn->bits3.dp_read.pad1 = 0; /*28:30*/
532 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
533 }
534 }
535
536 static void brw_set_sampler_message(struct brw_context *brw,
537 struct brw_instruction *insn,
538 GLuint binding_table_index,
539 GLuint sampler,
540 GLuint msg_type,
541 GLuint response_length,
542 GLuint msg_length,
543 GLboolean eot,
544 GLuint header_present,
545 GLuint simd_mode)
546 {
547 struct intel_context *intel = &brw->intel;
548 assert(eot == 0);
549 brw_set_src1(insn, brw_imm_d(0));
550
551 if (intel->gen >= 5) {
552 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
553 insn->bits3.sampler_gen5.sampler = sampler;
554 insn->bits3.sampler_gen5.msg_type = msg_type;
555 insn->bits3.sampler_gen5.simd_mode = simd_mode;
556 insn->bits3.sampler_gen5.header_present = header_present;
557 insn->bits3.sampler_gen5.response_length = response_length;
558 insn->bits3.sampler_gen5.msg_length = msg_length;
559 insn->bits3.sampler_gen5.end_of_thread = eot;
560 if (intel->gen >= 6)
561 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
562 else {
563 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
564 insn->bits2.send_gen5.end_of_thread = eot;
565 }
566 } else if (intel->is_g4x) {
567 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
568 insn->bits3.sampler_g4x.sampler = sampler;
569 insn->bits3.sampler_g4x.msg_type = msg_type;
570 insn->bits3.sampler_g4x.response_length = response_length;
571 insn->bits3.sampler_g4x.msg_length = msg_length;
572 insn->bits3.sampler_g4x.end_of_thread = eot;
573 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
574 } else {
575 insn->bits3.sampler.binding_table_index = binding_table_index;
576 insn->bits3.sampler.sampler = sampler;
577 insn->bits3.sampler.msg_type = msg_type;
578 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
579 insn->bits3.sampler.response_length = response_length;
580 insn->bits3.sampler.msg_length = msg_length;
581 insn->bits3.sampler.end_of_thread = eot;
582 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
583 }
584 }
585
586
587
588 static struct brw_instruction *next_insn( struct brw_compile *p,
589 GLuint opcode )
590 {
591 struct brw_instruction *insn;
592
593 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
594
595 insn = &p->store[p->nr_insn++];
596 memcpy(insn, p->current, sizeof(*insn));
597
598 /* Reset this one-shot flag:
599 */
600
601 if (p->current->header.destreg__conditionalmod) {
602 p->current->header.destreg__conditionalmod = 0;
603 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
604 }
605
606 insn->header.opcode = opcode;
607 return insn;
608 }
609
610
611 static struct brw_instruction *brw_alu1( struct brw_compile *p,
612 GLuint opcode,
613 struct brw_reg dest,
614 struct brw_reg src )
615 {
616 struct brw_instruction *insn = next_insn(p, opcode);
617 brw_set_dest(insn, dest);
618 brw_set_src0(insn, src);
619 return insn;
620 }
621
622 static struct brw_instruction *brw_alu2(struct brw_compile *p,
623 GLuint opcode,
624 struct brw_reg dest,
625 struct brw_reg src0,
626 struct brw_reg src1 )
627 {
628 struct brw_instruction *insn = next_insn(p, opcode);
629 brw_set_dest(insn, dest);
630 brw_set_src0(insn, src0);
631 brw_set_src1(insn, src1);
632 return insn;
633 }
634
635
636 /***********************************************************************
637 * Convenience routines.
638 */
639 #define ALU1(OP) \
640 struct brw_instruction *brw_##OP(struct brw_compile *p, \
641 struct brw_reg dest, \
642 struct brw_reg src0) \
643 { \
644 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
645 }
646
647 #define ALU2(OP) \
648 struct brw_instruction *brw_##OP(struct brw_compile *p, \
649 struct brw_reg dest, \
650 struct brw_reg src0, \
651 struct brw_reg src1) \
652 { \
653 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
654 }
655
656
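/* For example, ALU1(MOV) below expands to the public entry point
 *
 *    struct brw_instruction *brw_MOV(struct brw_compile *p,
 *                                    struct brw_reg dest,
 *                                    struct brw_reg src0)
 *    {
 *       return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
 *    }
 *
 * and the two-source macros expand analogously through brw_alu2().
 */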
657 ALU1(MOV)
658 ALU2(SEL)
659 ALU1(NOT)
660 ALU2(AND)
661 ALU2(OR)
662 ALU2(XOR)
663 ALU2(SHR)
664 ALU2(SHL)
665 ALU2(RSR)
666 ALU2(RSL)
667 ALU2(ASR)
668 ALU1(FRC)
669 ALU1(RNDD)
670 ALU1(RNDZ)
671 ALU2(MAC)
672 ALU2(MACH)
673 ALU1(LZD)
674 ALU2(DP4)
675 ALU2(DPH)
676 ALU2(DP3)
677 ALU2(DP2)
678 ALU2(LINE)
679 ALU2(PLN)
680
681 struct brw_instruction *brw_ADD(struct brw_compile *p,
682 struct brw_reg dest,
683 struct brw_reg src0,
684 struct brw_reg src1)
685 {
686 /* 6.2.2: add */
687 if (src0.type == BRW_REGISTER_TYPE_F ||
688 (src0.file == BRW_IMMEDIATE_VALUE &&
689 src0.type == BRW_REGISTER_TYPE_VF)) {
690 assert(src1.type != BRW_REGISTER_TYPE_UD);
691 assert(src1.type != BRW_REGISTER_TYPE_D);
692 }
693
694 if (src1.type == BRW_REGISTER_TYPE_F ||
695 (src1.file == BRW_IMMEDIATE_VALUE &&
696 src1.type == BRW_REGISTER_TYPE_VF)) {
697 assert(src0.type != BRW_REGISTER_TYPE_UD);
698 assert(src0.type != BRW_REGISTER_TYPE_D);
699 }
700
701 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
702 }
703
704 struct brw_instruction *brw_MUL(struct brw_compile *p,
705 struct brw_reg dest,
706 struct brw_reg src0,
707 struct brw_reg src1)
708 {
709 /* 6.32.38: mul */
710 if (src0.type == BRW_REGISTER_TYPE_D ||
711 src0.type == BRW_REGISTER_TYPE_UD ||
712 src1.type == BRW_REGISTER_TYPE_D ||
713 src1.type == BRW_REGISTER_TYPE_UD) {
714 assert(dest.type != BRW_REGISTER_TYPE_F);
715 }
716
717 if (src0.type == BRW_REGISTER_TYPE_F ||
718 (src0.file == BRW_IMMEDIATE_VALUE &&
719 src0.type == BRW_REGISTER_TYPE_VF)) {
720 assert(src1.type != BRW_REGISTER_TYPE_UD);
721 assert(src1.type != BRW_REGISTER_TYPE_D);
722 }
723
724 if (src1.type == BRW_REGISTER_TYPE_F ||
725 (src1.file == BRW_IMMEDIATE_VALUE &&
726 src1.type == BRW_REGISTER_TYPE_VF)) {
727 assert(src0.type != BRW_REGISTER_TYPE_UD);
728 assert(src0.type != BRW_REGISTER_TYPE_D);
729 }
730
731 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
732 src0.nr != BRW_ARF_ACCUMULATOR);
733 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
734 src1.nr != BRW_ARF_ACCUMULATOR);
735
736 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
737 }
738
739
740 void brw_NOP(struct brw_compile *p)
741 {
742 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
743 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
744 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
745 brw_set_src1(insn, brw_imm_ud(0x0));
746 }
747
748
749
750
751
752 /***********************************************************************
753 * Comparisons, if/else/endif
754 */
755
756 struct brw_instruction *brw_JMPI(struct brw_compile *p,
757 struct brw_reg dest,
758 struct brw_reg src0,
759 struct brw_reg src1)
760 {
761 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
762
763 insn->header.execution_size = 1;
764 insn->header.compression_control = BRW_COMPRESSION_NONE;
765 insn->header.mask_control = BRW_MASK_DISABLE;
766
767 p->current->header.predicate_control = BRW_PREDICATE_NONE;
768
769 return insn;
770 }
771
772 /* EU takes the value from the flag register and pushes it onto some
773 * sort of a stack (presumably merging with any flag value already on
774 * the stack). Within an if block, the flags at the top of the stack
775 * control execution on each channel of the unit, e.g. on each of the
776 * 16 pixel values in our wm programs.
777 *
778 * When the matching 'else' instruction is reached (presumably by
779 * countdown of the instruction count patched in by our ELSE/ENDIF
780 * functions), the relevant flags are inverted.
781 *
782 * When the matching 'endif' instruction is reached, the flags are
783 * popped off. If the stack is now empty, normal execution resumes.
784 *
785 * No attempt is made to deal with stack overflow (14 elements?).
786 */
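/* A rough usage sketch from a caller's point of view (illustrative only,
 * not copied from any particular caller):
 *
 *    // flag register assumed to have been set by a preceding compare
 *    if_insn = brw_IF(p, BRW_EXECUTE_8);
 *       ... emit 'then' instructions ...
 *    if_insn = brw_ELSE(p, if_insn);
 *       ... emit 'else' instructions ...
 *    brw_ENDIF(p, if_insn);
 */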
787 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
788 {
789 struct brw_instruction *insn;
790
791 if (p->single_program_flow) {
792 assert(execute_size == BRW_EXECUTE_1);
793
794 insn = next_insn(p, BRW_OPCODE_ADD);
795 insn->header.predicate_inverse = 1;
796 } else {
797 insn = next_insn(p, BRW_OPCODE_IF);
798 }
799
800 /* Override the defaults for this instruction:
801 */
802 brw_set_dest(insn, brw_ip_reg());
803 brw_set_src0(insn, brw_ip_reg());
804 brw_set_src1(insn, brw_imm_d(0x0));
805
806 insn->header.execution_size = execute_size;
807 insn->header.compression_control = BRW_COMPRESSION_NONE;
808 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
809 insn->header.mask_control = BRW_MASK_ENABLE;
810 if (!p->single_program_flow)
811 insn->header.thread_control = BRW_THREAD_SWITCH;
812
813 p->current->header.predicate_control = BRW_PREDICATE_NONE;
814
815 return insn;
816 }
817
818
819 struct brw_instruction *brw_ELSE(struct brw_compile *p,
820 struct brw_instruction *if_insn)
821 {
822 struct intel_context *intel = &p->brw->intel;
823 struct brw_instruction *insn;
824 GLuint br = 1;
825
826 /* The jump count is in units of 64-bit chunks, so one 128-bit
827 instruction counts as 2 chunks. */
828 if (intel->gen >= 5)
829 br = 2;
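/* e.g. on gen5, jumping forward over three instructions is encoded
 * as a jump count of 2 * 3 = 6 chunks. */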
830
831 if (p->single_program_flow) {
832 insn = next_insn(p, BRW_OPCODE_ADD);
833 } else {
834 insn = next_insn(p, BRW_OPCODE_ELSE);
835 }
836
837 brw_set_dest(insn, brw_ip_reg());
838 brw_set_src0(insn, brw_ip_reg());
839 brw_set_src1(insn, brw_imm_d(0x0));
840
841 insn->header.compression_control = BRW_COMPRESSION_NONE;
842 insn->header.execution_size = if_insn->header.execution_size;
843 insn->header.mask_control = BRW_MASK_ENABLE;
844 if (!p->single_program_flow)
845 insn->header.thread_control = BRW_THREAD_SWITCH;
846
847 /* Patch the if instruction to point at this instruction.
848 */
849 if (p->single_program_flow) {
850 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
851
852 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
853 } else {
854 assert(if_insn->header.opcode == BRW_OPCODE_IF);
855
856 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
857 if_insn->bits3.if_else.pop_count = 0;
858 if_insn->bits3.if_else.pad0 = 0;
859 }
860
861 return insn;
862 }
863
864 void brw_ENDIF(struct brw_compile *p,
865 struct brw_instruction *patch_insn)
866 {
867 struct intel_context *intel = &p->brw->intel;
868 GLuint br = 1;
869
870 if (intel->gen >= 5)
871 br = 2;
872
873 if (p->single_program_flow) {
874 /* In single program flow mode, there's no need to execute an ENDIF,
875 * since we don't need to do any stack operations, and if we're currently
876 * executing, we just want to keep executing.
877 */
878 struct brw_instruction *next = &p->store[p->nr_insn];
879
880 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
881
882 patch_insn->bits3.ud = (next - patch_insn) * 16;
883 } else {
884 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
885
886 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
887 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
888 brw_set_src1(insn, brw_imm_d(0x0));
889
890 insn->header.compression_control = BRW_COMPRESSION_NONE;
891 insn->header.execution_size = patch_insn->header.execution_size;
892 insn->header.mask_control = BRW_MASK_ENABLE;
893 insn->header.thread_control = BRW_THREAD_SWITCH;
894
895 assert(patch_insn->bits3.if_else.jump_count == 0);
896
897 /* Patch the if or else instructions to point at this or the next
898 * instruction respectively.
899 */
900 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
901 /* Automagically turn it into an IFF:
902 */
903 patch_insn->header.opcode = BRW_OPCODE_IFF;
904 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
905 patch_insn->bits3.if_else.pop_count = 0;
906 patch_insn->bits3.if_else.pad0 = 0;
907 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
908 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
909 patch_insn->bits3.if_else.pop_count = 1;
910 patch_insn->bits3.if_else.pad0 = 0;
911 } else {
912 assert(0);
913 }
914
915 /* Also pop item off the stack in the endif instruction:
916 */
917 insn->bits3.if_else.jump_count = 0;
918 insn->bits3.if_else.pop_count = 1;
919 insn->bits3.if_else.pad0 = 0;
920 }
921 }
922
923 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
924 {
925 struct brw_instruction *insn;
926 insn = next_insn(p, BRW_OPCODE_BREAK);
927 brw_set_dest(insn, brw_ip_reg());
928 brw_set_src0(insn, brw_ip_reg());
929 brw_set_src1(insn, brw_imm_d(0x0));
930 insn->header.compression_control = BRW_COMPRESSION_NONE;
931 insn->header.execution_size = BRW_EXECUTE_8;
932 /* insn->header.mask_control = BRW_MASK_DISABLE; */
933 insn->bits3.if_else.pad0 = 0;
934 insn->bits3.if_else.pop_count = pop_count;
935 return insn;
936 }
937
938 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
939 {
940 struct brw_instruction *insn;
941 insn = next_insn(p, BRW_OPCODE_CONTINUE);
942 brw_set_dest(insn, brw_ip_reg());
943 brw_set_src0(insn, brw_ip_reg());
944 brw_set_src1(insn, brw_imm_d(0x0));
945 insn->header.compression_control = BRW_COMPRESSION_NONE;
946 insn->header.execution_size = BRW_EXECUTE_8;
947 /* insn->header.mask_control = BRW_MASK_DISABLE; */
948 insn->bits3.if_else.pad0 = 0;
949 insn->bits3.if_else.pop_count = pop_count;
950 return insn;
951 }
952
953 /* DO/WHILE loop:
954 */
955 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
956 {
957 if (p->single_program_flow) {
958 return &p->store[p->nr_insn];
959 } else {
960 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
961
962 /* Override the defaults for this instruction:
963 */
964 brw_set_dest(insn, brw_null_reg());
965 brw_set_src0(insn, brw_null_reg());
966 brw_set_src1(insn, brw_null_reg());
967
968 insn->header.compression_control = BRW_COMPRESSION_NONE;
969 insn->header.execution_size = execute_size;
970 insn->header.predicate_control = BRW_PREDICATE_NONE;
971 /* insn->header.mask_control = BRW_MASK_ENABLE; */
972 /* insn->header.mask_control = BRW_MASK_DISABLE; */
973
974 return insn;
975 }
976 }
977
978
979
980 struct brw_instruction *brw_WHILE(struct brw_compile *p,
981 struct brw_instruction *do_insn)
982 {
983 struct intel_context *intel = &p->brw->intel;
984 struct brw_instruction *insn;
985 GLuint br = 1;
986
987 if (intel->gen >= 5)
988 br = 2;
989
990 if (p->single_program_flow)
991 insn = next_insn(p, BRW_OPCODE_ADD);
992 else
993 insn = next_insn(p, BRW_OPCODE_WHILE);
994
995 brw_set_dest(insn, brw_ip_reg());
996 brw_set_src0(insn, brw_ip_reg());
997 brw_set_src1(insn, brw_imm_d(0x0));
998
999 insn->header.compression_control = BRW_COMPRESSION_NONE;
1000
1001 if (p->single_program_flow) {
1002 insn->header.execution_size = BRW_EXECUTE_1;
1003
1004 insn->bits3.d = (do_insn - insn) * 16;
1005 } else {
1006 insn->header.execution_size = do_insn->header.execution_size;
1007
1008 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1009 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1010 insn->bits3.if_else.pop_count = 0;
1011 insn->bits3.if_else.pad0 = 0;
1012 }
1013
1014 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1015
1016 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1017 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1018 return insn;
1019 }
1020
1021
1022 /* FORWARD JUMPS:
1023 */
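/* An illustrative (hypothetical) caller pattern:
 *
 *    jmp = brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
 *    ... emit the instructions to be skipped ...
 *    brw_land_fwd_jump(p, jmp);
 *
 * brw_land_fwd_jump() then patches the JMPI's immediate with the distance
 * to the current end of the instruction store.
 */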
1024 void brw_land_fwd_jump(struct brw_compile *p,
1025 struct brw_instruction *jmp_insn)
1026 {
1027 struct intel_context *intel = &p->brw->intel;
1028 struct brw_instruction *landing = &p->store[p->nr_insn];
1029 GLuint jmpi = 1;
1030
1031 if (intel->gen >= 5)
1032 jmpi = 2;
1033
1034 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1035 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1036
1037 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1038 }
1039
1040
1041
1042 /* To integrate with the above, it makes sense that the comparison
1043 * instruction should populate the flag register. It might be simpler
1044 * just to use the flag reg for most WM tasks?
1045 */
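/* Illustrative sketch (hypothetical operands): a caller might do
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, src, brw_imm_f(0.0f));
 *
 * and, because the destination is the null register, subsequent
 * instructions default to being predicated on the resulting flag value
 * until the predicate state is changed again.
 */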
1046 void brw_CMP(struct brw_compile *p,
1047 struct brw_reg dest,
1048 GLuint conditional,
1049 struct brw_reg src0,
1050 struct brw_reg src1)
1051 {
1052 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1053
1054 insn->header.destreg__conditionalmod = conditional;
1055 brw_set_dest(insn, dest);
1056 brw_set_src0(insn, src0);
1057 brw_set_src1(insn, src1);
1058
1059 /* guess_execution_size(insn, src0); */
1060
1061
1062 /* Make it so that future instructions will use the computed flag
1063 * value until brw_set_predicate_control_flag_value() is called
1064 * again.
1065 */
1066 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1067 dest.nr == 0) {
1068 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1069 p->flag_value = 0xff;
1070 }
1071 }
1072
1073 /* Issue a 'wait' instruction on notification register n1; the host can
1074 program MMIO to wake the thread back up. */
1075 void brw_WAIT (struct brw_compile *p)
1076 {
1077 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1078 struct brw_reg src = brw_notification_1_reg();
1079
1080 brw_set_dest(insn, src);
1081 brw_set_src0(insn, src);
1082 brw_set_src1(insn, brw_null_reg());
1083 insn->header.execution_size = 0; /* must */
1084 insn->header.predicate_control = 0;
1085 insn->header.compression_control = 0;
1086 }
1087
1088
1089 /***********************************************************************
1090 * Helpers for the various SEND message types:
1091 */
1092
1093 /** Extended math function, float[8].
1094 */
1095 void brw_math( struct brw_compile *p,
1096 struct brw_reg dest,
1097 GLuint function,
1098 GLuint saturate,
1099 GLuint msg_reg_nr,
1100 struct brw_reg src,
1101 GLuint data_type,
1102 GLuint precision )
1103 {
1104 struct intel_context *intel = &p->brw->intel;
1105
1106 if (intel->gen >= 6) {
1107 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1108
1109 /* Math is the same ISA format as other opcodes, except that CondModifier
1110 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1111 */
1112 insn->header.destreg__conditionalmod = function;
1113
1114 brw_set_dest(insn, dest);
1115 brw_set_src0(insn, src);
1116 brw_set_src1(insn, brw_null_reg());
1117 } else {
1118 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1119 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1120 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1121 /* Example code doesn't set predicate_control for send
1122 * instructions.
1123 */
1124 insn->header.predicate_control = 0;
1125 insn->header.destreg__conditionalmod = msg_reg_nr;
1126
1127 brw_set_dest(insn, dest);
1128 brw_set_src0(insn, src);
1129 brw_set_math_message(p->brw,
1130 insn,
1131 msg_length, response_length,
1132 function,
1133 BRW_MATH_INTEGER_UNSIGNED,
1134 precision,
1135 saturate,
1136 data_type);
1137 }
1138 }
1139
1140 /** Extended math function, float[8].
1141 */
1142 void brw_math2(struct brw_compile *p,
1143 struct brw_reg dest,
1144 GLuint function,
1145 struct brw_reg src0,
1146 struct brw_reg src1)
1147 {
1148 struct intel_context *intel = &p->brw->intel;
1149 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1150
1151 assert(intel->gen >= 6);
1152
1153 /* Math is the same ISA format as other opcodes, except that CondModifier
1154 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1155 */
1156 insn->header.destreg__conditionalmod = function;
1157
1158 brw_set_dest(insn, dest);
1159 brw_set_src0(insn, src0);
1160 brw_set_src1(insn, src1);
1161 }
1162
1163 /**
1164 * Extended math function, float[16].
1165 * Use 2 send instructions.
1166 */
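/* On pre-gen6 hardware, brw_math_16() below really does issue two SENDs:
 * the first covers the lower eight channels; the second is marked
 * BRW_COMPRESSION_2NDHALF, uses msg_reg_nr + 1, and writes to
 * offset(dest, 1).  On gen6 and later a single MATH instruction is
 * emitted instead.
 */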
1167 void brw_math_16( struct brw_compile *p,
1168 struct brw_reg dest,
1169 GLuint function,
1170 GLuint saturate,
1171 GLuint msg_reg_nr,
1172 struct brw_reg src,
1173 GLuint precision )
1174 {
1175 struct intel_context *intel = &p->brw->intel;
1176 struct brw_instruction *insn;
1177 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1178 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1179
1180 if (intel->gen >= 6) {
1181 insn = next_insn(p, BRW_OPCODE_MATH);
1182
1183 /* Math is the same ISA format as other opcodes, except that CondModifier
1184 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1185 */
1186 insn->header.destreg__conditionalmod = function;
1187
1188 brw_set_dest(insn, dest);
1189 brw_set_src0(insn, src);
1190 brw_set_src1(insn, brw_null_reg());
1191 return;
1192 }
1193
1194 /* First instruction:
1195 */
1196 brw_push_insn_state(p);
1197 brw_set_predicate_control_flag_value(p, 0xff);
1198 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1199
1200 insn = next_insn(p, BRW_OPCODE_SEND);
1201 insn->header.destreg__conditionalmod = msg_reg_nr;
1202
1203 brw_set_dest(insn, dest);
1204 brw_set_src0(insn, src);
1205 brw_set_math_message(p->brw,
1206 insn,
1207 msg_length, response_length,
1208 function,
1209 BRW_MATH_INTEGER_UNSIGNED,
1210 precision,
1211 saturate,
1212 BRW_MATH_DATA_VECTOR);
1213
1214 /* Second instruction:
1215 */
1216 insn = next_insn(p, BRW_OPCODE_SEND);
1217 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1218 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1219
1220 brw_set_dest(insn, offset(dest,1));
1221 brw_set_src0(insn, src);
1222 brw_set_math_message(p->brw,
1223 insn,
1224 msg_length, response_length,
1225 function,
1226 BRW_MATH_INTEGER_UNSIGNED,
1227 precision,
1228 saturate,
1229 BRW_MATH_DATA_VECTOR);
1230
1231 brw_pop_insn_state(p);
1232 }
1233
1234
1235 /**
1236 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1237 * Scratch offset should be a multiple of 64.
1238 * Used for register spilling.
1239 */
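/* A note on usage (inferred from the code below rather than from any spec,
 * so treat it as a sketch): msg_length is 3, i.e. one header register plus
 * two registers of data; this function only writes the scratch offset into
 * the header, and the caller is presumed to have placed the 16 dwords of
 * data in the message payload that follows the header.
 */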
1240 void brw_dp_WRITE_16( struct brw_compile *p,
1241 struct brw_reg src,
1242 GLuint scratch_offset )
1243 {
1244 struct intel_context *intel = &p->brw->intel;
1245 GLuint msg_reg_nr = 1;
1246 {
1247 brw_push_insn_state(p);
1248 brw_set_mask_control(p, BRW_MASK_DISABLE);
1249 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1250
1251 /* set message header global offset field (reg 0, element 2) */
1252 brw_MOV(p,
1253 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1254 brw_imm_d(scratch_offset));
1255
1256 brw_pop_insn_state(p);
1257 }
1258
1259 {
1260 GLuint msg_length = 3;
1261 struct brw_reg dest;
1262 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1263 int send_commit_msg;
1264
1265 insn->header.predicate_control = 0; /* XXX */
1266 insn->header.compression_control = BRW_COMPRESSION_NONE;
1267 insn->header.destreg__conditionalmod = msg_reg_nr;
1268
1269 /* Until gen6, writes followed by reads from the same location
1270 * are not guaranteed to be ordered unless write_commit is set.
1271 * If set, then a no-op write is issued to the destination
1272 * register to set a dependency, and a read from the destination
1273 * can be used to ensure the ordering.
1274 *
1275 * For gen6, only writes between different threads need ordering
1276 * protection. Our use of DP writes is all about register
1277 * spilling within a thread.
1278 */
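/* Concretely (following the comment above): pre-gen6 we ask for a commit
 * reply into g0 (send_commit_msg = 1, response_length = 1), so a later
 * read of g0 stalls until the scratch write has landed; on gen6+ we write
 * to the null register and skip the commit entirely.
 */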
1279 if (intel->gen >= 6) {
1280 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1281 send_commit_msg = 0;
1282 } else {
1283 dest = brw_uw16_grf(0, 0);
1284 send_commit_msg = 1;
1285 }
1286
1287 brw_set_dest(insn, dest);
1288 brw_set_src0(insn, src);
1289
1290 brw_set_dp_write_message(p->brw,
1291 insn,
1292 255, /* binding table index (255=stateless) */
1293 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1294 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1295 msg_length,
1296 0, /* pixel scoreboard */
1297 send_commit_msg, /* response_length */
1298 0, /* eot */
1299 send_commit_msg);
1300 }
1301 }
1302
1303
1304 /**
1305 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1306 * Scratch offset should be a multiple of 64.
1307 * Used for register spilling.
1308 */
1309 void brw_dp_READ_16( struct brw_compile *p,
1310 struct brw_reg dest,
1311 GLuint scratch_offset )
1312 {
1313 GLuint msg_reg_nr = 1;
1314 {
1315 brw_push_insn_state(p);
1316 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1317 brw_set_mask_control(p, BRW_MASK_DISABLE);
1318
1319 /* set message header global offset field (reg 0, element 2) */
1320 brw_MOV(p,
1321 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1322 brw_imm_d(scratch_offset));
1323
1324 brw_pop_insn_state(p);
1325 }
1326
1327 {
1328 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1329
1330 insn->header.predicate_control = 0; /* XXX */
1331 insn->header.compression_control = BRW_COMPRESSION_NONE;
1332 insn->header.destreg__conditionalmod = msg_reg_nr;
1333
1334 brw_set_dest(insn, dest); /* UW? */
1335 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1336
1337 brw_set_dp_read_message(p->brw,
1338 insn,
1339 255, /* binding table index (255=stateless) */
1340 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1341 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1342 1, /* target cache (render/scratch) */
1343 1, /* msg_length */
1344 2, /* response_length */
1345 0); /* eot */
1346 }
1347 }
1348
1349
1350 /**
1351 * Read a float[4] vector from the data port Data Cache (const buffer).
1352 * Location (in buffer) should be a multiple of 16.
1353 * Used for fetching shader constants.
1354 * If relAddr is true, we'll do an indirect fetch using the address register.
1355 */
1356 void brw_dp_READ_4( struct brw_compile *p,
1357 struct brw_reg dest,
1358 GLboolean relAddr,
1359 GLuint location,
1360 GLuint bind_table_index )
1361 {
1362 /* XXX: relAddr not implemented */
1363 GLuint msg_reg_nr = 1;
1364 {
1365 struct brw_reg b;
1366 brw_push_insn_state(p);
1367 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1368 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1369 brw_set_mask_control(p, BRW_MASK_DISABLE);
1370
1371 /* Setup MRF[1] with location/offset into const buffer */
1372 b = brw_message_reg(msg_reg_nr);
1373 b = retype(b, BRW_REGISTER_TYPE_UD);
1374 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1375 * when the docs say only dword[2] should be set. Hmmm. But it works.
1376 */
1377 brw_MOV(p, b, brw_imm_ud(location));
1378 brw_pop_insn_state(p);
1379 }
1380
1381 {
1382 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1383
1384 insn->header.predicate_control = BRW_PREDICATE_NONE;
1385 insn->header.compression_control = BRW_COMPRESSION_NONE;
1386 insn->header.destreg__conditionalmod = msg_reg_nr;
1387 insn->header.mask_control = BRW_MASK_DISABLE;
1388
1389 /* cast dest to a uword[8] vector */
1390 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1391
1392 brw_set_dest(insn, dest);
1393 brw_set_src0(insn, brw_null_reg());
1394
1395 brw_set_dp_read_message(p->brw,
1396 insn,
1397 bind_table_index,
1398 0, /* msg_control (0 means 1 Oword) */
1399 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1400 0, /* source cache = data cache */
1401 1, /* msg_length */
1402 1, /* response_length (1 Oword) */
1403 0); /* eot */
1404 }
1405 }
1406
1407
1408 /**
1409 * Read float[4] constant(s) from VS constant buffer.
1410 * For relative addressing, two float[4] constants will be read into 'dest'.
1411 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1412 */
1413 void brw_dp_READ_4_vs(struct brw_compile *p,
1414 struct brw_reg dest,
1415 GLuint location,
1416 GLuint bind_table_index)
1417 {
1418 struct brw_instruction *insn;
1419 GLuint msg_reg_nr = 1;
1420 struct brw_reg b;
1421
1422 /*
1423 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1424 location, msg_reg_nr);
1425 */
1426
1427 /* Setup MRF[1] with location/offset into const buffer */
1428 brw_push_insn_state(p);
1429 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1430 brw_set_mask_control(p, BRW_MASK_DISABLE);
1431 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1432
1433 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1434 * when the docs say only dword[2] should be set. Hmmm. But it works.
1435 */
1436 b = brw_message_reg(msg_reg_nr);
1437 b = retype(b, BRW_REGISTER_TYPE_UD);
1438 /*b = get_element_ud(b, 2);*/
1439 brw_MOV(p, b, brw_imm_ud(location));
1440
1441 brw_pop_insn_state(p);
1442
1443 insn = next_insn(p, BRW_OPCODE_SEND);
1444
1445 insn->header.predicate_control = BRW_PREDICATE_NONE;
1446 insn->header.compression_control = BRW_COMPRESSION_NONE;
1447 insn->header.destreg__conditionalmod = msg_reg_nr;
1448 insn->header.mask_control = BRW_MASK_DISABLE;
1449
1450 brw_set_dest(insn, dest);
1451 brw_set_src0(insn, brw_null_reg());
1452
1453 brw_set_dp_read_message(p->brw,
1454 insn,
1455 bind_table_index,
1456 0,
1457 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1458 0, /* source cache = data cache */
1459 1, /* msg_length */
1460 1, /* response_length (1 Oword) */
1461 0); /* eot */
1462 }
1463
1464 /**
1465 * Read a float[4] constant per vertex from VS constant buffer, with
1466 * relative addressing.
1467 */
1468 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1469 struct brw_reg dest,
1470 struct brw_reg addr_reg,
1471 GLuint offset,
1472 GLuint bind_table_index)
1473 {
1474 struct intel_context *intel = &p->brw->intel;
1475 int msg_type;
1476
1477 /* Setup MRF[1] with offset into const buffer */
1478 brw_push_insn_state(p);
1479 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1480 brw_set_mask_control(p, BRW_MASK_DISABLE);
1481 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1482
1483 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1484 * fields ignored.
1485 */
1486 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1487 addr_reg, brw_imm_d(offset));
1488 brw_pop_insn_state(p);
1489
1490 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1491
1492 insn->header.predicate_control = BRW_PREDICATE_NONE;
1493 insn->header.compression_control = BRW_COMPRESSION_NONE;
1494 insn->header.destreg__conditionalmod = 0;
1495 insn->header.mask_control = BRW_MASK_DISABLE;
1496
1497 brw_set_dest(insn, dest);
1498 brw_set_src0(insn, brw_vec8_grf(0, 0));
1499
1500 if (intel->gen == 6)
1501 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1502 else if (intel->gen == 5 || intel->is_g4x)
1503 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1504 else
1505 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1506
1507 brw_set_dp_read_message(p->brw,
1508 insn,
1509 bind_table_index,
1510 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1511 msg_type,
1512 0, /* source cache = data cache */
1513 2, /* msg_length */
1514 1, /* response_length */
1515 0); /* eot */
1516 }
1517
1518
1519
1520 void brw_fb_WRITE(struct brw_compile *p,
1521 int dispatch_width,
1522 struct brw_reg dest,
1523 GLuint msg_reg_nr,
1524 struct brw_reg src0,
1525 GLuint binding_table_index,
1526 GLuint msg_length,
1527 GLuint response_length,
1528 GLboolean eot)
1529 {
1530 struct intel_context *intel = &p->brw->intel;
1531 struct brw_instruction *insn;
1532 GLuint msg_control, msg_type;
1533
1534 insn = next_insn(p, BRW_OPCODE_SEND);
1535 insn->header.predicate_control = 0; /* XXX */
1536 insn->header.compression_control = BRW_COMPRESSION_NONE;
1537
1538 if (intel->gen >= 6) {
1539 /* headerless version, just submit color payload */
1540 src0 = brw_message_reg(msg_reg_nr);
1541
1542 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1543 } else {
1544 insn->header.destreg__conditionalmod = msg_reg_nr;
1545
1546 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1547 }
1548
1549 if (dispatch_width == 16)
1550 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1551 else
1552 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1553
1554 brw_set_dest(insn, dest);
1555 brw_set_src0(insn, src0);
1556 brw_set_dp_write_message(p->brw,
1557 insn,
1558 binding_table_index,
1559 msg_control,
1560 msg_type,
1561 msg_length,
1562 1, /* pixel scoreboard */
1563 response_length,
1564 eot,
1565 0 /* send_commit_msg */);
1566 }
1567
1568
1569 /**
1570 * Texture sample instruction.
1571 * Note: the msg_type plus msg_length values determine exactly what kind
1572 * of sampling operation is performed. See volume 4, page 161 of docs.
1573 */
1574 void brw_SAMPLE(struct brw_compile *p,
1575 struct brw_reg dest,
1576 GLuint msg_reg_nr,
1577 struct brw_reg src0,
1578 GLuint binding_table_index,
1579 GLuint sampler,
1580 GLuint writemask,
1581 GLuint msg_type,
1582 GLuint response_length,
1583 GLuint msg_length,
1584 GLboolean eot,
1585 GLuint header_present,
1586 GLuint simd_mode)
1587 {
1588 struct intel_context *intel = &p->brw->intel;
1589 GLboolean need_stall = 0;
1590
1591 if (writemask == 0) {
1592 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1593 return;
1594 }
1595
1596 /* Hardware doesn't do destination dependency checking on send
1597 * instructions properly. Add a workaround which generates the
1598 * dependency by other means. In practice it seems like this bug
1599 * only crops up for texture samples, and only where registers are
1600 * written by the send and then written again later without being
1601 * read in between. Luckily for us, we already track that
1602 * information and use it to modify the writemask for the
1603 * instruction, so that is a guide for whether a workaround is
1604 * needed.
1605 */
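/* For example (illustrative): writemask = WRITEMASK_YZ is a single
 * contiguous run starting at Y, so the code below just asks the sampler
 * for those channels via the message header and offsets the destination;
 * writemask = WRITEMASK_XZ is not contiguous, so we fall back to the
 * dependency-stall MOV emitted at the end of this function.
 */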
1606 if (writemask != WRITEMASK_XYZW) {
1607 GLuint dst_offset = 0;
1608 GLuint i, newmask = 0, len = 0;
1609
1610 for (i = 0; i < 4; i++) {
1611 if (writemask & (1<<i))
1612 break;
1613 dst_offset += 2;
1614 }
1615 for (; i < 4; i++) {
1616 if (!(writemask & (1<<i)))
1617 break;
1618 newmask |= 1<<i;
1619 len++;
1620 }
1621
1622 if (newmask != writemask) {
1623 need_stall = 1;
1624 /* printf("need stall %x %x\n", newmask , writemask); */
1625 }
1626 else {
1627 GLboolean dispatch_16 = GL_FALSE;
1628
1629 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1630
1631 guess_execution_size(p->current, dest);
1632 if (p->current->header.execution_size == BRW_EXECUTE_16)
1633 dispatch_16 = GL_TRUE;
1634
1635 newmask = ~newmask & WRITEMASK_XYZW;
1636
1637 brw_push_insn_state(p);
1638
1639 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1640 brw_set_mask_control(p, BRW_MASK_DISABLE);
1641
1642 brw_MOV(p, m1, brw_vec8_grf(0,0));
1643 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1644
1645 brw_pop_insn_state(p);
1646
1647 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1648 dest = offset(dest, dst_offset);
1649
1650 /* For 16-wide dispatch, masked channels are skipped in the
1651 * response. For 8-wide, masked channels still take up slots,
1652 * and are just not written to.
1653 */
1654 if (dispatch_16)
1655 response_length = len * 2;
1656 }
1657 }
1658
1659 {
1660 struct brw_instruction *insn;
1661
1662 /* Sandybridge doesn't have the implied move for SENDs,
1663 * and the first message register index comes from src0.
1664 */
1665 if (intel->gen >= 6) {
1666 brw_push_insn_state(p);
1667 brw_set_mask_control( p, BRW_MASK_DISABLE );
1668 /* m1 contains header? */
1669 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1670 brw_pop_insn_state(p);
1671 src0 = brw_message_reg(msg_reg_nr);
1672 }
1673
1674 insn = next_insn(p, BRW_OPCODE_SEND);
1675 insn->header.predicate_control = 0; /* XXX */
1676 insn->header.compression_control = BRW_COMPRESSION_NONE;
1677 if (intel->gen < 6)
1678 insn->header.destreg__conditionalmod = msg_reg_nr;
1679
1680 brw_set_dest(insn, dest);
1681 brw_set_src0(insn, src0);
1682 brw_set_sampler_message(p->brw, insn,
1683 binding_table_index,
1684 sampler,
1685 msg_type,
1686 response_length,
1687 msg_length,
1688 eot,
1689 header_present,
1690 simd_mode);
1691 }
1692
1693 if (need_stall) {
1694 struct brw_reg reg = vec8(offset(dest, response_length-1));
1695
1696 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1697 */
1698 brw_push_insn_state(p);
1699 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1700 brw_MOV(p, reg, reg);
1701 brw_pop_insn_state(p);
1702 }
1703
1704 }
1705
1706 /* All these variables are pretty confusing - we might be better off
1707 * using bitmasks and macros for this, in the old style. Or perhaps
1708 * just having the caller instantiate the fields in dword3 itself.
1709 */
1710 void brw_urb_WRITE(struct brw_compile *p,
1711 struct brw_reg dest,
1712 GLuint msg_reg_nr,
1713 struct brw_reg src0,
1714 GLboolean allocate,
1715 GLboolean used,
1716 GLuint msg_length,
1717 GLuint response_length,
1718 GLboolean eot,
1719 GLboolean writes_complete,
1720 GLuint offset,
1721 GLuint swizzle)
1722 {
1723 struct intel_context *intel = &p->brw->intel;
1724 struct brw_instruction *insn;
1725
1726 /* Sandybridge doesn't have the implied move for SENDs,
1727 * and the first message register index comes from src0.
1728 */
1729 if (intel->gen >= 6) {
1730 brw_push_insn_state(p);
1731 brw_set_mask_control( p, BRW_MASK_DISABLE );
1732 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1733 brw_pop_insn_state(p);
1734 src0 = brw_message_reg(msg_reg_nr);
1735 }
1736
1737 insn = next_insn(p, BRW_OPCODE_SEND);
1738
1739 assert(msg_length < BRW_MAX_MRF);
1740
1741 brw_set_dest(insn, dest);
1742 brw_set_src0(insn, src0);
1743 brw_set_src1(insn, brw_imm_d(0));
1744
1745 if (intel->gen < 6)
1746 insn->header.destreg__conditionalmod = msg_reg_nr;
1747
1748 brw_set_urb_message(p->brw,
1749 insn,
1750 allocate,
1751 used,
1752 msg_length,
1753 response_length,
1754 eot,
1755 writes_complete,
1756 offset,
1757 swizzle);
1758 }
1759
1760 void brw_ff_sync(struct brw_compile *p,
1761 struct brw_reg dest,
1762 GLuint msg_reg_nr,
1763 struct brw_reg src0,
1764 GLboolean allocate,
1765 GLuint response_length,
1766 GLboolean eot)
1767 {
1768 struct intel_context *intel = &p->brw->intel;
1769 struct brw_instruction *insn;
1770
1771 /* Sandybridge doesn't have the implied move for SENDs,
1772 * and the first message register index comes from src0.
1773 */
1774 if (intel->gen >= 6) {
1775 brw_push_insn_state(p);
1776 brw_set_mask_control( p, BRW_MASK_DISABLE );
1777 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1778 brw_pop_insn_state(p);
1779 src0 = brw_message_reg(msg_reg_nr);
1780 }
1781
1782 insn = next_insn(p, BRW_OPCODE_SEND);
1783 brw_set_dest(insn, dest);
1784 brw_set_src0(insn, src0);
1785 brw_set_src1(insn, brw_imm_d(0));
1786
1787 if (intel->gen < 6)
1788 insn->header.destreg__conditionalmod = msg_reg_nr;
1789
1790 brw_set_ff_sync_message(p->brw,
1791 insn,
1792 allocate,
1793 response_length,
1794 eot);
1795 }