Merge commit 'origin/graw-tests'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 }
79 }
80 else {
81 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83 /* These are different sizes in align1 vs align16:
84 */
85 if (insn->header.access_mode == BRW_ALIGN_1) {
86 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90 }
91 else {
92 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93 }
94 }
95
96 /* NEW: Set the execution size based on dest.width and
97 * insn->compression_control:
98 */
99 guess_execution_size(insn, dest);
100 }
101
102 static void brw_set_src0( struct brw_instruction *insn,
103 struct brw_reg reg )
104 {
105 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
106 assert(reg.nr < 128);
107
108 insn->bits1.da1.src0_reg_file = reg.file;
109 insn->bits1.da1.src0_reg_type = reg.type;
110 insn->bits2.da1.src0_abs = reg.abs;
111 insn->bits2.da1.src0_negate = reg.negate;
112 insn->bits2.da1.src0_address_mode = reg.address_mode;
113
114 if (reg.file == BRW_IMMEDIATE_VALUE) {
115 insn->bits3.ud = reg.dw1.ud;
116
117 /* Required to set some fields in src1 as well:
118 */
119 insn->bits1.da1.src1_reg_file = 0; /* arf */
120 insn->bits1.da1.src1_reg_type = reg.type;
121 }
122 else
123 {
124 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
125 if (insn->header.access_mode == BRW_ALIGN_1) {
126 insn->bits2.da1.src0_subreg_nr = reg.subnr;
127 insn->bits2.da1.src0_reg_nr = reg.nr;
128 }
129 else {
130 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
131 insn->bits2.da16.src0_reg_nr = reg.nr;
132 }
133 }
134 else {
135 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
136
137 if (insn->header.access_mode == BRW_ALIGN_1) {
138 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
139 }
140 else {
141 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
142 }
143 }
144
145 if (insn->header.access_mode == BRW_ALIGN_1) {
146 if (reg.width == BRW_WIDTH_1 &&
147 insn->header.execution_size == BRW_EXECUTE_1) {
148 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
149 insn->bits2.da1.src0_width = BRW_WIDTH_1;
150 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
151 }
152 else {
153 insn->bits2.da1.src0_horiz_stride = reg.hstride;
154 insn->bits2.da1.src0_width = reg.width;
155 insn->bits2.da1.src0_vert_stride = reg.vstride;
156 }
157 }
158 else {
159 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
160 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
161 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
162 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
163
164 /* This is an oddity of the fact we're using the same
165 * descriptions for registers in align_16 as align_1:
166 */
167 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
168 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
169 else
170 insn->bits2.da16.src0_vert_stride = reg.vstride;
171 }
172 }
173 }
174
175
176 void brw_set_src1( struct brw_instruction *insn,
177 struct brw_reg reg )
178 {
179 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
180
181 assert(reg.nr < 128);
182
183 insn->bits1.da1.src1_reg_file = reg.file;
184 insn->bits1.da1.src1_reg_type = reg.type;
185 insn->bits3.da1.src1_abs = reg.abs;
186 insn->bits3.da1.src1_negate = reg.negate;
187
188 /* Only src1 can be immediate in two-argument instructions.
189 */
190 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
191
192 if (reg.file == BRW_IMMEDIATE_VALUE) {
193 insn->bits3.ud = reg.dw1.ud;
194 }
195 else {
196 /* This is a hardware restriction, which may or may not be lifted
197 * in the future:
198 */
199 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
200 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
201
202 if (insn->header.access_mode == BRW_ALIGN_1) {
203 insn->bits3.da1.src1_subreg_nr = reg.subnr;
204 insn->bits3.da1.src1_reg_nr = reg.nr;
205 }
206 else {
207 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
208 insn->bits3.da16.src1_reg_nr = reg.nr;
209 }
210
211 if (insn->header.access_mode == BRW_ALIGN_1) {
212 if (reg.width == BRW_WIDTH_1 &&
213 insn->header.execution_size == BRW_EXECUTE_1) {
214 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
215 insn->bits3.da1.src1_width = BRW_WIDTH_1;
216 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
217 }
218 else {
219 insn->bits3.da1.src1_horiz_stride = reg.hstride;
220 insn->bits3.da1.src1_width = reg.width;
221 insn->bits3.da1.src1_vert_stride = reg.vstride;
222 }
223 }
224 else {
225 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
226 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
227 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
228 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
229
230 /* This is an oddity of the fact we're using the same
231 * descriptions for registers in align_16 as align_1:
232 */
233 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
234 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
235 else
236 insn->bits3.da16.src1_vert_stride = reg.vstride;
237 }
238 }
239 }
240
241
242
243 static void brw_set_math_message( struct brw_context *brw,
244 struct brw_instruction *insn,
245 GLuint msg_length,
246 GLuint response_length,
247 GLuint function,
248 GLuint integer_type,
249 GLboolean low_precision,
250 GLboolean saturate,
251 GLuint dataType )
252 {
253 struct intel_context *intel = &brw->intel;
254 brw_set_src1(insn, brw_imm_d(0));
255
256 if (intel->gen == 5) {
257 insn->bits3.math_gen5.function = function;
258 insn->bits3.math_gen5.int_type = integer_type;
259 insn->bits3.math_gen5.precision = low_precision;
260 insn->bits3.math_gen5.saturate = saturate;
261 insn->bits3.math_gen5.data_type = dataType;
262 insn->bits3.math_gen5.snapshot = 0;
263 insn->bits3.math_gen5.header_present = 0;
264 insn->bits3.math_gen5.response_length = response_length;
265 insn->bits3.math_gen5.msg_length = msg_length;
266 insn->bits3.math_gen5.end_of_thread = 0;
267 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
268 insn->bits2.send_gen5.end_of_thread = 0;
269 } else {
270 insn->bits3.math.function = function;
271 insn->bits3.math.int_type = integer_type;
272 insn->bits3.math.precision = low_precision;
273 insn->bits3.math.saturate = saturate;
274 insn->bits3.math.data_type = dataType;
275 insn->bits3.math.response_length = response_length;
276 insn->bits3.math.msg_length = msg_length;
277 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
278 insn->bits3.math.end_of_thread = 0;
279 }
280 }
281
282
283 static void brw_set_ff_sync_message(struct brw_context *brw,
284 struct brw_instruction *insn,
285 GLboolean allocate,
286 GLuint response_length,
287 GLboolean end_of_thread)
288 {
289 brw_set_src1(insn, brw_imm_d(0));
290
291 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
292 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
293 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
294 insn->bits3.urb_gen5.allocate = allocate;
295 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
296 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
297 insn->bits3.urb_gen5.header_present = 1;
298 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
299 insn->bits3.urb_gen5.msg_length = 1;
300 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
301 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
302 insn->bits2.send_gen5.end_of_thread = end_of_thread;
303 }
304
305 static void brw_set_urb_message( struct brw_context *brw,
306 struct brw_instruction *insn,
307 GLboolean allocate,
308 GLboolean used,
309 GLuint msg_length,
310 GLuint response_length,
311 GLboolean end_of_thread,
312 GLboolean complete,
313 GLuint offset,
314 GLuint swizzle_control )
315 {
316 struct intel_context *intel = &brw->intel;
317 brw_set_src1(insn, brw_imm_d(0));
318
319 if (intel->gen >= 5) {
320 insn->bits3.urb_gen5.opcode = 0; /* ? */
321 insn->bits3.urb_gen5.offset = offset;
322 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
323 insn->bits3.urb_gen5.allocate = allocate;
324 insn->bits3.urb_gen5.used = used; /* ? */
325 insn->bits3.urb_gen5.complete = complete;
326 insn->bits3.urb_gen5.header_present = 1;
327 insn->bits3.urb_gen5.response_length = response_length;
328 insn->bits3.urb_gen5.msg_length = msg_length;
329 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
330 if (intel->gen >= 6) {
331 /* For SNB, the SFID bits moved to the condmod bits, and
332 * EOT stayed in bits3 above. Does the EOT bit setting
333 * below on Ironlake even do anything?
334 */
335 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
336 } else {
337 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
338 insn->bits2.send_gen5.end_of_thread = end_of_thread;
339 }
340 } else {
341 insn->bits3.urb.opcode = 0; /* ? */
342 insn->bits3.urb.offset = offset;
343 insn->bits3.urb.swizzle_control = swizzle_control;
344 insn->bits3.urb.allocate = allocate;
345 insn->bits3.urb.used = used; /* ? */
346 insn->bits3.urb.complete = complete;
347 insn->bits3.urb.response_length = response_length;
348 insn->bits3.urb.msg_length = msg_length;
349 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
350 insn->bits3.urb.end_of_thread = end_of_thread;
351 }
352 }
353
354 static void brw_set_dp_write_message( struct brw_context *brw,
355 struct brw_instruction *insn,
356 GLuint binding_table_index,
357 GLuint msg_control,
358 GLuint msg_type,
359 GLuint msg_length,
360 GLuint pixel_scoreboard_clear,
361 GLuint response_length,
362 GLuint end_of_thread )
363 {
364 struct intel_context *intel = &brw->intel;
365 brw_set_src1(insn, brw_imm_d(0));
366
367 if (intel->gen == 5) {
368 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
369 insn->bits3.dp_write_gen5.msg_control = msg_control;
370 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
371 insn->bits3.dp_write_gen5.msg_type = msg_type;
372 insn->bits3.dp_write_gen5.send_commit_msg = 0;
373 insn->bits3.dp_write_gen5.header_present = 1;
374 insn->bits3.dp_write_gen5.response_length = response_length;
375 insn->bits3.dp_write_gen5.msg_length = msg_length;
376 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
377 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
378 insn->bits2.send_gen5.end_of_thread = end_of_thread;
379 } else {
380 insn->bits3.dp_write.binding_table_index = binding_table_index;
381 insn->bits3.dp_write.msg_control = msg_control;
382 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
383 insn->bits3.dp_write.msg_type = msg_type;
384 insn->bits3.dp_write.send_commit_msg = 0;
385 insn->bits3.dp_write.response_length = response_length;
386 insn->bits3.dp_write.msg_length = msg_length;
387 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
388 insn->bits3.dp_write.end_of_thread = end_of_thread;
389 }
390 }
391
392 static void brw_set_dp_read_message( struct brw_context *brw,
393 struct brw_instruction *insn,
394 GLuint binding_table_index,
395 GLuint msg_control,
396 GLuint msg_type,
397 GLuint target_cache,
398 GLuint msg_length,
399 GLuint response_length,
400 GLuint end_of_thread )
401 {
402 struct intel_context *intel = &brw->intel;
403 brw_set_src1(insn, brw_imm_d(0));
404
405 if (intel->gen == 5) {
406 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
407 insn->bits3.dp_read_gen5.msg_control = msg_control;
408 insn->bits3.dp_read_gen5.msg_type = msg_type;
409 insn->bits3.dp_read_gen5.target_cache = target_cache;
410 insn->bits3.dp_read_gen5.header_present = 1;
411 insn->bits3.dp_read_gen5.response_length = response_length;
412 insn->bits3.dp_read_gen5.msg_length = msg_length;
413 insn->bits3.dp_read_gen5.pad1 = 0;
414 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
415 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
416 insn->bits2.send_gen5.end_of_thread = end_of_thread;
417 } else {
418 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
419 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
420 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
421 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
422 insn->bits3.dp_read.response_length = response_length; /*16:19*/
423 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
424 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
425 insn->bits3.dp_read.pad1 = 0; /*28:30*/
426 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
427 }
428 }
429
430 static void brw_set_sampler_message(struct brw_context *brw,
431 struct brw_instruction *insn,
432 GLuint binding_table_index,
433 GLuint sampler,
434 GLuint msg_type,
435 GLuint response_length,
436 GLuint msg_length,
437 GLboolean eot,
438 GLuint header_present,
439 GLuint simd_mode)
440 {
441 struct intel_context *intel = &brw->intel;
442 assert(eot == 0);
443 brw_set_src1(insn, brw_imm_d(0));
444
445 if (intel->gen == 5) {
446 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
447 insn->bits3.sampler_gen5.sampler = sampler;
448 insn->bits3.sampler_gen5.msg_type = msg_type;
449 insn->bits3.sampler_gen5.simd_mode = simd_mode;
450 insn->bits3.sampler_gen5.header_present = header_present;
451 insn->bits3.sampler_gen5.response_length = response_length;
452 insn->bits3.sampler_gen5.msg_length = msg_length;
453 insn->bits3.sampler_gen5.end_of_thread = eot;
454 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
455 insn->bits2.send_gen5.end_of_thread = eot;
456 } else if (intel->is_g4x) {
457 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
458 insn->bits3.sampler_g4x.sampler = sampler;
459 insn->bits3.sampler_g4x.msg_type = msg_type;
460 insn->bits3.sampler_g4x.response_length = response_length;
461 insn->bits3.sampler_g4x.msg_length = msg_length;
462 insn->bits3.sampler_g4x.end_of_thread = eot;
463 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
464 } else {
465 insn->bits3.sampler.binding_table_index = binding_table_index;
466 insn->bits3.sampler.sampler = sampler;
467 insn->bits3.sampler.msg_type = msg_type;
468 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
469 insn->bits3.sampler.response_length = response_length;
470 insn->bits3.sampler.msg_length = msg_length;
471 insn->bits3.sampler.end_of_thread = eot;
472 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
473 }
474 }
475
476
477
478 static struct brw_instruction *next_insn( struct brw_compile *p,
479 GLuint opcode )
480 {
481 struct brw_instruction *insn;
482
483 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
484
485 insn = &p->store[p->nr_insn++];
486 memcpy(insn, p->current, sizeof(*insn));
487
488 /* Reset this one-shot flag:
489 */
490
491 if (p->current->header.destreg__conditionalmod) {
492 p->current->header.destreg__conditionalmod = 0;
493 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
494 }
495
496 insn->header.opcode = opcode;
497 return insn;
498 }
499
500
501 static struct brw_instruction *brw_alu1( struct brw_compile *p,
502 GLuint opcode,
503 struct brw_reg dest,
504 struct brw_reg src )
505 {
506 struct brw_instruction *insn = next_insn(p, opcode);
507 brw_set_dest(insn, dest);
508 brw_set_src0(insn, src);
509 return insn;
510 }
511
512 static struct brw_instruction *brw_alu2(struct brw_compile *p,
513 GLuint opcode,
514 struct brw_reg dest,
515 struct brw_reg src0,
516 struct brw_reg src1 )
517 {
518 struct brw_instruction *insn = next_insn(p, opcode);
519 brw_set_dest(insn, dest);
520 brw_set_src0(insn, src0);
521 brw_set_src1(insn, src1);
522 return insn;
523 }
524
525
526 /***********************************************************************
527 * Convenience routines.
528 */
529 #define ALU1(OP) \
530 struct brw_instruction *brw_##OP(struct brw_compile *p, \
531 struct brw_reg dest, \
532 struct brw_reg src0) \
533 { \
534 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
535 }
536
537 #define ALU2(OP) \
538 struct brw_instruction *brw_##OP(struct brw_compile *p, \
539 struct brw_reg dest, \
540 struct brw_reg src0, \
541 struct brw_reg src1) \
542 { \
543 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
544 }
545
546
547 ALU1(MOV)
548 ALU2(SEL)
549 ALU1(NOT)
550 ALU2(AND)
551 ALU2(OR)
552 ALU2(XOR)
553 ALU2(SHR)
554 ALU2(SHL)
555 ALU2(RSR)
556 ALU2(RSL)
557 ALU2(ASR)
558 ALU2(ADD)
559 ALU2(MUL)
560 ALU1(FRC)
561 ALU1(RNDD)
562 ALU1(RNDZ)
563 ALU2(MAC)
564 ALU2(MACH)
565 ALU1(LZD)
566 ALU2(DP4)
567 ALU2(DPH)
568 ALU2(DP3)
569 ALU2(DP2)
570 ALU2(LINE)
571 ALU2(PLN)
572
573
574
575 void brw_NOP(struct brw_compile *p)
576 {
577 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
578 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
579 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
580 brw_set_src1(insn, brw_imm_ud(0x0));
581 }
582
583
584
585
586
587 /***********************************************************************
588 * Comparisons, if/else/endif
589 */
590
591 struct brw_instruction *brw_JMPI(struct brw_compile *p,
592 struct brw_reg dest,
593 struct brw_reg src0,
594 struct brw_reg src1)
595 {
596 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
597
598 insn->header.execution_size = 1;
599 insn->header.compression_control = BRW_COMPRESSION_NONE;
600 insn->header.mask_control = BRW_MASK_DISABLE;
601
602 p->current->header.predicate_control = BRW_PREDICATE_NONE;
603
604 return insn;
605 }
606
607 /* EU takes the value from the flag register and pushes it onto some
608 * sort of a stack (presumably merging with any flag value already on
609 * the stack). Within an if block, the flags at the top of the stack
610 * control execution on each channel of the unit, eg. on each of the
611 * 16 pixel values in our wm programs.
612 *
613 * When the matching 'else' instruction is reached (presumably by
614 * countdown of the instruction count patched in by our ELSE/ENDIF
615 * functions), the relevent flags are inverted.
616 *
617 * When the matching 'endif' instruction is reached, the flags are
618 * popped off. If the stack is now empty, normal execution resumes.
619 *
620 * No attempt is made to deal with stack overflow (14 elements?).
621 */
622 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
623 {
624 struct brw_instruction *insn;
625
626 if (p->single_program_flow) {
627 assert(execute_size == BRW_EXECUTE_1);
628
629 insn = next_insn(p, BRW_OPCODE_ADD);
630 insn->header.predicate_inverse = 1;
631 } else {
632 insn = next_insn(p, BRW_OPCODE_IF);
633 }
634
635 /* Override the defaults for this instruction:
636 */
637 brw_set_dest(insn, brw_ip_reg());
638 brw_set_src0(insn, brw_ip_reg());
639 brw_set_src1(insn, brw_imm_d(0x0));
640
641 insn->header.execution_size = execute_size;
642 insn->header.compression_control = BRW_COMPRESSION_NONE;
643 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
644 insn->header.mask_control = BRW_MASK_ENABLE;
645 if (!p->single_program_flow)
646 insn->header.thread_control = BRW_THREAD_SWITCH;
647
648 p->current->header.predicate_control = BRW_PREDICATE_NONE;
649
650 return insn;
651 }
652
653
654 struct brw_instruction *brw_ELSE(struct brw_compile *p,
655 struct brw_instruction *if_insn)
656 {
657 struct intel_context *intel = &p->brw->intel;
658 struct brw_instruction *insn;
659 GLuint br = 1;
660
661 if (intel->gen == 5)
662 br = 2;
663
664 if (p->single_program_flow) {
665 insn = next_insn(p, BRW_OPCODE_ADD);
666 } else {
667 insn = next_insn(p, BRW_OPCODE_ELSE);
668 }
669
670 brw_set_dest(insn, brw_ip_reg());
671 brw_set_src0(insn, brw_ip_reg());
672 brw_set_src1(insn, brw_imm_d(0x0));
673
674 insn->header.compression_control = BRW_COMPRESSION_NONE;
675 insn->header.execution_size = if_insn->header.execution_size;
676 insn->header.mask_control = BRW_MASK_ENABLE;
677 if (!p->single_program_flow)
678 insn->header.thread_control = BRW_THREAD_SWITCH;
679
680 /* Patch the if instruction to point at this instruction.
681 */
682 if (p->single_program_flow) {
683 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
684
685 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
686 } else {
687 assert(if_insn->header.opcode == BRW_OPCODE_IF);
688
689 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
690 if_insn->bits3.if_else.pop_count = 0;
691 if_insn->bits3.if_else.pad0 = 0;
692 }
693
694 return insn;
695 }
696
697 void brw_ENDIF(struct brw_compile *p,
698 struct brw_instruction *patch_insn)
699 {
700 struct intel_context *intel = &p->brw->intel;
701 GLuint br = 1;
702
703 if (intel->gen == 5)
704 br = 2;
705
706 if (p->single_program_flow) {
707 /* In single program flow mode, there's no need to execute an ENDIF,
708 * since we don't need to do any stack operations, and if we're executing
709 * currently, we want to just continue executing.
710 */
711 struct brw_instruction *next = &p->store[p->nr_insn];
712
713 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
714
715 patch_insn->bits3.ud = (next - patch_insn) * 16;
716 } else {
717 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
718
719 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
720 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
721 brw_set_src1(insn, brw_imm_d(0x0));
722
723 insn->header.compression_control = BRW_COMPRESSION_NONE;
724 insn->header.execution_size = patch_insn->header.execution_size;
725 insn->header.mask_control = BRW_MASK_ENABLE;
726 insn->header.thread_control = BRW_THREAD_SWITCH;
727
728 assert(patch_insn->bits3.if_else.jump_count == 0);
729
730 /* Patch the if or else instructions to point at this or the next
731 * instruction respectively.
732 */
733 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
734 /* Automagically turn it into an IFF:
735 */
736 patch_insn->header.opcode = BRW_OPCODE_IFF;
737 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
738 patch_insn->bits3.if_else.pop_count = 0;
739 patch_insn->bits3.if_else.pad0 = 0;
740 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
741 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
742 patch_insn->bits3.if_else.pop_count = 1;
743 patch_insn->bits3.if_else.pad0 = 0;
744 } else {
745 assert(0);
746 }
747
748 /* Also pop item off the stack in the endif instruction:
749 */
750 insn->bits3.if_else.jump_count = 0;
751 insn->bits3.if_else.pop_count = 1;
752 insn->bits3.if_else.pad0 = 0;
753 }
754 }
755
756 struct brw_instruction *brw_BREAK(struct brw_compile *p)
757 {
758 struct brw_instruction *insn;
759 insn = next_insn(p, BRW_OPCODE_BREAK);
760 brw_set_dest(insn, brw_ip_reg());
761 brw_set_src0(insn, brw_ip_reg());
762 brw_set_src1(insn, brw_imm_d(0x0));
763 insn->header.compression_control = BRW_COMPRESSION_NONE;
764 insn->header.execution_size = BRW_EXECUTE_8;
765 /* insn->header.mask_control = BRW_MASK_DISABLE; */
766 insn->bits3.if_else.pad0 = 0;
767 return insn;
768 }
769
770 struct brw_instruction *brw_CONT(struct brw_compile *p)
771 {
772 struct brw_instruction *insn;
773 insn = next_insn(p, BRW_OPCODE_CONTINUE);
774 brw_set_dest(insn, brw_ip_reg());
775 brw_set_src0(insn, brw_ip_reg());
776 brw_set_src1(insn, brw_imm_d(0x0));
777 insn->header.compression_control = BRW_COMPRESSION_NONE;
778 insn->header.execution_size = BRW_EXECUTE_8;
779 /* insn->header.mask_control = BRW_MASK_DISABLE; */
780 insn->bits3.if_else.pad0 = 0;
781 return insn;
782 }
783
784 /* DO/WHILE loop:
785 */
786 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
787 {
788 if (p->single_program_flow) {
789 return &p->store[p->nr_insn];
790 } else {
791 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
792
793 /* Override the defaults for this instruction:
794 */
795 brw_set_dest(insn, brw_null_reg());
796 brw_set_src0(insn, brw_null_reg());
797 brw_set_src1(insn, brw_null_reg());
798
799 insn->header.compression_control = BRW_COMPRESSION_NONE;
800 insn->header.execution_size = execute_size;
801 insn->header.predicate_control = BRW_PREDICATE_NONE;
802 /* insn->header.mask_control = BRW_MASK_ENABLE; */
803 /* insn->header.mask_control = BRW_MASK_DISABLE; */
804
805 return insn;
806 }
807 }
808
809
810
811 struct brw_instruction *brw_WHILE(struct brw_compile *p,
812 struct brw_instruction *do_insn)
813 {
814 struct intel_context *intel = &p->brw->intel;
815 struct brw_instruction *insn;
816 GLuint br = 1;
817
818 if (intel->gen == 5)
819 br = 2;
820
821 if (p->single_program_flow)
822 insn = next_insn(p, BRW_OPCODE_ADD);
823 else
824 insn = next_insn(p, BRW_OPCODE_WHILE);
825
826 brw_set_dest(insn, brw_ip_reg());
827 brw_set_src0(insn, brw_ip_reg());
828 brw_set_src1(insn, brw_imm_d(0x0));
829
830 insn->header.compression_control = BRW_COMPRESSION_NONE;
831
832 if (p->single_program_flow) {
833 insn->header.execution_size = BRW_EXECUTE_1;
834
835 insn->bits3.d = (do_insn - insn) * 16;
836 } else {
837 insn->header.execution_size = do_insn->header.execution_size;
838
839 assert(do_insn->header.opcode == BRW_OPCODE_DO);
840 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
841 insn->bits3.if_else.pop_count = 0;
842 insn->bits3.if_else.pad0 = 0;
843 }
844
845 /* insn->header.mask_control = BRW_MASK_ENABLE; */
846
847 /* insn->header.mask_control = BRW_MASK_DISABLE; */
848 p->current->header.predicate_control = BRW_PREDICATE_NONE;
849 return insn;
850 }
851
852
853 /* FORWARD JUMPS:
854 */
855 void brw_land_fwd_jump(struct brw_compile *p,
856 struct brw_instruction *jmp_insn)
857 {
858 struct intel_context *intel = &p->brw->intel;
859 struct brw_instruction *landing = &p->store[p->nr_insn];
860 GLuint jmpi = 1;
861
862 if (intel->gen == 5)
863 jmpi = 2;
864
865 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
866 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
867
868 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
869 }
870
871
872
873 /* To integrate with the above, it makes sense that the comparison
874 * instruction should populate the flag register. It might be simpler
875 * just to use the flag reg for most WM tasks?
876 */
877 void brw_CMP(struct brw_compile *p,
878 struct brw_reg dest,
879 GLuint conditional,
880 struct brw_reg src0,
881 struct brw_reg src1)
882 {
883 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
884
885 insn->header.destreg__conditionalmod = conditional;
886 brw_set_dest(insn, dest);
887 brw_set_src0(insn, src0);
888 brw_set_src1(insn, src1);
889
890 /* guess_execution_size(insn, src0); */
891
892
893 /* Make it so that future instructions will use the computed flag
894 * value until brw_set_predicate_control_flag_value() is called
895 * again.
896 */
897 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
898 dest.nr == 0) {
899 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
900 p->flag_value = 0xff;
901 }
902 }
903
904
905
906 /***********************************************************************
907 * Helpers for the various SEND message types:
908 */
909
910 /** Extended math function, float[8].
911 */
912 void brw_math( struct brw_compile *p,
913 struct brw_reg dest,
914 GLuint function,
915 GLuint saturate,
916 GLuint msg_reg_nr,
917 struct brw_reg src,
918 GLuint data_type,
919 GLuint precision )
920 {
921 struct intel_context *intel = &p->brw->intel;
922
923 if (intel->gen >= 6) {
924 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
925
926 /* Math is the same ISA format as other opcodes, except that CondModifier
927 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
928 */
929 insn->header.destreg__conditionalmod = function;
930
931 brw_set_dest(insn, dest);
932 brw_set_src0(insn, src);
933 brw_set_src1(insn, brw_null_reg());
934 } else {
935 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
936 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
937 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
938 /* Example code doesn't set predicate_control for send
939 * instructions.
940 */
941 insn->header.predicate_control = 0;
942 insn->header.destreg__conditionalmod = msg_reg_nr;
943
944 brw_set_dest(insn, dest);
945 brw_set_src0(insn, src);
946 brw_set_math_message(p->brw,
947 insn,
948 msg_length, response_length,
949 function,
950 BRW_MATH_INTEGER_UNSIGNED,
951 precision,
952 saturate,
953 data_type);
954 }
955 }
956
957 /**
958 * Extended math function, float[16].
959 * Use 2 send instructions.
960 */
961 void brw_math_16( struct brw_compile *p,
962 struct brw_reg dest,
963 GLuint function,
964 GLuint saturate,
965 GLuint msg_reg_nr,
966 struct brw_reg src,
967 GLuint precision )
968 {
969 struct brw_instruction *insn;
970 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
971 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
972
973 /* First instruction:
974 */
975 brw_push_insn_state(p);
976 brw_set_predicate_control_flag_value(p, 0xff);
977 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
978
979 insn = next_insn(p, BRW_OPCODE_SEND);
980 insn->header.destreg__conditionalmod = msg_reg_nr;
981
982 brw_set_dest(insn, dest);
983 brw_set_src0(insn, src);
984 brw_set_math_message(p->brw,
985 insn,
986 msg_length, response_length,
987 function,
988 BRW_MATH_INTEGER_UNSIGNED,
989 precision,
990 saturate,
991 BRW_MATH_DATA_VECTOR);
992
993 /* Second instruction:
994 */
995 insn = next_insn(p, BRW_OPCODE_SEND);
996 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
997 insn->header.destreg__conditionalmod = msg_reg_nr+1;
998
999 brw_set_dest(insn, offset(dest,1));
1000 brw_set_src0(insn, src);
1001 brw_set_math_message(p->brw,
1002 insn,
1003 msg_length, response_length,
1004 function,
1005 BRW_MATH_INTEGER_UNSIGNED,
1006 precision,
1007 saturate,
1008 BRW_MATH_DATA_VECTOR);
1009
1010 brw_pop_insn_state(p);
1011 }
1012
1013
1014 /**
1015 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1016 * Scratch offset should be a multiple of 64.
1017 * Used for register spilling.
1018 */
1019 void brw_dp_WRITE_16( struct brw_compile *p,
1020 struct brw_reg src,
1021 GLuint scratch_offset )
1022 {
1023 GLuint msg_reg_nr = 1;
1024 {
1025 brw_push_insn_state(p);
1026 brw_set_mask_control(p, BRW_MASK_DISABLE);
1027 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1028
1029 /* set message header global offset field (reg 0, element 2) */
1030 brw_MOV(p,
1031 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1032 brw_imm_d(scratch_offset));
1033
1034 brw_pop_insn_state(p);
1035 }
1036
1037 {
1038 GLuint msg_length = 3;
1039 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1040 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1041
1042 insn->header.predicate_control = 0; /* XXX */
1043 insn->header.compression_control = BRW_COMPRESSION_NONE;
1044 insn->header.destreg__conditionalmod = msg_reg_nr;
1045
1046 brw_set_dest(insn, dest);
1047 brw_set_src0(insn, src);
1048
1049 brw_set_dp_write_message(p->brw,
1050 insn,
1051 255, /* binding table index (255=stateless) */
1052 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1053 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1054 msg_length,
1055 0, /* pixel scoreboard */
1056 0, /* response_length */
1057 0); /* eot */
1058 }
1059 }
1060
1061
1062 /**
1063 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1064 * Scratch offset should be a multiple of 64.
1065 * Used for register spilling.
1066 */
1067 void brw_dp_READ_16( struct brw_compile *p,
1068 struct brw_reg dest,
1069 GLuint scratch_offset )
1070 {
1071 GLuint msg_reg_nr = 1;
1072 {
1073 brw_push_insn_state(p);
1074 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1075 brw_set_mask_control(p, BRW_MASK_DISABLE);
1076
1077 /* set message header global offset field (reg 0, element 2) */
1078 brw_MOV(p,
1079 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1080 brw_imm_d(scratch_offset));
1081
1082 brw_pop_insn_state(p);
1083 }
1084
1085 {
1086 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1087
1088 insn->header.predicate_control = 0; /* XXX */
1089 insn->header.compression_control = BRW_COMPRESSION_NONE;
1090 insn->header.destreg__conditionalmod = msg_reg_nr;
1091
1092 brw_set_dest(insn, dest); /* UW? */
1093 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1094
1095 brw_set_dp_read_message(p->brw,
1096 insn,
1097 255, /* binding table index (255=stateless) */
1098 3, /* msg_control (3 means 4 Owords) */
1099 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1100 1, /* target cache (render/scratch) */
1101 1, /* msg_length */
1102 2, /* response_length */
1103 0); /* eot */
1104 }
1105 }
1106
1107
1108 /**
1109 * Read a float[4] vector from the data port Data Cache (const buffer).
1110 * Location (in buffer) should be a multiple of 16.
1111 * Used for fetching shader constants.
1112 * If relAddr is true, we'll do an indirect fetch using the address register.
1113 */
1114 void brw_dp_READ_4( struct brw_compile *p,
1115 struct brw_reg dest,
1116 GLboolean relAddr,
1117 GLuint location,
1118 GLuint bind_table_index )
1119 {
1120 /* XXX: relAddr not implemented */
1121 GLuint msg_reg_nr = 1;
1122 {
1123 struct brw_reg b;
1124 brw_push_insn_state(p);
1125 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1126 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1127 brw_set_mask_control(p, BRW_MASK_DISABLE);
1128
1129 /* Setup MRF[1] with location/offset into const buffer */
1130 b = brw_message_reg(msg_reg_nr);
1131 b = retype(b, BRW_REGISTER_TYPE_UD);
1132 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1133 * when the docs say only dword[2] should be set. Hmmm. But it works.
1134 */
1135 brw_MOV(p, b, brw_imm_ud(location));
1136 brw_pop_insn_state(p);
1137 }
1138
1139 {
1140 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1141
1142 insn->header.predicate_control = BRW_PREDICATE_NONE;
1143 insn->header.compression_control = BRW_COMPRESSION_NONE;
1144 insn->header.destreg__conditionalmod = msg_reg_nr;
1145 insn->header.mask_control = BRW_MASK_DISABLE;
1146
1147 /* cast dest to a uword[8] vector */
1148 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1149
1150 brw_set_dest(insn, dest);
1151 brw_set_src0(insn, brw_null_reg());
1152
1153 brw_set_dp_read_message(p->brw,
1154 insn,
1155 bind_table_index,
1156 0, /* msg_control (0 means 1 Oword) */
1157 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1158 0, /* source cache = data cache */
1159 1, /* msg_length */
1160 1, /* response_length (1 Oword) */
1161 0); /* eot */
1162 }
1163 }
1164
1165
1166 /**
1167 * Read float[4] constant(s) from VS constant buffer.
1168 * For relative addressing, two float[4] constants will be read into 'dest'.
1169 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1170 */
1171 void brw_dp_READ_4_vs(struct brw_compile *p,
1172 struct brw_reg dest,
1173 GLuint oword,
1174 GLboolean relAddr,
1175 struct brw_reg addrReg,
1176 GLuint location,
1177 GLuint bind_table_index)
1178 {
1179 GLuint msg_reg_nr = 1;
1180
1181 assert(oword < 2);
1182 /*
1183 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1184 location, msg_reg_nr);
1185 */
1186
1187 /* Setup MRF[1] with location/offset into const buffer */
1188 {
1189 struct brw_reg b;
1190
1191 brw_push_insn_state(p);
1192 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1193 brw_set_mask_control(p, BRW_MASK_DISABLE);
1194 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1195 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1196
1197 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1198 * when the docs say only dword[2] should be set. Hmmm. But it works.
1199 */
1200 b = brw_message_reg(msg_reg_nr);
1201 b = retype(b, BRW_REGISTER_TYPE_UD);
1202 /*b = get_element_ud(b, 2);*/
1203 if (relAddr) {
1204 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1205 }
1206 else {
1207 brw_MOV(p, b, brw_imm_ud(location));
1208 }
1209
1210 brw_pop_insn_state(p);
1211 }
1212
1213 {
1214 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1215
1216 insn->header.predicate_control = BRW_PREDICATE_NONE;
1217 insn->header.compression_control = BRW_COMPRESSION_NONE;
1218 insn->header.destreg__conditionalmod = msg_reg_nr;
1219 insn->header.mask_control = BRW_MASK_DISABLE;
1220 /*insn->header.access_mode = BRW_ALIGN_16;*/
1221
1222 brw_set_dest(insn, dest);
1223 brw_set_src0(insn, brw_null_reg());
1224
1225 brw_set_dp_read_message(p->brw,
1226 insn,
1227 bind_table_index,
1228 oword, /* 0 = lower Oword, 1 = upper Oword */
1229 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1230 0, /* source cache = data cache */
1231 1, /* msg_length */
1232 1, /* response_length (1 Oword) */
1233 0); /* eot */
1234 }
1235 }
1236
1237
1238
1239 void brw_fb_WRITE(struct brw_compile *p,
1240 struct brw_reg dest,
1241 GLuint msg_reg_nr,
1242 struct brw_reg src0,
1243 GLuint binding_table_index,
1244 GLuint msg_length,
1245 GLuint response_length,
1246 GLboolean eot)
1247 {
1248 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1249
1250 insn->header.predicate_control = 0; /* XXX */
1251 insn->header.compression_control = BRW_COMPRESSION_NONE;
1252 insn->header.destreg__conditionalmod = msg_reg_nr;
1253
1254 brw_set_dest(insn, dest);
1255 brw_set_src0(insn, src0);
1256 brw_set_dp_write_message(p->brw,
1257 insn,
1258 binding_table_index,
1259 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1260 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1261 msg_length,
1262 1, /* pixel scoreboard */
1263 response_length,
1264 eot);
1265 }
1266
1267
1268 /**
1269 * Texture sample instruction.
1270 * Note: the msg_type plus msg_length values determine exactly what kind
1271 * of sampling operation is performed. See volume 4, page 161 of docs.
1272 */
1273 void brw_SAMPLE(struct brw_compile *p,
1274 struct brw_reg dest,
1275 GLuint msg_reg_nr,
1276 struct brw_reg src0,
1277 GLuint binding_table_index,
1278 GLuint sampler,
1279 GLuint writemask,
1280 GLuint msg_type,
1281 GLuint response_length,
1282 GLuint msg_length,
1283 GLboolean eot,
1284 GLuint header_present,
1285 GLuint simd_mode)
1286 {
1287 GLboolean need_stall = 0;
1288
1289 if (writemask == 0) {
1290 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1291 return;
1292 }
1293
1294 /* Hardware doesn't do destination dependency checking on send
1295 * instructions properly. Add a workaround which generates the
1296 * dependency by other means. In practice it seems like this bug
1297 * only crops up for texture samples, and only where registers are
1298 * written by the send and then written again later without being
1299 * read in between. Luckily for us, we already track that
1300 * information and use it to modify the writemask for the
1301 * instruction, so that is a guide for whether a workaround is
1302 * needed.
1303 */
1304 if (writemask != WRITEMASK_XYZW) {
1305 GLuint dst_offset = 0;
1306 GLuint i, newmask = 0, len = 0;
1307
1308 for (i = 0; i < 4; i++) {
1309 if (writemask & (1<<i))
1310 break;
1311 dst_offset += 2;
1312 }
1313 for (; i < 4; i++) {
1314 if (!(writemask & (1<<i)))
1315 break;
1316 newmask |= 1<<i;
1317 len++;
1318 }
1319
1320 if (newmask != writemask) {
1321 need_stall = 1;
1322 /* printf("need stall %x %x\n", newmask , writemask); */
1323 }
1324 else {
1325 GLboolean dispatch_16 = GL_FALSE;
1326
1327 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1328
1329 guess_execution_size(p->current, dest);
1330 if (p->current->header.execution_size == BRW_EXECUTE_16)
1331 dispatch_16 = GL_TRUE;
1332
1333 newmask = ~newmask & WRITEMASK_XYZW;
1334
1335 brw_push_insn_state(p);
1336
1337 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1338 brw_set_mask_control(p, BRW_MASK_DISABLE);
1339
1340 brw_MOV(p, m1, brw_vec8_grf(0,0));
1341 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1342
1343 brw_pop_insn_state(p);
1344
1345 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1346 dest = offset(dest, dst_offset);
1347
1348 /* For 16-wide dispatch, masked channels are skipped in the
1349 * response. For 8-wide, masked channels still take up slots,
1350 * and are just not written to.
1351 */
1352 if (dispatch_16)
1353 response_length = len * 2;
1354 }
1355 }
1356
1357 {
1358 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1359
1360 insn->header.predicate_control = 0; /* XXX */
1361 insn->header.compression_control = BRW_COMPRESSION_NONE;
1362 insn->header.destreg__conditionalmod = msg_reg_nr;
1363
1364 brw_set_dest(insn, dest);
1365 brw_set_src0(insn, src0);
1366 brw_set_sampler_message(p->brw, insn,
1367 binding_table_index,
1368 sampler,
1369 msg_type,
1370 response_length,
1371 msg_length,
1372 eot,
1373 header_present,
1374 simd_mode);
1375 }
1376
1377 if (need_stall) {
1378 struct brw_reg reg = vec8(offset(dest, response_length-1));
1379
1380 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1381 */
1382 brw_push_insn_state(p);
1383 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1384 brw_MOV(p, reg, reg);
1385 brw_pop_insn_state(p);
1386 }
1387
1388 }
1389
1390 /* All these variables are pretty confusing - we might be better off
1391 * using bitmasks and macros for this, in the old style. Or perhaps
1392 * just having the caller instantiate the fields in dword3 itself.
1393 */
1394 void brw_urb_WRITE(struct brw_compile *p,
1395 struct brw_reg dest,
1396 GLuint msg_reg_nr,
1397 struct brw_reg src0,
1398 GLboolean allocate,
1399 GLboolean used,
1400 GLuint msg_length,
1401 GLuint response_length,
1402 GLboolean eot,
1403 GLboolean writes_complete,
1404 GLuint offset,
1405 GLuint swizzle)
1406 {
1407 struct intel_context *intel = &p->brw->intel;
1408 struct brw_instruction *insn;
1409
1410 /* Sandybridge doesn't have the implied move for SENDs,
1411 * and the first message register index comes from src0.
1412 */
1413 if (intel->gen >= 6) {
1414 brw_push_insn_state(p);
1415 brw_set_mask_control( p, BRW_MASK_DISABLE );
1416 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1417 brw_pop_insn_state(p);
1418 src0 = brw_message_reg(msg_reg_nr);
1419 }
1420
1421 insn = next_insn(p, BRW_OPCODE_SEND);
1422
1423 assert(msg_length < BRW_MAX_MRF);
1424
1425 brw_set_dest(insn, dest);
1426 brw_set_src0(insn, src0);
1427 brw_set_src1(insn, brw_imm_d(0));
1428
1429 if (intel->gen < 6)
1430 insn->header.destreg__conditionalmod = msg_reg_nr;
1431
1432 brw_set_urb_message(p->brw,
1433 insn,
1434 allocate,
1435 used,
1436 msg_length,
1437 response_length,
1438 eot,
1439 writes_complete,
1440 offset,
1441 swizzle);
1442 }
1443
1444 void brw_ff_sync(struct brw_compile *p,
1445 struct brw_reg dest,
1446 GLuint msg_reg_nr,
1447 struct brw_reg src0,
1448 GLboolean allocate,
1449 GLuint response_length,
1450 GLboolean eot)
1451 {
1452 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1453
1454 brw_set_dest(insn, dest);
1455 brw_set_src0(insn, src0);
1456 brw_set_src1(insn, brw_imm_d(0));
1457
1458 insn->header.destreg__conditionalmod = msg_reg_nr;
1459
1460 brw_set_ff_sync_message(p->brw,
1461 insn,
1462 allocate,
1463 response_length,
1464 eot);
1465 }