Add Intel i965G/Q DRI driver.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 insn->bits1.da1.dest_reg_file = dest.file;
59 insn->bits1.da1.dest_reg_type = dest.type;
60 insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63 insn->bits1.da1.dest_reg_nr = dest.nr;
64
65 if (insn->header.access_mode == BRW_ALIGN_1) {
66 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68 }
69 else {
70 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72 }
73 }
74 else {
75 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77 /* These are different sizes in align1 vs align16:
78 */
79 if (insn->header.access_mode == BRW_ALIGN_1) {
80 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82 }
83 else {
84 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85 }
86 }
87
88 /* NEW: Set the execution size based on dest.width and
89 * insn->compression_control:
90 */
91 guess_execution_size(insn, dest);
92 }
93
94 static void brw_set_src0( struct brw_instruction *insn,
95 struct brw_reg reg )
96 {
97 insn->bits1.da1.src0_reg_file = reg.file;
98 insn->bits1.da1.src0_reg_type = reg.type;
99 insn->bits2.da1.src0_abs = reg.abs;
100 insn->bits2.da1.src0_negate = reg.negate;
101 insn->bits2.da1.src0_address_mode = reg.address_mode;
102
103 if (reg.file == BRW_IMMEDIATE_VALUE) {
104 insn->bits3.ud = reg.dw1.ud;
105
106 /* Required to set some fields in src1 as well:
107 */
108 insn->bits1.da1.src1_reg_file = 0; /* arf */
109 insn->bits1.da1.src1_reg_type = reg.type;
110 }
111 else
112 {
113 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
114 if (insn->header.access_mode == BRW_ALIGN_1) {
115 insn->bits2.da1.src0_subreg_nr = reg.subnr;
116 insn->bits2.da1.src0_reg_nr = reg.nr;
117 }
118 else {
119 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
120 insn->bits2.da16.src0_reg_nr = reg.nr;
121 }
122 }
123 else {
124 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
125
126 if (insn->header.access_mode == BRW_ALIGN_1) {
127 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
128 }
129 else {
130 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
131 }
132 }
133
134 if (insn->header.access_mode == BRW_ALIGN_1) {
135 if (reg.width == BRW_WIDTH_1 &&
136 insn->header.execution_size == BRW_EXECUTE_1) {
137 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
138 insn->bits2.da1.src0_width = BRW_WIDTH_1;
139 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
140 }
141 else {
142 insn->bits2.da1.src0_horiz_stride = reg.hstride;
143 insn->bits2.da1.src0_width = reg.width;
144 insn->bits2.da1.src0_vert_stride = reg.vstride;
145 }
146 }
147 else {
148 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
149 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
150 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
151 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
152
153 /* This is an oddity of the fact we're using the same
154 * descriptions for registers in align_16 as align_1:
155 */
156 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
157 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
158 else
159 insn->bits2.da16.src0_vert_stride = reg.vstride;
160 }
161 }
162 }
163
164
165 static void brw_set_src1( struct brw_instruction *insn,
166 struct brw_reg reg )
167 {
168 insn->bits1.da1.src1_reg_file = reg.file;
169 insn->bits1.da1.src1_reg_type = reg.type;
170 insn->bits3.da1.src1_abs = reg.abs;
171 insn->bits3.da1.src1_negate = reg.negate;
172
173 /* Only src1 can be immediate in two-argument instructions.
174 */
175 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
176
177 if (reg.file == BRW_IMMEDIATE_VALUE) {
178 insn->bits3.ud = reg.dw1.ud;
179 }
180 else {
181 /* This is a hardware restriction, which may or may not be lifted
182 * in the future:
183 */
184 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
185 assert (reg.file == BRW_GENERAL_REGISTER_FILE);
186
187 if (insn->header.access_mode == BRW_ALIGN_1) {
188 insn->bits3.da1.src1_subreg_nr = reg.subnr;
189 insn->bits3.da1.src1_reg_nr = reg.nr;
190 }
191 else {
192 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
193 insn->bits3.da16.src1_reg_nr = reg.nr;
194 }
195
196 if (insn->header.access_mode == BRW_ALIGN_1) {
197 if (reg.width == BRW_WIDTH_1 &&
198 insn->header.execution_size == BRW_EXECUTE_1) {
199 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
200 insn->bits3.da1.src1_width = BRW_WIDTH_1;
201 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
202 }
203 else {
204 insn->bits3.da1.src1_horiz_stride = reg.hstride;
205 insn->bits3.da1.src1_width = reg.width;
206 insn->bits3.da1.src1_vert_stride = reg.vstride;
207 }
208 }
209 else {
210 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
211 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
212 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
213 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
214
215 /* This is an oddity of the fact we're using the same
216 * descriptions for registers in align_16 as align_1:
217 */
218 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
219 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
220 else
221 insn->bits3.da16.src1_vert_stride = reg.vstride;
222 }
223 }
224 }
225
226
227
228 static void brw_set_math_message( struct brw_instruction *insn,
229 GLuint msg_length,
230 GLuint response_length,
231 GLuint function,
232 GLuint integer_type,
233 GLboolean low_precision,
234 GLboolean saturate,
235 GLuint dataType )
236 {
237 brw_set_src1(insn, brw_imm_d(0));
238
239 insn->bits3.math.function = function;
240 insn->bits3.math.int_type = integer_type;
241 insn->bits3.math.precision = low_precision;
242 insn->bits3.math.saturate = saturate;
243 insn->bits3.math.data_type = dataType;
244 insn->bits3.math.response_length = response_length;
245 insn->bits3.math.msg_length = msg_length;
246 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
247 insn->bits3.math.end_of_thread = 0;
248 }
249
250 static void brw_set_urb_message( struct brw_instruction *insn,
251 GLboolean allocate,
252 GLboolean used,
253 GLuint msg_length,
254 GLuint response_length,
255 GLboolean end_of_thread,
256 GLboolean complete,
257 GLuint offset,
258 GLuint swizzle_control )
259 {
260 brw_set_src1(insn, brw_imm_d(0));
261
262 insn->bits3.urb.opcode = 0; /* ? */
263 insn->bits3.urb.offset = offset;
264 insn->bits3.urb.swizzle_control = swizzle_control;
265 insn->bits3.urb.allocate = allocate;
266 insn->bits3.urb.used = used; /* ? */
267 insn->bits3.urb.complete = complete;
268 insn->bits3.urb.response_length = response_length;
269 insn->bits3.urb.msg_length = msg_length;
270 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
271 insn->bits3.urb.end_of_thread = end_of_thread;
272 }
273
274 static void brw_set_dp_write_message( struct brw_instruction *insn,
275 GLuint binding_table_index,
276 GLuint msg_control,
277 GLuint msg_type,
278 GLuint msg_length,
279 GLuint pixel_scoreboard_clear,
280 GLuint response_length,
281 GLuint end_of_thread )
282 {
283 brw_set_src1(insn, brw_imm_d(0));
284
285 insn->bits3.dp_write.binding_table_index = binding_table_index;
286 insn->bits3.dp_write.msg_control = msg_control;
287 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
288 insn->bits3.dp_write.msg_type = msg_type;
289 insn->bits3.dp_write.send_commit_msg = 0;
290 insn->bits3.dp_write.response_length = response_length;
291 insn->bits3.dp_write.msg_length = msg_length;
292 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
293 insn->bits3.urb.end_of_thread = end_of_thread;
294 }
295
296 static void brw_set_dp_read_message( struct brw_instruction *insn,
297 GLuint binding_table_index,
298 GLuint msg_control,
299 GLuint msg_type,
300 GLuint target_cache,
301 GLuint msg_length,
302 GLuint response_length,
303 GLuint end_of_thread )
304 {
305 brw_set_src1(insn, brw_imm_d(0));
306
307 insn->bits3.dp_read.binding_table_index = binding_table_index;
308 insn->bits3.dp_read.msg_control = msg_control;
309 insn->bits3.dp_read.msg_type = msg_type;
310 insn->bits3.dp_read.target_cache = target_cache;
311 insn->bits3.dp_read.response_length = response_length;
312 insn->bits3.dp_read.msg_length = msg_length;
313 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
314 insn->bits3.dp_read.end_of_thread = end_of_thread;
315 }
316
317 static void brw_set_sampler_message( struct brw_instruction *insn,
318 GLuint binding_table_index,
319 GLuint sampler,
320 GLuint msg_type,
321 GLuint response_length,
322 GLuint msg_length,
323 GLboolean eot)
324 {
325 brw_set_src1(insn, brw_imm_d(0));
326
327 insn->bits3.sampler.binding_table_index = binding_table_index;
328 insn->bits3.sampler.sampler = sampler;
329 insn->bits3.sampler.msg_type = msg_type;
330 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
331 insn->bits3.sampler.response_length = response_length;
332 insn->bits3.sampler.msg_length = msg_length;
333 insn->bits3.sampler.end_of_thread = eot;
334 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
335 }
336
337
338
339 static struct brw_instruction *next_insn( struct brw_compile *p,
340 GLuint opcode )
341 {
342 struct brw_instruction *insn;
343
344 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
345
346 insn = &p->store[p->nr_insn++];
347 memcpy(insn, p->current, sizeof(*insn));
348
349 /* Reset this one-shot flag:
350 */
351
352 if (p->current->header.destreg__conditonalmod) {
353 p->current->header.destreg__conditonalmod = 0;
354 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
355 }
356
357 insn->header.opcode = opcode;
358 return insn;
359 }
360
361
362 static struct brw_instruction *brw_alu1( struct brw_compile *p,
363 GLuint opcode,
364 struct brw_reg dest,
365 struct brw_reg src )
366 {
367 struct brw_instruction *insn = next_insn(p, opcode);
368 brw_set_dest(insn, dest);
369 brw_set_src0(insn, src);
370 return insn;
371 }
372
373 static struct brw_instruction *brw_alu2(struct brw_compile *p,
374 GLuint opcode,
375 struct brw_reg dest,
376 struct brw_reg src0,
377 struct brw_reg src1 )
378 {
379 struct brw_instruction *insn = next_insn(p, opcode);
380 brw_set_dest(insn, dest);
381 brw_set_src0(insn, src0);
382 brw_set_src1(insn, src1);
383 return insn;
384 }
385
386
387 /***********************************************************************
388 * Convenience routines.
389 */
390 #define ALU1(OP) \
391 struct brw_instruction *brw_##OP(struct brw_compile *p, \
392 struct brw_reg dest, \
393 struct brw_reg src0) \
394 { \
395 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
396 }
397
398 #define ALU2(OP) \
399 struct brw_instruction *brw_##OP(struct brw_compile *p, \
400 struct brw_reg dest, \
401 struct brw_reg src0, \
402 struct brw_reg src1) \
403 { \
404 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
405 }
406
407
408 ALU1(MOV)
409 ALU2(SEL)
410 ALU1(NOT)
411 ALU2(AND)
412 ALU2(OR)
413 ALU2(XOR)
414 ALU2(SHR)
415 ALU2(SHL)
416 ALU2(RSR)
417 ALU2(RSL)
418 ALU2(ASR)
419 ALU2(ADD)
420 ALU2(MUL)
421 ALU1(FRC)
422 ALU1(RNDD)
423 ALU2(MAC)
424 ALU2(MACH)
425 ALU1(LZD)
426 ALU2(DP4)
427 ALU2(DPH)
428 ALU2(DP3)
429 ALU2(DP2)
430 ALU2(LINE)
431
432
433
434
435 void brw_NOP(struct brw_compile *p)
436 {
437 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
438 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
439 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
440 brw_set_src1(insn, brw_imm_ud(0x0));
441 }
442
443
444
445
446
447 /***********************************************************************
448 * Comparisons, if/else/endif
449 */
450
451 struct brw_instruction *brw_JMPI(struct brw_compile *p,
452 struct brw_reg dest,
453 struct brw_reg src0,
454 struct brw_reg src1)
455 {
456 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
457
458 p->current->header.predicate_control = BRW_PREDICATE_NONE;
459
460 return insn;
461 }
462
463
464 /* EU takes the value from the flag register and pushes it onto some
465 * sort of a stack (presumably merging with any flag value already on
466 * the stack). Within an if block, the flags at the top of the stack
467 * control execution on each channel of the unit, eg. on each of the
468 * 16 pixel values in our wm programs.
469 *
470 * When the matching 'else' instruction is reached (presumably by
471 * countdown of the instruction count patched in by our ELSE/ENDIF
472 * functions), the relevent flags are inverted.
473 *
474 * When the matching 'endif' instruction is reached, the flags are
475 * popped off. If the stack is now empty, normal execution resumes.
476 *
477 * No attempt is made to deal with stack overflow (14 elements?).
478 */
479 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
480 {
481 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF);
482
483 /* Override the defaults for this instruction:
484 */
485 brw_set_dest(insn, brw_ip_reg());
486 brw_set_src0(insn, brw_ip_reg());
487 brw_set_src1(insn, brw_imm_d(0x0));
488
489 insn->header.execution_size = execute_size;
490 insn->header.compression_control = BRW_COMPRESSION_NONE;
491 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
492 insn->header.mask_control = BRW_MASK_ENABLE;
493
494 p->current->header.predicate_control = BRW_PREDICATE_NONE;
495
496 return insn;
497 }
498
499
500 struct brw_instruction *brw_ELSE(struct brw_compile *p,
501 struct brw_instruction *if_insn)
502 {
503 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE);
504
505 brw_set_dest(insn, brw_ip_reg());
506 brw_set_src0(insn, brw_ip_reg());
507 brw_set_src1(insn, brw_imm_d(0x0));
508
509 insn->header.compression_control = BRW_COMPRESSION_NONE;
510 insn->header.execution_size = if_insn->header.execution_size;
511 insn->header.mask_control = BRW_MASK_ENABLE;
512
513 /* Patch the if instruction to point at this instruction.
514 */
515 assert(if_insn->header.opcode == BRW_OPCODE_IF);
516
517 if_insn->bits3.if_else.jump_count = insn - if_insn;
518 if_insn->bits3.if_else.pop_count = 1;
519 if_insn->bits3.if_else.pad0 = 0;
520
521 return insn;
522 }
523
524 void brw_ENDIF(struct brw_compile *p,
525 struct brw_instruction *patch_insn)
526 {
527 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
528
529 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
530 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
531 brw_set_src1(insn, brw_imm_d(0x0));
532
533 insn->header.compression_control = BRW_COMPRESSION_NONE;
534 insn->header.execution_size = patch_insn->header.execution_size;
535 insn->header.mask_control = BRW_MASK_ENABLE;
536
537 assert(patch_insn->bits3.if_else.jump_count == 0);
538
539 /* Patch the if or else instructions to point at this or the next
540 * instruction respectively.
541 */
542 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
543 /* Automagically turn it into an IFF:
544 */
545 patch_insn->header.opcode = BRW_OPCODE_IFF;
546 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
547 patch_insn->bits3.if_else.pop_count = 0;
548 patch_insn->bits3.if_else.pad0 = 0;
549
550 }
551 else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
552 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
553 patch_insn->bits3.if_else.pop_count = 1;
554 patch_insn->bits3.if_else.pad0 = 0;
555 }
556 else {
557 assert(0);
558 }
559
560 /* Also pop item off the stack in the endif instruction:
561 */
562 insn->bits3.if_else.jump_count = 0;
563 insn->bits3.if_else.pop_count = 1;
564 insn->bits3.if_else.pad0 = 0;
565 }
566
567 /* DO/WHILE loop:
568 */
569 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
570 {
571 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
572
573 /* Override the defaults for this instruction:
574 */
575 brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
576 brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
577 brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
578
579 insn->header.compression_control = BRW_COMPRESSION_NONE;
580 insn->header.execution_size = execute_size;
581 /* insn->header.mask_control = BRW_MASK_ENABLE; */
582
583 return insn;
584 }
585
586
587
588 void brw_WHILE(struct brw_compile *p,
589 struct brw_instruction *do_insn)
590 {
591 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE);
592
593 brw_set_dest(insn, brw_ip_reg());
594 brw_set_src0(insn, brw_ip_reg());
595 brw_set_src1(insn, brw_imm_d(0x0));
596
597 insn->header.compression_control = BRW_COMPRESSION_NONE;
598 insn->header.execution_size = do_insn->header.execution_size;
599
600 assert(do_insn->header.opcode == BRW_OPCODE_DO);
601 insn->bits3.if_else.jump_count = do_insn - insn;
602 insn->bits3.if_else.pop_count = 0;
603 insn->bits3.if_else.pad0 = 0;
604
605 /* insn->header.mask_control = BRW_MASK_ENABLE; */
606
607 p->current->header.predicate_control = BRW_PREDICATE_NONE;
608 }
609
610
611 /* FORWARD JUMPS:
612 */
613 void brw_land_fwd_jump(struct brw_compile *p,
614 struct brw_instruction *jmp_insn)
615 {
616 struct brw_instruction *landing = &p->store[p->nr_insn];
617
618 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
619 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
620
621 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
622 }
623
624
625
626 /* To integrate with the above, it makes sense that the comparison
627 * instruction should populate the flag register. It might be simpler
628 * just to use the flag reg for most WM tasks?
629 */
630 void brw_CMP(struct brw_compile *p,
631 struct brw_reg dest,
632 GLuint conditional,
633 struct brw_reg src0,
634 struct brw_reg src1)
635 {
636 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
637
638 insn->header.destreg__conditonalmod = conditional;
639 brw_set_dest(insn, dest);
640 brw_set_src0(insn, src0);
641 brw_set_src1(insn, src1);
642
643 /* guess_execution_size(insn, src0); */
644
645
646 /* Make it so that future instructions will use the computed flag
647 * value until brw_set_predicate_control_flag_value() is called
648 * again.
649 */
650 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
651 dest.nr == 0) {
652 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
653 p->flag_value = 0xff;
654 }
655 }
656
657
658
659 /***********************************************************************
660 * Helpers for the various SEND message types:
661 */
662
663 /* Invert 8 values
664 */
665 void brw_math( struct brw_compile *p,
666 struct brw_reg dest,
667 GLuint function,
668 GLuint saturate,
669 GLuint msg_reg_nr,
670 struct brw_reg src,
671 GLuint data_type,
672 GLuint precision )
673 {
674 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
675 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
676 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
677
678 /* Example code doesn't set predicate_control for send
679 * instructions.
680 */
681 insn->header.predicate_control = 0;
682 insn->header.destreg__conditonalmod = msg_reg_nr;
683
684 brw_set_dest(insn, dest);
685 brw_set_src0(insn, src);
686 brw_set_math_message(insn,
687 msg_length, response_length,
688 function,
689 BRW_MATH_INTEGER_UNSIGNED,
690 precision,
691 saturate,
692 data_type);
693 }
694
695 /* Use 2 send instructions to invert 16 elements
696 */
697 void brw_math_16( struct brw_compile *p,
698 struct brw_reg dest,
699 GLuint function,
700 GLuint saturate,
701 GLuint msg_reg_nr,
702 struct brw_reg src,
703 GLuint precision )
704 {
705 struct brw_instruction *insn;
706 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
707 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
708
709 /* First instruction:
710 */
711 brw_push_insn_state(p);
712 brw_set_predicate_control_flag_value(p, 0xff);
713 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
714
715 insn = next_insn(p, BRW_OPCODE_SEND);
716 insn->header.destreg__conditonalmod = msg_reg_nr;
717
718 brw_set_dest(insn, dest);
719 brw_set_src0(insn, src);
720 brw_set_math_message(insn,
721 msg_length, response_length,
722 function,
723 BRW_MATH_INTEGER_UNSIGNED,
724 precision,
725 saturate,
726 BRW_MATH_DATA_VECTOR);
727
728 /* Second instruction:
729 */
730 insn = next_insn(p, BRW_OPCODE_SEND);
731 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
732 insn->header.destreg__conditonalmod = msg_reg_nr+1;
733
734 brw_set_dest(insn, offset(dest,1));
735 brw_set_src0(insn, src);
736 brw_set_math_message(insn,
737 msg_length, response_length,
738 function,
739 BRW_MATH_INTEGER_UNSIGNED,
740 precision,
741 saturate,
742 BRW_MATH_DATA_VECTOR);
743
744 brw_pop_insn_state(p);
745 }
746
747
748
749
750 void brw_dp_WRITE_16( struct brw_compile *p,
751 struct brw_reg src,
752 GLuint msg_reg_nr,
753 GLuint scratch_offset )
754 {
755 {
756 brw_push_insn_state(p);
757 brw_set_mask_control(p, BRW_MASK_DISABLE);
758 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
759
760 brw_MOV(p,
761 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
762 brw_imm_d(scratch_offset));
763
764 brw_pop_insn_state(p);
765 }
766
767 {
768 GLuint msg_length = 3;
769 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
770 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
771
772 insn->header.predicate_control = 0; /* XXX */
773 insn->header.compression_control = BRW_COMPRESSION_NONE;
774 insn->header.destreg__conditonalmod = msg_reg_nr;
775
776 brw_set_dest(insn, dest);
777 brw_set_src0(insn, src);
778
779 brw_set_dp_write_message(insn,
780 255, /* bti */
781 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
782 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
783 msg_length,
784 0, /* pixel scoreboard */
785 0, /* response_length */
786 0); /* eot */
787 }
788
789 }
790
791
792 void brw_dp_READ_16( struct brw_compile *p,
793 struct brw_reg dest,
794 GLuint msg_reg_nr,
795 GLuint scratch_offset )
796 {
797 {
798 brw_push_insn_state(p);
799 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
800 brw_set_mask_control(p, BRW_MASK_DISABLE);
801
802 brw_MOV(p,
803 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
804 brw_imm_d(scratch_offset));
805
806 brw_pop_insn_state(p);
807 }
808
809 {
810 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
811
812 insn->header.predicate_control = 0; /* XXX */
813 insn->header.compression_control = BRW_COMPRESSION_NONE;
814 insn->header.destreg__conditonalmod = msg_reg_nr;
815
816 brw_set_dest(insn, dest); /* UW? */
817 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
818
819 brw_set_dp_read_message(insn,
820 255, /* bti */
821 3, /* msg_control */
822 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
823 1, /* target cache */
824 1, /* msg_length */
825 2, /* response_length */
826 0); /* eot */
827 }
828 }
829
830
831 void brw_fb_WRITE(struct brw_compile *p,
832 struct brw_reg dest,
833 GLuint msg_reg_nr,
834 struct brw_reg src0,
835 GLuint binding_table_index,
836 GLuint msg_length,
837 GLuint response_length,
838 GLboolean eot)
839 {
840 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
841
842 insn->header.predicate_control = 0; /* XXX */
843 insn->header.compression_control = BRW_COMPRESSION_NONE;
844 insn->header.destreg__conditonalmod = msg_reg_nr;
845
846 brw_set_dest(insn, dest);
847 brw_set_src0(insn, src0);
848 brw_set_dp_write_message(insn,
849 binding_table_index,
850 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
851 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
852 msg_length,
853 1, /* pixel scoreboard */
854 response_length,
855 eot);
856 }
857
858
859
860 void brw_SAMPLE(struct brw_compile *p,
861 struct brw_reg dest,
862 GLuint msg_reg_nr,
863 struct brw_reg src0,
864 GLuint binding_table_index,
865 GLuint sampler,
866 GLuint writemask,
867 GLuint msg_type,
868 GLuint response_length,
869 GLuint msg_length,
870 GLboolean eot)
871 {
872 GLboolean need_stall = 0;
873
874 if(writemask == 0) {
875 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
876 return;
877 }
878
879 /* Hardware doesn't do destination dependency checking on send
880 * instructions properly. Add a workaround which generates the
881 * dependency by other means. In practice it seems like this bug
882 * only crops up for texture samples, and only where registers are
883 * written by the send and then written again later without being
884 * read in between. Luckily for us, we already track that
885 * information and use it to modify the writemask for the
886 * instruction, so that is a guide for whether a workaround is
887 * needed.
888 */
889 if (writemask != WRITEMASK_XYZW) {
890 GLuint dst_offset = 0;
891 GLuint i, newmask = 0, len = 0;
892
893 for (i = 0; i < 4; i++) {
894 if (writemask & (1<<i))
895 break;
896 dst_offset += 2;
897 }
898 for (; i < 4; i++) {
899 if (!(writemask & (1<<i)))
900 break;
901 newmask |= 1<<i;
902 len++;
903 }
904
905 if (newmask != writemask) {
906 need_stall = 1;
907 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
908 }
909 else {
910 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
911
912 newmask = ~newmask & WRITEMASK_XYZW;
913
914 brw_push_insn_state(p);
915
916 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
917 brw_set_mask_control(p, BRW_MASK_DISABLE);
918
919 brw_MOV(p, m1, brw_vec8_grf(0,0));
920 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
921
922 brw_pop_insn_state(p);
923
924 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
925 dest = offset(dest, dst_offset);
926 response_length = len * 2;
927 }
928 }
929
930 {
931 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
932
933 insn->header.predicate_control = 0; /* XXX */
934 insn->header.compression_control = BRW_COMPRESSION_NONE;
935 insn->header.destreg__conditonalmod = msg_reg_nr;
936
937 brw_set_dest(insn, dest);
938 brw_set_src0(insn, src0);
939 brw_set_sampler_message(insn,
940 binding_table_index,
941 sampler,
942 msg_type,
943 response_length,
944 msg_length,
945 eot);
946 }
947
948 if (need_stall)
949 {
950 struct brw_reg reg = vec8(offset(dest, response_length-1));
951
952 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
953 */
954 brw_push_insn_state(p);
955 brw_set_compression_control(p, GL_FALSE);
956 brw_MOV(p, reg, reg);
957 brw_pop_insn_state(p);
958 }
959
960 }
961
962 /* All these variables are pretty confusing - we might be better off
963 * using bitmasks and macros for this, in the old style. Or perhaps
964 * just having the caller instantiate the fields in dword3 itself.
965 */
966 void brw_urb_WRITE(struct brw_compile *p,
967 struct brw_reg dest,
968 GLuint msg_reg_nr,
969 struct brw_reg src0,
970 GLboolean allocate,
971 GLboolean used,
972 GLuint msg_length,
973 GLuint response_length,
974 GLboolean eot,
975 GLboolean writes_complete,
976 GLuint offset,
977 GLuint swizzle)
978 {
979 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
980
981 assert(msg_length < 16);
982
983 brw_set_dest(insn, dest);
984 brw_set_src0(insn, src0);
985 brw_set_src1(insn, brw_imm_d(0));
986
987 insn->header.destreg__conditonalmod = msg_reg_nr;
988
989 brw_set_urb_message(insn,
990 allocate,
991 used,
992 msg_length,
993 response_length,
994 eot,
995 writes_complete,
996 offset,
997 swizzle);
998 }
999