2423536dd19d6f8319b7b55bfe41594e481563ff
[mesa.git] / src / mesa / pipe / i965simple / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 insn->bits1.da1.dest_reg_file = dest.file;
59 insn->bits1.da1.dest_reg_type = dest.type;
60 insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63 insn->bits1.da1.dest_reg_nr = dest.nr;
64
65 if (insn->header.access_mode == BRW_ALIGN_1) {
66 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68 }
69 else {
70 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72 }
73 }
74 else {
75 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77 /* These are different sizes in align1 vs align16:
78 */
79 if (insn->header.access_mode == BRW_ALIGN_1) {
80 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82 }
83 else {
84 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85 }
86 }
87
88 /* NEW: Set the execution size based on dest.width and
89 * insn->compression_control:
90 */
91 guess_execution_size(insn, dest);
92 }
93
94 static void brw_set_src0( struct brw_instruction *insn,
95 struct brw_reg reg )
96 {
97 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99 insn->bits1.da1.src0_reg_file = reg.file;
100 insn->bits1.da1.src0_reg_type = reg.type;
101 insn->bits2.da1.src0_abs = reg.abs;
102 insn->bits2.da1.src0_negate = reg.negate;
103 insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105 if (reg.file == BRW_IMMEDIATE_VALUE) {
106 insn->bits3.ud = reg.dw1.ud;
107
108 /* Required to set some fields in src1 as well:
109 */
110 insn->bits1.da1.src1_reg_file = 0; /* arf */
111 insn->bits1.da1.src1_reg_type = reg.type;
112 }
113 else
114 {
115 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
116 if (insn->header.access_mode == BRW_ALIGN_1) {
117 insn->bits2.da1.src0_subreg_nr = reg.subnr;
118 insn->bits2.da1.src0_reg_nr = reg.nr;
119 }
120 else {
121 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
122 insn->bits2.da16.src0_reg_nr = reg.nr;
123 }
124 }
125 else {
126 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
127
128 if (insn->header.access_mode == BRW_ALIGN_1) {
129 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
130 }
131 else {
132 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
133 }
134 }
135
136 if (insn->header.access_mode == BRW_ALIGN_1) {
137 if (reg.width == BRW_WIDTH_1 &&
138 insn->header.execution_size == BRW_EXECUTE_1) {
139 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
140 insn->bits2.da1.src0_width = BRW_WIDTH_1;
141 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
142 }
143 else {
144 insn->bits2.da1.src0_horiz_stride = reg.hstride;
145 insn->bits2.da1.src0_width = reg.width;
146 insn->bits2.da1.src0_vert_stride = reg.vstride;
147 }
148 }
149 else {
150 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
151 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
152 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
153 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
154
155 /* This is an oddity of the fact we're using the same
156 * descriptions for registers in align_16 as align_1:
157 */
158 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
159 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
160 else
161 insn->bits2.da16.src0_vert_stride = reg.vstride;
162 }
163 }
164 }
165
166
167 void brw_set_src1( struct brw_instruction *insn,
168 struct brw_reg reg )
169 {
170 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
171
172 insn->bits1.da1.src1_reg_file = reg.file;
173 insn->bits1.da1.src1_reg_type = reg.type;
174 insn->bits3.da1.src1_abs = reg.abs;
175 insn->bits3.da1.src1_negate = reg.negate;
176
177 /* Only src1 can be immediate in two-argument instructions.
178 */
179 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
180
181 if (reg.file == BRW_IMMEDIATE_VALUE) {
182 insn->bits3.ud = reg.dw1.ud;
183 }
184 else {
185 /* This is a hardware restriction, which may or may not be lifted
186 * in the future:
187 */
188 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
189 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
190
191 if (insn->header.access_mode == BRW_ALIGN_1) {
192 insn->bits3.da1.src1_subreg_nr = reg.subnr;
193 insn->bits3.da1.src1_reg_nr = reg.nr;
194 }
195 else {
196 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
197 insn->bits3.da16.src1_reg_nr = reg.nr;
198 }
199
200 if (insn->header.access_mode == BRW_ALIGN_1) {
201 if (reg.width == BRW_WIDTH_1 &&
202 insn->header.execution_size == BRW_EXECUTE_1) {
203 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
204 insn->bits3.da1.src1_width = BRW_WIDTH_1;
205 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
206 }
207 else {
208 insn->bits3.da1.src1_horiz_stride = reg.hstride;
209 insn->bits3.da1.src1_width = reg.width;
210 insn->bits3.da1.src1_vert_stride = reg.vstride;
211 }
212 }
213 else {
214 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
215 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
216 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
217 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
218
219 /* This is an oddity of the fact we're using the same
220 * descriptions for registers in align_16 as align_1:
221 */
222 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
223 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
224 else
225 insn->bits3.da16.src1_vert_stride = reg.vstride;
226 }
227 }
228 }
229
230
231
232 static void brw_set_math_message( struct brw_instruction *insn,
233 unsigned msg_length,
234 unsigned response_length,
235 unsigned function,
236 unsigned integer_type,
237 boolean low_precision,
238 boolean saturate,
239 unsigned dataType )
240 {
241 brw_set_src1(insn, brw_imm_d(0));
242
243 insn->bits3.math.function = function;
244 insn->bits3.math.int_type = integer_type;
245 insn->bits3.math.precision = low_precision;
246 insn->bits3.math.saturate = saturate;
247 insn->bits3.math.data_type = dataType;
248 insn->bits3.math.response_length = response_length;
249 insn->bits3.math.msg_length = msg_length;
250 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
251 insn->bits3.math.end_of_thread = 0;
252 }
253
254 static void brw_set_urb_message( struct brw_instruction *insn,
255 boolean allocate,
256 boolean used,
257 unsigned msg_length,
258 unsigned response_length,
259 boolean end_of_thread,
260 boolean complete,
261 unsigned offset,
262 unsigned swizzle_control )
263 {
264 brw_set_src1(insn, brw_imm_d(0));
265
266 insn->bits3.urb.opcode = 0; /* ? */
267 insn->bits3.urb.offset = offset;
268 insn->bits3.urb.swizzle_control = swizzle_control;
269 insn->bits3.urb.allocate = allocate;
270 insn->bits3.urb.used = used; /* ? */
271 insn->bits3.urb.complete = complete;
272 insn->bits3.urb.response_length = response_length;
273 insn->bits3.urb.msg_length = msg_length;
274 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
275 insn->bits3.urb.end_of_thread = end_of_thread;
276 }
277
278 static void brw_set_dp_write_message( struct brw_instruction *insn,
279 unsigned binding_table_index,
280 unsigned msg_control,
281 unsigned msg_type,
282 unsigned msg_length,
283 unsigned pixel_scoreboard_clear,
284 unsigned response_length,
285 unsigned end_of_thread )
286 {
287 brw_set_src1(insn, brw_imm_d(0));
288
289 insn->bits3.dp_write.binding_table_index = binding_table_index;
290 insn->bits3.dp_write.msg_control = msg_control;
291 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
292 insn->bits3.dp_write.msg_type = msg_type;
293 insn->bits3.dp_write.send_commit_msg = 0;
294 insn->bits3.dp_write.response_length = response_length;
295 insn->bits3.dp_write.msg_length = msg_length;
296 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
297 insn->bits3.urb.end_of_thread = end_of_thread;
298 }
299
300 static void brw_set_dp_read_message( struct brw_instruction *insn,
301 unsigned binding_table_index,
302 unsigned msg_control,
303 unsigned msg_type,
304 unsigned target_cache,
305 unsigned msg_length,
306 unsigned response_length,
307 unsigned end_of_thread )
308 {
309 brw_set_src1(insn, brw_imm_d(0));
310
311 insn->bits3.dp_read.binding_table_index = binding_table_index;
312 insn->bits3.dp_read.msg_control = msg_control;
313 insn->bits3.dp_read.msg_type = msg_type;
314 insn->bits3.dp_read.target_cache = target_cache;
315 insn->bits3.dp_read.response_length = response_length;
316 insn->bits3.dp_read.msg_length = msg_length;
317 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
318 insn->bits3.dp_read.end_of_thread = end_of_thread;
319 }
320
321 static void brw_set_sampler_message( struct brw_instruction *insn,
322 unsigned binding_table_index,
323 unsigned sampler,
324 unsigned msg_type,
325 unsigned response_length,
326 unsigned msg_length,
327 boolean eot)
328 {
329 brw_set_src1(insn, brw_imm_d(0));
330
331 insn->bits3.sampler.binding_table_index = binding_table_index;
332 insn->bits3.sampler.sampler = sampler;
333 insn->bits3.sampler.msg_type = msg_type;
334 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
335 insn->bits3.sampler.response_length = response_length;
336 insn->bits3.sampler.msg_length = msg_length;
337 insn->bits3.sampler.end_of_thread = eot;
338 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
339 }
340
341
342
343 static struct brw_instruction *next_insn( struct brw_compile *p,
344 unsigned opcode )
345 {
346 struct brw_instruction *insn;
347
348 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
349
350 insn = &p->store[p->nr_insn++];
351 memcpy(insn, p->current, sizeof(*insn));
352
353 /* Reset this one-shot flag:
354 */
355
356 if (p->current->header.destreg__conditonalmod) {
357 p->current->header.destreg__conditonalmod = 0;
358 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
359 }
360
361 insn->header.opcode = opcode;
362 return insn;
363 }
364
365
366 struct brw_instruction *brw_alu1( struct brw_compile *p,
367 unsigned opcode,
368 struct brw_reg dest,
369 struct brw_reg src )
370 {
371 struct brw_instruction *insn = next_insn(p, opcode);
372 brw_set_dest(insn, dest);
373 brw_set_src0(insn, src);
374 return insn;
375 }
376
377 struct brw_instruction *brw_alu2(struct brw_compile *p,
378 unsigned opcode,
379 struct brw_reg dest,
380 struct brw_reg src0,
381 struct brw_reg src1 )
382 {
383 struct brw_instruction *insn = next_insn(p, opcode);
384 brw_set_dest(insn, dest);
385 brw_set_src0(insn, src0);
386 brw_set_src1(insn, src1);
387 return insn;
388 }
389
390
391 /***********************************************************************
392 * Convenience routines.
393 */
394 #define ALU1(OP) \
395 struct brw_instruction *brw_##OP(struct brw_compile *p, \
396 struct brw_reg dest, \
397 struct brw_reg src0) \
398 { \
399 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
400 }
401
402 #define ALU2(OP) \
403 struct brw_instruction *brw_##OP(struct brw_compile *p, \
404 struct brw_reg dest, \
405 struct brw_reg src0, \
406 struct brw_reg src1) \
407 { \
408 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
409 }
410
411
412 ALU1(MOV)
413 ALU2(SEL)
414 ALU1(NOT)
415 ALU2(AND)
416 ALU2(OR)
417 ALU2(XOR)
418 ALU2(SHR)
419 ALU2(SHL)
420 ALU2(RSR)
421 ALU2(RSL)
422 ALU2(ASR)
423 ALU2(ADD)
424 ALU2(MUL)
425 ALU1(FRC)
426 ALU1(RNDD)
427 ALU2(MAC)
428 ALU2(MACH)
429 ALU1(LZD)
430 ALU2(DP4)
431 ALU2(DPH)
432 ALU2(DP3)
433 ALU2(DP2)
434 ALU2(LINE)
435
436
437
438
439 void brw_NOP(struct brw_compile *p)
440 {
441 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
442 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
443 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
444 brw_set_src1(insn, brw_imm_ud(0x0));
445 }
446
447
448
449
450
451 /***********************************************************************
452 * Comparisons, if/else/endif
453 */
454
455 struct brw_instruction *brw_JMPI(struct brw_compile *p,
456 struct brw_reg dest,
457 struct brw_reg src0,
458 struct brw_reg src1)
459 {
460 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
461
462 p->current->header.predicate_control = BRW_PREDICATE_NONE;
463
464 return insn;
465 }
466
467 /* EU takes the value from the flag register and pushes it onto some
468 * sort of a stack (presumably merging with any flag value already on
469 * the stack). Within an if block, the flags at the top of the stack
470 * control execution on each channel of the unit, eg. on each of the
471 * 16 pixel values in our wm programs.
472 *
473 * When the matching 'else' instruction is reached (presumably by
474 * countdown of the instruction count patched in by our ELSE/ENDIF
475 * functions), the relevent flags are inverted.
476 *
477 * When the matching 'endif' instruction is reached, the flags are
478 * popped off. If the stack is now empty, normal execution resumes.
479 *
480 * No attempt is made to deal with stack overflow (14 elements?).
481 */
482 struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size)
483 {
484 struct brw_instruction *insn;
485
486 if (p->single_program_flow) {
487 assert(execute_size == BRW_EXECUTE_1);
488
489 insn = next_insn(p, BRW_OPCODE_ADD);
490 insn->header.predicate_inverse = 1;
491 } else {
492 insn = next_insn(p, BRW_OPCODE_IF);
493 }
494
495 /* Override the defaults for this instruction:
496 */
497 brw_set_dest(insn, brw_ip_reg());
498 brw_set_src0(insn, brw_ip_reg());
499 brw_set_src1(insn, brw_imm_d(0x0));
500
501 insn->header.execution_size = execute_size;
502 insn->header.compression_control = BRW_COMPRESSION_NONE;
503 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
504 insn->header.mask_control = BRW_MASK_ENABLE;
505
506 p->current->header.predicate_control = BRW_PREDICATE_NONE;
507
508 return insn;
509 }
510
511
512 struct brw_instruction *brw_ELSE(struct brw_compile *p,
513 struct brw_instruction *if_insn)
514 {
515 struct brw_instruction *insn;
516
517 if (p->single_program_flow) {
518 insn = next_insn(p, BRW_OPCODE_ADD);
519 } else {
520 insn = next_insn(p, BRW_OPCODE_ELSE);
521 }
522
523 brw_set_dest(insn, brw_ip_reg());
524 brw_set_src0(insn, brw_ip_reg());
525 brw_set_src1(insn, brw_imm_d(0x0));
526
527 insn->header.compression_control = BRW_COMPRESSION_NONE;
528 insn->header.execution_size = if_insn->header.execution_size;
529 insn->header.mask_control = BRW_MASK_ENABLE;
530
531 /* Patch the if instruction to point at this instruction.
532 */
533 if (p->single_program_flow) {
534 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
535
536 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
537 } else {
538 assert(if_insn->header.opcode == BRW_OPCODE_IF);
539
540 if_insn->bits3.if_else.jump_count = insn - if_insn;
541 if_insn->bits3.if_else.pop_count = 1;
542 if_insn->bits3.if_else.pad0 = 0;
543 }
544
545 return insn;
546 }
547
548 void brw_ENDIF(struct brw_compile *p,
549 struct brw_instruction *patch_insn)
550 {
551 if (p->single_program_flow) {
552 /* In single program flow mode, there's no need to execute an ENDIF,
553 * since we don't need to do any stack operations, and if we're executing
554 * currently, we want to just continue executing.
555 */
556 struct brw_instruction *next = &p->store[p->nr_insn];
557
558 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
559
560 patch_insn->bits3.ud = (next - patch_insn) * 16;
561 } else {
562 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
563
564 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
565 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
566 brw_set_src1(insn, brw_imm_d(0x0));
567
568 insn->header.compression_control = BRW_COMPRESSION_NONE;
569 insn->header.execution_size = patch_insn->header.execution_size;
570 insn->header.mask_control = BRW_MASK_ENABLE;
571
572 assert(patch_insn->bits3.if_else.jump_count == 0);
573
574 /* Patch the if or else instructions to point at this or the next
575 * instruction respectively.
576 */
577 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
578 /* Automagically turn it into an IFF:
579 */
580 patch_insn->header.opcode = BRW_OPCODE_IFF;
581 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
582 patch_insn->bits3.if_else.pop_count = 0;
583 patch_insn->bits3.if_else.pad0 = 0;
584 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
585 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
586 patch_insn->bits3.if_else.pop_count = 1;
587 patch_insn->bits3.if_else.pad0 = 0;
588 } else {
589 assert(0);
590 }
591
592 /* Also pop item off the stack in the endif instruction:
593 */
594 insn->bits3.if_else.jump_count = 0;
595 insn->bits3.if_else.pop_count = 1;
596 insn->bits3.if_else.pad0 = 0;
597 }
598 }
599
600 struct brw_instruction *brw_BREAK(struct brw_compile *p)
601 {
602 struct brw_instruction *insn;
603 insn = next_insn(p, BRW_OPCODE_BREAK);
604 brw_set_dest(insn, brw_ip_reg());
605 brw_set_src0(insn, brw_ip_reg());
606 brw_set_src1(insn, brw_imm_d(0x0));
607 insn->header.compression_control = BRW_COMPRESSION_NONE;
608 insn->header.execution_size = BRW_EXECUTE_8;
609 insn->header.mask_control = BRW_MASK_DISABLE;
610 insn->bits3.if_else.pad0 = 0;
611 return insn;
612 }
613
614 struct brw_instruction *brw_CONT(struct brw_compile *p)
615 {
616 struct brw_instruction *insn;
617 insn = next_insn(p, BRW_OPCODE_CONTINUE);
618 brw_set_dest(insn, brw_ip_reg());
619 brw_set_src0(insn, brw_ip_reg());
620 brw_set_src1(insn, brw_imm_d(0x0));
621 insn->header.compression_control = BRW_COMPRESSION_NONE;
622 insn->header.execution_size = BRW_EXECUTE_8;
623 insn->header.mask_control = BRW_MASK_DISABLE;
624 insn->bits3.if_else.pad0 = 0;
625 return insn;
626 }
627
628 /* DO/WHILE loop:
629 */
630 struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
631 {
632 if (p->single_program_flow) {
633 return &p->store[p->nr_insn];
634 } else {
635 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
636
637 /* Override the defaults for this instruction:
638 */
639 brw_set_dest(insn, brw_null_reg());
640 brw_set_src0(insn, brw_null_reg());
641 brw_set_src1(insn, brw_null_reg());
642
643 insn->header.compression_control = BRW_COMPRESSION_NONE;
644 insn->header.execution_size = execute_size;
645 insn->header.predicate_control = BRW_PREDICATE_NONE;
646 /* insn->header.mask_control = BRW_MASK_ENABLE; */
647 insn->header.mask_control = BRW_MASK_DISABLE;
648
649 return insn;
650 }
651 }
652
653
654
655 struct brw_instruction *brw_WHILE(struct brw_compile *p,
656 struct brw_instruction *do_insn)
657 {
658 struct brw_instruction *insn;
659
660 if (p->single_program_flow)
661 insn = next_insn(p, BRW_OPCODE_ADD);
662 else
663 insn = next_insn(p, BRW_OPCODE_WHILE);
664
665 brw_set_dest(insn, brw_ip_reg());
666 brw_set_src0(insn, brw_ip_reg());
667 brw_set_src1(insn, brw_imm_d(0x0));
668
669 insn->header.compression_control = BRW_COMPRESSION_NONE;
670
671 if (p->single_program_flow) {
672 insn->header.execution_size = BRW_EXECUTE_1;
673
674 insn->bits3.d = (do_insn - insn) * 16;
675 } else {
676 insn->header.execution_size = do_insn->header.execution_size;
677
678 assert(do_insn->header.opcode == BRW_OPCODE_DO);
679 insn->bits3.if_else.jump_count = do_insn - insn;
680 insn->bits3.if_else.pop_count = 0;
681 insn->bits3.if_else.pad0 = 0;
682 }
683
684 /* insn->header.mask_control = BRW_MASK_ENABLE; */
685
686 insn->header.mask_control = BRW_MASK_DISABLE;
687 p->current->header.predicate_control = BRW_PREDICATE_NONE;
688 return insn;
689 }
690
691
692 /* FORWARD JUMPS:
693 */
694 void brw_land_fwd_jump(struct brw_compile *p,
695 struct brw_instruction *jmp_insn)
696 {
697 struct brw_instruction *landing = &p->store[p->nr_insn];
698
699 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
700 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
701
702 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
703 }
704
705
706
707 /* To integrate with the above, it makes sense that the comparison
708 * instruction should populate the flag register. It might be simpler
709 * just to use the flag reg for most WM tasks?
710 */
711 void brw_CMP(struct brw_compile *p,
712 struct brw_reg dest,
713 unsigned conditional,
714 struct brw_reg src0,
715 struct brw_reg src1)
716 {
717 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
718
719 insn->header.destreg__conditonalmod = conditional;
720 brw_set_dest(insn, dest);
721 brw_set_src0(insn, src0);
722 brw_set_src1(insn, src1);
723
724 /* guess_execution_size(insn, src0); */
725
726
727 /* Make it so that future instructions will use the computed flag
728 * value until brw_set_predicate_control_flag_value() is called
729 * again.
730 */
731 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
732 dest.nr == 0) {
733 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
734 p->flag_value = 0xff;
735 }
736 }
737
738
739
740 /***********************************************************************
741 * Helpers for the various SEND message types:
742 */
743
744 /* Invert 8 values
745 */
746 void brw_math( struct brw_compile *p,
747 struct brw_reg dest,
748 unsigned function,
749 unsigned saturate,
750 unsigned msg_reg_nr,
751 struct brw_reg src,
752 unsigned data_type,
753 unsigned precision )
754 {
755 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
756 unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
757 unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
758
759 /* Example code doesn't set predicate_control for send
760 * instructions.
761 */
762 insn->header.predicate_control = 0;
763 insn->header.destreg__conditonalmod = msg_reg_nr;
764
765 brw_set_dest(insn, dest);
766 brw_set_src0(insn, src);
767 brw_set_math_message(insn,
768 msg_length, response_length,
769 function,
770 BRW_MATH_INTEGER_UNSIGNED,
771 precision,
772 saturate,
773 data_type);
774 }
775
776 /* Use 2 send instructions to invert 16 elements
777 */
778 void brw_math_16( struct brw_compile *p,
779 struct brw_reg dest,
780 unsigned function,
781 unsigned saturate,
782 unsigned msg_reg_nr,
783 struct brw_reg src,
784 unsigned precision )
785 {
786 struct brw_instruction *insn;
787 unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
788 unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
789
790 /* First instruction:
791 */
792 brw_push_insn_state(p);
793 brw_set_predicate_control_flag_value(p, 0xff);
794 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
795
796 insn = next_insn(p, BRW_OPCODE_SEND);
797 insn->header.destreg__conditonalmod = msg_reg_nr;
798
799 brw_set_dest(insn, dest);
800 brw_set_src0(insn, src);
801 brw_set_math_message(insn,
802 msg_length, response_length,
803 function,
804 BRW_MATH_INTEGER_UNSIGNED,
805 precision,
806 saturate,
807 BRW_MATH_DATA_VECTOR);
808
809 /* Second instruction:
810 */
811 insn = next_insn(p, BRW_OPCODE_SEND);
812 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
813 insn->header.destreg__conditonalmod = msg_reg_nr+1;
814
815 brw_set_dest(insn, offset(dest,1));
816 brw_set_src0(insn, src);
817 brw_set_math_message(insn,
818 msg_length, response_length,
819 function,
820 BRW_MATH_INTEGER_UNSIGNED,
821 precision,
822 saturate,
823 BRW_MATH_DATA_VECTOR);
824
825 brw_pop_insn_state(p);
826 }
827
828
829
830
831 void brw_dp_WRITE_16( struct brw_compile *p,
832 struct brw_reg src,
833 unsigned msg_reg_nr,
834 unsigned scratch_offset )
835 {
836 {
837 brw_push_insn_state(p);
838 brw_set_mask_control(p, BRW_MASK_DISABLE);
839 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
840
841 brw_MOV(p,
842 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
843 brw_imm_d(scratch_offset));
844
845 brw_pop_insn_state(p);
846 }
847
848 {
849 unsigned msg_length = 3;
850 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
851 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
852
853 insn->header.predicate_control = 0; /* XXX */
854 insn->header.compression_control = BRW_COMPRESSION_NONE;
855 insn->header.destreg__conditonalmod = msg_reg_nr;
856
857 brw_set_dest(insn, dest);
858 brw_set_src0(insn, src);
859
860 brw_set_dp_write_message(insn,
861 255, /* bti */
862 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
863 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
864 msg_length,
865 0, /* pixel scoreboard */
866 0, /* response_length */
867 0); /* eot */
868 }
869
870 }
871
872
873 void brw_dp_READ_16( struct brw_compile *p,
874 struct brw_reg dest,
875 unsigned msg_reg_nr,
876 unsigned scratch_offset )
877 {
878 {
879 brw_push_insn_state(p);
880 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
881 brw_set_mask_control(p, BRW_MASK_DISABLE);
882
883 brw_MOV(p,
884 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
885 brw_imm_d(scratch_offset));
886
887 brw_pop_insn_state(p);
888 }
889
890 {
891 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
892
893 insn->header.predicate_control = 0; /* XXX */
894 insn->header.compression_control = BRW_COMPRESSION_NONE;
895 insn->header.destreg__conditonalmod = msg_reg_nr;
896
897 brw_set_dest(insn, dest); /* UW? */
898 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
899
900 brw_set_dp_read_message(insn,
901 255, /* bti */
902 3, /* msg_control */
903 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
904 1, /* target cache */
905 1, /* msg_length */
906 2, /* response_length */
907 0); /* eot */
908 }
909 }
910
911
912 void brw_fb_WRITE(struct brw_compile *p,
913 struct brw_reg dest,
914 unsigned msg_reg_nr,
915 struct brw_reg src0,
916 unsigned binding_table_index,
917 unsigned msg_length,
918 unsigned response_length,
919 boolean eot)
920 {
921 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
922
923 insn->header.predicate_control = 0; /* XXX */
924 insn->header.compression_control = BRW_COMPRESSION_NONE;
925 insn->header.destreg__conditonalmod = msg_reg_nr;
926
927 brw_set_dest(insn, dest);
928 brw_set_src0(insn, src0);
929 brw_set_dp_write_message(insn,
930 binding_table_index,
931 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
932 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
933 msg_length,
934 1, /* pixel scoreboard */
935 response_length,
936 eot);
937 }
938
939
940
941 void brw_SAMPLE(struct brw_compile *p,
942 struct brw_reg dest,
943 unsigned msg_reg_nr,
944 struct brw_reg src0,
945 unsigned binding_table_index,
946 unsigned sampler,
947 unsigned writemask,
948 unsigned msg_type,
949 unsigned response_length,
950 unsigned msg_length,
951 boolean eot)
952 {
953 boolean need_stall = 0;
954
955 if(writemask == 0) {
956 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
957 return;
958 }
959
960 /* Hardware doesn't do destination dependency checking on send
961 * instructions properly. Add a workaround which generates the
962 * dependency by other means. In practice it seems like this bug
963 * only crops up for texture samples, and only where registers are
964 * written by the send and then written again later without being
965 * read in between. Luckily for us, we already track that
966 * information and use it to modify the writemask for the
967 * instruction, so that is a guide for whether a workaround is
968 * needed.
969 */
970 if (writemask != TGSI_WRITEMASK_XYZW) {
971 unsigned dst_offset = 0;
972 unsigned i, newmask = 0, len = 0;
973
974 for (i = 0; i < 4; i++) {
975 if (writemask & (1<<i))
976 break;
977 dst_offset += 2;
978 }
979 for (; i < 4; i++) {
980 if (!(writemask & (1<<i)))
981 break;
982 newmask |= 1<<i;
983 len++;
984 }
985
986 if (newmask != writemask) {
987 need_stall = 1;
988 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
989 }
990 else {
991 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
992
993 newmask = ~newmask & TGSI_WRITEMASK_XYZW;
994
995 brw_push_insn_state(p);
996
997 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
998 brw_set_mask_control(p, BRW_MASK_DISABLE);
999
1000 brw_MOV(p, m1, brw_vec8_grf(0,0));
1001 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1002
1003 brw_pop_insn_state(p);
1004
1005 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1006 dest = offset(dest, dst_offset);
1007 response_length = len * 2;
1008 }
1009 }
1010
1011 {
1012 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1013
1014 insn->header.predicate_control = 0; /* XXX */
1015 insn->header.compression_control = BRW_COMPRESSION_NONE;
1016 insn->header.destreg__conditonalmod = msg_reg_nr;
1017
1018 brw_set_dest(insn, dest);
1019 brw_set_src0(insn, src0);
1020 brw_set_sampler_message(insn,
1021 binding_table_index,
1022 sampler,
1023 msg_type,
1024 response_length,
1025 msg_length,
1026 eot);
1027 }
1028
1029 if (need_stall)
1030 {
1031 struct brw_reg reg = vec8(offset(dest, response_length-1));
1032
1033 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1034 */
1035 brw_push_insn_state(p);
1036 brw_set_compression_control(p, FALSE);
1037 brw_MOV(p, reg, reg);
1038 brw_pop_insn_state(p);
1039 }
1040
1041 }
1042
1043 /* All these variables are pretty confusing - we might be better off
1044 * using bitmasks and macros for this, in the old style. Or perhaps
1045 * just having the caller instantiate the fields in dword3 itself.
1046 */
1047 void brw_urb_WRITE(struct brw_compile *p,
1048 struct brw_reg dest,
1049 unsigned msg_reg_nr,
1050 struct brw_reg src0,
1051 boolean allocate,
1052 boolean used,
1053 unsigned msg_length,
1054 unsigned response_length,
1055 boolean eot,
1056 boolean writes_complete,
1057 unsigned offset,
1058 unsigned swizzle)
1059 {
1060 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1061
1062 assert(msg_length < 16);
1063
1064 brw_set_dest(insn, dest);
1065 brw_set_src0(insn, src0);
1066 brw_set_src1(insn, brw_imm_d(0));
1067
1068 insn->header.destreg__conditonalmod = msg_reg_nr;
1069
1070 brw_set_urb_message(insn,
1071 allocate,
1072 used,
1073 msg_length,
1074 response_length,
1075 eot,
1076 writes_complete,
1077 offset,
1078 swizzle);
1079 }
1080