fefd30bc7f51d8e4c2e8ca3bbd415b72e6217b5e
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 insn->bits1.da1.dest_reg_file = dest.file;
59 insn->bits1.da1.dest_reg_type = dest.type;
60 insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63 insn->bits1.da1.dest_reg_nr = dest.nr;
64
65 if (insn->header.access_mode == BRW_ALIGN_1) {
66 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68 }
69 else {
70 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72 }
73 }
74 else {
75 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77 /* These are different sizes in align1 vs align16:
78 */
79 if (insn->header.access_mode == BRW_ALIGN_1) {
80 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82 }
83 else {
84 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85 }
86 }
87
88 /* NEW: Set the execution size based on dest.width and
89 * insn->compression_control:
90 */
91 guess_execution_size(insn, dest);
92 }
93
94 static void brw_set_src0( struct brw_instruction *insn,
95 struct brw_reg reg )
96 {
97 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99 insn->bits1.da1.src0_reg_file = reg.file;
100 insn->bits1.da1.src0_reg_type = reg.type;
101 insn->bits2.da1.src0_abs = reg.abs;
102 insn->bits2.da1.src0_negate = reg.negate;
103 insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105 if (reg.file == BRW_IMMEDIATE_VALUE) {
106 insn->bits3.ud = reg.dw1.ud;
107
108 /* Required to set some fields in src1 as well:
109 */
110 insn->bits1.da1.src1_reg_file = 0; /* arf */
111 insn->bits1.da1.src1_reg_type = reg.type;
112 }
113 else
114 {
115 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
116 if (insn->header.access_mode == BRW_ALIGN_1) {
117 insn->bits2.da1.src0_subreg_nr = reg.subnr;
118 insn->bits2.da1.src0_reg_nr = reg.nr;
119 }
120 else {
121 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
122 insn->bits2.da16.src0_reg_nr = reg.nr;
123 }
124 }
125 else {
126 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
127
128 if (insn->header.access_mode == BRW_ALIGN_1) {
129 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
130 }
131 else {
132 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
133 }
134 }
135
136 if (insn->header.access_mode == BRW_ALIGN_1) {
137 if (reg.width == BRW_WIDTH_1 &&
138 insn->header.execution_size == BRW_EXECUTE_1) {
139 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
140 insn->bits2.da1.src0_width = BRW_WIDTH_1;
141 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
142 }
143 else {
144 insn->bits2.da1.src0_horiz_stride = reg.hstride;
145 insn->bits2.da1.src0_width = reg.width;
146 insn->bits2.da1.src0_vert_stride = reg.vstride;
147 }
148 }
149 else {
150 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
151 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
152 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
153 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
154
155 /* This is an oddity of the fact we're using the same
156 * descriptions for registers in align_16 as align_1:
157 */
158 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
159 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
160 else
161 insn->bits2.da16.src0_vert_stride = reg.vstride;
162 }
163 }
164 }
165
166
167 void brw_set_src1( struct brw_instruction *insn,
168 struct brw_reg reg )
169 {
170 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
171
172 insn->bits1.da1.src1_reg_file = reg.file;
173 insn->bits1.da1.src1_reg_type = reg.type;
174 insn->bits3.da1.src1_abs = reg.abs;
175 insn->bits3.da1.src1_negate = reg.negate;
176
177 /* Only src1 can be immediate in two-argument instructions.
178 */
179 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
180
181 if (reg.file == BRW_IMMEDIATE_VALUE) {
182 insn->bits3.ud = reg.dw1.ud;
183 }
184 else {
185 /* This is a hardware restriction, which may or may not be lifted
186 * in the future:
187 */
188 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
189 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
190
191 if (insn->header.access_mode == BRW_ALIGN_1) {
192 insn->bits3.da1.src1_subreg_nr = reg.subnr;
193 insn->bits3.da1.src1_reg_nr = reg.nr;
194 }
195 else {
196 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
197 insn->bits3.da16.src1_reg_nr = reg.nr;
198 }
199
200 if (insn->header.access_mode == BRW_ALIGN_1) {
201 if (reg.width == BRW_WIDTH_1 &&
202 insn->header.execution_size == BRW_EXECUTE_1) {
203 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
204 insn->bits3.da1.src1_width = BRW_WIDTH_1;
205 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
206 }
207 else {
208 insn->bits3.da1.src1_horiz_stride = reg.hstride;
209 insn->bits3.da1.src1_width = reg.width;
210 insn->bits3.da1.src1_vert_stride = reg.vstride;
211 }
212 }
213 else {
214 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
215 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
216 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
217 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
218
219 /* This is an oddity of the fact we're using the same
220 * descriptions for registers in align_16 as align_1:
221 */
222 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
223 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
224 else
225 insn->bits3.da16.src1_vert_stride = reg.vstride;
226 }
227 }
228 }
229
230
231
232 static void brw_set_math_message( struct brw_instruction *insn,
233 GLuint msg_length,
234 GLuint response_length,
235 GLuint function,
236 GLuint integer_type,
237 GLboolean low_precision,
238 GLboolean saturate,
239 GLuint dataType )
240 {
241 brw_set_src1(insn, brw_imm_d(0));
242
243 insn->bits3.math.function = function;
244 insn->bits3.math.int_type = integer_type;
245 insn->bits3.math.precision = low_precision;
246 insn->bits3.math.saturate = saturate;
247 insn->bits3.math.data_type = dataType;
248 insn->bits3.math.response_length = response_length;
249 insn->bits3.math.msg_length = msg_length;
250 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
251 insn->bits3.math.end_of_thread = 0;
252 }
253
254 static void brw_set_urb_message( struct brw_instruction *insn,
255 GLboolean allocate,
256 GLboolean used,
257 GLuint msg_length,
258 GLuint response_length,
259 GLboolean end_of_thread,
260 GLboolean complete,
261 GLuint offset,
262 GLuint swizzle_control )
263 {
264 brw_set_src1(insn, brw_imm_d(0));
265
266 insn->bits3.urb.opcode = 0; /* ? */
267 insn->bits3.urb.offset = offset;
268 insn->bits3.urb.swizzle_control = swizzle_control;
269 insn->bits3.urb.allocate = allocate;
270 insn->bits3.urb.used = used; /* ? */
271 insn->bits3.urb.complete = complete;
272 insn->bits3.urb.response_length = response_length;
273 insn->bits3.urb.msg_length = msg_length;
274 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
275 insn->bits3.urb.end_of_thread = end_of_thread;
276 }
277
278 static void brw_set_dp_write_message( struct brw_instruction *insn,
279 GLuint binding_table_index,
280 GLuint msg_control,
281 GLuint msg_type,
282 GLuint msg_length,
283 GLuint pixel_scoreboard_clear,
284 GLuint response_length,
285 GLuint end_of_thread )
286 {
287 brw_set_src1(insn, brw_imm_d(0));
288
289 insn->bits3.dp_write.binding_table_index = binding_table_index;
290 insn->bits3.dp_write.msg_control = msg_control;
291 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
292 insn->bits3.dp_write.msg_type = msg_type;
293 insn->bits3.dp_write.send_commit_msg = 0;
294 insn->bits3.dp_write.response_length = response_length;
295 insn->bits3.dp_write.msg_length = msg_length;
296 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
297 insn->bits3.urb.end_of_thread = end_of_thread;
298 }
299
300 static void brw_set_dp_read_message( struct brw_instruction *insn,
301 GLuint binding_table_index,
302 GLuint msg_control,
303 GLuint msg_type,
304 GLuint target_cache,
305 GLuint msg_length,
306 GLuint response_length,
307 GLuint end_of_thread )
308 {
309 brw_set_src1(insn, brw_imm_d(0));
310
311 insn->bits3.dp_read.binding_table_index = binding_table_index;
312 insn->bits3.dp_read.msg_control = msg_control;
313 insn->bits3.dp_read.msg_type = msg_type;
314 insn->bits3.dp_read.target_cache = target_cache;
315 insn->bits3.dp_read.response_length = response_length;
316 insn->bits3.dp_read.msg_length = msg_length;
317 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
318 insn->bits3.dp_read.end_of_thread = end_of_thread;
319 }
320
321 static void brw_set_sampler_message(struct brw_context *brw,
322 struct brw_instruction *insn,
323 GLuint binding_table_index,
324 GLuint sampler,
325 GLuint msg_type,
326 GLuint response_length,
327 GLuint msg_length,
328 GLboolean eot)
329 {
330 brw_set_src1(insn, brw_imm_d(0));
331
332 if (BRW_IS_IGD(brw)) {
333 insn->bits3.sampler_igd.binding_table_index = binding_table_index;
334 insn->bits3.sampler_igd.sampler = sampler;
335 insn->bits3.sampler_igd.msg_type = msg_type;
336 insn->bits3.sampler_igd.response_length = response_length;
337 insn->bits3.sampler_igd.msg_length = msg_length;
338 insn->bits3.sampler_igd.end_of_thread = eot;
339 insn->bits3.sampler_igd.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
340 } else {
341 insn->bits3.sampler.binding_table_index = binding_table_index;
342 insn->bits3.sampler.sampler = sampler;
343 insn->bits3.sampler.msg_type = msg_type;
344 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
345 insn->bits3.sampler.response_length = response_length;
346 insn->bits3.sampler.msg_length = msg_length;
347 insn->bits3.sampler.end_of_thread = eot;
348 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
349 }
350 }
351
352
353
354 static struct brw_instruction *next_insn( struct brw_compile *p,
355 GLuint opcode )
356 {
357 struct brw_instruction *insn;
358
359 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
360
361 insn = &p->store[p->nr_insn++];
362 memcpy(insn, p->current, sizeof(*insn));
363
364 /* Reset this one-shot flag:
365 */
366
367 if (p->current->header.destreg__conditonalmod) {
368 p->current->header.destreg__conditonalmod = 0;
369 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
370 }
371
372 insn->header.opcode = opcode;
373 return insn;
374 }
375
376
377 static struct brw_instruction *brw_alu1( struct brw_compile *p,
378 GLuint opcode,
379 struct brw_reg dest,
380 struct brw_reg src )
381 {
382 struct brw_instruction *insn = next_insn(p, opcode);
383 brw_set_dest(insn, dest);
384 brw_set_src0(insn, src);
385 return insn;
386 }
387
388 static struct brw_instruction *brw_alu2(struct brw_compile *p,
389 GLuint opcode,
390 struct brw_reg dest,
391 struct brw_reg src0,
392 struct brw_reg src1 )
393 {
394 struct brw_instruction *insn = next_insn(p, opcode);
395 brw_set_dest(insn, dest);
396 brw_set_src0(insn, src0);
397 brw_set_src1(insn, src1);
398 return insn;
399 }
400
401
402 /***********************************************************************
403 * Convenience routines.
404 */
/* Define the public emitter brw_<INSTR>(p, dest, src0) for a one-source
 * opcode as a thin wrapper around brw_alu1() with BRW_OPCODE_<INSTR>.
 */
#define ALU1(INSTR)                                                     \
struct brw_instruction *brw_##INSTR(struct brw_compile *p,              \
                                    struct brw_reg dest,                \
                                    struct brw_reg src0)                \
{                                                                       \
   return brw_alu1(p, BRW_OPCODE_##INSTR, dest, src0);                  \
}
412
/* Define the public emitter brw_<INSTR>(p, dest, src0, src1) for a
 * two-source opcode as a thin wrapper around brw_alu2() with
 * BRW_OPCODE_<INSTR>.
 */
#define ALU2(INSTR)                                                     \
struct brw_instruction *brw_##INSTR(struct brw_compile *p,              \
                                    struct brw_reg dest,                \
                                    struct brw_reg src0,                \
                                    struct brw_reg src1)                \
{                                                                       \
   return brw_alu2(p, BRW_OPCODE_##INSTR, dest, src0, src1);            \
}
421
422
423 ALU1(MOV)
424 ALU2(SEL)
425 ALU1(NOT)
426 ALU2(AND)
427 ALU2(OR)
428 ALU2(XOR)
429 ALU2(SHR)
430 ALU2(SHL)
431 ALU2(RSR)
432 ALU2(RSL)
433 ALU2(ASR)
434 ALU2(ADD)
435 ALU2(MUL)
436 ALU1(FRC)
437 ALU1(RNDD)
438 ALU2(MAC)
439 ALU2(MACH)
440 ALU1(LZD)
441 ALU2(DP4)
442 ALU2(DPH)
443 ALU2(DP3)
444 ALU2(DP2)
445 ALU2(LINE)
446
447
448
449
450 void brw_NOP(struct brw_compile *p)
451 {
452 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
453 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
454 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
455 brw_set_src1(insn, brw_imm_ud(0x0));
456 }
457
458
459
460
461
462 /***********************************************************************
463 * Comparisons, if/else/endif
464 */
465
466 struct brw_instruction *brw_JMPI(struct brw_compile *p,
467 struct brw_reg dest,
468 struct brw_reg src0,
469 struct brw_reg src1)
470 {
471 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
472
473 p->current->header.predicate_control = BRW_PREDICATE_NONE;
474
475 return insn;
476 }
477
478 /* EU takes the value from the flag register and pushes it onto some
479 * sort of a stack (presumably merging with any flag value already on
480 * the stack). Within an if block, the flags at the top of the stack
481 * control execution on each channel of the unit, eg. on each of the
482 * 16 pixel values in our wm programs.
483 *
484 * When the matching 'else' instruction is reached (presumably by
485 * countdown of the instruction count patched in by our ELSE/ENDIF
486 * functions), the relevent flags are inverted.
487 *
488 * When the matching 'endif' instruction is reached, the flags are
489 * popped off. If the stack is now empty, normal execution resumes.
490 *
491 * No attempt is made to deal with stack overflow (14 elements?).
492 */
493 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
494 {
495 struct brw_instruction *insn;
496
497 if (p->single_program_flow) {
498 assert(execute_size == BRW_EXECUTE_1);
499
500 insn = next_insn(p, BRW_OPCODE_ADD);
501 insn->header.predicate_inverse = 1;
502 } else {
503 insn = next_insn(p, BRW_OPCODE_IF);
504 }
505
506 /* Override the defaults for this instruction:
507 */
508 brw_set_dest(insn, brw_ip_reg());
509 brw_set_src0(insn, brw_ip_reg());
510 brw_set_src1(insn, brw_imm_d(0x0));
511
512 insn->header.execution_size = execute_size;
513 insn->header.compression_control = BRW_COMPRESSION_NONE;
514 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
515 insn->header.mask_control = BRW_MASK_ENABLE;
516
517 p->current->header.predicate_control = BRW_PREDICATE_NONE;
518
519 return insn;
520 }
521
522
523 struct brw_instruction *brw_ELSE(struct brw_compile *p,
524 struct brw_instruction *if_insn)
525 {
526 struct brw_instruction *insn;
527
528 if (p->single_program_flow) {
529 insn = next_insn(p, BRW_OPCODE_ADD);
530 } else {
531 insn = next_insn(p, BRW_OPCODE_ELSE);
532 }
533
534 brw_set_dest(insn, brw_ip_reg());
535 brw_set_src0(insn, brw_ip_reg());
536 brw_set_src1(insn, brw_imm_d(0x0));
537
538 insn->header.compression_control = BRW_COMPRESSION_NONE;
539 insn->header.execution_size = if_insn->header.execution_size;
540 insn->header.mask_control = BRW_MASK_ENABLE;
541
542 /* Patch the if instruction to point at this instruction.
543 */
544 if (p->single_program_flow) {
545 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
546
547 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
548 } else {
549 assert(if_insn->header.opcode == BRW_OPCODE_IF);
550
551 if_insn->bits3.if_else.jump_count = insn - if_insn;
552 if_insn->bits3.if_else.pop_count = 1;
553 if_insn->bits3.if_else.pad0 = 0;
554 }
555
556 return insn;
557 }
558
559 void brw_ENDIF(struct brw_compile *p,
560 struct brw_instruction *patch_insn)
561 {
562 if (p->single_program_flow) {
563 /* In single program flow mode, there's no need to execute an ENDIF,
564 * since we don't need to do any stack operations, and if we're executing
565 * currently, we want to just continue executing.
566 */
567 struct brw_instruction *next = &p->store[p->nr_insn];
568
569 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
570
571 patch_insn->bits3.ud = (next - patch_insn) * 16;
572 } else {
573 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
574
575 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
576 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
577 brw_set_src1(insn, brw_imm_d(0x0));
578
579 insn->header.compression_control = BRW_COMPRESSION_NONE;
580 insn->header.execution_size = patch_insn->header.execution_size;
581 insn->header.mask_control = BRW_MASK_ENABLE;
582
583 assert(patch_insn->bits3.if_else.jump_count == 0);
584
585 /* Patch the if or else instructions to point at this or the next
586 * instruction respectively.
587 */
588 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
589 /* Automagically turn it into an IFF:
590 */
591 patch_insn->header.opcode = BRW_OPCODE_IFF;
592 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
593 patch_insn->bits3.if_else.pop_count = 0;
594 patch_insn->bits3.if_else.pad0 = 0;
595 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
596 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
597 patch_insn->bits3.if_else.pop_count = 1;
598 patch_insn->bits3.if_else.pad0 = 0;
599 } else {
600 assert(0);
601 }
602
603 /* Also pop item off the stack in the endif instruction:
604 */
605 insn->bits3.if_else.jump_count = 0;
606 insn->bits3.if_else.pop_count = 1;
607 insn->bits3.if_else.pad0 = 0;
608 }
609 }
610
611 struct brw_instruction *brw_BREAK(struct brw_compile *p)
612 {
613 struct brw_instruction *insn;
614 insn = next_insn(p, BRW_OPCODE_BREAK);
615 brw_set_dest(insn, brw_ip_reg());
616 brw_set_src0(insn, brw_ip_reg());
617 brw_set_src1(insn, brw_imm_d(0x0));
618 insn->header.compression_control = BRW_COMPRESSION_NONE;
619 insn->header.execution_size = BRW_EXECUTE_8;
620 insn->header.mask_control = BRW_MASK_DISABLE;
621 insn->bits3.if_else.pad0 = 0;
622 return insn;
623 }
624
625 struct brw_instruction *brw_CONT(struct brw_compile *p)
626 {
627 struct brw_instruction *insn;
628 insn = next_insn(p, BRW_OPCODE_CONTINUE);
629 brw_set_dest(insn, brw_ip_reg());
630 brw_set_src0(insn, brw_ip_reg());
631 brw_set_src1(insn, brw_imm_d(0x0));
632 insn->header.compression_control = BRW_COMPRESSION_NONE;
633 insn->header.execution_size = BRW_EXECUTE_8;
634 insn->header.mask_control = BRW_MASK_DISABLE;
635 insn->bits3.if_else.pad0 = 0;
636 return insn;
637 }
638
639 /* DO/WHILE loop:
640 */
641 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
642 {
643 if (p->single_program_flow) {
644 return &p->store[p->nr_insn];
645 } else {
646 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
647
648 /* Override the defaults for this instruction:
649 */
650 brw_set_dest(insn, brw_null_reg());
651 brw_set_src0(insn, brw_null_reg());
652 brw_set_src1(insn, brw_null_reg());
653
654 insn->header.compression_control = BRW_COMPRESSION_NONE;
655 insn->header.execution_size = execute_size;
656 insn->header.predicate_control = BRW_PREDICATE_NONE;
657 /* insn->header.mask_control = BRW_MASK_ENABLE; */
658 insn->header.mask_control = BRW_MASK_DISABLE;
659
660 return insn;
661 }
662 }
663
664
665
666 struct brw_instruction *brw_WHILE(struct brw_compile *p,
667 struct brw_instruction *do_insn)
668 {
669 struct brw_instruction *insn;
670
671 if (p->single_program_flow)
672 insn = next_insn(p, BRW_OPCODE_ADD);
673 else
674 insn = next_insn(p, BRW_OPCODE_WHILE);
675
676 brw_set_dest(insn, brw_ip_reg());
677 brw_set_src0(insn, brw_ip_reg());
678 brw_set_src1(insn, brw_imm_d(0x0));
679
680 insn->header.compression_control = BRW_COMPRESSION_NONE;
681
682 if (p->single_program_flow) {
683 insn->header.execution_size = BRW_EXECUTE_1;
684
685 insn->bits3.d = (do_insn - insn) * 16;
686 } else {
687 insn->header.execution_size = do_insn->header.execution_size;
688
689 assert(do_insn->header.opcode == BRW_OPCODE_DO);
690 insn->bits3.if_else.jump_count = do_insn - insn + 1;
691 insn->bits3.if_else.pop_count = 0;
692 insn->bits3.if_else.pad0 = 0;
693 }
694
695 /* insn->header.mask_control = BRW_MASK_ENABLE; */
696
697 insn->header.mask_control = BRW_MASK_DISABLE;
698 p->current->header.predicate_control = BRW_PREDICATE_NONE;
699 return insn;
700 }
701
702
703 /* FORWARD JUMPS:
704 */
705 void brw_land_fwd_jump(struct brw_compile *p,
706 struct brw_instruction *jmp_insn)
707 {
708 struct brw_instruction *landing = &p->store[p->nr_insn];
709
710 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
711 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
712
713 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
714 }
715
716
717
718 /* To integrate with the above, it makes sense that the comparison
719 * instruction should populate the flag register. It might be simpler
720 * just to use the flag reg for most WM tasks?
721 */
722 void brw_CMP(struct brw_compile *p,
723 struct brw_reg dest,
724 GLuint conditional,
725 struct brw_reg src0,
726 struct brw_reg src1)
727 {
728 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
729
730 insn->header.destreg__conditonalmod = conditional;
731 brw_set_dest(insn, dest);
732 brw_set_src0(insn, src0);
733 brw_set_src1(insn, src1);
734
735 /* guess_execution_size(insn, src0); */
736
737
738 /* Make it so that future instructions will use the computed flag
739 * value until brw_set_predicate_control_flag_value() is called
740 * again.
741 */
742 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
743 dest.nr == 0) {
744 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
745 p->flag_value = 0xff;
746 }
747 }
748
749
750
751 /***********************************************************************
752 * Helpers for the various SEND message types:
753 */
754
755 /* Invert 8 values
756 */
757 void brw_math( struct brw_compile *p,
758 struct brw_reg dest,
759 GLuint function,
760 GLuint saturate,
761 GLuint msg_reg_nr,
762 struct brw_reg src,
763 GLuint data_type,
764 GLuint precision )
765 {
766 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
767 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
768 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
769
770 /* Example code doesn't set predicate_control for send
771 * instructions.
772 */
773 insn->header.predicate_control = 0;
774 insn->header.destreg__conditonalmod = msg_reg_nr;
775
776 brw_set_dest(insn, dest);
777 brw_set_src0(insn, src);
778 brw_set_math_message(insn,
779 msg_length, response_length,
780 function,
781 BRW_MATH_INTEGER_UNSIGNED,
782 precision,
783 saturate,
784 data_type);
785 }
786
787 /* Use 2 send instructions to invert 16 elements
788 */
789 void brw_math_16( struct brw_compile *p,
790 struct brw_reg dest,
791 GLuint function,
792 GLuint saturate,
793 GLuint msg_reg_nr,
794 struct brw_reg src,
795 GLuint precision )
796 {
797 struct brw_instruction *insn;
798 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
799 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
800
801 /* First instruction:
802 */
803 brw_push_insn_state(p);
804 brw_set_predicate_control_flag_value(p, 0xff);
805 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
806
807 insn = next_insn(p, BRW_OPCODE_SEND);
808 insn->header.destreg__conditonalmod = msg_reg_nr;
809
810 brw_set_dest(insn, dest);
811 brw_set_src0(insn, src);
812 brw_set_math_message(insn,
813 msg_length, response_length,
814 function,
815 BRW_MATH_INTEGER_UNSIGNED,
816 precision,
817 saturate,
818 BRW_MATH_DATA_VECTOR);
819
820 /* Second instruction:
821 */
822 insn = next_insn(p, BRW_OPCODE_SEND);
823 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824 insn->header.destreg__conditonalmod = msg_reg_nr+1;
825
826 brw_set_dest(insn, offset(dest,1));
827 brw_set_src0(insn, src);
828 brw_set_math_message(insn,
829 msg_length, response_length,
830 function,
831 BRW_MATH_INTEGER_UNSIGNED,
832 precision,
833 saturate,
834 BRW_MATH_DATA_VECTOR);
835
836 brw_pop_insn_state(p);
837 }
838
839
840
841
842 void brw_dp_WRITE_16( struct brw_compile *p,
843 struct brw_reg src,
844 GLuint msg_reg_nr,
845 GLuint scratch_offset )
846 {
847 {
848 brw_push_insn_state(p);
849 brw_set_mask_control(p, BRW_MASK_DISABLE);
850 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851
852 brw_MOV(p,
853 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
854 brw_imm_d(scratch_offset));
855
856 brw_pop_insn_state(p);
857 }
858
859 {
860 GLuint msg_length = 3;
861 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
863
864 insn->header.predicate_control = 0; /* XXX */
865 insn->header.compression_control = BRW_COMPRESSION_NONE;
866 insn->header.destreg__conditonalmod = msg_reg_nr;
867
868 brw_set_dest(insn, dest);
869 brw_set_src0(insn, src);
870
871 brw_set_dp_write_message(insn,
872 255, /* bti */
873 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
875 msg_length,
876 0, /* pixel scoreboard */
877 0, /* response_length */
878 0); /* eot */
879 }
880
881 }
882
883
884 void brw_dp_READ_16( struct brw_compile *p,
885 struct brw_reg dest,
886 GLuint msg_reg_nr,
887 GLuint scratch_offset )
888 {
889 {
890 brw_push_insn_state(p);
891 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892 brw_set_mask_control(p, BRW_MASK_DISABLE);
893
894 brw_MOV(p,
895 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
896 brw_imm_d(scratch_offset));
897
898 brw_pop_insn_state(p);
899 }
900
901 {
902 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
903
904 insn->header.predicate_control = 0; /* XXX */
905 insn->header.compression_control = BRW_COMPRESSION_NONE;
906 insn->header.destreg__conditonalmod = msg_reg_nr;
907
908 brw_set_dest(insn, dest); /* UW? */
909 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
910
911 brw_set_dp_read_message(insn,
912 255, /* bti */
913 3, /* msg_control */
914 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915 1, /* target cache */
916 1, /* msg_length */
917 2, /* response_length */
918 0); /* eot */
919 }
920 }
921
922
923 void brw_fb_WRITE(struct brw_compile *p,
924 struct brw_reg dest,
925 GLuint msg_reg_nr,
926 struct brw_reg src0,
927 GLuint binding_table_index,
928 GLuint msg_length,
929 GLuint response_length,
930 GLboolean eot)
931 {
932 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
933
934 insn->header.predicate_control = 0; /* XXX */
935 insn->header.compression_control = BRW_COMPRESSION_NONE;
936 insn->header.destreg__conditonalmod = msg_reg_nr;
937
938 brw_set_dest(insn, dest);
939 brw_set_src0(insn, src0);
940 brw_set_dp_write_message(insn,
941 binding_table_index,
942 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
944 msg_length,
945 1, /* pixel scoreboard */
946 response_length,
947 eot);
948 }
949
950
951
952 void brw_SAMPLE(struct brw_compile *p,
953 struct brw_reg dest,
954 GLuint msg_reg_nr,
955 struct brw_reg src0,
956 GLuint binding_table_index,
957 GLuint sampler,
958 GLuint writemask,
959 GLuint msg_type,
960 GLuint response_length,
961 GLuint msg_length,
962 GLboolean eot)
963 {
964 GLboolean need_stall = 0;
965
966 if(writemask == 0) {
967 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
968 return;
969 }
970
971 /* Hardware doesn't do destination dependency checking on send
972 * instructions properly. Add a workaround which generates the
973 * dependency by other means. In practice it seems like this bug
974 * only crops up for texture samples, and only where registers are
975 * written by the send and then written again later without being
976 * read in between. Luckily for us, we already track that
977 * information and use it to modify the writemask for the
978 * instruction, so that is a guide for whether a workaround is
979 * needed.
980 */
981 if (writemask != WRITEMASK_XYZW) {
982 GLuint dst_offset = 0;
983 GLuint i, newmask = 0, len = 0;
984
985 for (i = 0; i < 4; i++) {
986 if (writemask & (1<<i))
987 break;
988 dst_offset += 2;
989 }
990 for (; i < 4; i++) {
991 if (!(writemask & (1<<i)))
992 break;
993 newmask |= 1<<i;
994 len++;
995 }
996
997 if (newmask != writemask) {
998 need_stall = 1;
999 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1000 }
1001 else {
1002 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1003
1004 newmask = ~newmask & WRITEMASK_XYZW;
1005
1006 brw_push_insn_state(p);
1007
1008 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1009 brw_set_mask_control(p, BRW_MASK_DISABLE);
1010
1011 brw_MOV(p, m1, brw_vec8_grf(0,0));
1012 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1013
1014 brw_pop_insn_state(p);
1015
1016 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1017 dest = offset(dest, dst_offset);
1018 response_length = len * 2;
1019 }
1020 }
1021
1022 {
1023 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1024
1025 insn->header.predicate_control = 0; /* XXX */
1026 insn->header.compression_control = BRW_COMPRESSION_NONE;
1027 insn->header.destreg__conditonalmod = msg_reg_nr;
1028
1029 brw_set_dest(insn, dest);
1030 brw_set_src0(insn, src0);
1031 brw_set_sampler_message(p->brw, insn,
1032 binding_table_index,
1033 sampler,
1034 msg_type,
1035 response_length,
1036 msg_length,
1037 eot);
1038 }
1039
1040 if (need_stall)
1041 {
1042 struct brw_reg reg = vec8(offset(dest, response_length-1));
1043
1044 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1045 */
1046 brw_push_insn_state(p);
1047 brw_set_compression_control(p, GL_FALSE);
1048 brw_MOV(p, reg, reg);
1049 brw_pop_insn_state(p);
1050 }
1051
1052 }
1053
1054 /* All these variables are pretty confusing - we might be better off
1055 * using bitmasks and macros for this, in the old style. Or perhaps
1056 * just having the caller instantiate the fields in dword3 itself.
1057 */
1058 void brw_urb_WRITE(struct brw_compile *p,
1059 struct brw_reg dest,
1060 GLuint msg_reg_nr,
1061 struct brw_reg src0,
1062 GLboolean allocate,
1063 GLboolean used,
1064 GLuint msg_length,
1065 GLuint response_length,
1066 GLboolean eot,
1067 GLboolean writes_complete,
1068 GLuint offset,
1069 GLuint swizzle)
1070 {
1071 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1072
1073 assert(msg_length < 16);
1074
1075 brw_set_dest(insn, dest);
1076 brw_set_src0(insn, src0);
1077 brw_set_src1(insn, brw_imm_d(0));
1078
1079 insn->header.destreg__conditonalmod = msg_reg_nr;
1080
1081 brw_set_urb_message(insn,
1082 allocate,
1083 used,
1084 msg_length,
1085 response_length,
1086 eot,
1087 writes_complete,
1088 offset,
1089 swizzle);
1090 }
1091