6425c91450cb268742a85c5b9fa56db6a259c9e3
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 insn->bits1.da1.dest_reg_file = dest.file;
59 insn->bits1.da1.dest_reg_type = dest.type;
60 insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63 insn->bits1.da1.dest_reg_nr = dest.nr;
64
65 if (insn->header.access_mode == BRW_ALIGN_1) {
66 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68 }
69 else {
70 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72 }
73 }
74 else {
75 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77 /* These are different sizes in align1 vs align16:
78 */
79 if (insn->header.access_mode == BRW_ALIGN_1) {
80 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82 }
83 else {
84 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85 }
86 }
87
88 /* NEW: Set the execution size based on dest.width and
89 * insn->compression_control:
90 */
91 guess_execution_size(insn, dest);
92 }
93
94 static void brw_set_src0( struct brw_instruction *insn,
95 struct brw_reg reg )
96 {
97 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99 insn->bits1.da1.src0_reg_file = reg.file;
100 insn->bits1.da1.src0_reg_type = reg.type;
101 insn->bits2.da1.src0_abs = reg.abs;
102 insn->bits2.da1.src0_negate = reg.negate;
103 insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105 if (reg.file == BRW_IMMEDIATE_VALUE) {
106 insn->bits3.ud = reg.dw1.ud;
107
108 /* Required to set some fields in src1 as well:
109 */
110 insn->bits1.da1.src1_reg_file = 0; /* arf */
111 insn->bits1.da1.src1_reg_type = reg.type;
112 }
113 else
114 {
115 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
116 if (insn->header.access_mode == BRW_ALIGN_1) {
117 insn->bits2.da1.src0_subreg_nr = reg.subnr;
118 insn->bits2.da1.src0_reg_nr = reg.nr;
119 }
120 else {
121 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
122 insn->bits2.da16.src0_reg_nr = reg.nr;
123 }
124 }
125 else {
126 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
127
128 if (insn->header.access_mode == BRW_ALIGN_1) {
129 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
130 }
131 else {
132 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
133 }
134 }
135
136 if (insn->header.access_mode == BRW_ALIGN_1) {
137 if (reg.width == BRW_WIDTH_1 &&
138 insn->header.execution_size == BRW_EXECUTE_1) {
139 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
140 insn->bits2.da1.src0_width = BRW_WIDTH_1;
141 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
142 }
143 else {
144 insn->bits2.da1.src0_horiz_stride = reg.hstride;
145 insn->bits2.da1.src0_width = reg.width;
146 insn->bits2.da1.src0_vert_stride = reg.vstride;
147 }
148 }
149 else {
150 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
151 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
152 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
153 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
154
155 /* This is an oddity of the fact we're using the same
156 * descriptions for registers in align_16 as align_1:
157 */
158 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
159 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
160 else
161 insn->bits2.da16.src0_vert_stride = reg.vstride;
162 }
163 }
164 }
165
166
167 static void brw_set_src1( struct brw_instruction *insn,
168 struct brw_reg reg )
169 {
170 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
171
172 insn->bits1.da1.src1_reg_file = reg.file;
173 insn->bits1.da1.src1_reg_type = reg.type;
174 insn->bits3.da1.src1_abs = reg.abs;
175 insn->bits3.da1.src1_negate = reg.negate;
176
177 /* Only src1 can be immediate in two-argument instructions.
178 */
179 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
180
181 if (reg.file == BRW_IMMEDIATE_VALUE) {
182 insn->bits3.ud = reg.dw1.ud;
183 }
184 else {
185 /* This is a hardware restriction, which may or may not be lifted
186 * in the future:
187 */
188 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
189 assert (reg.file == BRW_GENERAL_REGISTER_FILE);
190
191 if (insn->header.access_mode == BRW_ALIGN_1) {
192 insn->bits3.da1.src1_subreg_nr = reg.subnr;
193 insn->bits3.da1.src1_reg_nr = reg.nr;
194 }
195 else {
196 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
197 insn->bits3.da16.src1_reg_nr = reg.nr;
198 }
199
200 if (insn->header.access_mode == BRW_ALIGN_1) {
201 if (reg.width == BRW_WIDTH_1 &&
202 insn->header.execution_size == BRW_EXECUTE_1) {
203 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
204 insn->bits3.da1.src1_width = BRW_WIDTH_1;
205 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
206 }
207 else {
208 insn->bits3.da1.src1_horiz_stride = reg.hstride;
209 insn->bits3.da1.src1_width = reg.width;
210 insn->bits3.da1.src1_vert_stride = reg.vstride;
211 }
212 }
213 else {
214 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
215 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
216 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
217 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
218
219 /* This is an oddity of the fact we're using the same
220 * descriptions for registers in align_16 as align_1:
221 */
222 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
223 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
224 else
225 insn->bits3.da16.src1_vert_stride = reg.vstride;
226 }
227 }
228 }
229
230
231
232 static void brw_set_math_message( struct brw_instruction *insn,
233 GLuint msg_length,
234 GLuint response_length,
235 GLuint function,
236 GLuint integer_type,
237 GLboolean low_precision,
238 GLboolean saturate,
239 GLuint dataType )
240 {
241 brw_set_src1(insn, brw_imm_d(0));
242
243 insn->bits3.math.function = function;
244 insn->bits3.math.int_type = integer_type;
245 insn->bits3.math.precision = low_precision;
246 insn->bits3.math.saturate = saturate;
247 insn->bits3.math.data_type = dataType;
248 insn->bits3.math.response_length = response_length;
249 insn->bits3.math.msg_length = msg_length;
250 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
251 insn->bits3.math.end_of_thread = 0;
252 }
253
254 static void brw_set_urb_message( struct brw_instruction *insn,
255 GLboolean allocate,
256 GLboolean used,
257 GLuint msg_length,
258 GLuint response_length,
259 GLboolean end_of_thread,
260 GLboolean complete,
261 GLuint offset,
262 GLuint swizzle_control )
263 {
264 brw_set_src1(insn, brw_imm_d(0));
265
266 insn->bits3.urb.opcode = 0; /* ? */
267 insn->bits3.urb.offset = offset;
268 insn->bits3.urb.swizzle_control = swizzle_control;
269 insn->bits3.urb.allocate = allocate;
270 insn->bits3.urb.used = used; /* ? */
271 insn->bits3.urb.complete = complete;
272 insn->bits3.urb.response_length = response_length;
273 insn->bits3.urb.msg_length = msg_length;
274 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
275 insn->bits3.urb.end_of_thread = end_of_thread;
276 }
277
278 static void brw_set_dp_write_message( struct brw_instruction *insn,
279 GLuint binding_table_index,
280 GLuint msg_control,
281 GLuint msg_type,
282 GLuint msg_length,
283 GLuint pixel_scoreboard_clear,
284 GLuint response_length,
285 GLuint end_of_thread )
286 {
287 brw_set_src1(insn, brw_imm_d(0));
288
289 insn->bits3.dp_write.binding_table_index = binding_table_index;
290 insn->bits3.dp_write.msg_control = msg_control;
291 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
292 insn->bits3.dp_write.msg_type = msg_type;
293 insn->bits3.dp_write.send_commit_msg = 0;
294 insn->bits3.dp_write.response_length = response_length;
295 insn->bits3.dp_write.msg_length = msg_length;
296 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
297 insn->bits3.urb.end_of_thread = end_of_thread;
298 }
299
300 static void brw_set_dp_read_message( struct brw_instruction *insn,
301 GLuint binding_table_index,
302 GLuint msg_control,
303 GLuint msg_type,
304 GLuint target_cache,
305 GLuint msg_length,
306 GLuint response_length,
307 GLuint end_of_thread )
308 {
309 brw_set_src1(insn, brw_imm_d(0));
310
311 insn->bits3.dp_read.binding_table_index = binding_table_index;
312 insn->bits3.dp_read.msg_control = msg_control;
313 insn->bits3.dp_read.msg_type = msg_type;
314 insn->bits3.dp_read.target_cache = target_cache;
315 insn->bits3.dp_read.response_length = response_length;
316 insn->bits3.dp_read.msg_length = msg_length;
317 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
318 insn->bits3.dp_read.end_of_thread = end_of_thread;
319 }
320
321 static void brw_set_sampler_message( struct brw_instruction *insn,
322 GLuint binding_table_index,
323 GLuint sampler,
324 GLuint msg_type,
325 GLuint response_length,
326 GLuint msg_length,
327 GLboolean eot)
328 {
329 brw_set_src1(insn, brw_imm_d(0));
330
331 insn->bits3.sampler.binding_table_index = binding_table_index;
332 insn->bits3.sampler.sampler = sampler;
333 insn->bits3.sampler.msg_type = msg_type;
334 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
335 insn->bits3.sampler.response_length = response_length;
336 insn->bits3.sampler.msg_length = msg_length;
337 insn->bits3.sampler.end_of_thread = eot;
338 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
339 }
340
341
342
343 static struct brw_instruction *next_insn( struct brw_compile *p,
344 GLuint opcode )
345 {
346 struct brw_instruction *insn;
347
348 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
349
350 insn = &p->store[p->nr_insn++];
351 memcpy(insn, p->current, sizeof(*insn));
352
353 /* Reset this one-shot flag:
354 */
355
356 if (p->current->header.destreg__conditonalmod) {
357 p->current->header.destreg__conditonalmod = 0;
358 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
359 }
360
361 insn->header.opcode = opcode;
362 return insn;
363 }
364
365
366 static struct brw_instruction *brw_alu1( struct brw_compile *p,
367 GLuint opcode,
368 struct brw_reg dest,
369 struct brw_reg src )
370 {
371 struct brw_instruction *insn = next_insn(p, opcode);
372 brw_set_dest(insn, dest);
373 brw_set_src0(insn, src);
374 return insn;
375 }
376
377 static struct brw_instruction *brw_alu2(struct brw_compile *p,
378 GLuint opcode,
379 struct brw_reg dest,
380 struct brw_reg src0,
381 struct brw_reg src1 )
382 {
383 struct brw_instruction *insn = next_insn(p, opcode);
384 brw_set_dest(insn, dest);
385 brw_set_src0(insn, src0);
386 brw_set_src1(insn, src1);
387 return insn;
388 }
389
390
391 /***********************************************************************
392 * Convenience routines.
393 */
394 #define ALU1(OP) \
395 struct brw_instruction *brw_##OP(struct brw_compile *p, \
396 struct brw_reg dest, \
397 struct brw_reg src0) \
398 { \
399 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
400 }
401
402 #define ALU2(OP) \
403 struct brw_instruction *brw_##OP(struct brw_compile *p, \
404 struct brw_reg dest, \
405 struct brw_reg src0, \
406 struct brw_reg src1) \
407 { \
408 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
409 }
410
411
412 ALU1(MOV)
413 ALU2(SEL)
414 ALU1(NOT)
415 ALU2(AND)
416 ALU2(OR)
417 ALU2(XOR)
418 ALU2(SHR)
419 ALU2(SHL)
420 ALU2(RSR)
421 ALU2(RSL)
422 ALU2(ASR)
423 ALU2(ADD)
424 ALU2(MUL)
425 ALU1(FRC)
426 ALU1(RNDD)
427 ALU2(MAC)
428 ALU2(MACH)
429 ALU1(LZD)
430 ALU2(DP4)
431 ALU2(DPH)
432 ALU2(DP3)
433 ALU2(DP2)
434 ALU2(LINE)
435
436
437
438
439 void brw_NOP(struct brw_compile *p)
440 {
441 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
442 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
443 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
444 brw_set_src1(insn, brw_imm_ud(0x0));
445 }
446
447
448
449
450
451 /***********************************************************************
452 * Comparisons, if/else/endif
453 */
454
455 struct brw_instruction *brw_JMPI(struct brw_compile *p,
456 struct brw_reg dest,
457 struct brw_reg src0,
458 struct brw_reg src1)
459 {
460 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
461
462 p->current->header.predicate_control = BRW_PREDICATE_NONE;
463
464 return insn;
465 }
466
467
468 /* EU takes the value from the flag register and pushes it onto some
469 * sort of a stack (presumably merging with any flag value already on
470 * the stack). Within an if block, the flags at the top of the stack
471 * control execution on each channel of the unit, eg. on each of the
472 * 16 pixel values in our wm programs.
473 *
474 * When the matching 'else' instruction is reached (presumably by
475 * countdown of the instruction count patched in by our ELSE/ENDIF
476 * functions), the relevent flags are inverted.
477 *
478 * When the matching 'endif' instruction is reached, the flags are
479 * popped off. If the stack is now empty, normal execution resumes.
480 *
481 * No attempt is made to deal with stack overflow (14 elements?).
482 */
483 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
484 {
485 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF);
486
487 /* Override the defaults for this instruction:
488 */
489 brw_set_dest(insn, brw_ip_reg());
490 brw_set_src0(insn, brw_ip_reg());
491 brw_set_src1(insn, brw_imm_d(0x0));
492
493 insn->header.execution_size = execute_size;
494 insn->header.compression_control = BRW_COMPRESSION_NONE;
495 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
496 insn->header.mask_control = BRW_MASK_ENABLE;
497
498 p->current->header.predicate_control = BRW_PREDICATE_NONE;
499
500 return insn;
501 }
502
503
504 struct brw_instruction *brw_ELSE(struct brw_compile *p,
505 struct brw_instruction *if_insn)
506 {
507 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE);
508
509 brw_set_dest(insn, brw_ip_reg());
510 brw_set_src0(insn, brw_ip_reg());
511 brw_set_src1(insn, brw_imm_d(0x0));
512
513 insn->header.compression_control = BRW_COMPRESSION_NONE;
514 insn->header.execution_size = if_insn->header.execution_size;
515 insn->header.mask_control = BRW_MASK_ENABLE;
516
517 /* Patch the if instruction to point at this instruction.
518 */
519 assert(if_insn->header.opcode == BRW_OPCODE_IF);
520
521 if_insn->bits3.if_else.jump_count = insn - if_insn;
522 if_insn->bits3.if_else.pop_count = 1;
523 if_insn->bits3.if_else.pad0 = 0;
524
525 return insn;
526 }
527
528 void brw_ENDIF(struct brw_compile *p,
529 struct brw_instruction *patch_insn)
530 {
531 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
532
533 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
534 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
535 brw_set_src1(insn, brw_imm_d(0x0));
536
537 insn->header.compression_control = BRW_COMPRESSION_NONE;
538 insn->header.execution_size = patch_insn->header.execution_size;
539 insn->header.mask_control = BRW_MASK_ENABLE;
540
541 assert(patch_insn->bits3.if_else.jump_count == 0);
542
543 /* Patch the if or else instructions to point at this or the next
544 * instruction respectively.
545 */
546 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
547 /* Automagically turn it into an IFF:
548 */
549 patch_insn->header.opcode = BRW_OPCODE_IFF;
550 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
551 patch_insn->bits3.if_else.pop_count = 0;
552 patch_insn->bits3.if_else.pad0 = 0;
553
554 }
555 else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
556 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
557 patch_insn->bits3.if_else.pop_count = 1;
558 patch_insn->bits3.if_else.pad0 = 0;
559 }
560 else {
561 assert(0);
562 }
563
564 /* Also pop item off the stack in the endif instruction:
565 */
566 insn->bits3.if_else.jump_count = 0;
567 insn->bits3.if_else.pop_count = 1;
568 insn->bits3.if_else.pad0 = 0;
569 }
570
571 /* DO/WHILE loop:
572 */
573 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
574 {
575 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
576
577 /* Override the defaults for this instruction:
578 */
579 brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
580 brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
581 brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
582
583 insn->header.compression_control = BRW_COMPRESSION_NONE;
584 insn->header.execution_size = execute_size;
585 /* insn->header.mask_control = BRW_MASK_ENABLE; */
586
587 return insn;
588 }
589
590
591
592 void brw_WHILE(struct brw_compile *p,
593 struct brw_instruction *do_insn)
594 {
595 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE);
596
597 brw_set_dest(insn, brw_ip_reg());
598 brw_set_src0(insn, brw_ip_reg());
599 brw_set_src1(insn, brw_imm_d(0x0));
600
601 insn->header.compression_control = BRW_COMPRESSION_NONE;
602 insn->header.execution_size = do_insn->header.execution_size;
603
604 assert(do_insn->header.opcode == BRW_OPCODE_DO);
605 insn->bits3.if_else.jump_count = do_insn - insn;
606 insn->bits3.if_else.pop_count = 0;
607 insn->bits3.if_else.pad0 = 0;
608
609 /* insn->header.mask_control = BRW_MASK_ENABLE; */
610
611 p->current->header.predicate_control = BRW_PREDICATE_NONE;
612 }
613
614
615 /* FORWARD JUMPS:
616 */
617 void brw_land_fwd_jump(struct brw_compile *p,
618 struct brw_instruction *jmp_insn)
619 {
620 struct brw_instruction *landing = &p->store[p->nr_insn];
621
622 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
623 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
624
625 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
626 }
627
628
629
630 /* To integrate with the above, it makes sense that the comparison
631 * instruction should populate the flag register. It might be simpler
632 * just to use the flag reg for most WM tasks?
633 */
634 void brw_CMP(struct brw_compile *p,
635 struct brw_reg dest,
636 GLuint conditional,
637 struct brw_reg src0,
638 struct brw_reg src1)
639 {
640 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
641
642 insn->header.destreg__conditonalmod = conditional;
643 brw_set_dest(insn, dest);
644 brw_set_src0(insn, src0);
645 brw_set_src1(insn, src1);
646
647 /* guess_execution_size(insn, src0); */
648
649
650 /* Make it so that future instructions will use the computed flag
651 * value until brw_set_predicate_control_flag_value() is called
652 * again.
653 */
654 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
655 dest.nr == 0) {
656 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
657 p->flag_value = 0xff;
658 }
659 }
660
661
662
663 /***********************************************************************
664 * Helpers for the various SEND message types:
665 */
666
667 /* Invert 8 values
668 */
669 void brw_math( struct brw_compile *p,
670 struct brw_reg dest,
671 GLuint function,
672 GLuint saturate,
673 GLuint msg_reg_nr,
674 struct brw_reg src,
675 GLuint data_type,
676 GLuint precision )
677 {
678 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
679 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
680 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
681
682 /* Example code doesn't set predicate_control for send
683 * instructions.
684 */
685 insn->header.predicate_control = 0;
686 insn->header.destreg__conditonalmod = msg_reg_nr;
687
688 brw_set_dest(insn, dest);
689 brw_set_src0(insn, src);
690 brw_set_math_message(insn,
691 msg_length, response_length,
692 function,
693 BRW_MATH_INTEGER_UNSIGNED,
694 precision,
695 saturate,
696 data_type);
697 }
698
699 /* Use 2 send instructions to invert 16 elements
700 */
701 void brw_math_16( struct brw_compile *p,
702 struct brw_reg dest,
703 GLuint function,
704 GLuint saturate,
705 GLuint msg_reg_nr,
706 struct brw_reg src,
707 GLuint precision )
708 {
709 struct brw_instruction *insn;
710 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
711 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
712
713 /* First instruction:
714 */
715 brw_push_insn_state(p);
716 brw_set_predicate_control_flag_value(p, 0xff);
717 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
718
719 insn = next_insn(p, BRW_OPCODE_SEND);
720 insn->header.destreg__conditonalmod = msg_reg_nr;
721
722 brw_set_dest(insn, dest);
723 brw_set_src0(insn, src);
724 brw_set_math_message(insn,
725 msg_length, response_length,
726 function,
727 BRW_MATH_INTEGER_UNSIGNED,
728 precision,
729 saturate,
730 BRW_MATH_DATA_VECTOR);
731
732 /* Second instruction:
733 */
734 insn = next_insn(p, BRW_OPCODE_SEND);
735 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
736 insn->header.destreg__conditonalmod = msg_reg_nr+1;
737
738 brw_set_dest(insn, offset(dest,1));
739 brw_set_src0(insn, src);
740 brw_set_math_message(insn,
741 msg_length, response_length,
742 function,
743 BRW_MATH_INTEGER_UNSIGNED,
744 precision,
745 saturate,
746 BRW_MATH_DATA_VECTOR);
747
748 brw_pop_insn_state(p);
749 }
750
751
752
753
754 void brw_dp_WRITE_16( struct brw_compile *p,
755 struct brw_reg src,
756 GLuint msg_reg_nr,
757 GLuint scratch_offset )
758 {
759 {
760 brw_push_insn_state(p);
761 brw_set_mask_control(p, BRW_MASK_DISABLE);
762 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
763
764 brw_MOV(p,
765 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
766 brw_imm_d(scratch_offset));
767
768 brw_pop_insn_state(p);
769 }
770
771 {
772 GLuint msg_length = 3;
773 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
774 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
775
776 insn->header.predicate_control = 0; /* XXX */
777 insn->header.compression_control = BRW_COMPRESSION_NONE;
778 insn->header.destreg__conditonalmod = msg_reg_nr;
779
780 brw_set_dest(insn, dest);
781 brw_set_src0(insn, src);
782
783 brw_set_dp_write_message(insn,
784 255, /* bti */
785 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
786 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
787 msg_length,
788 0, /* pixel scoreboard */
789 0, /* response_length */
790 0); /* eot */
791 }
792
793 }
794
795
796 void brw_dp_READ_16( struct brw_compile *p,
797 struct brw_reg dest,
798 GLuint msg_reg_nr,
799 GLuint scratch_offset )
800 {
801 {
802 brw_push_insn_state(p);
803 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
804 brw_set_mask_control(p, BRW_MASK_DISABLE);
805
806 brw_MOV(p,
807 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
808 brw_imm_d(scratch_offset));
809
810 brw_pop_insn_state(p);
811 }
812
813 {
814 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
815
816 insn->header.predicate_control = 0; /* XXX */
817 insn->header.compression_control = BRW_COMPRESSION_NONE;
818 insn->header.destreg__conditonalmod = msg_reg_nr;
819
820 brw_set_dest(insn, dest); /* UW? */
821 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
822
823 brw_set_dp_read_message(insn,
824 255, /* bti */
825 3, /* msg_control */
826 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
827 1, /* target cache */
828 1, /* msg_length */
829 2, /* response_length */
830 0); /* eot */
831 }
832 }
833
834
835 void brw_fb_WRITE(struct brw_compile *p,
836 struct brw_reg dest,
837 GLuint msg_reg_nr,
838 struct brw_reg src0,
839 GLuint binding_table_index,
840 GLuint msg_length,
841 GLuint response_length,
842 GLboolean eot)
843 {
844 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
845
846 insn->header.predicate_control = 0; /* XXX */
847 insn->header.compression_control = BRW_COMPRESSION_NONE;
848 insn->header.destreg__conditonalmod = msg_reg_nr;
849
850 brw_set_dest(insn, dest);
851 brw_set_src0(insn, src0);
852 brw_set_dp_write_message(insn,
853 binding_table_index,
854 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
855 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
856 msg_length,
857 1, /* pixel scoreboard */
858 response_length,
859 eot);
860 }
861
862
863
864 void brw_SAMPLE(struct brw_compile *p,
865 struct brw_reg dest,
866 GLuint msg_reg_nr,
867 struct brw_reg src0,
868 GLuint binding_table_index,
869 GLuint sampler,
870 GLuint writemask,
871 GLuint msg_type,
872 GLuint response_length,
873 GLuint msg_length,
874 GLboolean eot)
875 {
876 GLboolean need_stall = 0;
877
878 if(writemask == 0) {
879 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
880 return;
881 }
882
883 /* Hardware doesn't do destination dependency checking on send
884 * instructions properly. Add a workaround which generates the
885 * dependency by other means. In practice it seems like this bug
886 * only crops up for texture samples, and only where registers are
887 * written by the send and then written again later without being
888 * read in between. Luckily for us, we already track that
889 * information and use it to modify the writemask for the
890 * instruction, so that is a guide for whether a workaround is
891 * needed.
892 */
893 if (writemask != WRITEMASK_XYZW) {
894 GLuint dst_offset = 0;
895 GLuint i, newmask = 0, len = 0;
896
897 for (i = 0; i < 4; i++) {
898 if (writemask & (1<<i))
899 break;
900 dst_offset += 2;
901 }
902 for (; i < 4; i++) {
903 if (!(writemask & (1<<i)))
904 break;
905 newmask |= 1<<i;
906 len++;
907 }
908
909 if (newmask != writemask) {
910 need_stall = 1;
911 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
912 }
913 else {
914 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
915
916 newmask = ~newmask & WRITEMASK_XYZW;
917
918 brw_push_insn_state(p);
919
920 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
921 brw_set_mask_control(p, BRW_MASK_DISABLE);
922
923 brw_MOV(p, m1, brw_vec8_grf(0,0));
924 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
925
926 brw_pop_insn_state(p);
927
928 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
929 dest = offset(dest, dst_offset);
930 response_length = len * 2;
931 }
932 }
933
934 {
935 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
936
937 insn->header.predicate_control = 0; /* XXX */
938 insn->header.compression_control = BRW_COMPRESSION_NONE;
939 insn->header.destreg__conditonalmod = msg_reg_nr;
940
941 brw_set_dest(insn, dest);
942 brw_set_src0(insn, src0);
943 brw_set_sampler_message(insn,
944 binding_table_index,
945 sampler,
946 msg_type,
947 response_length,
948 msg_length,
949 eot);
950 }
951
952 if (need_stall)
953 {
954 struct brw_reg reg = vec8(offset(dest, response_length-1));
955
956 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
957 */
958 brw_push_insn_state(p);
959 brw_set_compression_control(p, GL_FALSE);
960 brw_MOV(p, reg, reg);
961 brw_pop_insn_state(p);
962 }
963
964 }
965
966 /* All these variables are pretty confusing - we might be better off
967 * using bitmasks and macros for this, in the old style. Or perhaps
968 * just having the caller instantiate the fields in dword3 itself.
969 */
970 void brw_urb_WRITE(struct brw_compile *p,
971 struct brw_reg dest,
972 GLuint msg_reg_nr,
973 struct brw_reg src0,
974 GLboolean allocate,
975 GLboolean used,
976 GLuint msg_length,
977 GLuint response_length,
978 GLboolean eot,
979 GLboolean writes_complete,
980 GLuint offset,
981 GLuint swizzle)
982 {
983 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
984
985 assert(msg_length < 16);
986
987 brw_set_dest(insn, dest);
988 brw_set_src0(insn, src0);
989 brw_set_src1(insn, brw_imm_d(0));
990
991 insn->header.destreg__conditonalmod = msg_reg_nr;
992
993 brw_set_urb_message(insn,
994 allocate,
995 used,
996 msg_length,
997 response_length,
998 eot,
999 writes_complete,
1000 offset,
1001 swizzle);
1002 }
1003