Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
59 assert(dest.nr < 128);
60
61 insn->bits1.da1.dest_reg_file = dest.file;
62 insn->bits1.da1.dest_reg_type = dest.type;
63 insn->bits1.da1.dest_address_mode = dest.address_mode;
64
65 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
66 insn->bits1.da1.dest_reg_nr = dest.nr;
67
68 if (insn->header.access_mode == BRW_ALIGN_1) {
69 insn->bits1.da1.dest_subreg_nr = dest.subnr;
70 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
71 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
72 insn->bits1.da1.dest_horiz_stride = dest.hstride;
73 }
74 else {
75 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
76 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
77 }
78 }
79 else {
80 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
81
82 /* These are different sizes in align1 vs align16:
83 */
84 if (insn->header.access_mode == BRW_ALIGN_1) {
85 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
86 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
87 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
88 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
89 }
90 else {
91 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
92 }
93 }
94
95 /* NEW: Set the execution size based on dest.width and
96 * insn->compression_control:
97 */
98 guess_execution_size(insn, dest);
99 }
100
101 static void brw_set_src0( struct brw_instruction *insn,
102 struct brw_reg reg )
103 {
104 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
105
106 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
107 assert(reg.nr < 128);
108
109 insn->bits1.da1.src0_reg_file = reg.file;
110 insn->bits1.da1.src0_reg_type = reg.type;
111 insn->bits2.da1.src0_abs = reg.abs;
112 insn->bits2.da1.src0_negate = reg.negate;
113 insn->bits2.da1.src0_address_mode = reg.address_mode;
114
115 if (reg.file == BRW_IMMEDIATE_VALUE) {
116 insn->bits3.ud = reg.dw1.ud;
117
118 /* Required to set some fields in src1 as well:
119 */
120 insn->bits1.da1.src1_reg_file = 0; /* arf */
121 insn->bits1.da1.src1_reg_type = reg.type;
122 }
123 else
124 {
125 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
126 if (insn->header.access_mode == BRW_ALIGN_1) {
127 insn->bits2.da1.src0_subreg_nr = reg.subnr;
128 insn->bits2.da1.src0_reg_nr = reg.nr;
129 }
130 else {
131 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
132 insn->bits2.da16.src0_reg_nr = reg.nr;
133 }
134 }
135 else {
136 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
137
138 if (insn->header.access_mode == BRW_ALIGN_1) {
139 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
140 }
141 else {
142 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
143 }
144 }
145
146 if (insn->header.access_mode == BRW_ALIGN_1) {
147 if (reg.width == BRW_WIDTH_1 &&
148 insn->header.execution_size == BRW_EXECUTE_1) {
149 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
150 insn->bits2.da1.src0_width = BRW_WIDTH_1;
151 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
152 }
153 else {
154 insn->bits2.da1.src0_horiz_stride = reg.hstride;
155 insn->bits2.da1.src0_width = reg.width;
156 insn->bits2.da1.src0_vert_stride = reg.vstride;
157 }
158 }
159 else {
160 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
161 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
162 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
163 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
164
165 /* This is an oddity of the fact we're using the same
166 * descriptions for registers in align_16 as align_1:
167 */
168 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
169 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
170 else
171 insn->bits2.da16.src0_vert_stride = reg.vstride;
172 }
173 }
174 }
175
176
177 void brw_set_src1( struct brw_instruction *insn,
178 struct brw_reg reg )
179 {
180 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
181
182 assert(reg.nr < 128);
183
184 insn->bits1.da1.src1_reg_file = reg.file;
185 insn->bits1.da1.src1_reg_type = reg.type;
186 insn->bits3.da1.src1_abs = reg.abs;
187 insn->bits3.da1.src1_negate = reg.negate;
188
189 /* Only src1 can be immediate in two-argument instructions.
190 */
191 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
192
193 if (reg.file == BRW_IMMEDIATE_VALUE) {
194 insn->bits3.ud = reg.dw1.ud;
195 }
196 else {
197 /* This is a hardware restriction, which may or may not be lifted
198 * in the future:
199 */
200 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
201 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
202
203 if (insn->header.access_mode == BRW_ALIGN_1) {
204 insn->bits3.da1.src1_subreg_nr = reg.subnr;
205 insn->bits3.da1.src1_reg_nr = reg.nr;
206 }
207 else {
208 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
209 insn->bits3.da16.src1_reg_nr = reg.nr;
210 }
211
212 if (insn->header.access_mode == BRW_ALIGN_1) {
213 if (reg.width == BRW_WIDTH_1 &&
214 insn->header.execution_size == BRW_EXECUTE_1) {
215 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
216 insn->bits3.da1.src1_width = BRW_WIDTH_1;
217 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
218 }
219 else {
220 insn->bits3.da1.src1_horiz_stride = reg.hstride;
221 insn->bits3.da1.src1_width = reg.width;
222 insn->bits3.da1.src1_vert_stride = reg.vstride;
223 }
224 }
225 else {
226 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
227 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
228 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
229 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
230
231 /* This is an oddity of the fact we're using the same
232 * descriptions for registers in align_16 as align_1:
233 */
234 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
235 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
236 else
237 insn->bits3.da16.src1_vert_stride = reg.vstride;
238 }
239 }
240 }
241
242
243
244 static void brw_set_math_message( struct brw_instruction *insn,
245 GLuint msg_length,
246 GLuint response_length,
247 GLuint function,
248 GLuint integer_type,
249 GLboolean low_precision,
250 GLboolean saturate,
251 GLuint dataType )
252 {
253 brw_set_src1(insn, brw_imm_d(0));
254
255 insn->bits3.math.function = function;
256 insn->bits3.math.int_type = integer_type;
257 insn->bits3.math.precision = low_precision;
258 insn->bits3.math.saturate = saturate;
259 insn->bits3.math.data_type = dataType;
260 insn->bits3.math.response_length = response_length;
261 insn->bits3.math.msg_length = msg_length;
262 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
263 insn->bits3.math.end_of_thread = 0;
264 }
265
266 static void brw_set_urb_message( struct brw_instruction *insn,
267 GLboolean allocate,
268 GLboolean used,
269 GLuint msg_length,
270 GLuint response_length,
271 GLboolean end_of_thread,
272 GLboolean complete,
273 GLuint offset,
274 GLuint swizzle_control )
275 {
276 brw_set_src1(insn, brw_imm_d(0));
277
278 insn->bits3.urb.opcode = 0; /* ? */
279 insn->bits3.urb.offset = offset;
280 insn->bits3.urb.swizzle_control = swizzle_control;
281 insn->bits3.urb.allocate = allocate;
282 insn->bits3.urb.used = used; /* ? */
283 insn->bits3.urb.complete = complete;
284 insn->bits3.urb.response_length = response_length;
285 insn->bits3.urb.msg_length = msg_length;
286 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
287 insn->bits3.urb.end_of_thread = end_of_thread;
288 }
289
290 static void brw_set_dp_write_message( struct brw_instruction *insn,
291 GLuint binding_table_index,
292 GLuint msg_control,
293 GLuint msg_type,
294 GLuint msg_length,
295 GLuint pixel_scoreboard_clear,
296 GLuint response_length,
297 GLuint end_of_thread )
298 {
299 brw_set_src1(insn, brw_imm_d(0));
300
301 insn->bits3.dp_write.binding_table_index = binding_table_index;
302 insn->bits3.dp_write.msg_control = msg_control;
303 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
304 insn->bits3.dp_write.msg_type = msg_type;
305 insn->bits3.dp_write.send_commit_msg = 0;
306 insn->bits3.dp_write.response_length = response_length;
307 insn->bits3.dp_write.msg_length = msg_length;
308 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
309 insn->bits3.urb.end_of_thread = end_of_thread;
310 }
311
312 static void brw_set_dp_read_message( struct brw_instruction *insn,
313 GLuint binding_table_index,
314 GLuint msg_control,
315 GLuint msg_type,
316 GLuint target_cache,
317 GLuint msg_length,
318 GLuint response_length,
319 GLuint end_of_thread )
320 {
321 brw_set_src1(insn, brw_imm_d(0));
322
323 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
324 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
325 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
326 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
327 insn->bits3.dp_read.response_length = response_length; /*16:19*/
328 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
329 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
330 insn->bits3.dp_read.pad1 = 0; /*28:30*/
331 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
332 }
333
334 static void brw_set_sampler_message(struct brw_context *brw,
335 struct brw_instruction *insn,
336 GLuint binding_table_index,
337 GLuint sampler,
338 GLuint msg_type,
339 GLuint response_length,
340 GLuint msg_length,
341 GLboolean eot)
342 {
343 brw_set_src1(insn, brw_imm_d(0));
344
345 if (BRW_IS_G4X(brw)) {
346 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
347 insn->bits3.sampler_g4x.sampler = sampler;
348 insn->bits3.sampler_g4x.msg_type = msg_type;
349 insn->bits3.sampler_g4x.response_length = response_length;
350 insn->bits3.sampler_g4x.msg_length = msg_length;
351 insn->bits3.sampler_g4x.end_of_thread = eot;
352 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
353 } else {
354 insn->bits3.sampler.binding_table_index = binding_table_index;
355 insn->bits3.sampler.sampler = sampler;
356 insn->bits3.sampler.msg_type = msg_type;
357 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
358 insn->bits3.sampler.response_length = response_length;
359 insn->bits3.sampler.msg_length = msg_length;
360 insn->bits3.sampler.end_of_thread = eot;
361 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
362 }
363 }
364
365
366
367 static struct brw_instruction *next_insn( struct brw_compile *p,
368 GLuint opcode )
369 {
370 struct brw_instruction *insn;
371
372 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
373
374 insn = &p->store[p->nr_insn++];
375 memcpy(insn, p->current, sizeof(*insn));
376
377 /* Reset this one-shot flag:
378 */
379
380 if (p->current->header.destreg__conditonalmod) {
381 p->current->header.destreg__conditonalmod = 0;
382 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
383 }
384
385 insn->header.opcode = opcode;
386 return insn;
387 }
388
389
390 static struct brw_instruction *brw_alu1( struct brw_compile *p,
391 GLuint opcode,
392 struct brw_reg dest,
393 struct brw_reg src )
394 {
395 struct brw_instruction *insn = next_insn(p, opcode);
396 brw_set_dest(insn, dest);
397 brw_set_src0(insn, src);
398 return insn;
399 }
400
401 static struct brw_instruction *brw_alu2(struct brw_compile *p,
402 GLuint opcode,
403 struct brw_reg dest,
404 struct brw_reg src0,
405 struct brw_reg src1 )
406 {
407 struct brw_instruction *insn = next_insn(p, opcode);
408 brw_set_dest(insn, dest);
409 brw_set_src0(insn, src0);
410 brw_set_src1(insn, src1);
411 return insn;
412 }
413
414
415 /***********************************************************************
416 * Convenience routines.
417 */
418 #define ALU1(OP) \
419 struct brw_instruction *brw_##OP(struct brw_compile *p, \
420 struct brw_reg dest, \
421 struct brw_reg src0) \
422 { \
423 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
424 }
425
426 #define ALU2(OP) \
427 struct brw_instruction *brw_##OP(struct brw_compile *p, \
428 struct brw_reg dest, \
429 struct brw_reg src0, \
430 struct brw_reg src1) \
431 { \
432 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
433 }
434
435
436 ALU1(MOV)
437 ALU2(SEL)
438 ALU1(NOT)
439 ALU2(AND)
440 ALU2(OR)
441 ALU2(XOR)
442 ALU2(SHR)
443 ALU2(SHL)
444 ALU2(RSR)
445 ALU2(RSL)
446 ALU2(ASR)
447 ALU2(ADD)
448 ALU2(MUL)
449 ALU1(FRC)
450 ALU1(RNDD)
451 ALU1(RNDZ)
452 ALU2(MAC)
453 ALU2(MACH)
454 ALU1(LZD)
455 ALU2(DP4)
456 ALU2(DPH)
457 ALU2(DP3)
458 ALU2(DP2)
459 ALU2(LINE)
460
461
462
463
464 void brw_NOP(struct brw_compile *p)
465 {
466 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
467 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
468 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
469 brw_set_src1(insn, brw_imm_ud(0x0));
470 }
471
472
473
474
475
476 /***********************************************************************
477 * Comparisons, if/else/endif
478 */
479
480 struct brw_instruction *brw_JMPI(struct brw_compile *p,
481 struct brw_reg dest,
482 struct brw_reg src0,
483 struct brw_reg src1)
484 {
485 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
486
487 insn->header.execution_size = 1;
488 insn->header.compression_control = BRW_COMPRESSION_NONE;
489 insn->header.mask_control = BRW_MASK_DISABLE;
490
491 p->current->header.predicate_control = BRW_PREDICATE_NONE;
492
493 return insn;
494 }
495
496 /* EU takes the value from the flag register and pushes it onto some
497 * sort of a stack (presumably merging with any flag value already on
498 * the stack). Within an if block, the flags at the top of the stack
499 * control execution on each channel of the unit, eg. on each of the
500 * 16 pixel values in our wm programs.
501 *
502 * When the matching 'else' instruction is reached (presumably by
503 * countdown of the instruction count patched in by our ELSE/ENDIF
504 * functions), the relevent flags are inverted.
505 *
506 * When the matching 'endif' instruction is reached, the flags are
507 * popped off. If the stack is now empty, normal execution resumes.
508 *
509 * No attempt is made to deal with stack overflow (14 elements?).
510 */
511 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
512 {
513 struct brw_instruction *insn;
514
515 if (p->single_program_flow) {
516 assert(execute_size == BRW_EXECUTE_1);
517
518 insn = next_insn(p, BRW_OPCODE_ADD);
519 insn->header.predicate_inverse = 1;
520 } else {
521 insn = next_insn(p, BRW_OPCODE_IF);
522 }
523
524 /* Override the defaults for this instruction:
525 */
526 brw_set_dest(insn, brw_ip_reg());
527 brw_set_src0(insn, brw_ip_reg());
528 brw_set_src1(insn, brw_imm_d(0x0));
529
530 insn->header.execution_size = execute_size;
531 insn->header.compression_control = BRW_COMPRESSION_NONE;
532 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
533 insn->header.mask_control = BRW_MASK_ENABLE;
534 if (!p->single_program_flow)
535 insn->header.thread_control = BRW_THREAD_SWITCH;
536
537 p->current->header.predicate_control = BRW_PREDICATE_NONE;
538
539 return insn;
540 }
541
542
543 struct brw_instruction *brw_ELSE(struct brw_compile *p,
544 struct brw_instruction *if_insn)
545 {
546 struct brw_instruction *insn;
547
548 if (p->single_program_flow) {
549 insn = next_insn(p, BRW_OPCODE_ADD);
550 } else {
551 insn = next_insn(p, BRW_OPCODE_ELSE);
552 }
553
554 brw_set_dest(insn, brw_ip_reg());
555 brw_set_src0(insn, brw_ip_reg());
556 brw_set_src1(insn, brw_imm_d(0x0));
557
558 insn->header.compression_control = BRW_COMPRESSION_NONE;
559 insn->header.execution_size = if_insn->header.execution_size;
560 insn->header.mask_control = BRW_MASK_ENABLE;
561 if (!p->single_program_flow)
562 insn->header.thread_control = BRW_THREAD_SWITCH;
563
564 /* Patch the if instruction to point at this instruction.
565 */
566 if (p->single_program_flow) {
567 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
568
569 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
570 } else {
571 assert(if_insn->header.opcode == BRW_OPCODE_IF);
572
573 if_insn->bits3.if_else.jump_count = insn - if_insn;
574 if_insn->bits3.if_else.pop_count = 1;
575 if_insn->bits3.if_else.pad0 = 0;
576 }
577
578 return insn;
579 }
580
581 void brw_ENDIF(struct brw_compile *p,
582 struct brw_instruction *patch_insn)
583 {
584 if (p->single_program_flow) {
585 /* In single program flow mode, there's no need to execute an ENDIF,
586 * since we don't need to do any stack operations, and if we're executing
587 * currently, we want to just continue executing.
588 */
589 struct brw_instruction *next = &p->store[p->nr_insn];
590
591 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
592
593 patch_insn->bits3.ud = (next - patch_insn) * 16;
594 } else {
595 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
596
597 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
598 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
599 brw_set_src1(insn, brw_imm_d(0x0));
600
601 insn->header.compression_control = BRW_COMPRESSION_NONE;
602 insn->header.execution_size = patch_insn->header.execution_size;
603 insn->header.mask_control = BRW_MASK_ENABLE;
604 insn->header.thread_control = BRW_THREAD_SWITCH;
605
606 assert(patch_insn->bits3.if_else.jump_count == 0);
607
608 /* Patch the if or else instructions to point at this or the next
609 * instruction respectively.
610 */
611 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
612 /* Automagically turn it into an IFF:
613 */
614 patch_insn->header.opcode = BRW_OPCODE_IFF;
615 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
616 patch_insn->bits3.if_else.pop_count = 0;
617 patch_insn->bits3.if_else.pad0 = 0;
618 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
619 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
620 patch_insn->bits3.if_else.pop_count = 1;
621 patch_insn->bits3.if_else.pad0 = 0;
622 } else {
623 assert(0);
624 }
625
626 /* Also pop item off the stack in the endif instruction:
627 */
628 insn->bits3.if_else.jump_count = 0;
629 insn->bits3.if_else.pop_count = 1;
630 insn->bits3.if_else.pad0 = 0;
631 }
632 }
633
634 struct brw_instruction *brw_BREAK(struct brw_compile *p)
635 {
636 struct brw_instruction *insn;
637 insn = next_insn(p, BRW_OPCODE_BREAK);
638 brw_set_dest(insn, brw_ip_reg());
639 brw_set_src0(insn, brw_ip_reg());
640 brw_set_src1(insn, brw_imm_d(0x0));
641 insn->header.compression_control = BRW_COMPRESSION_NONE;
642 insn->header.execution_size = BRW_EXECUTE_8;
643 /* insn->header.mask_control = BRW_MASK_DISABLE; */
644 insn->bits3.if_else.pad0 = 0;
645 return insn;
646 }
647
648 struct brw_instruction *brw_CONT(struct brw_compile *p)
649 {
650 struct brw_instruction *insn;
651 insn = next_insn(p, BRW_OPCODE_CONTINUE);
652 brw_set_dest(insn, brw_ip_reg());
653 brw_set_src0(insn, brw_ip_reg());
654 brw_set_src1(insn, brw_imm_d(0x0));
655 insn->header.compression_control = BRW_COMPRESSION_NONE;
656 insn->header.execution_size = BRW_EXECUTE_8;
657 /* insn->header.mask_control = BRW_MASK_DISABLE; */
658 insn->bits3.if_else.pad0 = 0;
659 return insn;
660 }
661
662 /* DO/WHILE loop:
663 */
664 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
665 {
666 if (p->single_program_flow) {
667 return &p->store[p->nr_insn];
668 } else {
669 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
670
671 /* Override the defaults for this instruction:
672 */
673 brw_set_dest(insn, brw_null_reg());
674 brw_set_src0(insn, brw_null_reg());
675 brw_set_src1(insn, brw_null_reg());
676
677 insn->header.compression_control = BRW_COMPRESSION_NONE;
678 insn->header.execution_size = execute_size;
679 insn->header.predicate_control = BRW_PREDICATE_NONE;
680 /* insn->header.mask_control = BRW_MASK_ENABLE; */
681 /* insn->header.mask_control = BRW_MASK_DISABLE; */
682
683 return insn;
684 }
685 }
686
687
688
689 struct brw_instruction *brw_WHILE(struct brw_compile *p,
690 struct brw_instruction *do_insn)
691 {
692 struct brw_instruction *insn;
693
694 if (p->single_program_flow)
695 insn = next_insn(p, BRW_OPCODE_ADD);
696 else
697 insn = next_insn(p, BRW_OPCODE_WHILE);
698
699 brw_set_dest(insn, brw_ip_reg());
700 brw_set_src0(insn, brw_ip_reg());
701 brw_set_src1(insn, brw_imm_d(0x0));
702
703 insn->header.compression_control = BRW_COMPRESSION_NONE;
704
705 if (p->single_program_flow) {
706 insn->header.execution_size = BRW_EXECUTE_1;
707
708 insn->bits3.d = (do_insn - insn) * 16;
709 } else {
710 insn->header.execution_size = do_insn->header.execution_size;
711
712 assert(do_insn->header.opcode == BRW_OPCODE_DO);
713 insn->bits3.if_else.jump_count = do_insn - insn + 1;
714 insn->bits3.if_else.pop_count = 0;
715 insn->bits3.if_else.pad0 = 0;
716 }
717
718 /* insn->header.mask_control = BRW_MASK_ENABLE; */
719
720 /* insn->header.mask_control = BRW_MASK_DISABLE; */
721 p->current->header.predicate_control = BRW_PREDICATE_NONE;
722 return insn;
723 }
724
725
726 /* FORWARD JUMPS:
727 */
728 void brw_land_fwd_jump(struct brw_compile *p,
729 struct brw_instruction *jmp_insn)
730 {
731 struct brw_instruction *landing = &p->store[p->nr_insn];
732
733 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
734 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
735
736 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
737 }
738
739
740
741 /* To integrate with the above, it makes sense that the comparison
742 * instruction should populate the flag register. It might be simpler
743 * just to use the flag reg for most WM tasks?
744 */
745 void brw_CMP(struct brw_compile *p,
746 struct brw_reg dest,
747 GLuint conditional,
748 struct brw_reg src0,
749 struct brw_reg src1)
750 {
751 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
752
753 insn->header.destreg__conditonalmod = conditional;
754 brw_set_dest(insn, dest);
755 brw_set_src0(insn, src0);
756 brw_set_src1(insn, src1);
757
758 /* guess_execution_size(insn, src0); */
759
760
761 /* Make it so that future instructions will use the computed flag
762 * value until brw_set_predicate_control_flag_value() is called
763 * again.
764 */
765 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
766 dest.nr == 0) {
767 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
768 p->flag_value = 0xff;
769 }
770 }
771
772
773
774 /***********************************************************************
775 * Helpers for the various SEND message types:
776 */
777
778 /** Extended math function, float[8].
779 */
780 void brw_math( struct brw_compile *p,
781 struct brw_reg dest,
782 GLuint function,
783 GLuint saturate,
784 GLuint msg_reg_nr,
785 struct brw_reg src,
786 GLuint data_type,
787 GLuint precision )
788 {
789 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
790 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
791 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
792
793 /* Example code doesn't set predicate_control for send
794 * instructions.
795 */
796 insn->header.predicate_control = 0;
797 insn->header.destreg__conditonalmod = msg_reg_nr;
798
799 brw_set_dest(insn, dest);
800 brw_set_src0(insn, src);
801 brw_set_math_message(insn,
802 msg_length, response_length,
803 function,
804 BRW_MATH_INTEGER_UNSIGNED,
805 precision,
806 saturate,
807 data_type);
808 }
809
810 /**
811 * Extended math function, float[16].
812 * Use 2 send instructions.
813 */
814 void brw_math_16( struct brw_compile *p,
815 struct brw_reg dest,
816 GLuint function,
817 GLuint saturate,
818 GLuint msg_reg_nr,
819 struct brw_reg src,
820 GLuint precision )
821 {
822 struct brw_instruction *insn;
823 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
824 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
825
826 /* First instruction:
827 */
828 brw_push_insn_state(p);
829 brw_set_predicate_control_flag_value(p, 0xff);
830 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
831
832 insn = next_insn(p, BRW_OPCODE_SEND);
833 insn->header.destreg__conditonalmod = msg_reg_nr;
834
835 brw_set_dest(insn, dest);
836 brw_set_src0(insn, src);
837 brw_set_math_message(insn,
838 msg_length, response_length,
839 function,
840 BRW_MATH_INTEGER_UNSIGNED,
841 precision,
842 saturate,
843 BRW_MATH_DATA_VECTOR);
844
845 /* Second instruction:
846 */
847 insn = next_insn(p, BRW_OPCODE_SEND);
848 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
849 insn->header.destreg__conditonalmod = msg_reg_nr+1;
850
851 brw_set_dest(insn, offset(dest,1));
852 brw_set_src0(insn, src);
853 brw_set_math_message(insn,
854 msg_length, response_length,
855 function,
856 BRW_MATH_INTEGER_UNSIGNED,
857 precision,
858 saturate,
859 BRW_MATH_DATA_VECTOR);
860
861 brw_pop_insn_state(p);
862 }
863
864
865 /**
866 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
867 * Scratch offset should be a multiple of 64.
868 * Used for register spilling.
869 */
870 void brw_dp_WRITE_16( struct brw_compile *p,
871 struct brw_reg src,
872 GLuint scratch_offset )
873 {
874 GLuint msg_reg_nr = 1;
875 {
876 brw_push_insn_state(p);
877 brw_set_mask_control(p, BRW_MASK_DISABLE);
878 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
879
880 /* set message header global offset field (reg 0, element 2) */
881 brw_MOV(p,
882 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
883 brw_imm_d(scratch_offset));
884
885 brw_pop_insn_state(p);
886 }
887
888 {
889 GLuint msg_length = 3;
890 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
891 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
892
893 insn->header.predicate_control = 0; /* XXX */
894 insn->header.compression_control = BRW_COMPRESSION_NONE;
895 insn->header.destreg__conditonalmod = msg_reg_nr;
896
897 brw_set_dest(insn, dest);
898 brw_set_src0(insn, src);
899
900 brw_set_dp_write_message(insn,
901 255, /* binding table index (255=stateless) */
902 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
903 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
904 msg_length,
905 0, /* pixel scoreboard */
906 0, /* response_length */
907 0); /* eot */
908 }
909 }
910
911
912 /**
913 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
914 * Scratch offset should be a multiple of 64.
915 * Used for register spilling.
916 */
917 void brw_dp_READ_16( struct brw_compile *p,
918 struct brw_reg dest,
919 GLuint scratch_offset )
920 {
921 GLuint msg_reg_nr = 1;
922 {
923 brw_push_insn_state(p);
924 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
925 brw_set_mask_control(p, BRW_MASK_DISABLE);
926
927 /* set message header global offset field (reg 0, element 2) */
928 brw_MOV(p,
929 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
930 brw_imm_d(scratch_offset));
931
932 brw_pop_insn_state(p);
933 }
934
935 {
936 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
937
938 insn->header.predicate_control = 0; /* XXX */
939 insn->header.compression_control = BRW_COMPRESSION_NONE;
940 insn->header.destreg__conditonalmod = msg_reg_nr;
941
942 brw_set_dest(insn, dest); /* UW? */
943 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
944
945 brw_set_dp_read_message(insn,
946 255, /* binding table index (255=stateless) */
947 3, /* msg_control (3 means 4 Owords) */
948 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
949 1, /* target cache (render/scratch) */
950 1, /* msg_length */
951 2, /* response_length */
952 0); /* eot */
953 }
954 }
955
956
957 /**
958 * Read a float[4] vector from the data port Data Cache (const buffer).
959 * Location (in buffer) should be a multiple of 16.
960 * Used for fetching shader constants.
961 * If relAddr is true, we'll do an indirect fetch using the address register.
962 */
963 void brw_dp_READ_4( struct brw_compile *p,
964 struct brw_reg dest,
965 GLboolean relAddr,
966 GLuint location,
967 GLuint bind_table_index )
968 {
969 /* XXX: relAddr not implemented */
970 GLuint msg_reg_nr = 1;
971 {
972 struct brw_reg b;
973 brw_push_insn_state(p);
974 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
975 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
976 brw_set_mask_control(p, BRW_MASK_DISABLE);
977
978 /* Setup MRF[1] with location/offset into const buffer */
979 b = brw_message_reg(msg_reg_nr);
980 b = retype(b, BRW_REGISTER_TYPE_UD);
981 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
982 * when the docs say only dword[2] should be set. Hmmm. But it works.
983 */
984 brw_MOV(p, b, brw_imm_ud(location));
985 brw_pop_insn_state(p);
986 }
987
988 {
989 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
990
991 insn->header.predicate_control = BRW_PREDICATE_NONE;
992 insn->header.compression_control = BRW_COMPRESSION_NONE;
993 insn->header.destreg__conditonalmod = msg_reg_nr;
994 insn->header.mask_control = BRW_MASK_DISABLE;
995
996 /* cast dest to a uword[8] vector */
997 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
998
999 brw_set_dest(insn, dest);
1000 brw_set_src0(insn, brw_null_reg());
1001
1002 brw_set_dp_read_message(insn,
1003 bind_table_index,
1004 0, /* msg_control (0 means 1 Oword) */
1005 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1006 0, /* source cache = data cache */
1007 1, /* msg_length */
1008 1, /* response_length (1 Oword) */
1009 0); /* eot */
1010 }
1011 }
1012
1013
1014 /**
1015 * Read float[4] constant(s) from VS constant buffer.
1016 * For relative addressing, two float[4] constants will be read into 'dest'.
1017 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1018 */
1019 void brw_dp_READ_4_vs(struct brw_compile *p,
1020 struct brw_reg dest,
1021 GLuint oword,
1022 GLboolean relAddr,
1023 struct brw_reg addrReg,
1024 GLuint location,
1025 GLuint bind_table_index)
1026 {
1027 GLuint msg_reg_nr = 1;
1028
1029 assert(oword < 2);
1030 /*
1031 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1032 location, msg_reg_nr);
1033 */
1034
1035 /* Setup MRF[1] with location/offset into const buffer */
1036 {
1037 struct brw_reg b;
1038
1039 brw_push_insn_state(p);
1040 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1041 brw_set_mask_control(p, BRW_MASK_DISABLE);
1042 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1043 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1044
1045 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1046 * when the docs say only dword[2] should be set. Hmmm. But it works.
1047 */
1048 b = brw_message_reg(msg_reg_nr);
1049 b = retype(b, BRW_REGISTER_TYPE_UD);
1050 /*b = get_element_ud(b, 2);*/
1051 if (relAddr) {
1052 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1053 }
1054 else {
1055 brw_MOV(p, b, brw_imm_ud(location));
1056 }
1057
1058 brw_pop_insn_state(p);
1059 }
1060
1061 {
1062 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1063
1064 insn->header.predicate_control = BRW_PREDICATE_NONE;
1065 insn->header.compression_control = BRW_COMPRESSION_NONE;
1066 insn->header.destreg__conditonalmod = msg_reg_nr;
1067 insn->header.mask_control = BRW_MASK_DISABLE;
1068 /*insn->header.access_mode = BRW_ALIGN_16;*/
1069
1070 brw_set_dest(insn, dest);
1071 brw_set_src0(insn, brw_null_reg());
1072
1073 brw_set_dp_read_message(insn,
1074 bind_table_index,
1075 oword, /* 0 = lower Oword, 1 = upper Oword */
1076 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1077 0, /* source cache = data cache */
1078 1, /* msg_length */
1079 1, /* response_length (1 Oword) */
1080 0); /* eot */
1081 }
1082 }
1083
1084
1085
1086 void brw_fb_WRITE(struct brw_compile *p,
1087 struct brw_reg dest,
1088 GLuint msg_reg_nr,
1089 struct brw_reg src0,
1090 GLuint binding_table_index,
1091 GLuint msg_length,
1092 GLuint response_length,
1093 GLboolean eot)
1094 {
1095 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1096
1097 insn->header.predicate_control = 0; /* XXX */
1098 insn->header.compression_control = BRW_COMPRESSION_NONE;
1099 insn->header.destreg__conditonalmod = msg_reg_nr;
1100
1101 brw_set_dest(insn, dest);
1102 brw_set_src0(insn, src0);
1103 brw_set_dp_write_message(insn,
1104 binding_table_index,
1105 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1106 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1107 msg_length,
1108 1, /* pixel scoreboard */
1109 response_length,
1110 eot);
1111 }
1112
1113
1114 /**
1115 * Texture sample instruction.
1116 * Note: the msg_type plus msg_length values determine exactly what kind
1117 * of sampling operation is performed. See volume 4, page 161 of docs.
1118 */
1119 void brw_SAMPLE(struct brw_compile *p,
1120 struct brw_reg dest,
1121 GLuint msg_reg_nr,
1122 struct brw_reg src0,
1123 GLuint binding_table_index,
1124 GLuint sampler,
1125 GLuint writemask,
1126 GLuint msg_type,
1127 GLuint response_length,
1128 GLuint msg_length,
1129 GLboolean eot)
1130 {
1131 GLboolean need_stall = 0;
1132
1133 if (writemask == 0) {
1134 /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1135 return;
1136 }
1137
1138 /* Hardware doesn't do destination dependency checking on send
1139 * instructions properly. Add a workaround which generates the
1140 * dependency by other means. In practice it seems like this bug
1141 * only crops up for texture samples, and only where registers are
1142 * written by the send and then written again later without being
1143 * read in between. Luckily for us, we already track that
1144 * information and use it to modify the writemask for the
1145 * instruction, so that is a guide for whether a workaround is
1146 * needed.
1147 */
1148 if (writemask != WRITEMASK_XYZW) {
1149 GLuint dst_offset = 0;
1150 GLuint i, newmask = 0, len = 0;
1151
1152 for (i = 0; i < 4; i++) {
1153 if (writemask & (1<<i))
1154 break;
1155 dst_offset += 2;
1156 }
1157 for (; i < 4; i++) {
1158 if (!(writemask & (1<<i)))
1159 break;
1160 newmask |= 1<<i;
1161 len++;
1162 }
1163
1164 if (newmask != writemask) {
1165 need_stall = 1;
1166 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1167 }
1168 else {
1169 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1170
1171 newmask = ~newmask & WRITEMASK_XYZW;
1172
1173 brw_push_insn_state(p);
1174
1175 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1176 brw_set_mask_control(p, BRW_MASK_DISABLE);
1177
1178 brw_MOV(p, m1, brw_vec8_grf(0,0));
1179 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1180
1181 brw_pop_insn_state(p);
1182
1183 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1184 dest = offset(dest, dst_offset);
1185 response_length = len * 2;
1186 }
1187 }
1188
1189 {
1190 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1191
1192 insn->header.predicate_control = 0; /* XXX */
1193 insn->header.compression_control = BRW_COMPRESSION_NONE;
1194 insn->header.destreg__conditonalmod = msg_reg_nr;
1195
1196 brw_set_dest(insn, dest);
1197 brw_set_src0(insn, src0);
1198 brw_set_sampler_message(p->brw, insn,
1199 binding_table_index,
1200 sampler,
1201 msg_type,
1202 response_length,
1203 msg_length,
1204 eot);
1205 }
1206
1207 if (need_stall) {
1208 struct brw_reg reg = vec8(offset(dest, response_length-1));
1209
1210 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1211 */
1212 brw_push_insn_state(p);
1213 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1214 brw_MOV(p, reg, reg);
1215 brw_pop_insn_state(p);
1216 }
1217
1218 }
1219
1220 /* All these variables are pretty confusing - we might be better off
1221 * using bitmasks and macros for this, in the old style. Or perhaps
1222 * just having the caller instantiate the fields in dword3 itself.
1223 */
1224 void brw_urb_WRITE(struct brw_compile *p,
1225 struct brw_reg dest,
1226 GLuint msg_reg_nr,
1227 struct brw_reg src0,
1228 GLboolean allocate,
1229 GLboolean used,
1230 GLuint msg_length,
1231 GLuint response_length,
1232 GLboolean eot,
1233 GLboolean writes_complete,
1234 GLuint offset,
1235 GLuint swizzle)
1236 {
1237 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1238
1239 assert(msg_length < BRW_MAX_MRF);
1240
1241 brw_set_dest(insn, dest);
1242 brw_set_src0(insn, src0);
1243 brw_set_src1(insn, brw_imm_d(0));
1244
1245 insn->header.destreg__conditonalmod = msg_reg_nr;
1246
1247 brw_set_urb_message(insn,
1248 allocate,
1249 used,
1250 msg_length,
1251 response_length,
1252 eot,
1253 writes_complete,
1254 offset,
1255 swizzle);
1256 }
1257