radeon/r200/r300: cleanup some of the renderbuffer code
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 insn->bits1.da1.dest_reg_file = dest.file;
59 insn->bits1.da1.dest_reg_type = dest.type;
60 insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63 insn->bits1.da1.dest_reg_nr = dest.nr;
64
65 if (insn->header.access_mode == BRW_ALIGN_1) {
66 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
68 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
69 insn->bits1.da1.dest_horiz_stride = dest.hstride;
70 }
71 else {
72 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
73 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
74 }
75 }
76 else {
77 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
78
79 /* These are different sizes in align1 vs align16:
80 */
81 if (insn->header.access_mode == BRW_ALIGN_1) {
82 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
83 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
84 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
85 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
86 }
87 else {
88 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 }
90 }
91
92 /* NEW: Set the execution size based on dest.width and
93 * insn->compression_control:
94 */
95 guess_execution_size(insn, dest);
96 }
97
98 static void brw_set_src0( struct brw_instruction *insn,
99 struct brw_reg reg )
100 {
101 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
102
103 insn->bits1.da1.src0_reg_file = reg.file;
104 insn->bits1.da1.src0_reg_type = reg.type;
105 insn->bits2.da1.src0_abs = reg.abs;
106 insn->bits2.da1.src0_negate = reg.negate;
107 insn->bits2.da1.src0_address_mode = reg.address_mode;
108
109 if (reg.file == BRW_IMMEDIATE_VALUE) {
110 insn->bits3.ud = reg.dw1.ud;
111
112 /* Required to set some fields in src1 as well:
113 */
114 insn->bits1.da1.src1_reg_file = 0; /* arf */
115 insn->bits1.da1.src1_reg_type = reg.type;
116 }
117 else
118 {
119 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
120 if (insn->header.access_mode == BRW_ALIGN_1) {
121 insn->bits2.da1.src0_subreg_nr = reg.subnr;
122 insn->bits2.da1.src0_reg_nr = reg.nr;
123 }
124 else {
125 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
126 insn->bits2.da16.src0_reg_nr = reg.nr;
127 }
128 }
129 else {
130 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
131
132 if (insn->header.access_mode == BRW_ALIGN_1) {
133 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
134 }
135 else {
136 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
137 }
138 }
139
140 if (insn->header.access_mode == BRW_ALIGN_1) {
141 if (reg.width == BRW_WIDTH_1 &&
142 insn->header.execution_size == BRW_EXECUTE_1) {
143 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
144 insn->bits2.da1.src0_width = BRW_WIDTH_1;
145 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
146 }
147 else {
148 insn->bits2.da1.src0_horiz_stride = reg.hstride;
149 insn->bits2.da1.src0_width = reg.width;
150 insn->bits2.da1.src0_vert_stride = reg.vstride;
151 }
152 }
153 else {
154 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
155 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
156 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
157 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
158
159 /* This is an oddity of the fact we're using the same
160 * descriptions for registers in align_16 as align_1:
161 */
162 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
163 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
164 else
165 insn->bits2.da16.src0_vert_stride = reg.vstride;
166 }
167 }
168 }
169
170
171 void brw_set_src1( struct brw_instruction *insn,
172 struct brw_reg reg )
173 {
174 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
175
176 insn->bits1.da1.src1_reg_file = reg.file;
177 insn->bits1.da1.src1_reg_type = reg.type;
178 insn->bits3.da1.src1_abs = reg.abs;
179 insn->bits3.da1.src1_negate = reg.negate;
180
181 /* Only src1 can be immediate in two-argument instructions.
182 */
183 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
184
185 if (reg.file == BRW_IMMEDIATE_VALUE) {
186 insn->bits3.ud = reg.dw1.ud;
187 }
188 else {
189 /* This is a hardware restriction, which may or may not be lifted
190 * in the future:
191 */
192 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
193 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
194
195 if (insn->header.access_mode == BRW_ALIGN_1) {
196 insn->bits3.da1.src1_subreg_nr = reg.subnr;
197 insn->bits3.da1.src1_reg_nr = reg.nr;
198 }
199 else {
200 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
201 insn->bits3.da16.src1_reg_nr = reg.nr;
202 }
203
204 if (insn->header.access_mode == BRW_ALIGN_1) {
205 if (reg.width == BRW_WIDTH_1 &&
206 insn->header.execution_size == BRW_EXECUTE_1) {
207 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
208 insn->bits3.da1.src1_width = BRW_WIDTH_1;
209 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
210 }
211 else {
212 insn->bits3.da1.src1_horiz_stride = reg.hstride;
213 insn->bits3.da1.src1_width = reg.width;
214 insn->bits3.da1.src1_vert_stride = reg.vstride;
215 }
216 }
217 else {
218 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
219 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
220 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
221 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
222
223 /* This is an oddity of the fact we're using the same
224 * descriptions for registers in align_16 as align_1:
225 */
226 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
227 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
228 else
229 insn->bits3.da16.src1_vert_stride = reg.vstride;
230 }
231 }
232 }
233
234
235
236 static void brw_set_math_message( struct brw_instruction *insn,
237 GLuint msg_length,
238 GLuint response_length,
239 GLuint function,
240 GLuint integer_type,
241 GLboolean low_precision,
242 GLboolean saturate,
243 GLuint dataType )
244 {
245 brw_set_src1(insn, brw_imm_d(0));
246
247 insn->bits3.math.function = function;
248 insn->bits3.math.int_type = integer_type;
249 insn->bits3.math.precision = low_precision;
250 insn->bits3.math.saturate = saturate;
251 insn->bits3.math.data_type = dataType;
252 insn->bits3.math.response_length = response_length;
253 insn->bits3.math.msg_length = msg_length;
254 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
255 insn->bits3.math.end_of_thread = 0;
256 }
257
258 static void brw_set_urb_message( struct brw_instruction *insn,
259 GLboolean allocate,
260 GLboolean used,
261 GLuint msg_length,
262 GLuint response_length,
263 GLboolean end_of_thread,
264 GLboolean complete,
265 GLuint offset,
266 GLuint swizzle_control )
267 {
268 brw_set_src1(insn, brw_imm_d(0));
269
270 insn->bits3.urb.opcode = 0; /* ? */
271 insn->bits3.urb.offset = offset;
272 insn->bits3.urb.swizzle_control = swizzle_control;
273 insn->bits3.urb.allocate = allocate;
274 insn->bits3.urb.used = used; /* ? */
275 insn->bits3.urb.complete = complete;
276 insn->bits3.urb.response_length = response_length;
277 insn->bits3.urb.msg_length = msg_length;
278 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
279 insn->bits3.urb.end_of_thread = end_of_thread;
280 }
281
282 static void brw_set_dp_write_message( struct brw_instruction *insn,
283 GLuint binding_table_index,
284 GLuint msg_control,
285 GLuint msg_type,
286 GLuint msg_length,
287 GLuint pixel_scoreboard_clear,
288 GLuint response_length,
289 GLuint end_of_thread )
290 {
291 brw_set_src1(insn, brw_imm_d(0));
292
293 insn->bits3.dp_write.binding_table_index = binding_table_index;
294 insn->bits3.dp_write.msg_control = msg_control;
295 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
296 insn->bits3.dp_write.msg_type = msg_type;
297 insn->bits3.dp_write.send_commit_msg = 0;
298 insn->bits3.dp_write.response_length = response_length;
299 insn->bits3.dp_write.msg_length = msg_length;
300 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
301 insn->bits3.urb.end_of_thread = end_of_thread;
302 }
303
304 static void brw_set_dp_read_message( struct brw_instruction *insn,
305 GLuint binding_table_index,
306 GLuint msg_control,
307 GLuint msg_type,
308 GLuint target_cache,
309 GLuint msg_length,
310 GLuint response_length,
311 GLuint end_of_thread )
312 {
313 brw_set_src1(insn, brw_imm_d(0));
314
315 insn->bits3.dp_read.binding_table_index = binding_table_index;
316 insn->bits3.dp_read.msg_control = msg_control;
317 insn->bits3.dp_read.msg_type = msg_type;
318 insn->bits3.dp_read.target_cache = target_cache;
319 insn->bits3.dp_read.response_length = response_length;
320 insn->bits3.dp_read.msg_length = msg_length;
321 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
322 insn->bits3.dp_read.end_of_thread = end_of_thread;
323 }
324
325 static void brw_set_sampler_message(struct brw_context *brw,
326 struct brw_instruction *insn,
327 GLuint binding_table_index,
328 GLuint sampler,
329 GLuint msg_type,
330 GLuint response_length,
331 GLuint msg_length,
332 GLboolean eot)
333 {
334 brw_set_src1(insn, brw_imm_d(0));
335
336 if (BRW_IS_G4X(brw)) {
337 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
338 insn->bits3.sampler_g4x.sampler = sampler;
339 insn->bits3.sampler_g4x.msg_type = msg_type;
340 insn->bits3.sampler_g4x.response_length = response_length;
341 insn->bits3.sampler_g4x.msg_length = msg_length;
342 insn->bits3.sampler_g4x.end_of_thread = eot;
343 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
344 } else {
345 insn->bits3.sampler.binding_table_index = binding_table_index;
346 insn->bits3.sampler.sampler = sampler;
347 insn->bits3.sampler.msg_type = msg_type;
348 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
349 insn->bits3.sampler.response_length = response_length;
350 insn->bits3.sampler.msg_length = msg_length;
351 insn->bits3.sampler.end_of_thread = eot;
352 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
353 }
354 }
355
356
357
358 static struct brw_instruction *next_insn( struct brw_compile *p,
359 GLuint opcode )
360 {
361 struct brw_instruction *insn;
362
363 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
364
365 insn = &p->store[p->nr_insn++];
366 memcpy(insn, p->current, sizeof(*insn));
367
368 /* Reset this one-shot flag:
369 */
370
371 if (p->current->header.destreg__conditonalmod) {
372 p->current->header.destreg__conditonalmod = 0;
373 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
374 }
375
376 insn->header.opcode = opcode;
377 return insn;
378 }
379
380
381 static struct brw_instruction *brw_alu1( struct brw_compile *p,
382 GLuint opcode,
383 struct brw_reg dest,
384 struct brw_reg src )
385 {
386 struct brw_instruction *insn = next_insn(p, opcode);
387 brw_set_dest(insn, dest);
388 brw_set_src0(insn, src);
389 return insn;
390 }
391
392 static struct brw_instruction *brw_alu2(struct brw_compile *p,
393 GLuint opcode,
394 struct brw_reg dest,
395 struct brw_reg src0,
396 struct brw_reg src1 )
397 {
398 struct brw_instruction *insn = next_insn(p, opcode);
399 brw_set_dest(insn, dest);
400 brw_set_src0(insn, src0);
401 brw_set_src1(insn, src1);
402 return insn;
403 }
404
405
406 /***********************************************************************
407 * Convenience routines.
408 */
409 #define ALU1(OP) \
410 struct brw_instruction *brw_##OP(struct brw_compile *p, \
411 struct brw_reg dest, \
412 struct brw_reg src0) \
413 { \
414 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
415 }
416
417 #define ALU2(OP) \
418 struct brw_instruction *brw_##OP(struct brw_compile *p, \
419 struct brw_reg dest, \
420 struct brw_reg src0, \
421 struct brw_reg src1) \
422 { \
423 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
424 }
425
426
427 ALU1(MOV)
428 ALU2(SEL)
429 ALU1(NOT)
430 ALU2(AND)
431 ALU2(OR)
432 ALU2(XOR)
433 ALU2(SHR)
434 ALU2(SHL)
435 ALU2(RSR)
436 ALU2(RSL)
437 ALU2(ASR)
438 ALU2(ADD)
439 ALU2(MUL)
440 ALU1(FRC)
441 ALU1(RNDD)
442 ALU1(RNDZ)
443 ALU2(MAC)
444 ALU2(MACH)
445 ALU1(LZD)
446 ALU2(DP4)
447 ALU2(DPH)
448 ALU2(DP3)
449 ALU2(DP2)
450 ALU2(LINE)
451
452
453
454
455 void brw_NOP(struct brw_compile *p)
456 {
457 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
458 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
459 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
460 brw_set_src1(insn, brw_imm_ud(0x0));
461 }
462
463
464
465
466
467 /***********************************************************************
468 * Comparisons, if/else/endif
469 */
470
471 struct brw_instruction *brw_JMPI(struct brw_compile *p,
472 struct brw_reg dest,
473 struct brw_reg src0,
474 struct brw_reg src1)
475 {
476 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
477
478 p->current->header.predicate_control = BRW_PREDICATE_NONE;
479
480 return insn;
481 }
482
483 /* EU takes the value from the flag register and pushes it onto some
484 * sort of a stack (presumably merging with any flag value already on
485 * the stack). Within an if block, the flags at the top of the stack
486 * control execution on each channel of the unit, eg. on each of the
487 * 16 pixel values in our wm programs.
488 *
489 * When the matching 'else' instruction is reached (presumably by
490 * countdown of the instruction count patched in by our ELSE/ENDIF
491 * functions), the relevent flags are inverted.
492 *
493 * When the matching 'endif' instruction is reached, the flags are
494 * popped off. If the stack is now empty, normal execution resumes.
495 *
496 * No attempt is made to deal with stack overflow (14 elements?).
497 */
498 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
499 {
500 struct brw_instruction *insn;
501
502 if (p->single_program_flow) {
503 assert(execute_size == BRW_EXECUTE_1);
504
505 insn = next_insn(p, BRW_OPCODE_ADD);
506 insn->header.predicate_inverse = 1;
507 } else {
508 insn = next_insn(p, BRW_OPCODE_IF);
509 }
510
511 /* Override the defaults for this instruction:
512 */
513 brw_set_dest(insn, brw_ip_reg());
514 brw_set_src0(insn, brw_ip_reg());
515 brw_set_src1(insn, brw_imm_d(0x0));
516
517 insn->header.execution_size = execute_size;
518 insn->header.compression_control = BRW_COMPRESSION_NONE;
519 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
520 insn->header.mask_control = BRW_MASK_ENABLE;
521 if (!p->single_program_flow)
522 insn->header.thread_control = BRW_THREAD_SWITCH;
523
524 p->current->header.predicate_control = BRW_PREDICATE_NONE;
525
526 return insn;
527 }
528
529
530 struct brw_instruction *brw_ELSE(struct brw_compile *p,
531 struct brw_instruction *if_insn)
532 {
533 struct brw_instruction *insn;
534
535 if (p->single_program_flow) {
536 insn = next_insn(p, BRW_OPCODE_ADD);
537 } else {
538 insn = next_insn(p, BRW_OPCODE_ELSE);
539 }
540
541 brw_set_dest(insn, brw_ip_reg());
542 brw_set_src0(insn, brw_ip_reg());
543 brw_set_src1(insn, brw_imm_d(0x0));
544
545 insn->header.compression_control = BRW_COMPRESSION_NONE;
546 insn->header.execution_size = if_insn->header.execution_size;
547 insn->header.mask_control = BRW_MASK_ENABLE;
548 if (!p->single_program_flow)
549 insn->header.thread_control = BRW_THREAD_SWITCH;
550
551 /* Patch the if instruction to point at this instruction.
552 */
553 if (p->single_program_flow) {
554 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
555
556 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
557 } else {
558 assert(if_insn->header.opcode == BRW_OPCODE_IF);
559
560 if_insn->bits3.if_else.jump_count = insn - if_insn;
561 if_insn->bits3.if_else.pop_count = 1;
562 if_insn->bits3.if_else.pad0 = 0;
563 }
564
565 return insn;
566 }
567
568 void brw_ENDIF(struct brw_compile *p,
569 struct brw_instruction *patch_insn)
570 {
571 if (p->single_program_flow) {
572 /* In single program flow mode, there's no need to execute an ENDIF,
573 * since we don't need to do any stack operations, and if we're executing
574 * currently, we want to just continue executing.
575 */
576 struct brw_instruction *next = &p->store[p->nr_insn];
577
578 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
579
580 patch_insn->bits3.ud = (next - patch_insn) * 16;
581 } else {
582 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
583
584 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
585 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
586 brw_set_src1(insn, brw_imm_d(0x0));
587
588 insn->header.compression_control = BRW_COMPRESSION_NONE;
589 insn->header.execution_size = patch_insn->header.execution_size;
590 insn->header.mask_control = BRW_MASK_ENABLE;
591 insn->header.thread_control = BRW_THREAD_SWITCH;
592
593 assert(patch_insn->bits3.if_else.jump_count == 0);
594
595 /* Patch the if or else instructions to point at this or the next
596 * instruction respectively.
597 */
598 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
599 /* Automagically turn it into an IFF:
600 */
601 patch_insn->header.opcode = BRW_OPCODE_IFF;
602 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
603 patch_insn->bits3.if_else.pop_count = 0;
604 patch_insn->bits3.if_else.pad0 = 0;
605 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
606 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
607 patch_insn->bits3.if_else.pop_count = 1;
608 patch_insn->bits3.if_else.pad0 = 0;
609 } else {
610 assert(0);
611 }
612
613 /* Also pop item off the stack in the endif instruction:
614 */
615 insn->bits3.if_else.jump_count = 0;
616 insn->bits3.if_else.pop_count = 1;
617 insn->bits3.if_else.pad0 = 0;
618 }
619 }
620
621 struct brw_instruction *brw_BREAK(struct brw_compile *p)
622 {
623 struct brw_instruction *insn;
624 insn = next_insn(p, BRW_OPCODE_BREAK);
625 brw_set_dest(insn, brw_ip_reg());
626 brw_set_src0(insn, brw_ip_reg());
627 brw_set_src1(insn, brw_imm_d(0x0));
628 insn->header.compression_control = BRW_COMPRESSION_NONE;
629 insn->header.execution_size = BRW_EXECUTE_8;
630 /* insn->header.mask_control = BRW_MASK_DISABLE; */
631 insn->bits3.if_else.pad0 = 0;
632 return insn;
633 }
634
635 struct brw_instruction *brw_CONT(struct brw_compile *p)
636 {
637 struct brw_instruction *insn;
638 insn = next_insn(p, BRW_OPCODE_CONTINUE);
639 brw_set_dest(insn, brw_ip_reg());
640 brw_set_src0(insn, brw_ip_reg());
641 brw_set_src1(insn, brw_imm_d(0x0));
642 insn->header.compression_control = BRW_COMPRESSION_NONE;
643 insn->header.execution_size = BRW_EXECUTE_8;
644 /* insn->header.mask_control = BRW_MASK_DISABLE; */
645 insn->bits3.if_else.pad0 = 0;
646 return insn;
647 }
648
649 /* DO/WHILE loop:
650 */
651 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
652 {
653 if (p->single_program_flow) {
654 return &p->store[p->nr_insn];
655 } else {
656 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
657
658 /* Override the defaults for this instruction:
659 */
660 brw_set_dest(insn, brw_null_reg());
661 brw_set_src0(insn, brw_null_reg());
662 brw_set_src1(insn, brw_null_reg());
663
664 insn->header.compression_control = BRW_COMPRESSION_NONE;
665 insn->header.execution_size = execute_size;
666 insn->header.predicate_control = BRW_PREDICATE_NONE;
667 /* insn->header.mask_control = BRW_MASK_ENABLE; */
668 /* insn->header.mask_control = BRW_MASK_DISABLE; */
669
670 return insn;
671 }
672 }
673
674
675
676 struct brw_instruction *brw_WHILE(struct brw_compile *p,
677 struct brw_instruction *do_insn)
678 {
679 struct brw_instruction *insn;
680
681 if (p->single_program_flow)
682 insn = next_insn(p, BRW_OPCODE_ADD);
683 else
684 insn = next_insn(p, BRW_OPCODE_WHILE);
685
686 brw_set_dest(insn, brw_ip_reg());
687 brw_set_src0(insn, brw_ip_reg());
688 brw_set_src1(insn, brw_imm_d(0x0));
689
690 insn->header.compression_control = BRW_COMPRESSION_NONE;
691
692 if (p->single_program_flow) {
693 insn->header.execution_size = BRW_EXECUTE_1;
694
695 insn->bits3.d = (do_insn - insn) * 16;
696 } else {
697 insn->header.execution_size = do_insn->header.execution_size;
698
699 assert(do_insn->header.opcode == BRW_OPCODE_DO);
700 insn->bits3.if_else.jump_count = do_insn - insn + 1;
701 insn->bits3.if_else.pop_count = 0;
702 insn->bits3.if_else.pad0 = 0;
703 }
704
705 /* insn->header.mask_control = BRW_MASK_ENABLE; */
706
707 /* insn->header.mask_control = BRW_MASK_DISABLE; */
708 p->current->header.predicate_control = BRW_PREDICATE_NONE;
709 return insn;
710 }
711
712
713 /* FORWARD JUMPS:
714 */
715 void brw_land_fwd_jump(struct brw_compile *p,
716 struct brw_instruction *jmp_insn)
717 {
718 struct brw_instruction *landing = &p->store[p->nr_insn];
719
720 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
721 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
722
723 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
724 }
725
726
727
728 /* To integrate with the above, it makes sense that the comparison
729 * instruction should populate the flag register. It might be simpler
730 * just to use the flag reg for most WM tasks?
731 */
732 void brw_CMP(struct brw_compile *p,
733 struct brw_reg dest,
734 GLuint conditional,
735 struct brw_reg src0,
736 struct brw_reg src1)
737 {
738 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
739
740 insn->header.destreg__conditonalmod = conditional;
741 brw_set_dest(insn, dest);
742 brw_set_src0(insn, src0);
743 brw_set_src1(insn, src1);
744
745 /* guess_execution_size(insn, src0); */
746
747
748 /* Make it so that future instructions will use the computed flag
749 * value until brw_set_predicate_control_flag_value() is called
750 * again.
751 */
752 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
753 dest.nr == 0) {
754 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
755 p->flag_value = 0xff;
756 }
757 }
758
759
760
761 /***********************************************************************
762 * Helpers for the various SEND message types:
763 */
764
765 /* Invert 8 values
766 */
767 void brw_math( struct brw_compile *p,
768 struct brw_reg dest,
769 GLuint function,
770 GLuint saturate,
771 GLuint msg_reg_nr,
772 struct brw_reg src,
773 GLuint data_type,
774 GLuint precision )
775 {
776 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
777 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
778 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
779
780 /* Example code doesn't set predicate_control for send
781 * instructions.
782 */
783 insn->header.predicate_control = 0;
784 insn->header.destreg__conditonalmod = msg_reg_nr;
785
786 brw_set_dest(insn, dest);
787 brw_set_src0(insn, src);
788 brw_set_math_message(insn,
789 msg_length, response_length,
790 function,
791 BRW_MATH_INTEGER_UNSIGNED,
792 precision,
793 saturate,
794 data_type);
795 }
796
797 /* Use 2 send instructions to invert 16 elements
798 */
799 void brw_math_16( struct brw_compile *p,
800 struct brw_reg dest,
801 GLuint function,
802 GLuint saturate,
803 GLuint msg_reg_nr,
804 struct brw_reg src,
805 GLuint precision )
806 {
807 struct brw_instruction *insn;
808 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
809 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
810
811 /* First instruction:
812 */
813 brw_push_insn_state(p);
814 brw_set_predicate_control_flag_value(p, 0xff);
815 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
816
817 insn = next_insn(p, BRW_OPCODE_SEND);
818 insn->header.destreg__conditonalmod = msg_reg_nr;
819
820 brw_set_dest(insn, dest);
821 brw_set_src0(insn, src);
822 brw_set_math_message(insn,
823 msg_length, response_length,
824 function,
825 BRW_MATH_INTEGER_UNSIGNED,
826 precision,
827 saturate,
828 BRW_MATH_DATA_VECTOR);
829
830 /* Second instruction:
831 */
832 insn = next_insn(p, BRW_OPCODE_SEND);
833 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
834 insn->header.destreg__conditonalmod = msg_reg_nr+1;
835
836 brw_set_dest(insn, offset(dest,1));
837 brw_set_src0(insn, src);
838 brw_set_math_message(insn,
839 msg_length, response_length,
840 function,
841 BRW_MATH_INTEGER_UNSIGNED,
842 precision,
843 saturate,
844 BRW_MATH_DATA_VECTOR);
845
846 brw_pop_insn_state(p);
847 }
848
849
850
851
852 void brw_dp_WRITE_16( struct brw_compile *p,
853 struct brw_reg src,
854 GLuint msg_reg_nr,
855 GLuint scratch_offset )
856 {
857 {
858 brw_push_insn_state(p);
859 brw_set_mask_control(p, BRW_MASK_DISABLE);
860 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
861
862 brw_MOV(p,
863 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
864 brw_imm_d(scratch_offset));
865
866 brw_pop_insn_state(p);
867 }
868
869 {
870 GLuint msg_length = 3;
871 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
872 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
873
874 insn->header.predicate_control = 0; /* XXX */
875 insn->header.compression_control = BRW_COMPRESSION_NONE;
876 insn->header.destreg__conditonalmod = msg_reg_nr;
877
878 brw_set_dest(insn, dest);
879 brw_set_src0(insn, src);
880
881 brw_set_dp_write_message(insn,
882 255, /* bti */
883 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
884 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
885 msg_length,
886 0, /* pixel scoreboard */
887 0, /* response_length */
888 0); /* eot */
889 }
890
891 }
892
893
894 void brw_dp_READ_16( struct brw_compile *p,
895 struct brw_reg dest,
896 GLuint msg_reg_nr,
897 GLuint scratch_offset )
898 {
899 {
900 brw_push_insn_state(p);
901 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
902 brw_set_mask_control(p, BRW_MASK_DISABLE);
903
904 brw_MOV(p,
905 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
906 brw_imm_d(scratch_offset));
907
908 brw_pop_insn_state(p);
909 }
910
911 {
912 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
913
914 insn->header.predicate_control = 0; /* XXX */
915 insn->header.compression_control = BRW_COMPRESSION_NONE;
916 insn->header.destreg__conditonalmod = msg_reg_nr;
917
918 brw_set_dest(insn, dest); /* UW? */
919 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
920
921 brw_set_dp_read_message(insn,
922 255, /* bti */
923 3, /* msg_control */
924 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
925 1, /* target cache */
926 1, /* msg_length */
927 2, /* response_length */
928 0); /* eot */
929 }
930 }
931
932
933 void brw_fb_WRITE(struct brw_compile *p,
934 struct brw_reg dest,
935 GLuint msg_reg_nr,
936 struct brw_reg src0,
937 GLuint binding_table_index,
938 GLuint msg_length,
939 GLuint response_length,
940 GLboolean eot)
941 {
942 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
943
944 insn->header.predicate_control = 0; /* XXX */
945 insn->header.compression_control = BRW_COMPRESSION_NONE;
946 insn->header.destreg__conditonalmod = msg_reg_nr;
947
948 brw_set_dest(insn, dest);
949 brw_set_src0(insn, src0);
950 brw_set_dp_write_message(insn,
951 binding_table_index,
952 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
953 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
954 msg_length,
955 1, /* pixel scoreboard */
956 response_length,
957 eot);
958 }
959
960
961
962 void brw_SAMPLE(struct brw_compile *p,
963 struct brw_reg dest,
964 GLuint msg_reg_nr,
965 struct brw_reg src0,
966 GLuint binding_table_index,
967 GLuint sampler,
968 GLuint writemask,
969 GLuint msg_type,
970 GLuint response_length,
971 GLuint msg_length,
972 GLboolean eot)
973 {
974 GLboolean need_stall = 0;
975
976 if(writemask == 0) {
977 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
978 return;
979 }
980
981 /* Hardware doesn't do destination dependency checking on send
982 * instructions properly. Add a workaround which generates the
983 * dependency by other means. In practice it seems like this bug
984 * only crops up for texture samples, and only where registers are
985 * written by the send and then written again later without being
986 * read in between. Luckily for us, we already track that
987 * information and use it to modify the writemask for the
988 * instruction, so that is a guide for whether a workaround is
989 * needed.
990 */
991 if (writemask != WRITEMASK_XYZW) {
992 GLuint dst_offset = 0;
993 GLuint i, newmask = 0, len = 0;
994
995 for (i = 0; i < 4; i++) {
996 if (writemask & (1<<i))
997 break;
998 dst_offset += 2;
999 }
1000 for (; i < 4; i++) {
1001 if (!(writemask & (1<<i)))
1002 break;
1003 newmask |= 1<<i;
1004 len++;
1005 }
1006
1007 if (newmask != writemask) {
1008 need_stall = 1;
1009 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1010 }
1011 else {
1012 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1013
1014 newmask = ~newmask & WRITEMASK_XYZW;
1015
1016 brw_push_insn_state(p);
1017
1018 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1019 brw_set_mask_control(p, BRW_MASK_DISABLE);
1020
1021 brw_MOV(p, m1, brw_vec8_grf(0,0));
1022 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1023
1024 brw_pop_insn_state(p);
1025
1026 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1027 dest = offset(dest, dst_offset);
1028 response_length = len * 2;
1029 }
1030 }
1031
1032 {
1033 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1034
1035 insn->header.predicate_control = 0; /* XXX */
1036 insn->header.compression_control = BRW_COMPRESSION_NONE;
1037 insn->header.destreg__conditonalmod = msg_reg_nr;
1038
1039 brw_set_dest(insn, dest);
1040 brw_set_src0(insn, src0);
1041 brw_set_sampler_message(p->brw, insn,
1042 binding_table_index,
1043 sampler,
1044 msg_type,
1045 response_length,
1046 msg_length,
1047 eot);
1048 }
1049
1050 if (need_stall)
1051 {
1052 struct brw_reg reg = vec8(offset(dest, response_length-1));
1053
1054 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1055 */
1056 brw_push_insn_state(p);
1057 brw_set_compression_control(p, GL_FALSE);
1058 brw_MOV(p, reg, reg);
1059 brw_pop_insn_state(p);
1060 }
1061
1062 }
1063
1064 /* All these variables are pretty confusing - we might be better off
1065 * using bitmasks and macros for this, in the old style. Or perhaps
1066 * just having the caller instantiate the fields in dword3 itself.
1067 */
1068 void brw_urb_WRITE(struct brw_compile *p,
1069 struct brw_reg dest,
1070 GLuint msg_reg_nr,
1071 struct brw_reg src0,
1072 GLboolean allocate,
1073 GLboolean used,
1074 GLuint msg_length,
1075 GLuint response_length,
1076 GLboolean eot,
1077 GLboolean writes_complete,
1078 GLuint offset,
1079 GLuint swizzle)
1080 {
1081 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1082
1083 assert(msg_length < 16);
1084
1085 brw_set_dest(insn, dest);
1086 brw_set_src0(insn, src0);
1087 brw_set_src1(insn, brw_imm_d(0));
1088
1089 insn->header.destreg__conditonalmod = msg_reg_nr;
1090
1091 brw_set_urb_message(insn,
1092 allocate,
1093 used,
1094 msg_length,
1095 response_length,
1096 eot,
1097 writes_complete,
1098 offset,
1099 swizzle);
1100 }
1101