i965: Merge GM45 into the G4X chipset define.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 insn->bits1.da1.dest_reg_file = dest.file;
59 insn->bits1.da1.dest_reg_type = dest.type;
60 insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63 insn->bits1.da1.dest_reg_nr = dest.nr;
64
65 if (insn->header.access_mode == BRW_ALIGN_1) {
66 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
68 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
69 insn->bits1.da1.dest_horiz_stride = dest.hstride;
70 }
71 else {
72 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
73 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
74 }
75 }
76 else {
77 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
78
79 /* These are different sizes in align1 vs align16:
80 */
81 if (insn->header.access_mode == BRW_ALIGN_1) {
82 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
83 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
84 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
85 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
86 }
87 else {
88 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 }
90 }
91
92 /* NEW: Set the execution size based on dest.width and
93 * insn->compression_control:
94 */
95 guess_execution_size(insn, dest);
96 }
97
98 static void brw_set_src0( struct brw_instruction *insn,
99 struct brw_reg reg )
100 {
101 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
102
103 insn->bits1.da1.src0_reg_file = reg.file;
104 insn->bits1.da1.src0_reg_type = reg.type;
105 insn->bits2.da1.src0_abs = reg.abs;
106 insn->bits2.da1.src0_negate = reg.negate;
107 insn->bits2.da1.src0_address_mode = reg.address_mode;
108
109 if (reg.file == BRW_IMMEDIATE_VALUE) {
110 insn->bits3.ud = reg.dw1.ud;
111
112 /* Required to set some fields in src1 as well:
113 */
114 insn->bits1.da1.src1_reg_file = 0; /* arf */
115 insn->bits1.da1.src1_reg_type = reg.type;
116 }
117 else
118 {
119 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
120 if (insn->header.access_mode == BRW_ALIGN_1) {
121 insn->bits2.da1.src0_subreg_nr = reg.subnr;
122 insn->bits2.da1.src0_reg_nr = reg.nr;
123 }
124 else {
125 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
126 insn->bits2.da16.src0_reg_nr = reg.nr;
127 }
128 }
129 else {
130 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
131
132 if (insn->header.access_mode == BRW_ALIGN_1) {
133 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
134 }
135 else {
136 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
137 }
138 }
139
140 if (insn->header.access_mode == BRW_ALIGN_1) {
141 if (reg.width == BRW_WIDTH_1 &&
142 insn->header.execution_size == BRW_EXECUTE_1) {
143 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
144 insn->bits2.da1.src0_width = BRW_WIDTH_1;
145 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
146 }
147 else {
148 insn->bits2.da1.src0_horiz_stride = reg.hstride;
149 insn->bits2.da1.src0_width = reg.width;
150 insn->bits2.da1.src0_vert_stride = reg.vstride;
151 }
152 }
153 else {
154 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
155 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
156 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
157 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
158
159 /* This is an oddity of the fact we're using the same
160 * descriptions for registers in align_16 as align_1:
161 */
162 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
163 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
164 else
165 insn->bits2.da16.src0_vert_stride = reg.vstride;
166 }
167 }
168 }
169
170
171 void brw_set_src1( struct brw_instruction *insn,
172 struct brw_reg reg )
173 {
174 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
175
176 insn->bits1.da1.src1_reg_file = reg.file;
177 insn->bits1.da1.src1_reg_type = reg.type;
178 insn->bits3.da1.src1_abs = reg.abs;
179 insn->bits3.da1.src1_negate = reg.negate;
180
181 /* Only src1 can be immediate in two-argument instructions.
182 */
183 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
184
185 if (reg.file == BRW_IMMEDIATE_VALUE) {
186 insn->bits3.ud = reg.dw1.ud;
187 }
188 else {
189 /* This is a hardware restriction, which may or may not be lifted
190 * in the future:
191 */
192 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
193 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
194
195 if (insn->header.access_mode == BRW_ALIGN_1) {
196 insn->bits3.da1.src1_subreg_nr = reg.subnr;
197 insn->bits3.da1.src1_reg_nr = reg.nr;
198 }
199 else {
200 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
201 insn->bits3.da16.src1_reg_nr = reg.nr;
202 }
203
204 if (insn->header.access_mode == BRW_ALIGN_1) {
205 if (reg.width == BRW_WIDTH_1 &&
206 insn->header.execution_size == BRW_EXECUTE_1) {
207 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
208 insn->bits3.da1.src1_width = BRW_WIDTH_1;
209 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
210 }
211 else {
212 insn->bits3.da1.src1_horiz_stride = reg.hstride;
213 insn->bits3.da1.src1_width = reg.width;
214 insn->bits3.da1.src1_vert_stride = reg.vstride;
215 }
216 }
217 else {
218 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
219 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
220 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
221 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
222
223 /* This is an oddity of the fact we're using the same
224 * descriptions for registers in align_16 as align_1:
225 */
226 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
227 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
228 else
229 insn->bits3.da16.src1_vert_stride = reg.vstride;
230 }
231 }
232 }
233
234
235
236 static void brw_set_math_message( struct brw_instruction *insn,
237 GLuint msg_length,
238 GLuint response_length,
239 GLuint function,
240 GLuint integer_type,
241 GLboolean low_precision,
242 GLboolean saturate,
243 GLuint dataType )
244 {
245 brw_set_src1(insn, brw_imm_d(0));
246
247 insn->bits3.math.function = function;
248 insn->bits3.math.int_type = integer_type;
249 insn->bits3.math.precision = low_precision;
250 insn->bits3.math.saturate = saturate;
251 insn->bits3.math.data_type = dataType;
252 insn->bits3.math.response_length = response_length;
253 insn->bits3.math.msg_length = msg_length;
254 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
255 insn->bits3.math.end_of_thread = 0;
256 }
257
258 static void brw_set_urb_message( struct brw_instruction *insn,
259 GLboolean allocate,
260 GLboolean used,
261 GLuint msg_length,
262 GLuint response_length,
263 GLboolean end_of_thread,
264 GLboolean complete,
265 GLuint offset,
266 GLuint swizzle_control )
267 {
268 brw_set_src1(insn, brw_imm_d(0));
269
270 insn->bits3.urb.opcode = 0; /* ? */
271 insn->bits3.urb.offset = offset;
272 insn->bits3.urb.swizzle_control = swizzle_control;
273 insn->bits3.urb.allocate = allocate;
274 insn->bits3.urb.used = used; /* ? */
275 insn->bits3.urb.complete = complete;
276 insn->bits3.urb.response_length = response_length;
277 insn->bits3.urb.msg_length = msg_length;
278 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
279 insn->bits3.urb.end_of_thread = end_of_thread;
280 }
281
282 static void brw_set_dp_write_message( struct brw_instruction *insn,
283 GLuint binding_table_index,
284 GLuint msg_control,
285 GLuint msg_type,
286 GLuint msg_length,
287 GLuint pixel_scoreboard_clear,
288 GLuint response_length,
289 GLuint end_of_thread )
290 {
291 brw_set_src1(insn, brw_imm_d(0));
292
293 insn->bits3.dp_write.binding_table_index = binding_table_index;
294 insn->bits3.dp_write.msg_control = msg_control;
295 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
296 insn->bits3.dp_write.msg_type = msg_type;
297 insn->bits3.dp_write.send_commit_msg = 0;
298 insn->bits3.dp_write.response_length = response_length;
299 insn->bits3.dp_write.msg_length = msg_length;
300 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
301 insn->bits3.urb.end_of_thread = end_of_thread;
302 }
303
304 static void brw_set_dp_read_message( struct brw_instruction *insn,
305 GLuint binding_table_index,
306 GLuint msg_control,
307 GLuint msg_type,
308 GLuint target_cache,
309 GLuint msg_length,
310 GLuint response_length,
311 GLuint end_of_thread )
312 {
313 brw_set_src1(insn, brw_imm_d(0));
314
315 insn->bits3.dp_read.binding_table_index = binding_table_index;
316 insn->bits3.dp_read.msg_control = msg_control;
317 insn->bits3.dp_read.msg_type = msg_type;
318 insn->bits3.dp_read.target_cache = target_cache;
319 insn->bits3.dp_read.response_length = response_length;
320 insn->bits3.dp_read.msg_length = msg_length;
321 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
322 insn->bits3.dp_read.end_of_thread = end_of_thread;
323 }
324
325 static void brw_set_sampler_message(struct brw_context *brw,
326 struct brw_instruction *insn,
327 GLuint binding_table_index,
328 GLuint sampler,
329 GLuint msg_type,
330 GLuint response_length,
331 GLuint msg_length,
332 GLboolean eot)
333 {
334 brw_set_src1(insn, brw_imm_d(0));
335
336 if (BRW_IS_G4X(brw)) {
337 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
338 insn->bits3.sampler_g4x.sampler = sampler;
339 insn->bits3.sampler_g4x.msg_type = msg_type;
340 insn->bits3.sampler_g4x.response_length = response_length;
341 insn->bits3.sampler_g4x.msg_length = msg_length;
342 insn->bits3.sampler_g4x.end_of_thread = eot;
343 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
344 } else {
345 insn->bits3.sampler.binding_table_index = binding_table_index;
346 insn->bits3.sampler.sampler = sampler;
347 insn->bits3.sampler.msg_type = msg_type;
348 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
349 insn->bits3.sampler.response_length = response_length;
350 insn->bits3.sampler.msg_length = msg_length;
351 insn->bits3.sampler.end_of_thread = eot;
352 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
353 }
354 }
355
356
357
358 static struct brw_instruction *next_insn( struct brw_compile *p,
359 GLuint opcode )
360 {
361 struct brw_instruction *insn;
362
363 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
364
365 insn = &p->store[p->nr_insn++];
366 memcpy(insn, p->current, sizeof(*insn));
367
368 /* Reset this one-shot flag:
369 */
370
371 if (p->current->header.destreg__conditonalmod) {
372 p->current->header.destreg__conditonalmod = 0;
373 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
374 }
375
376 insn->header.opcode = opcode;
377 return insn;
378 }
379
380
381 static struct brw_instruction *brw_alu1( struct brw_compile *p,
382 GLuint opcode,
383 struct brw_reg dest,
384 struct brw_reg src )
385 {
386 struct brw_instruction *insn = next_insn(p, opcode);
387 brw_set_dest(insn, dest);
388 brw_set_src0(insn, src);
389 return insn;
390 }
391
392 static struct brw_instruction *brw_alu2(struct brw_compile *p,
393 GLuint opcode,
394 struct brw_reg dest,
395 struct brw_reg src0,
396 struct brw_reg src1 )
397 {
398 struct brw_instruction *insn = next_insn(p, opcode);
399 brw_set_dest(insn, dest);
400 brw_set_src0(insn, src0);
401 brw_set_src1(insn, src1);
402 return insn;
403 }
404
405
406 /***********************************************************************
407 * Convenience routines.
408 */
409 #define ALU1(OP) \
410 struct brw_instruction *brw_##OP(struct brw_compile *p, \
411 struct brw_reg dest, \
412 struct brw_reg src0) \
413 { \
414 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
415 }
416
417 #define ALU2(OP) \
418 struct brw_instruction *brw_##OP(struct brw_compile *p, \
419 struct brw_reg dest, \
420 struct brw_reg src0, \
421 struct brw_reg src1) \
422 { \
423 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
424 }
425
426
427 ALU1(MOV)
428 ALU2(SEL)
429 ALU1(NOT)
430 ALU2(AND)
431 ALU2(OR)
432 ALU2(XOR)
433 ALU2(SHR)
434 ALU2(SHL)
435 ALU2(RSR)
436 ALU2(RSL)
437 ALU2(ASR)
438 ALU2(ADD)
439 ALU2(MUL)
440 ALU1(FRC)
441 ALU1(RNDD)
442 ALU2(MAC)
443 ALU2(MACH)
444 ALU1(LZD)
445 ALU2(DP4)
446 ALU2(DPH)
447 ALU2(DP3)
448 ALU2(DP2)
449 ALU2(LINE)
450
451
452
453
454 void brw_NOP(struct brw_compile *p)
455 {
456 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
457 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
458 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
459 brw_set_src1(insn, brw_imm_ud(0x0));
460 }
461
462
463
464
465
466 /***********************************************************************
467 * Comparisons, if/else/endif
468 */
469
470 struct brw_instruction *brw_JMPI(struct brw_compile *p,
471 struct brw_reg dest,
472 struct brw_reg src0,
473 struct brw_reg src1)
474 {
475 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
476
477 p->current->header.predicate_control = BRW_PREDICATE_NONE;
478
479 return insn;
480 }
481
482 /* EU takes the value from the flag register and pushes it onto some
483 * sort of a stack (presumably merging with any flag value already on
484 * the stack). Within an if block, the flags at the top of the stack
485 * control execution on each channel of the unit, eg. on each of the
486 * 16 pixel values in our wm programs.
487 *
488 * When the matching 'else' instruction is reached (presumably by
489 * countdown of the instruction count patched in by our ELSE/ENDIF
490 * functions), the relevent flags are inverted.
491 *
492 * When the matching 'endif' instruction is reached, the flags are
493 * popped off. If the stack is now empty, normal execution resumes.
494 *
495 * No attempt is made to deal with stack overflow (14 elements?).
496 */
497 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
498 {
499 struct brw_instruction *insn;
500
501 if (p->single_program_flow) {
502 assert(execute_size == BRW_EXECUTE_1);
503
504 insn = next_insn(p, BRW_OPCODE_ADD);
505 insn->header.predicate_inverse = 1;
506 } else {
507 insn = next_insn(p, BRW_OPCODE_IF);
508 }
509
510 /* Override the defaults for this instruction:
511 */
512 brw_set_dest(insn, brw_ip_reg());
513 brw_set_src0(insn, brw_ip_reg());
514 brw_set_src1(insn, brw_imm_d(0x0));
515
516 insn->header.execution_size = execute_size;
517 insn->header.compression_control = BRW_COMPRESSION_NONE;
518 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
519 insn->header.mask_control = BRW_MASK_ENABLE;
520 if (!p->single_program_flow)
521 insn->header.thread_control = BRW_THREAD_SWITCH;
522
523 p->current->header.predicate_control = BRW_PREDICATE_NONE;
524
525 return insn;
526 }
527
528
529 struct brw_instruction *brw_ELSE(struct brw_compile *p,
530 struct brw_instruction *if_insn)
531 {
532 struct brw_instruction *insn;
533
534 if (p->single_program_flow) {
535 insn = next_insn(p, BRW_OPCODE_ADD);
536 } else {
537 insn = next_insn(p, BRW_OPCODE_ELSE);
538 }
539
540 brw_set_dest(insn, brw_ip_reg());
541 brw_set_src0(insn, brw_ip_reg());
542 brw_set_src1(insn, brw_imm_d(0x0));
543
544 insn->header.compression_control = BRW_COMPRESSION_NONE;
545 insn->header.execution_size = if_insn->header.execution_size;
546 insn->header.mask_control = BRW_MASK_ENABLE;
547 if (!p->single_program_flow)
548 insn->header.thread_control = BRW_THREAD_SWITCH;
549
550 /* Patch the if instruction to point at this instruction.
551 */
552 if (p->single_program_flow) {
553 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
554
555 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
556 } else {
557 assert(if_insn->header.opcode == BRW_OPCODE_IF);
558
559 if_insn->bits3.if_else.jump_count = insn - if_insn;
560 if_insn->bits3.if_else.pop_count = 1;
561 if_insn->bits3.if_else.pad0 = 0;
562 }
563
564 return insn;
565 }
566
567 void brw_ENDIF(struct brw_compile *p,
568 struct brw_instruction *patch_insn)
569 {
570 if (p->single_program_flow) {
571 /* In single program flow mode, there's no need to execute an ENDIF,
572 * since we don't need to do any stack operations, and if we're executing
573 * currently, we want to just continue executing.
574 */
575 struct brw_instruction *next = &p->store[p->nr_insn];
576
577 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
578
579 patch_insn->bits3.ud = (next - patch_insn) * 16;
580 } else {
581 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
582
583 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
584 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
585 brw_set_src1(insn, brw_imm_d(0x0));
586
587 insn->header.compression_control = BRW_COMPRESSION_NONE;
588 insn->header.execution_size = patch_insn->header.execution_size;
589 insn->header.mask_control = BRW_MASK_ENABLE;
590 insn->header.thread_control = BRW_THREAD_SWITCH;
591
592 assert(patch_insn->bits3.if_else.jump_count == 0);
593
594 /* Patch the if or else instructions to point at this or the next
595 * instruction respectively.
596 */
597 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
598 /* Automagically turn it into an IFF:
599 */
600 patch_insn->header.opcode = BRW_OPCODE_IFF;
601 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
602 patch_insn->bits3.if_else.pop_count = 0;
603 patch_insn->bits3.if_else.pad0 = 0;
604 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
605 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
606 patch_insn->bits3.if_else.pop_count = 1;
607 patch_insn->bits3.if_else.pad0 = 0;
608 } else {
609 assert(0);
610 }
611
612 /* Also pop item off the stack in the endif instruction:
613 */
614 insn->bits3.if_else.jump_count = 0;
615 insn->bits3.if_else.pop_count = 1;
616 insn->bits3.if_else.pad0 = 0;
617 }
618 }
619
620 struct brw_instruction *brw_BREAK(struct brw_compile *p)
621 {
622 struct brw_instruction *insn;
623 insn = next_insn(p, BRW_OPCODE_BREAK);
624 brw_set_dest(insn, brw_ip_reg());
625 brw_set_src0(insn, brw_ip_reg());
626 brw_set_src1(insn, brw_imm_d(0x0));
627 insn->header.compression_control = BRW_COMPRESSION_NONE;
628 insn->header.execution_size = BRW_EXECUTE_8;
629 /* insn->header.mask_control = BRW_MASK_DISABLE; */
630 insn->bits3.if_else.pad0 = 0;
631 return insn;
632 }
633
634 struct brw_instruction *brw_CONT(struct brw_compile *p)
635 {
636 struct brw_instruction *insn;
637 insn = next_insn(p, BRW_OPCODE_CONTINUE);
638 brw_set_dest(insn, brw_ip_reg());
639 brw_set_src0(insn, brw_ip_reg());
640 brw_set_src1(insn, brw_imm_d(0x0));
641 insn->header.compression_control = BRW_COMPRESSION_NONE;
642 insn->header.execution_size = BRW_EXECUTE_8;
643 /* insn->header.mask_control = BRW_MASK_DISABLE; */
644 insn->bits3.if_else.pad0 = 0;
645 return insn;
646 }
647
648 /* DO/WHILE loop:
649 */
650 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
651 {
652 if (p->single_program_flow) {
653 return &p->store[p->nr_insn];
654 } else {
655 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
656
657 /* Override the defaults for this instruction:
658 */
659 brw_set_dest(insn, brw_null_reg());
660 brw_set_src0(insn, brw_null_reg());
661 brw_set_src1(insn, brw_null_reg());
662
663 insn->header.compression_control = BRW_COMPRESSION_NONE;
664 insn->header.execution_size = execute_size;
665 insn->header.predicate_control = BRW_PREDICATE_NONE;
666 /* insn->header.mask_control = BRW_MASK_ENABLE; */
667 /* insn->header.mask_control = BRW_MASK_DISABLE; */
668
669 return insn;
670 }
671 }
672
673
674
675 struct brw_instruction *brw_WHILE(struct brw_compile *p,
676 struct brw_instruction *do_insn)
677 {
678 struct brw_instruction *insn;
679
680 if (p->single_program_flow)
681 insn = next_insn(p, BRW_OPCODE_ADD);
682 else
683 insn = next_insn(p, BRW_OPCODE_WHILE);
684
685 brw_set_dest(insn, brw_ip_reg());
686 brw_set_src0(insn, brw_ip_reg());
687 brw_set_src1(insn, brw_imm_d(0x0));
688
689 insn->header.compression_control = BRW_COMPRESSION_NONE;
690
691 if (p->single_program_flow) {
692 insn->header.execution_size = BRW_EXECUTE_1;
693
694 insn->bits3.d = (do_insn - insn) * 16;
695 } else {
696 insn->header.execution_size = do_insn->header.execution_size;
697
698 assert(do_insn->header.opcode == BRW_OPCODE_DO);
699 insn->bits3.if_else.jump_count = do_insn - insn + 1;
700 insn->bits3.if_else.pop_count = 0;
701 insn->bits3.if_else.pad0 = 0;
702 }
703
704 /* insn->header.mask_control = BRW_MASK_ENABLE; */
705
706 /* insn->header.mask_control = BRW_MASK_DISABLE; */
707 p->current->header.predicate_control = BRW_PREDICATE_NONE;
708 return insn;
709 }
710
711
712 /* FORWARD JUMPS:
713 */
714 void brw_land_fwd_jump(struct brw_compile *p,
715 struct brw_instruction *jmp_insn)
716 {
717 struct brw_instruction *landing = &p->store[p->nr_insn];
718
719 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
720 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
721
722 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
723 }
724
725
726
727 /* To integrate with the above, it makes sense that the comparison
728 * instruction should populate the flag register. It might be simpler
729 * just to use the flag reg for most WM tasks?
730 */
731 void brw_CMP(struct brw_compile *p,
732 struct brw_reg dest,
733 GLuint conditional,
734 struct brw_reg src0,
735 struct brw_reg src1)
736 {
737 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
738
739 insn->header.destreg__conditonalmod = conditional;
740 brw_set_dest(insn, dest);
741 brw_set_src0(insn, src0);
742 brw_set_src1(insn, src1);
743
744 /* guess_execution_size(insn, src0); */
745
746
747 /* Make it so that future instructions will use the computed flag
748 * value until brw_set_predicate_control_flag_value() is called
749 * again.
750 */
751 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
752 dest.nr == 0) {
753 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
754 p->flag_value = 0xff;
755 }
756 }
757
758
759
760 /***********************************************************************
761 * Helpers for the various SEND message types:
762 */
763
764 /* Invert 8 values
765 */
766 void brw_math( struct brw_compile *p,
767 struct brw_reg dest,
768 GLuint function,
769 GLuint saturate,
770 GLuint msg_reg_nr,
771 struct brw_reg src,
772 GLuint data_type,
773 GLuint precision )
774 {
775 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
776 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
777 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
778
779 /* Example code doesn't set predicate_control for send
780 * instructions.
781 */
782 insn->header.predicate_control = 0;
783 insn->header.destreg__conditonalmod = msg_reg_nr;
784
785 brw_set_dest(insn, dest);
786 brw_set_src0(insn, src);
787 brw_set_math_message(insn,
788 msg_length, response_length,
789 function,
790 BRW_MATH_INTEGER_UNSIGNED,
791 precision,
792 saturate,
793 data_type);
794 }
795
796 /* Use 2 send instructions to invert 16 elements
797 */
798 void brw_math_16( struct brw_compile *p,
799 struct brw_reg dest,
800 GLuint function,
801 GLuint saturate,
802 GLuint msg_reg_nr,
803 struct brw_reg src,
804 GLuint precision )
805 {
806 struct brw_instruction *insn;
807 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
808 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
809
810 /* First instruction:
811 */
812 brw_push_insn_state(p);
813 brw_set_predicate_control_flag_value(p, 0xff);
814 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
815
816 insn = next_insn(p, BRW_OPCODE_SEND);
817 insn->header.destreg__conditonalmod = msg_reg_nr;
818
819 brw_set_dest(insn, dest);
820 brw_set_src0(insn, src);
821 brw_set_math_message(insn,
822 msg_length, response_length,
823 function,
824 BRW_MATH_INTEGER_UNSIGNED,
825 precision,
826 saturate,
827 BRW_MATH_DATA_VECTOR);
828
829 /* Second instruction:
830 */
831 insn = next_insn(p, BRW_OPCODE_SEND);
832 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
833 insn->header.destreg__conditonalmod = msg_reg_nr+1;
834
835 brw_set_dest(insn, offset(dest,1));
836 brw_set_src0(insn, src);
837 brw_set_math_message(insn,
838 msg_length, response_length,
839 function,
840 BRW_MATH_INTEGER_UNSIGNED,
841 precision,
842 saturate,
843 BRW_MATH_DATA_VECTOR);
844
845 brw_pop_insn_state(p);
846 }
847
848
849
850
851 void brw_dp_WRITE_16( struct brw_compile *p,
852 struct brw_reg src,
853 GLuint msg_reg_nr,
854 GLuint scratch_offset )
855 {
856 {
857 brw_push_insn_state(p);
858 brw_set_mask_control(p, BRW_MASK_DISABLE);
859 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
860
861 brw_MOV(p,
862 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
863 brw_imm_d(scratch_offset));
864
865 brw_pop_insn_state(p);
866 }
867
868 {
869 GLuint msg_length = 3;
870 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
871 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
872
873 insn->header.predicate_control = 0; /* XXX */
874 insn->header.compression_control = BRW_COMPRESSION_NONE;
875 insn->header.destreg__conditonalmod = msg_reg_nr;
876
877 brw_set_dest(insn, dest);
878 brw_set_src0(insn, src);
879
880 brw_set_dp_write_message(insn,
881 255, /* bti */
882 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
883 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
884 msg_length,
885 0, /* pixel scoreboard */
886 0, /* response_length */
887 0); /* eot */
888 }
889
890 }
891
892
893 void brw_dp_READ_16( struct brw_compile *p,
894 struct brw_reg dest,
895 GLuint msg_reg_nr,
896 GLuint scratch_offset )
897 {
898 {
899 brw_push_insn_state(p);
900 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
901 brw_set_mask_control(p, BRW_MASK_DISABLE);
902
903 brw_MOV(p,
904 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
905 brw_imm_d(scratch_offset));
906
907 brw_pop_insn_state(p);
908 }
909
910 {
911 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
912
913 insn->header.predicate_control = 0; /* XXX */
914 insn->header.compression_control = BRW_COMPRESSION_NONE;
915 insn->header.destreg__conditonalmod = msg_reg_nr;
916
917 brw_set_dest(insn, dest); /* UW? */
918 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
919
920 brw_set_dp_read_message(insn,
921 255, /* bti */
922 3, /* msg_control */
923 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
924 1, /* target cache */
925 1, /* msg_length */
926 2, /* response_length */
927 0); /* eot */
928 }
929 }
930
931
932 void brw_fb_WRITE(struct brw_compile *p,
933 struct brw_reg dest,
934 GLuint msg_reg_nr,
935 struct brw_reg src0,
936 GLuint binding_table_index,
937 GLuint msg_length,
938 GLuint response_length,
939 GLboolean eot)
940 {
941 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
942
943 insn->header.predicate_control = 0; /* XXX */
944 insn->header.compression_control = BRW_COMPRESSION_NONE;
945 insn->header.destreg__conditonalmod = msg_reg_nr;
946
947 brw_set_dest(insn, dest);
948 brw_set_src0(insn, src0);
949 brw_set_dp_write_message(insn,
950 binding_table_index,
951 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
952 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
953 msg_length,
954 1, /* pixel scoreboard */
955 response_length,
956 eot);
957 }
958
959
960
961 void brw_SAMPLE(struct brw_compile *p,
962 struct brw_reg dest,
963 GLuint msg_reg_nr,
964 struct brw_reg src0,
965 GLuint binding_table_index,
966 GLuint sampler,
967 GLuint writemask,
968 GLuint msg_type,
969 GLuint response_length,
970 GLuint msg_length,
971 GLboolean eot)
972 {
973 GLboolean need_stall = 0;
974
975 if(writemask == 0) {
976 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
977 return;
978 }
979
980 /* Hardware doesn't do destination dependency checking on send
981 * instructions properly. Add a workaround which generates the
982 * dependency by other means. In practice it seems like this bug
983 * only crops up for texture samples, and only where registers are
984 * written by the send and then written again later without being
985 * read in between. Luckily for us, we already track that
986 * information and use it to modify the writemask for the
987 * instruction, so that is a guide for whether a workaround is
988 * needed.
989 */
990 if (writemask != WRITEMASK_XYZW) {
991 GLuint dst_offset = 0;
992 GLuint i, newmask = 0, len = 0;
993
994 for (i = 0; i < 4; i++) {
995 if (writemask & (1<<i))
996 break;
997 dst_offset += 2;
998 }
999 for (; i < 4; i++) {
1000 if (!(writemask & (1<<i)))
1001 break;
1002 newmask |= 1<<i;
1003 len++;
1004 }
1005
1006 if (newmask != writemask) {
1007 need_stall = 1;
1008 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1009 }
1010 else {
1011 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1012
1013 newmask = ~newmask & WRITEMASK_XYZW;
1014
1015 brw_push_insn_state(p);
1016
1017 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1018 brw_set_mask_control(p, BRW_MASK_DISABLE);
1019
1020 brw_MOV(p, m1, brw_vec8_grf(0,0));
1021 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1022
1023 brw_pop_insn_state(p);
1024
1025 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1026 dest = offset(dest, dst_offset);
1027 response_length = len * 2;
1028 }
1029 }
1030
1031 {
1032 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1033
1034 insn->header.predicate_control = 0; /* XXX */
1035 insn->header.compression_control = BRW_COMPRESSION_NONE;
1036 insn->header.destreg__conditonalmod = msg_reg_nr;
1037
1038 brw_set_dest(insn, dest);
1039 brw_set_src0(insn, src0);
1040 brw_set_sampler_message(p->brw, insn,
1041 binding_table_index,
1042 sampler,
1043 msg_type,
1044 response_length,
1045 msg_length,
1046 eot);
1047 }
1048
1049 if (need_stall)
1050 {
1051 struct brw_reg reg = vec8(offset(dest, response_length-1));
1052
1053 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1054 */
1055 brw_push_insn_state(p);
1056 brw_set_compression_control(p, GL_FALSE);
1057 brw_MOV(p, reg, reg);
1058 brw_pop_insn_state(p);
1059 }
1060
1061 }
1062
1063 /* All these variables are pretty confusing - we might be better off
1064 * using bitmasks and macros for this, in the old style. Or perhaps
1065 * just having the caller instantiate the fields in dword3 itself.
1066 */
1067 void brw_urb_WRITE(struct brw_compile *p,
1068 struct brw_reg dest,
1069 GLuint msg_reg_nr,
1070 struct brw_reg src0,
1071 GLboolean allocate,
1072 GLboolean used,
1073 GLuint msg_length,
1074 GLuint response_length,
1075 GLboolean eot,
1076 GLboolean writes_complete,
1077 GLuint offset,
1078 GLuint swizzle)
1079 {
1080 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1081
1082 assert(msg_length < 16);
1083
1084 brw_set_dest(insn, dest);
1085 brw_set_src0(insn, src0);
1086 brw_set_src1(insn, brw_imm_d(0));
1087
1088 insn->header.destreg__conditonalmod = msg_reg_nr;
1089
1090 brw_set_urb_message(insn,
1091 allocate,
1092 used,
1093 msg_length,
1094 response_length,
1095 eot,
1096 writes_complete,
1097 offset,
1098 swizzle);
1099 }
1100