i965: Add SNB math opcode support.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 }
79 }
80 else {
81 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83 /* These are different sizes in align1 vs align16:
84 */
85 if (insn->header.access_mode == BRW_ALIGN_1) {
86 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90 }
91 else {
92 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93 }
94 }
95
96 /* NEW: Set the execution size based on dest.width and
97 * insn->compression_control:
98 */
99 guess_execution_size(insn, dest);
100 }
101
102 static void brw_set_src0( struct brw_instruction *insn,
103 struct brw_reg reg )
104 {
105 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
106
107 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
108 assert(reg.nr < 128);
109
110 insn->bits1.da1.src0_reg_file = reg.file;
111 insn->bits1.da1.src0_reg_type = reg.type;
112 insn->bits2.da1.src0_abs = reg.abs;
113 insn->bits2.da1.src0_negate = reg.negate;
114 insn->bits2.da1.src0_address_mode = reg.address_mode;
115
116 if (reg.file == BRW_IMMEDIATE_VALUE) {
117 insn->bits3.ud = reg.dw1.ud;
118
119 /* Required to set some fields in src1 as well:
120 */
121 insn->bits1.da1.src1_reg_file = 0; /* arf */
122 insn->bits1.da1.src1_reg_type = reg.type;
123 }
124 else
125 {
126 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
127 if (insn->header.access_mode == BRW_ALIGN_1) {
128 insn->bits2.da1.src0_subreg_nr = reg.subnr;
129 insn->bits2.da1.src0_reg_nr = reg.nr;
130 }
131 else {
132 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
133 insn->bits2.da16.src0_reg_nr = reg.nr;
134 }
135 }
136 else {
137 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
138
139 if (insn->header.access_mode == BRW_ALIGN_1) {
140 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
141 }
142 else {
143 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
144 }
145 }
146
147 if (insn->header.access_mode == BRW_ALIGN_1) {
148 if (reg.width == BRW_WIDTH_1 &&
149 insn->header.execution_size == BRW_EXECUTE_1) {
150 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
151 insn->bits2.da1.src0_width = BRW_WIDTH_1;
152 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
153 }
154 else {
155 insn->bits2.da1.src0_horiz_stride = reg.hstride;
156 insn->bits2.da1.src0_width = reg.width;
157 insn->bits2.da1.src0_vert_stride = reg.vstride;
158 }
159 }
160 else {
161 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
162 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
163 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
164 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
165
166 /* This is an oddity of the fact we're using the same
167 * descriptions for registers in align_16 as align_1:
168 */
169 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
170 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
171 else
172 insn->bits2.da16.src0_vert_stride = reg.vstride;
173 }
174 }
175 }
176
177
178 void brw_set_src1( struct brw_instruction *insn,
179 struct brw_reg reg )
180 {
181 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
182
183 assert(reg.nr < 128);
184
185 insn->bits1.da1.src1_reg_file = reg.file;
186 insn->bits1.da1.src1_reg_type = reg.type;
187 insn->bits3.da1.src1_abs = reg.abs;
188 insn->bits3.da1.src1_negate = reg.negate;
189
190 /* Only src1 can be immediate in two-argument instructions.
191 */
192 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
193
194 if (reg.file == BRW_IMMEDIATE_VALUE) {
195 insn->bits3.ud = reg.dw1.ud;
196 }
197 else {
198 /* This is a hardware restriction, which may or may not be lifted
199 * in the future:
200 */
201 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
202 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
203
204 if (insn->header.access_mode == BRW_ALIGN_1) {
205 insn->bits3.da1.src1_subreg_nr = reg.subnr;
206 insn->bits3.da1.src1_reg_nr = reg.nr;
207 }
208 else {
209 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
210 insn->bits3.da16.src1_reg_nr = reg.nr;
211 }
212
213 if (insn->header.access_mode == BRW_ALIGN_1) {
214 if (reg.width == BRW_WIDTH_1 &&
215 insn->header.execution_size == BRW_EXECUTE_1) {
216 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
217 insn->bits3.da1.src1_width = BRW_WIDTH_1;
218 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
219 }
220 else {
221 insn->bits3.da1.src1_horiz_stride = reg.hstride;
222 insn->bits3.da1.src1_width = reg.width;
223 insn->bits3.da1.src1_vert_stride = reg.vstride;
224 }
225 }
226 else {
227 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
228 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
229 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
230 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
231
232 /* This is an oddity of the fact we're using the same
233 * descriptions for registers in align_16 as align_1:
234 */
235 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
236 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
237 else
238 insn->bits3.da16.src1_vert_stride = reg.vstride;
239 }
240 }
241 }
242
243
244
245 static void brw_set_math_message( struct brw_context *brw,
246 struct brw_instruction *insn,
247 GLuint msg_length,
248 GLuint response_length,
249 GLuint function,
250 GLuint integer_type,
251 GLboolean low_precision,
252 GLboolean saturate,
253 GLuint dataType )
254 {
255 struct intel_context *intel = &brw->intel;
256 brw_set_src1(insn, brw_imm_d(0));
257
258 if (intel->is_ironlake) {
259 insn->bits3.math_igdng.function = function;
260 insn->bits3.math_igdng.int_type = integer_type;
261 insn->bits3.math_igdng.precision = low_precision;
262 insn->bits3.math_igdng.saturate = saturate;
263 insn->bits3.math_igdng.data_type = dataType;
264 insn->bits3.math_igdng.snapshot = 0;
265 insn->bits3.math_igdng.header_present = 0;
266 insn->bits3.math_igdng.response_length = response_length;
267 insn->bits3.math_igdng.msg_length = msg_length;
268 insn->bits3.math_igdng.end_of_thread = 0;
269 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
270 insn->bits2.send_igdng.end_of_thread = 0;
271 } else {
272 insn->bits3.math.function = function;
273 insn->bits3.math.int_type = integer_type;
274 insn->bits3.math.precision = low_precision;
275 insn->bits3.math.saturate = saturate;
276 insn->bits3.math.data_type = dataType;
277 insn->bits3.math.response_length = response_length;
278 insn->bits3.math.msg_length = msg_length;
279 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
280 insn->bits3.math.end_of_thread = 0;
281 }
282 }
283
284
285 static void brw_set_ff_sync_message( struct brw_context *brw,
286 struct brw_instruction *insn,
287 GLboolean allocate,
288 GLboolean used,
289 GLuint msg_length,
290 GLuint response_length,
291 GLboolean end_of_thread,
292 GLboolean complete,
293 GLuint offset,
294 GLuint swizzle_control )
295 {
296 brw_set_src1(insn, brw_imm_d(0));
297
298 insn->bits3.urb_igdng.opcode = 1;
299 insn->bits3.urb_igdng.offset = offset;
300 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
301 insn->bits3.urb_igdng.allocate = allocate;
302 insn->bits3.urb_igdng.used = used;
303 insn->bits3.urb_igdng.complete = complete;
304 insn->bits3.urb_igdng.header_present = 1;
305 insn->bits3.urb_igdng.response_length = response_length;
306 insn->bits3.urb_igdng.msg_length = msg_length;
307 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
308 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
309 insn->bits2.send_igdng.end_of_thread = end_of_thread;
310 }
311
312 static void brw_set_urb_message( struct brw_context *brw,
313 struct brw_instruction *insn,
314 GLboolean allocate,
315 GLboolean used,
316 GLuint msg_length,
317 GLuint response_length,
318 GLboolean end_of_thread,
319 GLboolean complete,
320 GLuint offset,
321 GLuint swizzle_control )
322 {
323 struct intel_context *intel = &brw->intel;
324 brw_set_src1(insn, brw_imm_d(0));
325
326 if (intel->is_ironlake) {
327 insn->bits3.urb_igdng.opcode = 0; /* ? */
328 insn->bits3.urb_igdng.offset = offset;
329 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
330 insn->bits3.urb_igdng.allocate = allocate;
331 insn->bits3.urb_igdng.used = used; /* ? */
332 insn->bits3.urb_igdng.complete = complete;
333 insn->bits3.urb_igdng.header_present = 1;
334 insn->bits3.urb_igdng.response_length = response_length;
335 insn->bits3.urb_igdng.msg_length = msg_length;
336 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
337 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
338 insn->bits2.send_igdng.end_of_thread = end_of_thread;
339 } else {
340 insn->bits3.urb.opcode = 0; /* ? */
341 insn->bits3.urb.offset = offset;
342 insn->bits3.urb.swizzle_control = swizzle_control;
343 insn->bits3.urb.allocate = allocate;
344 insn->bits3.urb.used = used; /* ? */
345 insn->bits3.urb.complete = complete;
346 insn->bits3.urb.response_length = response_length;
347 insn->bits3.urb.msg_length = msg_length;
348 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
349 insn->bits3.urb.end_of_thread = end_of_thread;
350 }
351 }
352
353 static void brw_set_dp_write_message( struct brw_context *brw,
354 struct brw_instruction *insn,
355 GLuint binding_table_index,
356 GLuint msg_control,
357 GLuint msg_type,
358 GLuint msg_length,
359 GLuint pixel_scoreboard_clear,
360 GLuint response_length,
361 GLuint end_of_thread )
362 {
363 struct intel_context *intel = &brw->intel;
364 brw_set_src1(insn, brw_imm_d(0));
365
366 if (intel->is_ironlake) {
367 insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
368 insn->bits3.dp_write_igdng.msg_control = msg_control;
369 insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
370 insn->bits3.dp_write_igdng.msg_type = msg_type;
371 insn->bits3.dp_write_igdng.send_commit_msg = 0;
372 insn->bits3.dp_write_igdng.header_present = 1;
373 insn->bits3.dp_write_igdng.response_length = response_length;
374 insn->bits3.dp_write_igdng.msg_length = msg_length;
375 insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
376 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
377 insn->bits2.send_igdng.end_of_thread = end_of_thread;
378 } else {
379 insn->bits3.dp_write.binding_table_index = binding_table_index;
380 insn->bits3.dp_write.msg_control = msg_control;
381 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
382 insn->bits3.dp_write.msg_type = msg_type;
383 insn->bits3.dp_write.send_commit_msg = 0;
384 insn->bits3.dp_write.response_length = response_length;
385 insn->bits3.dp_write.msg_length = msg_length;
386 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
387 insn->bits3.dp_write.end_of_thread = end_of_thread;
388 }
389 }
390
391 static void brw_set_dp_read_message( struct brw_context *brw,
392 struct brw_instruction *insn,
393 GLuint binding_table_index,
394 GLuint msg_control,
395 GLuint msg_type,
396 GLuint target_cache,
397 GLuint msg_length,
398 GLuint response_length,
399 GLuint end_of_thread )
400 {
401 struct intel_context *intel = &brw->intel;
402 brw_set_src1(insn, brw_imm_d(0));
403
404 if (intel->is_ironlake) {
405 insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
406 insn->bits3.dp_read_igdng.msg_control = msg_control;
407 insn->bits3.dp_read_igdng.msg_type = msg_type;
408 insn->bits3.dp_read_igdng.target_cache = target_cache;
409 insn->bits3.dp_read_igdng.header_present = 1;
410 insn->bits3.dp_read_igdng.response_length = response_length;
411 insn->bits3.dp_read_igdng.msg_length = msg_length;
412 insn->bits3.dp_read_igdng.pad1 = 0;
413 insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
414 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
415 insn->bits2.send_igdng.end_of_thread = end_of_thread;
416 } else {
417 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
418 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
419 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
420 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
421 insn->bits3.dp_read.response_length = response_length; /*16:19*/
422 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
423 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
424 insn->bits3.dp_read.pad1 = 0; /*28:30*/
425 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
426 }
427 }
428
429 static void brw_set_sampler_message(struct brw_context *brw,
430 struct brw_instruction *insn,
431 GLuint binding_table_index,
432 GLuint sampler,
433 GLuint msg_type,
434 GLuint response_length,
435 GLuint msg_length,
436 GLboolean eot,
437 GLuint header_present,
438 GLuint simd_mode)
439 {
440 struct intel_context *intel = &brw->intel;
441 assert(eot == 0);
442 brw_set_src1(insn, brw_imm_d(0));
443
444 if (intel->is_ironlake) {
445 insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
446 insn->bits3.sampler_igdng.sampler = sampler;
447 insn->bits3.sampler_igdng.msg_type = msg_type;
448 insn->bits3.sampler_igdng.simd_mode = simd_mode;
449 insn->bits3.sampler_igdng.header_present = header_present;
450 insn->bits3.sampler_igdng.response_length = response_length;
451 insn->bits3.sampler_igdng.msg_length = msg_length;
452 insn->bits3.sampler_igdng.end_of_thread = eot;
453 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
454 insn->bits2.send_igdng.end_of_thread = eot;
455 } else if (intel->is_g4x) {
456 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
457 insn->bits3.sampler_g4x.sampler = sampler;
458 insn->bits3.sampler_g4x.msg_type = msg_type;
459 insn->bits3.sampler_g4x.response_length = response_length;
460 insn->bits3.sampler_g4x.msg_length = msg_length;
461 insn->bits3.sampler_g4x.end_of_thread = eot;
462 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
463 } else {
464 insn->bits3.sampler.binding_table_index = binding_table_index;
465 insn->bits3.sampler.sampler = sampler;
466 insn->bits3.sampler.msg_type = msg_type;
467 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
468 insn->bits3.sampler.response_length = response_length;
469 insn->bits3.sampler.msg_length = msg_length;
470 insn->bits3.sampler.end_of_thread = eot;
471 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
472 }
473 }
474
475
476
477 static struct brw_instruction *next_insn( struct brw_compile *p,
478 GLuint opcode )
479 {
480 struct brw_instruction *insn;
481
482 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
483
484 insn = &p->store[p->nr_insn++];
485 memcpy(insn, p->current, sizeof(*insn));
486
487 /* Reset this one-shot flag:
488 */
489
490 if (p->current->header.destreg__conditionalmod) {
491 p->current->header.destreg__conditionalmod = 0;
492 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
493 }
494
495 insn->header.opcode = opcode;
496 return insn;
497 }
498
499
500 static struct brw_instruction *brw_alu1( struct brw_compile *p,
501 GLuint opcode,
502 struct brw_reg dest,
503 struct brw_reg src )
504 {
505 struct brw_instruction *insn = next_insn(p, opcode);
506 brw_set_dest(insn, dest);
507 brw_set_src0(insn, src);
508 return insn;
509 }
510
511 static struct brw_instruction *brw_alu2(struct brw_compile *p,
512 GLuint opcode,
513 struct brw_reg dest,
514 struct brw_reg src0,
515 struct brw_reg src1 )
516 {
517 struct brw_instruction *insn = next_insn(p, opcode);
518 brw_set_dest(insn, dest);
519 brw_set_src0(insn, src0);
520 brw_set_src1(insn, src1);
521 return insn;
522 }
523
524
525 /***********************************************************************
526 * Convenience routines.
527 */
528 #define ALU1(OP) \
529 struct brw_instruction *brw_##OP(struct brw_compile *p, \
530 struct brw_reg dest, \
531 struct brw_reg src0) \
532 { \
533 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
534 }
535
536 #define ALU2(OP) \
537 struct brw_instruction *brw_##OP(struct brw_compile *p, \
538 struct brw_reg dest, \
539 struct brw_reg src0, \
540 struct brw_reg src1) \
541 { \
542 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
543 }
544
545
546 ALU1(MOV)
547 ALU2(SEL)
548 ALU1(NOT)
549 ALU2(AND)
550 ALU2(OR)
551 ALU2(XOR)
552 ALU2(SHR)
553 ALU2(SHL)
554 ALU2(RSR)
555 ALU2(RSL)
556 ALU2(ASR)
557 ALU2(ADD)
558 ALU2(MUL)
559 ALU1(FRC)
560 ALU1(RNDD)
561 ALU1(RNDZ)
562 ALU2(MAC)
563 ALU2(MACH)
564 ALU1(LZD)
565 ALU2(DP4)
566 ALU2(DPH)
567 ALU2(DP3)
568 ALU2(DP2)
569 ALU2(LINE)
570
571
572
573
574 void brw_NOP(struct brw_compile *p)
575 {
576 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
577 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
578 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
579 brw_set_src1(insn, brw_imm_ud(0x0));
580 }
581
582
583
584
585
586 /***********************************************************************
587 * Comparisons, if/else/endif
588 */
589
590 struct brw_instruction *brw_JMPI(struct brw_compile *p,
591 struct brw_reg dest,
592 struct brw_reg src0,
593 struct brw_reg src1)
594 {
595 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
596
597 insn->header.execution_size = 1;
598 insn->header.compression_control = BRW_COMPRESSION_NONE;
599 insn->header.mask_control = BRW_MASK_DISABLE;
600
601 p->current->header.predicate_control = BRW_PREDICATE_NONE;
602
603 return insn;
604 }
605
606 /* EU takes the value from the flag register and pushes it onto some
607 * sort of a stack (presumably merging with any flag value already on
608 * the stack). Within an if block, the flags at the top of the stack
609 * control execution on each channel of the unit, eg. on each of the
610 * 16 pixel values in our wm programs.
611 *
612 * When the matching 'else' instruction is reached (presumably by
613 * countdown of the instruction count patched in by our ELSE/ENDIF
614 * functions), the relevent flags are inverted.
615 *
616 * When the matching 'endif' instruction is reached, the flags are
617 * popped off. If the stack is now empty, normal execution resumes.
618 *
619 * No attempt is made to deal with stack overflow (14 elements?).
620 */
621 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
622 {
623 struct brw_instruction *insn;
624
625 if (p->single_program_flow) {
626 assert(execute_size == BRW_EXECUTE_1);
627
628 insn = next_insn(p, BRW_OPCODE_ADD);
629 insn->header.predicate_inverse = 1;
630 } else {
631 insn = next_insn(p, BRW_OPCODE_IF);
632 }
633
634 /* Override the defaults for this instruction:
635 */
636 brw_set_dest(insn, brw_ip_reg());
637 brw_set_src0(insn, brw_ip_reg());
638 brw_set_src1(insn, brw_imm_d(0x0));
639
640 insn->header.execution_size = execute_size;
641 insn->header.compression_control = BRW_COMPRESSION_NONE;
642 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
643 insn->header.mask_control = BRW_MASK_ENABLE;
644 if (!p->single_program_flow)
645 insn->header.thread_control = BRW_THREAD_SWITCH;
646
647 p->current->header.predicate_control = BRW_PREDICATE_NONE;
648
649 return insn;
650 }
651
652
653 struct brw_instruction *brw_ELSE(struct brw_compile *p,
654 struct brw_instruction *if_insn)
655 {
656 struct intel_context *intel = &p->brw->intel;
657 struct brw_instruction *insn;
658 GLuint br = 1;
659
660 if (intel->is_ironlake)
661 br = 2;
662
663 if (p->single_program_flow) {
664 insn = next_insn(p, BRW_OPCODE_ADD);
665 } else {
666 insn = next_insn(p, BRW_OPCODE_ELSE);
667 }
668
669 brw_set_dest(insn, brw_ip_reg());
670 brw_set_src0(insn, brw_ip_reg());
671 brw_set_src1(insn, brw_imm_d(0x0));
672
673 insn->header.compression_control = BRW_COMPRESSION_NONE;
674 insn->header.execution_size = if_insn->header.execution_size;
675 insn->header.mask_control = BRW_MASK_ENABLE;
676 if (!p->single_program_flow)
677 insn->header.thread_control = BRW_THREAD_SWITCH;
678
679 /* Patch the if instruction to point at this instruction.
680 */
681 if (p->single_program_flow) {
682 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
683
684 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
685 } else {
686 assert(if_insn->header.opcode == BRW_OPCODE_IF);
687
688 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
689 if_insn->bits3.if_else.pop_count = 0;
690 if_insn->bits3.if_else.pad0 = 0;
691 }
692
693 return insn;
694 }
695
696 void brw_ENDIF(struct brw_compile *p,
697 struct brw_instruction *patch_insn)
698 {
699 struct intel_context *intel = &p->brw->intel;
700 GLuint br = 1;
701
702 if (intel->is_ironlake)
703 br = 2;
704
705 if (p->single_program_flow) {
706 /* In single program flow mode, there's no need to execute an ENDIF,
707 * since we don't need to do any stack operations, and if we're executing
708 * currently, we want to just continue executing.
709 */
710 struct brw_instruction *next = &p->store[p->nr_insn];
711
712 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
713
714 patch_insn->bits3.ud = (next - patch_insn) * 16;
715 } else {
716 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
717
718 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
719 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
720 brw_set_src1(insn, brw_imm_d(0x0));
721
722 insn->header.compression_control = BRW_COMPRESSION_NONE;
723 insn->header.execution_size = patch_insn->header.execution_size;
724 insn->header.mask_control = BRW_MASK_ENABLE;
725 insn->header.thread_control = BRW_THREAD_SWITCH;
726
727 assert(patch_insn->bits3.if_else.jump_count == 0);
728
729 /* Patch the if or else instructions to point at this or the next
730 * instruction respectively.
731 */
732 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
733 /* Automagically turn it into an IFF:
734 */
735 patch_insn->header.opcode = BRW_OPCODE_IFF;
736 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
737 patch_insn->bits3.if_else.pop_count = 0;
738 patch_insn->bits3.if_else.pad0 = 0;
739 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
740 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
741 patch_insn->bits3.if_else.pop_count = 1;
742 patch_insn->bits3.if_else.pad0 = 0;
743 } else {
744 assert(0);
745 }
746
747 /* Also pop item off the stack in the endif instruction:
748 */
749 insn->bits3.if_else.jump_count = 0;
750 insn->bits3.if_else.pop_count = 1;
751 insn->bits3.if_else.pad0 = 0;
752 }
753 }
754
755 struct brw_instruction *brw_BREAK(struct brw_compile *p)
756 {
757 struct brw_instruction *insn;
758 insn = next_insn(p, BRW_OPCODE_BREAK);
759 brw_set_dest(insn, brw_ip_reg());
760 brw_set_src0(insn, brw_ip_reg());
761 brw_set_src1(insn, brw_imm_d(0x0));
762 insn->header.compression_control = BRW_COMPRESSION_NONE;
763 insn->header.execution_size = BRW_EXECUTE_8;
764 /* insn->header.mask_control = BRW_MASK_DISABLE; */
765 insn->bits3.if_else.pad0 = 0;
766 return insn;
767 }
768
769 struct brw_instruction *brw_CONT(struct brw_compile *p)
770 {
771 struct brw_instruction *insn;
772 insn = next_insn(p, BRW_OPCODE_CONTINUE);
773 brw_set_dest(insn, brw_ip_reg());
774 brw_set_src0(insn, brw_ip_reg());
775 brw_set_src1(insn, brw_imm_d(0x0));
776 insn->header.compression_control = BRW_COMPRESSION_NONE;
777 insn->header.execution_size = BRW_EXECUTE_8;
778 /* insn->header.mask_control = BRW_MASK_DISABLE; */
779 insn->bits3.if_else.pad0 = 0;
780 return insn;
781 }
782
783 /* DO/WHILE loop:
784 */
785 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
786 {
787 if (p->single_program_flow) {
788 return &p->store[p->nr_insn];
789 } else {
790 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
791
792 /* Override the defaults for this instruction:
793 */
794 brw_set_dest(insn, brw_null_reg());
795 brw_set_src0(insn, brw_null_reg());
796 brw_set_src1(insn, brw_null_reg());
797
798 insn->header.compression_control = BRW_COMPRESSION_NONE;
799 insn->header.execution_size = execute_size;
800 insn->header.predicate_control = BRW_PREDICATE_NONE;
801 /* insn->header.mask_control = BRW_MASK_ENABLE; */
802 /* insn->header.mask_control = BRW_MASK_DISABLE; */
803
804 return insn;
805 }
806 }
807
808
809
810 struct brw_instruction *brw_WHILE(struct brw_compile *p,
811 struct brw_instruction *do_insn)
812 {
813 struct intel_context *intel = &p->brw->intel;
814 struct brw_instruction *insn;
815 GLuint br = 1;
816
817 if (intel->is_ironlake)
818 br = 2;
819
820 if (p->single_program_flow)
821 insn = next_insn(p, BRW_OPCODE_ADD);
822 else
823 insn = next_insn(p, BRW_OPCODE_WHILE);
824
825 brw_set_dest(insn, brw_ip_reg());
826 brw_set_src0(insn, brw_ip_reg());
827 brw_set_src1(insn, brw_imm_d(0x0));
828
829 insn->header.compression_control = BRW_COMPRESSION_NONE;
830
831 if (p->single_program_flow) {
832 insn->header.execution_size = BRW_EXECUTE_1;
833
834 insn->bits3.d = (do_insn - insn) * 16;
835 } else {
836 insn->header.execution_size = do_insn->header.execution_size;
837
838 assert(do_insn->header.opcode == BRW_OPCODE_DO);
839 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
840 insn->bits3.if_else.pop_count = 0;
841 insn->bits3.if_else.pad0 = 0;
842 }
843
844 /* insn->header.mask_control = BRW_MASK_ENABLE; */
845
846 /* insn->header.mask_control = BRW_MASK_DISABLE; */
847 p->current->header.predicate_control = BRW_PREDICATE_NONE;
848 return insn;
849 }
850
851
852 /* FORWARD JUMPS:
853 */
854 void brw_land_fwd_jump(struct brw_compile *p,
855 struct brw_instruction *jmp_insn)
856 {
857 struct intel_context *intel = &p->brw->intel;
858 struct brw_instruction *landing = &p->store[p->nr_insn];
859 GLuint jmpi = 1;
860
861 if (intel->is_ironlake)
862 jmpi = 2;
863
864 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
865 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
866
867 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
868 }
869
870
871
872 /* To integrate with the above, it makes sense that the comparison
873 * instruction should populate the flag register. It might be simpler
874 * just to use the flag reg for most WM tasks?
875 */
876 void brw_CMP(struct brw_compile *p,
877 struct brw_reg dest,
878 GLuint conditional,
879 struct brw_reg src0,
880 struct brw_reg src1)
881 {
882 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
883
884 insn->header.destreg__conditionalmod = conditional;
885 brw_set_dest(insn, dest);
886 brw_set_src0(insn, src0);
887 brw_set_src1(insn, src1);
888
889 /* guess_execution_size(insn, src0); */
890
891
892 /* Make it so that future instructions will use the computed flag
893 * value until brw_set_predicate_control_flag_value() is called
894 * again.
895 */
896 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
897 dest.nr == 0) {
898 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
899 p->flag_value = 0xff;
900 }
901 }
902
903
904
905 /***********************************************************************
906 * Helpers for the various SEND message types:
907 */
908
909 /** Extended math function, float[8].
910 */
911 void brw_math( struct brw_compile *p,
912 struct brw_reg dest,
913 GLuint function,
914 GLuint saturate,
915 GLuint msg_reg_nr,
916 struct brw_reg src,
917 GLuint data_type,
918 GLuint precision )
919 {
920 struct intel_context *intel = &p->brw->intel;
921
922 if (intel->gen >= 6) {
923 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
924
925 /* Math is the same ISA format as other opcodes, except that CondModifier
926 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
927 */
928 insn->header.destreg__conditionalmod = function;
929
930 brw_set_dest(insn, dest);
931 brw_set_src0(insn, src);
932 brw_set_src1(insn, brw_null_reg());
933 } else {
934 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
935 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
936 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
937 /* Example code doesn't set predicate_control for send
938 * instructions.
939 */
940 insn->header.predicate_control = 0;
941 insn->header.destreg__conditionalmod = msg_reg_nr;
942
943 brw_set_dest(insn, dest);
944 brw_set_src0(insn, src);
945 brw_set_math_message(p->brw,
946 insn,
947 msg_length, response_length,
948 function,
949 BRW_MATH_INTEGER_UNSIGNED,
950 precision,
951 saturate,
952 data_type);
953 }
954 }
955
956 /**
957 * Extended math function, float[16].
958 * Use 2 send instructions.
959 */
960 void brw_math_16( struct brw_compile *p,
961 struct brw_reg dest,
962 GLuint function,
963 GLuint saturate,
964 GLuint msg_reg_nr,
965 struct brw_reg src,
966 GLuint precision )
967 {
968 struct brw_instruction *insn;
969 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
970 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
971
972 /* First instruction:
973 */
974 brw_push_insn_state(p);
975 brw_set_predicate_control_flag_value(p, 0xff);
976 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
977
978 insn = next_insn(p, BRW_OPCODE_SEND);
979 insn->header.destreg__conditionalmod = msg_reg_nr;
980
981 brw_set_dest(insn, dest);
982 brw_set_src0(insn, src);
983 brw_set_math_message(p->brw,
984 insn,
985 msg_length, response_length,
986 function,
987 BRW_MATH_INTEGER_UNSIGNED,
988 precision,
989 saturate,
990 BRW_MATH_DATA_VECTOR);
991
992 /* Second instruction:
993 */
994 insn = next_insn(p, BRW_OPCODE_SEND);
995 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
996 insn->header.destreg__conditionalmod = msg_reg_nr+1;
997
998 brw_set_dest(insn, offset(dest,1));
999 brw_set_src0(insn, src);
1000 brw_set_math_message(p->brw,
1001 insn,
1002 msg_length, response_length,
1003 function,
1004 BRW_MATH_INTEGER_UNSIGNED,
1005 precision,
1006 saturate,
1007 BRW_MATH_DATA_VECTOR);
1008
1009 brw_pop_insn_state(p);
1010 }
1011
1012
1013 /**
1014 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1015 * Scratch offset should be a multiple of 64.
1016 * Used for register spilling.
1017 */
1018 void brw_dp_WRITE_16( struct brw_compile *p,
1019 struct brw_reg src,
1020 GLuint scratch_offset )
1021 {
1022 GLuint msg_reg_nr = 1;
1023 {
1024 brw_push_insn_state(p);
1025 brw_set_mask_control(p, BRW_MASK_DISABLE);
1026 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1027
1028 /* set message header global offset field (reg 0, element 2) */
1029 brw_MOV(p,
1030 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1031 brw_imm_d(scratch_offset));
1032
1033 brw_pop_insn_state(p);
1034 }
1035
1036 {
1037 GLuint msg_length = 3;
1038 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1039 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1040
1041 insn->header.predicate_control = 0; /* XXX */
1042 insn->header.compression_control = BRW_COMPRESSION_NONE;
1043 insn->header.destreg__conditionalmod = msg_reg_nr;
1044
1045 brw_set_dest(insn, dest);
1046 brw_set_src0(insn, src);
1047
1048 brw_set_dp_write_message(p->brw,
1049 insn,
1050 255, /* binding table index (255=stateless) */
1051 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1052 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1053 msg_length,
1054 0, /* pixel scoreboard */
1055 0, /* response_length */
1056 0); /* eot */
1057 }
1058 }
1059
1060
1061 /**
1062 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1063 * Scratch offset should be a multiple of 64.
1064 * Used for register spilling.
1065 */
1066 void brw_dp_READ_16( struct brw_compile *p,
1067 struct brw_reg dest,
1068 GLuint scratch_offset )
1069 {
1070 GLuint msg_reg_nr = 1;
1071 {
1072 brw_push_insn_state(p);
1073 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1074 brw_set_mask_control(p, BRW_MASK_DISABLE);
1075
1076 /* set message header global offset field (reg 0, element 2) */
1077 brw_MOV(p,
1078 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1079 brw_imm_d(scratch_offset));
1080
1081 brw_pop_insn_state(p);
1082 }
1083
1084 {
1085 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1086
1087 insn->header.predicate_control = 0; /* XXX */
1088 insn->header.compression_control = BRW_COMPRESSION_NONE;
1089 insn->header.destreg__conditionalmod = msg_reg_nr;
1090
1091 brw_set_dest(insn, dest); /* UW? */
1092 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1093
1094 brw_set_dp_read_message(p->brw,
1095 insn,
1096 255, /* binding table index (255=stateless) */
1097 3, /* msg_control (3 means 4 Owords) */
1098 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1099 1, /* target cache (render/scratch) */
1100 1, /* msg_length */
1101 2, /* response_length */
1102 0); /* eot */
1103 }
1104 }
1105
1106
1107 /**
1108 * Read a float[4] vector from the data port Data Cache (const buffer).
1109 * Location (in buffer) should be a multiple of 16.
1110 * Used for fetching shader constants.
1111 * If relAddr is true, we'll do an indirect fetch using the address register.
1112 */
1113 void brw_dp_READ_4( struct brw_compile *p,
1114 struct brw_reg dest,
1115 GLboolean relAddr,
1116 GLuint location,
1117 GLuint bind_table_index )
1118 {
1119 /* XXX: relAddr not implemented */
1120 GLuint msg_reg_nr = 1;
1121 {
1122 struct brw_reg b;
1123 brw_push_insn_state(p);
1124 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1125 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1126 brw_set_mask_control(p, BRW_MASK_DISABLE);
1127
1128 /* Setup MRF[1] with location/offset into const buffer */
1129 b = brw_message_reg(msg_reg_nr);
1130 b = retype(b, BRW_REGISTER_TYPE_UD);
1131 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1132 * when the docs say only dword[2] should be set. Hmmm. But it works.
1133 */
1134 brw_MOV(p, b, brw_imm_ud(location));
1135 brw_pop_insn_state(p);
1136 }
1137
1138 {
1139 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1140
1141 insn->header.predicate_control = BRW_PREDICATE_NONE;
1142 insn->header.compression_control = BRW_COMPRESSION_NONE;
1143 insn->header.destreg__conditionalmod = msg_reg_nr;
1144 insn->header.mask_control = BRW_MASK_DISABLE;
1145
1146 /* cast dest to a uword[8] vector */
1147 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1148
1149 brw_set_dest(insn, dest);
1150 brw_set_src0(insn, brw_null_reg());
1151
1152 brw_set_dp_read_message(p->brw,
1153 insn,
1154 bind_table_index,
1155 0, /* msg_control (0 means 1 Oword) */
1156 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1157 0, /* source cache = data cache */
1158 1, /* msg_length */
1159 1, /* response_length (1 Oword) */
1160 0); /* eot */
1161 }
1162 }
1163
1164
1165 /**
1166 * Read float[4] constant(s) from VS constant buffer.
1167 * For relative addressing, two float[4] constants will be read into 'dest'.
1168 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1169 */
1170 void brw_dp_READ_4_vs(struct brw_compile *p,
1171 struct brw_reg dest,
1172 GLuint oword,
1173 GLboolean relAddr,
1174 struct brw_reg addrReg,
1175 GLuint location,
1176 GLuint bind_table_index)
1177 {
1178 GLuint msg_reg_nr = 1;
1179
1180 assert(oword < 2);
1181 /*
1182 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1183 location, msg_reg_nr);
1184 */
1185
1186 /* Setup MRF[1] with location/offset into const buffer */
1187 {
1188 struct brw_reg b;
1189
1190 brw_push_insn_state(p);
1191 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1192 brw_set_mask_control(p, BRW_MASK_DISABLE);
1193 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1194 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1195
1196 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1197 * when the docs say only dword[2] should be set. Hmmm. But it works.
1198 */
1199 b = brw_message_reg(msg_reg_nr);
1200 b = retype(b, BRW_REGISTER_TYPE_UD);
1201 /*b = get_element_ud(b, 2);*/
1202 if (relAddr) {
1203 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1204 }
1205 else {
1206 brw_MOV(p, b, brw_imm_ud(location));
1207 }
1208
1209 brw_pop_insn_state(p);
1210 }
1211
1212 {
1213 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1214
1215 insn->header.predicate_control = BRW_PREDICATE_NONE;
1216 insn->header.compression_control = BRW_COMPRESSION_NONE;
1217 insn->header.destreg__conditionalmod = msg_reg_nr;
1218 insn->header.mask_control = BRW_MASK_DISABLE;
1219 /*insn->header.access_mode = BRW_ALIGN_16;*/
1220
1221 brw_set_dest(insn, dest);
1222 brw_set_src0(insn, brw_null_reg());
1223
1224 brw_set_dp_read_message(p->brw,
1225 insn,
1226 bind_table_index,
1227 oword, /* 0 = lower Oword, 1 = upper Oword */
1228 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1229 0, /* source cache = data cache */
1230 1, /* msg_length */
1231 1, /* response_length (1 Oword) */
1232 0); /* eot */
1233 }
1234 }
1235
1236
1237
1238 void brw_fb_WRITE(struct brw_compile *p,
1239 struct brw_reg dest,
1240 GLuint msg_reg_nr,
1241 struct brw_reg src0,
1242 GLuint binding_table_index,
1243 GLuint msg_length,
1244 GLuint response_length,
1245 GLboolean eot)
1246 {
1247 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1248
1249 insn->header.predicate_control = 0; /* XXX */
1250 insn->header.compression_control = BRW_COMPRESSION_NONE;
1251 insn->header.destreg__conditionalmod = msg_reg_nr;
1252
1253 brw_set_dest(insn, dest);
1254 brw_set_src0(insn, src0);
1255 brw_set_dp_write_message(p->brw,
1256 insn,
1257 binding_table_index,
1258 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1259 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1260 msg_length,
1261 1, /* pixel scoreboard */
1262 response_length,
1263 eot);
1264 }
1265
1266
1267 /**
1268 * Texture sample instruction.
1269 * Note: the msg_type plus msg_length values determine exactly what kind
1270 * of sampling operation is performed. See volume 4, page 161 of docs.
1271 */
1272 void brw_SAMPLE(struct brw_compile *p,
1273 struct brw_reg dest,
1274 GLuint msg_reg_nr,
1275 struct brw_reg src0,
1276 GLuint binding_table_index,
1277 GLuint sampler,
1278 GLuint writemask,
1279 GLuint msg_type,
1280 GLuint response_length,
1281 GLuint msg_length,
1282 GLboolean eot,
1283 GLuint header_present,
1284 GLuint simd_mode)
1285 {
1286 GLboolean need_stall = 0;
1287
1288 if (writemask == 0) {
1289 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1290 return;
1291 }
1292
1293 /* Hardware doesn't do destination dependency checking on send
1294 * instructions properly. Add a workaround which generates the
1295 * dependency by other means. In practice it seems like this bug
1296 * only crops up for texture samples, and only where registers are
1297 * written by the send and then written again later without being
1298 * read in between. Luckily for us, we already track that
1299 * information and use it to modify the writemask for the
1300 * instruction, so that is a guide for whether a workaround is
1301 * needed.
1302 */
1303 if (writemask != WRITEMASK_XYZW) {
1304 GLuint dst_offset = 0;
1305 GLuint i, newmask = 0, len = 0;
1306
1307 for (i = 0; i < 4; i++) {
1308 if (writemask & (1<<i))
1309 break;
1310 dst_offset += 2;
1311 }
1312 for (; i < 4; i++) {
1313 if (!(writemask & (1<<i)))
1314 break;
1315 newmask |= 1<<i;
1316 len++;
1317 }
1318
1319 if (newmask != writemask) {
1320 need_stall = 1;
1321 /* printf("need stall %x %x\n", newmask , writemask); */
1322 }
1323 else {
1324 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1325
1326 newmask = ~newmask & WRITEMASK_XYZW;
1327
1328 brw_push_insn_state(p);
1329
1330 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1331 brw_set_mask_control(p, BRW_MASK_DISABLE);
1332
1333 brw_MOV(p, m1, brw_vec8_grf(0,0));
1334 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1335
1336 brw_pop_insn_state(p);
1337
1338 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1339 dest = offset(dest, dst_offset);
1340 response_length = len * 2;
1341 }
1342 }
1343
1344 {
1345 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1346
1347 insn->header.predicate_control = 0; /* XXX */
1348 insn->header.compression_control = BRW_COMPRESSION_NONE;
1349 insn->header.destreg__conditionalmod = msg_reg_nr;
1350
1351 brw_set_dest(insn, dest);
1352 brw_set_src0(insn, src0);
1353 brw_set_sampler_message(p->brw, insn,
1354 binding_table_index,
1355 sampler,
1356 msg_type,
1357 response_length,
1358 msg_length,
1359 eot,
1360 header_present,
1361 simd_mode);
1362 }
1363
1364 if (need_stall) {
1365 struct brw_reg reg = vec8(offset(dest, response_length-1));
1366
1367 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1368 */
1369 brw_push_insn_state(p);
1370 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1371 brw_MOV(p, reg, reg);
1372 brw_pop_insn_state(p);
1373 }
1374
1375 }
1376
1377 /* All these variables are pretty confusing - we might be better off
1378 * using bitmasks and macros for this, in the old style. Or perhaps
1379 * just having the caller instantiate the fields in dword3 itself.
1380 */
1381 void brw_urb_WRITE(struct brw_compile *p,
1382 struct brw_reg dest,
1383 GLuint msg_reg_nr,
1384 struct brw_reg src0,
1385 GLboolean allocate,
1386 GLboolean used,
1387 GLuint msg_length,
1388 GLuint response_length,
1389 GLboolean eot,
1390 GLboolean writes_complete,
1391 GLuint offset,
1392 GLuint swizzle)
1393 {
1394 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1395
1396 assert(msg_length < BRW_MAX_MRF);
1397
1398 brw_set_dest(insn, dest);
1399 brw_set_src0(insn, src0);
1400 brw_set_src1(insn, brw_imm_d(0));
1401
1402 insn->header.destreg__conditionalmod = msg_reg_nr;
1403
1404 brw_set_urb_message(p->brw,
1405 insn,
1406 allocate,
1407 used,
1408 msg_length,
1409 response_length,
1410 eot,
1411 writes_complete,
1412 offset,
1413 swizzle);
1414 }
1415
1416 void brw_ff_sync(struct brw_compile *p,
1417 struct brw_reg dest,
1418 GLuint msg_reg_nr,
1419 struct brw_reg src0,
1420 GLboolean allocate,
1421 GLboolean used,
1422 GLuint msg_length,
1423 GLuint response_length,
1424 GLboolean eot,
1425 GLboolean writes_complete,
1426 GLuint offset,
1427 GLuint swizzle)
1428 {
1429 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1430
1431 assert(msg_length < 16);
1432
1433 brw_set_dest(insn, dest);
1434 brw_set_src0(insn, src0);
1435 brw_set_src1(insn, brw_imm_d(0));
1436
1437 insn->header.destreg__conditionalmod = msg_reg_nr;
1438
1439 brw_set_ff_sync_message(p->brw,
1440 insn,
1441 allocate,
1442 used,
1443 msg_length,
1444 response_length,
1445 eot,
1446 writes_complete,
1447 offset,
1448 swizzle);
1449 }