2412014248c247bc067ad68742bb2483b40af917
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
59 assert(dest.nr < 128);
60
61 insn->bits1.da1.dest_reg_file = dest.file;
62 insn->bits1.da1.dest_reg_type = dest.type;
63 insn->bits1.da1.dest_address_mode = dest.address_mode;
64
65 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
66 insn->bits1.da1.dest_reg_nr = dest.nr;
67
68 if (insn->header.access_mode == BRW_ALIGN_1) {
69 insn->bits1.da1.dest_subreg_nr = dest.subnr;
70 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
71 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
72 insn->bits1.da1.dest_horiz_stride = dest.hstride;
73 }
74 else {
75 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
76 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
77 }
78 }
79 else {
80 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
81
82 /* These are different sizes in align1 vs align16:
83 */
84 if (insn->header.access_mode == BRW_ALIGN_1) {
85 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
86 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
87 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
88 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
89 }
90 else {
91 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
92 }
93 }
94
95 /* NEW: Set the execution size based on dest.width and
96 * insn->compression_control:
97 */
98 guess_execution_size(insn, dest);
99 }
100
101 static void brw_set_src0( struct brw_instruction *insn,
102 struct brw_reg reg )
103 {
104 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
105
106 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
107 assert(reg.nr < 128);
108
109 insn->bits1.da1.src0_reg_file = reg.file;
110 insn->bits1.da1.src0_reg_type = reg.type;
111 insn->bits2.da1.src0_abs = reg.abs;
112 insn->bits2.da1.src0_negate = reg.negate;
113 insn->bits2.da1.src0_address_mode = reg.address_mode;
114
115 if (reg.file == BRW_IMMEDIATE_VALUE) {
116 insn->bits3.ud = reg.dw1.ud;
117
118 /* Required to set some fields in src1 as well:
119 */
120 insn->bits1.da1.src1_reg_file = 0; /* arf */
121 insn->bits1.da1.src1_reg_type = reg.type;
122 }
123 else
124 {
125 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
126 if (insn->header.access_mode == BRW_ALIGN_1) {
127 insn->bits2.da1.src0_subreg_nr = reg.subnr;
128 insn->bits2.da1.src0_reg_nr = reg.nr;
129 }
130 else {
131 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
132 insn->bits2.da16.src0_reg_nr = reg.nr;
133 }
134 }
135 else {
136 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
137
138 if (insn->header.access_mode == BRW_ALIGN_1) {
139 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
140 }
141 else {
142 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
143 }
144 }
145
146 if (insn->header.access_mode == BRW_ALIGN_1) {
147 if (reg.width == BRW_WIDTH_1 &&
148 insn->header.execution_size == BRW_EXECUTE_1) {
149 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
150 insn->bits2.da1.src0_width = BRW_WIDTH_1;
151 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
152 }
153 else {
154 insn->bits2.da1.src0_horiz_stride = reg.hstride;
155 insn->bits2.da1.src0_width = reg.width;
156 insn->bits2.da1.src0_vert_stride = reg.vstride;
157 }
158 }
159 else {
160 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
161 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
162 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
163 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
164
165 /* This is an oddity of the fact we're using the same
166 * descriptions for registers in align_16 as align_1:
167 */
168 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
169 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
170 else
171 insn->bits2.da16.src0_vert_stride = reg.vstride;
172 }
173 }
174 }
175
176
177 void brw_set_src1( struct brw_instruction *insn,
178 struct brw_reg reg )
179 {
180 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
181
182 assert(reg.nr < 128);
183
184 insn->bits1.da1.src1_reg_file = reg.file;
185 insn->bits1.da1.src1_reg_type = reg.type;
186 insn->bits3.da1.src1_abs = reg.abs;
187 insn->bits3.da1.src1_negate = reg.negate;
188
189 /* Only src1 can be immediate in two-argument instructions.
190 */
191 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
192
193 if (reg.file == BRW_IMMEDIATE_VALUE) {
194 insn->bits3.ud = reg.dw1.ud;
195 }
196 else {
197 /* This is a hardware restriction, which may or may not be lifted
198 * in the future:
199 */
200 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
201 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
202
203 if (insn->header.access_mode == BRW_ALIGN_1) {
204 insn->bits3.da1.src1_subreg_nr = reg.subnr;
205 insn->bits3.da1.src1_reg_nr = reg.nr;
206 }
207 else {
208 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
209 insn->bits3.da16.src1_reg_nr = reg.nr;
210 }
211
212 if (insn->header.access_mode == BRW_ALIGN_1) {
213 if (reg.width == BRW_WIDTH_1 &&
214 insn->header.execution_size == BRW_EXECUTE_1) {
215 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
216 insn->bits3.da1.src1_width = BRW_WIDTH_1;
217 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
218 }
219 else {
220 insn->bits3.da1.src1_horiz_stride = reg.hstride;
221 insn->bits3.da1.src1_width = reg.width;
222 insn->bits3.da1.src1_vert_stride = reg.vstride;
223 }
224 }
225 else {
226 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
227 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
228 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
229 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
230
231 /* This is an oddity of the fact we're using the same
232 * descriptions for registers in align_16 as align_1:
233 */
234 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
235 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
236 else
237 insn->bits3.da16.src1_vert_stride = reg.vstride;
238 }
239 }
240 }
241
242
243
244 static void brw_set_math_message( struct brw_context *brw,
245 struct brw_instruction *insn,
246 GLuint msg_length,
247 GLuint response_length,
248 GLuint function,
249 GLuint integer_type,
250 GLboolean low_precision,
251 GLboolean saturate,
252 GLuint dataType )
253 {
254 brw_set_src1(insn, brw_imm_d(0));
255
256 if (BRW_IS_IGDNG(brw)) {
257 insn->bits3.math_igdng.function = function;
258 insn->bits3.math_igdng.int_type = integer_type;
259 insn->bits3.math_igdng.precision = low_precision;
260 insn->bits3.math_igdng.saturate = saturate;
261 insn->bits3.math_igdng.data_type = dataType;
262 insn->bits3.math_igdng.snapshot = 0;
263 insn->bits3.math_igdng.header_present = 0;
264 insn->bits3.math_igdng.response_length = response_length;
265 insn->bits3.math_igdng.msg_length = msg_length;
266 insn->bits3.math_igdng.end_of_thread = 0;
267 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
268 insn->bits2.send_igdng.end_of_thread = 0;
269 } else {
270 insn->bits3.math.function = function;
271 insn->bits3.math.int_type = integer_type;
272 insn->bits3.math.precision = low_precision;
273 insn->bits3.math.saturate = saturate;
274 insn->bits3.math.data_type = dataType;
275 insn->bits3.math.response_length = response_length;
276 insn->bits3.math.msg_length = msg_length;
277 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
278 insn->bits3.math.end_of_thread = 0;
279 }
280 }
281
282
283 static void brw_set_ff_sync_message( struct brw_context *brw,
284 struct brw_instruction *insn,
285 GLboolean allocate,
286 GLboolean used,
287 GLuint msg_length,
288 GLuint response_length,
289 GLboolean end_of_thread,
290 GLboolean complete,
291 GLuint offset,
292 GLuint swizzle_control )
293 {
294 brw_set_src1(insn, brw_imm_d(0));
295
296 insn->bits3.urb_igdng.opcode = 1;
297 insn->bits3.urb_igdng.offset = offset;
298 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
299 insn->bits3.urb_igdng.allocate = allocate;
300 insn->bits3.urb_igdng.used = used;
301 insn->bits3.urb_igdng.complete = complete;
302 insn->bits3.urb_igdng.header_present = 1;
303 insn->bits3.urb_igdng.response_length = response_length;
304 insn->bits3.urb_igdng.msg_length = msg_length;
305 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
306 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
307 insn->bits2.send_igdng.end_of_thread = end_of_thread;
308 }
309
310 static void brw_set_urb_message( struct brw_context *brw,
311 struct brw_instruction *insn,
312 GLboolean allocate,
313 GLboolean used,
314 GLuint msg_length,
315 GLuint response_length,
316 GLboolean end_of_thread,
317 GLboolean complete,
318 GLuint offset,
319 GLuint swizzle_control )
320 {
321 brw_set_src1(insn, brw_imm_d(0));
322
323 if (BRW_IS_IGDNG(brw)) {
324 insn->bits3.urb_igdng.opcode = 0; /* ? */
325 insn->bits3.urb_igdng.offset = offset;
326 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
327 insn->bits3.urb_igdng.allocate = allocate;
328 insn->bits3.urb_igdng.used = used; /* ? */
329 insn->bits3.urb_igdng.complete = complete;
330 insn->bits3.urb_igdng.header_present = 1;
331 insn->bits3.urb_igdng.response_length = response_length;
332 insn->bits3.urb_igdng.msg_length = msg_length;
333 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
334 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
335 insn->bits2.send_igdng.end_of_thread = end_of_thread;
336 } else {
337 insn->bits3.urb.opcode = 0; /* ? */
338 insn->bits3.urb.offset = offset;
339 insn->bits3.urb.swizzle_control = swizzle_control;
340 insn->bits3.urb.allocate = allocate;
341 insn->bits3.urb.used = used; /* ? */
342 insn->bits3.urb.complete = complete;
343 insn->bits3.urb.response_length = response_length;
344 insn->bits3.urb.msg_length = msg_length;
345 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
346 insn->bits3.urb.end_of_thread = end_of_thread;
347 }
348 }
349
350 static void brw_set_dp_write_message( struct brw_context *brw,
351 struct brw_instruction *insn,
352 GLuint binding_table_index,
353 GLuint msg_control,
354 GLuint msg_type,
355 GLuint msg_length,
356 GLuint pixel_scoreboard_clear,
357 GLuint response_length,
358 GLuint end_of_thread )
359 {
360 brw_set_src1(insn, brw_imm_d(0));
361
362 if (BRW_IS_IGDNG(brw)) {
363 insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
364 insn->bits3.dp_write_igdng.msg_control = msg_control;
365 insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
366 insn->bits3.dp_write_igdng.msg_type = msg_type;
367 insn->bits3.dp_write_igdng.send_commit_msg = 0;
368 insn->bits3.dp_write_igdng.header_present = 1;
369 insn->bits3.dp_write_igdng.response_length = response_length;
370 insn->bits3.dp_write_igdng.msg_length = msg_length;
371 insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
372 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
373 insn->bits2.send_igdng.end_of_thread = end_of_thread;
374 } else {
375 insn->bits3.dp_write.binding_table_index = binding_table_index;
376 insn->bits3.dp_write.msg_control = msg_control;
377 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
378 insn->bits3.dp_write.msg_type = msg_type;
379 insn->bits3.dp_write.send_commit_msg = 0;
380 insn->bits3.dp_write.response_length = response_length;
381 insn->bits3.dp_write.msg_length = msg_length;
382 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
383 insn->bits3.dp_write.end_of_thread = end_of_thread;
384 }
385 }
386
387 static void brw_set_dp_read_message( struct brw_context *brw,
388 struct brw_instruction *insn,
389 GLuint binding_table_index,
390 GLuint msg_control,
391 GLuint msg_type,
392 GLuint target_cache,
393 GLuint msg_length,
394 GLuint response_length,
395 GLuint end_of_thread )
396 {
397 brw_set_src1(insn, brw_imm_d(0));
398
399 if (BRW_IS_IGDNG(brw)) {
400 insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
401 insn->bits3.dp_read_igdng.msg_control = msg_control;
402 insn->bits3.dp_read_igdng.msg_type = msg_type;
403 insn->bits3.dp_read_igdng.target_cache = target_cache;
404 insn->bits3.dp_read_igdng.header_present = 1;
405 insn->bits3.dp_read_igdng.response_length = response_length;
406 insn->bits3.dp_read_igdng.msg_length = msg_length;
407 insn->bits3.dp_read_igdng.pad1 = 0;
408 insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
409 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
410 insn->bits2.send_igdng.end_of_thread = end_of_thread;
411 } else {
412 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
413 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
414 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
415 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
416 insn->bits3.dp_read.response_length = response_length; /*16:19*/
417 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
418 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
419 insn->bits3.dp_read.pad1 = 0; /*28:30*/
420 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
421 }
422 }
423
424 static void brw_set_sampler_message(struct brw_context *brw,
425 struct brw_instruction *insn,
426 GLuint binding_table_index,
427 GLuint sampler,
428 GLuint msg_type,
429 GLuint response_length,
430 GLuint msg_length,
431 GLboolean eot,
432 GLuint header_present,
433 GLuint simd_mode)
434 {
435 assert(eot == 0);
436 brw_set_src1(insn, brw_imm_d(0));
437
438 if (BRW_IS_IGDNG(brw)) {
439 insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
440 insn->bits3.sampler_igdng.sampler = sampler;
441 insn->bits3.sampler_igdng.msg_type = msg_type;
442 insn->bits3.sampler_igdng.simd_mode = simd_mode;
443 insn->bits3.sampler_igdng.header_present = header_present;
444 insn->bits3.sampler_igdng.response_length = response_length;
445 insn->bits3.sampler_igdng.msg_length = msg_length;
446 insn->bits3.sampler_igdng.end_of_thread = eot;
447 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
448 insn->bits2.send_igdng.end_of_thread = eot;
449 } else if (BRW_IS_G4X(brw)) {
450 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
451 insn->bits3.sampler_g4x.sampler = sampler;
452 insn->bits3.sampler_g4x.msg_type = msg_type;
453 insn->bits3.sampler_g4x.response_length = response_length;
454 insn->bits3.sampler_g4x.msg_length = msg_length;
455 insn->bits3.sampler_g4x.end_of_thread = eot;
456 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
457 } else {
458 insn->bits3.sampler.binding_table_index = binding_table_index;
459 insn->bits3.sampler.sampler = sampler;
460 insn->bits3.sampler.msg_type = msg_type;
461 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
462 insn->bits3.sampler.response_length = response_length;
463 insn->bits3.sampler.msg_length = msg_length;
464 insn->bits3.sampler.end_of_thread = eot;
465 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
466 }
467 }
468
469
470
471 static struct brw_instruction *next_insn( struct brw_compile *p,
472 GLuint opcode )
473 {
474 struct brw_instruction *insn;
475
476 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
477
478 insn = &p->store[p->nr_insn++];
479 memcpy(insn, p->current, sizeof(*insn));
480
481 /* Reset this one-shot flag:
482 */
483
484 if (p->current->header.destreg__conditonalmod) {
485 p->current->header.destreg__conditonalmod = 0;
486 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
487 }
488
489 insn->header.opcode = opcode;
490 return insn;
491 }
492
493
494 static struct brw_instruction *brw_alu1( struct brw_compile *p,
495 GLuint opcode,
496 struct brw_reg dest,
497 struct brw_reg src )
498 {
499 struct brw_instruction *insn = next_insn(p, opcode);
500 brw_set_dest(insn, dest);
501 brw_set_src0(insn, src);
502 return insn;
503 }
504
505 static struct brw_instruction *brw_alu2(struct brw_compile *p,
506 GLuint opcode,
507 struct brw_reg dest,
508 struct brw_reg src0,
509 struct brw_reg src1 )
510 {
511 struct brw_instruction *insn = next_insn(p, opcode);
512 brw_set_dest(insn, dest);
513 brw_set_src0(insn, src0);
514 brw_set_src1(insn, src1);
515 return insn;
516 }
517
518
519 /***********************************************************************
520 * Convenience routines.
521 */
522 #define ALU1(OP) \
523 struct brw_instruction *brw_##OP(struct brw_compile *p, \
524 struct brw_reg dest, \
525 struct brw_reg src0) \
526 { \
527 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
528 }
529
530 #define ALU2(OP) \
531 struct brw_instruction *brw_##OP(struct brw_compile *p, \
532 struct brw_reg dest, \
533 struct brw_reg src0, \
534 struct brw_reg src1) \
535 { \
536 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
537 }
538
539
540 ALU1(MOV)
541 ALU2(SEL)
542 ALU1(NOT)
543 ALU2(AND)
544 ALU2(OR)
545 ALU2(XOR)
546 ALU2(SHR)
547 ALU2(SHL)
548 ALU2(RSR)
549 ALU2(RSL)
550 ALU2(ASR)
551 ALU2(ADD)
552 ALU2(MUL)
553 ALU1(FRC)
554 ALU1(RNDD)
555 ALU1(RNDZ)
556 ALU2(MAC)
557 ALU2(MACH)
558 ALU1(LZD)
559 ALU2(DP4)
560 ALU2(DPH)
561 ALU2(DP3)
562 ALU2(DP2)
563 ALU2(LINE)
564
565
566
567
568 void brw_NOP(struct brw_compile *p)
569 {
570 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
571 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
572 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
573 brw_set_src1(insn, brw_imm_ud(0x0));
574 }
575
576
577
578
579
580 /***********************************************************************
581 * Comparisons, if/else/endif
582 */
583
584 struct brw_instruction *brw_JMPI(struct brw_compile *p,
585 struct brw_reg dest,
586 struct brw_reg src0,
587 struct brw_reg src1)
588 {
589 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
590
591 insn->header.execution_size = 1;
592 insn->header.compression_control = BRW_COMPRESSION_NONE;
593 insn->header.mask_control = BRW_MASK_DISABLE;
594
595 p->current->header.predicate_control = BRW_PREDICATE_NONE;
596
597 return insn;
598 }
599
600 /* EU takes the value from the flag register and pushes it onto some
601 * sort of a stack (presumably merging with any flag value already on
602 * the stack). Within an if block, the flags at the top of the stack
603 * control execution on each channel of the unit, eg. on each of the
604 * 16 pixel values in our wm programs.
605 *
606 * When the matching 'else' instruction is reached (presumably by
607 * countdown of the instruction count patched in by our ELSE/ENDIF
608 * functions), the relevent flags are inverted.
609 *
610 * When the matching 'endif' instruction is reached, the flags are
611 * popped off. If the stack is now empty, normal execution resumes.
612 *
613 * No attempt is made to deal with stack overflow (14 elements?).
614 */
615 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
616 {
617 struct brw_instruction *insn;
618
619 if (p->single_program_flow) {
620 assert(execute_size == BRW_EXECUTE_1);
621
622 insn = next_insn(p, BRW_OPCODE_ADD);
623 insn->header.predicate_inverse = 1;
624 } else {
625 insn = next_insn(p, BRW_OPCODE_IF);
626 }
627
628 /* Override the defaults for this instruction:
629 */
630 brw_set_dest(insn, brw_ip_reg());
631 brw_set_src0(insn, brw_ip_reg());
632 brw_set_src1(insn, brw_imm_d(0x0));
633
634 insn->header.execution_size = execute_size;
635 insn->header.compression_control = BRW_COMPRESSION_NONE;
636 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
637 insn->header.mask_control = BRW_MASK_ENABLE;
638 if (!p->single_program_flow)
639 insn->header.thread_control = BRW_THREAD_SWITCH;
640
641 p->current->header.predicate_control = BRW_PREDICATE_NONE;
642
643 return insn;
644 }
645
646
647 struct brw_instruction *brw_ELSE(struct brw_compile *p,
648 struct brw_instruction *if_insn)
649 {
650 struct brw_instruction *insn;
651 GLuint br = 1;
652
653 if (BRW_IS_IGDNG(p->brw))
654 br = 2;
655
656 if (p->single_program_flow) {
657 insn = next_insn(p, BRW_OPCODE_ADD);
658 } else {
659 insn = next_insn(p, BRW_OPCODE_ELSE);
660 }
661
662 brw_set_dest(insn, brw_ip_reg());
663 brw_set_src0(insn, brw_ip_reg());
664 brw_set_src1(insn, brw_imm_d(0x0));
665
666 insn->header.compression_control = BRW_COMPRESSION_NONE;
667 insn->header.execution_size = if_insn->header.execution_size;
668 insn->header.mask_control = BRW_MASK_ENABLE;
669 if (!p->single_program_flow)
670 insn->header.thread_control = BRW_THREAD_SWITCH;
671
672 /* Patch the if instruction to point at this instruction.
673 */
674 if (p->single_program_flow) {
675 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
676
677 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
678 } else {
679 assert(if_insn->header.opcode == BRW_OPCODE_IF);
680
681 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
682 if_insn->bits3.if_else.pop_count = 1;
683 if_insn->bits3.if_else.pad0 = 0;
684 }
685
686 return insn;
687 }
688
689 void brw_ENDIF(struct brw_compile *p,
690 struct brw_instruction *patch_insn)
691 {
692 GLuint br = 1;
693
694 if (BRW_IS_IGDNG(p->brw))
695 br = 2;
696
697 if (p->single_program_flow) {
698 /* In single program flow mode, there's no need to execute an ENDIF,
699 * since we don't need to do any stack operations, and if we're executing
700 * currently, we want to just continue executing.
701 */
702 struct brw_instruction *next = &p->store[p->nr_insn];
703
704 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
705
706 patch_insn->bits3.ud = (next - patch_insn) * 16;
707 } else {
708 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
709
710 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
711 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
712 brw_set_src1(insn, brw_imm_d(0x0));
713
714 insn->header.compression_control = BRW_COMPRESSION_NONE;
715 insn->header.execution_size = patch_insn->header.execution_size;
716 insn->header.mask_control = BRW_MASK_ENABLE;
717 insn->header.thread_control = BRW_THREAD_SWITCH;
718
719 assert(patch_insn->bits3.if_else.jump_count == 0);
720
721 /* Patch the if or else instructions to point at this or the next
722 * instruction respectively.
723 */
724 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
725 /* Automagically turn it into an IFF:
726 */
727 patch_insn->header.opcode = BRW_OPCODE_IFF;
728 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
729 patch_insn->bits3.if_else.pop_count = 0;
730 patch_insn->bits3.if_else.pad0 = 0;
731 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
732 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
733 patch_insn->bits3.if_else.pop_count = 1;
734 patch_insn->bits3.if_else.pad0 = 0;
735 } else {
736 assert(0);
737 }
738
739 /* Also pop item off the stack in the endif instruction:
740 */
741 insn->bits3.if_else.jump_count = 0;
742 insn->bits3.if_else.pop_count = 1;
743 insn->bits3.if_else.pad0 = 0;
744 }
745 }
746
747 struct brw_instruction *brw_BREAK(struct brw_compile *p)
748 {
749 struct brw_instruction *insn;
750 insn = next_insn(p, BRW_OPCODE_BREAK);
751 brw_set_dest(insn, brw_ip_reg());
752 brw_set_src0(insn, brw_ip_reg());
753 brw_set_src1(insn, brw_imm_d(0x0));
754 insn->header.compression_control = BRW_COMPRESSION_NONE;
755 insn->header.execution_size = BRW_EXECUTE_8;
756 /* insn->header.mask_control = BRW_MASK_DISABLE; */
757 insn->bits3.if_else.pad0 = 0;
758 return insn;
759 }
760
761 struct brw_instruction *brw_CONT(struct brw_compile *p)
762 {
763 struct brw_instruction *insn;
764 insn = next_insn(p, BRW_OPCODE_CONTINUE);
765 brw_set_dest(insn, brw_ip_reg());
766 brw_set_src0(insn, brw_ip_reg());
767 brw_set_src1(insn, brw_imm_d(0x0));
768 insn->header.compression_control = BRW_COMPRESSION_NONE;
769 insn->header.execution_size = BRW_EXECUTE_8;
770 /* insn->header.mask_control = BRW_MASK_DISABLE; */
771 insn->bits3.if_else.pad0 = 0;
772 return insn;
773 }
774
775 /* DO/WHILE loop:
776 */
777 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
778 {
779 if (p->single_program_flow) {
780 return &p->store[p->nr_insn];
781 } else {
782 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
783
784 /* Override the defaults for this instruction:
785 */
786 brw_set_dest(insn, brw_null_reg());
787 brw_set_src0(insn, brw_null_reg());
788 brw_set_src1(insn, brw_null_reg());
789
790 insn->header.compression_control = BRW_COMPRESSION_NONE;
791 insn->header.execution_size = execute_size;
792 insn->header.predicate_control = BRW_PREDICATE_NONE;
793 /* insn->header.mask_control = BRW_MASK_ENABLE; */
794 /* insn->header.mask_control = BRW_MASK_DISABLE; */
795
796 return insn;
797 }
798 }
799
800
801
802 struct brw_instruction *brw_WHILE(struct brw_compile *p,
803 struct brw_instruction *do_insn)
804 {
805 struct brw_instruction *insn;
806 GLuint br = 1;
807
808 if (BRW_IS_IGDNG(p->brw))
809 br = 2;
810
811 if (p->single_program_flow)
812 insn = next_insn(p, BRW_OPCODE_ADD);
813 else
814 insn = next_insn(p, BRW_OPCODE_WHILE);
815
816 brw_set_dest(insn, brw_ip_reg());
817 brw_set_src0(insn, brw_ip_reg());
818 brw_set_src1(insn, brw_imm_d(0x0));
819
820 insn->header.compression_control = BRW_COMPRESSION_NONE;
821
822 if (p->single_program_flow) {
823 insn->header.execution_size = BRW_EXECUTE_1;
824
825 insn->bits3.d = (do_insn - insn) * 16;
826 } else {
827 insn->header.execution_size = do_insn->header.execution_size;
828
829 assert(do_insn->header.opcode == BRW_OPCODE_DO);
830 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
831 insn->bits3.if_else.pop_count = 0;
832 insn->bits3.if_else.pad0 = 0;
833 }
834
835 /* insn->header.mask_control = BRW_MASK_ENABLE; */
836
837 /* insn->header.mask_control = BRW_MASK_DISABLE; */
838 p->current->header.predicate_control = BRW_PREDICATE_NONE;
839 return insn;
840 }
841
842
843 /* FORWARD JUMPS:
844 */
845 void brw_land_fwd_jump(struct brw_compile *p,
846 struct brw_instruction *jmp_insn)
847 {
848 struct brw_instruction *landing = &p->store[p->nr_insn];
849 GLuint jmpi = 1;
850
851 if (BRW_IS_IGDNG(p->brw))
852 jmpi = 2;
853
854 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
855 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
856
857 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
858 }
859
860
861
862 /* To integrate with the above, it makes sense that the comparison
863 * instruction should populate the flag register. It might be simpler
864 * just to use the flag reg for most WM tasks?
865 */
866 void brw_CMP(struct brw_compile *p,
867 struct brw_reg dest,
868 GLuint conditional,
869 struct brw_reg src0,
870 struct brw_reg src1)
871 {
872 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
873
874 insn->header.destreg__conditonalmod = conditional;
875 brw_set_dest(insn, dest);
876 brw_set_src0(insn, src0);
877 brw_set_src1(insn, src1);
878
879 /* guess_execution_size(insn, src0); */
880
881
882 /* Make it so that future instructions will use the computed flag
883 * value until brw_set_predicate_control_flag_value() is called
884 * again.
885 */
886 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
887 dest.nr == 0) {
888 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
889 p->flag_value = 0xff;
890 }
891 }
892
893
894
895 /***********************************************************************
896 * Helpers for the various SEND message types:
897 */
898
899 /** Extended math function, float[8].
900 */
901 void brw_math( struct brw_compile *p,
902 struct brw_reg dest,
903 GLuint function,
904 GLuint saturate,
905 GLuint msg_reg_nr,
906 struct brw_reg src,
907 GLuint data_type,
908 GLuint precision )
909 {
910 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
911 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
912 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
913
914 /* Example code doesn't set predicate_control for send
915 * instructions.
916 */
917 insn->header.predicate_control = 0;
918 insn->header.destreg__conditonalmod = msg_reg_nr;
919
920 brw_set_dest(insn, dest);
921 brw_set_src0(insn, src);
922 brw_set_math_message(p->brw,
923 insn,
924 msg_length, response_length,
925 function,
926 BRW_MATH_INTEGER_UNSIGNED,
927 precision,
928 saturate,
929 data_type);
930 }
931
932 /**
933 * Extended math function, float[16].
934 * Use 2 send instructions.
935 */
936 void brw_math_16( struct brw_compile *p,
937 struct brw_reg dest,
938 GLuint function,
939 GLuint saturate,
940 GLuint msg_reg_nr,
941 struct brw_reg src,
942 GLuint precision )
943 {
944 struct brw_instruction *insn;
945 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
946 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
947
948 /* First instruction:
949 */
950 brw_push_insn_state(p);
951 brw_set_predicate_control_flag_value(p, 0xff);
952 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
953
954 insn = next_insn(p, BRW_OPCODE_SEND);
955 insn->header.destreg__conditonalmod = msg_reg_nr;
956
957 brw_set_dest(insn, dest);
958 brw_set_src0(insn, src);
959 brw_set_math_message(p->brw,
960 insn,
961 msg_length, response_length,
962 function,
963 BRW_MATH_INTEGER_UNSIGNED,
964 precision,
965 saturate,
966 BRW_MATH_DATA_VECTOR);
967
968 /* Second instruction:
969 */
970 insn = next_insn(p, BRW_OPCODE_SEND);
971 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
972 insn->header.destreg__conditonalmod = msg_reg_nr+1;
973
974 brw_set_dest(insn, offset(dest,1));
975 brw_set_src0(insn, src);
976 brw_set_math_message(p->brw,
977 insn,
978 msg_length, response_length,
979 function,
980 BRW_MATH_INTEGER_UNSIGNED,
981 precision,
982 saturate,
983 BRW_MATH_DATA_VECTOR);
984
985 brw_pop_insn_state(p);
986 }
987
988
989 /**
990 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
991 * Scratch offset should be a multiple of 64.
992 * Used for register spilling.
993 */
994 void brw_dp_WRITE_16( struct brw_compile *p,
995 struct brw_reg src,
996 GLuint scratch_offset )
997 {
998 GLuint msg_reg_nr = 1;
999 {
1000 brw_push_insn_state(p);
1001 brw_set_mask_control(p, BRW_MASK_DISABLE);
1002 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1003
1004 /* set message header global offset field (reg 0, element 2) */
1005 brw_MOV(p,
1006 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1007 brw_imm_d(scratch_offset));
1008
1009 brw_pop_insn_state(p);
1010 }
1011
1012 {
1013 GLuint msg_length = 3;
1014 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1015 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1016
1017 insn->header.predicate_control = 0; /* XXX */
1018 insn->header.compression_control = BRW_COMPRESSION_NONE;
1019 insn->header.destreg__conditonalmod = msg_reg_nr;
1020
1021 brw_set_dest(insn, dest);
1022 brw_set_src0(insn, src);
1023
1024 brw_set_dp_write_message(p->brw,
1025 insn,
1026 255, /* binding table index (255=stateless) */
1027 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1028 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1029 msg_length,
1030 0, /* pixel scoreboard */
1031 0, /* response_length */
1032 0); /* eot */
1033 }
1034 }
1035
1036
1037 /**
1038 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1039 * Scratch offset should be a multiple of 64.
1040 * Used for register spilling.
1041 */
1042 void brw_dp_READ_16( struct brw_compile *p,
1043 struct brw_reg dest,
1044 GLuint scratch_offset )
1045 {
1046 GLuint msg_reg_nr = 1;
1047 {
1048 brw_push_insn_state(p);
1049 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1050 brw_set_mask_control(p, BRW_MASK_DISABLE);
1051
1052 /* set message header global offset field (reg 0, element 2) */
1053 brw_MOV(p,
1054 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1055 brw_imm_d(scratch_offset));
1056
1057 brw_pop_insn_state(p);
1058 }
1059
1060 {
1061 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1062
1063 insn->header.predicate_control = 0; /* XXX */
1064 insn->header.compression_control = BRW_COMPRESSION_NONE;
1065 insn->header.destreg__conditonalmod = msg_reg_nr;
1066
1067 brw_set_dest(insn, dest); /* UW? */
1068 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1069
1070 brw_set_dp_read_message(p->brw,
1071 insn,
1072 255, /* binding table index (255=stateless) */
1073 3, /* msg_control (3 means 4 Owords) */
1074 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1075 1, /* target cache (render/scratch) */
1076 1, /* msg_length */
1077 2, /* response_length */
1078 0); /* eot */
1079 }
1080 }
1081
1082
1083 /**
1084 * Read a float[4] vector from the data port Data Cache (const buffer).
1085 * Location (in buffer) should be a multiple of 16.
1086 * Used for fetching shader constants.
1087 * If relAddr is true, we'll do an indirect fetch using the address register.
1088 */
1089 void brw_dp_READ_4( struct brw_compile *p,
1090 struct brw_reg dest,
1091 GLboolean relAddr,
1092 GLuint location,
1093 GLuint bind_table_index )
1094 {
1095 /* XXX: relAddr not implemented */
1096 GLuint msg_reg_nr = 1;
1097 {
1098 struct brw_reg b;
1099 brw_push_insn_state(p);
1100 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1101 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1102 brw_set_mask_control(p, BRW_MASK_DISABLE);
1103
1104 /* Setup MRF[1] with location/offset into const buffer */
1105 b = brw_message_reg(msg_reg_nr);
1106 b = retype(b, BRW_REGISTER_TYPE_UD);
1107 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1108 * when the docs say only dword[2] should be set. Hmmm. But it works.
1109 */
1110 brw_MOV(p, b, brw_imm_ud(location));
1111 brw_pop_insn_state(p);
1112 }
1113
1114 {
1115 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1116
1117 insn->header.predicate_control = BRW_PREDICATE_NONE;
1118 insn->header.compression_control = BRW_COMPRESSION_NONE;
1119 insn->header.destreg__conditonalmod = msg_reg_nr;
1120 insn->header.mask_control = BRW_MASK_DISABLE;
1121
1122 /* cast dest to a uword[8] vector */
1123 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1124
1125 brw_set_dest(insn, dest);
1126 brw_set_src0(insn, brw_null_reg());
1127
1128 brw_set_dp_read_message(p->brw,
1129 insn,
1130 bind_table_index,
1131 0, /* msg_control (0 means 1 Oword) */
1132 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1133 0, /* source cache = data cache */
1134 1, /* msg_length */
1135 1, /* response_length (1 Oword) */
1136 0); /* eot */
1137 }
1138 }
1139
1140
1141 /**
1142 * Read float[4] constant(s) from VS constant buffer.
1143 * For relative addressing, two float[4] constants will be read into 'dest'.
1144 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1145 */
1146 void brw_dp_READ_4_vs(struct brw_compile *p,
1147 struct brw_reg dest,
1148 GLuint oword,
1149 GLboolean relAddr,
1150 struct brw_reg addrReg,
1151 GLuint location,
1152 GLuint bind_table_index)
1153 {
1154 GLuint msg_reg_nr = 1;
1155
1156 assert(oword < 2);
1157 /*
1158 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1159 location, msg_reg_nr);
1160 */
1161
1162 /* Setup MRF[1] with location/offset into const buffer */
1163 {
1164 struct brw_reg b;
1165
1166 brw_push_insn_state(p);
1167 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1168 brw_set_mask_control(p, BRW_MASK_DISABLE);
1169 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1170 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1171
1172 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1173 * when the docs say only dword[2] should be set. Hmmm. But it works.
1174 */
1175 b = brw_message_reg(msg_reg_nr);
1176 b = retype(b, BRW_REGISTER_TYPE_UD);
1177 /*b = get_element_ud(b, 2);*/
1178 if (relAddr) {
1179 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1180 }
1181 else {
1182 brw_MOV(p, b, brw_imm_ud(location));
1183 }
1184
1185 brw_pop_insn_state(p);
1186 }
1187
1188 {
1189 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1190
1191 insn->header.predicate_control = BRW_PREDICATE_NONE;
1192 insn->header.compression_control = BRW_COMPRESSION_NONE;
1193 insn->header.destreg__conditonalmod = msg_reg_nr;
1194 insn->header.mask_control = BRW_MASK_DISABLE;
1195 /*insn->header.access_mode = BRW_ALIGN_16;*/
1196
1197 brw_set_dest(insn, dest);
1198 brw_set_src0(insn, brw_null_reg());
1199
1200 brw_set_dp_read_message(p->brw,
1201 insn,
1202 bind_table_index,
1203 oword, /* 0 = lower Oword, 1 = upper Oword */
1204 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1205 0, /* source cache = data cache */
1206 1, /* msg_length */
1207 1, /* response_length (1 Oword) */
1208 0); /* eot */
1209 }
1210 }
1211
1212
1213
1214 void brw_fb_WRITE(struct brw_compile *p,
1215 struct brw_reg dest,
1216 GLuint msg_reg_nr,
1217 struct brw_reg src0,
1218 GLuint binding_table_index,
1219 GLuint msg_length,
1220 GLuint response_length,
1221 GLboolean eot)
1222 {
1223 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1224
1225 insn->header.predicate_control = 0; /* XXX */
1226 insn->header.compression_control = BRW_COMPRESSION_NONE;
1227 insn->header.destreg__conditonalmod = msg_reg_nr;
1228
1229 brw_set_dest(insn, dest);
1230 brw_set_src0(insn, src0);
1231 brw_set_dp_write_message(p->brw,
1232 insn,
1233 binding_table_index,
1234 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1235 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1236 msg_length,
1237 1, /* pixel scoreboard */
1238 response_length,
1239 eot);
1240 }
1241
1242
1243 /**
1244 * Texture sample instruction.
1245 * Note: the msg_type plus msg_length values determine exactly what kind
1246 * of sampling operation is performed. See volume 4, page 161 of docs.
1247 */
1248 void brw_SAMPLE(struct brw_compile *p,
1249 struct brw_reg dest,
1250 GLuint msg_reg_nr,
1251 struct brw_reg src0,
1252 GLuint binding_table_index,
1253 GLuint sampler,
1254 GLuint writemask,
1255 GLuint msg_type,
1256 GLuint response_length,
1257 GLuint msg_length,
1258 GLboolean eot,
1259 GLuint header_present,
1260 GLuint simd_mode)
1261 {
1262 GLboolean need_stall = 0;
1263
1264 if (writemask == 0) {
1265 /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1266 return;
1267 }
1268
1269 /* Hardware doesn't do destination dependency checking on send
1270 * instructions properly. Add a workaround which generates the
1271 * dependency by other means. In practice it seems like this bug
1272 * only crops up for texture samples, and only where registers are
1273 * written by the send and then written again later without being
1274 * read in between. Luckily for us, we already track that
1275 * information and use it to modify the writemask for the
1276 * instruction, so that is a guide for whether a workaround is
1277 * needed.
1278 */
1279 if (writemask != WRITEMASK_XYZW) {
1280 GLuint dst_offset = 0;
1281 GLuint i, newmask = 0, len = 0;
1282
1283 for (i = 0; i < 4; i++) {
1284 if (writemask & (1<<i))
1285 break;
1286 dst_offset += 2;
1287 }
1288 for (; i < 4; i++) {
1289 if (!(writemask & (1<<i)))
1290 break;
1291 newmask |= 1<<i;
1292 len++;
1293 }
1294
1295 if (newmask != writemask) {
1296 need_stall = 1;
1297 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1298 }
1299 else {
1300 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1301
1302 newmask = ~newmask & WRITEMASK_XYZW;
1303
1304 brw_push_insn_state(p);
1305
1306 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1307 brw_set_mask_control(p, BRW_MASK_DISABLE);
1308
1309 brw_MOV(p, m1, brw_vec8_grf(0,0));
1310 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1311
1312 brw_pop_insn_state(p);
1313
1314 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1315 dest = offset(dest, dst_offset);
1316 response_length = len * 2;
1317 }
1318 }
1319
1320 {
1321 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1322
1323 insn->header.predicate_control = 0; /* XXX */
1324 insn->header.compression_control = BRW_COMPRESSION_NONE;
1325 insn->header.destreg__conditonalmod = msg_reg_nr;
1326
1327 brw_set_dest(insn, dest);
1328 brw_set_src0(insn, src0);
1329 brw_set_sampler_message(p->brw, insn,
1330 binding_table_index,
1331 sampler,
1332 msg_type,
1333 response_length,
1334 msg_length,
1335 eot,
1336 header_present,
1337 simd_mode);
1338 }
1339
1340 if (need_stall) {
1341 struct brw_reg reg = vec8(offset(dest, response_length-1));
1342
1343 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1344 */
1345 brw_push_insn_state(p);
1346 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1347 brw_MOV(p, reg, reg);
1348 brw_pop_insn_state(p);
1349 }
1350
1351 }
1352
1353 /* All these variables are pretty confusing - we might be better off
1354 * using bitmasks and macros for this, in the old style. Or perhaps
1355 * just having the caller instantiate the fields in dword3 itself.
1356 */
1357 void brw_urb_WRITE(struct brw_compile *p,
1358 struct brw_reg dest,
1359 GLuint msg_reg_nr,
1360 struct brw_reg src0,
1361 GLboolean allocate,
1362 GLboolean used,
1363 GLuint msg_length,
1364 GLuint response_length,
1365 GLboolean eot,
1366 GLboolean writes_complete,
1367 GLuint offset,
1368 GLuint swizzle)
1369 {
1370 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1371
1372 assert(msg_length < BRW_MAX_MRF);
1373
1374 brw_set_dest(insn, dest);
1375 brw_set_src0(insn, src0);
1376 brw_set_src1(insn, brw_imm_d(0));
1377
1378 insn->header.destreg__conditonalmod = msg_reg_nr;
1379
1380 brw_set_urb_message(p->brw,
1381 insn,
1382 allocate,
1383 used,
1384 msg_length,
1385 response_length,
1386 eot,
1387 writes_complete,
1388 offset,
1389 swizzle);
1390 }
1391
1392 void brw_ff_sync(struct brw_compile *p,
1393 struct brw_reg dest,
1394 GLuint msg_reg_nr,
1395 struct brw_reg src0,
1396 GLboolean allocate,
1397 GLboolean used,
1398 GLuint msg_length,
1399 GLuint response_length,
1400 GLboolean eot,
1401 GLboolean writes_complete,
1402 GLuint offset,
1403 GLuint swizzle)
1404 {
1405 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1406
1407 assert(msg_length < 16);
1408
1409 brw_set_dest(insn, dest);
1410 brw_set_src0(insn, src0);
1411 brw_set_src1(insn, brw_imm_d(0));
1412
1413 insn->header.destreg__conditonalmod = msg_reg_nr;
1414
1415 brw_set_ff_sync_message(p->brw,
1416 insn,
1417 allocate,
1418 used,
1419 msg_length,
1420 response_length,
1421 eot,
1422 writes_complete,
1423 offset,
1424 swizzle);
1425 }