Merge branch 'mesa_7_6_branch' into mesa_7_7_branch
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 }
79 }
80 else {
81 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83 /* These are different sizes in align1 vs align16:
84 */
85 if (insn->header.access_mode == BRW_ALIGN_1) {
86 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90 }
91 else {
92 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93 }
94 }
95
96 /* NEW: Set the execution size based on dest.width and
97 * insn->compression_control:
98 */
99 guess_execution_size(insn, dest);
100 }
101
102 static void brw_set_src0( struct brw_instruction *insn,
103 struct brw_reg reg )
104 {
105 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
106
107 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
108 assert(reg.nr < 128);
109
110 insn->bits1.da1.src0_reg_file = reg.file;
111 insn->bits1.da1.src0_reg_type = reg.type;
112 insn->bits2.da1.src0_abs = reg.abs;
113 insn->bits2.da1.src0_negate = reg.negate;
114 insn->bits2.da1.src0_address_mode = reg.address_mode;
115
116 if (reg.file == BRW_IMMEDIATE_VALUE) {
117 insn->bits3.ud = reg.dw1.ud;
118
119 /* Required to set some fields in src1 as well:
120 */
121 insn->bits1.da1.src1_reg_file = 0; /* arf */
122 insn->bits1.da1.src1_reg_type = reg.type;
123 }
124 else
125 {
126 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
127 if (insn->header.access_mode == BRW_ALIGN_1) {
128 insn->bits2.da1.src0_subreg_nr = reg.subnr;
129 insn->bits2.da1.src0_reg_nr = reg.nr;
130 }
131 else {
132 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
133 insn->bits2.da16.src0_reg_nr = reg.nr;
134 }
135 }
136 else {
137 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
138
139 if (insn->header.access_mode == BRW_ALIGN_1) {
140 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
141 }
142 else {
143 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
144 }
145 }
146
147 if (insn->header.access_mode == BRW_ALIGN_1) {
148 if (reg.width == BRW_WIDTH_1 &&
149 insn->header.execution_size == BRW_EXECUTE_1) {
150 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
151 insn->bits2.da1.src0_width = BRW_WIDTH_1;
152 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
153 }
154 else {
155 insn->bits2.da1.src0_horiz_stride = reg.hstride;
156 insn->bits2.da1.src0_width = reg.width;
157 insn->bits2.da1.src0_vert_stride = reg.vstride;
158 }
159 }
160 else {
161 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
162 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
163 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
164 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
165
166 /* This is an oddity of the fact we're using the same
167 * descriptions for registers in align_16 as align_1:
168 */
169 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
170 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
171 else
172 insn->bits2.da16.src0_vert_stride = reg.vstride;
173 }
174 }
175 }
176
177
178 void brw_set_src1( struct brw_instruction *insn,
179 struct brw_reg reg )
180 {
181 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
182
183 assert(reg.nr < 128);
184
185 insn->bits1.da1.src1_reg_file = reg.file;
186 insn->bits1.da1.src1_reg_type = reg.type;
187 insn->bits3.da1.src1_abs = reg.abs;
188 insn->bits3.da1.src1_negate = reg.negate;
189
190 /* Only src1 can be immediate in two-argument instructions.
191 */
192 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
193
194 if (reg.file == BRW_IMMEDIATE_VALUE) {
195 insn->bits3.ud = reg.dw1.ud;
196 }
197 else {
198 /* This is a hardware restriction, which may or may not be lifted
199 * in the future:
200 */
201 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
202 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
203
204 if (insn->header.access_mode == BRW_ALIGN_1) {
205 insn->bits3.da1.src1_subreg_nr = reg.subnr;
206 insn->bits3.da1.src1_reg_nr = reg.nr;
207 }
208 else {
209 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
210 insn->bits3.da16.src1_reg_nr = reg.nr;
211 }
212
213 if (insn->header.access_mode == BRW_ALIGN_1) {
214 if (reg.width == BRW_WIDTH_1 &&
215 insn->header.execution_size == BRW_EXECUTE_1) {
216 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
217 insn->bits3.da1.src1_width = BRW_WIDTH_1;
218 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
219 }
220 else {
221 insn->bits3.da1.src1_horiz_stride = reg.hstride;
222 insn->bits3.da1.src1_width = reg.width;
223 insn->bits3.da1.src1_vert_stride = reg.vstride;
224 }
225 }
226 else {
227 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
228 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
229 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
230 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
231
232 /* This is an oddity of the fact we're using the same
233 * descriptions for registers in align_16 as align_1:
234 */
235 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
236 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
237 else
238 insn->bits3.da16.src1_vert_stride = reg.vstride;
239 }
240 }
241 }
242
243
244
245 static void brw_set_math_message( struct brw_context *brw,
246 struct brw_instruction *insn,
247 GLuint msg_length,
248 GLuint response_length,
249 GLuint function,
250 GLuint integer_type,
251 GLboolean low_precision,
252 GLboolean saturate,
253 GLuint dataType )
254 {
255 brw_set_src1(insn, brw_imm_d(0));
256
257 if (BRW_IS_IGDNG(brw)) {
258 insn->bits3.math_igdng.function = function;
259 insn->bits3.math_igdng.int_type = integer_type;
260 insn->bits3.math_igdng.precision = low_precision;
261 insn->bits3.math_igdng.saturate = saturate;
262 insn->bits3.math_igdng.data_type = dataType;
263 insn->bits3.math_igdng.snapshot = 0;
264 insn->bits3.math_igdng.header_present = 0;
265 insn->bits3.math_igdng.response_length = response_length;
266 insn->bits3.math_igdng.msg_length = msg_length;
267 insn->bits3.math_igdng.end_of_thread = 0;
268 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
269 insn->bits2.send_igdng.end_of_thread = 0;
270 } else {
271 insn->bits3.math.function = function;
272 insn->bits3.math.int_type = integer_type;
273 insn->bits3.math.precision = low_precision;
274 insn->bits3.math.saturate = saturate;
275 insn->bits3.math.data_type = dataType;
276 insn->bits3.math.response_length = response_length;
277 insn->bits3.math.msg_length = msg_length;
278 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
279 insn->bits3.math.end_of_thread = 0;
280 }
281 }
282
283
284 static void brw_set_ff_sync_message( struct brw_context *brw,
285 struct brw_instruction *insn,
286 GLboolean allocate,
287 GLboolean used,
288 GLuint msg_length,
289 GLuint response_length,
290 GLboolean end_of_thread,
291 GLboolean complete,
292 GLuint offset,
293 GLuint swizzle_control )
294 {
295 brw_set_src1(insn, brw_imm_d(0));
296
297 insn->bits3.urb_igdng.opcode = 1;
298 insn->bits3.urb_igdng.offset = offset;
299 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
300 insn->bits3.urb_igdng.allocate = allocate;
301 insn->bits3.urb_igdng.used = used;
302 insn->bits3.urb_igdng.complete = complete;
303 insn->bits3.urb_igdng.header_present = 1;
304 insn->bits3.urb_igdng.response_length = response_length;
305 insn->bits3.urb_igdng.msg_length = msg_length;
306 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
307 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
308 insn->bits2.send_igdng.end_of_thread = end_of_thread;
309 }
310
311 static void brw_set_urb_message( struct brw_context *brw,
312 struct brw_instruction *insn,
313 GLboolean allocate,
314 GLboolean used,
315 GLuint msg_length,
316 GLuint response_length,
317 GLboolean end_of_thread,
318 GLboolean complete,
319 GLuint offset,
320 GLuint swizzle_control )
321 {
322 brw_set_src1(insn, brw_imm_d(0));
323
324 if (BRW_IS_IGDNG(brw)) {
325 insn->bits3.urb_igdng.opcode = 0; /* ? */
326 insn->bits3.urb_igdng.offset = offset;
327 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
328 insn->bits3.urb_igdng.allocate = allocate;
329 insn->bits3.urb_igdng.used = used; /* ? */
330 insn->bits3.urb_igdng.complete = complete;
331 insn->bits3.urb_igdng.header_present = 1;
332 insn->bits3.urb_igdng.response_length = response_length;
333 insn->bits3.urb_igdng.msg_length = msg_length;
334 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
335 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
336 insn->bits2.send_igdng.end_of_thread = end_of_thread;
337 } else {
338 insn->bits3.urb.opcode = 0; /* ? */
339 insn->bits3.urb.offset = offset;
340 insn->bits3.urb.swizzle_control = swizzle_control;
341 insn->bits3.urb.allocate = allocate;
342 insn->bits3.urb.used = used; /* ? */
343 insn->bits3.urb.complete = complete;
344 insn->bits3.urb.response_length = response_length;
345 insn->bits3.urb.msg_length = msg_length;
346 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
347 insn->bits3.urb.end_of_thread = end_of_thread;
348 }
349 }
350
351 static void brw_set_dp_write_message( struct brw_context *brw,
352 struct brw_instruction *insn,
353 GLuint binding_table_index,
354 GLuint msg_control,
355 GLuint msg_type,
356 GLuint msg_length,
357 GLuint pixel_scoreboard_clear,
358 GLuint response_length,
359 GLuint end_of_thread )
360 {
361 brw_set_src1(insn, brw_imm_d(0));
362
363 if (BRW_IS_IGDNG(brw)) {
364 insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
365 insn->bits3.dp_write_igdng.msg_control = msg_control;
366 insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
367 insn->bits3.dp_write_igdng.msg_type = msg_type;
368 insn->bits3.dp_write_igdng.send_commit_msg = 0;
369 insn->bits3.dp_write_igdng.header_present = 1;
370 insn->bits3.dp_write_igdng.response_length = response_length;
371 insn->bits3.dp_write_igdng.msg_length = msg_length;
372 insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
373 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
374 insn->bits2.send_igdng.end_of_thread = end_of_thread;
375 } else {
376 insn->bits3.dp_write.binding_table_index = binding_table_index;
377 insn->bits3.dp_write.msg_control = msg_control;
378 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
379 insn->bits3.dp_write.msg_type = msg_type;
380 insn->bits3.dp_write.send_commit_msg = 0;
381 insn->bits3.dp_write.response_length = response_length;
382 insn->bits3.dp_write.msg_length = msg_length;
383 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
384 insn->bits3.dp_write.end_of_thread = end_of_thread;
385 }
386 }
387
388 static void brw_set_dp_read_message( struct brw_context *brw,
389 struct brw_instruction *insn,
390 GLuint binding_table_index,
391 GLuint msg_control,
392 GLuint msg_type,
393 GLuint target_cache,
394 GLuint msg_length,
395 GLuint response_length,
396 GLuint end_of_thread )
397 {
398 brw_set_src1(insn, brw_imm_d(0));
399
400 if (BRW_IS_IGDNG(brw)) {
401 insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
402 insn->bits3.dp_read_igdng.msg_control = msg_control;
403 insn->bits3.dp_read_igdng.msg_type = msg_type;
404 insn->bits3.dp_read_igdng.target_cache = target_cache;
405 insn->bits3.dp_read_igdng.header_present = 1;
406 insn->bits3.dp_read_igdng.response_length = response_length;
407 insn->bits3.dp_read_igdng.msg_length = msg_length;
408 insn->bits3.dp_read_igdng.pad1 = 0;
409 insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
410 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
411 insn->bits2.send_igdng.end_of_thread = end_of_thread;
412 } else {
413 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
414 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
415 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
416 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
417 insn->bits3.dp_read.response_length = response_length; /*16:19*/
418 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
419 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
420 insn->bits3.dp_read.pad1 = 0; /*28:30*/
421 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
422 }
423 }
424
425 static void brw_set_sampler_message(struct brw_context *brw,
426 struct brw_instruction *insn,
427 GLuint binding_table_index,
428 GLuint sampler,
429 GLuint msg_type,
430 GLuint response_length,
431 GLuint msg_length,
432 GLboolean eot,
433 GLuint header_present,
434 GLuint simd_mode)
435 {
436 assert(eot == 0);
437 brw_set_src1(insn, brw_imm_d(0));
438
439 if (BRW_IS_IGDNG(brw)) {
440 insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
441 insn->bits3.sampler_igdng.sampler = sampler;
442 insn->bits3.sampler_igdng.msg_type = msg_type;
443 insn->bits3.sampler_igdng.simd_mode = simd_mode;
444 insn->bits3.sampler_igdng.header_present = header_present;
445 insn->bits3.sampler_igdng.response_length = response_length;
446 insn->bits3.sampler_igdng.msg_length = msg_length;
447 insn->bits3.sampler_igdng.end_of_thread = eot;
448 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
449 insn->bits2.send_igdng.end_of_thread = eot;
450 } else if (BRW_IS_G4X(brw)) {
451 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
452 insn->bits3.sampler_g4x.sampler = sampler;
453 insn->bits3.sampler_g4x.msg_type = msg_type;
454 insn->bits3.sampler_g4x.response_length = response_length;
455 insn->bits3.sampler_g4x.msg_length = msg_length;
456 insn->bits3.sampler_g4x.end_of_thread = eot;
457 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
458 } else {
459 insn->bits3.sampler.binding_table_index = binding_table_index;
460 insn->bits3.sampler.sampler = sampler;
461 insn->bits3.sampler.msg_type = msg_type;
462 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
463 insn->bits3.sampler.response_length = response_length;
464 insn->bits3.sampler.msg_length = msg_length;
465 insn->bits3.sampler.end_of_thread = eot;
466 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
467 }
468 }
469
470
471
472 static struct brw_instruction *next_insn( struct brw_compile *p,
473 GLuint opcode )
474 {
475 struct brw_instruction *insn;
476
477 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
478
479 insn = &p->store[p->nr_insn++];
480 memcpy(insn, p->current, sizeof(*insn));
481
482 /* Reset this one-shot flag:
483 */
484
485 if (p->current->header.destreg__conditionalmod) {
486 p->current->header.destreg__conditionalmod = 0;
487 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
488 }
489
490 insn->header.opcode = opcode;
491 return insn;
492 }
493
494
495 static struct brw_instruction *brw_alu1( struct brw_compile *p,
496 GLuint opcode,
497 struct brw_reg dest,
498 struct brw_reg src )
499 {
500 struct brw_instruction *insn = next_insn(p, opcode);
501 brw_set_dest(insn, dest);
502 brw_set_src0(insn, src);
503 return insn;
504 }
505
506 static struct brw_instruction *brw_alu2(struct brw_compile *p,
507 GLuint opcode,
508 struct brw_reg dest,
509 struct brw_reg src0,
510 struct brw_reg src1 )
511 {
512 struct brw_instruction *insn = next_insn(p, opcode);
513 brw_set_dest(insn, dest);
514 brw_set_src0(insn, src0);
515 brw_set_src1(insn, src1);
516 return insn;
517 }
518
519
520 /***********************************************************************
521 * Convenience routines.
522 */
523 #define ALU1(OP) \
524 struct brw_instruction *brw_##OP(struct brw_compile *p, \
525 struct brw_reg dest, \
526 struct brw_reg src0) \
527 { \
528 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
529 }
530
531 #define ALU2(OP) \
532 struct brw_instruction *brw_##OP(struct brw_compile *p, \
533 struct brw_reg dest, \
534 struct brw_reg src0, \
535 struct brw_reg src1) \
536 { \
537 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
538 }
539
540
541 ALU1(MOV)
542 ALU2(SEL)
543 ALU1(NOT)
544 ALU2(AND)
545 ALU2(OR)
546 ALU2(XOR)
547 ALU2(SHR)
548 ALU2(SHL)
549 ALU2(RSR)
550 ALU2(RSL)
551 ALU2(ASR)
552 ALU2(ADD)
553 ALU2(MUL)
554 ALU1(FRC)
555 ALU1(RNDD)
556 ALU1(RNDZ)
557 ALU2(MAC)
558 ALU2(MACH)
559 ALU1(LZD)
560 ALU2(DP4)
561 ALU2(DPH)
562 ALU2(DP3)
563 ALU2(DP2)
564 ALU2(LINE)
565
566
567
568
569 void brw_NOP(struct brw_compile *p)
570 {
571 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
572 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
573 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
574 brw_set_src1(insn, brw_imm_ud(0x0));
575 }
576
577
578
579
580
581 /***********************************************************************
582 * Comparisons, if/else/endif
583 */
584
585 struct brw_instruction *brw_JMPI(struct brw_compile *p,
586 struct brw_reg dest,
587 struct brw_reg src0,
588 struct brw_reg src1)
589 {
590 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
591
592 insn->header.execution_size = 1;
593 insn->header.compression_control = BRW_COMPRESSION_NONE;
594 insn->header.mask_control = BRW_MASK_DISABLE;
595
596 p->current->header.predicate_control = BRW_PREDICATE_NONE;
597
598 return insn;
599 }
600
601 /* EU takes the value from the flag register and pushes it onto some
602 * sort of a stack (presumably merging with any flag value already on
603 * the stack). Within an if block, the flags at the top of the stack
604 * control execution on each channel of the unit, eg. on each of the
605 * 16 pixel values in our wm programs.
606 *
607 * When the matching 'else' instruction is reached (presumably by
608 * countdown of the instruction count patched in by our ELSE/ENDIF
609 * functions), the relevent flags are inverted.
610 *
611 * When the matching 'endif' instruction is reached, the flags are
612 * popped off. If the stack is now empty, normal execution resumes.
613 *
614 * No attempt is made to deal with stack overflow (14 elements?).
615 */
616 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
617 {
618 struct brw_instruction *insn;
619
620 if (p->single_program_flow) {
621 assert(execute_size == BRW_EXECUTE_1);
622
623 insn = next_insn(p, BRW_OPCODE_ADD);
624 insn->header.predicate_inverse = 1;
625 } else {
626 insn = next_insn(p, BRW_OPCODE_IF);
627 }
628
629 /* Override the defaults for this instruction:
630 */
631 brw_set_dest(insn, brw_ip_reg());
632 brw_set_src0(insn, brw_ip_reg());
633 brw_set_src1(insn, brw_imm_d(0x0));
634
635 insn->header.execution_size = execute_size;
636 insn->header.compression_control = BRW_COMPRESSION_NONE;
637 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
638 insn->header.mask_control = BRW_MASK_ENABLE;
639 if (!p->single_program_flow)
640 insn->header.thread_control = BRW_THREAD_SWITCH;
641
642 p->current->header.predicate_control = BRW_PREDICATE_NONE;
643
644 return insn;
645 }
646
647
648 struct brw_instruction *brw_ELSE(struct brw_compile *p,
649 struct brw_instruction *if_insn)
650 {
651 struct brw_instruction *insn;
652 GLuint br = 1;
653
654 if (BRW_IS_IGDNG(p->brw))
655 br = 2;
656
657 if (p->single_program_flow) {
658 insn = next_insn(p, BRW_OPCODE_ADD);
659 } else {
660 insn = next_insn(p, BRW_OPCODE_ELSE);
661 }
662
663 brw_set_dest(insn, brw_ip_reg());
664 brw_set_src0(insn, brw_ip_reg());
665 brw_set_src1(insn, brw_imm_d(0x0));
666
667 insn->header.compression_control = BRW_COMPRESSION_NONE;
668 insn->header.execution_size = if_insn->header.execution_size;
669 insn->header.mask_control = BRW_MASK_ENABLE;
670 if (!p->single_program_flow)
671 insn->header.thread_control = BRW_THREAD_SWITCH;
672
673 /* Patch the if instruction to point at this instruction.
674 */
675 if (p->single_program_flow) {
676 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
677
678 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
679 } else {
680 assert(if_insn->header.opcode == BRW_OPCODE_IF);
681
682 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
683 if_insn->bits3.if_else.pop_count = 0;
684 if_insn->bits3.if_else.pad0 = 0;
685 }
686
687 return insn;
688 }
689
690 void brw_ENDIF(struct brw_compile *p,
691 struct brw_instruction *patch_insn)
692 {
693 GLuint br = 1;
694
695 if (BRW_IS_IGDNG(p->brw))
696 br = 2;
697
698 if (p->single_program_flow) {
699 /* In single program flow mode, there's no need to execute an ENDIF,
700 * since we don't need to do any stack operations, and if we're executing
701 * currently, we want to just continue executing.
702 */
703 struct brw_instruction *next = &p->store[p->nr_insn];
704
705 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
706
707 patch_insn->bits3.ud = (next - patch_insn) * 16;
708 } else {
709 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
710
711 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
712 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
713 brw_set_src1(insn, brw_imm_d(0x0));
714
715 insn->header.compression_control = BRW_COMPRESSION_NONE;
716 insn->header.execution_size = patch_insn->header.execution_size;
717 insn->header.mask_control = BRW_MASK_ENABLE;
718 insn->header.thread_control = BRW_THREAD_SWITCH;
719
720 assert(patch_insn->bits3.if_else.jump_count == 0);
721
722 /* Patch the if or else instructions to point at this or the next
723 * instruction respectively.
724 */
725 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
726 /* Automagically turn it into an IFF:
727 */
728 patch_insn->header.opcode = BRW_OPCODE_IFF;
729 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
730 patch_insn->bits3.if_else.pop_count = 0;
731 patch_insn->bits3.if_else.pad0 = 0;
732 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
733 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
734 patch_insn->bits3.if_else.pop_count = 1;
735 patch_insn->bits3.if_else.pad0 = 0;
736 } else {
737 assert(0);
738 }
739
740 /* Also pop item off the stack in the endif instruction:
741 */
742 insn->bits3.if_else.jump_count = 0;
743 insn->bits3.if_else.pop_count = 1;
744 insn->bits3.if_else.pad0 = 0;
745 }
746 }
747
748 struct brw_instruction *brw_BREAK(struct brw_compile *p)
749 {
750 struct brw_instruction *insn;
751 insn = next_insn(p, BRW_OPCODE_BREAK);
752 brw_set_dest(insn, brw_ip_reg());
753 brw_set_src0(insn, brw_ip_reg());
754 brw_set_src1(insn, brw_imm_d(0x0));
755 insn->header.compression_control = BRW_COMPRESSION_NONE;
756 insn->header.execution_size = BRW_EXECUTE_8;
757 /* insn->header.mask_control = BRW_MASK_DISABLE; */
758 insn->bits3.if_else.pad0 = 0;
759 return insn;
760 }
761
762 struct brw_instruction *brw_CONT(struct brw_compile *p)
763 {
764 struct brw_instruction *insn;
765 insn = next_insn(p, BRW_OPCODE_CONTINUE);
766 brw_set_dest(insn, brw_ip_reg());
767 brw_set_src0(insn, brw_ip_reg());
768 brw_set_src1(insn, brw_imm_d(0x0));
769 insn->header.compression_control = BRW_COMPRESSION_NONE;
770 insn->header.execution_size = BRW_EXECUTE_8;
771 /* insn->header.mask_control = BRW_MASK_DISABLE; */
772 insn->bits3.if_else.pad0 = 0;
773 return insn;
774 }
775
776 /* DO/WHILE loop:
777 */
778 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
779 {
780 if (p->single_program_flow) {
781 return &p->store[p->nr_insn];
782 } else {
783 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
784
785 /* Override the defaults for this instruction:
786 */
787 brw_set_dest(insn, brw_null_reg());
788 brw_set_src0(insn, brw_null_reg());
789 brw_set_src1(insn, brw_null_reg());
790
791 insn->header.compression_control = BRW_COMPRESSION_NONE;
792 insn->header.execution_size = execute_size;
793 insn->header.predicate_control = BRW_PREDICATE_NONE;
794 /* insn->header.mask_control = BRW_MASK_ENABLE; */
795 /* insn->header.mask_control = BRW_MASK_DISABLE; */
796
797 return insn;
798 }
799 }
800
801
802
803 struct brw_instruction *brw_WHILE(struct brw_compile *p,
804 struct brw_instruction *do_insn)
805 {
806 struct brw_instruction *insn;
807 GLuint br = 1;
808
809 if (BRW_IS_IGDNG(p->brw))
810 br = 2;
811
812 if (p->single_program_flow)
813 insn = next_insn(p, BRW_OPCODE_ADD);
814 else
815 insn = next_insn(p, BRW_OPCODE_WHILE);
816
817 brw_set_dest(insn, brw_ip_reg());
818 brw_set_src0(insn, brw_ip_reg());
819 brw_set_src1(insn, brw_imm_d(0x0));
820
821 insn->header.compression_control = BRW_COMPRESSION_NONE;
822
823 if (p->single_program_flow) {
824 insn->header.execution_size = BRW_EXECUTE_1;
825
826 insn->bits3.d = (do_insn - insn) * 16;
827 } else {
828 insn->header.execution_size = do_insn->header.execution_size;
829
830 assert(do_insn->header.opcode == BRW_OPCODE_DO);
831 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
832 insn->bits3.if_else.pop_count = 0;
833 insn->bits3.if_else.pad0 = 0;
834 }
835
836 /* insn->header.mask_control = BRW_MASK_ENABLE; */
837
838 /* insn->header.mask_control = BRW_MASK_DISABLE; */
839 p->current->header.predicate_control = BRW_PREDICATE_NONE;
840 return insn;
841 }
842
843
844 /* FORWARD JUMPS:
845 */
846 void brw_land_fwd_jump(struct brw_compile *p,
847 struct brw_instruction *jmp_insn)
848 {
849 struct brw_instruction *landing = &p->store[p->nr_insn];
850 GLuint jmpi = 1;
851
852 if (BRW_IS_IGDNG(p->brw))
853 jmpi = 2;
854
855 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
856 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
857
858 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
859 }
860
861
862
863 /* To integrate with the above, it makes sense that the comparison
864 * instruction should populate the flag register. It might be simpler
865 * just to use the flag reg for most WM tasks?
866 */
867 void brw_CMP(struct brw_compile *p,
868 struct brw_reg dest,
869 GLuint conditional,
870 struct brw_reg src0,
871 struct brw_reg src1)
872 {
873 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
874
875 insn->header.destreg__conditionalmod = conditional;
876 brw_set_dest(insn, dest);
877 brw_set_src0(insn, src0);
878 brw_set_src1(insn, src1);
879
880 /* guess_execution_size(insn, src0); */
881
882
883 /* Make it so that future instructions will use the computed flag
884 * value until brw_set_predicate_control_flag_value() is called
885 * again.
886 */
887 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
888 dest.nr == 0) {
889 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
890 p->flag_value = 0xff;
891 }
892 }
893
894
895
896 /***********************************************************************
897 * Helpers for the various SEND message types:
898 */
899
900 /** Extended math function, float[8].
901 */
902 void brw_math( struct brw_compile *p,
903 struct brw_reg dest,
904 GLuint function,
905 GLuint saturate,
906 GLuint msg_reg_nr,
907 struct brw_reg src,
908 GLuint data_type,
909 GLuint precision )
910 {
911 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
912 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
913 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
914
915 /* Example code doesn't set predicate_control for send
916 * instructions.
917 */
918 insn->header.predicate_control = 0;
919 insn->header.destreg__conditionalmod = msg_reg_nr;
920
921 brw_set_dest(insn, dest);
922 brw_set_src0(insn, src);
923 brw_set_math_message(p->brw,
924 insn,
925 msg_length, response_length,
926 function,
927 BRW_MATH_INTEGER_UNSIGNED,
928 precision,
929 saturate,
930 data_type);
931 }
932
933 /**
934 * Extended math function, float[16].
935 * Use 2 send instructions.
936 */
937 void brw_math_16( struct brw_compile *p,
938 struct brw_reg dest,
939 GLuint function,
940 GLuint saturate,
941 GLuint msg_reg_nr,
942 struct brw_reg src,
943 GLuint precision )
944 {
945 struct brw_instruction *insn;
946 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
947 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
948
949 /* First instruction:
950 */
951 brw_push_insn_state(p);
952 brw_set_predicate_control_flag_value(p, 0xff);
953 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
954
955 insn = next_insn(p, BRW_OPCODE_SEND);
956 insn->header.destreg__conditionalmod = msg_reg_nr;
957
958 brw_set_dest(insn, dest);
959 brw_set_src0(insn, src);
960 brw_set_math_message(p->brw,
961 insn,
962 msg_length, response_length,
963 function,
964 BRW_MATH_INTEGER_UNSIGNED,
965 precision,
966 saturate,
967 BRW_MATH_DATA_VECTOR);
968
969 /* Second instruction:
970 */
971 insn = next_insn(p, BRW_OPCODE_SEND);
972 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
973 insn->header.destreg__conditionalmod = msg_reg_nr+1;
974
975 brw_set_dest(insn, offset(dest,1));
976 brw_set_src0(insn, src);
977 brw_set_math_message(p->brw,
978 insn,
979 msg_length, response_length,
980 function,
981 BRW_MATH_INTEGER_UNSIGNED,
982 precision,
983 saturate,
984 BRW_MATH_DATA_VECTOR);
985
986 brw_pop_insn_state(p);
987 }
988
989
990 /**
991 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
992 * Scratch offset should be a multiple of 64.
993 * Used for register spilling.
994 */
995 void brw_dp_WRITE_16( struct brw_compile *p,
996 struct brw_reg src,
997 GLuint scratch_offset )
998 {
999 GLuint msg_reg_nr = 1;
1000 {
1001 brw_push_insn_state(p);
1002 brw_set_mask_control(p, BRW_MASK_DISABLE);
1003 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1004
1005 /* set message header global offset field (reg 0, element 2) */
1006 brw_MOV(p,
1007 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1008 brw_imm_d(scratch_offset));
1009
1010 brw_pop_insn_state(p);
1011 }
1012
1013 {
1014 GLuint msg_length = 3;
1015 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1016 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1017
1018 insn->header.predicate_control = 0; /* XXX */
1019 insn->header.compression_control = BRW_COMPRESSION_NONE;
1020 insn->header.destreg__conditionalmod = msg_reg_nr;
1021
1022 brw_set_dest(insn, dest);
1023 brw_set_src0(insn, src);
1024
1025 brw_set_dp_write_message(p->brw,
1026 insn,
1027 255, /* binding table index (255=stateless) */
1028 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1029 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1030 msg_length,
1031 0, /* pixel scoreboard */
1032 0, /* response_length */
1033 0); /* eot */
1034 }
1035 }
1036
1037
1038 /**
1039 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1040 * Scratch offset should be a multiple of 64.
1041 * Used for register spilling.
1042 */
1043 void brw_dp_READ_16( struct brw_compile *p,
1044 struct brw_reg dest,
1045 GLuint scratch_offset )
1046 {
1047 GLuint msg_reg_nr = 1;
1048 {
1049 brw_push_insn_state(p);
1050 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1051 brw_set_mask_control(p, BRW_MASK_DISABLE);
1052
1053 /* set message header global offset field (reg 0, element 2) */
1054 brw_MOV(p,
1055 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1056 brw_imm_d(scratch_offset));
1057
1058 brw_pop_insn_state(p);
1059 }
1060
1061 {
1062 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1063
1064 insn->header.predicate_control = 0; /* XXX */
1065 insn->header.compression_control = BRW_COMPRESSION_NONE;
1066 insn->header.destreg__conditionalmod = msg_reg_nr;
1067
1068 brw_set_dest(insn, dest); /* UW? */
1069 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1070
1071 brw_set_dp_read_message(p->brw,
1072 insn,
1073 255, /* binding table index (255=stateless) */
1074 3, /* msg_control (3 means 4 Owords) */
1075 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1076 1, /* target cache (render/scratch) */
1077 1, /* msg_length */
1078 2, /* response_length */
1079 0); /* eot */
1080 }
1081 }
1082
1083
1084 /**
1085 * Read a float[4] vector from the data port Data Cache (const buffer).
1086 * Location (in buffer) should be a multiple of 16.
1087 * Used for fetching shader constants.
1088 * If relAddr is true, we'll do an indirect fetch using the address register.
1089 */
1090 void brw_dp_READ_4( struct brw_compile *p,
1091 struct brw_reg dest,
1092 GLboolean relAddr,
1093 GLuint location,
1094 GLuint bind_table_index )
1095 {
1096 /* XXX: relAddr not implemented */
1097 GLuint msg_reg_nr = 1;
1098 {
1099 struct brw_reg b;
1100 brw_push_insn_state(p);
1101 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1102 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1103 brw_set_mask_control(p, BRW_MASK_DISABLE);
1104
1105 /* Setup MRF[1] with location/offset into const buffer */
1106 b = brw_message_reg(msg_reg_nr);
1107 b = retype(b, BRW_REGISTER_TYPE_UD);
1108 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1109 * when the docs say only dword[2] should be set. Hmmm. But it works.
1110 */
1111 brw_MOV(p, b, brw_imm_ud(location));
1112 brw_pop_insn_state(p);
1113 }
1114
1115 {
1116 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1117
1118 insn->header.predicate_control = BRW_PREDICATE_NONE;
1119 insn->header.compression_control = BRW_COMPRESSION_NONE;
1120 insn->header.destreg__conditionalmod = msg_reg_nr;
1121 insn->header.mask_control = BRW_MASK_DISABLE;
1122
1123 /* cast dest to a uword[8] vector */
1124 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1125
1126 brw_set_dest(insn, dest);
1127 brw_set_src0(insn, brw_null_reg());
1128
1129 brw_set_dp_read_message(p->brw,
1130 insn,
1131 bind_table_index,
1132 0, /* msg_control (0 means 1 Oword) */
1133 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1134 0, /* source cache = data cache */
1135 1, /* msg_length */
1136 1, /* response_length (1 Oword) */
1137 0); /* eot */
1138 }
1139 }
1140
1141
1142 /**
1143 * Read float[4] constant(s) from VS constant buffer.
1144 * For relative addressing, two float[4] constants will be read into 'dest'.
1145 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1146 */
1147 void brw_dp_READ_4_vs(struct brw_compile *p,
1148 struct brw_reg dest,
1149 GLuint oword,
1150 GLboolean relAddr,
1151 struct brw_reg addrReg,
1152 GLuint location,
1153 GLuint bind_table_index)
1154 {
1155 GLuint msg_reg_nr = 1;
1156
1157 assert(oword < 2);
1158 /*
1159 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1160 location, msg_reg_nr);
1161 */
1162
1163 /* Setup MRF[1] with location/offset into const buffer */
1164 {
1165 struct brw_reg b;
1166
1167 brw_push_insn_state(p);
1168 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1169 brw_set_mask_control(p, BRW_MASK_DISABLE);
1170 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1171 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1172
1173 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1174 * when the docs say only dword[2] should be set. Hmmm. But it works.
1175 */
1176 b = brw_message_reg(msg_reg_nr);
1177 b = retype(b, BRW_REGISTER_TYPE_UD);
1178 /*b = get_element_ud(b, 2);*/
1179 if (relAddr) {
1180 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1181 }
1182 else {
1183 brw_MOV(p, b, brw_imm_ud(location));
1184 }
1185
1186 brw_pop_insn_state(p);
1187 }
1188
1189 {
1190 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1191
1192 insn->header.predicate_control = BRW_PREDICATE_NONE;
1193 insn->header.compression_control = BRW_COMPRESSION_NONE;
1194 insn->header.destreg__conditionalmod = msg_reg_nr;
1195 insn->header.mask_control = BRW_MASK_DISABLE;
1196 /*insn->header.access_mode = BRW_ALIGN_16;*/
1197
1198 brw_set_dest(insn, dest);
1199 brw_set_src0(insn, brw_null_reg());
1200
1201 brw_set_dp_read_message(p->brw,
1202 insn,
1203 bind_table_index,
1204 oword, /* 0 = lower Oword, 1 = upper Oword */
1205 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1206 0, /* source cache = data cache */
1207 1, /* msg_length */
1208 1, /* response_length (1 Oword) */
1209 0); /* eot */
1210 }
1211 }
1212
1213
1214
1215 void brw_fb_WRITE(struct brw_compile *p,
1216 struct brw_reg dest,
1217 GLuint msg_reg_nr,
1218 struct brw_reg src0,
1219 GLuint binding_table_index,
1220 GLuint msg_length,
1221 GLuint response_length,
1222 GLboolean eot)
1223 {
1224 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1225
1226 insn->header.predicate_control = 0; /* XXX */
1227 insn->header.compression_control = BRW_COMPRESSION_NONE;
1228 insn->header.destreg__conditionalmod = msg_reg_nr;
1229
1230 brw_set_dest(insn, dest);
1231 brw_set_src0(insn, src0);
1232 brw_set_dp_write_message(p->brw,
1233 insn,
1234 binding_table_index,
1235 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1236 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1237 msg_length,
1238 1, /* pixel scoreboard */
1239 response_length,
1240 eot);
1241 }
1242
1243
1244 /**
1245 * Texture sample instruction.
1246 * Note: the msg_type plus msg_length values determine exactly what kind
1247 * of sampling operation is performed. See volume 4, page 161 of docs.
1248 */
1249 void brw_SAMPLE(struct brw_compile *p,
1250 struct brw_reg dest,
1251 GLuint msg_reg_nr,
1252 struct brw_reg src0,
1253 GLuint binding_table_index,
1254 GLuint sampler,
1255 GLuint writemask,
1256 GLuint msg_type,
1257 GLuint response_length,
1258 GLuint msg_length,
1259 GLboolean eot,
1260 GLuint header_present,
1261 GLuint simd_mode)
1262 {
1263 GLboolean need_stall = 0;
1264
1265 if (writemask == 0) {
1266 /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1267 return;
1268 }
1269
1270 /* Hardware doesn't do destination dependency checking on send
1271 * instructions properly. Add a workaround which generates the
1272 * dependency by other means. In practice it seems like this bug
1273 * only crops up for texture samples, and only where registers are
1274 * written by the send and then written again later without being
1275 * read in between. Luckily for us, we already track that
1276 * information and use it to modify the writemask for the
1277 * instruction, so that is a guide for whether a workaround is
1278 * needed.
1279 */
1280 if (writemask != WRITEMASK_XYZW) {
1281 GLuint dst_offset = 0;
1282 GLuint i, newmask = 0, len = 0;
1283
1284 for (i = 0; i < 4; i++) {
1285 if (writemask & (1<<i))
1286 break;
1287 dst_offset += 2;
1288 }
1289 for (; i < 4; i++) {
1290 if (!(writemask & (1<<i)))
1291 break;
1292 newmask |= 1<<i;
1293 len++;
1294 }
1295
1296 if (newmask != writemask) {
1297 need_stall = 1;
1298 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1299 }
1300 else {
1301 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1302
1303 newmask = ~newmask & WRITEMASK_XYZW;
1304
1305 brw_push_insn_state(p);
1306
1307 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1308 brw_set_mask_control(p, BRW_MASK_DISABLE);
1309
1310 brw_MOV(p, m1, brw_vec8_grf(0,0));
1311 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1312
1313 brw_pop_insn_state(p);
1314
1315 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1316 dest = offset(dest, dst_offset);
1317 response_length = len * 2;
1318 }
1319 }
1320
1321 {
1322 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1323
1324 insn->header.predicate_control = 0; /* XXX */
1325 insn->header.compression_control = BRW_COMPRESSION_NONE;
1326 insn->header.destreg__conditionalmod = msg_reg_nr;
1327
1328 brw_set_dest(insn, dest);
1329 brw_set_src0(insn, src0);
1330 brw_set_sampler_message(p->brw, insn,
1331 binding_table_index,
1332 sampler,
1333 msg_type,
1334 response_length,
1335 msg_length,
1336 eot,
1337 header_present,
1338 simd_mode);
1339 }
1340
1341 if (need_stall) {
1342 struct brw_reg reg = vec8(offset(dest, response_length-1));
1343
1344 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1345 */
1346 brw_push_insn_state(p);
1347 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1348 brw_MOV(p, reg, reg);
1349 brw_pop_insn_state(p);
1350 }
1351
1352 }
1353
1354 /* All these variables are pretty confusing - we might be better off
1355 * using bitmasks and macros for this, in the old style. Or perhaps
1356 * just having the caller instantiate the fields in dword3 itself.
1357 */
1358 void brw_urb_WRITE(struct brw_compile *p,
1359 struct brw_reg dest,
1360 GLuint msg_reg_nr,
1361 struct brw_reg src0,
1362 GLboolean allocate,
1363 GLboolean used,
1364 GLuint msg_length,
1365 GLuint response_length,
1366 GLboolean eot,
1367 GLboolean writes_complete,
1368 GLuint offset,
1369 GLuint swizzle)
1370 {
1371 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1372
1373 assert(msg_length < BRW_MAX_MRF);
1374
1375 brw_set_dest(insn, dest);
1376 brw_set_src0(insn, src0);
1377 brw_set_src1(insn, brw_imm_d(0));
1378
1379 insn->header.destreg__conditionalmod = msg_reg_nr;
1380
1381 brw_set_urb_message(p->brw,
1382 insn,
1383 allocate,
1384 used,
1385 msg_length,
1386 response_length,
1387 eot,
1388 writes_complete,
1389 offset,
1390 swizzle);
1391 }
1392
1393 void brw_ff_sync(struct brw_compile *p,
1394 struct brw_reg dest,
1395 GLuint msg_reg_nr,
1396 struct brw_reg src0,
1397 GLboolean allocate,
1398 GLboolean used,
1399 GLuint msg_length,
1400 GLuint response_length,
1401 GLboolean eot,
1402 GLboolean writes_complete,
1403 GLuint offset,
1404 GLuint swizzle)
1405 {
1406 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1407
1408 assert(msg_length < 16);
1409
1410 brw_set_dest(insn, dest);
1411 brw_set_src0(insn, src0);
1412 brw_set_src1(insn, brw_imm_d(0));
1413
1414 insn->header.destreg__conditionalmod = msg_reg_nr;
1415
1416 brw_set_ff_sync_message(p->brw,
1417 insn,
1418 allocate,
1419 used,
1420 msg_length,
1421 response_length,
1422 eot,
1423 writes_complete,
1424 offset,
1425 swizzle);
1426 }