3ee50899fba39febb529db9cf0b7b2b3b3998e24
[mesa.git] / src / gallium / drivers / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36 #include "brw_debug.h"
37 #include "brw_disasm.h"
38
39
40
41
42 /***********************************************************************
43 * Internal helper for constructing instructions
44 */
45
46 static void guess_execution_size( struct brw_instruction *insn,
47 struct brw_reg reg )
48 {
49 if (reg.width == BRW_WIDTH_8 &&
50 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
51 insn->header.execution_size = BRW_EXECUTE_16;
52 else
53 insn->header.execution_size = reg.width; /* note - definitions are compatible */
54 }
55
56
57 static void brw_set_dest( struct brw_instruction *insn,
58 struct brw_reg dest )
59 {
60 if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
61 assert(dest.nr < 128);
62
63 insn->bits1.da1.dest_reg_file = dest.file;
64 insn->bits1.da1.dest_reg_type = dest.type;
65 insn->bits1.da1.dest_address_mode = dest.address_mode;
66
67 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
68 insn->bits1.da1.dest_reg_nr = dest.nr;
69
70 if (insn->header.access_mode == BRW_ALIGN_1) {
71 insn->bits1.da1.dest_subreg_nr = dest.subnr;
72 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
73 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
74 insn->bits1.da1.dest_horiz_stride = dest.hstride;
75 }
76 else {
77 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
78 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
79 }
80 }
81 else {
82 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
83
84 /* These are different sizes in align1 vs align16:
85 */
86 if (insn->header.access_mode == BRW_ALIGN_1) {
87 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
88 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
89 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
90 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
91 }
92 else {
93 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
94 }
95 }
96
97 /* NEW: Set the execution size based on dest.width and
98 * insn->compression_control:
99 */
100 guess_execution_size(insn, dest);
101 }
102
103 static void brw_set_src0( struct brw_instruction *insn,
104 struct brw_reg reg )
105 {
106 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
107
108 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
109 assert(reg.nr < 128);
110
111 insn->bits1.da1.src0_reg_file = reg.file;
112 insn->bits1.da1.src0_reg_type = reg.type;
113 insn->bits2.da1.src0_abs = reg.abs;
114 insn->bits2.da1.src0_negate = reg.negate;
115 insn->bits2.da1.src0_address_mode = reg.address_mode;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 insn->bits3.ud = reg.dw1.ud;
119
120 /* Required to set some fields in src1 as well:
121 */
122 insn->bits1.da1.src1_reg_file = 0; /* arf */
123 insn->bits1.da1.src1_reg_type = reg.type;
124 }
125 else
126 {
127 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
128 if (insn->header.access_mode == BRW_ALIGN_1) {
129 insn->bits2.da1.src0_subreg_nr = reg.subnr;
130 insn->bits2.da1.src0_reg_nr = reg.nr;
131 }
132 else {
133 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
134 insn->bits2.da16.src0_reg_nr = reg.nr;
135 }
136 }
137 else {
138 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
139
140 if (insn->header.access_mode == BRW_ALIGN_1) {
141 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
142 }
143 else {
144 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
145 }
146 }
147
148 if (insn->header.access_mode == BRW_ALIGN_1) {
149 if (reg.width == BRW_WIDTH_1 &&
150 insn->header.execution_size == BRW_EXECUTE_1) {
151 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
152 insn->bits2.da1.src0_width = BRW_WIDTH_1;
153 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
154 }
155 else {
156 insn->bits2.da1.src0_horiz_stride = reg.hstride;
157 insn->bits2.da1.src0_width = reg.width;
158 insn->bits2.da1.src0_vert_stride = reg.vstride;
159 }
160 }
161 else {
162 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
163 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
164 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
165 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
166
167 /* This is an oddity of the fact we're using the same
168 * descriptions for registers in align_16 as align_1:
169 */
170 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
171 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
172 else
173 insn->bits2.da16.src0_vert_stride = reg.vstride;
174 }
175 }
176 }
177
178
179 void brw_set_src1( struct brw_instruction *insn,
180 struct brw_reg reg )
181 {
182 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
183
184 assert(reg.nr < 128);
185
186 insn->bits1.da1.src1_reg_file = reg.file;
187 insn->bits1.da1.src1_reg_type = reg.type;
188 insn->bits3.da1.src1_abs = reg.abs;
189 insn->bits3.da1.src1_negate = reg.negate;
190
191 /* Only src1 can be immediate in two-argument instructions.
192 */
193 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
194
195 if (reg.file == BRW_IMMEDIATE_VALUE) {
196 insn->bits3.ud = reg.dw1.ud;
197 }
198 else {
199 /* This is a hardware restriction, which may or may not be lifted
200 * in the future:
201 */
202 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
203 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
204
205 if (insn->header.access_mode == BRW_ALIGN_1) {
206 insn->bits3.da1.src1_subreg_nr = reg.subnr;
207 insn->bits3.da1.src1_reg_nr = reg.nr;
208 }
209 else {
210 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
211 insn->bits3.da16.src1_reg_nr = reg.nr;
212 }
213
214 if (insn->header.access_mode == BRW_ALIGN_1) {
215 if (reg.width == BRW_WIDTH_1 &&
216 insn->header.execution_size == BRW_EXECUTE_1) {
217 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
218 insn->bits3.da1.src1_width = BRW_WIDTH_1;
219 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
220 }
221 else {
222 insn->bits3.da1.src1_horiz_stride = reg.hstride;
223 insn->bits3.da1.src1_width = reg.width;
224 insn->bits3.da1.src1_vert_stride = reg.vstride;
225 }
226 }
227 else {
228 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
229 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
230 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
231 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
232
233 /* This is an oddity of the fact we're using the same
234 * descriptions for registers in align_16 as align_1:
235 */
236 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
237 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
238 else
239 insn->bits3.da16.src1_vert_stride = reg.vstride;
240 }
241 }
242 }
243
244
245
246 static void brw_set_math_message( struct brw_context *brw,
247 struct brw_instruction *insn,
248 GLuint msg_length,
249 GLuint response_length,
250 GLuint function,
251 GLuint integer_type,
252 GLboolean low_precision,
253 GLboolean saturate,
254 GLuint dataType )
255 {
256 brw_set_src1(insn, brw_imm_d(0));
257
258 if (BRW_IS_IGDNG(brw)) {
259 insn->bits3.math_igdng.function = function;
260 insn->bits3.math_igdng.int_type = integer_type;
261 insn->bits3.math_igdng.precision = low_precision;
262 insn->bits3.math_igdng.saturate = saturate;
263 insn->bits3.math_igdng.data_type = dataType;
264 insn->bits3.math_igdng.snapshot = 0;
265 insn->bits3.math_igdng.header_present = 0;
266 insn->bits3.math_igdng.response_length = response_length;
267 insn->bits3.math_igdng.msg_length = msg_length;
268 insn->bits3.math_igdng.end_of_thread = 0;
269 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
270 insn->bits2.send_igdng.end_of_thread = 0;
271 } else {
272 insn->bits3.math.function = function;
273 insn->bits3.math.int_type = integer_type;
274 insn->bits3.math.precision = low_precision;
275 insn->bits3.math.saturate = saturate;
276 insn->bits3.math.data_type = dataType;
277 insn->bits3.math.response_length = response_length;
278 insn->bits3.math.msg_length = msg_length;
279 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
280 insn->bits3.math.end_of_thread = 0;
281 }
282 }
283
284
285 static void brw_set_ff_sync_message( struct brw_context *brw,
286 struct brw_instruction *insn,
287 GLboolean allocate,
288 GLboolean used,
289 GLuint msg_length,
290 GLuint response_length,
291 GLboolean end_of_thread,
292 GLboolean complete,
293 GLuint offset,
294 GLuint swizzle_control )
295 {
296 brw_set_src1(insn, brw_imm_d(0));
297
298 insn->bits3.urb_igdng.opcode = 1;
299 insn->bits3.urb_igdng.offset = offset;
300 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
301 insn->bits3.urb_igdng.allocate = allocate;
302 insn->bits3.urb_igdng.used = used;
303 insn->bits3.urb_igdng.complete = complete;
304 insn->bits3.urb_igdng.header_present = 1;
305 insn->bits3.urb_igdng.response_length = response_length;
306 insn->bits3.urb_igdng.msg_length = msg_length;
307 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
308 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
309 insn->bits2.send_igdng.end_of_thread = end_of_thread;
310 }
311
312 static void brw_set_urb_message( struct brw_context *brw,
313 struct brw_instruction *insn,
314 GLboolean allocate,
315 GLboolean used,
316 GLuint msg_length,
317 GLuint response_length,
318 GLboolean end_of_thread,
319 GLboolean complete,
320 GLuint offset,
321 GLuint swizzle_control )
322 {
323 brw_set_src1(insn, brw_imm_d(0));
324
325 if (BRW_IS_IGDNG(brw)) {
326 insn->bits3.urb_igdng.opcode = 0; /* ? */
327 insn->bits3.urb_igdng.offset = offset;
328 insn->bits3.urb_igdng.swizzle_control = swizzle_control;
329 insn->bits3.urb_igdng.allocate = allocate;
330 insn->bits3.urb_igdng.used = used; /* ? */
331 insn->bits3.urb_igdng.complete = complete;
332 insn->bits3.urb_igdng.header_present = 1;
333 insn->bits3.urb_igdng.response_length = response_length;
334 insn->bits3.urb_igdng.msg_length = msg_length;
335 insn->bits3.urb_igdng.end_of_thread = end_of_thread;
336 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
337 insn->bits2.send_igdng.end_of_thread = end_of_thread;
338 } else {
339 insn->bits3.urb.opcode = 0; /* ? */
340 insn->bits3.urb.offset = offset;
341 insn->bits3.urb.swizzle_control = swizzle_control;
342 insn->bits3.urb.allocate = allocate;
343 insn->bits3.urb.used = used; /* ? */
344 insn->bits3.urb.complete = complete;
345 insn->bits3.urb.response_length = response_length;
346 insn->bits3.urb.msg_length = msg_length;
347 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
348 insn->bits3.urb.end_of_thread = end_of_thread;
349 }
350 }
351
352 static void brw_set_dp_write_message( struct brw_context *brw,
353 struct brw_instruction *insn,
354 GLuint binding_table_index,
355 GLuint msg_control,
356 GLuint msg_type,
357 GLuint msg_length,
358 GLuint pixel_scoreboard_clear,
359 GLuint response_length,
360 GLuint end_of_thread )
361 {
362 brw_set_src1(insn, brw_imm_d(0));
363
364 if (BRW_IS_IGDNG(brw)) {
365 insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
366 insn->bits3.dp_write_igdng.msg_control = msg_control;
367 insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
368 insn->bits3.dp_write_igdng.msg_type = msg_type;
369 insn->bits3.dp_write_igdng.send_commit_msg = 0;
370 insn->bits3.dp_write_igdng.header_present = 1;
371 insn->bits3.dp_write_igdng.response_length = response_length;
372 insn->bits3.dp_write_igdng.msg_length = msg_length;
373 insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
374 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
375 insn->bits2.send_igdng.end_of_thread = end_of_thread;
376 } else {
377 insn->bits3.dp_write.binding_table_index = binding_table_index;
378 insn->bits3.dp_write.msg_control = msg_control;
379 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
380 insn->bits3.dp_write.msg_type = msg_type;
381 insn->bits3.dp_write.send_commit_msg = 0;
382 insn->bits3.dp_write.response_length = response_length;
383 insn->bits3.dp_write.msg_length = msg_length;
384 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
385 insn->bits3.dp_write.end_of_thread = end_of_thread;
386 }
387 }
388
389 static void brw_set_dp_read_message( struct brw_context *brw,
390 struct brw_instruction *insn,
391 GLuint binding_table_index,
392 GLuint msg_control,
393 GLuint msg_type,
394 GLuint target_cache,
395 GLuint msg_length,
396 GLuint response_length,
397 GLuint end_of_thread )
398 {
399 brw_set_src1(insn, brw_imm_d(0));
400
401 if (BRW_IS_IGDNG(brw)) {
402 insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
403 insn->bits3.dp_read_igdng.msg_control = msg_control;
404 insn->bits3.dp_read_igdng.msg_type = msg_type;
405 insn->bits3.dp_read_igdng.target_cache = target_cache;
406 insn->bits3.dp_read_igdng.header_present = 1;
407 insn->bits3.dp_read_igdng.response_length = response_length;
408 insn->bits3.dp_read_igdng.msg_length = msg_length;
409 insn->bits3.dp_read_igdng.pad1 = 0;
410 insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
411 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
412 insn->bits2.send_igdng.end_of_thread = end_of_thread;
413 } else {
414 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
415 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
416 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
417 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
418 insn->bits3.dp_read.response_length = response_length; /*16:19*/
419 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
420 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
421 insn->bits3.dp_read.pad1 = 0; /*28:30*/
422 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
423 }
424 }
425
426 static void brw_set_sampler_message(struct brw_context *brw,
427 struct brw_instruction *insn,
428 GLuint binding_table_index,
429 GLuint sampler,
430 GLuint msg_type,
431 GLuint response_length,
432 GLuint msg_length,
433 GLboolean eot,
434 GLuint header_present,
435 GLuint simd_mode)
436 {
437 assert(eot == 0);
438 brw_set_src1(insn, brw_imm_d(0));
439
440 if (BRW_IS_IGDNG(brw)) {
441 insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
442 insn->bits3.sampler_igdng.sampler = sampler;
443 insn->bits3.sampler_igdng.msg_type = msg_type;
444 insn->bits3.sampler_igdng.simd_mode = simd_mode;
445 insn->bits3.sampler_igdng.header_present = header_present;
446 insn->bits3.sampler_igdng.response_length = response_length;
447 insn->bits3.sampler_igdng.msg_length = msg_length;
448 insn->bits3.sampler_igdng.end_of_thread = eot;
449 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
450 insn->bits2.send_igdng.end_of_thread = eot;
451 } else if (BRW_IS_G4X(brw)) {
452 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
453 insn->bits3.sampler_g4x.sampler = sampler;
454 insn->bits3.sampler_g4x.msg_type = msg_type;
455 insn->bits3.sampler_g4x.response_length = response_length;
456 insn->bits3.sampler_g4x.msg_length = msg_length;
457 insn->bits3.sampler_g4x.end_of_thread = eot;
458 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
459 } else {
460 insn->bits3.sampler.binding_table_index = binding_table_index;
461 insn->bits3.sampler.sampler = sampler;
462 insn->bits3.sampler.msg_type = msg_type;
463 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
464 insn->bits3.sampler.response_length = response_length;
465 insn->bits3.sampler.msg_length = msg_length;
466 insn->bits3.sampler.end_of_thread = eot;
467 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
468 }
469 }
470
471
472
473 static struct brw_instruction *next_insn( struct brw_compile *p,
474 GLuint opcode )
475 {
476 struct brw_instruction *insn;
477
478 if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
479 {
480 if (p->nr_insn)
481 brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
482 }
483
484 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
485
486 insn = &p->store[p->nr_insn++];
487 memcpy(insn, p->current, sizeof(*insn));
488
489 /* Reset this one-shot flag:
490 */
491
492 if (p->current->header.destreg__conditionalmod) {
493 p->current->header.destreg__conditionalmod = 0;
494 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
495 }
496
497 insn->header.opcode = opcode;
498 return insn;
499 }
500
501
502 static struct brw_instruction *brw_alu1( struct brw_compile *p,
503 GLuint opcode,
504 struct brw_reg dest,
505 struct brw_reg src )
506 {
507 struct brw_instruction *insn = next_insn(p, opcode);
508 brw_set_dest(insn, dest);
509 brw_set_src0(insn, src);
510 return insn;
511 }
512
513 static struct brw_instruction *brw_alu2(struct brw_compile *p,
514 GLuint opcode,
515 struct brw_reg dest,
516 struct brw_reg src0,
517 struct brw_reg src1 )
518 {
519 struct brw_instruction *insn = next_insn(p, opcode);
520 brw_set_dest(insn, dest);
521 brw_set_src0(insn, src0);
522 brw_set_src1(insn, src1);
523 return insn;
524 }
525
526
527 /***********************************************************************
528 * Convenience routines.
529 */
/* Define brw_<OPNAME>(): a public one-source emitter that forwards to
 * brw_alu1() with the opcode BRW_OPCODE_<OPNAME>.
 */
#define ALU1(OPNAME) \
struct brw_instruction *brw_##OPNAME(struct brw_compile *p, \
                                     struct brw_reg dest, \
                                     struct brw_reg src0) \
{ \
   return brw_alu1(p, BRW_OPCODE_##OPNAME, dest, src0); \
}
537
/* Define brw_<OPNAME>(): a public two-source emitter that forwards to
 * brw_alu2() with the opcode BRW_OPCODE_<OPNAME>.
 */
#define ALU2(OPNAME) \
struct brw_instruction *brw_##OPNAME(struct brw_compile *p, \
                                     struct brw_reg dest, \
                                     struct brw_reg src0, \
                                     struct brw_reg src1) \
{ \
   return brw_alu2(p, BRW_OPCODE_##OPNAME, dest, src0, src1); \
}
546
547
548 ALU1(MOV)
549 ALU2(SEL)
550 ALU1(NOT)
551 ALU2(AND)
552 ALU2(OR)
553 ALU2(XOR)
554 ALU2(SHR)
555 ALU2(SHL)
556 ALU2(RSR)
557 ALU2(RSL)
558 ALU2(ASR)
559 ALU2(ADD)
560 ALU2(MUL)
561 ALU1(FRC)
562 ALU1(RNDD)
563 ALU1(RNDZ)
564 ALU2(MAC)
565 ALU2(MACH)
566 ALU1(LZD)
567 ALU2(DP4)
568 ALU2(DPH)
569 ALU2(DP3)
570 ALU2(DP2)
571 ALU2(LINE)
572
573
574
575
576 void brw_NOP(struct brw_compile *p)
577 {
578 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
579 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
580 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
581 brw_set_src1(insn, brw_imm_ud(0x0));
582 }
583
584
585
586
587
588 /***********************************************************************
589 * Comparisons, if/else/endif
590 */
591
592 struct brw_instruction *brw_JMPI(struct brw_compile *p,
593 struct brw_reg dest,
594 struct brw_reg src0,
595 struct brw_reg src1)
596 {
597 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
598
599 insn->header.execution_size = 1;
600 insn->header.compression_control = BRW_COMPRESSION_NONE;
601 insn->header.mask_control = BRW_MASK_DISABLE;
602
603 p->current->header.predicate_control = BRW_PREDICATE_NONE;
604
605 return insn;
606 }
607
608 /* EU takes the value from the flag register and pushes it onto some
609 * sort of a stack (presumably merging with any flag value already on
610 * the stack). Within an if block, the flags at the top of the stack
611 * control execution on each channel of the unit, eg. on each of the
612 * 16 pixel values in our wm programs.
613 *
614 * When the matching 'else' instruction is reached (presumably by
615 * countdown of the instruction count patched in by our ELSE/ENDIF
616 * functions), the relevent flags are inverted.
617 *
618 * When the matching 'endif' instruction is reached, the flags are
619 * popped off. If the stack is now empty, normal execution resumes.
620 *
621 * No attempt is made to deal with stack overflow (14 elements?).
622 */
623 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
624 {
625 struct brw_instruction *insn;
626
627 if (p->single_program_flow) {
628 assert(execute_size == BRW_EXECUTE_1);
629
630 insn = next_insn(p, BRW_OPCODE_ADD);
631 insn->header.predicate_inverse = 1;
632 } else {
633 insn = next_insn(p, BRW_OPCODE_IF);
634 }
635
636 /* Override the defaults for this instruction:
637 */
638 brw_set_dest(insn, brw_ip_reg());
639 brw_set_src0(insn, brw_ip_reg());
640 brw_set_src1(insn, brw_imm_d(0x0));
641
642 insn->header.execution_size = execute_size;
643 insn->header.compression_control = BRW_COMPRESSION_NONE;
644 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
645 insn->header.mask_control = BRW_MASK_ENABLE;
646 if (!p->single_program_flow)
647 insn->header.thread_control = BRW_THREAD_SWITCH;
648
649 p->current->header.predicate_control = BRW_PREDICATE_NONE;
650
651 return insn;
652 }
653
654
655 struct brw_instruction *brw_ELSE(struct brw_compile *p,
656 struct brw_instruction *if_insn)
657 {
658 struct brw_instruction *insn;
659 GLuint br = 1;
660
661 if (BRW_IS_IGDNG(p->brw))
662 br = 2;
663
664 if (p->single_program_flow) {
665 insn = next_insn(p, BRW_OPCODE_ADD);
666 } else {
667 insn = next_insn(p, BRW_OPCODE_ELSE);
668 }
669
670 brw_set_dest(insn, brw_ip_reg());
671 brw_set_src0(insn, brw_ip_reg());
672 brw_set_src1(insn, brw_imm_d(0x0));
673
674 insn->header.compression_control = BRW_COMPRESSION_NONE;
675 insn->header.execution_size = if_insn->header.execution_size;
676 insn->header.mask_control = BRW_MASK_ENABLE;
677 if (!p->single_program_flow)
678 insn->header.thread_control = BRW_THREAD_SWITCH;
679
680 /* Patch the if instruction to point at this instruction.
681 */
682 if (p->single_program_flow) {
683 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
684
685 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
686 } else {
687 assert(if_insn->header.opcode == BRW_OPCODE_IF);
688
689 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
690 if_insn->bits3.if_else.pop_count = 0;
691 if_insn->bits3.if_else.pad0 = 0;
692 }
693
694 return insn;
695 }
696
697 void brw_ENDIF(struct brw_compile *p,
698 struct brw_instruction *patch_insn)
699 {
700 GLuint br = 1;
701
702 if (BRW_IS_IGDNG(p->brw))
703 br = 2;
704
705 if (p->single_program_flow) {
706 /* In single program flow mode, there's no need to execute an ENDIF,
707 * since we don't need to do any stack operations, and if we're executing
708 * currently, we want to just continue executing.
709 */
710 struct brw_instruction *next = &p->store[p->nr_insn];
711
712 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
713
714 patch_insn->bits3.ud = (next - patch_insn) * 16;
715 } else {
716 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
717
718 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
719 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
720 brw_set_src1(insn, brw_imm_d(0x0));
721
722 insn->header.compression_control = BRW_COMPRESSION_NONE;
723 insn->header.execution_size = patch_insn->header.execution_size;
724 insn->header.mask_control = BRW_MASK_ENABLE;
725 insn->header.thread_control = BRW_THREAD_SWITCH;
726
727 assert(patch_insn->bits3.if_else.jump_count == 0);
728
729 /* Patch the if or else instructions to point at this or the next
730 * instruction respectively.
731 */
732 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
733 /* Automagically turn it into an IFF:
734 */
735 patch_insn->header.opcode = BRW_OPCODE_IFF;
736 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
737 patch_insn->bits3.if_else.pop_count = 0;
738 patch_insn->bits3.if_else.pad0 = 0;
739 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
740 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
741 patch_insn->bits3.if_else.pop_count = 1;
742 patch_insn->bits3.if_else.pad0 = 0;
743 } else {
744 assert(0);
745 }
746
747 /* Also pop item off the stack in the endif instruction:
748 */
749 insn->bits3.if_else.jump_count = 0;
750 insn->bits3.if_else.pop_count = 1;
751 insn->bits3.if_else.pad0 = 0;
752 }
753 }
754
755 struct brw_instruction *brw_BREAK(struct brw_compile *p)
756 {
757 struct brw_instruction *insn;
758 insn = next_insn(p, BRW_OPCODE_BREAK);
759 brw_set_dest(insn, brw_ip_reg());
760 brw_set_src0(insn, brw_ip_reg());
761 brw_set_src1(insn, brw_imm_d(0x0));
762 insn->header.compression_control = BRW_COMPRESSION_NONE;
763 insn->header.execution_size = BRW_EXECUTE_8;
764 /* insn->header.mask_control = BRW_MASK_DISABLE; */
765 insn->bits3.if_else.pad0 = 0;
766 return insn;
767 }
768
769 struct brw_instruction *brw_CONT(struct brw_compile *p)
770 {
771 struct brw_instruction *insn;
772 insn = next_insn(p, BRW_OPCODE_CONTINUE);
773 brw_set_dest(insn, brw_ip_reg());
774 brw_set_src0(insn, brw_ip_reg());
775 brw_set_src1(insn, brw_imm_d(0x0));
776 insn->header.compression_control = BRW_COMPRESSION_NONE;
777 insn->header.execution_size = BRW_EXECUTE_8;
778 /* insn->header.mask_control = BRW_MASK_DISABLE; */
779 insn->bits3.if_else.pad0 = 0;
780 return insn;
781 }
782
783 /* DO/WHILE loop:
784 */
785 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
786 {
787 if (p->single_program_flow) {
788 return &p->store[p->nr_insn];
789 } else {
790 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
791
792 /* Override the defaults for this instruction:
793 */
794 brw_set_dest(insn, brw_null_reg());
795 brw_set_src0(insn, brw_null_reg());
796 brw_set_src1(insn, brw_null_reg());
797
798 insn->header.compression_control = BRW_COMPRESSION_NONE;
799 insn->header.execution_size = execute_size;
800 insn->header.predicate_control = BRW_PREDICATE_NONE;
801 /* insn->header.mask_control = BRW_MASK_ENABLE; */
802 /* insn->header.mask_control = BRW_MASK_DISABLE; */
803
804 return insn;
805 }
806 }
807
808
809
810 struct brw_instruction *brw_WHILE(struct brw_compile *p,
811 struct brw_instruction *do_insn)
812 {
813 struct brw_instruction *insn;
814 GLuint br = 1;
815
816 if (BRW_IS_IGDNG(p->brw))
817 br = 2;
818
819 if (p->single_program_flow)
820 insn = next_insn(p, BRW_OPCODE_ADD);
821 else
822 insn = next_insn(p, BRW_OPCODE_WHILE);
823
824 brw_set_dest(insn, brw_ip_reg());
825 brw_set_src0(insn, brw_ip_reg());
826 brw_set_src1(insn, brw_imm_d(0x0));
827
828 insn->header.compression_control = BRW_COMPRESSION_NONE;
829
830 if (p->single_program_flow) {
831 insn->header.execution_size = BRW_EXECUTE_1;
832
833 insn->bits3.d = (do_insn - insn) * 16;
834 } else {
835 insn->header.execution_size = do_insn->header.execution_size;
836
837 assert(do_insn->header.opcode == BRW_OPCODE_DO);
838 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
839 insn->bits3.if_else.pop_count = 0;
840 insn->bits3.if_else.pad0 = 0;
841 }
842
843 /* insn->header.mask_control = BRW_MASK_ENABLE; */
844
845 /* insn->header.mask_control = BRW_MASK_DISABLE; */
846 p->current->header.predicate_control = BRW_PREDICATE_NONE;
847 return insn;
848 }
849
850
851 /* FORWARD JUMPS:
852 */
853 void brw_land_fwd_jump(struct brw_compile *p,
854 struct brw_instruction *jmp_insn)
855 {
856 struct brw_instruction *landing = &p->store[p->nr_insn];
857 GLuint jmpi = 1;
858
859 if (BRW_IS_IGDNG(p->brw))
860 jmpi = 2;
861
862 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
863 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
864
865 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
866 }
867
868
869
870 /* To integrate with the above, it makes sense that the comparison
871 * instruction should populate the flag register. It might be simpler
872 * just to use the flag reg for most WM tasks?
873 */
874 void brw_CMP(struct brw_compile *p,
875 struct brw_reg dest,
876 GLuint conditional,
877 struct brw_reg src0,
878 struct brw_reg src1)
879 {
880 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
881
882 insn->header.destreg__conditionalmod = conditional;
883 brw_set_dest(insn, dest);
884 brw_set_src0(insn, src0);
885 brw_set_src1(insn, src1);
886
887 /* guess_execution_size(insn, src0); */
888
889
890 /* Make it so that future instructions will use the computed flag
891 * value until brw_set_predicate_control_flag_value() is called
892 * again.
893 */
894 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
895 dest.nr == 0) {
896 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
897 p->flag_value = 0xff;
898 }
899 }
900
901
902
903 /***********************************************************************
904 * Helpers for the various SEND message types:
905 */
906
907 /** Extended math function, float[8].
908 */
909 void brw_math( struct brw_compile *p,
910 struct brw_reg dest,
911 GLuint function,
912 GLuint saturate,
913 GLuint msg_reg_nr,
914 struct brw_reg src,
915 GLuint data_type,
916 GLuint precision )
917 {
918 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
919 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
920 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
921
922 /* Example code doesn't set predicate_control for send
923 * instructions.
924 */
925 insn->header.predicate_control = 0;
926 insn->header.destreg__conditionalmod = msg_reg_nr;
927
928 brw_set_dest(insn, dest);
929 brw_set_src0(insn, src);
930 brw_set_math_message(p->brw,
931 insn,
932 msg_length, response_length,
933 function,
934 BRW_MATH_INTEGER_UNSIGNED,
935 precision,
936 saturate,
937 data_type);
938 }
939
940 /**
941 * Extended math function, float[16].
942 * Use 2 send instructions.
943 */
944 void brw_math_16( struct brw_compile *p,
945 struct brw_reg dest,
946 GLuint function,
947 GLuint saturate,
948 GLuint msg_reg_nr,
949 struct brw_reg src,
950 GLuint precision )
951 {
952 struct brw_instruction *insn;
953 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
954 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
955
956 /* First instruction:
957 */
958 brw_push_insn_state(p);
959 brw_set_predicate_control_flag_value(p, 0xff);
960 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
961
962 insn = next_insn(p, BRW_OPCODE_SEND);
963 insn->header.destreg__conditionalmod = msg_reg_nr;
964
965 brw_set_dest(insn, dest);
966 brw_set_src0(insn, src);
967 brw_set_math_message(p->brw,
968 insn,
969 msg_length, response_length,
970 function,
971 BRW_MATH_INTEGER_UNSIGNED,
972 precision,
973 saturate,
974 BRW_MATH_DATA_VECTOR);
975
976 /* Second instruction:
977 */
978 insn = next_insn(p, BRW_OPCODE_SEND);
979 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
980 insn->header.destreg__conditionalmod = msg_reg_nr+1;
981
982 brw_set_dest(insn, offset(dest,1));
983 brw_set_src0(insn, src);
984 brw_set_math_message(p->brw,
985 insn,
986 msg_length, response_length,
987 function,
988 BRW_MATH_INTEGER_UNSIGNED,
989 precision,
990 saturate,
991 BRW_MATH_DATA_VECTOR);
992
993 brw_pop_insn_state(p);
994 }
995
996
997 /**
998 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
999 * Scratch offset should be a multiple of 64.
1000 * Used for register spilling.
1001 */
1002 void brw_dp_WRITE_16( struct brw_compile *p,
1003 struct brw_reg src,
1004 GLuint scratch_offset )
1005 {
1006 GLuint msg_reg_nr = 1;
1007 {
1008 brw_push_insn_state(p);
1009 brw_set_mask_control(p, BRW_MASK_DISABLE);
1010 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1011
1012 /* set message header global offset field (reg 0, element 2) */
1013 brw_MOV(p,
1014 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1015 brw_imm_d(scratch_offset));
1016
1017 brw_pop_insn_state(p);
1018 }
1019
1020 {
1021 GLuint msg_length = 3;
1022 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1023 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1024
1025 insn->header.predicate_control = 0; /* XXX */
1026 insn->header.compression_control = BRW_COMPRESSION_NONE;
1027 insn->header.destreg__conditionalmod = msg_reg_nr;
1028
1029 brw_set_dest(insn, dest);
1030 brw_set_src0(insn, src);
1031
1032 brw_set_dp_write_message(p->brw,
1033 insn,
1034 255, /* binding table index (255=stateless) */
1035 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1036 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1037 msg_length,
1038 0, /* pixel scoreboard */
1039 0, /* response_length */
1040 0); /* eot */
1041 }
1042 }
1043
1044
1045 /**
1046 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1047 * Scratch offset should be a multiple of 64.
1048 * Used for register spilling.
1049 */
1050 void brw_dp_READ_16( struct brw_compile *p,
1051 struct brw_reg dest,
1052 GLuint scratch_offset )
1053 {
1054 GLuint msg_reg_nr = 1;
1055 {
1056 brw_push_insn_state(p);
1057 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1058 brw_set_mask_control(p, BRW_MASK_DISABLE);
1059
1060 /* set message header global offset field (reg 0, element 2) */
1061 brw_MOV(p,
1062 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1063 brw_imm_d(scratch_offset));
1064
1065 brw_pop_insn_state(p);
1066 }
1067
1068 {
1069 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1070
1071 insn->header.predicate_control = 0; /* XXX */
1072 insn->header.compression_control = BRW_COMPRESSION_NONE;
1073 insn->header.destreg__conditionalmod = msg_reg_nr;
1074
1075 brw_set_dest(insn, dest); /* UW? */
1076 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1077
1078 brw_set_dp_read_message(p->brw,
1079 insn,
1080 255, /* binding table index (255=stateless) */
1081 3, /* msg_control (3 means 4 Owords) */
1082 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1083 1, /* target cache (render/scratch) */
1084 1, /* msg_length */
1085 2, /* response_length */
1086 0); /* eot */
1087 }
1088 }
1089
1090
1091 /**
1092 * Read a float[4] vector from the data port Data Cache (const buffer).
1093 * Location (in buffer) should be a multiple of 16.
1094 * Used for fetching shader constants.
1095 * If relAddr is true, we'll do an indirect fetch using the address register.
1096 */
1097 void brw_dp_READ_4( struct brw_compile *p,
1098 struct brw_reg dest,
1099 GLboolean relAddr,
1100 GLuint location,
1101 GLuint bind_table_index )
1102 {
1103 /* XXX: relAddr not implemented */
1104 GLuint msg_reg_nr = 1;
1105 {
1106 struct brw_reg b;
1107 brw_push_insn_state(p);
1108 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1109 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1110 brw_set_mask_control(p, BRW_MASK_DISABLE);
1111
1112 /* Setup MRF[1] with location/offset into const buffer */
1113 b = brw_message_reg(msg_reg_nr);
1114 b = retype(b, BRW_REGISTER_TYPE_UD);
1115 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1116 * when the docs say only dword[2] should be set. Hmmm. But it works.
1117 */
1118 brw_MOV(p, b, brw_imm_ud(location));
1119 brw_pop_insn_state(p);
1120 }
1121
1122 {
1123 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1124
1125 insn->header.predicate_control = BRW_PREDICATE_NONE;
1126 insn->header.compression_control = BRW_COMPRESSION_NONE;
1127 insn->header.destreg__conditionalmod = msg_reg_nr;
1128 insn->header.mask_control = BRW_MASK_DISABLE;
1129
1130 /* cast dest to a uword[8] vector */
1131 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1132
1133 brw_set_dest(insn, dest);
1134 brw_set_src0(insn, brw_null_reg());
1135
1136 brw_set_dp_read_message(p->brw,
1137 insn,
1138 bind_table_index,
1139 0, /* msg_control (0 means 1 Oword) */
1140 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1141 0, /* source cache = data cache */
1142 1, /* msg_length */
1143 1, /* response_length (1 Oword) */
1144 0); /* eot */
1145 }
1146 }
1147
1148
1149 /**
1150 * Read float[4] constant(s) from VS constant buffer.
1151 * For relative addressing, two float[4] constants will be read into 'dest'.
1152 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1153 */
1154 void brw_dp_READ_4_vs(struct brw_compile *p,
1155 struct brw_reg dest,
1156 GLuint oword,
1157 GLboolean relAddr,
1158 struct brw_reg addrReg,
1159 GLuint location,
1160 GLuint bind_table_index)
1161 {
1162 GLuint msg_reg_nr = 1;
1163
1164 assert(oword < 2);
1165 /*
1166 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1167 location, msg_reg_nr);
1168 */
1169
1170 /* Setup MRF[1] with location/offset into const buffer */
1171 {
1172 struct brw_reg b;
1173
1174 brw_push_insn_state(p);
1175 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1176 brw_set_mask_control(p, BRW_MASK_DISABLE);
1177 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1178 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1179
1180 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1181 * when the docs say only dword[2] should be set. Hmmm. But it works.
1182 */
1183 b = brw_message_reg(msg_reg_nr);
1184 b = retype(b, BRW_REGISTER_TYPE_UD);
1185 /*b = get_element_ud(b, 2);*/
1186 if (relAddr) {
1187 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1188 }
1189 else {
1190 brw_MOV(p, b, brw_imm_ud(location));
1191 }
1192
1193 brw_pop_insn_state(p);
1194 }
1195
1196 {
1197 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1198
1199 insn->header.predicate_control = BRW_PREDICATE_NONE;
1200 insn->header.compression_control = BRW_COMPRESSION_NONE;
1201 insn->header.destreg__conditionalmod = msg_reg_nr;
1202 insn->header.mask_control = BRW_MASK_DISABLE;
1203 /*insn->header.access_mode = BRW_ALIGN_16;*/
1204
1205 brw_set_dest(insn, dest);
1206 brw_set_src0(insn, brw_null_reg());
1207
1208 brw_set_dp_read_message(p->brw,
1209 insn,
1210 bind_table_index,
1211 oword, /* 0 = lower Oword, 1 = upper Oword */
1212 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1213 0, /* source cache = data cache */
1214 1, /* msg_length */
1215 1, /* response_length (1 Oword) */
1216 0); /* eot */
1217 }
1218 }
1219
1220
1221
1222 void brw_fb_WRITE(struct brw_compile *p,
1223 struct brw_reg dest,
1224 GLuint msg_reg_nr,
1225 struct brw_reg src0,
1226 GLuint binding_table_index,
1227 GLuint msg_length,
1228 GLuint response_length,
1229 GLboolean eot)
1230 {
1231 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1232
1233 insn->header.predicate_control = 0; /* XXX */
1234 insn->header.compression_control = BRW_COMPRESSION_NONE;
1235 insn->header.destreg__conditionalmod = msg_reg_nr;
1236
1237 brw_set_dest(insn, dest);
1238 brw_set_src0(insn, src0);
1239 brw_set_dp_write_message(p->brw,
1240 insn,
1241 binding_table_index,
1242 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1243 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1244 msg_length,
1245 1, /* pixel scoreboard */
1246 response_length,
1247 eot);
1248 }
1249
1250
1251 /**
1252 * Texture sample instruction.
1253 * Note: the msg_type plus msg_length values determine exactly what kind
1254 * of sampling operation is performed. See volume 4, page 161 of docs.
1255 */
1256 void brw_SAMPLE(struct brw_compile *p,
1257 struct brw_reg dest,
1258 GLuint msg_reg_nr,
1259 struct brw_reg src0,
1260 GLuint binding_table_index,
1261 GLuint sampler,
1262 GLuint writemask,
1263 GLuint msg_type,
1264 GLuint response_length,
1265 GLuint msg_length,
1266 GLboolean eot,
1267 GLuint header_present,
1268 GLuint simd_mode)
1269 {
1270 GLboolean need_stall = 0;
1271
1272 if (writemask == 0) {
1273 /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */
1274 return;
1275 }
1276
1277 /* Hardware doesn't do destination dependency checking on send
1278 * instructions properly. Add a workaround which generates the
1279 * dependency by other means. In practice it seems like this bug
1280 * only crops up for texture samples, and only where registers are
1281 * written by the send and then written again later without being
1282 * read in between. Luckily for us, we already track that
1283 * information and use it to modify the writemask for the
1284 * instruction, so that is a guide for whether a workaround is
1285 * needed.
1286 */
1287 if (writemask != BRW_WRITEMASK_XYZW) {
1288 GLuint dst_offset = 0;
1289 GLuint i, newmask = 0, len = 0;
1290
1291 for (i = 0; i < 4; i++) {
1292 if (writemask & (1<<i))
1293 break;
1294 dst_offset += 2;
1295 }
1296 for (; i < 4; i++) {
1297 if (!(writemask & (1<<i)))
1298 break;
1299 newmask |= 1<<i;
1300 len++;
1301 }
1302
1303 if (newmask != writemask) {
1304 need_stall = 1;
1305 /* debug_printf("need stall %x %x\n", newmask , writemask); */
1306 }
1307 else {
1308 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1309
1310 newmask = ~newmask & BRW_WRITEMASK_XYZW;
1311
1312 brw_push_insn_state(p);
1313
1314 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1315 brw_set_mask_control(p, BRW_MASK_DISABLE);
1316
1317 brw_MOV(p, m1, brw_vec8_grf(0,0));
1318 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1319
1320 brw_pop_insn_state(p);
1321
1322 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1323 dest = offset(dest, dst_offset);
1324 response_length = len * 2;
1325 }
1326 }
1327
1328 {
1329 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1330
1331 insn->header.predicate_control = 0; /* XXX */
1332 insn->header.compression_control = BRW_COMPRESSION_NONE;
1333 insn->header.destreg__conditionalmod = msg_reg_nr;
1334
1335 brw_set_dest(insn, dest);
1336 brw_set_src0(insn, src0);
1337 brw_set_sampler_message(p->brw, insn,
1338 binding_table_index,
1339 sampler,
1340 msg_type,
1341 response_length,
1342 msg_length,
1343 eot,
1344 header_present,
1345 simd_mode);
1346 }
1347
1348 if (need_stall) {
1349 struct brw_reg reg = vec8(offset(dest, response_length-1));
1350
1351 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1352 */
1353 brw_push_insn_state(p);
1354 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1355 brw_MOV(p, reg, reg);
1356 brw_pop_insn_state(p);
1357 }
1358
1359 }
1360
1361 /* All these variables are pretty confusing - we might be better off
1362 * using bitmasks and macros for this, in the old style. Or perhaps
1363 * just having the caller instantiate the fields in dword3 itself.
1364 */
1365 void brw_urb_WRITE(struct brw_compile *p,
1366 struct brw_reg dest,
1367 GLuint msg_reg_nr,
1368 struct brw_reg src0,
1369 GLboolean allocate,
1370 GLboolean used,
1371 GLuint msg_length,
1372 GLuint response_length,
1373 GLboolean eot,
1374 GLboolean writes_complete,
1375 GLuint offset,
1376 GLuint swizzle)
1377 {
1378 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1379
1380 assert(msg_length < BRW_MAX_MRF);
1381
1382 brw_set_dest(insn, dest);
1383 brw_set_src0(insn, src0);
1384 brw_set_src1(insn, brw_imm_d(0));
1385
1386 insn->header.destreg__conditionalmod = msg_reg_nr;
1387
1388 brw_set_urb_message(p->brw,
1389 insn,
1390 allocate,
1391 used,
1392 msg_length,
1393 response_length,
1394 eot,
1395 writes_complete,
1396 offset,
1397 swizzle);
1398 }
1399
1400 void brw_ff_sync(struct brw_compile *p,
1401 struct brw_reg dest,
1402 GLuint msg_reg_nr,
1403 struct brw_reg src0,
1404 GLboolean allocate,
1405 GLboolean used,
1406 GLuint msg_length,
1407 GLuint response_length,
1408 GLboolean eot,
1409 GLboolean writes_complete,
1410 GLuint offset,
1411 GLuint swizzle)
1412 {
1413 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1414
1415 assert(msg_length < 16);
1416
1417 brw_set_dest(insn, dest);
1418 brw_set_src0(insn, src0);
1419 brw_set_src1(insn, brw_imm_d(0));
1420
1421 insn->header.destreg__conditionalmod = msg_reg_nr;
1422
1423 brw_set_ff_sync_message(p->brw,
1424 insn,
1425 allocate,
1426 used,
1427 msg_length,
1428 response_length,
1429 eot,
1430 writes_complete,
1431 offset,
1432 swizzle);
1433 }