i956: Set the execution size correctly for scratch space writes.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
/* Pack a destination register description into bits1 of the instruction,
 * handling direct and register-indirect addressing in both align1 and
 * align16 access modes.  Also derives the instruction's execution size
 * from the destination width (see guess_execution_size).
 */
static void brw_set_dest( struct brw_instruction *insn,
			  struct brw_reg dest )
{
   /* Only range-check ordinary register files; ARF/MRF numbers encode
    * additional information in the register number field.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* Horizontal stride 0 is not valid for a destination; silently
	  * promote it to 1.  Note dest is a by-value copy, so this does
	  * not affect the caller.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: subreg is in 16-byte units and a writemask replaces
	  * the horizontal stride field.
	  */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Same stride-0 -> stride-1 promotion as the direct case. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(insn, dest);
}
101
102 static void brw_set_src0( struct brw_instruction *insn,
103 struct brw_reg reg )
104 {
105 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
106 assert(reg.nr < 128);
107
108 insn->bits1.da1.src0_reg_file = reg.file;
109 insn->bits1.da1.src0_reg_type = reg.type;
110 insn->bits2.da1.src0_abs = reg.abs;
111 insn->bits2.da1.src0_negate = reg.negate;
112 insn->bits2.da1.src0_address_mode = reg.address_mode;
113
114 if (reg.file == BRW_IMMEDIATE_VALUE) {
115 insn->bits3.ud = reg.dw1.ud;
116
117 /* Required to set some fields in src1 as well:
118 */
119 insn->bits1.da1.src1_reg_file = 0; /* arf */
120 insn->bits1.da1.src1_reg_type = reg.type;
121 }
122 else
123 {
124 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
125 if (insn->header.access_mode == BRW_ALIGN_1) {
126 insn->bits2.da1.src0_subreg_nr = reg.subnr;
127 insn->bits2.da1.src0_reg_nr = reg.nr;
128 }
129 else {
130 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
131 insn->bits2.da16.src0_reg_nr = reg.nr;
132 }
133 }
134 else {
135 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
136
137 if (insn->header.access_mode == BRW_ALIGN_1) {
138 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
139 }
140 else {
141 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
142 }
143 }
144
145 if (insn->header.access_mode == BRW_ALIGN_1) {
146 if (reg.width == BRW_WIDTH_1 &&
147 insn->header.execution_size == BRW_EXECUTE_1) {
148 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
149 insn->bits2.da1.src0_width = BRW_WIDTH_1;
150 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
151 }
152 else {
153 insn->bits2.da1.src0_horiz_stride = reg.hstride;
154 insn->bits2.da1.src0_width = reg.width;
155 insn->bits2.da1.src0_vert_stride = reg.vstride;
156 }
157 }
158 else {
159 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
160 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
161 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
162 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
163
164 /* This is an oddity of the fact we're using the same
165 * descriptions for registers in align_16 as align_1:
166 */
167 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
168 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
169 else
170 insn->bits2.da16.src0_vert_stride = reg.vstride;
171 }
172 }
173 }
174
175
/* Pack the src1 operand into bits1/bits3 of the instruction.  src1 may
 * be an immediate (which fills all of bits3) or a direct-addressed
 * register; indirect addressing is a hardware restriction here.
 */
void brw_set_src1( struct brw_instruction *insn,
		   struct brw_reg reg )
{
   /* src1 cannot name an MRF. */
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16: subreg is in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* Emit a <0;1,0> scalar region when both the register and the
	  * instruction are scalar (mirrors brw_set_src0).
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
240
241
242
/* Fill in the SEND message descriptor (bits3) for the extended-math
 * function unit.  The descriptor layout differs between gen5 and
 * earlier parts.  src1 is first zeroed since the descriptor occupies
 * the src1/bits3 dword.
 */
static void brw_set_math_message( struct brw_context *brw,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      /* On gen5 the shared-function ID moved into bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
281
282
/* Fill in the URB FF_SYNC message descriptor (gen5+ layout only; this
 * helper is not used on earlier parts).  src1 is first zeroed since the
 * descriptor occupies the src1/bits3 dword.
 */
static void brw_set_ff_sync_message(struct brw_context *brw,
				    struct brw_instruction *insn,
				    GLboolean allocate,
				    GLuint response_length,
				    GLboolean end_of_thread)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      /* On gen6+ the SFID lives in the destreg/condmod header field. */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
309
/* Fill in a generic URB write message descriptor, choosing between the
 * pre-gen5 and gen5+ encodings.  src1 is first zeroed since the
 * descriptor occupies the src1/bits3 dword.
 */
static void brw_set_urb_message( struct brw_context *brw,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* ? */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.  Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
358
/* Fill in a dataport write message descriptor (render target / scratch
 * writes, etc.), choosing between the pre-gen5 and gen5 encodings.
 * src1 is first zeroed since the descriptor occupies the src1/bits3
 * dword.
 */
static void brw_set_dp_write_message( struct brw_context *brw,
				      struct brw_instruction *insn,
				      GLuint binding_table_index,
				      GLuint msg_control,
				      GLuint msg_type,
				      GLuint msg_length,
				      GLuint pixel_scoreboard_clear,
				      GLuint response_length,
				      GLuint end_of_thread,
				      GLuint send_commit_msg)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = 1;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      /* On gen5 the shared-function ID and EOT also appear in bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
397
/* Fill in a dataport read message descriptor, choosing between the
 * pre-gen5 and gen5 encodings.  src1 is first zeroed since the
 * descriptor occupies the src1/bits3 dword.  The bit-position comments
 * in the non-gen5 branch document the descriptor layout.
 */
static void brw_set_dp_read_message( struct brw_context *brw,
				     struct brw_instruction *insn,
				     GLuint binding_table_index,
				     GLuint msg_control,
				     GLuint msg_type,
				     GLuint target_cache,
				     GLuint msg_length,
				     GLuint response_length,
				     GLuint end_of_thread )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
   }
}
435
/* Fill in a sampler message descriptor.  Three encodings are handled:
 * gen5, G4x, and original gen4.  src1 is first zeroed since the
 * descriptor occupies the src1/bits3 dword.  EOT is asserted to be 0 —
 * sampler messages never terminate the thread here.
 */
static void brw_set_sampler_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
      insn->bits2.send_gen5.end_of_thread = eot;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      /* Gen4 also carries an explicit return format field. */
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
481
482
483
484 static struct brw_instruction *next_insn( struct brw_compile *p,
485 GLuint opcode )
486 {
487 struct brw_instruction *insn;
488
489 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
490
491 insn = &p->store[p->nr_insn++];
492 memcpy(insn, p->current, sizeof(*insn));
493
494 /* Reset this one-shot flag:
495 */
496
497 if (p->current->header.destreg__conditionalmod) {
498 p->current->header.destreg__conditionalmod = 0;
499 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
500 }
501
502 insn->header.opcode = opcode;
503 return insn;
504 }
505
506
507 static struct brw_instruction *brw_alu1( struct brw_compile *p,
508 GLuint opcode,
509 struct brw_reg dest,
510 struct brw_reg src )
511 {
512 struct brw_instruction *insn = next_insn(p, opcode);
513 brw_set_dest(insn, dest);
514 brw_set_src0(insn, src);
515 return insn;
516 }
517
518 static struct brw_instruction *brw_alu2(struct brw_compile *p,
519 GLuint opcode,
520 struct brw_reg dest,
521 struct brw_reg src0,
522 struct brw_reg src1 )
523 {
524 struct brw_instruction *insn = next_insn(p, opcode);
525 brw_set_dest(insn, dest);
526 brw_set_src0(insn, src0);
527 brw_set_src1(insn, src1);
528 return insn;
529 }
530
531
/***********************************************************************
 * Convenience routines.
 */

/* Define a public one-source emitter, brw_<OP>, forwarding to brw_alu1
 * with the matching BRW_OPCODE_<OP>.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Define a public two-source emitter, brw_<OP>, forwarding to brw_alu2
 * with the matching BRW_OPCODE_<OP>.
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Instantiate the public emitters for every simple ALU opcode. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
578
579
580
581 void brw_NOP(struct brw_compile *p)
582 {
583 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
584 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
585 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
586 brw_set_src1(insn, brw_imm_ud(0x0));
587 }
588
589
590
591
592
593 /***********************************************************************
594 * Comparisons, if/else/endif
595 */
596
/* Emit a JMPI (jump-indexed) instruction.  Always scalar, uncompressed
 * and with masking disabled; also clears the default predication so
 * following instructions are not accidentally predicated.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
612
613 /* EU takes the value from the flag register and pushes it onto some
614 * sort of a stack (presumably merging with any flag value already on
615 * the stack). Within an if block, the flags at the top of the stack
616 * control execution on each channel of the unit, eg. on each of the
617 * 16 pixel values in our wm programs.
618 *
619 * When the matching 'else' instruction is reached (presumably by
620 * countdown of the instruction count patched in by our ELSE/ENDIF
621 * functions), the relevent flags are inverted.
622 *
623 * When the matching 'endif' instruction is reached, the flags are
624 * popped off. If the stack is now empty, normal execution resumes.
625 *
626 * No attempt is made to deal with stack overflow (14 elements?).
627 */
/* Emit an IF instruction (or a predicated ADD-to-IP in single-program-
 * flow mode).  The returned instruction's jump target is patched later
 * by brw_ELSE/brw_ENDIF.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      /* With one channel there is no need for real flow control; an
       * inverted-predicate ADD to the IP register skips the block.
       */
      assert(execute_size == BRW_EXECUTE_1);

      insn = next_insn(p, BRW_OPCODE_ADD);
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication applies to the IF itself, not what follows. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
658
659
/* Emit an ELSE instruction and back-patch the matching IF (or the ADD
 * used in single-program-flow mode) to jump here.  gen5 counts jumps in
 * 64-bit units, hence br = 2 there.
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
				 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen == 5)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* IP offset in bytes (16 bytes per instruction). */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
      if_insn->bits3.if_else.pop_count = 0;
      if_insn->bits3.if_else.pad0 = 0;
   }

   return insn;
}
702
/* Close an if/else block: back-patch the IF (converted to IFF) or ELSE
 * instruction to jump past this point, and — outside single-program-flow
 * mode — emit an ENDIF that pops the mask stack.
 */
void brw_ENDIF(struct brw_compile *p,
	       struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   GLuint br = 1;

   /* gen5 counts jumps in 64-bit units. */
   if (intel->gen == 5)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      /* IP offset in bytes (16 bytes per instruction). */
      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The IF/ELSE must not have been patched already. */
      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 /* Automagically turn it into an IFF:
	  */
	 patch_insn->header.opcode = BRW_OPCODE_IFF;
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 0;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 1;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else {
	 assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}
761
/* Emit a BREAK instruction; the jump counts are patched later by the
 * loop-emission code.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   return insn;
}
775
/* Emit a CONTINUE instruction; the jump counts are patched later by the
 * loop-emission code.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   return insn;
}
789
790 /* DO/WHILE loop:
791 */
/* DO/WHILE loop:
 *
 * Emit a DO instruction, or in single-program-flow mode just return the
 * address of the next instruction slot so brw_WHILE can jump back to it.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   if (p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
814
815
816
/* Emit the WHILE that closes a DO loop, encoding the backward jump to
 * the matching DO (as an ADD-to-IP in single-program-flow mode).  gen5
 * counts jumps in 64-bit units, hence br = 2 there.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen == 5)
      br = 2;

   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      /* Backward IP offset in bytes (16 bytes per instruction). */
      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);
      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

   /* insn->header.mask_control = BRW_MASK_ENABLE; */

   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;
   return insn;
}
857
858
859 /* FORWARD JUMPS:
860 */
/* FORWARD JUMPS:
 *
 * Patch a previously emitted JMPI's immediate to land on the next
 * instruction slot.  gen5 counts jumps in 64-bit units (jmpi = 2).
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (intel->gen == 5)
      jmpi = 2;

   /* Only an immediate-operand JMPI can be patched this way. */
   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
876
877
878
879 /* To integrate with the above, it makes sense that the comparison
880 * instruction should populate the flag register. It might be simpler
881 * just to use the flag reg for most WM tasks?
882 */
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 *
 * Emit a CMP with the given conditional modifier.  When the destination
 * is the null register (ARF nr 0), enable predication on subsequent
 * instructions so they consume the computed flag.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
909
910 /* Issue 'wait' instruction for n1, host could program MMIO
911 to wake up thread. */
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both dest and src0. */
   brw_set_dest(insn, src);
   brw_set_src0(insn, src);
   brw_set_src1(insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
924
925
926 /***********************************************************************
927 * Helpers for the various SEND message types:
928 */
929
930 /** Extended math function, float[8].
931 */
/** Extended math function, float[8].
 *
 * On gen6+ this emits a native MATH instruction; earlier parts send a
 * message to the shared math function unit instead.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW consumes two message registers; SINCOS returns two. */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}
976
977 /**
978 * Extended math function, float[16].
979 * Use 2 send instructions.
980 */
981 void brw_math_16( struct brw_compile *p,
982 struct brw_reg dest,
983 GLuint function,
984 GLuint saturate,
985 GLuint msg_reg_nr,
986 struct brw_reg src,
987 GLuint precision )
988 {
989 struct intel_context *intel = &p->brw->intel;
990 struct brw_instruction *insn;
991 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
992 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
993
994 if (intel->gen >= 6) {
995 insn = next_insn(p, BRW_OPCODE_MATH);
996
997 /* Math is the same ISA format as other opcodes, except that CondModifier
998 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
999 */
1000 insn->header.destreg__conditionalmod = function;
1001
1002 brw_set_dest(insn, dest);
1003 brw_set_src0(insn, src);
1004 brw_set_src1(insn, brw_null_reg());
1005 return;
1006 }
1007
1008 /* First instruction:
1009 */
1010 brw_push_insn_state(p);
1011 brw_set_predicate_control_flag_value(p, 0xff);
1012 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1013
1014 insn = next_insn(p, BRW_OPCODE_SEND);
1015 insn->header.destreg__conditionalmod = msg_reg_nr;
1016
1017 brw_set_dest(insn, dest);
1018 brw_set_src0(insn, src);
1019 brw_set_math_message(p->brw,
1020 insn,
1021 msg_length, response_length,
1022 function,
1023 BRW_MATH_INTEGER_UNSIGNED,
1024 precision,
1025 saturate,
1026 BRW_MATH_DATA_VECTOR);
1027
1028 /* Second instruction:
1029 */
1030 insn = next_insn(p, BRW_OPCODE_SEND);
1031 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1032 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1033
1034 brw_set_dest(insn, offset(dest,1));
1035 brw_set_src0(insn, src);
1036 brw_set_math_message(p->brw,
1037 insn,
1038 msg_length, response_length,
1039 function,
1040 BRW_MATH_INTEGER_UNSIGNED,
1041 precision,
1042 saturate,
1043 BRW_MATH_DATA_VECTOR);
1044
1045 brw_pop_insn_state(p);
1046 }
1047
1048
1049 /**
1050 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1051 * Scratch offset should be a multiple of 64.
1052 * Used for register spilling.
1053 */
1054 void brw_dp_WRITE_16( struct brw_compile *p,
1055 struct brw_reg src,
1056 GLuint scratch_offset )
1057 {
1058 struct intel_context *intel = &p->brw->intel;
1059 GLuint msg_reg_nr = 1;
1060 {
1061 brw_push_insn_state(p);
1062 brw_set_mask_control(p, BRW_MASK_DISABLE);
1063 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1064
1065 /* set message header global offset field (reg 0, element 2) */
1066 brw_MOV(p,
1067 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1068 brw_imm_d(scratch_offset));
1069
1070 brw_pop_insn_state(p);
1071 }
1072
1073 {
1074 GLuint msg_length = 3;
1075 struct brw_reg dest;
1076 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1077 int send_commit_msg;
1078
1079 insn->header.predicate_control = 0; /* XXX */
1080 insn->header.compression_control = BRW_COMPRESSION_NONE;
1081 insn->header.destreg__conditionalmod = msg_reg_nr;
1082
1083 /* Until gen6, writes followed by reads from the same location
1084 * are not guaranteed to be ordered unless write_commit is set.
1085 * If set, then a no-op write is issued to the destination
1086 * register to set a dependency, and a read from the destination
1087 * can be used to ensure the ordering.
1088 *
1089 * For gen6, only writes between different threads need ordering
1090 * protection. Our use of DP writes is all about register
1091 * spilling within a thread.
1092 */
1093 if (intel->gen >= 6) {
1094 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1095 send_commit_msg = 0;
1096 } else {
1097 dest = brw_uw16_grf(0, 0);
1098 send_commit_msg = 1;
1099 }
1100
1101 brw_set_dest(insn, dest);
1102 brw_set_src0(insn, src);
1103
1104 brw_set_dp_write_message(p->brw,
1105 insn,
1106 255, /* binding table index (255=stateless) */
1107 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1108 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1109 msg_length,
1110 0, /* pixel scoreboard */
1111 send_commit_msg, /* response_length */
1112 0, /* eot */
1113 send_commit_msg);
1114 }
1115 }
1116
1117
1118 /**
1119 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1120 * Scratch offset should be a multiple of 64.
1121 * Used for register spilling.
1122 */
1123 void brw_dp_READ_16( struct brw_compile *p,
1124 struct brw_reg dest,
1125 GLuint scratch_offset )
1126 {
1127 GLuint msg_reg_nr = 1;
1128 {
1129 brw_push_insn_state(p);
1130 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1131 brw_set_mask_control(p, BRW_MASK_DISABLE);
1132
1133 /* set message header global offset field (reg 0, element 2) */
1134 brw_MOV(p,
1135 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1136 brw_imm_d(scratch_offset));
1137
1138 brw_pop_insn_state(p);
1139 }
1140
1141 {
1142 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1143
1144 insn->header.predicate_control = 0; /* XXX */
1145 insn->header.compression_control = BRW_COMPRESSION_NONE;
1146 insn->header.destreg__conditionalmod = msg_reg_nr;
1147
1148 brw_set_dest(insn, dest); /* UW? */
1149 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1150
1151 brw_set_dp_read_message(p->brw,
1152 insn,
1153 255, /* binding table index (255=stateless) */
1154 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1155 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1156 1, /* target cache (render/scratch) */
1157 1, /* msg_length */
1158 2, /* response_length */
1159 0); /* eot */
1160 }
1161 }
1162
1163
1164 /**
1165 * Read a float[4] vector from the data port Data Cache (const buffer).
1166 * Location (in buffer) should be a multiple of 16.
1167 * Used for fetching shader constants.
1168 * If relAddr is true, we'll do an indirect fetch using the address register.
1169 */
1170 void brw_dp_READ_4( struct brw_compile *p,
1171 struct brw_reg dest,
1172 GLboolean relAddr,
1173 GLuint location,
1174 GLuint bind_table_index )
1175 {
1176 /* XXX: relAddr not implemented */
1177 GLuint msg_reg_nr = 1;
1178 {
1179 struct brw_reg b;
1180 brw_push_insn_state(p);
1181 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1182 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1183 brw_set_mask_control(p, BRW_MASK_DISABLE);
1184
1185 /* Setup MRF[1] with location/offset into const buffer */
1186 b = brw_message_reg(msg_reg_nr);
1187 b = retype(b, BRW_REGISTER_TYPE_UD);
1188 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1189 * when the docs say only dword[2] should be set. Hmmm. But it works.
1190 */
1191 brw_MOV(p, b, brw_imm_ud(location));
1192 brw_pop_insn_state(p);
1193 }
1194
1195 {
1196 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1197
1198 insn->header.predicate_control = BRW_PREDICATE_NONE;
1199 insn->header.compression_control = BRW_COMPRESSION_NONE;
1200 insn->header.destreg__conditionalmod = msg_reg_nr;
1201 insn->header.mask_control = BRW_MASK_DISABLE;
1202
1203 /* cast dest to a uword[8] vector */
1204 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1205
1206 brw_set_dest(insn, dest);
1207 brw_set_src0(insn, brw_null_reg());
1208
1209 brw_set_dp_read_message(p->brw,
1210 insn,
1211 bind_table_index,
1212 0, /* msg_control (0 means 1 Oword) */
1213 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1214 0, /* source cache = data cache */
1215 1, /* msg_length */
1216 1, /* response_length (1 Oword) */
1217 0); /* eot */
1218 }
1219 }
1220
1221
1222 /**
1223 * Read float[4] constant(s) from VS constant buffer.
1224 * For relative addressing, two float[4] constants will be read into 'dest'.
1225 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1226 */
1227 void brw_dp_READ_4_vs(struct brw_compile *p,
1228 struct brw_reg dest,
1229 GLuint oword,
1230 GLboolean relAddr,
1231 struct brw_reg addrReg,
1232 GLuint location,
1233 GLuint bind_table_index)
1234 {
1235 GLuint msg_reg_nr = 1;
1236
1237 assert(oword < 2);
1238 /*
1239 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1240 location, msg_reg_nr);
1241 */
1242
1243 /* Setup MRF[1] with location/offset into const buffer */
1244 {
1245 struct brw_reg b;
1246
1247 brw_push_insn_state(p);
1248 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1249 brw_set_mask_control(p, BRW_MASK_DISABLE);
1250 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1251 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1252
1253 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1254 * when the docs say only dword[2] should be set. Hmmm. But it works.
1255 */
1256 b = brw_message_reg(msg_reg_nr);
1257 b = retype(b, BRW_REGISTER_TYPE_UD);
1258 /*b = get_element_ud(b, 2);*/
1259 if (relAddr) {
1260 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1261 }
1262 else {
1263 brw_MOV(p, b, brw_imm_ud(location));
1264 }
1265
1266 brw_pop_insn_state(p);
1267 }
1268
1269 {
1270 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1271
1272 insn->header.predicate_control = BRW_PREDICATE_NONE;
1273 insn->header.compression_control = BRW_COMPRESSION_NONE;
1274 insn->header.destreg__conditionalmod = msg_reg_nr;
1275 insn->header.mask_control = BRW_MASK_DISABLE;
1276 /*insn->header.access_mode = BRW_ALIGN_16;*/
1277
1278 brw_set_dest(insn, dest);
1279 brw_set_src0(insn, brw_null_reg());
1280
1281 brw_set_dp_read_message(p->brw,
1282 insn,
1283 bind_table_index,
1284 oword, /* 0 = lower Oword, 1 = upper Oword */
1285 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1286 0, /* source cache = data cache */
1287 1, /* msg_length */
1288 1, /* response_length (1 Oword) */
1289 0); /* eot */
1290 }
1291 }
1292
1293
1294
1295 void brw_fb_WRITE(struct brw_compile *p,
1296 struct brw_reg dest,
1297 GLuint msg_reg_nr,
1298 struct brw_reg src0,
1299 GLuint binding_table_index,
1300 GLuint msg_length,
1301 GLuint response_length,
1302 GLboolean eot)
1303 {
1304 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1305
1306 insn->header.predicate_control = 0; /* XXX */
1307 insn->header.compression_control = BRW_COMPRESSION_NONE;
1308 insn->header.destreg__conditionalmod = msg_reg_nr;
1309
1310 brw_set_dest(insn, dest);
1311 brw_set_src0(insn, src0);
1312 brw_set_dp_write_message(p->brw,
1313 insn,
1314 binding_table_index,
1315 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1316 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1317 msg_length,
1318 1, /* pixel scoreboard */
1319 response_length,
1320 eot,
1321 0 /* send_commit_msg */);
1322 }
1323
1324
/**
 * Texture sample instruction.
 *
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * For partial writemasks, the enabled channels are encoded into the
 * message header when they form one contiguous run; otherwise a
 * dependency-stall workaround is emitted after the send (see below).
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /* No channels to write: skip emitting the sample entirely. */
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading disabled channels; each skipped channel moves the
       * destination up by two registers (one per SIMD8 half).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* Enabled channels aren't one contiguous run, so they can't be
	  * expressed via the header's channel mask; fall back to the
	  * post-send dependency stall.
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 /* Infer SIMD width from the destination register shape. */
	 guess_execution_size(p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: the header field holds the channels to *disable*. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build the message header: copy r0, then store the disable
	  * mask in dword 2, bits 15:12 (NOTE(review): presumably the
	  * sampler channel-disable field — confirm against the PRM).
	  */
	 brw_MOV(p, m1, brw_vec8_grf(0,0));
	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
       *
       * Touch the last register of the result so subsequent writes see
       * a read dependency on the send's destination.
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);
      brw_pop_insn_state(p);
   }

}
1446
1447 /* All these variables are pretty confusing - we might be better off
1448 * using bitmasks and macros for this, in the old style. Or perhaps
1449 * just having the caller instantiate the fields in dword3 itself.
1450 */
1451 void brw_urb_WRITE(struct brw_compile *p,
1452 struct brw_reg dest,
1453 GLuint msg_reg_nr,
1454 struct brw_reg src0,
1455 GLboolean allocate,
1456 GLboolean used,
1457 GLuint msg_length,
1458 GLuint response_length,
1459 GLboolean eot,
1460 GLboolean writes_complete,
1461 GLuint offset,
1462 GLuint swizzle)
1463 {
1464 struct intel_context *intel = &p->brw->intel;
1465 struct brw_instruction *insn;
1466
1467 /* Sandybridge doesn't have the implied move for SENDs,
1468 * and the first message register index comes from src0.
1469 */
1470 if (intel->gen >= 6) {
1471 brw_push_insn_state(p);
1472 brw_set_mask_control( p, BRW_MASK_DISABLE );
1473 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1474 brw_pop_insn_state(p);
1475 src0 = brw_message_reg(msg_reg_nr);
1476 }
1477
1478 insn = next_insn(p, BRW_OPCODE_SEND);
1479
1480 assert(msg_length < BRW_MAX_MRF);
1481
1482 brw_set_dest(insn, dest);
1483 brw_set_src0(insn, src0);
1484 brw_set_src1(insn, brw_imm_d(0));
1485
1486 if (intel->gen < 6)
1487 insn->header.destreg__conditionalmod = msg_reg_nr;
1488
1489 brw_set_urb_message(p->brw,
1490 insn,
1491 allocate,
1492 used,
1493 msg_length,
1494 response_length,
1495 eot,
1496 writes_complete,
1497 offset,
1498 swizzle);
1499 }
1500
1501 void brw_ff_sync(struct brw_compile *p,
1502 struct brw_reg dest,
1503 GLuint msg_reg_nr,
1504 struct brw_reg src0,
1505 GLboolean allocate,
1506 GLuint response_length,
1507 GLboolean eot)
1508 {
1509 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1510
1511 brw_set_dest(insn, dest);
1512 brw_set_src0(insn, src0);
1513 brw_set_src1(insn, brw_imm_d(0));
1514
1515 insn->header.destreg__conditionalmod = msg_reg_nr;
1516
1517 brw_set_ff_sync_message(p->brw,
1518 insn,
1519 allocate,
1520 response_length,
1521 eot);
1522 }