[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
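/* Pick an execution size for the instruction from the destination register.
 * A width-8 register under a compressed (SIMD16) instruction executes as
 * SIMD16; otherwise reg.width is used directly, relying on the BRW_WIDTH_*
 * and BRW_EXECUTE_* encodings being compatible (see the note below).
 */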
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 }
79 }
80 else {
81 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83 /* These are different sizes in align1 vs align16:
84 */
85 if (insn->header.access_mode == BRW_ALIGN_1) {
86 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90 }
91 else {
92 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93 }
94 }
95
96 /* NEW: Set the execution size based on dest.width and
97 * insn->compression_control:
98 */
99 guess_execution_size(insn, dest);
100 }
101
102 static void brw_set_src0( struct brw_instruction *insn,
103 struct brw_reg reg )
104 {
105    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
106 assert(reg.nr < 128);
107
108 insn->bits1.da1.src0_reg_file = reg.file;
109 insn->bits1.da1.src0_reg_type = reg.type;
110 insn->bits2.da1.src0_abs = reg.abs;
111 insn->bits2.da1.src0_negate = reg.negate;
112 insn->bits2.da1.src0_address_mode = reg.address_mode;
113
114 if (reg.file == BRW_IMMEDIATE_VALUE) {
115 insn->bits3.ud = reg.dw1.ud;
116
117 /* Required to set some fields in src1 as well:
118 */
119 insn->bits1.da1.src1_reg_file = 0; /* arf */
120 insn->bits1.da1.src1_reg_type = reg.type;
121 }
122 else
123 {
124 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
125 if (insn->header.access_mode == BRW_ALIGN_1) {
126 insn->bits2.da1.src0_subreg_nr = reg.subnr;
127 insn->bits2.da1.src0_reg_nr = reg.nr;
128 }
129 else {
130 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
131 insn->bits2.da16.src0_reg_nr = reg.nr;
132 }
133 }
134 else {
135 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
136
137 if (insn->header.access_mode == BRW_ALIGN_1) {
138 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
139 }
140 else {
141 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
142 }
143 }
144
145 if (insn->header.access_mode == BRW_ALIGN_1) {
146 if (reg.width == BRW_WIDTH_1 &&
147 insn->header.execution_size == BRW_EXECUTE_1) {
148 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
149 insn->bits2.da1.src0_width = BRW_WIDTH_1;
150 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
151 }
152 else {
153 insn->bits2.da1.src0_horiz_stride = reg.hstride;
154 insn->bits2.da1.src0_width = reg.width;
155 insn->bits2.da1.src0_vert_stride = reg.vstride;
156 }
157 }
158 else {
159 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
160 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
161 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
162 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
163
164 	 /* This is an oddity of the fact that we're using the same
165 	  * descriptions for registers in align_16 as in align_1:
166 */
167 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
168 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
169 else
170 insn->bits2.da16.src0_vert_stride = reg.vstride;
171 }
172 }
173 }
174
175
176 void brw_set_src1( struct brw_instruction *insn,
177 struct brw_reg reg )
178 {
179 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
180
181 assert(reg.nr < 128);
182
183 insn->bits1.da1.src1_reg_file = reg.file;
184 insn->bits1.da1.src1_reg_type = reg.type;
185 insn->bits3.da1.src1_abs = reg.abs;
186 insn->bits3.da1.src1_negate = reg.negate;
187
188 /* Only src1 can be immediate in two-argument instructions.
189 */
190 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
191
192 if (reg.file == BRW_IMMEDIATE_VALUE) {
193 insn->bits3.ud = reg.dw1.ud;
194 }
195 else {
196 /* This is a hardware restriction, which may or may not be lifted
197 * in the future:
198 */
199 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
200 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
201
202 if (insn->header.access_mode == BRW_ALIGN_1) {
203 insn->bits3.da1.src1_subreg_nr = reg.subnr;
204 insn->bits3.da1.src1_reg_nr = reg.nr;
205 }
206 else {
207 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
208 insn->bits3.da16.src1_reg_nr = reg.nr;
209 }
210
211 if (insn->header.access_mode == BRW_ALIGN_1) {
212 if (reg.width == BRW_WIDTH_1 &&
213 insn->header.execution_size == BRW_EXECUTE_1) {
214 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
215 insn->bits3.da1.src1_width = BRW_WIDTH_1;
216 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
217 }
218 else {
219 insn->bits3.da1.src1_horiz_stride = reg.hstride;
220 insn->bits3.da1.src1_width = reg.width;
221 insn->bits3.da1.src1_vert_stride = reg.vstride;
222 }
223 }
224 else {
225 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
226 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
227 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
228 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
229
230 	 /* This is an oddity of the fact that we're using the same
231 	  * descriptions for registers in align_16 as in align_1:
232 */
233 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
234 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
235 else
236 insn->bits3.da16.src1_vert_stride = reg.vstride;
237 }
238 }
239 }
240
241
242
243 static void brw_set_math_message( struct brw_context *brw,
244 struct brw_instruction *insn,
245 GLuint msg_length,
246 GLuint response_length,
247 GLuint function,
248 GLuint integer_type,
249 GLboolean low_precision,
250 GLboolean saturate,
251 GLuint dataType )
252 {
253 struct intel_context *intel = &brw->intel;
254 brw_set_src1(insn, brw_imm_d(0));
255
256 if (intel->gen == 5) {
257 insn->bits3.math_gen5.function = function;
258 insn->bits3.math_gen5.int_type = integer_type;
259 insn->bits3.math_gen5.precision = low_precision;
260 insn->bits3.math_gen5.saturate = saturate;
261 insn->bits3.math_gen5.data_type = dataType;
262 insn->bits3.math_gen5.snapshot = 0;
263 insn->bits3.math_gen5.header_present = 0;
264 insn->bits3.math_gen5.response_length = response_length;
265 insn->bits3.math_gen5.msg_length = msg_length;
266 insn->bits3.math_gen5.end_of_thread = 0;
267 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
268 insn->bits2.send_gen5.end_of_thread = 0;
269 } else {
270 insn->bits3.math.function = function;
271 insn->bits3.math.int_type = integer_type;
272 insn->bits3.math.precision = low_precision;
273 insn->bits3.math.saturate = saturate;
274 insn->bits3.math.data_type = dataType;
275 insn->bits3.math.response_length = response_length;
276 insn->bits3.math.msg_length = msg_length;
277 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
278 insn->bits3.math.end_of_thread = 0;
279 }
280 }
281
282
283 static void brw_set_ff_sync_message(struct brw_context *brw,
284 struct brw_instruction *insn,
285 GLboolean allocate,
286 GLuint response_length,
287 GLboolean end_of_thread)
288 {
289 struct intel_context *intel = &brw->intel;
290 brw_set_src1(insn, brw_imm_d(0));
291
292 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
293 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
294 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
295 insn->bits3.urb_gen5.allocate = allocate;
296 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
297 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
298 insn->bits3.urb_gen5.header_present = 1;
299 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
300 insn->bits3.urb_gen5.msg_length = 1;
301 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
302 if (intel->gen >= 6) {
303 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
304 } else {
305 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
306 insn->bits2.send_gen5.end_of_thread = end_of_thread;
307 }
308 }
309
310 static void brw_set_urb_message( struct brw_context *brw,
311 struct brw_instruction *insn,
312 GLboolean allocate,
313 GLboolean used,
314 GLuint msg_length,
315 GLuint response_length,
316 GLboolean end_of_thread,
317 GLboolean complete,
318 GLuint offset,
319 GLuint swizzle_control )
320 {
321 struct intel_context *intel = &brw->intel;
322 brw_set_src1(insn, brw_imm_d(0));
323
324 if (intel->gen >= 5) {
325 insn->bits3.urb_gen5.opcode = 0; /* ? */
326 insn->bits3.urb_gen5.offset = offset;
327 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
328 insn->bits3.urb_gen5.allocate = allocate;
329 insn->bits3.urb_gen5.used = used; /* ? */
330 insn->bits3.urb_gen5.complete = complete;
331 insn->bits3.urb_gen5.header_present = 1;
332 insn->bits3.urb_gen5.response_length = response_length;
333 insn->bits3.urb_gen5.msg_length = msg_length;
334 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
335 if (intel->gen >= 6) {
336 /* For SNB, the SFID bits moved to the condmod bits, and
337 * EOT stayed in bits3 above. Does the EOT bit setting
338 * below on Ironlake even do anything?
339 */
340 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
341 } else {
342 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
343 insn->bits2.send_gen5.end_of_thread = end_of_thread;
344 }
345 } else {
346 insn->bits3.urb.opcode = 0; /* ? */
347 insn->bits3.urb.offset = offset;
348 insn->bits3.urb.swizzle_control = swizzle_control;
349 insn->bits3.urb.allocate = allocate;
350 insn->bits3.urb.used = used; /* ? */
351 insn->bits3.urb.complete = complete;
352 insn->bits3.urb.response_length = response_length;
353 insn->bits3.urb.msg_length = msg_length;
354 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
355 insn->bits3.urb.end_of_thread = end_of_thread;
356 }
357 }
358
359 static void brw_set_dp_write_message( struct brw_context *brw,
360 struct brw_instruction *insn,
361 GLuint binding_table_index,
362 GLuint msg_control,
363 GLuint msg_type,
364 GLuint msg_length,
365 GLuint pixel_scoreboard_clear,
366 GLuint response_length,
367 GLuint end_of_thread )
368 {
369 struct intel_context *intel = &brw->intel;
370 brw_set_src1(insn, brw_imm_d(0));
371
372 if (intel->gen == 5) {
373 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
374 insn->bits3.dp_write_gen5.msg_control = msg_control;
375 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
376 insn->bits3.dp_write_gen5.msg_type = msg_type;
377 insn->bits3.dp_write_gen5.send_commit_msg = 0;
378 insn->bits3.dp_write_gen5.header_present = 1;
379 insn->bits3.dp_write_gen5.response_length = response_length;
380 insn->bits3.dp_write_gen5.msg_length = msg_length;
381 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
382 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
383 insn->bits2.send_gen5.end_of_thread = end_of_thread;
384 } else {
385 insn->bits3.dp_write.binding_table_index = binding_table_index;
386 insn->bits3.dp_write.msg_control = msg_control;
387 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
388 insn->bits3.dp_write.msg_type = msg_type;
389 insn->bits3.dp_write.send_commit_msg = 0;
390 insn->bits3.dp_write.response_length = response_length;
391 insn->bits3.dp_write.msg_length = msg_length;
392 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
393 insn->bits3.dp_write.end_of_thread = end_of_thread;
394 }
395 }
396
397 static void brw_set_dp_read_message( struct brw_context *brw,
398 struct brw_instruction *insn,
399 GLuint binding_table_index,
400 GLuint msg_control,
401 GLuint msg_type,
402 GLuint target_cache,
403 GLuint msg_length,
404 GLuint response_length,
405 GLuint end_of_thread )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen == 5) {
411 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
412 insn->bits3.dp_read_gen5.msg_control = msg_control;
413 insn->bits3.dp_read_gen5.msg_type = msg_type;
414 insn->bits3.dp_read_gen5.target_cache = target_cache;
415 insn->bits3.dp_read_gen5.header_present = 1;
416 insn->bits3.dp_read_gen5.response_length = response_length;
417 insn->bits3.dp_read_gen5.msg_length = msg_length;
418 insn->bits3.dp_read_gen5.pad1 = 0;
419 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
420 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
421 insn->bits2.send_gen5.end_of_thread = end_of_thread;
422 } else {
423 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
424 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
425 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
426 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
427 insn->bits3.dp_read.response_length = response_length; /*16:19*/
428 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
429 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
430 insn->bits3.dp_read.pad1 = 0; /*28:30*/
431 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
432 }
433 }
434
435 static void brw_set_sampler_message(struct brw_context *brw,
436 struct brw_instruction *insn,
437 GLuint binding_table_index,
438 GLuint sampler,
439 GLuint msg_type,
440 GLuint response_length,
441 GLuint msg_length,
442 GLboolean eot,
443 GLuint header_present,
444 GLuint simd_mode)
445 {
446 struct intel_context *intel = &brw->intel;
447 assert(eot == 0);
448 brw_set_src1(insn, brw_imm_d(0));
449
450 if (intel->gen == 5) {
451 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
452 insn->bits3.sampler_gen5.sampler = sampler;
453 insn->bits3.sampler_gen5.msg_type = msg_type;
454 insn->bits3.sampler_gen5.simd_mode = simd_mode;
455 insn->bits3.sampler_gen5.header_present = header_present;
456 insn->bits3.sampler_gen5.response_length = response_length;
457 insn->bits3.sampler_gen5.msg_length = msg_length;
458 insn->bits3.sampler_gen5.end_of_thread = eot;
459 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
460 insn->bits2.send_gen5.end_of_thread = eot;
461 } else if (intel->is_g4x) {
462 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
463 insn->bits3.sampler_g4x.sampler = sampler;
464 insn->bits3.sampler_g4x.msg_type = msg_type;
465 insn->bits3.sampler_g4x.response_length = response_length;
466 insn->bits3.sampler_g4x.msg_length = msg_length;
467 insn->bits3.sampler_g4x.end_of_thread = eot;
468 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
469 } else {
470 insn->bits3.sampler.binding_table_index = binding_table_index;
471 insn->bits3.sampler.sampler = sampler;
472 insn->bits3.sampler.msg_type = msg_type;
473 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
474 insn->bits3.sampler.response_length = response_length;
475 insn->bits3.sampler.msg_length = msg_length;
476 insn->bits3.sampler.end_of_thread = eot;
477 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
478 }
479 }
480
481
482
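/* Allocate the next slot in the instruction store and seed it with the
 * current default instruction state (p->current).  One-shot fields of that
 * default state are reset here so they only apply to this instruction.
 */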
483 static struct brw_instruction *next_insn( struct brw_compile *p,
484 GLuint opcode )
485 {
486 struct brw_instruction *insn;
487
488 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
489
490 insn = &p->store[p->nr_insn++];
491 memcpy(insn, p->current, sizeof(*insn));
492
493 /* Reset this one-shot flag:
494 */
495
496 if (p->current->header.destreg__conditionalmod) {
497 p->current->header.destreg__conditionalmod = 0;
498 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
499 }
500
501 insn->header.opcode = opcode;
502 return insn;
503 }
504
505
506 static struct brw_instruction *brw_alu1( struct brw_compile *p,
507 GLuint opcode,
508 struct brw_reg dest,
509 struct brw_reg src )
510 {
511 struct brw_instruction *insn = next_insn(p, opcode);
512 brw_set_dest(insn, dest);
513 brw_set_src0(insn, src);
514 return insn;
515 }
516
517 static struct brw_instruction *brw_alu2(struct brw_compile *p,
518 GLuint opcode,
519 struct brw_reg dest,
520 struct brw_reg src0,
521 struct brw_reg src1 )
522 {
523 struct brw_instruction *insn = next_insn(p, opcode);
524 brw_set_dest(insn, dest);
525 brw_set_src0(insn, src0);
526 brw_set_src1(insn, src1);
527 return insn;
528 }
529
530
531 /***********************************************************************
532 * Convenience routines.
533 */
534 #define ALU1(OP) \
535 struct brw_instruction *brw_##OP(struct brw_compile *p, \
536 struct brw_reg dest, \
537 struct brw_reg src0) \
538 { \
539 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
540 }
541
542 #define ALU2(OP) \
543 struct brw_instruction *brw_##OP(struct brw_compile *p, \
544 struct brw_reg dest, \
545 struct brw_reg src0, \
546 struct brw_reg src1) \
547 { \
548 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
549 }
550
551
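/* The emit helpers generated from the ALU1/ALU2 macros above:
 * brw_MOV(), brw_ADD(), brw_MUL(), etc.
 */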
552 ALU1(MOV)
553 ALU2(SEL)
554 ALU1(NOT)
555 ALU2(AND)
556 ALU2(OR)
557 ALU2(XOR)
558 ALU2(SHR)
559 ALU2(SHL)
560 ALU2(RSR)
561 ALU2(RSL)
562 ALU2(ASR)
563 ALU2(ADD)
564 ALU2(MUL)
565 ALU1(FRC)
566 ALU1(RNDD)
567 ALU1(RNDZ)
568 ALU2(MAC)
569 ALU2(MACH)
570 ALU1(LZD)
571 ALU2(DP4)
572 ALU2(DPH)
573 ALU2(DP3)
574 ALU2(DP2)
575 ALU2(LINE)
576 ALU2(PLN)
577
578
579
580 void brw_NOP(struct brw_compile *p)
581 {
582 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
583 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
584 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
585 brw_set_src1(insn, brw_imm_ud(0x0));
586 }
587
588
589
590
591
592 /***********************************************************************
593 * Comparisons, if/else/endif
594 */
595
596 struct brw_instruction *brw_JMPI(struct brw_compile *p,
597 struct brw_reg dest,
598 struct brw_reg src0,
599 struct brw_reg src1)
600 {
601 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
602
603 insn->header.execution_size = 1;
604 insn->header.compression_control = BRW_COMPRESSION_NONE;
605 insn->header.mask_control = BRW_MASK_DISABLE;
606
607 p->current->header.predicate_control = BRW_PREDICATE_NONE;
608
609 return insn;
610 }
611
612 /* EU takes the value from the flag register and pushes it onto some
613 * sort of a stack (presumably merging with any flag value already on
614 * the stack). Within an if block, the flags at the top of the stack
615  * control execution on each channel of the unit, e.g. on each of the
616 * 16 pixel values in our wm programs.
617 *
618 * When the matching 'else' instruction is reached (presumably by
619 * countdown of the instruction count patched in by our ELSE/ENDIF
620  * functions), the relevant flags are inverted.
621 *
622 * When the matching 'endif' instruction is reached, the flags are
623 * popped off. If the stack is now empty, normal execution resumes.
624 *
625 * No attempt is made to deal with stack overflow (14 elements?).
626 */
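/* A rough usage sketch for the functions below:
 *
 *    struct brw_instruction *if_insn = brw_IF(p, BRW_EXECUTE_8);
 *       ... emit the "then" block ...
 *    struct brw_instruction *else_insn = brw_ELSE(p, if_insn);
 *       ... emit the "else" block ...
 *    brw_ENDIF(p, else_insn);
 *
 * brw_ELSE() patches the jump count of the IF instruction, and brw_ENDIF()
 * patches whichever of the IF/ELSE instructions it is handed.
 */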
627 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
628 {
629 struct brw_instruction *insn;
630
631 if (p->single_program_flow) {
632 assert(execute_size == BRW_EXECUTE_1);
633
634 insn = next_insn(p, BRW_OPCODE_ADD);
635 insn->header.predicate_inverse = 1;
636 } else {
637 insn = next_insn(p, BRW_OPCODE_IF);
638 }
639
640 /* Override the defaults for this instruction:
641 */
642 brw_set_dest(insn, brw_ip_reg());
643 brw_set_src0(insn, brw_ip_reg());
644 brw_set_src1(insn, brw_imm_d(0x0));
645
646 insn->header.execution_size = execute_size;
647 insn->header.compression_control = BRW_COMPRESSION_NONE;
648 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
649 insn->header.mask_control = BRW_MASK_ENABLE;
650 if (!p->single_program_flow)
651 insn->header.thread_control = BRW_THREAD_SWITCH;
652
653 p->current->header.predicate_control = BRW_PREDICATE_NONE;
654
655 return insn;
656 }
657
658
659 struct brw_instruction *brw_ELSE(struct brw_compile *p,
660 struct brw_instruction *if_insn)
661 {
662 struct intel_context *intel = &p->brw->intel;
663 struct brw_instruction *insn;
664 GLuint br = 1;
665
666 if (intel->gen == 5)
667 br = 2;
668
669 if (p->single_program_flow) {
670 insn = next_insn(p, BRW_OPCODE_ADD);
671 } else {
672 insn = next_insn(p, BRW_OPCODE_ELSE);
673 }
674
675 brw_set_dest(insn, brw_ip_reg());
676 brw_set_src0(insn, brw_ip_reg());
677 brw_set_src1(insn, brw_imm_d(0x0));
678
679 insn->header.compression_control = BRW_COMPRESSION_NONE;
680 insn->header.execution_size = if_insn->header.execution_size;
681 insn->header.mask_control = BRW_MASK_ENABLE;
682 if (!p->single_program_flow)
683 insn->header.thread_control = BRW_THREAD_SWITCH;
684
685 /* Patch the if instruction to point at this instruction.
686 */
687 if (p->single_program_flow) {
688 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
689
690 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
691 } else {
692 assert(if_insn->header.opcode == BRW_OPCODE_IF);
693
694 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
695 if_insn->bits3.if_else.pop_count = 0;
696 if_insn->bits3.if_else.pad0 = 0;
697 }
698
699 return insn;
700 }
701
702 void brw_ENDIF(struct brw_compile *p,
703 struct brw_instruction *patch_insn)
704 {
705 struct intel_context *intel = &p->brw->intel;
706 GLuint br = 1;
707
708 if (intel->gen == 5)
709 br = 2;
710
711 if (p->single_program_flow) {
712 /* In single program flow mode, there's no need to execute an ENDIF,
713 * since we don't need to do any stack operations, and if we're executing
714 * currently, we want to just continue executing.
715 */
716 struct brw_instruction *next = &p->store[p->nr_insn];
717
718 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
719
720 patch_insn->bits3.ud = (next - patch_insn) * 16;
721 } else {
722 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
723
724 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
725 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
726 brw_set_src1(insn, brw_imm_d(0x0));
727
728 insn->header.compression_control = BRW_COMPRESSION_NONE;
729 insn->header.execution_size = patch_insn->header.execution_size;
730 insn->header.mask_control = BRW_MASK_ENABLE;
731 insn->header.thread_control = BRW_THREAD_SWITCH;
732
733 assert(patch_insn->bits3.if_else.jump_count == 0);
734
735 /* Patch the if or else instructions to point at this or the next
736 * instruction respectively.
737 */
738 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
739 /* Automagically turn it into an IFF:
740 */
741 patch_insn->header.opcode = BRW_OPCODE_IFF;
742 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
743 patch_insn->bits3.if_else.pop_count = 0;
744 patch_insn->bits3.if_else.pad0 = 0;
745 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
746 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
747 patch_insn->bits3.if_else.pop_count = 1;
748 patch_insn->bits3.if_else.pad0 = 0;
749 } else {
750 assert(0);
751 }
752
753 /* Also pop item off the stack in the endif instruction:
754 */
755 insn->bits3.if_else.jump_count = 0;
756 insn->bits3.if_else.pop_count = 1;
757 insn->bits3.if_else.pad0 = 0;
758 }
759 }
760
761 struct brw_instruction *brw_BREAK(struct brw_compile *p)
762 {
763 struct brw_instruction *insn;
764 insn = next_insn(p, BRW_OPCODE_BREAK);
765 brw_set_dest(insn, brw_ip_reg());
766 brw_set_src0(insn, brw_ip_reg());
767 brw_set_src1(insn, brw_imm_d(0x0));
768 insn->header.compression_control = BRW_COMPRESSION_NONE;
769 insn->header.execution_size = BRW_EXECUTE_8;
770 /* insn->header.mask_control = BRW_MASK_DISABLE; */
771 insn->bits3.if_else.pad0 = 0;
772 return insn;
773 }
774
775 struct brw_instruction *brw_CONT(struct brw_compile *p)
776 {
777 struct brw_instruction *insn;
778 insn = next_insn(p, BRW_OPCODE_CONTINUE);
779 brw_set_dest(insn, brw_ip_reg());
780 brw_set_src0(insn, brw_ip_reg());
781 brw_set_src1(insn, brw_imm_d(0x0));
782 insn->header.compression_control = BRW_COMPRESSION_NONE;
783 insn->header.execution_size = BRW_EXECUTE_8;
784 /* insn->header.mask_control = BRW_MASK_DISABLE; */
785 insn->bits3.if_else.pad0 = 0;
786 return insn;
787 }
788
789 /* DO/WHILE loop:
790 */
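/* A rough usage sketch for the functions below:
 *
 *    struct brw_instruction *do_insn = brw_DO(p, BRW_EXECUTE_8);
 *       ... emit the loop body, using brw_BREAK()/brw_CONT() as needed ...
 *    brw_WHILE(p, do_insn);
 *
 * brw_WHILE() computes its backward jump count from the distance to the
 * matching DO instruction.
 */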
791 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
792 {
793 if (p->single_program_flow) {
794 return &p->store[p->nr_insn];
795 } else {
796 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
797
798 /* Override the defaults for this instruction:
799 */
800 brw_set_dest(insn, brw_null_reg());
801 brw_set_src0(insn, brw_null_reg());
802 brw_set_src1(insn, brw_null_reg());
803
804 insn->header.compression_control = BRW_COMPRESSION_NONE;
805 insn->header.execution_size = execute_size;
806 insn->header.predicate_control = BRW_PREDICATE_NONE;
807 /* insn->header.mask_control = BRW_MASK_ENABLE; */
808 /* insn->header.mask_control = BRW_MASK_DISABLE; */
809
810 return insn;
811 }
812 }
813
814
815
816 struct brw_instruction *brw_WHILE(struct brw_compile *p,
817 struct brw_instruction *do_insn)
818 {
819 struct intel_context *intel = &p->brw->intel;
820 struct brw_instruction *insn;
821 GLuint br = 1;
822
823 if (intel->gen == 5)
824 br = 2;
825
826 if (p->single_program_flow)
827 insn = next_insn(p, BRW_OPCODE_ADD);
828 else
829 insn = next_insn(p, BRW_OPCODE_WHILE);
830
831 brw_set_dest(insn, brw_ip_reg());
832 brw_set_src0(insn, brw_ip_reg());
833 brw_set_src1(insn, brw_imm_d(0x0));
834
835 insn->header.compression_control = BRW_COMPRESSION_NONE;
836
837 if (p->single_program_flow) {
838 insn->header.execution_size = BRW_EXECUTE_1;
839
840 insn->bits3.d = (do_insn - insn) * 16;
841 } else {
842 insn->header.execution_size = do_insn->header.execution_size;
843
844 assert(do_insn->header.opcode == BRW_OPCODE_DO);
845 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
846 insn->bits3.if_else.pop_count = 0;
847 insn->bits3.if_else.pad0 = 0;
848 }
849
850 /* insn->header.mask_control = BRW_MASK_ENABLE; */
851
852 /* insn->header.mask_control = BRW_MASK_DISABLE; */
853 p->current->header.predicate_control = BRW_PREDICATE_NONE;
854 return insn;
855 }
856
857
858 /* FORWARD JUMPS:
859 */
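/* A rough usage sketch: emit a JMPI with an immediate src1, emit the
 * instructions to be skipped, then patch the jump distance at the landing
 * point:
 *
 *    struct brw_instruction *jmp =
 *       brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
 *       ... emit instructions to be jumped over ...
 *    brw_land_fwd_jump(p, jmp);
 */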
860 void brw_land_fwd_jump(struct brw_compile *p,
861 struct brw_instruction *jmp_insn)
862 {
863 struct intel_context *intel = &p->brw->intel;
864 struct brw_instruction *landing = &p->store[p->nr_insn];
865 GLuint jmpi = 1;
866
867 if (intel->gen == 5)
868 jmpi = 2;
869
870 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
871 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
872
873 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
874 }
875
876
877
878 /* To integrate with the above, it makes sense that the comparison
879 * instruction should populate the flag register. It might be simpler
880 * just to use the flag reg for most WM tasks?
881 */
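/* A rough usage sketch: comparing into the null register updates only the
 * flag register and, per the code below, leaves subsequent instructions
 * predicated on the result:
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, src, brw_imm_f(0.0));
 */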
882 void brw_CMP(struct brw_compile *p,
883 struct brw_reg dest,
884 GLuint conditional,
885 struct brw_reg src0,
886 struct brw_reg src1)
887 {
888 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
889
890 insn->header.destreg__conditionalmod = conditional;
891 brw_set_dest(insn, dest);
892 brw_set_src0(insn, src0);
893 brw_set_src1(insn, src1);
894
895 /* guess_execution_size(insn, src0); */
896
897
898 /* Make it so that future instructions will use the computed flag
899 * value until brw_set_predicate_control_flag_value() is called
900 * again.
901 */
902 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
903 dest.nr == 0) {
904 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
905 p->flag_value = 0xff;
906 }
907 }
908
909
910
911 /***********************************************************************
912 * Helpers for the various SEND message types:
913 */
914
915 /** Extended math function, float[8].
916 */
917 void brw_math( struct brw_compile *p,
918 struct brw_reg dest,
919 GLuint function,
920 GLuint saturate,
921 GLuint msg_reg_nr,
922 struct brw_reg src,
923 GLuint data_type,
924 GLuint precision )
925 {
926 struct intel_context *intel = &p->brw->intel;
927
928 if (intel->gen >= 6) {
929 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
930
931 /* Math is the same ISA format as other opcodes, except that CondModifier
932 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
933 */
934 insn->header.destreg__conditionalmod = function;
935
936 brw_set_dest(insn, dest);
937 brw_set_src0(insn, src);
938 brw_set_src1(insn, brw_null_reg());
939 } else {
940 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
941 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
942 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
943 /* Example code doesn't set predicate_control for send
944 * instructions.
945 */
946 insn->header.predicate_control = 0;
947 insn->header.destreg__conditionalmod = msg_reg_nr;
948
949 brw_set_dest(insn, dest);
950 brw_set_src0(insn, src);
951 brw_set_math_message(p->brw,
952 insn,
953 msg_length, response_length,
954 function,
955 BRW_MATH_INTEGER_UNSIGNED,
956 precision,
957 saturate,
958 data_type);
959 }
960 }
961
962 /**
963 * Extended math function, float[16].
964 * Use 2 send instructions.
965 */
966 void brw_math_16( struct brw_compile *p,
967 struct brw_reg dest,
968 GLuint function,
969 GLuint saturate,
970 GLuint msg_reg_nr,
971 struct brw_reg src,
972 GLuint precision )
973 {
974 struct intel_context *intel = &p->brw->intel;
975 struct brw_instruction *insn;
976 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
977 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
978
979 if (intel->gen >= 6) {
980 insn = next_insn(p, BRW_OPCODE_MATH);
981
982 /* Math is the same ISA format as other opcodes, except that CondModifier
983 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
984 */
985 insn->header.destreg__conditionalmod = function;
986
987 brw_set_dest(insn, dest);
988 brw_set_src0(insn, src);
989 brw_set_src1(insn, brw_null_reg());
990 return;
991 }
992
993 /* First instruction:
994 */
995 brw_push_insn_state(p);
996 brw_set_predicate_control_flag_value(p, 0xff);
997 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
998
999 insn = next_insn(p, BRW_OPCODE_SEND);
1000 insn->header.destreg__conditionalmod = msg_reg_nr;
1001
1002 brw_set_dest(insn, dest);
1003 brw_set_src0(insn, src);
1004 brw_set_math_message(p->brw,
1005 insn,
1006 msg_length, response_length,
1007 function,
1008 BRW_MATH_INTEGER_UNSIGNED,
1009 precision,
1010 saturate,
1011 BRW_MATH_DATA_VECTOR);
1012
1013 /* Second instruction:
1014 */
1015 insn = next_insn(p, BRW_OPCODE_SEND);
1016 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1017 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1018
1019 brw_set_dest(insn, offset(dest,1));
1020 brw_set_src0(insn, src);
1021 brw_set_math_message(p->brw,
1022 insn,
1023 msg_length, response_length,
1024 function,
1025 BRW_MATH_INTEGER_UNSIGNED,
1026 precision,
1027 saturate,
1028 BRW_MATH_DATA_VECTOR);
1029
1030 brw_pop_insn_state(p);
1031 }
1032
1033
1034 /**
1035 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1036 * Scratch offset should be a multiple of 64.
1037 * Used for register spilling.
1038 */
1039 void brw_dp_WRITE_16( struct brw_compile *p,
1040 struct brw_reg src,
1041 GLuint scratch_offset )
1042 {
1043 GLuint msg_reg_nr = 1;
1044 {
1045 brw_push_insn_state(p);
1046 brw_set_mask_control(p, BRW_MASK_DISABLE);
1047 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1048
1049 /* set message header global offset field (reg 0, element 2) */
1050 brw_MOV(p,
1051 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1052 brw_imm_d(scratch_offset));
1053
1054 brw_pop_insn_state(p);
1055 }
1056
1057 {
1058 GLuint msg_length = 3;
1059 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1060 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1061
1062 insn->header.predicate_control = 0; /* XXX */
1063 insn->header.compression_control = BRW_COMPRESSION_NONE;
1064 insn->header.destreg__conditionalmod = msg_reg_nr;
1065
1066 brw_set_dest(insn, dest);
1067 brw_set_src0(insn, src);
1068
1069 brw_set_dp_write_message(p->brw,
1070 insn,
1071 255, /* binding table index (255=stateless) */
1072 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1073 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1074 msg_length,
1075 0, /* pixel scoreboard */
1076 0, /* response_length */
1077 0); /* eot */
1078 }
1079 }
1080
1081
1082 /**
1083 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1084 * Scratch offset should be a multiple of 64.
1085 * Used for register spilling.
1086 */
1087 void brw_dp_READ_16( struct brw_compile *p,
1088 struct brw_reg dest,
1089 GLuint scratch_offset )
1090 {
1091 GLuint msg_reg_nr = 1;
1092 {
1093 brw_push_insn_state(p);
1094 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1095 brw_set_mask_control(p, BRW_MASK_DISABLE);
1096
1097 /* set message header global offset field (reg 0, element 2) */
1098 brw_MOV(p,
1099 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1100 brw_imm_d(scratch_offset));
1101
1102 brw_pop_insn_state(p);
1103 }
1104
1105 {
1106 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1107
1108 insn->header.predicate_control = 0; /* XXX */
1109 insn->header.compression_control = BRW_COMPRESSION_NONE;
1110 insn->header.destreg__conditionalmod = msg_reg_nr;
1111
1112 brw_set_dest(insn, dest); /* UW? */
1113 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1114
1115 brw_set_dp_read_message(p->brw,
1116 insn,
1117 255, /* binding table index (255=stateless) */
1118 3, /* msg_control (3 means 4 Owords) */
1119 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1120 1, /* target cache (render/scratch) */
1121 1, /* msg_length */
1122 2, /* response_length */
1123 0); /* eot */
1124 }
1125 }
1126
1127
1128 /**
1129 * Read a float[4] vector from the data port Data Cache (const buffer).
1130 * Location (in buffer) should be a multiple of 16.
1131 * Used for fetching shader constants.
1132 * If relAddr is true, we'll do an indirect fetch using the address register.
1133 */
1134 void brw_dp_READ_4( struct brw_compile *p,
1135 struct brw_reg dest,
1136 GLboolean relAddr,
1137 GLuint location,
1138 GLuint bind_table_index )
1139 {
1140 /* XXX: relAddr not implemented */
1141 GLuint msg_reg_nr = 1;
1142 {
1143 struct brw_reg b;
1144 brw_push_insn_state(p);
1145 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1146 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1147 brw_set_mask_control(p, BRW_MASK_DISABLE);
1148
1149 /* Setup MRF[1] with location/offset into const buffer */
1150 b = brw_message_reg(msg_reg_nr);
1151 b = retype(b, BRW_REGISTER_TYPE_UD);
1152       /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1153        * when the docs say only dword[2] should be set. Hmmm. But it works.
1154 */
1155 brw_MOV(p, b, brw_imm_ud(location));
1156 brw_pop_insn_state(p);
1157 }
1158
1159 {
1160 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1161
1162 insn->header.predicate_control = BRW_PREDICATE_NONE;
1163 insn->header.compression_control = BRW_COMPRESSION_NONE;
1164 insn->header.destreg__conditionalmod = msg_reg_nr;
1165 insn->header.mask_control = BRW_MASK_DISABLE;
1166
1167 /* cast dest to a uword[8] vector */
1168 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1169
1170 brw_set_dest(insn, dest);
1171 brw_set_src0(insn, brw_null_reg());
1172
1173 brw_set_dp_read_message(p->brw,
1174 insn,
1175 bind_table_index,
1176 0, /* msg_control (0 means 1 Oword) */
1177 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1178 0, /* source cache = data cache */
1179 1, /* msg_length */
1180 1, /* response_length (1 Oword) */
1181 0); /* eot */
1182 }
1183 }
1184
1185
1186 /**
1187 * Read float[4] constant(s) from VS constant buffer.
1188 * For relative addressing, two float[4] constants will be read into 'dest'.
1189 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1190 */
1191 void brw_dp_READ_4_vs(struct brw_compile *p,
1192 struct brw_reg dest,
1193 GLuint oword,
1194 GLboolean relAddr,
1195 struct brw_reg addrReg,
1196 GLuint location,
1197 GLuint bind_table_index)
1198 {
1199 GLuint msg_reg_nr = 1;
1200
1201 assert(oword < 2);
1202 /*
1203 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1204 location, msg_reg_nr);
1205 */
1206
1207 /* Setup MRF[1] with location/offset into const buffer */
1208 {
1209 struct brw_reg b;
1210
1211 brw_push_insn_state(p);
1212 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1213 brw_set_mask_control(p, BRW_MASK_DISABLE);
1214 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1215 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1216
1217       /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1218        * when the docs say only dword[2] should be set. Hmmm. But it works.
1219 */
1220 b = brw_message_reg(msg_reg_nr);
1221 b = retype(b, BRW_REGISTER_TYPE_UD);
1222 /*b = get_element_ud(b, 2);*/
1223 if (relAddr) {
1224 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1225 }
1226 else {
1227 brw_MOV(p, b, brw_imm_ud(location));
1228 }
1229
1230 brw_pop_insn_state(p);
1231 }
1232
1233 {
1234 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1235
1236 insn->header.predicate_control = BRW_PREDICATE_NONE;
1237 insn->header.compression_control = BRW_COMPRESSION_NONE;
1238 insn->header.destreg__conditionalmod = msg_reg_nr;
1239 insn->header.mask_control = BRW_MASK_DISABLE;
1240 /*insn->header.access_mode = BRW_ALIGN_16;*/
1241
1242 brw_set_dest(insn, dest);
1243 brw_set_src0(insn, brw_null_reg());
1244
1245 brw_set_dp_read_message(p->brw,
1246 insn,
1247 bind_table_index,
1248 oword, /* 0 = lower Oword, 1 = upper Oword */
1249 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1250 0, /* source cache = data cache */
1251 1, /* msg_length */
1252 1, /* response_length (1 Oword) */
1253 0); /* eot */
1254 }
1255 }
1256
1257
1258
1259 void brw_fb_WRITE(struct brw_compile *p,
1260 struct brw_reg dest,
1261 GLuint msg_reg_nr,
1262 struct brw_reg src0,
1263 GLuint binding_table_index,
1264 GLuint msg_length,
1265 GLuint response_length,
1266 GLboolean eot)
1267 {
1268 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1269
1270 insn->header.predicate_control = 0; /* XXX */
1271 insn->header.compression_control = BRW_COMPRESSION_NONE;
1272 insn->header.destreg__conditionalmod = msg_reg_nr;
1273
1274 brw_set_dest(insn, dest);
1275 brw_set_src0(insn, src0);
1276 brw_set_dp_write_message(p->brw,
1277 insn,
1278 binding_table_index,
1279 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1280 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1281 msg_length,
1282 1, /* pixel scoreboard */
1283 response_length,
1284 eot);
1285 }
1286
1287
1288 /**
1289 * Texture sample instruction.
1290 * Note: the msg_type plus msg_length values determine exactly what kind
1291 * of sampling operation is performed. See volume 4, page 161 of docs.
1292 */
1293 void brw_SAMPLE(struct brw_compile *p,
1294 struct brw_reg dest,
1295 GLuint msg_reg_nr,
1296 struct brw_reg src0,
1297 GLuint binding_table_index,
1298 GLuint sampler,
1299 GLuint writemask,
1300 GLuint msg_type,
1301 GLuint response_length,
1302 GLuint msg_length,
1303 GLboolean eot,
1304 GLuint header_present,
1305 GLuint simd_mode)
1306 {
1307 GLboolean need_stall = 0;
1308
1309 if (writemask == 0) {
1310 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1311 return;
1312 }
1313
1314 /* Hardware doesn't do destination dependency checking on send
1315 * instructions properly. Add a workaround which generates the
1316 * dependency by other means. In practice it seems like this bug
1317 * only crops up for texture samples, and only where registers are
1318 * written by the send and then written again later without being
1319 * read in between. Luckily for us, we already track that
1320 * information and use it to modify the writemask for the
1321 * instruction, so that is a guide for whether a workaround is
1322 * needed.
1323 */
1324 if (writemask != WRITEMASK_XYZW) {
1325 GLuint dst_offset = 0;
1326 GLuint i, newmask = 0, len = 0;
1327
1328 for (i = 0; i < 4; i++) {
1329 if (writemask & (1<<i))
1330 break;
1331 dst_offset += 2;
1332 }
1333 for (; i < 4; i++) {
1334 if (!(writemask & (1<<i)))
1335 break;
1336 newmask |= 1<<i;
1337 len++;
1338 }
1339
1340 if (newmask != writemask) {
1341 need_stall = 1;
1342 /* printf("need stall %x %x\n", newmask , writemask); */
1343 }
1344 else {
1345 GLboolean dispatch_16 = GL_FALSE;
1346
1347 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1348
1349 guess_execution_size(p->current, dest);
1350 if (p->current->header.execution_size == BRW_EXECUTE_16)
1351 dispatch_16 = GL_TRUE;
1352
1353 newmask = ~newmask & WRITEMASK_XYZW;
1354
1355 brw_push_insn_state(p);
1356
1357 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1358 brw_set_mask_control(p, BRW_MASK_DISABLE);
1359
1360 brw_MOV(p, m1, brw_vec8_grf(0,0));
1361 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1362
1363 brw_pop_insn_state(p);
1364
1365 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1366 dest = offset(dest, dst_offset);
1367
1368 /* For 16-wide dispatch, masked channels are skipped in the
1369 * response. For 8-wide, masked channels still take up slots,
1370 * and are just not written to.
1371 */
1372 if (dispatch_16)
1373 response_length = len * 2;
1374 }
1375 }
1376
1377 {
1378 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1379
1380 insn->header.predicate_control = 0; /* XXX */
1381 insn->header.compression_control = BRW_COMPRESSION_NONE;
1382 insn->header.destreg__conditionalmod = msg_reg_nr;
1383
1384 brw_set_dest(insn, dest);
1385 brw_set_src0(insn, src0);
1386 brw_set_sampler_message(p->brw, insn,
1387 binding_table_index,
1388 sampler,
1389 msg_type,
1390 response_length,
1391 msg_length,
1392 eot,
1393 header_present,
1394 simd_mode);
1395 }
1396
1397 if (need_stall) {
1398 struct brw_reg reg = vec8(offset(dest, response_length-1));
1399
1400 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1401 */
1402 brw_push_insn_state(p);
1403 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1404 brw_MOV(p, reg, reg);
1405 brw_pop_insn_state(p);
1406 }
1407
1408 }
1409
1410 /* All these variables are pretty confusing - we might be better off
1411 * using bitmasks and macros for this, in the old style. Or perhaps
1412 * just having the caller instantiate the fields in dword3 itself.
1413 */
1414 void brw_urb_WRITE(struct brw_compile *p,
1415 struct brw_reg dest,
1416 GLuint msg_reg_nr,
1417 struct brw_reg src0,
1418 GLboolean allocate,
1419 GLboolean used,
1420 GLuint msg_length,
1421 GLuint response_length,
1422 GLboolean eot,
1423 GLboolean writes_complete,
1424 GLuint offset,
1425 GLuint swizzle)
1426 {
1427 struct intel_context *intel = &p->brw->intel;
1428 struct brw_instruction *insn;
1429
1430 /* Sandybridge doesn't have the implied move for SENDs,
1431 * and the first message register index comes from src0.
1432 */
1433 if (intel->gen >= 6) {
1434 brw_push_insn_state(p);
1435 brw_set_mask_control( p, BRW_MASK_DISABLE );
1436 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1437 brw_pop_insn_state(p);
1438 src0 = brw_message_reg(msg_reg_nr);
1439 }
1440
1441 insn = next_insn(p, BRW_OPCODE_SEND);
1442
1443 assert(msg_length < BRW_MAX_MRF);
1444
1445 brw_set_dest(insn, dest);
1446 brw_set_src0(insn, src0);
1447 brw_set_src1(insn, brw_imm_d(0));
1448
1449 if (intel->gen < 6)
1450 insn->header.destreg__conditionalmod = msg_reg_nr;
1451
1452 brw_set_urb_message(p->brw,
1453 insn,
1454 allocate,
1455 used,
1456 msg_length,
1457 response_length,
1458 eot,
1459 writes_complete,
1460 offset,
1461 swizzle);
1462 }
1463
1464 void brw_ff_sync(struct brw_compile *p,
1465 struct brw_reg dest,
1466 GLuint msg_reg_nr,
1467 struct brw_reg src0,
1468 GLboolean allocate,
1469 GLuint response_length,
1470 GLboolean eot)
1471 {
1472 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1473
1474 brw_set_dest(insn, dest);
1475 brw_set_src0(insn, src0);
1476 brw_set_src1(insn, brw_imm_d(0));
1477
1478 insn->header.destreg__conditionalmod = msg_reg_nr;
1479
1480 brw_set_ff_sync_message(p->brw,
1481 insn,
1482 allocate,
1483 response_length,
1484 eot);
1485 }