Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even ignored in da16, still need to set as '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even ignored in da16, still need to set as '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 static void brw_set_src0( struct brw_instruction *insn,
107 struct brw_reg reg )
108 {
109 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
110 assert(reg.nr < 128);
111
112 insn->bits1.da1.src0_reg_file = reg.file;
113 insn->bits1.da1.src0_reg_type = reg.type;
114 insn->bits2.da1.src0_abs = reg.abs;
115 insn->bits2.da1.src0_negate = reg.negate;
116 insn->bits2.da1.src0_address_mode = reg.address_mode;
117
118 if (reg.file == BRW_IMMEDIATE_VALUE) {
119 insn->bits3.ud = reg.dw1.ud;
120
121 /* Required to set some fields in src1 as well:
122 */
123 insn->bits1.da1.src1_reg_file = 0; /* arf */
124 insn->bits1.da1.src1_reg_type = reg.type;
125 }
126 else
127 {
128 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
129 if (insn->header.access_mode == BRW_ALIGN_1) {
130 insn->bits2.da1.src0_subreg_nr = reg.subnr;
131 insn->bits2.da1.src0_reg_nr = reg.nr;
132 }
133 else {
134 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
135 insn->bits2.da16.src0_reg_nr = reg.nr;
136 }
137 }
138 else {
139 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
140
141 if (insn->header.access_mode == BRW_ALIGN_1) {
142 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
143 }
144 else {
145 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
146 }
147 }
148
149 if (insn->header.access_mode == BRW_ALIGN_1) {
150 if (reg.width == BRW_WIDTH_1 &&
151 insn->header.execution_size == BRW_EXECUTE_1) {
152 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
153 insn->bits2.da1.src0_width = BRW_WIDTH_1;
154 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
155 }
156 else {
157 insn->bits2.da1.src0_horiz_stride = reg.hstride;
158 insn->bits2.da1.src0_width = reg.width;
159 insn->bits2.da1.src0_vert_stride = reg.vstride;
160 }
161 }
162 else {
163 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
164 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
165 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
166 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
167
168 /* This is an oddity of the fact we're using the same
169 * descriptions for registers in align_16 as align_1:
170 */
171 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
172 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
173 else
174 insn->bits2.da16.src0_vert_stride = reg.vstride;
175 }
176 }
177 }
178
179
180 void brw_set_src1( struct brw_instruction *insn,
181 struct brw_reg reg )
182 {
183 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
184
185 assert(reg.nr < 128);
186
187 insn->bits1.da1.src1_reg_file = reg.file;
188 insn->bits1.da1.src1_reg_type = reg.type;
189 insn->bits3.da1.src1_abs = reg.abs;
190 insn->bits3.da1.src1_negate = reg.negate;
191
192 /* Only src1 can be immediate in two-argument instructions.
193 */
194 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
195
196 if (reg.file == BRW_IMMEDIATE_VALUE) {
197 insn->bits3.ud = reg.dw1.ud;
198 }
199 else {
200 /* This is a hardware restriction, which may or may not be lifted
201 * in the future:
202 */
203 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
204 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
205
206 if (insn->header.access_mode == BRW_ALIGN_1) {
207 insn->bits3.da1.src1_subreg_nr = reg.subnr;
208 insn->bits3.da1.src1_reg_nr = reg.nr;
209 }
210 else {
211 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
212 insn->bits3.da16.src1_reg_nr = reg.nr;
213 }
214
215 if (insn->header.access_mode == BRW_ALIGN_1) {
216 if (reg.width == BRW_WIDTH_1 &&
217 insn->header.execution_size == BRW_EXECUTE_1) {
218 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
219 insn->bits3.da1.src1_width = BRW_WIDTH_1;
220 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
221 }
222 else {
223 insn->bits3.da1.src1_horiz_stride = reg.hstride;
224 insn->bits3.da1.src1_width = reg.width;
225 insn->bits3.da1.src1_vert_stride = reg.vstride;
226 }
227 }
228 else {
229 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
230 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
231 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
232 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
233
234 /* This is an oddity of the fact we're using the same
235 * descriptions for registers in align_16 as align_1:
236 */
237 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
238 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
239 else
240 insn->bits3.da16.src1_vert_stride = reg.vstride;
241 }
242 }
243 }
244
245
246
247 static void brw_set_math_message( struct brw_context *brw,
248 struct brw_instruction *insn,
249 GLuint msg_length,
250 GLuint response_length,
251 GLuint function,
252 GLuint integer_type,
253 GLboolean low_precision,
254 GLboolean saturate,
255 GLuint dataType )
256 {
257 struct intel_context *intel = &brw->intel;
258 brw_set_src1(insn, brw_imm_d(0));
259
260 if (intel->gen == 5) {
261 insn->bits3.math_gen5.function = function;
262 insn->bits3.math_gen5.int_type = integer_type;
263 insn->bits3.math_gen5.precision = low_precision;
264 insn->bits3.math_gen5.saturate = saturate;
265 insn->bits3.math_gen5.data_type = dataType;
266 insn->bits3.math_gen5.snapshot = 0;
267 insn->bits3.math_gen5.header_present = 0;
268 insn->bits3.math_gen5.response_length = response_length;
269 insn->bits3.math_gen5.msg_length = msg_length;
270 insn->bits3.math_gen5.end_of_thread = 0;
271 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
272 insn->bits2.send_gen5.end_of_thread = 0;
273 } else {
274 insn->bits3.math.function = function;
275 insn->bits3.math.int_type = integer_type;
276 insn->bits3.math.precision = low_precision;
277 insn->bits3.math.saturate = saturate;
278 insn->bits3.math.data_type = dataType;
279 insn->bits3.math.response_length = response_length;
280 insn->bits3.math.msg_length = msg_length;
281 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
282 insn->bits3.math.end_of_thread = 0;
283 }
284 }
285
286
287 static void brw_set_ff_sync_message(struct brw_context *brw,
288 struct brw_instruction *insn,
289 GLboolean allocate,
290 GLuint response_length,
291 GLboolean end_of_thread)
292 {
293 struct intel_context *intel = &brw->intel;
294 brw_set_src1(insn, brw_imm_d(0));
295
296 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
297 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
298 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
299 insn->bits3.urb_gen5.allocate = allocate;
300 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
301 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
302 insn->bits3.urb_gen5.header_present = 1;
303 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
304 insn->bits3.urb_gen5.msg_length = 1;
305 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
306 if (intel->gen >= 6) {
307 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
308 } else {
309 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
310 insn->bits2.send_gen5.end_of_thread = end_of_thread;
311 }
312 }
313
314 static void brw_set_urb_message( struct brw_context *brw,
315 struct brw_instruction *insn,
316 GLboolean allocate,
317 GLboolean used,
318 GLuint msg_length,
319 GLuint response_length,
320 GLboolean end_of_thread,
321 GLboolean complete,
322 GLuint offset,
323 GLuint swizzle_control )
324 {
325 struct intel_context *intel = &brw->intel;
326 brw_set_src1(insn, brw_imm_d(0));
327
328 if (intel->gen >= 5) {
329 insn->bits3.urb_gen5.opcode = 0; /* ? */
330 insn->bits3.urb_gen5.offset = offset;
331 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
332 insn->bits3.urb_gen5.allocate = allocate;
333 insn->bits3.urb_gen5.used = used; /* ? */
334 insn->bits3.urb_gen5.complete = complete;
335 insn->bits3.urb_gen5.header_present = 1;
336 insn->bits3.urb_gen5.response_length = response_length;
337 insn->bits3.urb_gen5.msg_length = msg_length;
338 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
339 if (intel->gen >= 6) {
340 /* For SNB, the SFID bits moved to the condmod bits, and
341 * EOT stayed in bits3 above. Does the EOT bit setting
342 * below on Ironlake even do anything?
343 */
344 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
345 } else {
346 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
347 insn->bits2.send_gen5.end_of_thread = end_of_thread;
348 }
349 } else {
350 insn->bits3.urb.opcode = 0; /* ? */
351 insn->bits3.urb.offset = offset;
352 insn->bits3.urb.swizzle_control = swizzle_control;
353 insn->bits3.urb.allocate = allocate;
354 insn->bits3.urb.used = used; /* ? */
355 insn->bits3.urb.complete = complete;
356 insn->bits3.urb.response_length = response_length;
357 insn->bits3.urb.msg_length = msg_length;
358 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
359 insn->bits3.urb.end_of_thread = end_of_thread;
360 }
361 }
362
363 static void brw_set_dp_write_message( struct brw_context *brw,
364 struct brw_instruction *insn,
365 GLuint binding_table_index,
366 GLuint msg_control,
367 GLuint msg_type,
368 GLuint msg_length,
369 GLuint pixel_scoreboard_clear,
370 GLuint response_length,
371 GLuint end_of_thread,
372 GLuint send_commit_msg)
373 {
374 struct intel_context *intel = &brw->intel;
375 brw_set_src1(insn, brw_imm_ud(0));
376
377 if (intel->gen >= 6) {
378 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
379 insn->bits3.dp_render_cache.msg_control = msg_control;
380 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
381 insn->bits3.dp_render_cache.msg_type = msg_type;
382 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
383 insn->bits3.dp_render_cache.header_present = 0; /* XXX */
384 insn->bits3.dp_render_cache.response_length = response_length;
385 insn->bits3.dp_render_cache.msg_length = msg_length;
386 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
387 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
388 /* XXX really need below? */
389 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
390 insn->bits2.send_gen5.end_of_thread = end_of_thread;
391 } else if (intel->gen == 5) {
392 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
393 insn->bits3.dp_write_gen5.msg_control = msg_control;
394 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
395 insn->bits3.dp_write_gen5.msg_type = msg_type;
396 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
397 insn->bits3.dp_write_gen5.header_present = 1;
398 insn->bits3.dp_write_gen5.response_length = response_length;
399 insn->bits3.dp_write_gen5.msg_length = msg_length;
400 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
401 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
402 insn->bits2.send_gen5.end_of_thread = end_of_thread;
403 } else {
404 insn->bits3.dp_write.binding_table_index = binding_table_index;
405 insn->bits3.dp_write.msg_control = msg_control;
406 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
407 insn->bits3.dp_write.msg_type = msg_type;
408 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
409 insn->bits3.dp_write.response_length = response_length;
410 insn->bits3.dp_write.msg_length = msg_length;
411 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
412 insn->bits3.dp_write.end_of_thread = end_of_thread;
413 }
414 }
415
416 static void brw_set_dp_read_message( struct brw_context *brw,
417 struct brw_instruction *insn,
418 GLuint binding_table_index,
419 GLuint msg_control,
420 GLuint msg_type,
421 GLuint target_cache,
422 GLuint msg_length,
423 GLuint response_length,
424 GLuint end_of_thread )
425 {
426 struct intel_context *intel = &brw->intel;
427 brw_set_src1(insn, brw_imm_d(0));
428
429 if (intel->gen == 5) {
430 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
431 insn->bits3.dp_read_gen5.msg_control = msg_control;
432 insn->bits3.dp_read_gen5.msg_type = msg_type;
433 insn->bits3.dp_read_gen5.target_cache = target_cache;
434 insn->bits3.dp_read_gen5.header_present = 1;
435 insn->bits3.dp_read_gen5.response_length = response_length;
436 insn->bits3.dp_read_gen5.msg_length = msg_length;
437 insn->bits3.dp_read_gen5.pad1 = 0;
438 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
439 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
440 insn->bits2.send_gen5.end_of_thread = end_of_thread;
441 } else {
442 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
443 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
444 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
445 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
446 insn->bits3.dp_read.response_length = response_length; /*16:19*/
447 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
448 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
449 insn->bits3.dp_read.pad1 = 0; /*28:30*/
450 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
451 }
452 }
453
454 static void brw_set_sampler_message(struct brw_context *brw,
455 struct brw_instruction *insn,
456 GLuint binding_table_index,
457 GLuint sampler,
458 GLuint msg_type,
459 GLuint response_length,
460 GLuint msg_length,
461 GLboolean eot,
462 GLuint header_present,
463 GLuint simd_mode)
464 {
465 struct intel_context *intel = &brw->intel;
466 assert(eot == 0);
467 brw_set_src1(insn, brw_imm_d(0));
468
469 if (intel->gen == 5) {
470 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
471 insn->bits3.sampler_gen5.sampler = sampler;
472 insn->bits3.sampler_gen5.msg_type = msg_type;
473 insn->bits3.sampler_gen5.simd_mode = simd_mode;
474 insn->bits3.sampler_gen5.header_present = header_present;
475 insn->bits3.sampler_gen5.response_length = response_length;
476 insn->bits3.sampler_gen5.msg_length = msg_length;
477 insn->bits3.sampler_gen5.end_of_thread = eot;
478 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
479 insn->bits2.send_gen5.end_of_thread = eot;
480 } else if (intel->is_g4x) {
481 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
482 insn->bits3.sampler_g4x.sampler = sampler;
483 insn->bits3.sampler_g4x.msg_type = msg_type;
484 insn->bits3.sampler_g4x.response_length = response_length;
485 insn->bits3.sampler_g4x.msg_length = msg_length;
486 insn->bits3.sampler_g4x.end_of_thread = eot;
487 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
488 } else {
489 insn->bits3.sampler.binding_table_index = binding_table_index;
490 insn->bits3.sampler.sampler = sampler;
491 insn->bits3.sampler.msg_type = msg_type;
492 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
493 insn->bits3.sampler.response_length = response_length;
494 insn->bits3.sampler.msg_length = msg_length;
495 insn->bits3.sampler.end_of_thread = eot;
496 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
497 }
498 }
499
500
501
502 static struct brw_instruction *next_insn( struct brw_compile *p,
503 GLuint opcode )
504 {
505 struct brw_instruction *insn;
506
507 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
508
509 insn = &p->store[p->nr_insn++];
510 memcpy(insn, p->current, sizeof(*insn));
511
512 /* Reset this one-shot flag:
513 */
514
515 if (p->current->header.destreg__conditionalmod) {
516 p->current->header.destreg__conditionalmod = 0;
517 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
518 }
519
520 insn->header.opcode = opcode;
521 return insn;
522 }
523
524
525 static struct brw_instruction *brw_alu1( struct brw_compile *p,
526 GLuint opcode,
527 struct brw_reg dest,
528 struct brw_reg src )
529 {
530 struct brw_instruction *insn = next_insn(p, opcode);
531 brw_set_dest(insn, dest);
532 brw_set_src0(insn, src);
533 return insn;
534 }
535
536 static struct brw_instruction *brw_alu2(struct brw_compile *p,
537 GLuint opcode,
538 struct brw_reg dest,
539 struct brw_reg src0,
540 struct brw_reg src1 )
541 {
542 struct brw_instruction *insn = next_insn(p, opcode);
543 brw_set_dest(insn, dest);
544 brw_set_src0(insn, src0);
545 brw_set_src1(insn, src1);
546 return insn;
547 }
548
549
550 /***********************************************************************
551 * Convenience routines.
552 */
553 #define ALU1(OP) \
554 struct brw_instruction *brw_##OP(struct brw_compile *p, \
555 struct brw_reg dest, \
556 struct brw_reg src0) \
557 { \
558 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
559 }
560
561 #define ALU2(OP) \
562 struct brw_instruction *brw_##OP(struct brw_compile *p, \
563 struct brw_reg dest, \
564 struct brw_reg src0, \
565 struct brw_reg src1) \
566 { \
567 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
568 }
569
570
571 ALU1(MOV)
572 ALU2(SEL)
573 ALU1(NOT)
574 ALU2(AND)
575 ALU2(OR)
576 ALU2(XOR)
577 ALU2(SHR)
578 ALU2(SHL)
579 ALU2(RSR)
580 ALU2(RSL)
581 ALU2(ASR)
582 ALU2(ADD)
583 ALU2(MUL)
584 ALU1(FRC)
585 ALU1(RNDD)
586 ALU1(RNDZ)
587 ALU2(MAC)
588 ALU2(MACH)
589 ALU1(LZD)
590 ALU2(DP4)
591 ALU2(DPH)
592 ALU2(DP3)
593 ALU2(DP2)
594 ALU2(LINE)
595 ALU2(PLN)
596
597
598
599 void brw_NOP(struct brw_compile *p)
600 {
601 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
602 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
603 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
604 brw_set_src1(insn, brw_imm_ud(0x0));
605 }
606
607
608
609
610
611 /***********************************************************************
612 * Comparisons, if/else/endif
613 */
614
615 struct brw_instruction *brw_JMPI(struct brw_compile *p,
616 struct brw_reg dest,
617 struct brw_reg src0,
618 struct brw_reg src1)
619 {
620 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
621
622 insn->header.execution_size = 1;
623 insn->header.compression_control = BRW_COMPRESSION_NONE;
624 insn->header.mask_control = BRW_MASK_DISABLE;
625
626 p->current->header.predicate_control = BRW_PREDICATE_NONE;
627
628 return insn;
629 }
630
631 /* EU takes the value from the flag register and pushes it onto some
632 * sort of a stack (presumably merging with any flag value already on
633 * the stack). Within an if block, the flags at the top of the stack
634 * control execution on each channel of the unit, eg. on each of the
635 * 16 pixel values in our wm programs.
636 *
637 * When the matching 'else' instruction is reached (presumably by
638 * countdown of the instruction count patched in by our ELSE/ENDIF
639 * functions), the relevant flags are inverted.
640 *
641 * When the matching 'endif' instruction is reached, the flags are
642 * popped off. If the stack is now empty, normal execution resumes.
643 *
644 * No attempt is made to deal with stack overflow (14 elements?).
645 */
646 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
647 {
648 struct brw_instruction *insn;
649
650 if (p->single_program_flow) {
651 assert(execute_size == BRW_EXECUTE_1);
652
653 insn = next_insn(p, BRW_OPCODE_ADD);
654 insn->header.predicate_inverse = 1;
655 } else {
656 insn = next_insn(p, BRW_OPCODE_IF);
657 }
658
659 /* Override the defaults for this instruction:
660 */
661 brw_set_dest(insn, brw_ip_reg());
662 brw_set_src0(insn, brw_ip_reg());
663 brw_set_src1(insn, brw_imm_d(0x0));
664
665 insn->header.execution_size = execute_size;
666 insn->header.compression_control = BRW_COMPRESSION_NONE;
667 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
668 insn->header.mask_control = BRW_MASK_ENABLE;
669 if (!p->single_program_flow)
670 insn->header.thread_control = BRW_THREAD_SWITCH;
671
672 p->current->header.predicate_control = BRW_PREDICATE_NONE;
673
674 return insn;
675 }
676
677
678 struct brw_instruction *brw_ELSE(struct brw_compile *p,
679 struct brw_instruction *if_insn)
680 {
681 struct intel_context *intel = &p->brw->intel;
682 struct brw_instruction *insn;
683 GLuint br = 1;
684
685 if (intel->gen == 5)
686 br = 2;
687
688 if (p->single_program_flow) {
689 insn = next_insn(p, BRW_OPCODE_ADD);
690 } else {
691 insn = next_insn(p, BRW_OPCODE_ELSE);
692 }
693
694 brw_set_dest(insn, brw_ip_reg());
695 brw_set_src0(insn, brw_ip_reg());
696 brw_set_src1(insn, brw_imm_d(0x0));
697
698 insn->header.compression_control = BRW_COMPRESSION_NONE;
699 insn->header.execution_size = if_insn->header.execution_size;
700 insn->header.mask_control = BRW_MASK_ENABLE;
701 if (!p->single_program_flow)
702 insn->header.thread_control = BRW_THREAD_SWITCH;
703
704 /* Patch the if instruction to point at this instruction.
705 */
706 if (p->single_program_flow) {
707 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
708
709 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
710 } else {
711 assert(if_insn->header.opcode == BRW_OPCODE_IF);
712
713 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
714 if_insn->bits3.if_else.pop_count = 0;
715 if_insn->bits3.if_else.pad0 = 0;
716 }
717
718 return insn;
719 }
720
721 void brw_ENDIF(struct brw_compile *p,
722 struct brw_instruction *patch_insn)
723 {
724 struct intel_context *intel = &p->brw->intel;
725 GLuint br = 1;
726
727 if (intel->gen == 5)
728 br = 2;
729
730 if (p->single_program_flow) {
731 /* In single program flow mode, there's no need to execute an ENDIF,
732 * since we don't need to do any stack operations, and if we're executing
733 * currently, we want to just continue executing.
734 */
735 struct brw_instruction *next = &p->store[p->nr_insn];
736
737 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
738
739 patch_insn->bits3.ud = (next - patch_insn) * 16;
740 } else {
741 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
742
743 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
744 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
745 brw_set_src1(insn, brw_imm_d(0x0));
746
747 insn->header.compression_control = BRW_COMPRESSION_NONE;
748 insn->header.execution_size = patch_insn->header.execution_size;
749 insn->header.mask_control = BRW_MASK_ENABLE;
750 insn->header.thread_control = BRW_THREAD_SWITCH;
751
752 assert(patch_insn->bits3.if_else.jump_count == 0);
753
754 /* Patch the if or else instructions to point at this or the next
755 * instruction respectively.
756 */
757 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
758 /* Automagically turn it into an IFF:
759 */
760 patch_insn->header.opcode = BRW_OPCODE_IFF;
761 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
762 patch_insn->bits3.if_else.pop_count = 0;
763 patch_insn->bits3.if_else.pad0 = 0;
764 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
765 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
766 patch_insn->bits3.if_else.pop_count = 1;
767 patch_insn->bits3.if_else.pad0 = 0;
768 } else {
769 assert(0);
770 }
771
772 /* Also pop item off the stack in the endif instruction:
773 */
774 insn->bits3.if_else.jump_count = 0;
775 insn->bits3.if_else.pop_count = 1;
776 insn->bits3.if_else.pad0 = 0;
777 }
778 }
779
780 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
781 {
782 struct brw_instruction *insn;
783 insn = next_insn(p, BRW_OPCODE_BREAK);
784 brw_set_dest(insn, brw_ip_reg());
785 brw_set_src0(insn, brw_ip_reg());
786 brw_set_src1(insn, brw_imm_d(0x0));
787 insn->header.compression_control = BRW_COMPRESSION_NONE;
788 insn->header.execution_size = BRW_EXECUTE_8;
789 /* insn->header.mask_control = BRW_MASK_DISABLE; */
790 insn->bits3.if_else.pad0 = 0;
791 insn->bits3.if_else.pop_count = pop_count;
792 return insn;
793 }
794
795 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
796 {
797 struct brw_instruction *insn;
798 insn = next_insn(p, BRW_OPCODE_CONTINUE);
799 brw_set_dest(insn, brw_ip_reg());
800 brw_set_src0(insn, brw_ip_reg());
801 brw_set_src1(insn, brw_imm_d(0x0));
802 insn->header.compression_control = BRW_COMPRESSION_NONE;
803 insn->header.execution_size = BRW_EXECUTE_8;
804 /* insn->header.mask_control = BRW_MASK_DISABLE; */
805 insn->bits3.if_else.pad0 = 0;
806 insn->bits3.if_else.pop_count = pop_count;
807 return insn;
808 }
809
810 /* DO/WHILE loop:
811 */
812 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
813 {
814 if (p->single_program_flow) {
815 return &p->store[p->nr_insn];
816 } else {
817 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
818
819 /* Override the defaults for this instruction:
820 */
821 brw_set_dest(insn, brw_null_reg());
822 brw_set_src0(insn, brw_null_reg());
823 brw_set_src1(insn, brw_null_reg());
824
825 insn->header.compression_control = BRW_COMPRESSION_NONE;
826 insn->header.execution_size = execute_size;
827 insn->header.predicate_control = BRW_PREDICATE_NONE;
828 /* insn->header.mask_control = BRW_MASK_ENABLE; */
829 /* insn->header.mask_control = BRW_MASK_DISABLE; */
830
831 return insn;
832 }
833 }
834
835
836
837 struct brw_instruction *brw_WHILE(struct brw_compile *p,
838 struct brw_instruction *do_insn)
839 {
840 struct intel_context *intel = &p->brw->intel;
841 struct brw_instruction *insn;
842 GLuint br = 1;
843
844 if (intel->gen == 5)
845 br = 2;
846
847 if (p->single_program_flow)
848 insn = next_insn(p, BRW_OPCODE_ADD);
849 else
850 insn = next_insn(p, BRW_OPCODE_WHILE);
851
852 brw_set_dest(insn, brw_ip_reg());
853 brw_set_src0(insn, brw_ip_reg());
854 brw_set_src1(insn, brw_imm_d(0x0));
855
856 insn->header.compression_control = BRW_COMPRESSION_NONE;
857
858 if (p->single_program_flow) {
859 insn->header.execution_size = BRW_EXECUTE_1;
860
861 insn->bits3.d = (do_insn - insn) * 16;
862 } else {
863 insn->header.execution_size = do_insn->header.execution_size;
864
865 assert(do_insn->header.opcode == BRW_OPCODE_DO);
866 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
867 insn->bits3.if_else.pop_count = 0;
868 insn->bits3.if_else.pad0 = 0;
869 }
870
871 /* insn->header.mask_control = BRW_MASK_ENABLE; */
872
873 /* insn->header.mask_control = BRW_MASK_DISABLE; */
874 p->current->header.predicate_control = BRW_PREDICATE_NONE;
875 return insn;
876 }
877
878
879 /* FORWARD JUMPS:
880 */
881 void brw_land_fwd_jump(struct brw_compile *p,
882 struct brw_instruction *jmp_insn)
883 {
884 struct intel_context *intel = &p->brw->intel;
885 struct brw_instruction *landing = &p->store[p->nr_insn];
886 GLuint jmpi = 1;
887
888 if (intel->gen == 5)
889 jmpi = 2;
890
891 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
892 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
893
894 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
895 }
896
897
898
899 /* To integrate with the above, it makes sense that the comparison
900 * instruction should populate the flag register. It might be simpler
901 * just to use the flag reg for most WM tasks?
902 */
903 void brw_CMP(struct brw_compile *p,
904 struct brw_reg dest,
905 GLuint conditional,
906 struct brw_reg src0,
907 struct brw_reg src1)
908 {
909 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
910
911 insn->header.destreg__conditionalmod = conditional;
912 brw_set_dest(insn, dest);
913 brw_set_src0(insn, src0);
914 brw_set_src1(insn, src1);
915
916 /* guess_execution_size(insn, src0); */
917
918
919 /* Make it so that future instructions will use the computed flag
920 * value until brw_set_predicate_control_flag_value() is called
921 * again.
922 */
923 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
924 dest.nr == 0) {
925 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
926 p->flag_value = 0xff;
927 }
928 }
929
930 /* Issue 'wait' instruction for n1, host could program MMIO
931 to wake up thread. */
932 void brw_WAIT (struct brw_compile *p)
933 {
934 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
935 struct brw_reg src = brw_notification_1_reg();
936
937 brw_set_dest(insn, src);
938 brw_set_src0(insn, src);
939 brw_set_src1(insn, brw_null_reg());
940 insn->header.execution_size = 0; /* must */
941 insn->header.predicate_control = 0;
942 insn->header.compression_control = 0;
943 }
944
945
946 /***********************************************************************
947 * Helpers for the various SEND message types:
948 */
949
950 /** Extended math function, float[8].
951 */
952 void brw_math( struct brw_compile *p,
953 struct brw_reg dest,
954 GLuint function,
955 GLuint saturate,
956 GLuint msg_reg_nr,
957 struct brw_reg src,
958 GLuint data_type,
959 GLuint precision )
960 {
961 struct intel_context *intel = &p->brw->intel;
962
963 if (intel->gen >= 6) {
964 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
965
966 /* Math is the same ISA format as other opcodes, except that CondModifier
967 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
968 */
969 insn->header.destreg__conditionalmod = function;
970
971 brw_set_dest(insn, dest);
972 brw_set_src0(insn, src);
973 brw_set_src1(insn, brw_null_reg());
974 } else {
975 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
976 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
977 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
978 /* Example code doesn't set predicate_control for send
979 * instructions.
980 */
981 insn->header.predicate_control = 0;
982 insn->header.destreg__conditionalmod = msg_reg_nr;
983
984 brw_set_dest(insn, dest);
985 brw_set_src0(insn, src);
986 brw_set_math_message(p->brw,
987 insn,
988 msg_length, response_length,
989 function,
990 BRW_MATH_INTEGER_UNSIGNED,
991 precision,
992 saturate,
993 data_type);
994 }
995 }
996
997 /**
998 * Extended math function, float[16].
999 * Use 2 send instructions.
1000 */
1001 void brw_math_16( struct brw_compile *p,
1002 struct brw_reg dest,
1003 GLuint function,
1004 GLuint saturate,
1005 GLuint msg_reg_nr,
1006 struct brw_reg src,
1007 GLuint precision )
1008 {
1009 struct intel_context *intel = &p->brw->intel;
1010 struct brw_instruction *insn;
1011 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1012 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1013
1014 if (intel->gen >= 6) {
1015 insn = next_insn(p, BRW_OPCODE_MATH);
1016
1017 /* Math is the same ISA format as other opcodes, except that CondModifier
1018 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1019 */
1020 insn->header.destreg__conditionalmod = function;
1021
1022 brw_set_dest(insn, dest);
1023 brw_set_src0(insn, src);
1024 brw_set_src1(insn, brw_null_reg());
1025 return;
1026 }
1027
1028 /* First instruction:
1029 */
1030 brw_push_insn_state(p);
1031 brw_set_predicate_control_flag_value(p, 0xff);
1032 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1033
1034 insn = next_insn(p, BRW_OPCODE_SEND);
1035 insn->header.destreg__conditionalmod = msg_reg_nr;
1036
1037 brw_set_dest(insn, dest);
1038 brw_set_src0(insn, src);
1039 brw_set_math_message(p->brw,
1040 insn,
1041 msg_length, response_length,
1042 function,
1043 BRW_MATH_INTEGER_UNSIGNED,
1044 precision,
1045 saturate,
1046 BRW_MATH_DATA_VECTOR);
1047
1048 /* Second instruction:
1049 */
1050 insn = next_insn(p, BRW_OPCODE_SEND);
1051 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1052 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1053
1054 brw_set_dest(insn, offset(dest,1));
1055 brw_set_src0(insn, src);
1056 brw_set_math_message(p->brw,
1057 insn,
1058 msg_length, response_length,
1059 function,
1060 BRW_MATH_INTEGER_UNSIGNED,
1061 precision,
1062 saturate,
1063 BRW_MATH_DATA_VECTOR);
1064
1065 brw_pop_insn_state(p);
1066 }
1067
1068
1069 /**
1070 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1071 * Scratch offset should be a multiple of 64.
1072 * Used for register spilling.
1073 */
1074 void brw_dp_WRITE_16( struct brw_compile *p,
1075 struct brw_reg src,
1076 GLuint scratch_offset )
1077 {
1078 struct intel_context *intel = &p->brw->intel;
1079 GLuint msg_reg_nr = 1;
1080 {
1081 brw_push_insn_state(p);
1082 brw_set_mask_control(p, BRW_MASK_DISABLE);
1083 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1084
1085 /* set message header global offset field (reg 0, element 2) */
1086 brw_MOV(p,
1087 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1088 brw_imm_d(scratch_offset));
1089
1090 brw_pop_insn_state(p);
1091 }
1092
1093 {
1094 GLuint msg_length = 3;
1095 struct brw_reg dest;
1096 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1097 int send_commit_msg;
1098
1099 insn->header.predicate_control = 0; /* XXX */
1100 insn->header.compression_control = BRW_COMPRESSION_NONE;
1101 insn->header.destreg__conditionalmod = msg_reg_nr;
1102
1103 /* Until gen6, writes followed by reads from the same location
1104 * are not guaranteed to be ordered unless write_commit is set.
1105 * If set, then a no-op write is issued to the destination
1106 * register to set a dependency, and a read from the destination
1107 * can be used to ensure the ordering.
1108 *
1109 * For gen6, only writes between different threads need ordering
1110 * protection. Our use of DP writes is all about register
1111 * spilling within a thread.
1112 */
1113 if (intel->gen >= 6) {
1114 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1115 send_commit_msg = 0;
1116 } else {
1117 dest = brw_uw16_grf(0, 0);
1118 send_commit_msg = 1;
1119 }
1120
1121 brw_set_dest(insn, dest);
1122 brw_set_src0(insn, src);
1123
1124 brw_set_dp_write_message(p->brw,
1125 insn,
1126 255, /* binding table index (255=stateless) */
1127 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1128 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1129 msg_length,
1130 0, /* pixel scoreboard */
1131 send_commit_msg, /* response_length */
1132 0, /* eot */
1133 send_commit_msg);
1134 }
1135 }
1136
1137
1138 /**
1139 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1140 * Scratch offset should be a multiple of 64.
1141 * Used for register spilling.
1142 */
1143 void brw_dp_READ_16( struct brw_compile *p,
1144 struct brw_reg dest,
1145 GLuint scratch_offset )
1146 {
1147 GLuint msg_reg_nr = 1;
1148 {
1149 brw_push_insn_state(p);
1150 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1151 brw_set_mask_control(p, BRW_MASK_DISABLE);
1152
1153 /* set message header global offset field (reg 0, element 2) */
1154 brw_MOV(p,
1155 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1156 brw_imm_d(scratch_offset));
1157
1158 brw_pop_insn_state(p);
1159 }
1160
1161 {
1162 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1163
1164 insn->header.predicate_control = 0; /* XXX */
1165 insn->header.compression_control = BRW_COMPRESSION_NONE;
1166 insn->header.destreg__conditionalmod = msg_reg_nr;
1167
1168 brw_set_dest(insn, dest); /* UW? */
1169 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1170
1171 brw_set_dp_read_message(p->brw,
1172 insn,
1173 255, /* binding table index (255=stateless) */
1174 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1175 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1176 1, /* target cache (render/scratch) */
1177 1, /* msg_length */
1178 2, /* response_length */
1179 0); /* eot */
1180 }
1181 }
1182
1183
1184 /**
1185 * Read a float[4] vector from the data port Data Cache (const buffer).
1186 * Location (in buffer) should be a multiple of 16.
1187 * Used for fetching shader constants.
1188 * If relAddr is true, we'll do an indirect fetch using the address register.
1189 */
1190 void brw_dp_READ_4( struct brw_compile *p,
1191 struct brw_reg dest,
1192 GLboolean relAddr,
1193 GLuint location,
1194 GLuint bind_table_index )
1195 {
1196 /* XXX: relAddr not implemented */
1197 GLuint msg_reg_nr = 1;
1198 {
1199 struct brw_reg b;
1200 brw_push_insn_state(p);
1201 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1202 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1203 brw_set_mask_control(p, BRW_MASK_DISABLE);
1204
1205 /* Setup MRF[1] with location/offset into const buffer */
1206 b = brw_message_reg(msg_reg_nr);
1207 b = retype(b, BRW_REGISTER_TYPE_UD);
1208 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1209 * when the docs say only dword[2] should be set. Hmmm. But it works.
1210 */
1211 brw_MOV(p, b, brw_imm_ud(location));
1212 brw_pop_insn_state(p);
1213 }
1214
1215 {
1216 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1217
1218 insn->header.predicate_control = BRW_PREDICATE_NONE;
1219 insn->header.compression_control = BRW_COMPRESSION_NONE;
1220 insn->header.destreg__conditionalmod = msg_reg_nr;
1221 insn->header.mask_control = BRW_MASK_DISABLE;
1222
1223 /* cast dest to a uword[8] vector */
1224 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1225
1226 brw_set_dest(insn, dest);
1227 brw_set_src0(insn, brw_null_reg());
1228
1229 brw_set_dp_read_message(p->brw,
1230 insn,
1231 bind_table_index,
1232 0, /* msg_control (0 means 1 Oword) */
1233 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1234 0, /* source cache = data cache */
1235 1, /* msg_length */
1236 1, /* response_length (1 Oword) */
1237 0); /* eot */
1238 }
1239 }
1240
1241
1242 /**
1243 * Read float[4] constant(s) from VS constant buffer.
1244 * For relative addressing, two float[4] constants will be read into 'dest'.
1245 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1246 */
1247 void brw_dp_READ_4_vs(struct brw_compile *p,
1248 struct brw_reg dest,
1249 GLuint location,
1250 GLuint bind_table_index)
1251 {
1252 struct brw_instruction *insn;
1253 GLuint msg_reg_nr = 1;
1254 struct brw_reg b;
1255
1256 /*
1257 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1258 location, msg_reg_nr);
1259 */
1260
1261 /* Setup MRF[1] with location/offset into const buffer */
1262 brw_push_insn_state(p);
1263 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1264 brw_set_mask_control(p, BRW_MASK_DISABLE);
1265 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1266
1267 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1268 * when the docs say only dword[2] should be set. Hmmm. But it works.
1269 */
1270 b = brw_message_reg(msg_reg_nr);
1271 b = retype(b, BRW_REGISTER_TYPE_UD);
1272 /*b = get_element_ud(b, 2);*/
1273 brw_MOV(p, b, brw_imm_ud(location));
1274
1275 brw_pop_insn_state(p);
1276
1277 insn = next_insn(p, BRW_OPCODE_SEND);
1278
1279 insn->header.predicate_control = BRW_PREDICATE_NONE;
1280 insn->header.compression_control = BRW_COMPRESSION_NONE;
1281 insn->header.destreg__conditionalmod = msg_reg_nr;
1282 insn->header.mask_control = BRW_MASK_DISABLE;
1283
1284 brw_set_dest(insn, dest);
1285 brw_set_src0(insn, brw_null_reg());
1286
1287 brw_set_dp_read_message(p->brw,
1288 insn,
1289 bind_table_index,
1290 0,
1291 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1292 0, /* source cache = data cache */
1293 1, /* msg_length */
1294 1, /* response_length (1 Oword) */
1295 0); /* eot */
1296 }
1297
1298 /**
1299 * Read a float[4] constant per vertex from VS constant buffer, with
1300 * relative addressing.
1301 */
1302 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1303 struct brw_reg dest,
1304 struct brw_reg addr_reg,
1305 GLuint offset,
1306 GLuint bind_table_index)
1307 {
1308 struct intel_context *intel = &p->brw->intel;
1309 int msg_type;
1310
1311 /* Setup MRF[1] with offset into const buffer */
1312 brw_push_insn_state(p);
1313 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1314 brw_set_mask_control(p, BRW_MASK_DISABLE);
1315 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1316
1317 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1318 * fields ignored.
1319 */
1320 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1321 addr_reg, brw_imm_d(offset));
1322 brw_pop_insn_state(p);
1323
1324 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1325
1326 insn->header.predicate_control = BRW_PREDICATE_NONE;
1327 insn->header.compression_control = BRW_COMPRESSION_NONE;
1328 insn->header.destreg__conditionalmod = 0;
1329 insn->header.mask_control = BRW_MASK_DISABLE;
1330
1331 brw_set_dest(insn, dest);
1332 brw_set_src0(insn, brw_vec8_grf(0, 0));
1333
1334 if (intel->gen == 6)
1335 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1336 else if (intel->gen == 5 || intel->is_g4x)
1337 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1338 else
1339 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1340
1341 brw_set_dp_read_message(p->brw,
1342 insn,
1343 bind_table_index,
1344 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1345 msg_type,
1346 0, /* source cache = data cache */
1347 2, /* msg_length */
1348 1, /* response_length */
1349 0); /* eot */
1350 }
1351
1352
1353
1354 void brw_fb_WRITE(struct brw_compile *p,
1355 int dispatch_width,
1356 struct brw_reg dest,
1357 GLuint msg_reg_nr,
1358 struct brw_reg src0,
1359 GLuint binding_table_index,
1360 GLuint msg_length,
1361 GLuint response_length,
1362 GLboolean eot)
1363 {
1364 struct intel_context *intel = &p->brw->intel;
1365 struct brw_instruction *insn;
1366 GLuint msg_control, msg_type;
1367
1368 insn = next_insn(p, BRW_OPCODE_SEND);
1369 insn->header.predicate_control = 0; /* XXX */
1370 insn->header.compression_control = BRW_COMPRESSION_NONE;
1371
1372 if (intel->gen >= 6) {
1373 /* headerless version, just submit color payload */
1374 src0 = brw_message_reg(msg_reg_nr);
1375
1376 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1377 } else {
1378 insn->header.destreg__conditionalmod = msg_reg_nr;
1379
1380 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1381 }
1382
1383 if (dispatch_width == 16)
1384 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1385 else
1386 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1387
1388 brw_set_dest(insn, dest);
1389 brw_set_src0(insn, src0);
1390 brw_set_dp_write_message(p->brw,
1391 insn,
1392 binding_table_index,
1393 msg_control,
1394 msg_type,
1395 msg_length,
1396 1, /* pixel scoreboard */
1397 response_length,
1398 eot,
1399 0 /* send_commit_msg */);
1400 }
1401
1402
1403 /**
1404 * Texture sample instruction.
1405 * Note: the msg_type plus msg_length values determine exactly what kind
1406 * of sampling operation is performed. See volume 4, page 161 of docs.
1407 */
1408 void brw_SAMPLE(struct brw_compile *p,
1409 struct brw_reg dest,
1410 GLuint msg_reg_nr,
1411 struct brw_reg src0,
1412 GLuint binding_table_index,
1413 GLuint sampler,
1414 GLuint writemask,
1415 GLuint msg_type,
1416 GLuint response_length,
1417 GLuint msg_length,
1418 GLboolean eot,
1419 GLuint header_present,
1420 GLuint simd_mode)
1421 {
1422 GLboolean need_stall = 0;
1423
1424 if (writemask == 0) {
1425 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1426 return;
1427 }
1428
1429 /* Hardware doesn't do destination dependency checking on send
1430 * instructions properly. Add a workaround which generates the
1431 * dependency by other means. In practice it seems like this bug
1432 * only crops up for texture samples, and only where registers are
1433 * written by the send and then written again later without being
1434 * read in between. Luckily for us, we already track that
1435 * information and use it to modify the writemask for the
1436 * instruction, so that is a guide for whether a workaround is
1437 * needed.
1438 */
1439 if (writemask != WRITEMASK_XYZW) {
1440 GLuint dst_offset = 0;
1441 GLuint i, newmask = 0, len = 0;
1442
1443 for (i = 0; i < 4; i++) {
1444 if (writemask & (1<<i))
1445 break;
1446 dst_offset += 2;
1447 }
1448 for (; i < 4; i++) {
1449 if (!(writemask & (1<<i)))
1450 break;
1451 newmask |= 1<<i;
1452 len++;
1453 }
1454
1455 if (newmask != writemask) {
1456 need_stall = 1;
1457 /* printf("need stall %x %x\n", newmask , writemask); */
1458 }
1459 else {
1460 GLboolean dispatch_16 = GL_FALSE;
1461
1462 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1463
1464 guess_execution_size(p->current, dest);
1465 if (p->current->header.execution_size == BRW_EXECUTE_16)
1466 dispatch_16 = GL_TRUE;
1467
1468 newmask = ~newmask & WRITEMASK_XYZW;
1469
1470 brw_push_insn_state(p);
1471
1472 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1473 brw_set_mask_control(p, BRW_MASK_DISABLE);
1474
1475 brw_MOV(p, m1, brw_vec8_grf(0,0));
1476 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1477
1478 brw_pop_insn_state(p);
1479
1480 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1481 dest = offset(dest, dst_offset);
1482
1483 /* For 16-wide dispatch, masked channels are skipped in the
1484 * response. For 8-wide, masked channels still take up slots,
1485 * and are just not written to.
1486 */
1487 if (dispatch_16)
1488 response_length = len * 2;
1489 }
1490 }
1491
1492 {
1493 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1494
1495 insn->header.predicate_control = 0; /* XXX */
1496 insn->header.compression_control = BRW_COMPRESSION_NONE;
1497 insn->header.destreg__conditionalmod = msg_reg_nr;
1498
1499 brw_set_dest(insn, dest);
1500 brw_set_src0(insn, src0);
1501 brw_set_sampler_message(p->brw, insn,
1502 binding_table_index,
1503 sampler,
1504 msg_type,
1505 response_length,
1506 msg_length,
1507 eot,
1508 header_present,
1509 simd_mode);
1510 }
1511
1512 if (need_stall) {
1513 struct brw_reg reg = vec8(offset(dest, response_length-1));
1514
1515 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1516 */
1517 brw_push_insn_state(p);
1518 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1519 brw_MOV(p, reg, reg);
1520 brw_pop_insn_state(p);
1521 }
1522
1523 }
1524
1525 /* All these variables are pretty confusing - we might be better off
1526 * using bitmasks and macros for this, in the old style. Or perhaps
1527 * just having the caller instantiate the fields in dword3 itself.
1528 */
1529 void brw_urb_WRITE(struct brw_compile *p,
1530 struct brw_reg dest,
1531 GLuint msg_reg_nr,
1532 struct brw_reg src0,
1533 GLboolean allocate,
1534 GLboolean used,
1535 GLuint msg_length,
1536 GLuint response_length,
1537 GLboolean eot,
1538 GLboolean writes_complete,
1539 GLuint offset,
1540 GLuint swizzle)
1541 {
1542 struct intel_context *intel = &p->brw->intel;
1543 struct brw_instruction *insn;
1544
1545 /* Sandybridge doesn't have the implied move for SENDs,
1546 * and the first message register index comes from src0.
1547 */
1548 if (intel->gen >= 6) {
1549 brw_push_insn_state(p);
1550 brw_set_mask_control( p, BRW_MASK_DISABLE );
1551 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1552 brw_pop_insn_state(p);
1553 src0 = brw_message_reg(msg_reg_nr);
1554 }
1555
1556 insn = next_insn(p, BRW_OPCODE_SEND);
1557
1558 assert(msg_length < BRW_MAX_MRF);
1559
1560 brw_set_dest(insn, dest);
1561 brw_set_src0(insn, src0);
1562 brw_set_src1(insn, brw_imm_d(0));
1563
1564 if (intel->gen < 6)
1565 insn->header.destreg__conditionalmod = msg_reg_nr;
1566
1567 brw_set_urb_message(p->brw,
1568 insn,
1569 allocate,
1570 used,
1571 msg_length,
1572 response_length,
1573 eot,
1574 writes_complete,
1575 offset,
1576 swizzle);
1577 }
1578
1579 void brw_ff_sync(struct brw_compile *p,
1580 struct brw_reg dest,
1581 GLuint msg_reg_nr,
1582 struct brw_reg src0,
1583 GLboolean allocate,
1584 GLuint response_length,
1585 GLboolean eot)
1586 {
1587 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1588
1589 brw_set_dest(insn, dest);
1590 brw_set_src0(insn, src0);
1591 brw_set_src1(insn, brw_imm_d(0));
1592
1593 insn->header.destreg__conditionalmod = msg_reg_nr;
1594
1595 brw_set_ff_sync_message(p->brw,
1596 insn,
1597 allocate,
1598 response_length,
1599 eot);
1600 }