i965: Use unreachable() instead of unconditional assert().
src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
#include "brw_cfg.h"

extern "C" {
#include "brw_eu.h"
#include "main/macros.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
};

namespace brw {

gen8_vec4_generator::gen8_vec4_generator(struct brw_context *brw,
                                         struct gl_shader_program *shader_prog,
                                         struct gl_program *prog,
                                         struct brw_vec4_prog_data *prog_data,
                                         void *mem_ctx,
                                         bool debug_flag)
   : gen8_generator(brw, shader_prog, prog, mem_ctx),
     prog_data(prog_data),
     debug_flag(debug_flag)
{
}

gen8_vec4_generator::~gen8_vec4_generator()
{
}

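/* Emit a SEND to the sampler unit for one of the SIMD4x2 texturing opcodes.
 * When a header is present, it carries the texel offset bits and, for
 * sampler indices >= 16, an adjusted Sampler State Pointer.
 */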
void
gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst)
{
   int msg_type = 0;

   switch (ir->opcode) {
   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXL:
      if (ir->shadow_compare) {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
      } else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }
      break;
   case SHADER_OPCODE_TXD:
      if (ir->shadow_compare) {
         msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
      } else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
      }
      break;
   case SHADER_OPCODE_TXF:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
      break;
   case SHADER_OPCODE_TXF_CMS:
      msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
      break;
   case SHADER_OPCODE_TXF_MCS:
      msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
      break;
   case SHADER_OPCODE_TXS:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      break;
   case SHADER_OPCODE_TG4:
      if (ir->shadow_compare) {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
      } else {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
      }
      break;
   case SHADER_OPCODE_TG4_OFFSET:
      if (ir->shadow_compare) {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
      } else {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
      }
      break;
   default:
      unreachable("should not get here: invalid VS texture opcode");
   }

   if (ir->header_present) {
      MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      default_state.access_mode = BRW_ALIGN_1;

      if (ir->texture_offset) {
         /* Set the offset bits in DWord 2. */
         MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
                        BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(ir->texture_offset));
      }

      if (ir->sampler >= 16) {
         /* The "Sampler Index" field can only store values between 0 and 15.
          * However, we can add an offset to the "Sampler State Pointer"
          * field, effectively selecting a different set of 16 samplers.
          *
          * The "Sampler State Pointer" needs to be aligned to a 32-byte
          * offset, and each sampler state is only 16 bytes, so we can't
          * exclusively use the offset - we have to use both.
          */
         gen8_instruction *add =
            ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3),
                get_element_ud(brw_vec8_grf(0, 0), 3),
                brw_imm_ud(16 * (ir->sampler / 16) *
                           sizeof(gen7_sampler_state)));
         gen8_set_mask_control(add, BRW_MASK_DISABLE);
      }

      default_state.access_mode = BRW_ALIGN_16;
   }

   uint32_t surf_index =
      prog_data->base.binding_table.texture_start + ir->sampler;

   gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, inst, dst);
   gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
   gen8_set_sampler_message(brw, inst,
                            surf_index,
                            ir->sampler % 16,
                            msg_type,
                            1,
                            ir->mlen,
                            ir->header_present,
                            BRW_SAMPLER_SIMD_MODE_SIMD4X2);

   brw_mark_surface_used(&prog_data->base, surf_index);
}

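/* Emit a URB write message.  For the VS (vs == true), we copy g0 into the
 * header here; the GS builds its header with separate opcodes beforehand.
 */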
void
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
{
   struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);

   /* Copy g0. */
   if (vs)
      MOV_RAW(header, brw_vec8_grf(0, 0));

   gen8_instruction *inst;
   if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_OWORD message header */
      default_state.access_mode = BRW_ALIGN_1;
      MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
              brw_imm_ud(0xff00));
      default_state.access_mode = BRW_ALIGN_16;
   }

   inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
                        true);
   gen8_set_dst(brw, inst, brw_null_reg());
   gen8_set_src0(brw, inst, header);
}

void
gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header,
                                                  struct brw_reg src)
{
   /* Move the vertex count into the second MRF for the EOT write. */
   assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE);
   int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1;
   MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src);
}

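/* Terminate the geometry shader thread with a final URB write that has the
 * End of Thread bit set.
 */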
void
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
{
   struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
   gen8_instruction *inst;

   /* Enable Channel Masks in the URB_WRITE_HWORD message header */
   default_state.access_mode = BRW_ALIGN_1;
   inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
                    BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;

   /* mlen = 2: g0 header + vertex count */
   inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
   gen8_set_dst(brw, inst, brw_null_reg());
   gen8_set_src0(brw, inst, src);
}

void
gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
                                                  struct brw_reg src0,
                                                  struct brw_reg src1)
{
   /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
    * Header: M0.3):
    *
    *     Slot 0 Offset. This field, after adding to the Global Offset field
    *     in the message descriptor, specifies the offset (in 256-bit units)
    *     from the start of the URB entry, as referenced by URB Handle 0, at
    *     which the data will be accessed.
    *
    * Similar text describes DWORD M0.4, which is slot 1 offset.
    *
    * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
    * of the register for geometry shader invocations 0 and 1) by the
    * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
    *
    * We can do this with the following EU instruction:
    *
    *     mul(2) dst.3<1>UD src0<8;2,4>UD src1   { Align1 WE_all }
    */
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *inst =
      MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;
}

void
gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
                                                   struct brw_reg src)
{
   assert(src.file == BRW_IMMEDIATE_VALUE);

   default_state.access_mode = BRW_ALIGN_1;

   gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);

   default_state.access_mode = BRW_ALIGN_16;
}

void
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
   /* We want to left shift just DWORD 4 (the x component belonging to the
    * second geometry shader invocation) by 4 bits.  So generate the
    * instruction:
    *
    *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
    */
   dst = suboffset(vec1(dst), 4);
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;
}

void
gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
                                                   struct brw_reg src)
{
   /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
    * Header: M0.5):
    *
    *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
    *
    *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
    *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
    *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
    *        channel enable to determine the final channel enable.  For the
    *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
    *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
    *        in the writeback message.  For the URB_WRITE_OWORD &
    *        URB_WRITE_HWORD messages, when final channel enable is 1 it
    *        indicates that Vertex 1 DATA [3] will be written to the surface.
    *
    *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
    *        1: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel included
    *
    *     14 Vertex 1 DATA [2] Channel Mask
    *     13 Vertex 1 DATA [1] Channel Mask
    *     12 Vertex 1 DATA [0] Channel Mask
    *     11 Vertex 0 DATA [3] Channel Mask
    *     10 Vertex 0 DATA [2] Channel Mask
    *      9 Vertex 0 DATA [1] Channel Mask
    *      8 Vertex 0 DATA [0] Channel Mask
    *
    * (This is from a section of the PRM that is agnostic to the particular
    * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
    * geometry shader invocations 0 and 1, respectively).  Since we have the
    * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
    * and the enable flags for geometry shader invocation 1 in bits 7:0 of
    * DWORD 4, we just need to OR them together and store the result in bits
    * 15:8 of DWORD 5.
    *
    * It's easier to get the EU to do this if we think of the src and dst
    * registers as composed of 32 bytes each; then, we want to pick up the
    * contents of bytes 0 and 16 from src, OR them together, and store them in
    * byte 21.
    *
    * We can do that by the following EU instruction:
    *
    *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB    { align1 WE_all }
    *
    * Note: this relies on the source register having zeros in (a) bits 7:4 of
    * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
    * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
    * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
    * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
    * contain valid channel mask values (which are in the range 0x0-0xf).
    */
   dst = retype(dst, BRW_REGISTER_TYPE_UB);
   src = retype(src, BRW_REGISTER_TYPE_UB);

   default_state.access_mode = BRW_ALIGN_1;

   gen8_instruction *inst =
      OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);

   default_state.access_mode = BRW_ALIGN_16;
}

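/* Fill out the per-slot block offsets (M1.0 and M1.4) for an OWord Dual
 * Block scratch message; the second slot's block offset is the first
 * slot's plus second_vertex_offset (1).
 */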
void
gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                       struct brw_reg index)
{
   int second_vertex_offset = 1;

   m1 = retype(m1, BRW_REGISTER_TYPE_D);

   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
    * M1.4 are used, and the rest are ignored.
    */
   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
   struct brw_reg index_0 = suboffset(vec1(index), 0);
   struct brw_reg index_4 = suboffset(vec1(index), 4);

   default_state.mask_control = BRW_MASK_DISABLE;
   default_state.access_mode = BRW_ALIGN_1;

   MOV(m1_0, index_0);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      index_4.dw1.ud += second_vertex_offset;
      MOV(m1_4, index_4);
   } else {
      ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
   }

   default_state.mask_control = BRW_MASK_ENABLE;
   default_state.access_mode = BRW_ALIGN_16;
}

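/* Read one OWord per slot (one GRF in total) from the scratch buffer,
 * using a stateless OWord Dual Block read through the data port.
 */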
void
gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir,
                                           struct brw_reg dst,
                                           struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);

   MOV_RAW(header, brw_vec8_grf(0, 0));

   generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, send, dst);
   gen8_set_src0(brw, send, header);
   gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
                       255, /* binding table index: stateless access */
                       GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
                       BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                       2,      /* mlen */
                       1,      /* rlen */
                       true,   /* header present */
                       false); /* EOT */
}

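/* Write one OWord per slot to the scratch buffer with an OWord Dual Block
 * write; the send is predicated when the IR instruction is.
 */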
void
gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
                                            struct brw_reg dst,
                                            struct brw_reg src,
                                            struct brw_reg index)
{
   struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);

   MOV_RAW(header, brw_vec8_grf(0, 0));

   generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);

   MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D),
       retype(src, BRW_REGISTER_TYPE_D));

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, send, dst);
   gen8_set_src0(brw, send, header);
   gen8_set_pred_control(send, ir->predicate);
   gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
                       255, /* binding table index: stateless access */
                       GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
                       BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                       3,      /* mlen */
                       0,      /* rlen */
                       true,   /* header present */
                       false); /* EOT */
}

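/* Fetch a pull constant by issuing an LD message to the sampler unit; the
 * LD message ignores the actual sampler state and simply reads back the
 * dwords addressed by `offset` from the given surface.
 */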
void
gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                                 struct brw_reg dst,
                                                 struct brw_reg index,
                                                 struct brw_reg offset)
{
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   assert(offset.file == BRW_GENERAL_REGISTER_FILE);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, send, dst);
   gen8_set_src0(brw, send, offset);
   gen8_set_sampler_message(brw, send,
                            surf_index,
                            0, /* The LD message ignores the sampler unit. */
                            GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                            1,     /* rlen */
                            1,     /* mlen */
                            false, /* no header */
                            BRW_SAMPLER_SIMD_MODE_SIMD4X2);

   brw_mark_surface_used(&prog_data->base, surf_index);
}

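/* Emit a SIMD4x2 untyped atomic through Data Cache 1.  msg_control packs
 * the atomic operation (BRW_AOP_*) together with the "return data
 * expected" bit.
 */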
void
gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir,
                                             struct brw_reg dst,
                                             struct brw_reg atomic_op,
                                             struct brw_reg surf_index)
{
   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
          atomic_op.type == BRW_REGISTER_TYPE_UD &&
          surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);
   assert((atomic_op.dw1.ud & ~0xf) == 0);

   unsigned msg_control =
      atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
      (1 << 5);          /* Return data expected */

   gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
   gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
   gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
                       surf_index.dw1.ud,
                       HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2,
                       msg_control,
                       ir->mlen,
                       1,
                       ir->header_present,
                       false);

   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}

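/* Emit a SIMD4x2 untyped surface read through Data Cache 1; the 0xe
 * message control mask enables only the R channel.
 */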
void
gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction *ir,
                                                   struct brw_reg dst,
                                                   struct brw_reg surf_index)
{
   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);

   gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
   gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
   gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
                       surf_index.dw1.ud,
                       HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ,
                       0xe, /* enable only the R channel */
                       ir->mlen,
                       1,
                       ir->header_present,
                       false);

   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}

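/* Translate a single IR instruction into native Gen8 code, fixing up
 * register regions for width-4 (dual instanced) destinations first.
 */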
void
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
                                               struct brw_reg dst,
                                               struct brw_reg *src)
{
   vec4_instruction *ir = (vec4_instruction *) instruction;

   if (dst.width == BRW_WIDTH_4) {
      /* This happens in attribute fixups for "dual instanced" geometry
       * shaders, since they use attributes that are vec4's.  Since the exec
       * width is only 4, it's essential that the caller set
       * force_writemask_all in order to make sure the instruction is executed
       * regardless of which channels are enabled.
       */
      assert(ir->force_writemask_all);

      /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
       * the following register region restrictions (from Graphics BSpec:
       * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
       * > Register Region Restrictions):
       *
       *     1. ExecSize must be greater than or equal to Width.
       *
       *     2. If ExecSize = Width and HorzStride != 0, VertStride must be
       *        set to Width * HorzStride.
       */
      for (int i = 0; i < 3; i++) {
         if (src[i].file == BRW_GENERAL_REGISTER_FILE)
            src[i] = stride(src[i], 4, 4, 1);
      }
   }

   switch (ir->opcode) {
   case BRW_OPCODE_MOV:
      MOV(dst, src[0]);
      break;

   case BRW_OPCODE_ADD:
      ADD(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MUL:
      MUL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MACH:
      MACH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MAD:
      MAD(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_FRC:
      FRC(dst, src[0]);
      break;

   case BRW_OPCODE_RNDD:
      RNDD(dst, src[0]);
      break;

   case BRW_OPCODE_RNDE:
      RNDE(dst, src[0]);
      break;

   case BRW_OPCODE_RNDZ:
      RNDZ(dst, src[0]);
      break;

   case BRW_OPCODE_AND:
      AND(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_OR:
      OR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_XOR:
      XOR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_NOT:
      NOT(dst, src[0]);
      break;

   case BRW_OPCODE_ASR:
      ASR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHR:
      SHR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHL:
      SHL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_CMP:
      CMP(dst, ir->conditional_mod, src[0], src[1]);
      break;

   case BRW_OPCODE_SEL:
      SEL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DPH:
      DPH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP4:
      DP4(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP3:
      DP3(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP2:
      DP2(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_F32TO16:
      /* Emulate the Gen7 zeroing bug. */
      MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
      MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
      break;

   case BRW_OPCODE_F16TO32:
      MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
      break;

   case BRW_OPCODE_LRP:
      LRP(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFREV:
      /* BFREV only supports UD type for src and dst. */
      BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
            retype(src[0], BRW_REGISTER_TYPE_UD));
      break;

   case BRW_OPCODE_FBH:
      /* FBH only supports UD type for dst. */
      FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_FBL:
      /* FBL only supports UD type for dst. */
      FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_CBIT:
      /* CBIT only supports UD type for dst. */
      CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_ADDC:
      ADDC(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SUBB:
      SUBB(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFE:
      BFE(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFI1:
      BFI1(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFI2:
      BFI2(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_IF:
      IF(ir->predicate);
      break;

   case BRW_OPCODE_ELSE:
      ELSE();
      break;

   case BRW_OPCODE_ENDIF:
      ENDIF();
      break;

   case BRW_OPCODE_DO:
      DO();
      break;

   case BRW_OPCODE_BREAK:
      BREAK();
      break;

   case BRW_OPCODE_CONTINUE:
      CONTINUE();
      break;

   case BRW_OPCODE_WHILE:
      WHILE();
      break;

   case SHADER_OPCODE_RCP:
      MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
      break;

   case SHADER_OPCODE_RSQ:
      MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
      break;

   case SHADER_OPCODE_SQRT:
      MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
      break;

   case SHADER_OPCODE_EXP2:
      MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
      break;

   case SHADER_OPCODE_LOG2:
      MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
      break;

   case SHADER_OPCODE_SIN:
      MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
      break;

   case SHADER_OPCODE_COS:
      MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
      break;

   case SHADER_OPCODE_POW:
      MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_QUOTIENT:
      MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_REMAINDER:
      MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
   case SHADER_OPCODE_TG4:
   case SHADER_OPCODE_TG4_OFFSET:
      generate_tex(ir, dst);
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(ir, true);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      generate_scratch_read(ir, dst, src[0]);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      generate_scratch_write(ir, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD:
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      generate_pull_constant_load(ir, dst, src[0], src[1]);
      break;

   case GS_OPCODE_URB_WRITE:
      generate_urb_write(ir, false);
      break;

   case GS_OPCODE_THREAD_END:
      generate_gs_thread_end(ir);
      break;

   case GS_OPCODE_SET_WRITE_OFFSET:
      generate_gs_set_write_offset(dst, src[0], src[1]);
      break;

   case GS_OPCODE_SET_VERTEX_COUNT:
      generate_gs_set_vertex_count(dst, src[0]);
      break;

   case GS_OPCODE_SET_DWORD_2_IMMED:
      generate_gs_set_dword_2_immed(dst, src[0]);
      break;

   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      generate_gs_prepare_channel_masks(dst);
      break;

   case GS_OPCODE_SET_CHANNEL_MASKS:
      generate_gs_set_channel_masks(dst, src[0]);
      break;

   case SHADER_OPCODE_SHADER_TIME_ADD:
      unreachable("XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      generate_untyped_atomic(ir, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      generate_untyped_surface_read(ir, dst, src[0]);
      break;

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      unreachable("VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");

   default:
      if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
         _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
                       opcode_descs[ir->opcode].name);
      } else {
         _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
      }
      abort();
   }
}

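/* Walk the IR instruction list, setting up the per-instruction default
 * state and generating native code, with optional annotations for the
 * debug disassembly.
 */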
void
gen8_vec4_generator::generate_code(exec_list *instructions)
{
   struct annotation_info annotation;
   memset(&annotation, 0, sizeof(annotation));

   cfg_t *cfg = NULL;
   if (unlikely(debug_flag))
      cfg = new(mem_ctx) cfg_t(instructions);

   foreach_in_list(vec4_instruction, ir, instructions) {
      struct brw_reg src[3], dst;

      if (unlikely(debug_flag))
         annotate(brw, &annotation, cfg, ir, next_inst_offset);

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = ir->get_src(prog_data, i);
      }
      dst = ir->get_dst();

      default_state.conditional_mod = ir->conditional_mod;
      default_state.predicate = ir->predicate;
      default_state.predicate_inverse = ir->predicate_inverse;
      default_state.saturate = ir->saturate;

      const unsigned pre_emit_nr_inst = nr_inst;

      generate_vec4_instruction(ir, dst, src);

      if (ir->no_dd_clear || ir->no_dd_check) {
         assert(nr_inst == pre_emit_nr_inst + 1 ||
                !"no_dd_check or no_dd_clear set for IR emitting more "
                 "than 1 instruction");

         gen8_instruction *last = &store[pre_emit_nr_inst];
         gen8_set_no_dd_clear(last, ir->no_dd_clear);
         gen8_set_no_dd_check(last, ir->no_dd_check);
      }
   }

   patch_jump_targets();
   annotation_finalize(&annotation, next_inst_offset);

   int before_size = next_inst_offset;

   if (unlikely(debug_flag)) {
      if (shader_prog) {
         fprintf(stderr, "Native code for %s vertex shader %d:\n",
                 shader_prog->Label ? shader_prog->Label : "unnamed",
                 shader_prog->Name);
      } else {
         fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
      }
      fprintf(stderr, "vec4 shader: %d instructions.\n", before_size / 16);

      dump_assembly(store, annotation.ann_count, annotation.ann, brw, prog);
      ralloc_free(annotation.ann);
   }
}

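/* Top-level entry point: establish the SIMD4x2 defaults (Align16 access
 * mode, ExecSize 8) and generate the whole program.
 */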
const unsigned *
gen8_vec4_generator::generate_assembly(exec_list *instructions,
                                       unsigned *assembly_size)
{
   default_state.access_mode = BRW_ALIGN_16;
   default_state.exec_size = BRW_EXECUTE_8;
   generate_code(instructions);

   *assembly_size = next_inst_offset;
   return (const unsigned *) store;
}

} /* namespace brw */