i965: Create a new vec4 backend for Broadwell.
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_vec4_generator.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25
26 extern "C" {
27 #include "brw_eu.h"
28 #include "main/macros.h"
29 #include "program/prog_print.h"
30 #include "program/prog_parameter.h"
31 };
32
33 namespace brw {
34
35 gen8_vec4_generator::gen8_vec4_generator(struct brw_context *brw,
36 struct gl_shader_program *shader_prog,
37 struct gl_program *prog,
38 struct brw_vec4_prog_data *prog_data,
39 void *mem_ctx,
40 bool debug_flag)
41 : gen8_generator(brw, shader_prog, prog, mem_ctx),
42 prog_data(prog_data),
43 debug_flag(debug_flag)
44 {
45 shader = shader_prog ? shader_prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
46 }
47
48 gen8_vec4_generator::~gen8_vec4_generator()
49 {
50 }
51
52 void
53 gen8_vec4_generator::mark_surface_used(unsigned surf_index)
54 {
55 assert(surf_index < BRW_MAX_SURFACES);
56
57 prog_data->base.binding_table.size_bytes =
58 MAX2(prog_data->base.binding_table.size_bytes, (surf_index + 1) * 4);
59 }
60
61 void
62 gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst)
63 {
64 int msg_type = 0;
65
66 switch (ir->opcode) {
67 case SHADER_OPCODE_TEX:
68 case SHADER_OPCODE_TXL:
69 if (ir->shadow_compare) {
70 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
71 } else {
72 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
73 }
74 break;
75 case SHADER_OPCODE_TXD:
76 if (ir->shadow_compare) {
77 msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
78 } else {
79 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
80 }
81 break;
82 case SHADER_OPCODE_TXF:
83 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
84 break;
85 case SHADER_OPCODE_TXF_MS:
86 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
87 break;
88 case SHADER_OPCODE_TXF_MCS:
89 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
90 break;
91 case SHADER_OPCODE_TXS:
92 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
93 break;
94 case SHADER_OPCODE_TG4:
95 if (ir->shadow_compare) {
96 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
97 } else {
98 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
99 }
100 break;
101 case SHADER_OPCODE_TG4_OFFSET:
102 if (ir->shadow_compare) {
103 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
104 } else {
105 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
106 }
107 break;
108 default:
109 assert(!"should not get here: invalid VS texture opcode");
110 break;
111 }
112
113 if (ir->header_present) {
114 MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
115 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
116
117 if (ir->texture_offset) {
118 /* Set the offset bits in DWord 2. */
119 default_state.access_mode = BRW_ALIGN_1;
120 MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
121 BRW_REGISTER_TYPE_UD),
122 brw_imm_ud(ir->texture_offset));
123 default_state.access_mode = BRW_ALIGN_16;
124 }
125 }
126
127 uint32_t surf_index =
128 prog_data->base.binding_table.texture_start + ir->sampler;
129
130 gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
131 gen8_set_dst(brw, inst, dst);
132 gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
133 gen8_set_sampler_message(brw, inst,
134 surf_index,
135 ir->sampler,
136 msg_type,
137 1,
138 ir->mlen,
139 ir->header_present,
140 BRW_SAMPLER_SIMD_MODE_SIMD4X2);
141
142 mark_surface_used(surf_index);
143 }
144
145 void
146 gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
147 {
148 struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
149
150 /* Copy g0. */
151 if (vs)
152 MOV_RAW(header, brw_vec8_grf(0, 0));
153
154 gen8_instruction *inst;
155 if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
156 /* Enable Channel Masks in the URB_WRITE_OWORD message header */
157 default_state.access_mode = BRW_ALIGN_1;
158 inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
159 BRW_REGISTER_TYPE_UD),
160 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
161 brw_imm_ud(0xff00));
162 gen8_set_mask_control(inst, BRW_MASK_DISABLE);
163 default_state.access_mode = BRW_ALIGN_16;
164 }
165
166 inst = next_inst(BRW_OPCODE_SEND);
167 gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
168 true);
169 gen8_set_dst(brw, inst, brw_null_reg());
170 gen8_set_src0(brw, inst, header);
171 }
172
173 void
174 gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header,
175 struct brw_reg src)
176 {
177 /* Move the vertex count into the second MRF for the EOT write. */
178 assert(eot_mrf_header.file == BRW_MESSAGE_REGISTER_FILE);
179 int dst_nr = GEN7_MRF_HACK_START + eot_mrf_header.nr + 1;
180 MOV(retype(brw_vec8_grf(dst_nr, 0), BRW_REGISTER_TYPE_UD), src);
181 }
182
183 void
184 gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
185 {
186 struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
187 gen8_instruction *inst;
188
189 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
190 default_state.access_mode = BRW_ALIGN_1;
191 inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
192 BRW_REGISTER_TYPE_UD),
193 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
194 brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
195 gen8_set_mask_control(inst, BRW_MASK_DISABLE);
196 default_state.access_mode = BRW_ALIGN_16;
197
198 /* mlen = 2: g0 header + vertex count */
199 inst = next_inst(BRW_OPCODE_SEND);
200 gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
201 gen8_set_dst(brw, inst, brw_null_reg());
202 gen8_set_src0(brw, inst, src);
203 }
204
205 void
206 gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
207 struct brw_reg src0,
208 struct brw_reg src1)
209 {
210 /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
211 * Header: M0.3):
212 *
213 * Slot 0 Offset. This field, after adding to the Global Offset field
214 * in the message descriptor, specifies the offset (in 256-bit units)
215 * from the start of the URB entry, as referenced by URB Handle 0, at
216 * which the data will be accessed.
217 *
218 * Similar text describes DWORD M0.4, which is slot 1 offset.
219 *
220 * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
221 * of the register for geometry shader invocations 0 and 1) by the
222 * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
223 *
224 * We can do this with the following EU instruction:
225 *
226 * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
227 */
228 default_state.access_mode = BRW_ALIGN_1;
229 gen8_instruction *inst =
230 MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
231 gen8_set_mask_control(inst, BRW_MASK_DISABLE);
232 default_state.access_mode = BRW_ALIGN_16;
233 }
234
235 void
236 gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
237 struct brw_reg src)
238 {
239 assert(src.file == BRW_IMMEDIATE_VALUE);
240
241 default_state.access_mode = BRW_ALIGN_1;
242
243 gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
244 gen8_set_mask_control(inst, BRW_MASK_DISABLE);
245
246 default_state.access_mode = BRW_ALIGN_16;
247 }
248
249 void
250 gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
251 {
252 /* We want to left shift just DWORD 4 (the x component belonging to the
253 * second geometry shader invocation) by 4 bits. So generate the
254 * instruction:
255 *
256 * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
257 */
258 dst = suboffset(vec1(dst), 4);
259 default_state.access_mode = BRW_ALIGN_1;
260 gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
261 gen8_set_mask_control(inst, BRW_MASK_DISABLE);
262 default_state.access_mode = BRW_ALIGN_16;
263 }
264
265 void
266 gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
267 struct brw_reg src)
268 {
269 /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
270 * Header: M0.5):
271 *
272 * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
273 *
274 * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
275 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
276 * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
277 * channel enable to determine the final channel enable. For the
278 * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
279 * enable is 1 it indicates that Vertex 1 DATA [3] will be included
280 * in the writeback message. For the URB_WRITE_OWORD &
281 * URB_WRITE_HWORD messages, when final channel enable is 1 it
282 * indicates that Vertex 1 DATA [3] will be written to the surface.
283 *
284 * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
285 * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
286 *
287 * 14 Vertex 1 DATA [2] Channel Mask
288 * 13 Vertex 1 DATA [1] Channel Mask
289 * 12 Vertex 1 DATA [0] Channel Mask
290 * 11 Vertex 0 DATA [3] Channel Mask
291 * 10 Vertex 0 DATA [2] Channel Mask
292 * 9 Vertex 0 DATA [1] Channel Mask
293 * 8 Vertex 0 DATA [0] Channel Mask
294 *
295 * (This is from a section of the PRM that is agnostic to the particular
296 * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
297 * geometry shader invocations 0 and 1, respectively). Since we have the
298 * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
299 * and the enable flags for geometry shader invocation 1 in bits 7:0 of
300 * DWORD 4, we just need to OR them together and store the result in bits
301 * 15:8 of DWORD 5.
302 *
303 * It's easier to get the EU to do this if we think of the src and dst
304 * registers as composed of 32 bytes each; then, we want to pick up the
305 * contents of bytes 0 and 16 from src, OR them together, and store them in
306 * byte 21.
307 *
308 * We can do that by the following EU instruction:
309 *
310 * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
311 *
312 * Note: this relies on the source register having zeros in (a) bits 7:4 of
313 * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
314 * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
315 * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
316 * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
317 * contain valid channel mask values (which are in the range 0x0-0xf).
318 */
319 dst = retype(dst, BRW_REGISTER_TYPE_UB);
320 src = retype(src, BRW_REGISTER_TYPE_UB);
321
322 default_state.access_mode = BRW_ALIGN_1;
323
324 gen8_instruction *inst =
325 OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
326 gen8_set_mask_control(inst, BRW_MASK_DISABLE);
327
328 default_state.access_mode = BRW_ALIGN_16;
329 }
330
331 void
332 gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
333 struct brw_reg index)
334 {
335 int second_vertex_offset = 1;
336
337 m1 = retype(m1, BRW_REGISTER_TYPE_D);
338
339 /* Set up M1 (message payload). Only the block offsets in M1.0 and
340 * M1.4 are used, and the rest are ignored.
341 */
342 struct brw_reg m1_0 = suboffset(vec1(m1), 0);
343 struct brw_reg m1_4 = suboffset(vec1(m1), 4);
344 struct brw_reg index_0 = suboffset(vec1(index), 0);
345 struct brw_reg index_4 = suboffset(vec1(index), 4);
346
347 default_state.mask_control = BRW_MASK_DISABLE;
348 default_state.access_mode = BRW_ALIGN_1;
349
350 MOV(m1_0, index_0);
351
352 if (index.file == BRW_IMMEDIATE_VALUE) {
353 index_4.dw1.ud += second_vertex_offset;
354 MOV(m1_4, index_4);
355 } else {
356 ADD(m1_4, index_4, brw_imm_d(second_vertex_offset));
357 }
358
359 default_state.mask_control = BRW_MASK_ENABLE;
360 default_state.access_mode = BRW_ALIGN_16;
361 }
362
363 void
364 gen8_vec4_generator::generate_scratch_read(vec4_instruction *ir,
365 struct brw_reg dst,
366 struct brw_reg index)
367 {
368 struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
369
370 MOV_RAW(header, brw_vec8_grf(0, 0));
371
372 generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
373
374 /* Each of the 8 channel enables is considered for whether each
375 * dword is written.
376 */
377 gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
378 gen8_set_dst(brw, send, dst);
379 gen8_set_src0(brw, send, header);
380 gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
381 255, /* binding table index: stateless access */
382 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
383 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
384 2, /* mlen */
385 1, /* rlen */
386 true, /* header present */
387 false); /* EOT */
388 }
389
390 void
391 gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
392 struct brw_reg dst,
393 struct brw_reg src,
394 struct brw_reg index)
395 {
396 struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
397
398 MOV_RAW(header, brw_vec8_grf(0, 0));
399
400 generate_oword_dual_block_offsets(brw_message_reg(ir->base_mrf + 1), index);
401
402 MOV(retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D),
403 retype(src, BRW_REGISTER_TYPE_D));
404
405 /* Each of the 8 channel enables is considered for whether each
406 * dword is written.
407 */
408 gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
409 gen8_set_dst(brw, send, dst);
410 gen8_set_src0(brw, send, header);
411 gen8_set_pred_control(send, ir->predicate);
412 gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
413 255, /* binding table index: stateless access */
414 GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
415 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
416 3, /* mlen */
417 0, /* rlen */
418 true, /* header present */
419 false); /* EOT */
420 }
421
422 void
423 gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
424 struct brw_reg dst,
425 struct brw_reg index,
426 struct brw_reg offset)
427 {
428 assert(index.file == BRW_IMMEDIATE_VALUE &&
429 index.type == BRW_REGISTER_TYPE_UD);
430 uint32_t surf_index = index.dw1.ud;
431
432 assert(offset.file == BRW_GENERAL_REGISTER_FILE);
433
434 /* Each of the 8 channel enables is considered for whether each
435 * dword is written.
436 */
437 gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
438 gen8_set_dst(brw, send, dst);
439 gen8_set_src0(brw, send, offset);
440 gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
441 surf_index,
442 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
443 0, /* message control */
444 1, /* mlen */
445 1, /* rlen */
446 false, /* no header */
447 false); /* EOT */
448
449 mark_surface_used(surf_index);
450 }
451
452 void
453 gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
454 struct brw_reg dst,
455 struct brw_reg *src)
456 {
457 vec4_instruction *ir = (vec4_instruction *) instruction;
458
459 if (dst.width == BRW_WIDTH_4) {
460 /* This happens in attribute fixups for "dual instanced" geometry
461 * shaders, since they use attributes that are vec4's. Since the exec
462 * width is only 4, it's essential that the caller set
463 * force_writemask_all in order to make sure the instruction is executed
464 * regardless of which channels are enabled.
465 */
466 assert(ir->force_writemask_all);
467
468 /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
469 * the following register region restrictions (from Graphics BSpec:
470 * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
471 * > Register Region Restrictions)
472 *
473 * 1. ExecSize must be greater than or equal to Width.
474 *
475 * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
476 * to Width * HorzStride."
477 */
478 for (int i = 0; i < 3; i++) {
479 if (src[i].file == BRW_GENERAL_REGISTER_FILE)
480 src[i] = stride(src[i], 4, 4, 1);
481 }
482 }
483
484 switch (ir->opcode) {
485 case BRW_OPCODE_MOV:
486 MOV(dst, src[0]);
487 break;
488
489 case BRW_OPCODE_ADD:
490 ADD(dst, src[0], src[1]);
491 break;
492
493 case BRW_OPCODE_MUL:
494 MUL(dst, src[0], src[1]);
495 break;
496
497 case BRW_OPCODE_MACH:
498 MACH(dst, src[0], src[1]);
499 break;
500
501 case BRW_OPCODE_MAD:
502 MAD(dst, src[0], src[1], src[2]);
503 break;
504
505 case BRW_OPCODE_FRC:
506 FRC(dst, src[0]);
507 break;
508
509 case BRW_OPCODE_RNDD:
510 RNDD(dst, src[0]);
511 break;
512
513 case BRW_OPCODE_RNDE:
514 RNDE(dst, src[0]);
515 break;
516
517 case BRW_OPCODE_RNDZ:
518 RNDZ(dst, src[0]);
519 break;
520
521 case BRW_OPCODE_AND:
522 AND(dst, src[0], src[1]);
523 break;
524
525 case BRW_OPCODE_OR:
526 OR(dst, src[0], src[1]);
527 break;
528
529 case BRW_OPCODE_XOR:
530 XOR(dst, src[0], src[1]);
531 break;
532
533 case BRW_OPCODE_NOT:
534 NOT(dst, src[0]);
535 break;
536
537 case BRW_OPCODE_ASR:
538 ASR(dst, src[0], src[1]);
539 break;
540
541 case BRW_OPCODE_SHR:
542 SHR(dst, src[0], src[1]);
543 break;
544
545 case BRW_OPCODE_SHL:
546 SHL(dst, src[0], src[1]);
547 break;
548
549 case BRW_OPCODE_CMP:
550 CMP(dst, ir->conditional_mod, src[0], src[1]);
551 break;
552
553 case BRW_OPCODE_SEL:
554 SEL(dst, src[0], src[1]);
555 break;
556
557 case BRW_OPCODE_DPH:
558 DPH(dst, src[0], src[1]);
559 break;
560
561 case BRW_OPCODE_DP4:
562 DP4(dst, src[0], src[1]);
563 break;
564
565 case BRW_OPCODE_DP3:
566 DP3(dst, src[0], src[1]);
567 break;
568
569 case BRW_OPCODE_DP2:
570 DP2(dst, src[0], src[1]);
571 break;
572
573 case BRW_OPCODE_F32TO16:
574 F32TO16(dst, src[0]);
575 break;
576
577 case BRW_OPCODE_F16TO32:
578 F16TO32(dst, src[0]);
579 break;
580
581 case BRW_OPCODE_LRP:
582 LRP(dst, src[0], src[1], src[2]);
583 break;
584
585 case BRW_OPCODE_BFREV:
586 /* BFREV only supports UD type for src and dst. */
587 BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
588 retype(src[0], BRW_REGISTER_TYPE_UD));
589 break;
590
591 case BRW_OPCODE_FBH:
592 /* FBH only supports UD type for dst. */
593 FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
594 break;
595
596 case BRW_OPCODE_FBL:
597 /* FBL only supports UD type for dst. */
598 FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
599 break;
600
601 case BRW_OPCODE_CBIT:
602 /* CBIT only supports UD type for dst. */
603 CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
604 break;
605
606 case BRW_OPCODE_ADDC:
607 ADDC(dst, src[0], src[1]);
608 break;
609
610 case BRW_OPCODE_SUBB:
611 SUBB(dst, src[0], src[1]);
612 break;
613
614 case BRW_OPCODE_BFE:
615 BFE(dst, src[0], src[1], src[2]);
616 break;
617
618 case BRW_OPCODE_BFI1:
619 BFI1(dst, src[0], src[1]);
620 break;
621
622 case BRW_OPCODE_BFI2:
623 BFI2(dst, src[0], src[1], src[2]);
624 break;
625
626 case BRW_OPCODE_IF:
627 IF(ir->predicate);
628 break;
629
630 case BRW_OPCODE_ELSE:
631 ELSE();
632 break;
633
634 case BRW_OPCODE_ENDIF:
635 ENDIF();
636 break;
637
638 case BRW_OPCODE_DO:
639 DO();
640 break;
641
642 case BRW_OPCODE_BREAK:
643 BREAK();
644 break;
645
646 case BRW_OPCODE_CONTINUE:
647 CONTINUE();
648 break;
649
650 case BRW_OPCODE_WHILE:
651 WHILE();
652 break;
653
654 case SHADER_OPCODE_RCP:
655 MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
656 break;
657
658 case SHADER_OPCODE_RSQ:
659 MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
660 break;
661
662 case SHADER_OPCODE_SQRT:
663 MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
664 break;
665
666 case SHADER_OPCODE_EXP2:
667 MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
668 break;
669
670 case SHADER_OPCODE_LOG2:
671 MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
672 break;
673
674 case SHADER_OPCODE_SIN:
675 MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
676 break;
677
678 case SHADER_OPCODE_COS:
679 MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
680 break;
681
682 case SHADER_OPCODE_POW:
683 MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
684 break;
685
686 case SHADER_OPCODE_INT_QUOTIENT:
687 MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
688 break;
689
690 case SHADER_OPCODE_INT_REMAINDER:
691 MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
692 break;
693
694 case SHADER_OPCODE_TEX:
695 case SHADER_OPCODE_TXD:
696 case SHADER_OPCODE_TXF:
697 case SHADER_OPCODE_TXF_MS:
698 case SHADER_OPCODE_TXF_MCS:
699 case SHADER_OPCODE_TXL:
700 case SHADER_OPCODE_TXS:
701 case SHADER_OPCODE_TG4:
702 case SHADER_OPCODE_TG4_OFFSET:
703 generate_tex(ir, dst);
704 break;
705
706 case VS_OPCODE_URB_WRITE:
707 generate_urb_write(ir, true);
708 break;
709
710 case SHADER_OPCODE_GEN4_SCRATCH_READ:
711 generate_scratch_read(ir, dst, src[0]);
712 break;
713
714 case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
715 generate_scratch_write(ir, dst, src[0], src[1]);
716 break;
717
718 case VS_OPCODE_PULL_CONSTANT_LOAD:
719 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
720 generate_pull_constant_load(ir, dst, src[0], src[1]);
721 break;
722
723 case GS_OPCODE_URB_WRITE:
724 generate_urb_write(ir, false);
725 break;
726
727 case GS_OPCODE_THREAD_END:
728 generate_gs_thread_end(ir);
729 break;
730
731 case GS_OPCODE_SET_WRITE_OFFSET:
732 generate_gs_set_write_offset(dst, src[0], src[1]);
733 break;
734
735 case GS_OPCODE_SET_VERTEX_COUNT:
736 generate_gs_set_vertex_count(dst, src[0]);
737 break;
738
739 case GS_OPCODE_SET_DWORD_2_IMMED:
740 generate_gs_set_dword_2_immed(dst, src[0]);
741 break;
742
743 case GS_OPCODE_PREPARE_CHANNEL_MASKS:
744 generate_gs_prepare_channel_masks(dst);
745 break;
746
747 case GS_OPCODE_SET_CHANNEL_MASKS:
748 generate_gs_set_channel_masks(dst, src[0]);
749 break;
750
751 case SHADER_OPCODE_SHADER_TIME_ADD:
752 assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
753 break;
754
755 case SHADER_OPCODE_UNTYPED_ATOMIC:
756 assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
757 break;
758
759 case SHADER_OPCODE_UNTYPED_SURFACE_READ:
760 assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
761 break;
762
763 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
764 assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
765 break;
766
767 default:
768 if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
769 _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
770 opcode_descs[ir->opcode].name);
771 } else {
772 _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
773 }
774 abort();
775 }
776 }
777
778 void
779 gen8_vec4_generator::generate_code(exec_list *instructions)
780 {
781 int last_native_inst_offset = 0;
782 const char *last_annotation_string = NULL;
783 const void *last_annotation_ir = NULL;
784
785 if (unlikely(debug_flag)) {
786 if (shader) {
787 printf("Native code for vertex shader %d:\n", shader_prog->Name);
788 } else {
789 printf("Native code for vertex program %d:\n", prog->Id);
790 }
791 }
792
793 foreach_list(node, instructions) {
794 vec4_instruction *ir = (vec4_instruction *) node;
795 struct brw_reg src[3], dst;
796
797 if (unlikely(debug_flag)) {
798 if (last_annotation_ir != ir->ir) {
799 last_annotation_ir = ir->ir;
800 if (last_annotation_ir) {
801 printf(" ");
802 if (shader) {
803 ((ir_instruction *) last_annotation_ir)->print();
804 } else {
805 const prog_instruction *vpi;
806 vpi = (const prog_instruction *) ir->ir;
807 printf("%d: ", (int)(vpi - prog->Instructions));
808 _mesa_fprint_instruction_opt(stdout, vpi, 0,
809 PROG_PRINT_DEBUG, NULL);
810 }
811 printf("\n");
812 }
813 }
814 if (last_annotation_string != ir->annotation) {
815 last_annotation_string = ir->annotation;
816 if (last_annotation_string)
817 printf(" %s\n", last_annotation_string);
818 }
819 }
820
821 for (unsigned int i = 0; i < 3; i++) {
822 src[i] = ir->get_src(prog_data, i);
823 }
824 dst = ir->get_dst();
825
826 default_state.conditional_mod = ir->conditional_mod;
827 default_state.predicate = ir->predicate;
828 default_state.predicate_inverse = ir->predicate_inverse;
829 default_state.saturate = ir->saturate;
830
831 const unsigned pre_emit_nr_inst = nr_inst;
832
833 generate_vec4_instruction(ir, dst, src);
834
835 if (ir->no_dd_clear || ir->no_dd_check) {
836 assert(nr_inst == pre_emit_nr_inst + 1 ||
837 !"no_dd_check or no_dd_clear set for IR emitting more "
838 "than 1 instruction");
839
840 gen8_instruction *last = &store[pre_emit_nr_inst];
841 gen8_set_no_dd_clear(last, ir->no_dd_clear);
842 gen8_set_no_dd_check(last, ir->no_dd_check);
843 }
844
845 if (unlikely(debug_flag)) {
846 disassemble(stdout, last_native_inst_offset, next_inst_offset);
847 }
848
849 last_native_inst_offset = next_inst_offset;
850 }
851
852 if (unlikely(debug_flag)) {
853 printf("\n");
854 }
855
856 patch_jump_targets();
857
858 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
859 * emit issues, it doesn't get the jump distances into the output,
860 * which is often something we want to debug. So this is here in
861 * case you're doing that.
862 */
863 if (0 && unlikely(debug_flag)) {
864 disassemble(stdout, 0, next_inst_offset);
865 }
866 }
867
868 const unsigned *
869 gen8_vec4_generator::generate_assembly(exec_list *instructions,
870 unsigned *assembly_size)
871 {
872 default_state.access_mode = BRW_ALIGN_16;
873 default_state.exec_size = BRW_EXECUTE_8;
874 generate_code(instructions);
875 *assembly_size = next_inst_offset;
876 return (const unsigned *) store;
877 }
878
879 } /* namespace brw */