r600g,radeonsi: share r600_surface
[mesa.git] / src / gallium / drivers / ilo / shader / ilo_shader_vs.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
31 #include "toy_tgsi.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
37
38 struct vs_compile_context {
39 struct ilo_shader *shader;
40 const struct ilo_shader_variant *variant;
41
42 struct toy_compiler tc;
43 struct toy_tgsi tgsi;
44 enum brw_message_target const_cache;
45
46 int output_map[PIPE_MAX_SHADER_OUTPUTS];
47
48 int num_grf_per_vrf;
49 int first_const_grf;
50 int first_ucp_grf;
51 int first_vue_grf;
52 int first_free_grf;
53 int last_free_grf;
54
55 int first_free_mrf;
56 int last_free_mrf;
57 };
58
59 static void
60 vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc,
61 struct toy_dst dst, int dim, int idx)
62 {
63 struct toy_compiler *tc = &vcc->tc;
64 int slot;
65
66 assert(!dim);
67
68 slot = toy_tgsi_find_input(&vcc->tgsi, idx);
69 if (slot >= 0) {
70 const int first_in_grf = vcc->first_vue_grf +
71 (vcc->shader->in.count - vcc->tgsi.num_inputs);
72 const int grf = first_in_grf + vcc->tgsi.inputs[slot].semantic_index;
73 const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
74
75 tc_MOV(tc, dst, src);
76 }
77 else {
78 /* undeclared input */
79 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
80 }
81 }
82
83 static bool
84 vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc,
85 struct toy_dst dst, int dim,
86 struct toy_src idx)
87 {
88 const int i = idx.val32;
89 const int grf = vcc->first_const_grf + i / 2;
90 const int grf_subreg = (i & 1) * 16;
91 struct toy_src src;
92
93 if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
94 grf >= vcc->first_ucp_grf)
95 return false;
96
97
98 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041);
99 tc_MOV(&vcc->tc, dst, src);
100
101 return true;
102 }
103
104 static void
105 vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
106 struct toy_dst dst, int dim,
107 struct toy_src idx)
108 {
109 const struct toy_dst header =
110 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
111 const struct toy_dst block_offsets =
112 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0));
113 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
114 struct toy_compiler *tc = &vcc->tc;
115 unsigned msg_type, msg_ctrl, msg_len;
116 struct toy_inst *inst;
117 struct toy_src desc;
118
119 if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
120 return;
121
122 /* set message header */
123 inst = tc_MOV(tc, header, r0);
124 inst->mask_ctrl = BRW_MASK_DISABLE;
125
126 /* set block offsets */
127 tc_MOV(tc, block_offsets, idx);
128
129 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
130 msg_ctrl = BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD << 8;;
131 msg_len = 2;
132
133 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
134 msg_type, msg_ctrl, ILO_VS_CONST_SURFACE(dim));
135
136 tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache);
137 }
138
139 static void
140 vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
141 struct toy_dst dst, int dim,
142 struct toy_src idx)
143 {
144 struct toy_compiler *tc = &vcc->tc;
145 const struct toy_dst offset =
146 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
147 struct toy_src desc;
148
149 if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
150 return;
151
152 /*
153 * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
154 * changed from OWord Dual Block Read to ld to increase performance in the
155 * classic driver. Since we use the constant cache instead of the data
156 * cache, I wonder if we still want to follow the classic driver.
157 */
158
159 /* set offset */
160 tc_MOV(tc, offset, idx);
161
162 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
163 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
164 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
165 0,
166 ILO_VS_CONST_SURFACE(dim));
167
168 tc_SEND(tc, dst, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
169 }
170
171 static void
172 vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc,
173 struct toy_dst dst, int idx)
174 {
175 const uint32_t *imm;
176 int ch;
177
178 imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL);
179
180 for (ch = 0; ch < 4; ch++) {
181 /* raw moves */
182 tc_MOV(&vcc->tc,
183 tdst_writemask(tdst_ud(dst), 1 << ch),
184 tsrc_imm_ud(imm[ch]));
185 }
186 }
187
188
189 static void
190 vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc,
191 struct toy_dst dst, int dim, int idx)
192 {
193 struct toy_compiler *tc = &vcc->tc;
194 const struct toy_tgsi *tgsi = &vcc->tgsi;
195 int slot;
196
197 assert(!dim);
198
199 slot = toy_tgsi_find_system_value(tgsi, idx);
200 if (slot < 0)
201 return;
202
203 switch (tgsi->system_values[slot].semantic_name) {
204 case TGSI_SEMANTIC_INSTANCEID:
205 case TGSI_SEMANTIC_VERTEXID:
206 /*
207 * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for
208 * the generated IDs, with VID in the X channel and IID in the Y
209 * channel.
210 */
211 {
212 const int grf = vcc->first_vue_grf;
213 const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
214 const enum toy_swizzle swizzle =
215 (tgsi->system_values[slot].semantic_name ==
216 TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X;
217
218 tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle)));
219 }
220 break;
221 case TGSI_SEMANTIC_PRIMID:
222 default:
223 tc_fail(tc, "unhandled system value");
224 tc_MOV(tc, dst, tsrc_imm_d(0));
225 break;
226 }
227 }
228
229 static void
230 vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc,
231 struct toy_inst *inst)
232 {
233 struct toy_compiler *tc = &vcc->tc;
234 int dim, idx;
235
236 assert(inst->src[0].file == TOY_FILE_IMM);
237 dim = inst->src[0].val32;
238
239 assert(inst->src[1].file == TOY_FILE_IMM);
240 idx = inst->src[1].val32;
241
242 switch (inst->opcode) {
243 case TOY_OPCODE_TGSI_IN:
244 vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx);
245 break;
246 case TOY_OPCODE_TGSI_CONST:
247 if (tc->dev->gen >= ILO_GEN(7))
248 vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]);
249 else
250 vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]);
251 break;
252 case TOY_OPCODE_TGSI_SV:
253 vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx);
254 break;
255 case TOY_OPCODE_TGSI_IMM:
256 assert(!dim);
257 vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx);
258 break;
259 default:
260 tc_fail(tc, "unhandled TGSI fetch");
261 break;
262 }
263
264 tc_discard_inst(tc, inst);
265 }
266
267 static void
268 vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc,
269 struct toy_inst *inst)
270 {
271 struct toy_compiler *tc = &vcc->tc;
272 enum tgsi_file_type file;
273 int dim, idx;
274 struct toy_src indirect_dim, indirect_idx;
275
276 assert(inst->src[0].file == TOY_FILE_IMM);
277 file = inst->src[0].val32;
278
279 assert(inst->src[1].file == TOY_FILE_IMM);
280 dim = inst->src[1].val32;
281 indirect_dim = inst->src[2];
282
283 assert(inst->src[3].file == TOY_FILE_IMM);
284 idx = inst->src[3].val32;
285 indirect_idx = inst->src[4];
286
287 /* no dimension indirection */
288 assert(indirect_dim.file == TOY_FILE_IMM);
289 dim += indirect_dim.val32;
290
291 switch (inst->opcode) {
292 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
293 if (file == TGSI_FILE_CONSTANT) {
294 if (idx) {
295 struct toy_dst tmp = tc_alloc_tmp(tc);
296
297 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
298 indirect_idx = tsrc_from(tmp);
299 }
300
301 if (tc->dev->gen >= ILO_GEN(7))
302 vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx);
303 else
304 vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx);
305 break;
306 }
307 /* fall through */
308 case TOY_OPCODE_TGSI_INDIRECT_STORE:
309 default:
310 tc_fail(tc, "unhandled TGSI indirection");
311 break;
312 }
313
314 tc_discard_inst(tc, inst);
315 }
316
317 /**
318 * Emit instructions to move sampling parameters to the message registers.
319 */
320 static int
321 vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf,
322 struct toy_src coords, int num_coords,
323 struct toy_src bias_or_lod, struct toy_src ref_or_si,
324 struct toy_src ddx, struct toy_src ddy, int num_derivs)
325 {
326 const unsigned coords_writemask = (1 << num_coords) - 1;
327 struct toy_dst m[3];
328 int num_params, i;
329
330 assert(num_coords <= 4);
331 assert(num_derivs <= 3 && num_derivs <= num_coords);
332
333 for (i = 0; i < Elements(m); i++)
334 m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0);
335
336 switch (msg_type) {
337 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
338 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
339 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod);
340 num_params = 5;
341 break;
342 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
343 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
344 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ),
345 tsrc_swizzle(ddx, 0, 0, 1, 1));
346 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW),
347 tsrc_swizzle(ddy, 0, 0, 1, 1));
348 if (num_derivs > 2) {
349 tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X),
350 tsrc_swizzle1(ddx, 2));
351 tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y),
352 tsrc_swizzle1(ddy, 2));
353 }
354 num_params = 4 + num_derivs * 2;
355 break;
356 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
357 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
358 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si);
359 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod);
360 num_params = 6;
361 break;
362 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
363 assert(num_coords <= 3);
364 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords);
365 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod);
366 if (tc->dev->gen >= ILO_GEN(7)) {
367 num_params = 4;
368 }
369 else {
370 tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si);
371 num_params = 5;
372 }
373 break;
374 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
375 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod);
376 num_params = 1;
377 break;
378 default:
379 tc_fail(tc, "unknown sampler opcode");
380 num_params = 0;
381 break;
382 }
383
384 return (num_params + 3) / 4;
385 }
386
387 /**
388 * Set up message registers and return the message descriptor for sampling.
389 */
390 static struct toy_src
391 vs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
392 int base_mrf, unsigned *ret_sampler_index)
393 {
394 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
395 struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si;
396 int num_coords, ref_pos, num_derivs;
397 int sampler_src;
398
399 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD4X2;
400
401 coords = inst->src[0];
402 ddx = tsrc_null();
403 ddy = tsrc_null();
404 bias_or_lod = tsrc_null();
405 ref_or_si = tsrc_null();
406 num_derivs = 0;
407 sampler_src = 1;
408
409 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
410
411 /* extract the parameters */
412 switch (inst->opcode) {
413 case TOY_OPCODE_TGSI_TXD:
414 if (ref_pos >= 0) {
415 assert(ref_pos < 4);
416
417 msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
418 ref_or_si = tsrc_swizzle1(coords, ref_pos);
419
420 if (tc->dev->gen < ILO_GEN(7.5))
421 tc_fail(tc, "TXD with shadow sampler not supported");
422 }
423 else {
424 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
425 }
426
427 ddx = inst->src[1];
428 ddy = inst->src[2];
429 num_derivs = num_coords;
430 sampler_src = 3;
431 break;
432 case TOY_OPCODE_TGSI_TXL:
433 if (ref_pos >= 0) {
434 assert(ref_pos < 3);
435
436 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
437 ref_or_si = tsrc_swizzle1(coords, ref_pos);
438 }
439 else {
440 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
441 }
442
443 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
444 break;
445 case TOY_OPCODE_TGSI_TXF:
446 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
447
448 switch (inst->tex.target) {
449 case TGSI_TEXTURE_2D_MSAA:
450 case TGSI_TEXTURE_2D_ARRAY_MSAA:
451 assert(ref_pos >= 0 && ref_pos < 4);
452 /* lod is always 0 */
453 bias_or_lod = tsrc_imm_d(0);
454 ref_or_si = tsrc_swizzle1(coords, ref_pos);
455 break;
456 default:
457 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
458 break;
459 }
460
461 /* offset the coordinates */
462 if (!tsrc_is_null(inst->tex.offsets[0])) {
463 struct toy_dst tmp;
464
465 tmp = tc_alloc_tmp(tc);
466 tc_ADD(tc, tmp, coords, inst->tex.offsets[0]);
467 coords = tsrc_from(tmp);
468 }
469
470 sampler_src = 1;
471 break;
472 case TOY_OPCODE_TGSI_TXQ:
473 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
474 num_coords = 0;
475 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X);
476 break;
477 case TOY_OPCODE_TGSI_TXQ_LZ:
478 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
479 num_coords = 0;
480 sampler_src = 0;
481 break;
482 case TOY_OPCODE_TGSI_TXL2:
483 if (ref_pos >= 0) {
484 assert(ref_pos < 4);
485
486 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
487 ref_or_si = tsrc_swizzle1(coords, ref_pos);
488 }
489 else {
490 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
491 }
492
493 bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X);
494 sampler_src = 2;
495 break;
496 default:
497 assert(!"unhandled sampling opcode");
498 if (ret_sampler_index)
499 *ret_sampler_index = 0;
500 return tsrc_null();
501 break;
502 }
503
504 assert(inst->src[sampler_src].file == TOY_FILE_IMM);
505 sampler_index = inst->src[sampler_src].val32;
506 binding_table_index = ILO_VS_TEXTURE_SURFACE(sampler_index);
507
508 /*
509 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
510 *
511 * "Note that the (cube map) coordinates delivered to the sampling
512 * engine must already have been divided by the component with the
513 * largest absolute value."
514 */
515 switch (inst->tex.target) {
516 case TGSI_TEXTURE_CUBE:
517 case TGSI_TEXTURE_SHADOWCUBE:
518 case TGSI_TEXTURE_CUBE_ARRAY:
519 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
520 /* TXQ does not need coordinates */
521 if (num_coords >= 3) {
522 struct toy_dst tmp, max;
523 struct toy_src abs_coords[3];
524 int i;
525
526 tmp = tc_alloc_tmp(tc);
527 max = tdst_writemask(tmp, TOY_WRITEMASK_W);
528
529 for (i = 0; i < 3; i++)
530 abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i));
531
532 tc_SEL(tc, max, abs_coords[0], abs_coords[0], BRW_CONDITIONAL_GE);
533 tc_SEL(tc, max, tsrc_from(max), abs_coords[0], BRW_CONDITIONAL_GE);
534 tc_INV(tc, max, tsrc_from(max));
535
536 for (i = 0; i < 3; i++)
537 tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max));
538
539 coords = tsrc_from(tmp);
540 }
541 break;
542 }
543
544 /* set up sampler parameters */
545 msg_len = vs_add_sampler_params(tc, msg_type, base_mrf,
546 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
547
548 /*
549 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
550 *
551 * "The maximum message length allowed to the sampler is 11. This would
552 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
553 * SIMD16."
554 */
555 if (msg_len > 11)
556 tc_fail(tc, "maximum length for messages to the sampler is 11");
557
558 if (ret_sampler_index)
559 *ret_sampler_index = sampler_index;
560
561 return tsrc_imm_mdesc_sampler(tc, msg_len, 1,
562 false, simd_mode, msg_type, sampler_index, binding_table_index);
563 }
564
565 static void
566 vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc,
567 struct toy_inst *inst)
568 {
569 struct toy_compiler *tc = &vcc->tc;
570 struct toy_src desc;
571 struct toy_dst dst, tmp;
572 unsigned sampler_index;
573 int swizzles[4], i;
574 unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask;
575 bool need_filter;
576
577 desc = vs_prepare_tgsi_sampling(tc, inst,
578 vcc->first_free_mrf, &sampler_index);
579
580 switch (inst->opcode) {
581 case TOY_OPCODE_TGSI_TXF:
582 case TOY_OPCODE_TGSI_TXQ:
583 case TOY_OPCODE_TGSI_TXQ_LZ:
584 need_filter = false;
585 break;
586 default:
587 need_filter = true;
588 break;
589 }
590
591 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
592 inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0);
593 inst->src[1] = desc;
594
595 /* write to a temp first */
596 tmp = tc_alloc_tmp(tc);
597 tmp.type = inst->dst.type;
598 dst = inst->dst;
599 inst->dst = tmp;
600
601 tc_move_inst(tc, inst);
602
603 if (need_filter) {
604 assert(sampler_index < vcc->variant->num_sampler_views);
605 swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r;
606 swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g;
607 swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b;
608 swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a;
609 }
610 else {
611 swizzles[0] = PIPE_SWIZZLE_RED;
612 swizzles[1] = PIPE_SWIZZLE_GREEN;
613 swizzles[2] = PIPE_SWIZZLE_BLUE;
614 swizzles[3] = PIPE_SWIZZLE_ALPHA;
615 }
616
617 swizzle_zero_mask = 0;
618 swizzle_one_mask = 0;
619 swizzle_normal_mask = 0;
620 for (i = 0; i < 4; i++) {
621 switch (swizzles[i]) {
622 case PIPE_SWIZZLE_ZERO:
623 swizzle_zero_mask |= 1 << i;
624 swizzles[i] = i;
625 break;
626 case PIPE_SWIZZLE_ONE:
627 swizzle_one_mask |= 1 << i;
628 swizzles[i] = i;
629 break;
630 default:
631 swizzle_normal_mask |= 1 << i;
632 break;
633 }
634 }
635
636 /* swizzle the results */
637 if (swizzle_normal_mask) {
638 tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask),
639 tsrc_swizzle(tsrc_from(tmp), swizzles[0],
640 swizzles[1], swizzles[2], swizzles[3]));
641 }
642 if (swizzle_zero_mask)
643 tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f));
644 if (swizzle_one_mask)
645 tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f));
646 }
647
648 static void
649 vs_lower_opcode_urb_write(struct toy_compiler *tc, struct toy_inst *inst)
650 {
651 /* vs_write_vue() has set up the message registers */
652 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
653 }
654
655 static void
656 vs_lower_virtual_opcodes(struct vs_compile_context *vcc)
657 {
658 struct toy_compiler *tc = &vcc->tc;
659 struct toy_inst *inst;
660
661 tc_head(tc);
662 while ((inst = tc_next(tc)) != NULL) {
663 switch (inst->opcode) {
664 case TOY_OPCODE_TGSI_IN:
665 case TOY_OPCODE_TGSI_CONST:
666 case TOY_OPCODE_TGSI_SV:
667 case TOY_OPCODE_TGSI_IMM:
668 vs_lower_opcode_tgsi_direct(vcc, inst);
669 break;
670 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
671 case TOY_OPCODE_TGSI_INDIRECT_STORE:
672 vs_lower_opcode_tgsi_indirect(vcc, inst);
673 break;
674 case TOY_OPCODE_TGSI_TEX:
675 case TOY_OPCODE_TGSI_TXB:
676 case TOY_OPCODE_TGSI_TXD:
677 case TOY_OPCODE_TGSI_TXL:
678 case TOY_OPCODE_TGSI_TXP:
679 case TOY_OPCODE_TGSI_TXF:
680 case TOY_OPCODE_TGSI_TXQ:
681 case TOY_OPCODE_TGSI_TXQ_LZ:
682 case TOY_OPCODE_TGSI_TEX2:
683 case TOY_OPCODE_TGSI_TXB2:
684 case TOY_OPCODE_TGSI_TXL2:
685 case TOY_OPCODE_TGSI_SAMPLE:
686 case TOY_OPCODE_TGSI_SAMPLE_I:
687 case TOY_OPCODE_TGSI_SAMPLE_I_MS:
688 case TOY_OPCODE_TGSI_SAMPLE_B:
689 case TOY_OPCODE_TGSI_SAMPLE_C:
690 case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
691 case TOY_OPCODE_TGSI_SAMPLE_D:
692 case TOY_OPCODE_TGSI_SAMPLE_L:
693 case TOY_OPCODE_TGSI_GATHER4:
694 case TOY_OPCODE_TGSI_SVIEWINFO:
695 case TOY_OPCODE_TGSI_SAMPLE_POS:
696 case TOY_OPCODE_TGSI_SAMPLE_INFO:
697 vs_lower_opcode_tgsi_sampling(vcc, inst);
698 break;
699 case TOY_OPCODE_INV:
700 case TOY_OPCODE_LOG:
701 case TOY_OPCODE_EXP:
702 case TOY_OPCODE_SQRT:
703 case TOY_OPCODE_RSQ:
704 case TOY_OPCODE_SIN:
705 case TOY_OPCODE_COS:
706 case TOY_OPCODE_FDIV:
707 case TOY_OPCODE_POW:
708 case TOY_OPCODE_INT_DIV_QUOTIENT:
709 case TOY_OPCODE_INT_DIV_REMAINDER:
710 toy_compiler_lower_math(tc, inst);
711 break;
712 case TOY_OPCODE_URB_WRITE:
713 vs_lower_opcode_urb_write(tc, inst);
714 break;
715 default:
716 if (inst->opcode > 127)
717 tc_fail(tc, "unhandled virtual opcode");
718 break;
719 }
720 }
721 }
722
723 /**
724 * Compile the shader.
725 */
726 static bool
727 vs_compile(struct vs_compile_context *vcc)
728 {
729 struct toy_compiler *tc = &vcc->tc;
730 struct ilo_shader *sh = vcc->shader;
731
732 vs_lower_virtual_opcodes(vcc);
733 toy_compiler_legalize_for_ra(tc);
734 toy_compiler_optimize(tc);
735 toy_compiler_allocate_registers(tc,
736 vcc->first_free_grf,
737 vcc->last_free_grf,
738 vcc->num_grf_per_vrf);
739 toy_compiler_legalize_for_asm(tc);
740
741 if (tc->fail) {
742 ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
743 return false;
744 }
745
746 if (ilo_debug & ILO_DEBUG_VS) {
747 ilo_printf("legalized instructions:\n");
748 toy_compiler_dump(tc);
749 ilo_printf("\n");
750 }
751
752 if (true) {
753 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
754 }
755 else {
756 static const uint32_t microcode[] = {
757 /* fill in the microcode here */
758 0x0, 0x0, 0x0, 0x0,
759 };
760 const bool swap = true;
761
762 sh->kernel_size = sizeof(microcode);
763 sh->kernel = MALLOC(sh->kernel_size);
764
765 if (sh->kernel) {
766 const int num_dwords = sizeof(microcode) / 4;
767 const uint32_t *src = microcode;
768 uint32_t *dst = (uint32_t *) sh->kernel;
769 int i;
770
771 for (i = 0; i < num_dwords; i += 4) {
772 if (swap) {
773 dst[i + 0] = src[i + 3];
774 dst[i + 1] = src[i + 2];
775 dst[i + 2] = src[i + 1];
776 dst[i + 3] = src[i + 0];
777 }
778 else {
779 memcpy(dst, src, 16);
780 }
781 }
782 }
783 }
784
785 if (!sh->kernel) {
786 ilo_err("failed to compile VS: %s\n", tc->reason);
787 return false;
788 }
789
790 if (ilo_debug & ILO_DEBUG_VS) {
791 ilo_printf("disassembly:\n");
792 toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
793 ilo_printf("\n");
794 }
795
796 return true;
797 }
798
799 /**
800 * Collect the toy registers to be written to the VUE.
801 */
802 static int
803 vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
804 {
805 const struct toy_tgsi *tgsi = &vcc->tgsi;
806 int i;
807
808 for (i = 0; i < vcc->shader->out.count; i++) {
809 const int slot = vcc->output_map[i];
810 const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi,
811 TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1;
812 struct toy_src src;
813
814 if (vrf >= 0) {
815 struct toy_dst dst;
816
817 dst = tdst(TOY_FILE_VRF, vrf, 0);
818 src = tsrc_from(dst);
819
820 if (i == 0) {
821 /* PSIZE is at channel W */
822 tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W),
823 tsrc_swizzle1(src, TOY_SWIZZLE_X));
824
825 /* the other channels are for the header */
826 dst = tdst_d(dst);
827 tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ),
828 tsrc_imm_d(0));
829 }
830 else {
831 /* initialize unused channels to 0.0f */
832 if (tgsi->outputs[slot].undefined_mask) {
833 dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask);
834 tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f));
835 }
836 }
837 }
838 else {
839 /* XXX this is too ugly */
840 if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST &&
841 slot < 0) {
842 /* ok, we need to compute clip distance */
843 int clipvert_slot = -1, clipvert_vrf, j;
844
845 for (j = 0; j < tgsi->num_outputs; j++) {
846 if (tgsi->outputs[j].semantic_name ==
847 TGSI_SEMANTIC_CLIPVERTEX) {
848 clipvert_slot = j;
849 break;
850 }
851 else if (tgsi->outputs[j].semantic_name ==
852 TGSI_SEMANTIC_POSITION) {
853 /* remember pos, but keep looking */
854 clipvert_slot = j;
855 }
856 }
857
858 clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi,
859 TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1;
860 if (clipvert_vrf >= 0) {
861 struct toy_dst tmp = tc_alloc_tmp(&vcc->tc);
862 struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0);
863 int first_ucp, last_ucp;
864
865 if (vcc->shader->out.semantic_indices[i]) {
866 first_ucp = 4;
867 last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1);
868 }
869 else {
870 first_ucp = 0;
871 last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1);
872 }
873
874 for (j = first_ucp; j <= last_ucp; j++) {
875 const int plane_grf = vcc->first_ucp_grf + j / 2;
876 const int plane_subreg = (j & 1) * 16;
877 const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
878 plane_grf, plane_subreg), TOY_RECT_041);
879 const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j);
880
881 tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask),
882 clipvert, plane);
883 }
884
885 src = tsrc_from(tmp);
886 }
887 else {
888 src = tsrc_imm_f(0.0f);
889 }
890 }
891 else {
892 src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
893 }
894 }
895
896 outs[i] = src;
897 }
898
899 return i;
900 }
901
902 /**
903 * Emit instructions to write the VUE.
904 */
905 static void
906 vs_write_vue(struct vs_compile_context *vcc)
907 {
908 struct toy_compiler *tc = &vcc->tc;
909 struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS];
910 struct toy_dst header;
911 struct toy_src r0;
912 struct toy_inst *inst;
913 int sent_attrs, total_attrs;
914
915 header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
916 r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
917 inst = tc_MOV(tc, header, r0);
918 inst->mask_ctrl = BRW_MASK_DISABLE;
919
920 if (tc->dev->gen >= ILO_GEN(7)) {
921 inst = tc_OR(tc, tdst_offset(header, 0, 5),
922 tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010),
923 tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010));
924 inst->exec_size = BRW_EXECUTE_1;
925 inst->access_mode = BRW_ALIGN_1;
926 inst->mask_ctrl = BRW_MASK_DISABLE;
927 }
928
929 total_attrs = vs_collect_outputs(vcc, outs);
930 sent_attrs = 0;
931 while (sent_attrs < total_attrs) {
932 struct toy_src desc;
933 int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs;
934 int num_attrs, msg_len, i;
935 bool eot;
936
937 num_attrs = total_attrs - sent_attrs;
938 eot = true;
939
940 /* see if we need another message */
941 avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1;
942 if (num_attrs > avail_mrf_for_attrs) {
943 /*
944 * From the Sandy Bridge PRM, volume 4 part 2, page 22:
945 *
946 * "Offset. This field specifies a destination offset (in 256-bit
947 * units) from the start of the URB entry(s), as referenced by
948 * URB Return Handle n, at which the data (if any) will be
949 * written."
950 *
951 * As we need to offset the following messages, we must make sure
952 * this one writes an even number of attributes.
953 */
954 num_attrs = avail_mrf_for_attrs & ~1;
955 eot = false;
956 }
957
958 if (tc->dev->gen >= ILO_GEN(7)) {
959 /* do not forget about the header */
960 msg_len = 1 + num_attrs;
961 }
962 else {
963 /*
964 * From the Sandy Bridge PRM, volume 4 part 2, page 26:
965 *
966 * "At least 256 bits per vertex (512 bits total, M1 & M2) must
967 * be written. Writing only 128 bits per vertex (256 bits
968 * total, M1 only) results in UNDEFINED operation."
969 *
970 * "[DevSNB] Interleave writes must be in multiples of 256 per
971 * vertex."
972 *
973 * That is, we must write or appear to write an even number of
974 * attributes, starting from two.
975 */
976 if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) {
977 num_attrs--;
978 eot = false;
979 }
980
981 msg_len = 1 + align(num_attrs, 2);
982 }
983
984 for (i = 0; i < num_attrs; i++)
985 tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]);
986
987 assert(sent_attrs % 2 == 0);
988 desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0,
989 eot, true, false, BRW_URB_SWIZZLE_INTERLEAVE, sent_attrs / 2, 0);
990
991 tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc);
992
993 sent_attrs += num_attrs;
994 }
995 }
996
997 /**
998 * Set up shader inputs for fixed-function units.
999 */
1000 static void
1001 vs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1002 {
1003 int num_attrs, i;
1004
1005 /* vertex/instance id is the first VE if exists */
1006 for (i = 0; i < tgsi->num_system_values; i++) {
1007 bool found = false;
1008
1009 switch (tgsi->system_values[i].semantic_name) {
1010 case TGSI_SEMANTIC_INSTANCEID:
1011 case TGSI_SEMANTIC_VERTEXID:
1012 found = true;
1013 break;
1014 default:
1015 break;
1016 }
1017
1018 if (found) {
1019 sh->in.semantic_names[sh->in.count] =
1020 tgsi->system_values[i].semantic_name;
1021 sh->in.semantic_indices[sh->in.count] =
1022 tgsi->system_values[i].semantic_index;
1023 sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT;
1024 sh->in.centroid[sh->in.count] = false;
1025
1026 sh->in.count++;
1027 break;
1028 }
1029 }
1030
1031 num_attrs = 0;
1032 for (i = 0; i < tgsi->num_inputs; i++) {
1033 assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC);
1034 if (tgsi->inputs[i].semantic_index >= num_attrs)
1035 num_attrs = tgsi->inputs[i].semantic_index + 1;
1036 }
1037 assert(num_attrs <= PIPE_MAX_ATTRIBS);
1038
1039 /* VF cannot remap VEs. VE[i] must be used as GENERIC[i]. */
1040 for (i = 0; i < num_attrs; i++) {
1041 sh->in.semantic_names[sh->in.count + i] = TGSI_SEMANTIC_GENERIC;
1042 sh->in.semantic_indices[sh->in.count + i] = i;
1043 sh->in.interp[sh->in.count + i] = TGSI_INTERPOLATE_CONSTANT;
1044 sh->in.centroid[sh->in.count + i] = false;
1045 }
1046
1047 sh->in.count += num_attrs;
1048
1049 sh->in.has_pos = false;
1050 sh->in.has_linear_interp = false;
1051 sh->in.barycentric_interpolation_mode = 0;
1052 }
1053
1054 /**
1055 * Set up shader outputs for fixed-function units.
1056 */
1057 static void
1058 vs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1059 bool output_clipdist, int *output_map)
1060 {
1061 int psize_slot = -1, pos_slot = -1;
1062 int clipdist_slot[2] = { -1, -1 };
1063 int color_slot[4] = { -1, -1, -1, -1 };
1064 int num_outs, i;
1065
1066 /* find out the slots of outputs that need special care */
1067 for (i = 0; i < tgsi->num_outputs; i++) {
1068 switch (tgsi->outputs[i].semantic_name) {
1069 case TGSI_SEMANTIC_PSIZE:
1070 psize_slot = i;
1071 break;
1072 case TGSI_SEMANTIC_POSITION:
1073 pos_slot = i;
1074 break;
1075 case TGSI_SEMANTIC_CLIPDIST:
1076 if (tgsi->outputs[i].semantic_index)
1077 clipdist_slot[1] = i;
1078 else
1079 clipdist_slot[0] = i;
1080 break;
1081 case TGSI_SEMANTIC_COLOR:
1082 if (tgsi->outputs[i].semantic_index)
1083 color_slot[2] = i;
1084 else
1085 color_slot[0] = i;
1086 break;
1087 case TGSI_SEMANTIC_BCOLOR:
1088 if (tgsi->outputs[i].semantic_index)
1089 color_slot[3] = i;
1090 else
1091 color_slot[1] = i;
1092 break;
1093 default:
1094 break;
1095 }
1096 }
1097
1098 /* the first two VUEs are always PSIZE and POSITION */
1099 num_outs = 2;
1100 output_map[0] = psize_slot;
1101 output_map[1] = pos_slot;
1102
1103 sh->out.register_indices[0] =
1104 (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
1105 sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
1106 sh->out.semantic_indices[0] = 0;
1107
1108 sh->out.register_indices[1] =
1109 (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
1110 sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
1111 sh->out.semantic_indices[1] = 0;
1112
1113 sh->out.has_pos = true;
1114
1115 /* followed by optional clip distances */
1116 if (output_clipdist) {
1117 sh->out.register_indices[num_outs] =
1118 (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
1119 sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
1120 sh->out.semantic_indices[num_outs] = 0;
1121 output_map[num_outs++] = clipdist_slot[0];
1122
1123 sh->out.register_indices[num_outs] =
1124 (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
1125 sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
1126 sh->out.semantic_indices[num_outs] = 1;
1127 output_map[num_outs++] = clipdist_slot[1];
1128 }
1129
1130 /*
1131 * make BCOLOR follow COLOR so that we can make use of
1132 * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
1133 */
1134 for (i = 0; i < 4; i++) {
1135 const int slot = color_slot[i];
1136
1137 if (slot < 0)
1138 continue;
1139
1140 sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
1141 sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
1142 sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;
1143
1144 output_map[num_outs++] = slot;
1145 }
1146
1147 /* add the rest of the outputs */
1148 for (i = 0; i < tgsi->num_outputs; i++) {
1149 switch (tgsi->outputs[i].semantic_name) {
1150 case TGSI_SEMANTIC_PSIZE:
1151 case TGSI_SEMANTIC_POSITION:
1152 case TGSI_SEMANTIC_CLIPDIST:
1153 case TGSI_SEMANTIC_COLOR:
1154 case TGSI_SEMANTIC_BCOLOR:
1155 break;
1156 default:
1157 sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
1158 sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
1159 sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
1160 output_map[num_outs++] = i;
1161 break;
1162 }
1163 }
1164
1165 sh->out.count = num_outs;
1166 }
1167
1168 /**
1169 * Translate the TGSI tokens.
1170 */
1171 static bool
1172 vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1173 struct toy_tgsi *tgsi)
1174 {
1175 if (ilo_debug & ILO_DEBUG_VS) {
1176 ilo_printf("dumping vertex shader\n");
1177 ilo_printf("\n");
1178
1179 tgsi_dump(tokens, 0);
1180 ilo_printf("\n");
1181 }
1182
1183 toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
1184 if (tc->fail) {
1185 ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason);
1186 return false;
1187 }
1188
1189 if (ilo_debug & ILO_DEBUG_VS) {
1190 ilo_printf("TGSI translator:\n");
1191 toy_tgsi_dump(tgsi);
1192 ilo_printf("\n");
1193 toy_compiler_dump(tc);
1194 ilo_printf("\n");
1195 }
1196
1197 return true;
1198 }
1199
1200 /**
1201 * Set up VS compile context. This includes translating the TGSI tokens.
1202 */
1203 static bool
1204 vs_setup(struct vs_compile_context *vcc,
1205 const struct ilo_shader_state *state,
1206 const struct ilo_shader_variant *variant)
1207 {
1208 int num_consts;
1209
1210 memset(vcc, 0, sizeof(*vcc));
1211
1212 vcc->shader = CALLOC_STRUCT(ilo_shader);
1213 if (!vcc->shader)
1214 return false;
1215
1216 vcc->variant = variant;
1217
1218 toy_compiler_init(&vcc->tc, state->info.dev);
1219 vcc->tc.templ.access_mode = BRW_ALIGN_16;
1220 vcc->tc.templ.exec_size = BRW_EXECUTE_8;
1221 vcc->tc.rect_linear_width = 4;
1222
1223 /*
1224 * The classic driver uses the sampler cache (gen6) or the data cache
1225 * (gen7). Why?
1226 */
1227 vcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
1228
1229 if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) {
1230 toy_compiler_cleanup(&vcc->tc);
1231 FREE(vcc->shader);
1232 return false;
1233 }
1234
1235 vs_setup_shader_in(vcc->shader, &vcc->tgsi);
1236 vs_setup_shader_out(vcc->shader, &vcc->tgsi,
1237 (vcc->variant->u.vs.num_ucps > 0), vcc->output_map);
1238
1239 if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) {
1240 num_consts = (vcc->tgsi.const_count + 1) / 2;
1241
1242 /*
1243 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1244 *
1245 * "The sum of all four read length fields (each incremented to
1246 * represent the actual read length) must be less than or equal to
1247 * 32"
1248 */
1249 if (num_consts > 32)
1250 num_consts = 0;
1251 }
1252 else {
1253 num_consts = 0;
1254 }
1255
1256 vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts);
1257 vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1258
1259 /* r0 is reserved for payload header */
1260 vcc->first_const_grf = 1;
1261 vcc->first_ucp_grf = vcc->first_const_grf + num_consts;
1262
1263 /* fit each pair of user clip planes into a register */
1264 vcc->first_vue_grf = vcc->first_ucp_grf +
1265 (vcc->variant->u.vs.num_ucps + 1) / 2;
1266
1267 vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
1268 vcc->last_free_grf = 127;
1269
1270 /* m0 is reserved for system routines */
1271 vcc->first_free_mrf = 1;
1272 vcc->last_free_mrf = 15;
1273
1274 vcc->num_grf_per_vrf = 1;
1275
1276 if (vcc->tc.dev->gen >= ILO_GEN(7)) {
1277 vcc->last_free_grf -= 15;
1278 vcc->first_free_mrf = vcc->last_free_grf + 1;
1279 vcc->last_free_mrf = vcc->first_free_mrf + 14;
1280 }
1281
1282 vcc->shader->in.start_grf = vcc->first_const_grf;
1283 vcc->shader->pcb.clip_state_size =
1284 vcc->variant->u.vs.num_ucps * (sizeof(float) * 4);
1285
1286 return true;
1287 }
1288
1289 /**
1290 * Compile the vertex shader.
1291 */
1292 struct ilo_shader *
1293 ilo_shader_compile_vs(const struct ilo_shader_state *state,
1294 const struct ilo_shader_variant *variant)
1295 {
1296 struct vs_compile_context vcc;
1297 bool need_gs;
1298
1299 if (!vs_setup(&vcc, state, variant))
1300 return NULL;
1301
1302 if (vcc.tc.dev->gen >= ILO_GEN(7)) {
1303 need_gs = false;
1304 }
1305 else {
1306 need_gs = variant->u.vs.rasterizer_discard ||
1307 state->info.stream_output.num_outputs;
1308 }
1309
1310 vs_write_vue(&vcc);
1311
1312 if (!vs_compile(&vcc)) {
1313 FREE(vcc.shader);
1314 vcc.shader = NULL;
1315 }
1316
1317 toy_tgsi_cleanup(&vcc.tgsi);
1318 toy_compiler_cleanup(&vcc.tc);
1319
1320 if (need_gs) {
1321 int so_mapping[PIPE_MAX_SHADER_OUTPUTS];
1322 int i, j;
1323
1324 for (i = 0; i < vcc.tgsi.num_outputs; i++) {
1325 int attr = 0;
1326
1327 for (j = 0; j < vcc.shader->out.count; j++) {
1328 if (vcc.tgsi.outputs[i].semantic_name ==
1329 vcc.shader->out.semantic_names[j] &&
1330 vcc.tgsi.outputs[i].semantic_index ==
1331 vcc.shader->out.semantic_indices[j]) {
1332 attr = j;
1333 break;
1334 }
1335 }
1336
1337 so_mapping[i] = attr;
1338 }
1339
1340 if (!ilo_shader_compile_gs_passthrough(state, variant,
1341 so_mapping, vcc.shader)) {
1342 ilo_shader_destroy_kernel(vcc.shader);
1343 vcc.shader = NULL;
1344 }
1345 }
1346
1347 return vcc.shader;
1348 }