ilo: support push constant model in shaders
[mesa.git] / src / gallium / drivers / ilo / shader / ilo_shader_vs.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
31 #include "toy_tgsi.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
37
38 struct vs_compile_context {
39 struct ilo_shader *shader;
40 const struct ilo_shader_variant *variant;
41
42 struct toy_compiler tc;
43 struct toy_tgsi tgsi;
44 enum brw_message_target const_cache;
45
46 int output_map[PIPE_MAX_SHADER_OUTPUTS];
47
48 int num_grf_per_vrf;
49 int first_const_grf;
50 int first_ucp_grf;
51 int first_vue_grf;
52 int first_free_grf;
53 int last_free_grf;
54
55 int first_free_mrf;
56 int last_free_mrf;
57 };
58
59 static void
60 vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc,
61 struct toy_dst dst, int dim, int idx)
62 {
63 struct toy_compiler *tc = &vcc->tc;
64 int slot;
65
66 assert(!dim);
67
68 slot = toy_tgsi_find_input(&vcc->tgsi, idx);
69 if (slot >= 0) {
70 const int first_in_grf = vcc->first_vue_grf +
71 (vcc->shader->in.count - vcc->tgsi.num_inputs);
72 const int grf = first_in_grf + vcc->tgsi.inputs[slot].semantic_index;
73 const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
74
75 tc_MOV(tc, dst, src);
76 }
77 else {
78 /* undeclared input */
79 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
80 }
81 }
82
83 static bool
84 vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc,
85 struct toy_dst dst, int dim,
86 struct toy_src idx)
87 {
88 const int i = idx.val32;
89 const int grf = vcc->first_const_grf + i / 2;
90 const int grf_subreg = (i & 1) * 16;
91 struct toy_src src;
92
93 if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
94 grf >= vcc->first_ucp_grf)
95 return false;
96
97
98 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041);
99 tc_MOV(&vcc->tc, dst, src);
100
101 return true;
102 }
103
104 static void
105 vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
106 struct toy_dst dst, int dim,
107 struct toy_src idx)
108 {
109 const struct toy_dst header =
110 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
111 const struct toy_dst block_offsets =
112 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0));
113 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
114 struct toy_compiler *tc = &vcc->tc;
115 unsigned msg_type, msg_ctrl, msg_len;
116 struct toy_inst *inst;
117 struct toy_src desc;
118
119 if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
120 return;
121
122 /* set message header */
123 inst = tc_MOV(tc, header, r0);
124 inst->mask_ctrl = BRW_MASK_DISABLE;
125
126 /* set block offsets */
127 tc_MOV(tc, block_offsets, idx);
128
129 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
130 msg_ctrl = BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD << 8;;
131 msg_len = 2;
132
133 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
134 msg_type, msg_ctrl, ILO_VS_CONST_SURFACE(dim));
135
136 tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache);
137 }
138
139 static void
140 vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
141 struct toy_dst dst, int dim,
142 struct toy_src idx)
143 {
144 struct toy_compiler *tc = &vcc->tc;
145 const struct toy_dst offset =
146 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
147 struct toy_src desc;
148
149 if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
150 return;
151
152 /*
153 * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
154 * changed from OWord Dual Block Read to ld to increase performance in the
155 * classic driver. Since we use the constant cache instead of the data
156 * cache, I wonder if we still want to follow the classic driver.
157 */
158
159 /* set offset */
160 tc_MOV(tc, offset, idx);
161
162 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
163 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
164 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
165 0,
166 ILO_VS_CONST_SURFACE(dim));
167
168 tc_SEND(tc, dst, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
169 }
170
171 static void
172 vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc,
173 struct toy_dst dst, int idx)
174 {
175 const uint32_t *imm;
176 int ch;
177
178 imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL);
179
180 for (ch = 0; ch < 4; ch++) {
181 /* raw moves */
182 tc_MOV(&vcc->tc,
183 tdst_writemask(tdst_ud(dst), 1 << ch),
184 tsrc_imm_ud(imm[ch]));
185 }
186 }
187
188
189 static void
190 vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc,
191 struct toy_dst dst, int dim, int idx)
192 {
193 struct toy_compiler *tc = &vcc->tc;
194 const struct toy_tgsi *tgsi = &vcc->tgsi;
195 int slot;
196
197 assert(!dim);
198
199 slot = toy_tgsi_find_system_value(tgsi, idx);
200 if (slot < 0)
201 return;
202
203 switch (tgsi->system_values[slot].semantic_name) {
204 case TGSI_SEMANTIC_INSTANCEID:
205 case TGSI_SEMANTIC_VERTEXID:
206 /*
207 * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for
208 * the generated IDs, with VID in the X channel and IID in the Y
209 * channel.
210 */
211 {
212 const int grf = vcc->first_vue_grf;
213 const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
214 const enum toy_swizzle swizzle =
215 (tgsi->system_values[slot].semantic_name ==
216 TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X;
217
218 tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle)));
219 }
220 break;
221 case TGSI_SEMANTIC_PRIMID:
222 default:
223 tc_fail(tc, "unhandled system value");
224 tc_MOV(tc, dst, tsrc_imm_d(0));
225 break;
226 }
227 }
228
229 static void
230 vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc,
231 struct toy_inst *inst)
232 {
233 struct toy_compiler *tc = &vcc->tc;
234 int dim, idx;
235
236 assert(inst->src[0].file == TOY_FILE_IMM);
237 dim = inst->src[0].val32;
238
239 assert(inst->src[1].file == TOY_FILE_IMM);
240 idx = inst->src[1].val32;
241
242 switch (inst->opcode) {
243 case TOY_OPCODE_TGSI_IN:
244 vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx);
245 break;
246 case TOY_OPCODE_TGSI_CONST:
247 if (tc->dev->gen >= ILO_GEN(7))
248 vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]);
249 else
250 vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]);
251 break;
252 case TOY_OPCODE_TGSI_SV:
253 vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx);
254 break;
255 case TOY_OPCODE_TGSI_IMM:
256 assert(!dim);
257 vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx);
258 break;
259 default:
260 tc_fail(tc, "unhandled TGSI fetch");
261 break;
262 }
263
264 tc_discard_inst(tc, inst);
265 }
266
267 static void
268 vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc,
269 struct toy_inst *inst)
270 {
271 struct toy_compiler *tc = &vcc->tc;
272 enum tgsi_file_type file;
273 int dim, idx;
274 struct toy_src indirect_dim, indirect_idx;
275
276 assert(inst->src[0].file == TOY_FILE_IMM);
277 file = inst->src[0].val32;
278
279 assert(inst->src[1].file == TOY_FILE_IMM);
280 dim = inst->src[1].val32;
281 indirect_dim = inst->src[2];
282
283 assert(inst->src[3].file == TOY_FILE_IMM);
284 idx = inst->src[3].val32;
285 indirect_idx = inst->src[4];
286
287 /* no dimension indirection */
288 assert(indirect_dim.file == TOY_FILE_IMM);
289 dim += indirect_dim.val32;
290
291 switch (inst->opcode) {
292 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
293 if (file == TGSI_FILE_CONSTANT) {
294 if (idx) {
295 struct toy_dst tmp = tc_alloc_tmp(tc);
296
297 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
298 indirect_idx = tsrc_from(tmp);
299 }
300
301 if (tc->dev->gen >= ILO_GEN(7))
302 vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx);
303 else
304 vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx);
305 break;
306 }
307 /* fall through */
308 case TOY_OPCODE_TGSI_INDIRECT_STORE:
309 default:
310 tc_fail(tc, "unhandled TGSI indirection");
311 break;
312 }
313
314 tc_discard_inst(tc, inst);
315 }
316
317 /**
318 * Emit instructions to move sampling parameters to the message registers.
319 */
320 static int
321 vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf,
322 struct toy_src coords, int num_coords,
323 struct toy_src bias_or_lod, struct toy_src ref_or_si,
324 struct toy_src ddx, struct toy_src ddy, int num_derivs)
325 {
326 const unsigned coords_writemask = (1 << num_coords) - 1;
327 struct toy_dst m[3];
328 int num_params, i;
329
330 assert(num_coords <= 4);
331 assert(num_derivs <= 3 && num_derivs <= num_coords);
332
333 for (i = 0; i < Elements(m); i++)
334 m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0);
335
336 switch (msg_type) {
337 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
338 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
339 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod);
340 num_params = 5;
341 break;
342 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
343 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
344 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ),
345 tsrc_swizzle(ddx, 0, 0, 1, 1));
346 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW),
347 tsrc_swizzle(ddy, 0, 0, 1, 1));
348 if (num_derivs > 2) {
349 tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X),
350 tsrc_swizzle1(ddx, 2));
351 tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y),
352 tsrc_swizzle1(ddy, 2));
353 }
354 num_params = 4 + num_derivs * 2;
355 break;
356 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
357 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
358 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si);
359 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod);
360 num_params = 6;
361 break;
362 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
363 assert(num_coords <= 3);
364 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords);
365 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod);
366 if (tc->dev->gen >= ILO_GEN(7)) {
367 num_params = 4;
368 }
369 else {
370 tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si);
371 num_params = 5;
372 }
373 break;
374 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
375 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod);
376 num_params = 1;
377 break;
378 default:
379 tc_fail(tc, "unknown sampler opcode");
380 num_params = 0;
381 break;
382 }
383
384 return (num_params + 3) / 4;
385 }
386
387 /**
388 * Set up message registers and return the message descriptor for sampling.
389 */
390 static struct toy_src
391 vs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
392 int base_mrf, unsigned *ret_sampler_index)
393 {
394 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
395 struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si;
396 int num_coords, ref_pos, num_derivs;
397 int sampler_src;
398
399 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD4X2;
400
401 coords = inst->src[0];
402 ddx = tsrc_null();
403 ddy = tsrc_null();
404 bias_or_lod = tsrc_null();
405 ref_or_si = tsrc_null();
406 num_derivs = 0;
407 sampler_src = 1;
408
409 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
410
411 /* extract the parameters */
412 switch (inst->opcode) {
413 case TOY_OPCODE_TGSI_TXD:
414 if (ref_pos >= 0)
415 tc_fail(tc, "TXD with shadow sampler not supported");
416
417 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
418 ddx = inst->src[1];
419 ddy = inst->src[2];
420 num_derivs = num_coords;
421 sampler_src = 3;
422 break;
423 case TOY_OPCODE_TGSI_TXL:
424 if (ref_pos >= 0) {
425 assert(ref_pos < 3);
426
427 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
428 ref_or_si = tsrc_swizzle1(coords, ref_pos);
429 }
430 else {
431 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
432 }
433
434 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
435 break;
436 case TOY_OPCODE_TGSI_TXF:
437 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
438
439 switch (inst->tex.target) {
440 case TGSI_TEXTURE_2D_MSAA:
441 case TGSI_TEXTURE_2D_ARRAY_MSAA:
442 assert(ref_pos >= 0 && ref_pos < 4);
443 /* lod is always 0 */
444 bias_or_lod = tsrc_imm_d(0);
445 ref_or_si = tsrc_swizzle1(coords, ref_pos);
446 break;
447 default:
448 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
449 break;
450 }
451
452 /* offset the coordinates */
453 if (!tsrc_is_null(inst->tex.offsets[0])) {
454 struct toy_dst tmp;
455
456 tmp = tc_alloc_tmp(tc);
457 tc_ADD(tc, tmp, coords, inst->tex.offsets[0]);
458 coords = tsrc_from(tmp);
459 }
460
461 sampler_src = 1;
462 break;
463 case TOY_OPCODE_TGSI_TXQ:
464 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
465 num_coords = 0;
466 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X);
467 break;
468 case TOY_OPCODE_TGSI_TXQ_LZ:
469 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
470 num_coords = 0;
471 sampler_src = 0;
472 break;
473 case TOY_OPCODE_TGSI_TXL2:
474 if (ref_pos >= 0) {
475 assert(ref_pos < 4);
476
477 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
478 ref_or_si = tsrc_swizzle1(coords, ref_pos);
479 }
480 else {
481 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
482 }
483
484 bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X);
485 sampler_src = 2;
486 break;
487 default:
488 assert(!"unhandled sampling opcode");
489 if (ret_sampler_index)
490 *ret_sampler_index = 0;
491 return tsrc_null();
492 break;
493 }
494
495 assert(inst->src[sampler_src].file == TOY_FILE_IMM);
496 sampler_index = inst->src[sampler_src].val32;
497 binding_table_index = ILO_VS_TEXTURE_SURFACE(sampler_index);
498
499 /*
500 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
501 *
502 * "Note that the (cube map) coordinates delivered to the sampling
503 * engine must already have been divided by the component with the
504 * largest absolute value."
505 */
506 switch (inst->tex.target) {
507 case TGSI_TEXTURE_CUBE:
508 case TGSI_TEXTURE_SHADOWCUBE:
509 case TGSI_TEXTURE_CUBE_ARRAY:
510 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
511 /* TXQ does not need coordinates */
512 if (num_coords >= 3) {
513 struct toy_dst tmp, max;
514 struct toy_src abs_coords[3];
515 int i;
516
517 tmp = tc_alloc_tmp(tc);
518 max = tdst_writemask(tmp, TOY_WRITEMASK_W);
519
520 for (i = 0; i < 3; i++)
521 abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i));
522
523 tc_SEL(tc, max, abs_coords[0], abs_coords[0], BRW_CONDITIONAL_GE);
524 tc_SEL(tc, max, tsrc_from(max), abs_coords[0], BRW_CONDITIONAL_GE);
525 tc_INV(tc, max, tsrc_from(max));
526
527 for (i = 0; i < 3; i++)
528 tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max));
529
530 coords = tsrc_from(tmp);
531 }
532 break;
533 }
534
535 /* set up sampler parameters */
536 msg_len = vs_add_sampler_params(tc, msg_type, base_mrf,
537 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
538
539 /*
540 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
541 *
542 * "The maximum message length allowed to the sampler is 11. This would
543 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
544 * SIMD16."
545 */
546 if (msg_len > 11)
547 tc_fail(tc, "maximum length for messages to the sampler is 11");
548
549 if (ret_sampler_index)
550 *ret_sampler_index = sampler_index;
551
552 return tsrc_imm_mdesc_sampler(tc, msg_len, 1,
553 false, simd_mode, msg_type, sampler_index, binding_table_index);
554 }
555
556 static void
557 vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc,
558 struct toy_inst *inst)
559 {
560 struct toy_compiler *tc = &vcc->tc;
561 struct toy_src desc;
562 struct toy_dst dst, tmp;
563 unsigned sampler_index;
564 int swizzles[4], i;
565 unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask;
566 bool need_filter;
567
568 desc = vs_prepare_tgsi_sampling(tc, inst,
569 vcc->first_free_mrf, &sampler_index);
570
571 switch (inst->opcode) {
572 case TOY_OPCODE_TGSI_TXF:
573 case TOY_OPCODE_TGSI_TXQ:
574 case TOY_OPCODE_TGSI_TXQ_LZ:
575 need_filter = false;
576 break;
577 default:
578 need_filter = true;
579 break;
580 }
581
582 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
583 inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0);
584 inst->src[1] = desc;
585
586 /* write to a temp first */
587 tmp = tc_alloc_tmp(tc);
588 tmp.type = inst->dst.type;
589 dst = inst->dst;
590 inst->dst = tmp;
591
592 tc_move_inst(tc, inst);
593
594 if (need_filter) {
595 assert(sampler_index < vcc->variant->num_sampler_views);
596 swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r;
597 swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g;
598 swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b;
599 swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a;
600 }
601 else {
602 swizzles[0] = PIPE_SWIZZLE_RED;
603 swizzles[1] = PIPE_SWIZZLE_GREEN;
604 swizzles[2] = PIPE_SWIZZLE_BLUE;
605 swizzles[3] = PIPE_SWIZZLE_ALPHA;
606 }
607
608 swizzle_zero_mask = 0;
609 swizzle_one_mask = 0;
610 swizzle_normal_mask = 0;
611 for (i = 0; i < 4; i++) {
612 switch (swizzles[i]) {
613 case PIPE_SWIZZLE_ZERO:
614 swizzle_zero_mask |= 1 << i;
615 swizzles[i] = i;
616 break;
617 case PIPE_SWIZZLE_ONE:
618 swizzle_one_mask |= 1 << i;
619 swizzles[i] = i;
620 break;
621 default:
622 swizzle_normal_mask |= 1 << i;
623 break;
624 }
625 }
626
627 /* swizzle the results */
628 if (swizzle_normal_mask) {
629 tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask),
630 tsrc_swizzle(tsrc_from(tmp), swizzles[0],
631 swizzles[1], swizzles[2], swizzles[3]));
632 }
633 if (swizzle_zero_mask)
634 tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f));
635 if (swizzle_one_mask)
636 tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f));
637 }
638
639 static void
640 vs_lower_opcode_urb_write(struct toy_compiler *tc, struct toy_inst *inst)
641 {
642 /* vs_write_vue() has set up the message registers */
643 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
644 }
645
646 static void
647 vs_lower_virtual_opcodes(struct vs_compile_context *vcc)
648 {
649 struct toy_compiler *tc = &vcc->tc;
650 struct toy_inst *inst;
651
652 tc_head(tc);
653 while ((inst = tc_next(tc)) != NULL) {
654 switch (inst->opcode) {
655 case TOY_OPCODE_TGSI_IN:
656 case TOY_OPCODE_TGSI_CONST:
657 case TOY_OPCODE_TGSI_SV:
658 case TOY_OPCODE_TGSI_IMM:
659 vs_lower_opcode_tgsi_direct(vcc, inst);
660 break;
661 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
662 case TOY_OPCODE_TGSI_INDIRECT_STORE:
663 vs_lower_opcode_tgsi_indirect(vcc, inst);
664 break;
665 case TOY_OPCODE_TGSI_TEX:
666 case TOY_OPCODE_TGSI_TXB:
667 case TOY_OPCODE_TGSI_TXD:
668 case TOY_OPCODE_TGSI_TXL:
669 case TOY_OPCODE_TGSI_TXP:
670 case TOY_OPCODE_TGSI_TXF:
671 case TOY_OPCODE_TGSI_TXQ:
672 case TOY_OPCODE_TGSI_TXQ_LZ:
673 case TOY_OPCODE_TGSI_TEX2:
674 case TOY_OPCODE_TGSI_TXB2:
675 case TOY_OPCODE_TGSI_TXL2:
676 case TOY_OPCODE_TGSI_SAMPLE:
677 case TOY_OPCODE_TGSI_SAMPLE_I:
678 case TOY_OPCODE_TGSI_SAMPLE_I_MS:
679 case TOY_OPCODE_TGSI_SAMPLE_B:
680 case TOY_OPCODE_TGSI_SAMPLE_C:
681 case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
682 case TOY_OPCODE_TGSI_SAMPLE_D:
683 case TOY_OPCODE_TGSI_SAMPLE_L:
684 case TOY_OPCODE_TGSI_GATHER4:
685 case TOY_OPCODE_TGSI_SVIEWINFO:
686 case TOY_OPCODE_TGSI_SAMPLE_POS:
687 case TOY_OPCODE_TGSI_SAMPLE_INFO:
688 vs_lower_opcode_tgsi_sampling(vcc, inst);
689 break;
690 case TOY_OPCODE_INV:
691 case TOY_OPCODE_LOG:
692 case TOY_OPCODE_EXP:
693 case TOY_OPCODE_SQRT:
694 case TOY_OPCODE_RSQ:
695 case TOY_OPCODE_SIN:
696 case TOY_OPCODE_COS:
697 case TOY_OPCODE_FDIV:
698 case TOY_OPCODE_POW:
699 case TOY_OPCODE_INT_DIV_QUOTIENT:
700 case TOY_OPCODE_INT_DIV_REMAINDER:
701 toy_compiler_lower_math(tc, inst);
702 break;
703 case TOY_OPCODE_URB_WRITE:
704 vs_lower_opcode_urb_write(tc, inst);
705 break;
706 default:
707 if (inst->opcode > 127)
708 tc_fail(tc, "unhandled virtual opcode");
709 break;
710 }
711 }
712 }
713
714 /**
715 * Compile the shader.
716 */
717 static bool
718 vs_compile(struct vs_compile_context *vcc)
719 {
720 struct toy_compiler *tc = &vcc->tc;
721 struct ilo_shader *sh = vcc->shader;
722
723 vs_lower_virtual_opcodes(vcc);
724 toy_compiler_legalize_for_ra(tc);
725 toy_compiler_optimize(tc);
726 toy_compiler_allocate_registers(tc,
727 vcc->first_free_grf,
728 vcc->last_free_grf,
729 vcc->num_grf_per_vrf);
730 toy_compiler_legalize_for_asm(tc);
731
732 if (tc->fail) {
733 ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
734 return false;
735 }
736
737 if (ilo_debug & ILO_DEBUG_VS) {
738 ilo_printf("legalized instructions:\n");
739 toy_compiler_dump(tc);
740 ilo_printf("\n");
741 }
742
743 if (true) {
744 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
745 }
746 else {
747 static const uint32_t microcode[] = {
748 /* fill in the microcode here */
749 0x0, 0x0, 0x0, 0x0,
750 };
751 const bool swap = true;
752
753 sh->kernel_size = sizeof(microcode);
754 sh->kernel = MALLOC(sh->kernel_size);
755
756 if (sh->kernel) {
757 const int num_dwords = sizeof(microcode) / 4;
758 const uint32_t *src = microcode;
759 uint32_t *dst = (uint32_t *) sh->kernel;
760 int i;
761
762 for (i = 0; i < num_dwords; i += 4) {
763 if (swap) {
764 dst[i + 0] = src[i + 3];
765 dst[i + 1] = src[i + 2];
766 dst[i + 2] = src[i + 1];
767 dst[i + 3] = src[i + 0];
768 }
769 else {
770 memcpy(dst, src, 16);
771 }
772 }
773 }
774 }
775
776 if (!sh->kernel) {
777 ilo_err("failed to compile VS: %s\n", tc->reason);
778 return false;
779 }
780
781 if (ilo_debug & ILO_DEBUG_VS) {
782 ilo_printf("disassembly:\n");
783 toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
784 ilo_printf("\n");
785 }
786
787 return true;
788 }
789
790 /**
791 * Collect the toy registers to be written to the VUE.
792 */
793 static int
794 vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
795 {
796 const struct toy_tgsi *tgsi = &vcc->tgsi;
797 int i;
798
799 for (i = 0; i < vcc->shader->out.count; i++) {
800 const int slot = vcc->output_map[i];
801 const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi,
802 TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1;
803 struct toy_src src;
804
805 if (vrf >= 0) {
806 struct toy_dst dst;
807
808 dst = tdst(TOY_FILE_VRF, vrf, 0);
809 src = tsrc_from(dst);
810
811 if (i == 0) {
812 /* PSIZE is at channel W */
813 tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W),
814 tsrc_swizzle1(src, TOY_SWIZZLE_X));
815
816 /* the other channels are for the header */
817 dst = tdst_d(dst);
818 tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ),
819 tsrc_imm_d(0));
820 }
821 else {
822 /* initialize unused channels to 0.0f */
823 if (tgsi->outputs[slot].undefined_mask) {
824 dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask);
825 tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f));
826 }
827 }
828 }
829 else {
830 /* XXX this is too ugly */
831 if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST &&
832 slot < 0) {
833 /* ok, we need to compute clip distance */
834 int clipvert_slot = -1, clipvert_vrf, j;
835
836 for (j = 0; j < tgsi->num_outputs; j++) {
837 if (tgsi->outputs[j].semantic_name ==
838 TGSI_SEMANTIC_CLIPVERTEX) {
839 clipvert_slot = j;
840 break;
841 }
842 else if (tgsi->outputs[j].semantic_name ==
843 TGSI_SEMANTIC_POSITION) {
844 /* remember pos, but keep looking */
845 clipvert_slot = j;
846 }
847 }
848
849 clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi,
850 TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1;
851 if (clipvert_vrf >= 0) {
852 struct toy_dst tmp = tc_alloc_tmp(&vcc->tc);
853 struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0);
854 int first_ucp, last_ucp;
855
856 if (vcc->shader->out.semantic_indices[i]) {
857 first_ucp = 4;
858 last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1);
859 }
860 else {
861 first_ucp = 0;
862 last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1);
863 }
864
865 for (j = first_ucp; j <= last_ucp; j++) {
866 const int plane_grf = vcc->first_ucp_grf + j / 2;
867 const int plane_subreg = (j & 1) * 16;
868 const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
869 plane_grf, plane_subreg), TOY_RECT_041);
870 const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j);
871
872 tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask),
873 clipvert, plane);
874 }
875
876 src = tsrc_from(tmp);
877 }
878 else {
879 src = tsrc_imm_f(0.0f);
880 }
881 }
882 else {
883 src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
884 }
885 }
886
887 outs[i] = src;
888 }
889
890 return i;
891 }
892
893 /**
894 * Emit instructions to write the VUE.
895 */
896 static void
897 vs_write_vue(struct vs_compile_context *vcc)
898 {
899 struct toy_compiler *tc = &vcc->tc;
900 struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS];
901 struct toy_dst header;
902 struct toy_src r0;
903 struct toy_inst *inst;
904 int sent_attrs, total_attrs;
905
906 header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
907 r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
908 inst = tc_MOV(tc, header, r0);
909 inst->mask_ctrl = BRW_MASK_DISABLE;
910
911 if (tc->dev->gen >= ILO_GEN(7)) {
912 inst = tc_OR(tc, tdst_offset(header, 0, 5),
913 tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010),
914 tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010));
915 inst->exec_size = BRW_EXECUTE_1;
916 inst->access_mode = BRW_ALIGN_1;
917 inst->mask_ctrl = BRW_MASK_DISABLE;
918 }
919
920 total_attrs = vs_collect_outputs(vcc, outs);
921 sent_attrs = 0;
922 while (sent_attrs < total_attrs) {
923 struct toy_src desc;
924 int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs;
925 int num_attrs, msg_len, i;
926 bool eot;
927
928 num_attrs = total_attrs - sent_attrs;
929 eot = true;
930
931 /* see if we need another message */
932 avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1;
933 if (num_attrs > avail_mrf_for_attrs) {
934 /*
935 * From the Sandy Bridge PRM, volume 4 part 2, page 22:
936 *
937 * "Offset. This field specifies a destination offset (in 256-bit
938 * units) from the start of the URB entry(s), as referenced by
939 * URB Return Handle n, at which the data (if any) will be
940 * written."
941 *
942 * As we need to offset the following messages, we must make sure
943 * this one writes an even number of attributes.
944 */
945 num_attrs = avail_mrf_for_attrs & ~1;
946 eot = false;
947 }
948
949 if (tc->dev->gen >= ILO_GEN(7)) {
950 /* do not forget about the header */
951 msg_len = 1 + num_attrs;
952 }
953 else {
954 /*
955 * From the Sandy Bridge PRM, volume 4 part 2, page 26:
956 *
957 * "At least 256 bits per vertex (512 bits total, M1 & M2) must
958 * be written. Writing only 128 bits per vertex (256 bits
959 * total, M1 only) results in UNDEFINED operation."
960 *
961 * "[DevSNB] Interleave writes must be in multiples of 256 per
962 * vertex."
963 *
964 * That is, we must write or appear to write an even number of
965 * attributes, starting from two.
966 */
967 if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) {
968 num_attrs--;
969 eot = false;
970 }
971
972 msg_len = 1 + align(num_attrs, 2);
973 }
974
975 for (i = 0; i < num_attrs; i++)
976 tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]);
977
978 assert(sent_attrs % 2 == 0);
979 desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0,
980 eot, true, false, BRW_URB_SWIZZLE_INTERLEAVE, sent_attrs / 2, 0);
981
982 tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc);
983
984 sent_attrs += num_attrs;
985 }
986 }
987
988 /**
989 * Set up shader inputs for fixed-function units.
990 */
991 static void
992 vs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
993 {
994 int num_attrs, i;
995
996 /* vertex/instance id is the first VE if exists */
997 for (i = 0; i < tgsi->num_system_values; i++) {
998 bool found = false;
999
1000 switch (tgsi->system_values[i].semantic_name) {
1001 case TGSI_SEMANTIC_INSTANCEID:
1002 case TGSI_SEMANTIC_VERTEXID:
1003 found = true;
1004 break;
1005 default:
1006 break;
1007 }
1008
1009 if (found) {
1010 sh->in.semantic_names[sh->in.count] =
1011 tgsi->system_values[i].semantic_name;
1012 sh->in.semantic_indices[sh->in.count] =
1013 tgsi->system_values[i].semantic_index;
1014 sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT;
1015 sh->in.centroid[sh->in.count] = false;
1016
1017 sh->in.count++;
1018 break;
1019 }
1020 }
1021
1022 num_attrs = 0;
1023 for (i = 0; i < tgsi->num_inputs; i++) {
1024 assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC);
1025 if (tgsi->inputs[i].semantic_index >= num_attrs)
1026 num_attrs = tgsi->inputs[i].semantic_index + 1;
1027 }
1028 assert(num_attrs <= PIPE_MAX_ATTRIBS);
1029
1030 /* VF cannot remap VEs. VE[i] must be used as GENERIC[i]. */
1031 for (i = 0; i < num_attrs; i++) {
1032 sh->in.semantic_names[sh->in.count + i] = TGSI_SEMANTIC_GENERIC;
1033 sh->in.semantic_indices[sh->in.count + i] = i;
1034 sh->in.interp[sh->in.count + i] = TGSI_INTERPOLATE_CONSTANT;
1035 sh->in.centroid[sh->in.count + i] = false;
1036 }
1037
1038 sh->in.count += num_attrs;
1039
1040 sh->in.has_pos = false;
1041 sh->in.has_linear_interp = false;
1042 sh->in.barycentric_interpolation_mode = 0;
1043 }
1044
1045 /**
1046 * Set up shader outputs for fixed-function units.
1047 */
1048 static void
1049 vs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1050 bool output_clipdist, int *output_map)
1051 {
1052 int psize_slot = -1, pos_slot = -1;
1053 int clipdist_slot[2] = { -1, -1 };
1054 int color_slot[4] = { -1, -1, -1, -1 };
1055 int num_outs, i;
1056
1057 /* find out the slots of outputs that need special care */
1058 for (i = 0; i < tgsi->num_outputs; i++) {
1059 switch (tgsi->outputs[i].semantic_name) {
1060 case TGSI_SEMANTIC_PSIZE:
1061 psize_slot = i;
1062 break;
1063 case TGSI_SEMANTIC_POSITION:
1064 pos_slot = i;
1065 break;
1066 case TGSI_SEMANTIC_CLIPDIST:
1067 if (tgsi->outputs[i].semantic_index)
1068 clipdist_slot[1] = i;
1069 else
1070 clipdist_slot[0] = i;
1071 break;
1072 case TGSI_SEMANTIC_COLOR:
1073 if (tgsi->outputs[i].semantic_index)
1074 color_slot[2] = i;
1075 else
1076 color_slot[0] = i;
1077 break;
1078 case TGSI_SEMANTIC_BCOLOR:
1079 if (tgsi->outputs[i].semantic_index)
1080 color_slot[3] = i;
1081 else
1082 color_slot[1] = i;
1083 break;
1084 default:
1085 break;
1086 }
1087 }
1088
1089 /* the first two VUEs are always PSIZE and POSITION */
1090 num_outs = 2;
1091 output_map[0] = psize_slot;
1092 output_map[1] = pos_slot;
1093
1094 sh->out.register_indices[0] =
1095 (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
1096 sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
1097 sh->out.semantic_indices[0] = 0;
1098
1099 sh->out.register_indices[1] =
1100 (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
1101 sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
1102 sh->out.semantic_indices[1] = 0;
1103
1104 sh->out.has_pos = true;
1105
1106 /* followed by optional clip distances */
1107 if (output_clipdist) {
1108 sh->out.register_indices[num_outs] =
1109 (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
1110 sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
1111 sh->out.semantic_indices[num_outs] = 0;
1112 output_map[num_outs++] = clipdist_slot[0];
1113
1114 sh->out.register_indices[num_outs] =
1115 (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
1116 sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
1117 sh->out.semantic_indices[num_outs] = 1;
1118 output_map[num_outs++] = clipdist_slot[1];
1119 }
1120
1121 /*
1122 * make BCOLOR follow COLOR so that we can make use of
1123 * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
1124 */
1125 for (i = 0; i < 4; i++) {
1126 const int slot = color_slot[i];
1127
1128 if (slot < 0)
1129 continue;
1130
1131 sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
1132 sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
1133 sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;
1134
1135 output_map[num_outs++] = slot;
1136 }
1137
1138 /* add the rest of the outputs */
1139 for (i = 0; i < tgsi->num_outputs; i++) {
1140 switch (tgsi->outputs[i].semantic_name) {
1141 case TGSI_SEMANTIC_PSIZE:
1142 case TGSI_SEMANTIC_POSITION:
1143 case TGSI_SEMANTIC_CLIPDIST:
1144 case TGSI_SEMANTIC_COLOR:
1145 case TGSI_SEMANTIC_BCOLOR:
1146 break;
1147 default:
1148 sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
1149 sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
1150 sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
1151 output_map[num_outs++] = i;
1152 break;
1153 }
1154 }
1155
1156 sh->out.count = num_outs;
1157 }
1158
1159 /**
1160 * Translate the TGSI tokens.
1161 */
1162 static bool
1163 vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1164 struct toy_tgsi *tgsi)
1165 {
1166 if (ilo_debug & ILO_DEBUG_VS) {
1167 ilo_printf("dumping vertex shader\n");
1168 ilo_printf("\n");
1169
1170 tgsi_dump(tokens, 0);
1171 ilo_printf("\n");
1172 }
1173
1174 toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
1175 if (tc->fail) {
1176 ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason);
1177 return false;
1178 }
1179
1180 if (ilo_debug & ILO_DEBUG_VS) {
1181 ilo_printf("TGSI translator:\n");
1182 toy_tgsi_dump(tgsi);
1183 ilo_printf("\n");
1184 toy_compiler_dump(tc);
1185 ilo_printf("\n");
1186 }
1187
1188 return true;
1189 }
1190
1191 /**
1192 * Set up VS compile context. This includes translating the TGSI tokens.
1193 */
1194 static bool
1195 vs_setup(struct vs_compile_context *vcc,
1196 const struct ilo_shader_state *state,
1197 const struct ilo_shader_variant *variant)
1198 {
1199 int num_consts;
1200
1201 memset(vcc, 0, sizeof(*vcc));
1202
1203 vcc->shader = CALLOC_STRUCT(ilo_shader);
1204 if (!vcc->shader)
1205 return false;
1206
1207 vcc->variant = variant;
1208
1209 toy_compiler_init(&vcc->tc, state->info.dev);
1210 vcc->tc.templ.access_mode = BRW_ALIGN_16;
1211 vcc->tc.templ.exec_size = BRW_EXECUTE_8;
1212 vcc->tc.rect_linear_width = 4;
1213
1214 /*
1215 * The classic driver uses the sampler cache (gen6) or the data cache
1216 * (gen7). Why?
1217 */
1218 vcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
1219
1220 if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) {
1221 toy_compiler_cleanup(&vcc->tc);
1222 FREE(vcc->shader);
1223 return false;
1224 }
1225
1226 vs_setup_shader_in(vcc->shader, &vcc->tgsi);
1227 vs_setup_shader_out(vcc->shader, &vcc->tgsi,
1228 (vcc->variant->u.vs.num_ucps > 0), vcc->output_map);
1229
1230 if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) {
1231 num_consts = (vcc->tgsi.const_count + 1) / 2;
1232
1233 /*
1234 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1235 *
1236 * "The sum of all four read length fields (each incremented to
1237 * represent the actual read length) must be less than or equal to
1238 * 32"
1239 */
1240 if (num_consts > 32)
1241 num_consts = 0;
1242 }
1243 else {
1244 num_consts = 0;
1245 }
1246
1247 vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts);
1248 vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1249
1250 /* r0 is reserved for payload header */
1251 vcc->first_const_grf = 1;
1252 vcc->first_ucp_grf = vcc->first_const_grf + num_consts;
1253
1254 /* fit each pair of user clip planes into a register */
1255 vcc->first_vue_grf = vcc->first_ucp_grf +
1256 (vcc->variant->u.vs.num_ucps + 1) / 2;
1257
1258 vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
1259 vcc->last_free_grf = 127;
1260
1261 /* m0 is reserved for system routines */
1262 vcc->first_free_mrf = 1;
1263 vcc->last_free_mrf = 15;
1264
1265 vcc->num_grf_per_vrf = 1;
1266
1267 if (vcc->tc.dev->gen >= ILO_GEN(7)) {
1268 vcc->last_free_grf -= 15;
1269 vcc->first_free_mrf = vcc->last_free_grf + 1;
1270 vcc->last_free_mrf = vcc->first_free_mrf + 14;
1271 }
1272
1273 vcc->shader->in.start_grf = vcc->first_const_grf;
1274 vcc->shader->pcb.clip_state_size =
1275 vcc->variant->u.vs.num_ucps * (sizeof(float) * 4);
1276
1277 return true;
1278 }
1279
1280 /**
1281 * Compile the vertex shader.
1282 */
1283 struct ilo_shader *
1284 ilo_shader_compile_vs(const struct ilo_shader_state *state,
1285 const struct ilo_shader_variant *variant)
1286 {
1287 struct vs_compile_context vcc;
1288 bool need_gs;
1289
1290 if (!vs_setup(&vcc, state, variant))
1291 return NULL;
1292
1293 if (vcc.tc.dev->gen >= ILO_GEN(7)) {
1294 need_gs = false;
1295 }
1296 else {
1297 need_gs = variant->u.vs.rasterizer_discard ||
1298 state->info.stream_output.num_outputs;
1299 }
1300
1301 vs_write_vue(&vcc);
1302
1303 if (!vs_compile(&vcc)) {
1304 FREE(vcc.shader);
1305 vcc.shader = NULL;
1306 }
1307
1308 toy_tgsi_cleanup(&vcc.tgsi);
1309 toy_compiler_cleanup(&vcc.tc);
1310
1311 if (need_gs) {
1312 int so_mapping[PIPE_MAX_SHADER_OUTPUTS];
1313 int i, j;
1314
1315 for (i = 0; i < vcc.tgsi.num_outputs; i++) {
1316 int attr = 0;
1317
1318 for (j = 0; j < vcc.shader->out.count; j++) {
1319 if (vcc.tgsi.outputs[i].semantic_name ==
1320 vcc.shader->out.semantic_names[j] &&
1321 vcc.tgsi.outputs[i].semantic_index ==
1322 vcc.shader->out.semantic_indices[j]) {
1323 attr = j;
1324 break;
1325 }
1326 }
1327
1328 so_mapping[i] = attr;
1329 }
1330
1331 if (!ilo_shader_compile_gs_passthrough(state, variant,
1332 so_mapping, vcc.shader)) {
1333 ilo_shader_destroy_kernel(vcc.shader);
1334 vcc.shader = NULL;
1335 }
1336 }
1337
1338 return vcc.shader;
1339 }