r600g,radeonsi: share r600_surface
[mesa.git] / src / gallium / drivers / ilo / shader / ilo_shader_fs.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
31 #include "toy_tgsi.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
37
38 struct fs_compile_context {
39 struct ilo_shader *shader;
40 const struct ilo_shader_variant *variant;
41
42 struct toy_compiler tc;
43 struct toy_tgsi tgsi;
44
45 enum brw_message_target const_cache;
46 int dispatch_mode;
47
48 struct {
49 int barycentric_interps[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
50 int source_depth;
51 int source_w;
52 int pos_offset;
53 } payloads[2];
54
55 int first_const_grf;
56 int first_attr_grf;
57 int first_free_grf;
58 int last_free_grf;
59
60 int num_grf_per_vrf;
61
62 int first_free_mrf;
63 int last_free_mrf;
64 };
65
66 static void
67 fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
68 {
69 struct toy_compiler *tc = &fcc->tc;
70 const struct toy_src src_z =
71 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
72 const struct toy_src src_w =
73 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
74 const int fb_height =
75 (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
76 const bool origin_upper_left =
77 (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
78 const bool pixel_center_integer =
79 (fcc->tgsi.props.fs_coord_pixel_center ==
80 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
81 struct toy_src subspan_x, subspan_y;
82 struct toy_dst tmp, tmp_uw;
83 struct toy_dst real_dst[4];
84
85 tdst_transpose(dst, real_dst);
86
87 subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
88 subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
89
90 subspan_y = tsrc_offset(subspan_x, 0, 1);
91
92 tmp_uw = tdst_uw(tc_alloc_tmp(tc));
93 tmp = tc_alloc_tmp(tc);
94
95 /* X */
96 tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
97 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
98 if (pixel_center_integer)
99 tc_MOV(tc, real_dst[0], tsrc_from(tmp));
100 else
101 tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
102
103 /* Y */
104 tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
105 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
106 if (origin_upper_left && pixel_center_integer) {
107 tc_MOV(tc, real_dst[1], tsrc_from(tmp));
108 }
109 else {
110 struct toy_src y = tsrc_from(tmp);
111 float offset = 0.0f;
112
113 if (!pixel_center_integer)
114 offset += 0.5f;
115
116 if (!origin_upper_left) {
117 offset += (float) (fb_height - 1);
118 y = tsrc_negate(y);
119 }
120
121 tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
122 }
123
124 /* Z and W */
125 tc_MOV(tc, real_dst[2], src_z);
126 tc_INV(tc, real_dst[3], src_w);
127 }
128
129 static void
130 fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
131 {
132 struct toy_compiler *tc = &fcc->tc;
133 const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
134 struct toy_dst tmp_f, tmp;
135 struct toy_dst real_dst[4];
136
137 tdst_transpose(dst, real_dst);
138
139 tmp_f = tc_alloc_tmp(tc);
140 tmp = tdst_d(tmp_f);
141 tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
142 tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
143 tc_MOV(tc, tmp_f, tsrc_from(tmp));
144
145 /* convert to 1.0 and -1.0 */
146 tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
147 tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
148
149 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
150 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
151 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
152 }
153
154 static void
155 fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
156 {
157 struct toy_compiler *tc = &fcc->tc;
158 struct toy_dst real_dst[4];
159 bool is_const = false;
160 int grf, mode, ch;
161
162 tdst_transpose(dst, real_dst);
163
164 grf = fcc->first_attr_grf + slot * 2;
165
166 switch (fcc->tgsi.inputs[slot].interp) {
167 case TGSI_INTERPOLATE_CONSTANT:
168 is_const = true;
169 break;
170 case TGSI_INTERPOLATE_LINEAR:
171 if (fcc->tgsi.inputs[slot].centroid)
172 mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
173 else
174 mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
175 break;
176 case TGSI_INTERPOLATE_COLOR:
177 if (fcc->variant->u.fs.flatshade) {
178 is_const = true;
179 break;
180 }
181 /* fall through */
182 case TGSI_INTERPOLATE_PERSPECTIVE:
183 if (fcc->tgsi.inputs[slot].centroid)
184 mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
185 else
186 mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
187 break;
188 default:
189 assert(!"unexpected FS interpolation");
190 mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
191 break;
192 }
193
194 if (is_const) {
195 struct toy_src a0[4];
196
197 a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
198 a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
199 a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
200 a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
201
202 for (ch = 0; ch < 4; ch++)
203 tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
204 }
205 else {
206 struct toy_src attr[4], uv;
207
208 attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
209 attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
210 attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
211 attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
212
213 uv = tsrc(TOY_FILE_GRF, fcc->payloads[0].barycentric_interps[mode], 0);
214
215 for (ch = 0; ch < 4; ch++) {
216 tc_add2(tc, BRW_OPCODE_PLN, real_dst[ch],
217 tsrc_rect(attr[ch], TOY_RECT_010), uv);
218 }
219 }
220
221 if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
222 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
223 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
224 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
225 }
226 }
227
228 static void
229 fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
230 struct toy_dst dst, int dim, int idx)
231 {
232 int slot;
233
234 assert(!dim);
235
236 slot = toy_tgsi_find_input(&fcc->tgsi, idx);
237 if (slot < 0)
238 return;
239
240 switch (fcc->tgsi.inputs[slot].semantic_name) {
241 case TGSI_SEMANTIC_POSITION:
242 fetch_position(fcc, dst);
243 break;
244 case TGSI_SEMANTIC_FACE:
245 fetch_face(fcc, dst);
246 break;
247 default:
248 fetch_attr(fcc, dst, slot);
249 break;
250 }
251 }
252
253 static void
254 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
255 struct toy_dst dst, int dim,
256 struct toy_src idx)
257 {
258 const struct toy_dst offset =
259 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
260 struct toy_compiler *tc = &fcc->tc;
261 unsigned simd_mode, param_size;
262 struct toy_inst *inst;
263 struct toy_src desc, real_src[4];
264 struct toy_dst tmp, real_dst[4];
265 int i;
266
267 tsrc_transpose(idx, real_src);
268
269 /* set offset */
270 inst = tc_MOV(tc, offset, real_src[0]);
271 inst->mask_ctrl = BRW_MASK_DISABLE;
272
273 switch (inst->exec_size) {
274 case BRW_EXECUTE_8:
275 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
276 param_size = 1;
277 break;
278 case BRW_EXECUTE_16:
279 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
280 param_size = 2;
281 break;
282 default:
283 assert(!"unsupported execution size");
284 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
285 return;
286 break;
287 }
288
289 desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
290 simd_mode,
291 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
292 0,
293 ILO_WM_CONST_SURFACE(dim));
294
295 tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
296 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
297 inst->mask_ctrl = BRW_MASK_DISABLE;
298
299 tdst_transpose(dst, real_dst);
300 for (i = 0; i < 4; i++) {
301 const struct toy_src src =
302 tsrc_offset(tsrc_from(tmp), param_size * i, 0);
303
304 /* cast to type D to make sure these are raw moves */
305 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
306 }
307 }
308
309 static bool
310 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
311 struct toy_dst dst, int dim,
312 struct toy_src idx)
313 {
314 const int grf = fcc->first_const_grf + idx.val32 / 2;
315 const int grf_subreg = (idx.val32 & 1) * 16;
316 struct toy_src src;
317 struct toy_dst real_dst[4];
318 int i;
319
320 if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
321 grf >= fcc->first_attr_grf)
322 return false;
323
324 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
325
326 tdst_transpose(dst, real_dst);
327 for (i = 0; i < 4; i++) {
328 /* cast to type D to make sure these are raw moves */
329 tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
330 }
331
332 return true;
333 }
334
335 static void
336 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
337 struct toy_dst dst, int dim, struct toy_src idx)
338 {
339 const struct toy_dst header =
340 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
341 const struct toy_dst global_offset =
342 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
343 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
344 struct toy_compiler *tc = &fcc->tc;
345 unsigned msg_type, msg_ctrl, msg_len;
346 struct toy_inst *inst;
347 struct toy_src desc;
348 struct toy_dst tmp, real_dst[4];
349 int i;
350
351 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
352 return;
353
354 /* set message header */
355 inst = tc_MOV(tc, header, r0);
356 inst->mask_ctrl = BRW_MASK_DISABLE;
357
358 /* set global offset */
359 inst = tc_MOV(tc, global_offset, idx);
360 inst->mask_ctrl = BRW_MASK_DISABLE;
361 inst->exec_size = BRW_EXECUTE_1;
362 inst->src[0].rect = TOY_RECT_010;
363
364 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
365 msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
366 msg_len = 1;
367
368 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
369 msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));
370
371 tmp = tc_alloc_tmp(tc);
372
373 tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
374
375 tdst_transpose(dst, real_dst);
376 for (i = 0; i < 4; i++) {
377 const struct toy_src src =
378 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
379
380 /* cast to type D to make sure these are raw moves */
381 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
382 }
383 }
384
385 static void
386 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
387 struct toy_dst dst, int dim, struct toy_src idx)
388 {
389 struct toy_compiler *tc = &fcc->tc;
390 const struct toy_dst offset =
391 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
392 struct toy_src desc;
393 struct toy_inst *inst;
394 struct toy_dst tmp, real_dst[4];
395 int i;
396
397 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
398 return;
399
400 /*
401 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
402 * changed from OWord Block Read to ld to increase performance in the
403 * classic driver. Since we use the constant cache instead of the data
404 * cache, I wonder if we still want to follow the classic driver.
405 */
406
407 /* set offset */
408 inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
409 inst->exec_size = BRW_EXECUTE_8;
410 inst->mask_ctrl = BRW_MASK_DISABLE;
411
412 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
413 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
414 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
415 0,
416 ILO_WM_CONST_SURFACE(dim));
417
418 tmp = tc_alloc_tmp(tc);
419 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
420 inst->exec_size = BRW_EXECUTE_8;
421 inst->mask_ctrl = BRW_MASK_DISABLE;
422
423 tdst_transpose(dst, real_dst);
424 for (i = 0; i < 4; i++) {
425 const struct toy_src src =
426 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
427
428 /* cast to type D to make sure these are raw moves */
429 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
430 }
431 }
432
433 static void
434 fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
435 struct toy_dst dst, int idx)
436 {
437 const uint32_t *imm;
438 struct toy_dst real_dst[4];
439 int ch;
440
441 imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
442
443 tdst_transpose(dst, real_dst);
444 /* raw moves */
445 for (ch = 0; ch < 4; ch++)
446 tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
447 }
448
449 static void
450 fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
451 struct toy_dst dst, int dim, int idx)
452 {
453 struct toy_compiler *tc = &fcc->tc;
454 const struct toy_tgsi *tgsi = &fcc->tgsi;
455 int slot;
456
457 assert(!dim);
458
459 slot = toy_tgsi_find_system_value(tgsi, idx);
460 if (slot < 0)
461 return;
462
463 switch (tgsi->system_values[slot].semantic_name) {
464 case TGSI_SEMANTIC_PRIMID:
465 case TGSI_SEMANTIC_INSTANCEID:
466 case TGSI_SEMANTIC_VERTEXID:
467 default:
468 tc_fail(tc, "unhandled system value");
469 tc_MOV(tc, dst, tsrc_imm_d(0));
470 break;
471 }
472 }
473
474 static void
475 fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
476 struct toy_inst *inst)
477 {
478 struct toy_compiler *tc = &fcc->tc;
479 int dim, idx;
480
481 assert(inst->src[0].file == TOY_FILE_IMM);
482 dim = inst->src[0].val32;
483
484 assert(inst->src[1].file == TOY_FILE_IMM);
485 idx = inst->src[1].val32;
486
487 switch (inst->opcode) {
488 case TOY_OPCODE_TGSI_IN:
489 fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
490 break;
491 case TOY_OPCODE_TGSI_CONST:
492 if (tc->dev->gen >= ILO_GEN(7))
493 fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
494 else
495 fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
496 break;
497 case TOY_OPCODE_TGSI_SV:
498 fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
499 break;
500 case TOY_OPCODE_TGSI_IMM:
501 assert(!dim);
502 fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
503 break;
504 default:
505 tc_fail(tc, "unhandled TGSI fetch");
506 break;
507 }
508
509 tc_discard_inst(tc, inst);
510 }
511
512 static void
513 fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
514 struct toy_inst *inst)
515 {
516 struct toy_compiler *tc = &fcc->tc;
517 enum tgsi_file_type file;
518 int dim, idx;
519 struct toy_src indirect_dim, indirect_idx;
520
521 assert(inst->src[0].file == TOY_FILE_IMM);
522 file = inst->src[0].val32;
523
524 assert(inst->src[1].file == TOY_FILE_IMM);
525 dim = inst->src[1].val32;
526 indirect_dim = inst->src[2];
527
528 assert(inst->src[3].file == TOY_FILE_IMM);
529 idx = inst->src[3].val32;
530 indirect_idx = inst->src[4];
531
532 /* no dimension indirection */
533 assert(indirect_dim.file == TOY_FILE_IMM);
534 dim += indirect_dim.val32;
535
536 switch (inst->opcode) {
537 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
538 if (file == TGSI_FILE_CONSTANT) {
539 if (idx) {
540 struct toy_dst tmp = tc_alloc_tmp(tc);
541
542 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
543 indirect_idx = tsrc_from(tmp);
544 }
545
546 fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
547 break;
548 }
549 /* fall through */
550 case TOY_OPCODE_TGSI_INDIRECT_STORE:
551 default:
552 tc_fail(tc, "unhandled TGSI indirection");
553 break;
554 }
555
556 tc_discard_inst(tc, inst);
557 }
558
559 /**
560 * Emit instructions to move sampling parameters to the message registers.
561 */
562 static int
563 fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
564 int base_mrf, int param_size,
565 struct toy_src *coords, int num_coords,
566 struct toy_src bias_or_lod, struct toy_src ref_or_si,
567 struct toy_src *ddx, struct toy_src *ddy,
568 int num_derivs)
569 {
570 int num_params, i;
571
572 assert(num_coords <= 4);
573 assert(num_derivs <= 3 && num_derivs <= num_coords);
574
575 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
576 switch (msg_type) {
577 case GEN5_SAMPLER_MESSAGE_SAMPLE:
578 for (i = 0; i < num_coords; i++)
579 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
580 num_params = num_coords;
581 break;
582 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
583 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
584 for (i = 0; i < num_coords; i++)
585 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
586 tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
587 num_params = 5;
588 break;
589 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
590 for (i = 0; i < num_coords; i++)
591 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
592 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
593 num_params = 5;
594 break;
595 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
596 for (i = 0; i < num_coords; i++)
597 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
598 for (i = 0; i < num_derivs; i++) {
599 tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
600 tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
601 }
602 num_params = 4 + num_derivs * 2;
603 break;
604 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
605 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
606 for (i = 0; i < num_coords; i++)
607 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
608 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
609 tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
610 num_params = 6;
611 break;
612 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
613 assert(num_coords <= 3);
614
615 for (i = 0; i < num_coords; i++)
616 tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
617 tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
618 tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
619 num_params = 5;
620 break;
621 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
622 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
623 num_params = 1;
624 break;
625 default:
626 tc_fail(tc, "unknown sampler opcode");
627 num_params = 0;
628 break;
629 }
630 #undef SAMPLER_PARAM
631
632 return num_params * param_size;
633 }
634
635 static int
636 fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
637 int base_mrf, int param_size,
638 struct toy_src *coords, int num_coords,
639 struct toy_src bias_or_lod, struct toy_src ref_or_si,
640 struct toy_src *ddx, struct toy_src *ddy,
641 int num_derivs)
642 {
643 int num_params, i;
644
645 assert(num_coords <= 4);
646 assert(num_derivs <= 3 && num_derivs <= num_coords);
647
648 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
649 switch (msg_type) {
650 case GEN5_SAMPLER_MESSAGE_SAMPLE:
651 for (i = 0; i < num_coords; i++)
652 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
653 num_params = num_coords;
654 break;
655 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
656 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
657 tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
658 for (i = 0; i < num_coords; i++)
659 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
660 num_params = 1 + num_coords;
661 break;
662 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
663 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
664 for (i = 0; i < num_coords; i++)
665 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
666 num_params = 1 + num_coords;
667 break;
668 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
669 for (i = 0; i < num_coords; i++) {
670 tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
671 if (i < num_derivs) {
672 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
673 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
674 }
675 }
676 num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
677 break;
678 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
679 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
680 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
681 tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
682 for (i = 0; i < num_coords; i++)
683 tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
684 num_params = 2 + num_coords;
685 break;
686 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
687 assert(num_coords >= 1 && num_coords <= 3);
688
689 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
690 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
691 for (i = 1; i < num_coords; i++)
692 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
693 num_params = 1 + num_coords;
694 break;
695 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
696 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
697 num_params = 1;
698 break;
699 default:
700 tc_fail(tc, "unknown sampler opcode");
701 num_params = 0;
702 break;
703 }
704 #undef SAMPLER_PARAM
705
706 return num_params * param_size;
707 }
708
709 /**
710 * Set up message registers and return the message descriptor for sampling.
711 */
712 static struct toy_src
713 fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
714 int base_mrf, const uint32_t *saturate_coords,
715 unsigned *ret_sampler_index)
716 {
717 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
718 struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
719 int num_coords, ref_pos, num_derivs;
720 int sampler_src, param_size, i;
721
722 switch (inst->exec_size) {
723 case BRW_EXECUTE_8:
724 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
725 param_size = 1;
726 break;
727 case BRW_EXECUTE_16:
728 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
729 param_size = 2;
730 break;
731 default:
732 tc_fail(tc, "unsupported execute size for sampling");
733 return tsrc_null();
734 break;
735 }
736
737 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
738 tsrc_transpose(inst->src[0], coords);
739 bias_or_lod = tsrc_null();
740 ref_or_si = tsrc_null();
741 num_derivs = 0;
742 sampler_src = 1;
743
744 /*
745 * For TXD,
746 *
747 * src0 := (x, y, z, w)
748 * src1 := ddx
749 * src2 := ddy
750 * src3 := sampler
751 *
752 * For TEX2, TXB2, and TXL2,
753 *
754 * src0 := (x, y, z, w)
755 * src1 := (v or bias or lod, ...)
756 * src2 := sampler
757 *
758 * For TEX, TXB, TXL, and TXP,
759 *
760 * src0 := (x, y, z, w or bias or lod or projection)
761 * src1 := sampler
762 *
763 * For TXQ,
764 *
765 * src0 := (lod, ...)
766 * src1 := sampler
767 *
768 * For TXQ_LZ,
769 *
770 * src0 := sampler
771 *
772 * And for TXF,
773 *
774 * src0 := (x, y, z, w or lod)
775 * src1 := sampler
776 *
777 * State trackers should not generate opcode+texture combinations with
778 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
779 */
780 switch (inst->opcode) {
781 case TOY_OPCODE_TGSI_TEX:
782 if (ref_pos >= 0) {
783 assert(ref_pos < 4);
784
785 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
786 ref_or_si = coords[ref_pos];
787 }
788 else {
789 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
790 }
791 break;
792 case TOY_OPCODE_TGSI_TXD:
793 if (ref_pos >= 0) {
794 assert(ref_pos < 4);
795
796 msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
797 ref_or_si = coords[ref_pos];
798
799 if (tc->dev->gen < ILO_GEN(7.5))
800 tc_fail(tc, "TXD with shadow sampler not supported");
801 }
802 else {
803 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
804 }
805
806 tsrc_transpose(inst->src[1], ddx);
807 tsrc_transpose(inst->src[2], ddy);
808 num_derivs = num_coords;
809 sampler_src = 3;
810 break;
811 case TOY_OPCODE_TGSI_TXP:
812 if (ref_pos >= 0) {
813 assert(ref_pos < 3);
814
815 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
816 ref_or_si = coords[ref_pos];
817 }
818 else {
819 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
820 }
821
822 /* project the coordinates */
823 {
824 struct toy_dst tmp[4];
825
826 tc_alloc_tmp4(tc, tmp);
827
828 tc_INV(tc, tmp[3], coords[3]);
829 for (i = 0; i < num_coords && i < 3; i++) {
830 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
831 coords[i] = tsrc_from(tmp[i]);
832 }
833
834 if (ref_pos >= i) {
835 tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
836 ref_or_si = tsrc_from(tmp[ref_pos]);
837 }
838 }
839 break;
840 case TOY_OPCODE_TGSI_TXB:
841 if (ref_pos >= 0) {
842 assert(ref_pos < 3);
843
844 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
845 ref_or_si = coords[ref_pos];
846 }
847 else {
848 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
849 }
850
851 bias_or_lod = coords[3];
852 break;
853 case TOY_OPCODE_TGSI_TXL:
854 if (ref_pos >= 0) {
855 assert(ref_pos < 3);
856
857 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
858 ref_or_si = coords[ref_pos];
859 }
860 else {
861 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
862 }
863
864 bias_or_lod = coords[3];
865 break;
866 case TOY_OPCODE_TGSI_TXF:
867 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
868
869 switch (inst->tex.target) {
870 case TGSI_TEXTURE_2D_MSAA:
871 case TGSI_TEXTURE_2D_ARRAY_MSAA:
872 assert(ref_pos >= 0 && ref_pos < 4);
873 /* lod is always 0 */
874 bias_or_lod = tsrc_imm_d(0);
875 ref_or_si = coords[ref_pos];
876 break;
877 default:
878 bias_or_lod = coords[3];
879 break;
880 }
881
882 /* offset the coordinates */
883 if (!tsrc_is_null(inst->tex.offsets[0])) {
884 struct toy_dst tmp[4];
885 struct toy_src offsets[4];
886
887 tc_alloc_tmp4(tc, tmp);
888 tsrc_transpose(inst->tex.offsets[0], offsets);
889
890 for (i = 0; i < num_coords; i++) {
891 tc_ADD(tc, tmp[i], coords[i], offsets[i]);
892 coords[i] = tsrc_from(tmp[i]);
893 }
894 }
895
896 sampler_src = 1;
897 break;
898 case TOY_OPCODE_TGSI_TXQ:
899 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
900 num_coords = 0;
901 bias_or_lod = coords[0];
902 break;
903 case TOY_OPCODE_TGSI_TXQ_LZ:
904 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
905 num_coords = 0;
906 sampler_src = 0;
907 break;
908 case TOY_OPCODE_TGSI_TEX2:
909 if (ref_pos >= 0) {
910 assert(ref_pos < 5);
911
912 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
913
914 if (ref_pos >= 4) {
915 struct toy_src src1[4];
916 tsrc_transpose(inst->src[1], src1);
917 ref_or_si = src1[ref_pos - 4];
918 }
919 else {
920 ref_or_si = coords[ref_pos];
921 }
922 }
923 else {
924 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
925 }
926
927 sampler_src = 2;
928 break;
929 case TOY_OPCODE_TGSI_TXB2:
930 if (ref_pos >= 0) {
931 assert(ref_pos < 4);
932
933 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
934 ref_or_si = coords[ref_pos];
935 }
936 else {
937 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
938 }
939
940 {
941 struct toy_src src1[4];
942 tsrc_transpose(inst->src[1], src1);
943 bias_or_lod = src1[0];
944 }
945
946 sampler_src = 2;
947 break;
948 case TOY_OPCODE_TGSI_TXL2:
949 if (ref_pos >= 0) {
950 assert(ref_pos < 4);
951
952 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
953 ref_or_si = coords[ref_pos];
954 }
955 else {
956 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
957 }
958
959 {
960 struct toy_src src1[4];
961 tsrc_transpose(inst->src[1], src1);
962 bias_or_lod = src1[0];
963 }
964
965 sampler_src = 2;
966 break;
967 default:
968 assert(!"unhandled sampling opcode");
969 return tsrc_null();
970 break;
971 }
972
973 assert(inst->src[sampler_src].file == TOY_FILE_IMM);
974 sampler_index = inst->src[sampler_src].val32;
975 binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);
976
977 /*
978 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
979 *
980 * "Note that the (cube map) coordinates delivered to the sampling
981 * engine must already have been divided by the component with the
982 * largest absolute value."
983 */
984 switch (inst->tex.target) {
985 case TGSI_TEXTURE_CUBE:
986 case TGSI_TEXTURE_SHADOWCUBE:
987 case TGSI_TEXTURE_CUBE_ARRAY:
988 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
989 /* TXQ does not need coordinates */
990 if (num_coords >= 3) {
991 struct toy_dst tmp[4];
992
993 tc_alloc_tmp4(tc, tmp);
994
995 tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
996 tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
997 tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
998 tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
999 tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
1000
1001 for (i = 0; i < 3; i++) {
1002 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
1003 coords[i] = tsrc_from(tmp[i]);
1004 }
1005 }
1006 break;
1007 }
1008
1009 /*
1010 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
1011 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
1012 * so that sampling outside the border gets the correct colors.
1013 */
1014 for (i = 0; i < MIN2(num_coords, 3); i++) {
1015 bool is_rect;
1016
1017 if (!(saturate_coords[i] & (1 << sampler_index)))
1018 continue;
1019
1020 switch (inst->tex.target) {
1021 case TGSI_TEXTURE_RECT:
1022 case TGSI_TEXTURE_SHADOWRECT:
1023 is_rect = true;
1024 break;
1025 default:
1026 is_rect = false;
1027 break;
1028 }
1029
1030 if (is_rect) {
1031 struct toy_src min, max;
1032 struct toy_dst tmp;
1033
1034 tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
1035 tmp = tc_alloc_tmp(tc);
1036
1037 /* saturate to [0, width] or [0, height] */
1038 /* TODO TXQ? */
1039 min = tsrc_imm_f(0.0f);
1040 max = tsrc_imm_f(2048.0f);
1041
1042 tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
1043 tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);
1044
1045 coords[i] = tsrc_from(tmp);
1046 }
1047 else {
1048 struct toy_dst tmp;
1049 struct toy_inst *inst2;
1050
1051 tmp = tc_alloc_tmp(tc);
1052
1053 /* saturate to [0.0f, 1.0f] */
1054 inst2 = tc_MOV(tc, tmp, coords[i]);
1055 inst2->saturate = true;
1056
1057 coords[i] = tsrc_from(tmp);
1058 }
1059 }
1060
1061 /* set up sampler parameters */
1062 if (tc->dev->gen >= ILO_GEN(7)) {
1063 msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
1064 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1065 }
1066 else {
1067 msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
1068 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1069 }
1070
1071 /*
1072 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1073 *
1074 * "The maximum message length allowed to the sampler is 11. This would
1075 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1076 * SIMD16."
1077 */
1078 if (msg_len > 11)
1079 tc_fail(tc, "maximum length for messages to the sampler is 11");
1080
1081 if (ret_sampler_index)
1082 *ret_sampler_index = sampler_index;
1083
1084 return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
1085 false, simd_mode, msg_type, sampler_index, binding_table_index);
1086 }
1087
1088 static void
1089 fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
1090 struct toy_inst *inst)
1091 {
1092 struct toy_compiler *tc = &fcc->tc;
1093 struct toy_dst dst[4], tmp[4];
1094 struct toy_src desc;
1095 unsigned sampler_index;
1096 int swizzles[4], i;
1097 bool need_filter;
1098
1099 desc = fs_prepare_tgsi_sampling(tc, inst,
1100 fcc->first_free_mrf,
1101 fcc->variant->saturate_tex_coords,
1102 &sampler_index);
1103
1104 switch (inst->opcode) {
1105 case TOY_OPCODE_TGSI_TXF:
1106 case TOY_OPCODE_TGSI_TXQ:
1107 case TOY_OPCODE_TGSI_TXQ_LZ:
1108 need_filter = false;
1109 break;
1110 default:
1111 need_filter = true;
1112 break;
1113 }
1114
1115 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
1116 inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
1117 inst->src[1] = desc;
1118 for (i = 2; i < Elements(inst->src); i++)
1119 inst->src[i] = tsrc_null();
1120
1121 /* write to temps first */
1122 tc_alloc_tmp4(tc, tmp);
1123 for (i = 0; i < 4; i++)
1124 tmp[i].type = inst->dst.type;
1125 tdst_transpose(inst->dst, dst);
1126 inst->dst = tmp[0];
1127
1128 tc_move_inst(tc, inst);
1129
1130 if (need_filter) {
1131 assert(sampler_index < fcc->variant->num_sampler_views);
1132 swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
1133 swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
1134 swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
1135 swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
1136 }
1137 else {
1138 swizzles[0] = PIPE_SWIZZLE_RED;
1139 swizzles[1] = PIPE_SWIZZLE_GREEN;
1140 swizzles[2] = PIPE_SWIZZLE_BLUE;
1141 swizzles[3] = PIPE_SWIZZLE_ALPHA;
1142 }
1143
1144 /* swizzle the results */
1145 for (i = 0; i < 4; i++) {
1146 switch (swizzles[i]) {
1147 case PIPE_SWIZZLE_ZERO:
1148 tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1149 break;
1150 case PIPE_SWIZZLE_ONE:
1151 tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1152 break;
1153 default:
1154 tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1155 break;
1156 }
1157 }
1158 }
1159
1160 static void
1161 fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1162 {
1163 struct toy_dst dst[4];
1164 struct toy_src src[4];
1165 int i;
1166
1167 tdst_transpose(inst->dst, dst);
1168 tsrc_transpose(inst->src[0], src);
1169
1170 /*
1171 * Every four fragments are from a 2x2 subspan, with
1172 *
1173 * fragment 1 on the top-left,
1174 * fragment 2 on the top-right,
1175 * fragment 3 on the bottom-left,
1176 * fragment 4 on the bottom-right.
1177 *
1178 * DDX should thus produce
1179 *
1180 * dst = src.yyww - src.xxzz
1181 *
1182 * and DDY should produce
1183 *
1184 * dst = src.zzww - src.xxyy
1185 *
1186 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1187 * play with the region parameters.
1188 */
1189 if (inst->opcode == TOY_OPCODE_DDX) {
1190 for (i = 0; i < 4; i++) {
1191 struct toy_src left, right;
1192
1193 left = tsrc_rect(src[i], TOY_RECT_220);
1194 right = tsrc_offset(left, 0, 1);
1195
1196 tc_ADD(tc, dst[i], right, tsrc_negate(left));
1197 }
1198 }
1199 else {
1200 for (i = 0; i < 4; i++) {
1201 struct toy_src top, bottom;
1202
1203 /* approximate with dst = src.zzzz - src.xxxx */
1204 top = tsrc_rect(src[i], TOY_RECT_440);
1205 bottom = tsrc_offset(top, 0, 2);
1206
1207 tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1208 }
1209 }
1210
1211 tc_discard_inst(tc, inst);
1212 }
1213
1214 static void
1215 fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1216 {
1217 /* fs_write_fb() has set up the message registers */
1218 toy_compiler_lower_to_send(tc, inst, true,
1219 GEN6_SFID_DATAPORT_RENDER_CACHE);
1220 }
1221
1222 static void
1223 fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1224 {
1225 struct toy_dst pixel_mask_dst;
1226 struct toy_src f0, pixel_mask;
1227 struct toy_inst *tmp;
1228
1229 /* lower half of r1.7:ud */
1230 pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1231 pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1232
1233 f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, BRW_ARF_FLAG, 0)), TOY_RECT_010);
1234
1235 /* KILL or KILL_IF */
1236 if (tsrc_is_null(inst->src[0])) {
1237 struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1238 struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, BRW_ARF_FLAG, 0));
1239
1240 /* create a mask that masks out all pixels */
1241 tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1242 tmp->exec_size = BRW_EXECUTE_1;
1243 tmp->mask_ctrl = BRW_MASK_DISABLE;
1244
1245 tc_CMP(tc, tdst_null(), dummy, dummy, BRW_CONDITIONAL_NEQ);
1246
1247 /* swapping the two src operands breaks glBitmap()!? */
1248 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1249 tmp->exec_size = BRW_EXECUTE_1;
1250 tmp->mask_ctrl = BRW_MASK_DISABLE;
1251 }
1252 else {
1253 struct toy_src src[4];
1254 int i;
1255
1256 tsrc_transpose(inst->src[0], src);
1257 /* mask out killed pixels */
1258 for (i = 0; i < 4; i++) {
1259 tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1260 BRW_CONDITIONAL_GE);
1261
1262 /* swapping the two src operands breaks glBitmap()!? */
1263 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1264 tmp->exec_size = BRW_EXECUTE_1;
1265 tmp->mask_ctrl = BRW_MASK_DISABLE;
1266 }
1267 }
1268
1269 tc_discard_inst(tc, inst);
1270 }
1271
1272 static void
1273 fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1274 {
1275 struct toy_compiler *tc = &fcc->tc;
1276 struct toy_inst *inst;
1277
1278 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1279 tc_head(tc);
1280 while ((inst = tc_next(tc)) != NULL) {
1281 switch (inst->opcode) {
1282 case TOY_OPCODE_TGSI_IN:
1283 case TOY_OPCODE_TGSI_CONST:
1284 case TOY_OPCODE_TGSI_SV:
1285 case TOY_OPCODE_TGSI_IMM:
1286 fs_lower_opcode_tgsi_direct(fcc, inst);
1287 break;
1288 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1289 case TOY_OPCODE_TGSI_INDIRECT_STORE:
1290 fs_lower_opcode_tgsi_indirect(fcc, inst);
1291 break;
1292 case TOY_OPCODE_TGSI_TEX:
1293 case TOY_OPCODE_TGSI_TXB:
1294 case TOY_OPCODE_TGSI_TXD:
1295 case TOY_OPCODE_TGSI_TXL:
1296 case TOY_OPCODE_TGSI_TXP:
1297 case TOY_OPCODE_TGSI_TXF:
1298 case TOY_OPCODE_TGSI_TXQ:
1299 case TOY_OPCODE_TGSI_TXQ_LZ:
1300 case TOY_OPCODE_TGSI_TEX2:
1301 case TOY_OPCODE_TGSI_TXB2:
1302 case TOY_OPCODE_TGSI_TXL2:
1303 case TOY_OPCODE_TGSI_SAMPLE:
1304 case TOY_OPCODE_TGSI_SAMPLE_I:
1305 case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1306 case TOY_OPCODE_TGSI_SAMPLE_B:
1307 case TOY_OPCODE_TGSI_SAMPLE_C:
1308 case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1309 case TOY_OPCODE_TGSI_SAMPLE_D:
1310 case TOY_OPCODE_TGSI_SAMPLE_L:
1311 case TOY_OPCODE_TGSI_GATHER4:
1312 case TOY_OPCODE_TGSI_SVIEWINFO:
1313 case TOY_OPCODE_TGSI_SAMPLE_POS:
1314 case TOY_OPCODE_TGSI_SAMPLE_INFO:
1315 fs_lower_opcode_tgsi_sampling(fcc, inst);
1316 break;
1317 }
1318 }
1319
1320 tc_head(tc);
1321 while ((inst = tc_next(tc)) != NULL) {
1322 switch (inst->opcode) {
1323 case TOY_OPCODE_INV:
1324 case TOY_OPCODE_LOG:
1325 case TOY_OPCODE_EXP:
1326 case TOY_OPCODE_SQRT:
1327 case TOY_OPCODE_RSQ:
1328 case TOY_OPCODE_SIN:
1329 case TOY_OPCODE_COS:
1330 case TOY_OPCODE_FDIV:
1331 case TOY_OPCODE_POW:
1332 case TOY_OPCODE_INT_DIV_QUOTIENT:
1333 case TOY_OPCODE_INT_DIV_REMAINDER:
1334 toy_compiler_lower_math(tc, inst);
1335 break;
1336 case TOY_OPCODE_DDX:
1337 case TOY_OPCODE_DDY:
1338 fs_lower_opcode_derivative(tc, inst);
1339 break;
1340 case TOY_OPCODE_FB_WRITE:
1341 fs_lower_opcode_fb_write(tc, inst);
1342 break;
1343 case TOY_OPCODE_KIL:
1344 fs_lower_opcode_kil(tc, inst);
1345 break;
1346 default:
1347 if (inst->opcode > 127)
1348 tc_fail(tc, "unhandled virtual opcode");
1349 break;
1350 }
1351 }
1352 }
1353
1354 /**
1355 * Compile the shader.
1356 */
1357 static bool
1358 fs_compile(struct fs_compile_context *fcc)
1359 {
1360 struct toy_compiler *tc = &fcc->tc;
1361 struct ilo_shader *sh = fcc->shader;
1362
1363 fs_lower_virtual_opcodes(fcc);
1364 toy_compiler_legalize_for_ra(tc);
1365 toy_compiler_optimize(tc);
1366 toy_compiler_allocate_registers(tc,
1367 fcc->first_free_grf,
1368 fcc->last_free_grf,
1369 fcc->num_grf_per_vrf);
1370 toy_compiler_legalize_for_asm(tc);
1371
1372 if (tc->fail) {
1373 ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1374 return false;
1375 }
1376
1377 if (ilo_debug & ILO_DEBUG_FS) {
1378 ilo_printf("legalized instructions:\n");
1379 toy_compiler_dump(tc);
1380 ilo_printf("\n");
1381 }
1382
1383 if (true) {
1384 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1385 }
1386 else {
1387 static const uint32_t microcode[] = {
1388 /* fill in the microcode here */
1389 0x0, 0x0, 0x0, 0x0,
1390 };
1391 const bool swap = true;
1392
1393 sh->kernel_size = sizeof(microcode);
1394 sh->kernel = MALLOC(sh->kernel_size);
1395
1396 if (sh->kernel) {
1397 const int num_dwords = sizeof(microcode) / 4;
1398 const uint32_t *src = microcode;
1399 uint32_t *dst = (uint32_t *) sh->kernel;
1400 int i;
1401
1402 for (i = 0; i < num_dwords; i += 4) {
1403 if (swap) {
1404 dst[i + 0] = src[i + 3];
1405 dst[i + 1] = src[i + 2];
1406 dst[i + 2] = src[i + 1];
1407 dst[i + 3] = src[i + 0];
1408 }
1409 else {
1410 memcpy(dst, src, 16);
1411 }
1412 }
1413 }
1414 }
1415
1416 if (!sh->kernel) {
1417 ilo_err("failed to compile FS: %s\n", tc->reason);
1418 return false;
1419 }
1420
1421 if (ilo_debug & ILO_DEBUG_FS) {
1422 ilo_printf("disassembly:\n");
1423 toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
1424 ilo_printf("\n");
1425 }
1426
1427 return true;
1428 }
1429
1430 /**
1431 * Emit instructions to write the color buffers (and the depth buffer).
1432 */
1433 static void
1434 fs_write_fb(struct fs_compile_context *fcc)
1435 {
1436 struct toy_compiler *tc = &fcc->tc;
1437 int base_mrf = fcc->first_free_mrf;
1438 const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1439 bool header_present = false;
1440 struct toy_src desc;
1441 unsigned msg_type, ctrl;
1442 int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1443 int pos_slot = -1, cbuf, i;
1444
1445 for (i = 0; i < Elements(color_slots); i++)
1446 color_slots[i] = -1;
1447
1448 for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1449 if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1450 assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
1451 color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1452 }
1453 else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1454 pos_slot = i;
1455 }
1456 }
1457
1458 num_cbufs = fcc->variant->u.fs.num_cbufs;
1459 /* still need to send EOT (and probably depth) */
1460 if (!num_cbufs)
1461 num_cbufs = 1;
1462
1463 /* we need the header to specify the pixel mask or render target */
1464 if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1465 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1466 struct toy_inst *inst;
1467
1468 inst = tc_MOV(tc, header, r0);
1469 inst->mask_ctrl = BRW_MASK_DISABLE;
1470 base_mrf += fcc->num_grf_per_vrf;
1471
1472 /* this is a two-register header */
1473 if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
1474 inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1475 inst->mask_ctrl = BRW_MASK_DISABLE;
1476 base_mrf += fcc->num_grf_per_vrf;
1477 }
1478
1479 header_present = true;
1480 }
1481
1482 for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1483 const int slot =
1484 color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1485 int mrf = base_mrf, vrf;
1486 struct toy_src src[4];
1487
1488 if (slot >= 0) {
1489 const unsigned undefined_mask =
1490 fcc->tgsi.outputs[slot].undefined_mask;
1491 const int index = fcc->tgsi.outputs[slot].index;
1492
1493 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1494 if (vrf >= 0) {
1495 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1496 tsrc_transpose(tmp, src);
1497 }
1498 else {
1499 /* use (0, 0, 0, 0) */
1500 tsrc_transpose(tsrc_imm_f(0.0f), src);
1501 }
1502
1503 for (i = 0; i < 4; i++) {
1504 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1505
1506 if (undefined_mask & (1 << i))
1507 src[i] = tsrc_imm_f(0.0f);
1508
1509 tc_MOV(tc, dst, src[i]);
1510
1511 mrf += fcc->num_grf_per_vrf;
1512 }
1513 }
1514 else {
1515 /* use (0, 0, 0, 0) */
1516 for (i = 0; i < 4; i++) {
1517 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1518
1519 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1520 mrf += fcc->num_grf_per_vrf;
1521 }
1522 }
1523
1524 /* select BLEND_STATE[rt] */
1525 if (cbuf > 0) {
1526 struct toy_inst *inst;
1527
1528 inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1529 inst->mask_ctrl = BRW_MASK_DISABLE;
1530 inst->exec_size = BRW_EXECUTE_1;
1531 inst->src[0].rect = TOY_RECT_010;
1532 }
1533
1534 if (cbuf == 0 && pos_slot >= 0) {
1535 const int index = fcc->tgsi.outputs[pos_slot].index;
1536 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1537 struct toy_src src[4];
1538 int vrf;
1539
1540 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1541 if (vrf >= 0) {
1542 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1543 tsrc_transpose(tmp, src);
1544 }
1545 else {
1546 /* use (0, 0, 0, 0) */
1547 tsrc_transpose(tsrc_imm_f(0.0f), src);
1548 }
1549
1550 /* only Z */
1551 tc_MOV(tc, dst, src[2]);
1552
1553 mrf += fcc->num_grf_per_vrf;
1554 }
1555
1556 msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
1557 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
1558 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1559
1560 ctrl = (cbuf == num_cbufs - 1) << 12 |
1561 msg_type << 8;
1562
1563 desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1564 mrf - fcc->first_free_mrf, 0,
1565 header_present, false,
1566 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
1567 ctrl, ILO_WM_DRAW_SURFACE(cbuf));
1568
1569 tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1570 tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1571 }
1572 }
1573
1574 /**
1575 * Set up shader outputs for fixed-function units.
1576 */
1577 static void
1578 fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1579 {
1580 int i;
1581
1582 sh->out.count = tgsi->num_outputs;
1583 for (i = 0; i < tgsi->num_outputs; i++) {
1584 sh->out.register_indices[i] = tgsi->outputs[i].index;
1585 sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1586 sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1587
1588 if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1589 sh->out.has_pos = true;
1590 }
1591 }
1592
1593 /**
1594 * Set up shader inputs for fixed-function units.
1595 */
1596 static void
1597 fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1598 bool flatshade)
1599 {
1600 int i;
1601
1602 sh->in.count = tgsi->num_inputs;
1603 for (i = 0; i < tgsi->num_inputs; i++) {
1604 sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1605 sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1606 sh->in.interp[i] = tgsi->inputs[i].interp;
1607 sh->in.centroid[i] = tgsi->inputs[i].centroid;
1608
1609 if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1610 sh->in.has_pos = true;
1611 continue;
1612 }
1613 else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1614 continue;
1615 }
1616
1617 switch (tgsi->inputs[i].interp) {
1618 case TGSI_INTERPOLATE_CONSTANT:
1619 sh->in.const_interp_enable |= 1 << i;
1620 break;
1621 case TGSI_INTERPOLATE_LINEAR:
1622 sh->in.has_linear_interp = true;
1623
1624 if (tgsi->inputs[i].centroid) {
1625 sh->in.barycentric_interpolation_mode |=
1626 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
1627 }
1628 else {
1629 sh->in.barycentric_interpolation_mode |=
1630 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
1631 }
1632 break;
1633 case TGSI_INTERPOLATE_COLOR:
1634 if (flatshade) {
1635 sh->in.const_interp_enable |= 1 << i;
1636 break;
1637 }
1638 /* fall through */
1639 case TGSI_INTERPOLATE_PERSPECTIVE:
1640 if (tgsi->inputs[i].centroid) {
1641 sh->in.barycentric_interpolation_mode |=
1642 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
1643 }
1644 else {
1645 sh->in.barycentric_interpolation_mode |=
1646 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
1647 }
1648 break;
1649 default:
1650 break;
1651 }
1652 }
1653 }
1654
1655 static int
1656 fs_setup_payloads(struct fs_compile_context *fcc)
1657 {
1658 const struct ilo_shader *sh = fcc->shader;
1659 int grf, i;
1660
1661 grf = 0;
1662
1663 /* r0: header */
1664 grf++;
1665
1666 /* r1-r2: coordinates and etc. */
1667 grf += (fcc->dispatch_mode == GEN6_WM_32_DISPATCH_ENABLE) ? 2 : 1;
1668
1669 for (i = 0; i < Elements(fcc->payloads); i++) {
1670 int interp;
1671
1672 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1673 for (interp = 0; interp < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; interp++) {
1674 if (!(sh->in.barycentric_interpolation_mode & (1 << interp)))
1675 continue;
1676
1677 fcc->payloads[i].barycentric_interps[interp] = grf;
1678 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 2 : 4;
1679 }
1680
1681 /* r27-r28 or r56-r57: interpoloated depth */
1682 if (sh->in.has_pos) {
1683 fcc->payloads[i].source_depth = grf;
1684 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
1685 }
1686
1687 /* r29-r30 or r58-r59: interpoloated w */
1688 if (sh->in.has_pos) {
1689 fcc->payloads[i].source_w = grf;
1690 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
1691 }
1692
1693 /* r31 or r60: position offset */
1694 if (false) {
1695 fcc->payloads[i].pos_offset = grf;
1696 grf++;
1697 }
1698
1699 if (fcc->dispatch_mode != GEN6_WM_32_DISPATCH_ENABLE)
1700 break;
1701 }
1702
1703 return grf;
1704 }
1705
1706 /**
1707 * Translate the TGSI tokens.
1708 */
1709 static bool
1710 fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1711 struct toy_tgsi *tgsi)
1712 {
1713 if (ilo_debug & ILO_DEBUG_FS) {
1714 ilo_printf("dumping fragment shader\n");
1715 ilo_printf("\n");
1716
1717 tgsi_dump(tokens, 0);
1718 ilo_printf("\n");
1719 }
1720
1721 toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1722 if (tc->fail) {
1723 ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1724 return false;
1725 }
1726
1727 if (ilo_debug & ILO_DEBUG_FS) {
1728 ilo_printf("TGSI translator:\n");
1729 toy_tgsi_dump(tgsi);
1730 ilo_printf("\n");
1731 toy_compiler_dump(tc);
1732 ilo_printf("\n");
1733 }
1734
1735 return true;
1736 }
1737
1738 /**
1739 * Set up FS compile context. This includes translating the TGSI tokens.
1740 */
1741 static bool
1742 fs_setup(struct fs_compile_context *fcc,
1743 const struct ilo_shader_state *state,
1744 const struct ilo_shader_variant *variant)
1745 {
1746 int num_consts;
1747
1748 memset(fcc, 0, sizeof(*fcc));
1749
1750 fcc->shader = CALLOC_STRUCT(ilo_shader);
1751 if (!fcc->shader)
1752 return false;
1753
1754 fcc->variant = variant;
1755
1756 toy_compiler_init(&fcc->tc, state->info.dev);
1757
1758 fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE;
1759
1760 fcc->tc.templ.access_mode = BRW_ALIGN_1;
1761 if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) {
1762 fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H;
1763 fcc->tc.templ.exec_size = BRW_EXECUTE_16;
1764 }
1765 else {
1766 fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q;
1767 fcc->tc.templ.exec_size = BRW_EXECUTE_8;
1768 }
1769
1770 fcc->tc.rect_linear_width = 8;
1771
1772 /*
1773 * The classic driver uses the sampler cache (gen6) or the data cache
1774 * (gen7). Why?
1775 */
1776 fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
1777
1778 if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1779 toy_compiler_cleanup(&fcc->tc);
1780 FREE(fcc->shader);
1781 return false;
1782 }
1783
1784 fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1785 fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1786
1787 if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
1788 num_consts = (fcc->tgsi.const_count + 1) / 2;
1789
1790 /*
1791 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1792 *
1793 * "The sum of all four read length fields (each incremented to
1794 * represent the actual read length) must be less than or equal to
1795 * 64"
1796 *
1797 * Since we are usually under a high register pressure, do not allow
1798 * for more than 8.
1799 */
1800 if (num_consts > 8)
1801 num_consts = 0;
1802 }
1803 else {
1804 num_consts = 0;
1805 }
1806
1807 fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
1808 fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1809
1810 fcc->first_const_grf = fs_setup_payloads(fcc);
1811 fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1812 fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1813 fcc->last_free_grf = 127;
1814
1815 /* m0 is reserved for system routines */
1816 fcc->first_free_mrf = 1;
1817 fcc->last_free_mrf = 15;
1818
1819 /* instructions are compressed with BRW_EXECUTE_16 */
1820 fcc->num_grf_per_vrf =
1821 (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 2 : 1;
1822
1823 if (fcc->tc.dev->gen >= ILO_GEN(7)) {
1824 fcc->last_free_grf -= 15;
1825 fcc->first_free_mrf = fcc->last_free_grf + 1;
1826 fcc->last_free_mrf = fcc->first_free_mrf + 14;
1827 }
1828
1829 fcc->shader->in.start_grf = fcc->first_const_grf;
1830 fcc->shader->has_kill = fcc->tgsi.uses_kill;
1831 fcc->shader->dispatch_16 =
1832 (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE);
1833
1834 return true;
1835 }
1836
1837 /**
1838 * Compile the fragment shader.
1839 */
1840 struct ilo_shader *
1841 ilo_shader_compile_fs(const struct ilo_shader_state *state,
1842 const struct ilo_shader_variant *variant)
1843 {
1844 struct fs_compile_context fcc;
1845
1846 if (!fs_setup(&fcc, state, variant))
1847 return NULL;
1848
1849 fs_write_fb(&fcc);
1850
1851 if (!fs_compile(&fcc)) {
1852 FREE(fcc.shader);
1853 fcc.shader = NULL;
1854 }
1855
1856 toy_tgsi_cleanup(&fcc.tgsi);
1857 toy_compiler_cleanup(&fcc.tc);
1858
1859 return fcc.shader;
1860 }