2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_dump.h"
29 #include "toy_compiler.h"
31 #include "toy_legalize.h"
32 #include "toy_optimize.h"
33 #include "toy_helpers.h"
34 #include "ilo_context.h"
35 #include "ilo_shader.h"
/*
 * Per-compilation state shared by the FS lowering helpers in this file.
 *
 * NOTE(review): only part of the declaration is visible in this extract
 * (no closing brace, and members such as tgsi, payloads, first_free_mrf
 * and num_grf_per_vrf that the functions below read through fcc are not
 * shown here) -- confirm the full layout against the complete file.
 */
37 struct fs_compile_context
{
38 struct ilo_shader
*shader
;
39 const struct ilo_shader_variant
*variant
;
41 struct toy_compiler tc
;
44 enum brw_message_target const_cache
;
48 int barycentric_interps
[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
];
/*
 * Emit the instructions that write the fragment position into dst.
 *
 * dst is split into four per-channel destinations with tdst_transpose():
 *   [0] X: the subspan X words read from r1 (sub-register 2 * 4, type UW)
 *          plus the packed immediate 0x10101010, moved to a temporary and
 *          then either copied or offset by 0.5f depending on
 *          pixel_center_integer;
 *   [1] Y: the subspan Y words (subspan_x shifted by one sub-register) plus
 *          the packed immediate 0x11001100, then either copied or combined
 *          with a float offset that depends on pixel_center_integer,
 *          origin_upper_left and fb_height;
 *   [2] Z: a plain MOV from the source depth payload register;
 *   [3] W: tc_INV of the source W payload register.
 *
 * NOTE(review): lines holding only keywords or punctuation (return type,
 * braces, else, the fb_height and offset declarations, and whatever follows
 * each of the two if lines near the offset update) are missing from this
 * extract, so the exact branch structure must be confirmed against the
 * complete file.
 */
66 fetch_position(struct fs_compile_context
*fcc
, struct toy_dst dst
)
68 struct toy_compiler
*tc
= &fcc
->tc
;
69 const struct toy_src src_z
=
70 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_depth
, 0);
71 const struct toy_src src_w
=
72 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_w
, 0);
/* falls back to 1 when the variant reports a zero framebuffer height */
74 (fcc
->variant
->u
.fs
.fb_height
) ? fcc
->variant
->u
.fs
.fb_height
: 1;
75 const bool origin_upper_left
=
76 (fcc
->tgsi
.props
.fs_coord_origin
== TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
77 const bool pixel_center_integer
=
78 (fcc
->tgsi
.props
.fs_coord_pixel_center
==
79 TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
80 struct toy_src subspan_x
, subspan_y
;
81 struct toy_dst tmp
, tmp_uw
;
82 struct toy_dst real_dst
[4];
84 tdst_transpose(dst
, real_dst
);
86 subspan_x
= tsrc_uw(tsrc(TOY_FILE_GRF
, 1, 2 * 4));
87 subspan_x
= tsrc_rect(subspan_x
, TOY_RECT_240
);
89 subspan_y
= tsrc_offset(subspan_x
, 0, 1);
91 tmp_uw
= tdst_uw(tc_alloc_tmp(tc
));
92 tmp
= tc_alloc_tmp(tc
);
/* X channel */
95 tc_ADD(tc
, tmp_uw
, subspan_x
, tsrc_imm_v(0x10101010));
96 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
97 if (pixel_center_integer
)
98 tc_MOV(tc
, real_dst
[0], tsrc_from(tmp
));
100 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp
), tsrc_imm_f(0.5f
));
/* Y channel */
103 tc_ADD(tc
, tmp_uw
, subspan_y
, tsrc_imm_v(0x11001100));
104 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
105 if (origin_upper_left
&& pixel_center_integer
) {
106 tc_MOV(tc
, real_dst
[1], tsrc_from(tmp
));
109 struct toy_src y
= tsrc_from(tmp
);
112 if (!pixel_center_integer
)
115 if (!origin_upper_left
) {
116 offset
+= (float) (fb_height
- 1);
120 tc_ADD(tc
, real_dst
[1], y
, tsrc_imm_f(offset
));
/* Z and W channels */
124 tc_MOV(tc
, real_dst
[2], src_z
);
125 tc_INV(tc
, real_dst
[3], src_w
);
/*
 * Emit the instructions that write the front/back facing value into dst.
 *
 * The code shifts r0 (read as type D with TOY_RECT_010) right by 15, masks
 * the result with 1, moves it into tmp_f, and then computes
 * tmp_f * -2.0f + 1.0f into channel 0, so a bit value of 0 becomes 1.0 and
 * a bit value of 1 becomes -1.0.  Channels 1, 2 and 3 are set to
 * 0.0f, 0.0f and 1.0f.
 *
 * NOTE(review): tmp is used below but its assignment is not visible in
 * this extract (one original line is missing after the tmp_f allocation);
 * confirm against the complete file.
 */
129 fetch_face(struct fs_compile_context
*fcc
, struct toy_dst dst
)
131 struct toy_compiler
*tc
= &fcc
->tc
;
132 const struct toy_src r0
= tsrc_d(tsrc(TOY_FILE_GRF
, 0, 0));
133 struct toy_dst tmp_f
, tmp
;
134 struct toy_dst real_dst
[4];
136 tdst_transpose(dst
, real_dst
);
138 tmp_f
= tc_alloc_tmp(tc
);
/* isolate bit 15 of the r0 value as 0 or 1 */
140 tc_SHR(tc
, tmp
, tsrc_rect(r0
, TOY_RECT_010
), tsrc_imm_d(15));
141 tc_AND(tc
, tmp
, tsrc_from(tmp
), tsrc_imm_d(1));
142 tc_MOV(tc
, tmp_f
, tsrc_from(tmp
));
144 /* convert to 1.0 and -1.0 */
145 tc_MUL(tc
, tmp_f
, tsrc_from(tmp_f
), tsrc_imm_f(-2.0f
));
146 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp_f
), tsrc_imm_f(1.0f
));
148 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
149 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
150 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
/*
 * Emit the instructions that load the input attribute in the given slot
 * into dst.
 *
 * The attribute setup data is read from two consecutive GRFs starting at
 * fcc->first_attr_grf + slot * 2.  The switch on the input interpolation
 * qualifier selects a barycentric mode (pixel or centroid, perspective or
 * non-perspective).  Two load paths are visible:
 *   - a copy path that MOVs sub-registers 3 * 4 and 7 * 4 of each GRF,
 *     broadcast with TOY_RECT_010, into the four channels;
 *   - an interpolation path that issues BRW_OPCODE_PLN per channel with the
 *     attribute deltas and the barycentric payload register for the mode.
 * For TGSI_SEMANTIC_FOG inputs, channels 1..3 are then overwritten with
 * 0.0f, 0.0f and 1.0f.
 *
 * NOTE(review): the declarations of grf, mode and ch, the statements that
 * set is_const, and the else/break/default lines are missing from this
 * extract; which cases take the copy path (presumably the constant and
 * flat-shaded color cases) must be confirmed against the complete file.
 */
154 fetch_attr(struct fs_compile_context
*fcc
, struct toy_dst dst
, int slot
)
156 struct toy_compiler
*tc
= &fcc
->tc
;
157 struct toy_dst real_dst
[4];
158 bool is_const
= false;
161 tdst_transpose(dst
, real_dst
);
/* each attribute occupies two GRFs */
163 grf
= fcc
->first_attr_grf
+ slot
* 2;
165 switch (fcc
->tgsi
.inputs
[slot
].interp
) {
166 case TGSI_INTERPOLATE_CONSTANT
:
169 case TGSI_INTERPOLATE_LINEAR
:
170 if (fcc
->tgsi
.inputs
[slot
].centroid
)
171 mode
= BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
173 mode
= BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
175 case TGSI_INTERPOLATE_COLOR
:
176 if (fcc
->variant
->u
.fs
.flatshade
) {
181 case TGSI_INTERPOLATE_PERSPECTIVE
:
182 if (fcc
->tgsi
.inputs
[slot
].centroid
)
183 mode
= BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
185 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
188 assert(!"unexpected FS interpolation");
189 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
/* copy path: one broadcast sub-register per channel */
194 struct toy_src a0
[4];
196 a0
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 3 * 4);
197 a0
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 7 * 4);
198 a0
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 3 * 4);
199 a0
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 7 * 4);
201 for (ch
= 0; ch
< 4; ch
++)
202 tc_MOV(tc
, real_dst
[ch
], tsrc_rect(a0
[ch
], TOY_RECT_010
));
/* interpolation path: PLN with the barycentric payload for the mode */
205 struct toy_src attr
[4], uv
;
207 attr
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 0);
208 attr
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 4 * 4);
209 attr
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 0);
210 attr
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 4 * 4);
212 uv
= tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].barycentric_interps
[mode
], 0);
214 for (ch
= 0; ch
< 4; ch
++) {
215 tc_add2(tc
, BRW_OPCODE_PLN
, real_dst
[ch
],
216 tsrc_rect(attr
[ch
], TOY_RECT_010
), uv
);
/* fog inputs get (0, 0, 1) forced into channels 1..3 */
220 if (fcc
->tgsi
.inputs
[slot
].semantic_name
== TGSI_SEMANTIC_FOG
) {
221 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
222 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
223 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
/*
 * Lower a TGSI input fetch: look up the input slot for idx with
 * toy_tgsi_find_input() and dispatch on its semantic name.  Position
 * inputs go to fetch_position(), face inputs to fetch_face(), and the
 * remaining visible call is fetch_attr() with the slot.
 *
 * NOTE(review): the slot declaration, any check on the lookup result, and
 * the break/default lines are missing from this extract; dim is not used
 * in the visible statements.
 */
228 fs_lower_opcode_tgsi_in(struct fs_compile_context
*fcc
,
229 struct toy_dst dst
, int dim
, int idx
)
235 slot
= toy_tgsi_find_input(&fcc
->tgsi
, idx
);
239 switch (fcc
->tgsi
.inputs
[slot
].semantic_name
) {
240 case TGSI_SEMANTIC_POSITION
:
241 fetch_position(fcc
, dst
);
243 case TGSI_SEMANTIC_FACE
:
244 fetch_face(fcc
, dst
);
247 fetch_attr(fcc
, dst
, slot
);
/*
 * Lower a TGSI constant fetch using an OWord block read through the data
 * port.
 *
 * The message header is built in the MRF at fcc->first_free_mrf: r0 is
 * copied into it with masking disabled, then idx is written to
 * sub-register 2 * 4 (named global_offset here) as a single-channel,
 * mask-disabled MOV with a TOY_RECT_010 source.  The descriptor targets
 * ILO_WM_CONST_SURFACE(dim) and the SEND goes to fcc->const_cache.  The
 * reply lands in a temporary, and component i of that temporary is
 * broadcast into channel i of dst with D-typed moves.
 *
 * NOTE(review): the declarations of desc and i and the assignment of
 * msg_len are missing from this extract.
 */
253 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context
*fcc
,
254 struct toy_dst dst
, int dim
, struct toy_src idx
)
256 const struct toy_dst header
=
257 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
258 const struct toy_dst global_offset
=
259 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 2 * 4));
260 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
261 struct toy_compiler
*tc
= &fcc
->tc
;
262 unsigned msg_type
, msg_ctrl
, msg_len
;
263 struct toy_inst
*inst
;
265 struct toy_dst tmp
, real_dst
[4];
268 /* set message header */
269 inst
= tc_MOV(tc
, header
, r0
);
270 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
272 /* set global offset */
273 inst
= tc_MOV(tc
, global_offset
, idx
);
274 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
275 inst
->exec_size
= BRW_EXECUTE_1
;
276 inst
->src
[0].rect
= TOY_RECT_010
;
278 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
;
279 msg_ctrl
= BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
<< 8;
282 desc
= tsrc_imm_mdesc_data_port(tc
, false, msg_len
, 1, true, false,
283 msg_type
, msg_ctrl
, ILO_WM_CONST_SURFACE(dim
));
285 tmp
= tc_alloc_tmp(tc
);
287 tc_SEND(tc
, tmp
, tsrc_from(header
), desc
, fcc
->const_cache
);
/* broadcast each of the four returned components into its dst channel */
289 tdst_transpose(dst
, real_dst
);
290 for (i
= 0; i
< 4; i
++) {
291 const struct toy_src src
=
292 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
294 /* cast to type D to make sure these are raw moves */
295 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
/*
 * Lower a TGSI constant fetch using a sampler LD message in SIMD4x2 mode.
 *
 * idx is broadcast (TOY_RECT_010) into the MRF at fcc->first_free_mrf with
 * an 8-wide, mask-disabled MOV; the SEND to BRW_SFID_SAMPLER is also
 * 8-wide and mask-disabled and reads ILO_WM_CONST_SURFACE(dim).  The reply
 * lands in a temporary, and component i of that temporary is broadcast
 * into channel i of dst with D-typed moves.
 *
 * NOTE(review): the declarations of desc and i, one descriptor argument
 * line, and the comment delimiters around the remark below are missing
 * from this extract.
 */
300 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context
*fcc
,
301 struct toy_dst dst
, int dim
, struct toy_src idx
)
303 struct toy_compiler
*tc
= &fcc
->tc
;
304 const struct toy_dst offset
=
305 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
307 struct toy_inst
*inst
;
308 struct toy_dst tmp
, real_dst
[4];
312 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
313 * changed from OWord Block Read to ld to increase performance in the
314 * classic driver. Since we use the constant cache instead of the data
315 * cache, I wonder if we still want to follow the classic driver.
/* write the constant index into the message register */
319 inst
= tc_MOV(tc
, offset
, tsrc_rect(idx
, TOY_RECT_010
));
320 inst
->exec_size
= BRW_EXECUTE_8
;
321 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
323 desc
= tsrc_imm_mdesc_sampler(tc
, 1, 1, false,
324 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
325 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
327 ILO_WM_CONST_SURFACE(dim
));
329 tmp
= tc_alloc_tmp(tc
);
330 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
331 inst
->exec_size
= BRW_EXECUTE_8
;
332 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
/* broadcast each of the four returned components into its dst channel */
334 tdst_transpose(dst
, real_dst
);
335 for (i
= 0; i
< 4; i
++) {
336 const struct toy_src src
=
337 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
339 /* cast to type D to make sure these are raw moves */
340 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
/*
 * Lower a TGSI immediate fetch: obtain the four 32-bit values of immediate
 * idx from toy_tgsi_get_imm() and MOV each one, typed UD on both sides,
 * into the matching channel of dst.
 *
 * NOTE(review): the declarations of imm and ch are missing from this
 * extract.
 */
345 fs_lower_opcode_tgsi_imm(struct fs_compile_context
*fcc
,
346 struct toy_dst dst
, int idx
)
349 struct toy_dst real_dst
[4];
352 imm
= toy_tgsi_get_imm(&fcc
->tgsi
, idx
, NULL
);
354 tdst_transpose(dst
, real_dst
);
356 for (ch
= 0; ch
< 4; ch
++)
357 tc_MOV(&fcc
->tc
, tdst_ud(real_dst
[ch
]), tsrc_imm_ud(imm
[ch
]));
/*
 * Lower a TGSI system value fetch: look up the slot for idx with
 * toy_tgsi_find_system_value() and switch on its semantic name.  The only
 * visible handling reports the failure text below through tc_fail() and
 * writes an integer zero to dst.
 *
 * NOTE(review): the slot declaration, any check on the lookup result, and
 * the default/break lines are missing from this extract, so it is unclear
 * whether the three listed cases share the failure path or fall through to
 * it -- confirm against the complete file.  dim is not used in the visible
 * statements.
 */
361 fs_lower_opcode_tgsi_sv(struct fs_compile_context
*fcc
,
362 struct toy_dst dst
, int dim
, int idx
)
364 struct toy_compiler
*tc
= &fcc
->tc
;
365 const struct toy_tgsi
*tgsi
= &fcc
->tgsi
;
370 slot
= toy_tgsi_find_system_value(tgsi
, idx
);
374 switch (tgsi
->system_values
[slot
].semantic_name
) {
375 case TGSI_SEMANTIC_PRIMID
:
376 case TGSI_SEMANTIC_INSTANCEID
:
377 case TGSI_SEMANTIC_VERTEXID
:
379 tc_fail(tc
, "unhandled system value");
380 tc_MOV(tc
, dst
, tsrc_imm_d(0));
/*
 * Lower a direct (non-indirect) TGSI fetch instruction.
 *
 * src[0] and src[1] are asserted to be immediates and supply dim and idx.
 * The opcode selects the helper: inputs, constants (gen7 helper when
 * tc->dev->gen >= ILO_GEN(7), gen6 helper otherwise per the visible call
 * order), system values, or immediates.  Note that the constant helpers
 * receive src[1] itself rather than the integer idx.  An unhandled opcode
 * reports a failure through tc_fail().  The original instruction is then
 * discarded.
 *
 * NOTE(review): the declarations of dim and idx and the
 * else/break/default lines are missing from this extract.
 */
386 fs_lower_opcode_tgsi_direct(struct fs_compile_context
*fcc
,
387 struct toy_inst
*inst
)
389 struct toy_compiler
*tc
= &fcc
->tc
;
392 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
393 dim
= inst
->src
[0].val32
;
395 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
396 idx
= inst
->src
[1].val32
;
398 switch (inst
->opcode
) {
399 case TOY_OPCODE_TGSI_IN
:
400 fs_lower_opcode_tgsi_in(fcc
, inst
->dst
, dim
, idx
);
402 case TOY_OPCODE_TGSI_CONST
:
403 if (tc
->dev
->gen
>= ILO_GEN(7))
404 fs_lower_opcode_tgsi_const_gen7(fcc
, inst
->dst
, dim
, inst
->src
[1]);
406 fs_lower_opcode_tgsi_const_gen6(fcc
, inst
->dst
, dim
, inst
->src
[1]);
408 case TOY_OPCODE_TGSI_SV
:
409 fs_lower_opcode_tgsi_sv(fcc
, inst
->dst
, dim
, idx
);
411 case TOY_OPCODE_TGSI_IMM
:
413 fs_lower_opcode_tgsi_imm(fcc
, inst
->dst
, idx
);
416 tc_fail(tc
, "unhandled TGSI fetch");
/* the virtual instruction has been replaced by the emitted ones */
420 tc_discard_inst(tc
, inst
);
/*
 * Indirect TGSI fetches and stores are not supported here: the only
 * visible statement marks the compilation as failed through tc_fail().
 * inst is not used in the visible statements.
 */
424 fs_lower_opcode_tgsi_indirect(struct fs_compile_context
*fcc
,
425 struct toy_inst
*inst
)
427 tc_fail(&fcc
->tc
, "no TGSI indirection support");
431 * Emit instructions to move sampling parameters to the message registers.
/*
 * Emit the MOVs that place sampler message parameters into message
 * registers, using the layout this file applies before gen7.
 *
 * Parameter p lives in the MRF at base_mrf + p * param_size (see the
 * SAMPLER_PARAM macro).  Visible layouts per message type:
 *   SAMPLE:                 coords at 0..num_coords-1
 *   SAMPLE_BIAS / _LOD:     coords at 0.., bias_or_lod at 4
 *   SAMPLE_COMPARE:         coords at 0.., ref_or_si at 4
 *   SAMPLE_DERIVS:          coords at 0.., then ddx[i] at 4 + 2i and
 *                           ddy[i] at 5 + 2i
 *   SAMPLE_BIAS_COMPARE /
 *   SAMPLE_LOD_COMPARE:     coords at 0.., ref_or_si at 4, bias_or_lod at 5
 *   SAMPLE_LD:              D-typed coords at 0.., bias_or_lod at 3,
 *                           ref_or_si at 4
 *   SAMPLE_RESINFO:         D-typed bias_or_lod at 0
 * An unknown message type reports a failure through tc_fail().
 *
 * Returns num_params * param_size, which the caller in this file uses as
 * the message length.
 *
 * NOTE(review): the last parameter (num_derivs), the declarations of
 * num_params and i, the switch line, most num_params assignments, and the
 * break/default lines are missing from this extract.
 */
434 fs_add_sampler_params_gen6(struct toy_compiler
*tc
, int msg_type
,
435 int base_mrf
, int param_size
,
436 struct toy_src
*coords
, int num_coords
,
437 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
438 struct toy_src
*ddx
, struct toy_src
*ddy
,
443 assert(num_coords
<= 4);
444 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
446 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
448 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
449 for (i
= 0; i
< num_coords
; i
++)
450 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
451 num_params
= num_coords
;
453 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
454 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
455 for (i
= 0; i
< num_coords
; i
++)
456 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
457 tc_MOV(tc
, SAMPLER_PARAM(4), bias_or_lod
);
460 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
461 for (i
= 0; i
< num_coords
; i
++)
462 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
463 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
466 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
467 for (i
= 0; i
< num_coords
; i
++)
468 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
469 for (i
= 0; i
< num_derivs
; i
++) {
470 tc_MOV(tc
, SAMPLER_PARAM(4 + i
* 2), ddx
[i
]);
471 tc_MOV(tc
, SAMPLER_PARAM(5 + i
* 2), ddy
[i
]);
473 num_params
= 4 + num_derivs
* 2;
475 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
476 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
477 for (i
= 0; i
< num_coords
; i
++)
478 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
479 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
480 tc_MOV(tc
, SAMPLER_PARAM(5), bias_or_lod
);
483 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
484 assert(num_coords
<= 3);
486 for (i
= 0; i
< num_coords
; i
++)
487 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(i
)), coords
[i
]);
488 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(3)), bias_or_lod
);
489 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(4)), ref_or_si
);
492 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
493 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
497 tc_fail(tc
, "unknown sampler opcode");
503 return num_params
* param_size
;
/*
 * Emit the MOVs that place sampler message parameters into message
 * registers, using the layout this file applies on gen7 and later.
 *
 * Parameter p lives in the MRF at base_mrf + p * param_size (see the
 * SAMPLER_PARAM macro).  Unlike the gen6 helper, parameters are packed
 * with no fixed gap after the coordinates.  Visible layouts:
 *   SAMPLE:                 coords at 0..            (num_coords params)
 *   SAMPLE_BIAS / _LOD:     bias_or_lod at 0, coords at 1..
 *   SAMPLE_COMPARE:         ref_or_si at 0, coords at 1..
 *   SAMPLE_DERIVS:          for each i: coord at 3i, and when
 *                           i < num_derivs, ddx at 3i + 1 and ddy at 3i + 2
 *   SAMPLE_BIAS_COMPARE /
 *   SAMPLE_LOD_COMPARE:     ref_or_si at 0, bias_or_lod at 1, coords at 2..
 *   SAMPLE_LD:              D-typed coords[0] at 0, bias_or_lod at 1,
 *                           remaining coords at 2..
 *   SAMPLE_RESINFO:         D-typed bias_or_lod at 0
 * An unknown message type reports a failure through tc_fail().
 *
 * Returns num_params * param_size, which the caller in this file uses as
 * the message length.
 *
 * NOTE(review): the last parameter (num_derivs), the declarations of
 * num_params and i, the switch line, the RESINFO num_params assignment,
 * and the break/default lines are missing from this extract.
 */
507 fs_add_sampler_params_gen7(struct toy_compiler
*tc
, int msg_type
,
508 int base_mrf
, int param_size
,
509 struct toy_src
*coords
, int num_coords
,
510 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
511 struct toy_src
*ddx
, struct toy_src
*ddy
,
516 assert(num_coords
<= 4);
517 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
519 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
521 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
522 for (i
= 0; i
< num_coords
; i
++)
523 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
524 num_params
= num_coords
;
526 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
527 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
528 tc_MOV(tc
, SAMPLER_PARAM(0), bias_or_lod
);
529 for (i
= 0; i
< num_coords
; i
++)
530 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
531 num_params
= 1 + num_coords
;
533 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
534 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
535 for (i
= 0; i
< num_coords
; i
++)
536 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
537 num_params
= 1 + num_coords
;
539 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
540 for (i
= 0; i
< num_coords
; i
++) {
541 tc_MOV(tc
, SAMPLER_PARAM(i
* 3), coords
[i
]);
542 if (i
< num_derivs
) {
543 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 1), ddx
[i
]);
544 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 2), ddy
[i
]);
/* drop the two unused derivative slots after the final coordinate */
547 num_params
= num_coords
* 3 - ((num_coords
> num_derivs
) ? 2 : 0);
549 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
550 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
551 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
552 tc_MOV(tc
, SAMPLER_PARAM(1), bias_or_lod
);
553 for (i
= 0; i
< num_coords
; i
++)
554 tc_MOV(tc
, SAMPLER_PARAM(2 + i
), coords
[i
]);
555 num_params
= 2 + num_coords
;
557 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
558 assert(num_coords
>= 1 && num_coords
<= 3);
560 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), coords
[0]);
561 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1)), bias_or_lod
);
562 for (i
= 1; i
< num_coords
; i
++)
563 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1 + i
)), coords
[i
]);
564 num_params
= 1 + num_coords
;
566 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
567 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
571 tc_fail(tc
, "unknown sampler opcode");
577 return num_params
* param_size
;
581 * Set up message registers and return the message descriptor for sampling.
/*
 * Build the sampler message payload for a TGSI texture instruction and
 * return the message descriptor.
 *
 * Visible steps, in order:
 *   1. choose simd_mode from inst->exec_size (SIMD8 or SIMD16; anything
 *      else reports a failure through tc_fail());
 *   2. get the coordinate count and reference position for the texture
 *      target and split src[0] into per-channel coords;
 *   3. per opcode, choose msg_type and fill bias_or_lod / ref_or_si
 *      (TXP also multiplies the coordinates by the reciprocal of
 *      coords[3]; TXF adds texel offsets when inst->tex.offsets[0] is
 *      set; TXD splits src[1] and src[2] into ddx and ddy);
 *   4. read sampler_index from the immediate at src[sampler_src] and
 *      derive the binding table index with ILO_WM_TEXTURE_SURFACE();
 *   5. for cube targets with at least three coordinates, scale x, y, z by
 *      the reciprocal of the largest absolute component;
 *   6. for up to three coordinates whose saturate_coords[i] bit for this
 *      sampler is set, clamp the coordinate (rectangle targets use a
 *      SEL-based clamp and also report a failure; other targets use a
 *      saturating MOV);
 *   7. emit the parameter MOVs through the gen7 or gen6 helper depending
 *      on tc->dev->gen, which yields msg_len;
 *   8. store sampler_index through ret_sampler_index when it is non-NULL
 *      and return tsrc_imm_mdesc_sampler() with a response length of
 *      4 * param_size.
 *
 * NOTE(review): a large share of the original lines (case labels of the
 * exec_size switch, param_size and sampler_src assignments, the shadow
 * target tests, else/break/default lines, braces and comment delimiters)
 * is missing from this extract, so the guarding conditions of the steps
 * above must be confirmed against the complete file.
 */
583 static struct toy_src
584 fs_prepare_tgsi_sampling(struct toy_compiler
*tc
, const struct toy_inst
*inst
,
585 int base_mrf
, const uint32_t *saturate_coords
,
586 unsigned *ret_sampler_index
)
588 unsigned simd_mode
, msg_type
, msg_len
, sampler_index
, binding_table_index
;
589 struct toy_src coords
[4], ddx
[4], ddy
[4], bias_or_lod
, ref_or_si
;
590 int num_coords
, ref_pos
, num_derivs
;
591 int sampler_src
, param_size
, i
;
593 switch (inst
->exec_size
) {
595 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
599 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
603 tc_fail(tc
, "unsupported execute size for sampling");
608 num_coords
= toy_tgsi_get_texture_coord_dim(inst
->tex
.target
, &ref_pos
);
609 tsrc_transpose(inst
->src
[0], coords
);
610 bias_or_lod
= tsrc_null();
611 ref_or_si
= tsrc_null();
618 * src0 := (x, y, z, w)
623 * For TEX2, TXB2, and TXL2,
625 * src0 := (x, y, z, w)
626 * src1 := (v or bias or lod, ...)
629 * For TEX, TXB, TXL, and TXP,
631 * src0 := (x, y, z, w or bias or lod or projection)
645 * src0 := (x, y, z, w or lod)
648 * State trackers should not generate opcode+texture combinations with
649 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
651 switch (inst
->opcode
) {
652 case TOY_OPCODE_TGSI_TEX
:
656 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
657 ref_or_si
= coords
[ref_pos
];
660 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
663 case TOY_OPCODE_TGSI_TXD
:
665 tc_fail(tc
, "TXD with shadow sampler not supported");
667 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
668 tsrc_transpose(inst
->src
[1], ddx
);
669 tsrc_transpose(inst
->src
[2], ddy
);
670 num_derivs
= num_coords
;
673 case TOY_OPCODE_TGSI_TXP
:
677 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
678 ref_or_si
= coords
[ref_pos
];
681 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
684 /* project the coordinates */
686 struct toy_dst tmp
[4];
688 tc_alloc_tmp4(tc
, tmp
);
690 tc_INV(tc
, tmp
[3], coords
[3]);
691 for (i
= 0; i
< num_coords
&& i
< 3; i
++) {
692 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
693 coords
[i
] = tsrc_from(tmp
[i
]);
697 tc_MUL(tc
, tmp
[ref_pos
], ref_or_si
, tsrc_from(tmp
[3]));
698 ref_or_si
= tsrc_from(tmp
[ref_pos
]);
702 case TOY_OPCODE_TGSI_TXB
:
706 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
707 ref_or_si
= coords
[ref_pos
];
710 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
713 bias_or_lod
= coords
[3];
715 case TOY_OPCODE_TGSI_TXL
:
719 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
720 ref_or_si
= coords
[ref_pos
];
723 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
726 bias_or_lod
= coords
[3];
728 case TOY_OPCODE_TGSI_TXF
:
729 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
731 switch (inst
->tex
.target
) {
732 case TGSI_TEXTURE_2D_MSAA
:
733 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
734 assert(ref_pos
>= 0 && ref_pos
< 4);
735 /* lod is always 0 */
736 bias_or_lod
= tsrc_imm_d(0);
737 ref_or_si
= coords
[ref_pos
];
740 bias_or_lod
= coords
[3];
744 /* offset the coordinates */
745 if (!tsrc_is_null(inst
->tex
.offsets
[0])) {
746 struct toy_dst tmp
[4];
747 struct toy_src offsets
[4];
749 tc_alloc_tmp4(tc
, tmp
);
750 tsrc_transpose(inst
->tex
.offsets
[0], offsets
);
752 for (i
= 0; i
< num_coords
; i
++) {
753 tc_ADD(tc
, tmp
[i
], coords
[i
], offsets
[i
]);
754 coords
[i
] = tsrc_from(tmp
[i
]);
760 case TOY_OPCODE_TGSI_TXQ
:
761 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
763 bias_or_lod
= coords
[0];
765 case TOY_OPCODE_TGSI_TXQ_LZ
:
766 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
770 case TOY_OPCODE_TGSI_TEX2
:
774 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
777 struct toy_src src1
[4];
778 tsrc_transpose(inst
->src
[1], src1
);
779 ref_or_si
= src1
[ref_pos
- 4];
782 ref_or_si
= coords
[ref_pos
];
786 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
791 case TOY_OPCODE_TGSI_TXB2
:
795 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
796 ref_or_si
= coords
[ref_pos
];
799 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
803 struct toy_src src1
[4];
804 tsrc_transpose(inst
->src
[1], src1
);
805 bias_or_lod
= src1
[0];
810 case TOY_OPCODE_TGSI_TXL2
:
814 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
815 ref_or_si
= coords
[ref_pos
];
818 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
822 struct toy_src src1
[4];
823 tsrc_transpose(inst
->src
[1], src1
);
824 bias_or_lod
= src1
[0];
830 assert(!"unhandled sampling opcode");
835 assert(inst
->src
[sampler_src
].file
== TOY_FILE_IMM
);
836 sampler_index
= inst
->src
[sampler_src
].val32
;
837 binding_table_index
= ILO_WM_TEXTURE_SURFACE(sampler_index
);
840 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
842 * "Note that the (cube map) coordinates delivered to the sampling
843 * engine must already have been divided by the component with the
844 * largest absolute value."
846 switch (inst
->tex
.target
) {
847 case TGSI_TEXTURE_CUBE
:
848 case TGSI_TEXTURE_SHADOWCUBE
:
849 case TGSI_TEXTURE_CUBE_ARRAY
:
850 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
851 /* TXQ does not need coordinates */
852 if (num_coords
>= 3) {
853 struct toy_dst tmp
[4];
855 tc_alloc_tmp4(tc
, tmp
);
857 tc_SEL(tc
, tmp
[3], tsrc_absolute(coords
[0]),
858 tsrc_absolute(coords
[1]), BRW_CONDITIONAL_GE
);
859 tc_SEL(tc
, tmp
[3], tsrc_from(tmp
[3]),
860 tsrc_absolute(coords
[2]), BRW_CONDITIONAL_GE
);
861 tc_INV(tc
, tmp
[3], tsrc_from(tmp
[3]));
863 for (i
= 0; i
< 3; i
++) {
864 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
865 coords
[i
] = tsrc_from(tmp
[i
]);
872 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
873 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
874 * so that sampling outside the border gets the correct colors.
876 for (i
= 0; i
< MIN2(num_coords
, 3); i
++) {
879 if (!(saturate_coords
[i
] & (1 << sampler_index
)))
882 switch (inst
->tex
.target
) {
883 case TGSI_TEXTURE_RECT
:
884 case TGSI_TEXTURE_SHADOWRECT
:
893 struct toy_src min
, max
;
896 tc_fail(tc
, "GL_CLAMP with rectangle texture unsupported");
897 tmp
= tc_alloc_tmp(tc
);
899 /* saturate to [0, width] or [0, height] */
901 min
= tsrc_imm_f(0.0f
);
902 max
= tsrc_imm_f(2048.0f
);
904 tc_SEL(tc
, tmp
, coords
[i
], min
, BRW_CONDITIONAL_G
);
905 tc_SEL(tc
, tmp
, tsrc_from(tmp
), max
, BRW_CONDITIONAL_L
);
907 coords
[i
] = tsrc_from(tmp
);
911 struct toy_inst
*inst2
;
913 tmp
= tc_alloc_tmp(tc
);
915 /* saturate to [0.0f, 1.0f] */
916 inst2
= tc_MOV(tc
, tmp
, coords
[i
]);
917 inst2
->saturate
= true;
919 coords
[i
] = tsrc_from(tmp
);
923 /* set up sampler parameters */
924 if (tc
->dev
->gen
>= ILO_GEN(7)) {
925 msg_len
= fs_add_sampler_params_gen7(tc
, msg_type
, base_mrf
, param_size
,
926 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
929 msg_len
= fs_add_sampler_params_gen6(tc
, msg_type
, base_mrf
, param_size
,
930 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
934 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
936 * "The maximum message length allowed to the sampler is 11. This would
937 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
941 tc_fail(tc
, "maximum length for messages to the sampler is 11");
943 if (ret_sampler_index
)
944 *ret_sampler_index
= sampler_index
;
946 return tsrc_imm_mdesc_sampler(tc
, msg_len
, 4 * param_size
,
947 false, simd_mode
, msg_type
, sampler_index
, binding_table_index
);
/*
 * Lower a TGSI sampling instruction into a SEND to the sampler plus the
 * MOVs that apply the sampler view swizzle.
 *
 * fs_prepare_tgsi_sampling() fills the message registers and returns the
 * descriptor and sampler index.  The instruction is then converted in
 * place with toy_compiler_lower_to_send(), its src[0] is pointed at the
 * MRF at fcc->first_free_mrf, and sources from index 2 on are cleared.
 * Four temporaries are allocated for the reply, and tc_move_inst() is
 * called on the instruction.  Finally each destination channel is written
 * from the swizzle table: ZERO gives 0.0f, ONE gives 1.0f, and any other
 * value indexes the temporaries.
 *
 * Two swizzle sources are visible: the per-sampler-view swizzles of the
 * variant (after asserting the index is in range) and the identity
 * swizzle.
 *
 * NOTE(review): the declarations of desc, swizzles and i, the remaining
 * arguments of the prepare call, the statements under the TXF/TXQ/TXQ_LZ
 * cases, the assignments of the descriptor and temporary destination to
 * the instruction, and the else/break/default lines are missing from this
 * extract; which opcodes use the identity swizzle must be confirmed
 * against the complete file.
 */
951 fs_lower_opcode_tgsi_sampling(struct fs_compile_context
*fcc
,
952 struct toy_inst
*inst
)
954 struct toy_compiler
*tc
= &fcc
->tc
;
955 struct toy_dst dst
[4], tmp
[4];
957 unsigned sampler_index
;
961 desc
= fs_prepare_tgsi_sampling(tc
, inst
,
963 fcc
->variant
->saturate_tex_coords
,
966 switch (inst
->opcode
) {
967 case TOY_OPCODE_TGSI_TXF
:
968 case TOY_OPCODE_TGSI_TXQ
:
969 case TOY_OPCODE_TGSI_TXQ_LZ
:
977 toy_compiler_lower_to_send(tc
, inst
, false, BRW_SFID_SAMPLER
);
978 inst
->src
[0] = tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0);
980 for (i
= 2; i
< Elements(inst
->src
); i
++)
981 inst
->src
[i
] = tsrc_null();
983 /* write to temps first */
984 tc_alloc_tmp4(tc
, tmp
);
985 tdst_transpose(inst
->dst
, dst
);
988 tc_move_inst(tc
, inst
);
991 assert(sampler_index
< fcc
->variant
->num_sampler_views
);
992 swizzles
[0] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].r
;
993 swizzles
[1] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].g
;
994 swizzles
[2] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].b
;
995 swizzles
[3] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].a
;
998 swizzles
[0] = PIPE_SWIZZLE_RED
;
999 swizzles
[1] = PIPE_SWIZZLE_GREEN
;
1000 swizzles
[2] = PIPE_SWIZZLE_BLUE
;
1001 swizzles
[3] = PIPE_SWIZZLE_ALPHA
;
1004 /* swizzle the results */
1005 for (i
= 0; i
< 4; i
++) {
1006 switch (swizzles
[i
]) {
1007 case PIPE_SWIZZLE_ZERO
:
1008 tc_MOV(tc
, dst
[i
], tsrc_imm_f(0.0f
));
1010 case PIPE_SWIZZLE_ONE
:
1011 tc_MOV(tc
, dst
[i
], tsrc_imm_f(1.0f
));
1014 tc_MOV(tc
, dst
[i
], tsrc_from(tmp
[swizzles
[i
]]));
/*
 * Lower DDX and DDY into ADDs that subtract neighbouring fragments of a
 * 2x2 subspan, using source region parameters instead of swizzles.
 *
 * For DDX each channel computes right - left, where left reads the source
 * with TOY_RECT_220 and right is the same region shifted by one
 * sub-register.  The second loop computes bottom - top, where top reads
 * the source with TOY_RECT_440 and bottom is shifted by two sub-registers;
 * the inline comment marks this as an approximation.  The original
 * instruction is then discarded.
 *
 * NOTE(review): the declaration of i, the else line that presumably
 * guards the second loop for DDY, braces and the delimiters of the long
 * comment are missing from this extract.
 */
1021 fs_lower_opcode_derivative(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1023 struct toy_dst dst
[4];
1024 struct toy_src src
[4];
1027 tdst_transpose(inst
->dst
, dst
);
1028 tsrc_transpose(inst
->src
[0], src
);
1031 * Every four fragments are from a 2x2 subspan, with
1033 * fragment 1 on the top-left,
1034 * fragment 2 on the top-right,
1035 * fragment 3 on the bottom-left,
1036 * fragment 4 on the bottom-right.
1038 * DDX should thus produce
1040 * dst = src.yyww - src.xxzz
1042 * and DDY should produce
1044 * dst = src.zzww - src.xxyy
1046 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1047 * play with the region parameters.
1049 if (inst
->opcode
== TOY_OPCODE_DDX
) {
1050 for (i
= 0; i
< 4; i
++) {
1051 struct toy_src left
, right
;
1053 left
= tsrc_rect(src
[i
], TOY_RECT_220
);
1054 right
= tsrc_offset(left
, 0, 1);
1056 tc_ADD(tc
, dst
[i
], right
, tsrc_negate(left
));
1060 for (i
= 0; i
< 4; i
++) {
1061 struct toy_src top
, bottom
;
1063 /* approximate with dst = src.zzzz - src.xxxx */
1064 top
= tsrc_rect(src
[i
], TOY_RECT_440
);
1065 bottom
= tsrc_offset(top
, 0, 2);
1067 tc_ADD(tc
, dst
[i
], bottom
, tsrc_negate(top
));
1071 tc_discard_inst(tc
, inst
);
/*
 * Lower a framebuffer write into a SEND to the render cache data port.
 * The message registers are expected to be filled already (see the inline
 * comment), so the only work here is the in-place conversion done by
 * toy_compiler_lower_to_send().
 */
1075 fs_lower_opcode_fb_write(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1077 /* fs_write_fb() has set up the message registers */
1078 toy_compiler_lower_to_send(tc
, inst
, true,
1079 GEN6_SFID_DATAPORT_RENDER_CACHE
);
/*
 * Lower KIL by clearing bits in the pixel mask kept in the low word of
 * r1.7 (sub-register 7 * 4 of GRF 1, typed UW).
 *
 * When src[0] is null, the flag register is first loaded with 0xffff and
 * a CMP of a dummy register against itself with NEQ follows; the flag is
 * then ANDed into the pixel mask.  Otherwise src[0] is split into four
 * channels, and for each one a CMP against 0.0f with GE is followed by
 * the same AND of the flag into the pixel mask.  Every AND is a
 * single-channel, mask-disabled instruction.  The original instruction is
 * then discarded.
 *
 * NOTE(review): the declaration of i, the else line between the two
 * paths, and braces are missing from this extract.  The order of the AND
 * operands is deliberate per the inline comments and must be preserved.
 */
1083 fs_lower_opcode_kil(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1085 struct toy_dst pixel_mask_dst
;
1086 struct toy_src f0
, pixel_mask
;
1087 struct toy_inst
*tmp
;
1089 /* lower half of r1.7:ud */
1090 pixel_mask_dst
= tdst_uw(tdst(TOY_FILE_GRF
, 1, 7 * 4));
1091 pixel_mask
= tsrc_rect(tsrc_from(pixel_mask_dst
), TOY_RECT_010
);
1093 f0
= tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0)), TOY_RECT_010
);
1096 if (tsrc_is_null(inst
->src
[0])) {
1097 struct toy_src dummy
= tsrc_uw(tsrc(TOY_FILE_GRF
, 0, 0));
1098 struct toy_dst f0_dst
= tdst_uw(tdst(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0));
1100 /* create a mask that masks out all pixels */
1101 tmp
= tc_MOV(tc
, f0_dst
, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010
));
1102 tmp
->exec_size
= BRW_EXECUTE_1
;
1103 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1105 tc_CMP(tc
, tdst_null(), dummy
, dummy
, BRW_CONDITIONAL_NEQ
);
1107 /* swapping the two src operands breaks glBitmap()!? */
1108 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1109 tmp
->exec_size
= BRW_EXECUTE_1
;
1110 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1113 struct toy_src src
[4];
1116 tsrc_transpose(inst
->src
[0], src
);
1117 /* mask out killed pixels */
1118 for (i
= 0; i
< 4; i
++) {
1119 tc_CMP(tc
, tdst_null(), src
[i
], tsrc_imm_f(0.0f
),
1120 BRW_CONDITIONAL_GE
);
1122 /* swapping the two src operands breaks glBitmap()!? */
1123 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1124 tmp
->exec_size
= BRW_EXECUTE_1
;
1125 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1129 tc_discard_inst(tc
, inst
);
/*
 * Walk the instruction list twice and replace virtual opcodes with real
 * ones.
 *
 * The first loop handles the TGSI opcodes (direct fetches, indirect
 * fetches/stores, and all sampling opcodes) because lowering them can
 * produce further virtual opcodes.  The second loop handles math opcodes
 * through toy_compiler_lower_math(), derivatives, framebuffer writes and
 * KIL, and reports a failure through tc_fail() for any remaining opcode
 * above 127.
 *
 * NOTE(review): whatever rewinds the iterator before each loop, and the
 * break/default lines, are missing from this extract.
 */
1133 fs_lower_virtual_opcodes(struct fs_compile_context
*fcc
)
1135 struct toy_compiler
*tc
= &fcc
->tc
;
1136 struct toy_inst
*inst
;
1138 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1140 while ((inst
= tc_next(tc
)) != NULL
) {
1141 switch (inst
->opcode
) {
1142 case TOY_OPCODE_TGSI_IN
:
1143 case TOY_OPCODE_TGSI_CONST
:
1144 case TOY_OPCODE_TGSI_SV
:
1145 case TOY_OPCODE_TGSI_IMM
:
1146 fs_lower_opcode_tgsi_direct(fcc
, inst
);
1148 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
1149 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
1150 fs_lower_opcode_tgsi_indirect(fcc
, inst
);
1152 case TOY_OPCODE_TGSI_TEX
:
1153 case TOY_OPCODE_TGSI_TXB
:
1154 case TOY_OPCODE_TGSI_TXD
:
1155 case TOY_OPCODE_TGSI_TXL
:
1156 case TOY_OPCODE_TGSI_TXP
:
1157 case TOY_OPCODE_TGSI_TXF
:
1158 case TOY_OPCODE_TGSI_TXQ
:
1159 case TOY_OPCODE_TGSI_TXQ_LZ
:
1160 case TOY_OPCODE_TGSI_TEX2
:
1161 case TOY_OPCODE_TGSI_TXB2
:
1162 case TOY_OPCODE_TGSI_TXL2
:
1163 case TOY_OPCODE_TGSI_SAMPLE
:
1164 case TOY_OPCODE_TGSI_SAMPLE_I
:
1165 case TOY_OPCODE_TGSI_SAMPLE_I_MS
:
1166 case TOY_OPCODE_TGSI_SAMPLE_B
:
1167 case TOY_OPCODE_TGSI_SAMPLE_C
:
1168 case TOY_OPCODE_TGSI_SAMPLE_C_LZ
:
1169 case TOY_OPCODE_TGSI_SAMPLE_D
:
1170 case TOY_OPCODE_TGSI_SAMPLE_L
:
1171 case TOY_OPCODE_TGSI_GATHER4
:
1172 case TOY_OPCODE_TGSI_SVIEWINFO
:
1173 case TOY_OPCODE_TGSI_SAMPLE_POS
:
1174 case TOY_OPCODE_TGSI_SAMPLE_INFO
:
1175 fs_lower_opcode_tgsi_sampling(fcc
, inst
);
/* second pass: the remaining non-TGSI virtual opcodes */
1181 while ((inst
= tc_next(tc
)) != NULL
) {
1182 switch (inst
->opcode
) {
1183 case TOY_OPCODE_INV
:
1184 case TOY_OPCODE_LOG
:
1185 case TOY_OPCODE_EXP
:
1186 case TOY_OPCODE_SQRT
:
1187 case TOY_OPCODE_RSQ
:
1188 case TOY_OPCODE_SIN
:
1189 case TOY_OPCODE_COS
:
1190 case TOY_OPCODE_FDIV
:
1191 case TOY_OPCODE_POW
:
1192 case TOY_OPCODE_INT_DIV_QUOTIENT
:
1193 case TOY_OPCODE_INT_DIV_REMAINDER
:
1194 toy_compiler_lower_math(tc
, inst
);
1196 case TOY_OPCODE_DDX
:
1197 case TOY_OPCODE_DDY
:
1198 fs_lower_opcode_derivative(tc
, inst
);
1200 case TOY_OPCODE_FB_WRITE
:
1201 fs_lower_opcode_fb_write(tc
, inst
);
1203 case TOY_OPCODE_KIL
:
1204 fs_lower_opcode_kil(tc
, inst
);
1207 if (inst
->opcode
> 127)
1208 tc_fail(tc
, "unhandled virtual opcode");
1215 * Compile the shader.
/*
 * Run the backend pipeline on the instructions held in fcc->tc and store
 * the resulting kernel in fcc->shader.
 *
 * Visible stages, in order: lower virtual opcodes, legalize for register
 * allocation, optimize, allocate registers starting at
 * fcc->first_free_grf, legalize for assembly, then assemble into
 * sh->kernel / sh->kernel_size.  Failures are reported with ilo_err()
 * using tc->reason, and when ILO_DEBUG_FS is set in ilo_debug the
 * legalized instructions and the final disassembly are printed.
 *
 * A second, hand-written microcode path is also visible: it sizes and
 * allocates the kernel from a static microcode table and copies it in
 * groups of four dwords, reversing the dword order within each group.
 *
 * NOTE(review): the return type and return statements, the conditions
 * selecting between the assembler path and the microcode path, the
 * failure checks, one register-allocation argument, the declaration of i,
 * and else/brace lines are missing from this extract.
 */
1218 fs_compile(struct fs_compile_context
*fcc
)
1220 struct toy_compiler
*tc
= &fcc
->tc
;
1221 struct ilo_shader
*sh
= fcc
->shader
;
1223 fs_lower_virtual_opcodes(fcc
);
1224 toy_compiler_legalize_for_ra(tc
);
1225 toy_compiler_optimize(tc
);
1226 toy_compiler_allocate_registers(tc
,
1227 fcc
->first_free_grf
,
1229 fcc
->num_grf_per_vrf
);
1230 toy_compiler_legalize_for_asm(tc
);
1233 ilo_err("failed to legalize FS instructions: %s\n", tc
->reason
);
1237 if (ilo_debug
& ILO_DEBUG_FS
) {
1238 ilo_printf("legalized instructions:\n");
1239 toy_compiler_dump(tc
);
1244 sh
->kernel
= toy_compiler_assemble(tc
, &sh
->kernel_size
);
1247 static const uint32_t microcode
[] = {
1248 /* fill in the microcode here */
1251 const bool swap
= true;
1253 sh
->kernel_size
= sizeof(microcode
);
1254 sh
->kernel
= MALLOC(sh
->kernel_size
);
1257 const int num_dwords
= sizeof(microcode
) / 4;
1258 const uint32_t *src
= microcode
;
1259 uint32_t *dst
= (uint32_t *) sh
->kernel
;
/* each 128-bit instruction is four dwords; reverse their order */
1262 for (i
= 0; i
< num_dwords
; i
+= 4) {
1264 dst
[i
+ 0] = src
[i
+ 3];
1265 dst
[i
+ 1] = src
[i
+ 2];
1266 dst
[i
+ 2] = src
[i
+ 1];
1267 dst
[i
+ 3] = src
[i
+ 0];
/*
 * NOTE(review): this memcpy passes dst and src with no i offset.  If it
 * sits inside the loop above (the braces and else are not visible in
 * this extract), every iteration rewrites only the first 16 bytes and
 * the rest of the kernel stays uninitialized; dst + i and src + i look
 * like the intended arguments -- confirm against the complete file.
 */
1270 memcpy(dst
, src
, 16);
1277 ilo_err("failed to compile FS: %s\n", tc
->reason
);
1281 if (ilo_debug
& ILO_DEBUG_FS
) {
1282 ilo_printf("disassembly:\n");
1283 toy_compiler_disassemble(tc
, sh
->kernel
, sh
->kernel_size
);
1291 * Emit instructions to write the color buffers (and the depth buffer).
1294 fs_write_fb(struct fs_compile_context
*fcc
)
1296 struct toy_compiler
*tc
= &fcc
->tc
;
1297 int base_mrf
= fcc
->first_free_mrf
;
1298 const struct toy_dst header
= tdst_ud(tdst(TOY_FILE_MRF
, base_mrf
, 0));
1299 bool header_present
= false;
1300 struct toy_src desc
;
1301 unsigned msg_type
, ctrl
;
1302 int color_slots
[ILO_MAX_DRAW_BUFFERS
], num_cbufs
;
1303 int pos_slot
= -1, cbuf
, i
;
1305 for (i
= 0; i
< Elements(color_slots
); i
++)
1306 color_slots
[i
] = -1;
1308 for (i
= 0; i
< fcc
->tgsi
.num_outputs
; i
++) {
1309 if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_COLOR
) {
1310 assert(fcc
->tgsi
.outputs
[i
].semantic_index
< Elements(color_slots
));
1311 color_slots
[fcc
->tgsi
.outputs
[i
].semantic_index
] = i
;
1313 else if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1318 num_cbufs
= fcc
->variant
->u
.fs
.num_cbufs
;
1319 /* still need to send EOT (and probably depth) */
1323 /* we need the header to specify the pixel mask or render target */
1324 if (fcc
->tgsi
.uses_kill
|| num_cbufs
> 1) {
1325 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
1326 struct toy_inst
*inst
;
1328 inst
= tc_MOV(tc
, header
, r0
);
1329 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1330 base_mrf
+= fcc
->num_grf_per_vrf
;
1332 /* this is a two-register header */
1333 if (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) {
1334 inst
= tc_MOV(tc
, tdst_offset(header
, 1, 0), tsrc_offset(r0
, 1, 0));
1335 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1336 base_mrf
+= fcc
->num_grf_per_vrf
;
1339 header_present
= true;
1342 for (cbuf
= 0; cbuf
< num_cbufs
; cbuf
++) {
1344 color_slots
[(fcc
->tgsi
.props
.fs_color0_writes_all_cbufs
) ? 0 : cbuf
];
1345 int mrf
= base_mrf
, vrf
;
1346 struct toy_src src
[4];
1349 const unsigned undefined_mask
=
1350 fcc
->tgsi
.outputs
[slot
].undefined_mask
;
1351 const int index
= fcc
->tgsi
.outputs
[slot
].index
;
1353 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1355 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1356 tsrc_transpose(tmp
, src
);
1359 /* use (0, 0, 0, 0) */
1360 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
1363 for (i
= 0; i
< 4; i
++) {
1364 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1366 if (undefined_mask
& (1 << i
))
1367 src
[i
] = tsrc_imm_f(0.0f
);
1369 tc_MOV(tc
, dst
, src
[i
]);
1371 mrf
+= fcc
->num_grf_per_vrf
;
1375 /* use (0, 0, 0, 0) */
1376 for (i
= 0; i
< 4; i
++) {
1377 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1379 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
1380 mrf
+= fcc
->num_grf_per_vrf
;
1384 /* select BLEND_STATE[rt] */
1386 struct toy_inst
*inst
;
1388 inst
= tc_MOV(tc
, tdst_offset(header
, 0, 2), tsrc_imm_ud(cbuf
));
1389 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1390 inst
->exec_size
= BRW_EXECUTE_1
;
1391 inst
->src
[0].rect
= TOY_RECT_010
;
1394 if (cbuf
== 0 && pos_slot
>= 0) {
1395 const int index
= fcc
->tgsi
.outputs
[pos_slot
].index
;
1396 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1397 struct toy_src src
[4];
1400 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1402 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1403 tsrc_transpose(tmp
, src
);
1406 /* use (0, 0, 0, 0) */
1407 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
1411 tc_MOV(tc
, dst
, src
[2]);
1413 mrf
+= fcc
->num_grf_per_vrf
;
1416 msg_type
= (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ?
1417 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
:
1418 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1420 ctrl
= (cbuf
== num_cbufs
- 1) << 12 |
1423 desc
= tsrc_imm_mdesc_data_port(tc
, cbuf
== num_cbufs
- 1,
1424 mrf
- fcc
->first_free_mrf
, 0,
1425 header_present
, false,
1426 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
,
1427 ctrl
, ILO_WM_DRAW_SURFACE(cbuf
));
1429 tc_add2(tc
, TOY_OPCODE_FB_WRITE
, tdst_null(),
1430 tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0), desc
);
1435 * Set up shader outputs for fixed-function units.
1438 fs_setup_shader_out(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
)
1442 sh
->out
.count
= tgsi
->num_outputs
;
1443 for (i
= 0; i
< tgsi
->num_outputs
; i
++) {
1444 sh
->out
.register_indices
[i
] = tgsi
->outputs
[i
].index
;
1445 sh
->out
.semantic_names
[i
] = tgsi
->outputs
[i
].semantic_name
;
1446 sh
->out
.semantic_indices
[i
] = tgsi
->outputs
[i
].semantic_index
;
1448 if (tgsi
->outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
)
1449 sh
->out
.has_pos
= true;
1454 * Set up shader inputs for fixed-function units.
1457 fs_setup_shader_in(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
,
1462 sh
->in
.count
= tgsi
->num_inputs
;
1463 for (i
= 0; i
< tgsi
->num_inputs
; i
++) {
1464 sh
->in
.semantic_names
[i
] = tgsi
->inputs
[i
].semantic_name
;
1465 sh
->in
.semantic_indices
[i
] = tgsi
->inputs
[i
].semantic_index
;
1466 sh
->in
.interp
[i
] = tgsi
->inputs
[i
].interp
;
1467 sh
->in
.centroid
[i
] = tgsi
->inputs
[i
].centroid
;
1469 if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1470 sh
->in
.has_pos
= true;
1473 else if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_FACE
) {
1477 switch (tgsi
->inputs
[i
].interp
) {
1478 case TGSI_INTERPOLATE_LINEAR
:
1479 sh
->in
.has_linear_interp
= true;
1481 if (tgsi
->inputs
[i
].centroid
) {
1482 sh
->in
.barycentric_interpolation_mode
|=
1483 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
1486 sh
->in
.barycentric_interpolation_mode
|=
1487 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
1490 case TGSI_INTERPOLATE_COLOR
:
1494 case TGSI_INTERPOLATE_PERSPECTIVE
:
1495 if (tgsi
->inputs
[i
].centroid
) {
1496 sh
->in
.barycentric_interpolation_mode
|=
1497 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
1500 sh
->in
.barycentric_interpolation_mode
|=
1501 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
/*
 * Assign GRF numbers to the parts of the thread payload and record them in
 * fcc->payloads[].  A running register number (grf) is advanced past each
 * part that is present, so the stored values are the first register of each
 * part.
 *
 * NOTE(review): the caller stores the result in fcc->first_const_grf, so this
 * presumably returns the first register after the payload; the return
 * statement is not visible in this block -- confirm.
 */
1511 fs_setup_payloads(struct fs_compile_context
*fcc
)
1513 const struct ilo_shader
*sh
= fcc
->shader
;
1521 /* r1-r2: coordinates and etc. (two registers in 32-wide dispatch) */
1522 grf
+= (fcc
->dispatch_mode
== GEN6_WM_32_DISPATCH_ENABLE
) ? 2 : 1;
/* one pass per entry of fcc->payloads[] */
1524 for (i
= 0; i
< Elements(fcc
->payloads
); i
++) {
1527 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1528 for (interp
= 0; interp
< BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
; interp
++) {
/* test whether the shader inputs requested this mode */
1529 if (!(sh
->in
.barycentric_interpolation_mode
& (1 << interp
)))
1532 fcc
->payloads
[i
].barycentric_interps
[interp
] = grf
;
/* each mode takes two registers in 8-wide dispatch, four otherwise */
1533 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 2 : 4;
1536 /* r27-r28 or r56-r57: interpolated depth */
1537 if (sh
->in
.has_pos
) {
1538 fcc
->payloads
[i
].source_depth
= grf
;
/* one register in 8-wide dispatch, two otherwise */
1539 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1542 /* r29-r30 or r58-r59: interpolated w */
1543 if (sh
->in
.has_pos
) {
1544 fcc
->payloads
[i
].source_w
= grf
;
/* one register in 8-wide dispatch, two otherwise */
1545 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1548 /* r31 or r60: position offset */
1550 fcc
->payloads
[i
].pos_offset
= grf
;
/* NOTE(review): presumably only 32-wide dispatch has a second payload -- confirm */
1554 if (fcc
->dispatch_mode
!= GEN6_WM_32_DISPATCH_ENABLE
)
1562 * Translate the TGSI tokens.
/*
 * Translate the TGSI tokens of a fragment shader into toy compiler
 * instructions, filling in tgsi with the translation result.
 *
 * When ILO_DEBUG_FS is set in ilo_debug, the incoming tokens are dumped
 * before the translation and the translator state and the resulting
 * instructions are dumped after it.
 *
 * NOTE(review): the caller treats a false result as failure; the failure
 * check and return statements are not visible in this block -- confirm.
 */
1565 fs_setup_tgsi(struct toy_compiler
*tc
, const struct tgsi_token
*tokens
,
1566 struct toy_tgsi
*tgsi
)
/* debug only: show the TGSI input */
1568 if (ilo_debug
& ILO_DEBUG_FS
) {
1569 ilo_printf("dumping fragment shader\n");
1572 tgsi_dump(tokens
, 0);
1576 toy_compiler_translate_tgsi(tc
, tokens
, false, tgsi
);
/* tc->reason holds the text reported with the failure */
1578 ilo_err("failed to translate FS TGSI tokens: %s\n", tc
->reason
);
/* debug only: show what the translator produced */
1582 if (ilo_debug
& ILO_DEBUG_FS
) {
1583 ilo_printf("TGSI translator:\n");
1584 toy_tgsi_dump(tgsi
);
1586 toy_compiler_dump(tc
);
1594 * Set up FS compile context. This includes translating the TGSI tokens.
1597 fs_setup(struct fs_compile_context
*fcc
,
1598 const struct ilo_shader_state
*state
,
1599 const struct ilo_shader_variant
*variant
)
1603 memset(fcc
, 0, sizeof(*fcc
));
1605 fcc
->shader
= CALLOC_STRUCT(ilo_shader
);
1609 fcc
->variant
= variant
;
1611 toy_compiler_init(&fcc
->tc
, state
->info
.dev
);
1613 fcc
->dispatch_mode
= GEN6_WM_8_DISPATCH_ENABLE
;
1615 fcc
->tc
.templ
.access_mode
= BRW_ALIGN_1
;
1616 if (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) {
1617 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1H
;
1618 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_16
;
1621 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1Q
;
1622 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_8
;
1625 fcc
->tc
.rect_linear_width
= 8;
1628 * The classic driver uses the sampler cache (gen6) or the data cache
1631 fcc
->const_cache
= GEN6_SFID_DATAPORT_CONSTANT_CACHE
;
1633 if (!fs_setup_tgsi(&fcc
->tc
, state
->info
.tokens
, &fcc
->tgsi
)) {
1634 toy_compiler_cleanup(&fcc
->tc
);
1639 fs_setup_shader_in(fcc
->shader
, &fcc
->tgsi
, fcc
->variant
->u
.fs
.flatshade
);
1640 fs_setup_shader_out(fcc
->shader
, &fcc
->tgsi
);
1642 /* we do not make use of push constant buffers yet */
1645 fcc
->first_const_grf
= fs_setup_payloads(fcc
);
1646 fcc
->first_attr_grf
= fcc
->first_const_grf
+ num_consts
;
1647 fcc
->first_free_grf
= fcc
->first_attr_grf
+ fcc
->shader
->in
.count
* 2;
1648 fcc
->last_free_grf
= 127;
1650 /* m0 is reserved for system routines */
1651 fcc
->first_free_mrf
= 1;
1652 fcc
->last_free_mrf
= 15;
1654 /* instructions are compressed with BRW_EXECUTE_16 */
1655 fcc
->num_grf_per_vrf
=
1656 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ? 2 : 1;
1658 if (fcc
->tc
.dev
->gen
>= ILO_GEN(7)) {
1659 fcc
->last_free_grf
-= 15;
1660 fcc
->first_free_mrf
= fcc
->last_free_grf
+ 1;
1661 fcc
->last_free_mrf
= fcc
->first_free_mrf
+ 14;
1664 fcc
->shader
->in
.start_grf
= fcc
->first_const_grf
;
1665 fcc
->shader
->has_kill
= fcc
->tgsi
.uses_kill
;
1666 fcc
->shader
->dispatch_16
=
1667 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
);
1673 * Compile the fragment shader.
1676 ilo_shader_compile_fs(const struct ilo_shader_state
*state
,
1677 const struct ilo_shader_variant
*variant
)
1679 struct fs_compile_context fcc
;
1681 if (!fs_setup(&fcc
, state
, variant
))
1686 if (!fs_compile(&fcc
)) {
1691 toy_tgsi_cleanup(&fcc
.tgsi
);
1692 toy_compiler_cleanup(&fcc
.tc
);