2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
/*
 * Per-compilation state of the fragment shader compiler.
 *
 * Visible members: the shader being compiled and the variant it is compiled
 * for, the embedded toy compiler, the message target used when reading
 * constants with SEND, and per-barycentric-mode register numbers.
 *
 * NOTE(review): this listing appears to be missing members and the closing
 * of the struct; code below also reads fcc->tgsi, fcc->payloads[],
 * fcc->first_const_grf, fcc->first_attr_grf, fcc->first_free_grf,
 * fcc->first_free_mrf and fcc->num_grf_per_vrf.  barycentric_interps is
 * accessed as fcc->payloads[0].barycentric_interps[], so it presumably lives
 * in a nested payload struct -- confirm against the full file.
 */
38 struct fs_compile_context
{
39 struct ilo_shader
*shader
;
40 const struct ilo_shader_variant
*variant
;
42 struct toy_compiler tc
;
45 enum brw_message_target const_cache
;
49 int barycentric_interps
[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
];
/*
 * Emit instructions that write the fragment position to the four channels
 * of dst.
 *
 * X and Y are built from the subspan coordinates read as unsigned words from
 * GRF 1; Z is moved from the source_depth payload register and W is produced
 * by tc_INV on the source_w payload register.  The TGSI properties
 * fs_coord_origin and fs_coord_pixel_center decide whether X/Y are moved
 * as-is or have an offset added.
 *
 * NOTE(review): several lines of this function are not present in this
 * listing (for example the declarations of fb_height and offset, and the
 * else branches); confirm against the full file before editing the logic.
 */
67 fetch_position(struct fs_compile_context
*fcc
, struct toy_dst dst
)
69 struct toy_compiler
*tc
= &fcc
->tc
;
70 const struct toy_src src_z
=
71 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_depth
, 0);
72 const struct toy_src src_w
=
73 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_w
, 0);
/* use the framebuffer height from the variant, falling back to 1 when it is 0 */
75 (fcc
->variant
->u
.fs
.fb_height
) ? fcc
->variant
->u
.fs
.fb_height
: 1;
76 const bool origin_upper_left
=
77 (fcc
->tgsi
.props
.fs_coord_origin
== TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
78 const bool pixel_center_integer
=
79 (fcc
->tgsi
.props
.fs_coord_pixel_center
==
80 TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
81 struct toy_src subspan_x
, subspan_y
;
82 struct toy_dst tmp
, tmp_uw
;
83 struct toy_dst real_dst
[4];
85 tdst_transpose(dst
, real_dst
);
/* subspan X is read as UW from GRF 1; subspan Y is the next element after it */
87 subspan_x
= tsrc_uw(tsrc(TOY_FILE_GRF
, 1, 2 * 4));
88 subspan_x
= tsrc_rect(subspan_x
, TOY_RECT_240
);
90 subspan_y
= tsrc_offset(subspan_x
, 0, 1);
92 tmp_uw
= tdst_uw(tc_alloc_tmp(tc
));
93 tmp
= tc_alloc_tmp(tc
);
/* X channel: add a packed-vector immediate to subspan X, then move out of the UW temporary */
96 tc_ADD(tc
, tmp_uw
, subspan_x
, tsrc_imm_v(0x10101010));
97 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
98 if (pixel_center_integer
)
99 tc_MOV(tc
, real_dst
[0], tsrc_from(tmp
));
101 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp
), tsrc_imm_f(0.5f
));
/* Y channel: same scheme with a different packed-vector immediate */
104 tc_ADD(tc
, tmp_uw
, subspan_y
, tsrc_imm_v(0x11001100));
105 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
106 if (origin_upper_left
&& pixel_center_integer
) {
107 tc_MOV(tc
, real_dst
[1], tsrc_from(tmp
));
110 struct toy_src y
= tsrc_from(tmp
);
113 if (!pixel_center_integer
)
116 if (!origin_upper_left
) {
117 offset
+= (float) (fb_height
- 1);
121 tc_ADD(tc
, real_dst
[1], y
, tsrc_imm_f(offset
));
/* Z comes straight from the payload; W is tc_INV of the payload W */
125 tc_MOV(tc
, real_dst
[2], src_z
);
126 tc_INV(tc
, real_dst
[3], src_w
);
/*
 * Emit instructions that write the facing value to dst.
 *
 * Bit 15 of the first element of r0 (GRF 0) is isolated with SHR/AND, moved
 * into tmp_f, and mapped with "* -2.0 + 1.0" so that channel 0 ends up as
 * 1.0 or -1.0.  Channels 1, 2 and 3 are set to 0.0, 0.0 and 1.0.
 *
 * NOTE(review): the line that initializes tmp is not present in this
 * listing; confirm against the full file.
 */
130 fetch_face(struct fs_compile_context
*fcc
, struct toy_dst dst
)
132 struct toy_compiler
*tc
= &fcc
->tc
;
133 const struct toy_src r0
= tsrc_d(tsrc(TOY_FILE_GRF
, 0, 0));
134 struct toy_dst tmp_f
, tmp
;
135 struct toy_dst real_dst
[4];
137 tdst_transpose(dst
, real_dst
);
139 tmp_f
= tc_alloc_tmp(tc
);
/* tmp = (r0 >> 15) & 1 */
141 tc_SHR(tc
, tmp
, tsrc_rect(r0
, TOY_RECT_010
), tsrc_imm_d(15));
142 tc_AND(tc
, tmp
, tsrc_from(tmp
), tsrc_imm_d(1));
143 tc_MOV(tc
, tmp_f
, tsrc_from(tmp
));
145 /* convert to 1.0 and -1.0 */
146 tc_MUL(tc
, tmp_f
, tsrc_from(tmp_f
), tsrc_imm_f(-2.0f
));
147 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp_f
), tsrc_imm_f(1.0f
));
149 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
150 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
151 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
/*
 * Emit instructions that load the input attribute in the given slot into
 * the four channels of dst.
 *
 * The attribute is read from two consecutive GRFs starting at
 * fcc->first_attr_grf + slot * 2.  The TGSI interpolation mode of the input
 * (and its centroid flag) selects a barycentric mode; the value is then
 * either moved directly from the attribute registers or evaluated with PLN
 * against the barycentric payload register for that mode.  For inputs with
 * the FOG semantic, channels 1..3 are overwritten with 0.0, 0.0, 1.0.
 *
 * NOTE(review): the declarations of grf, mode and ch, the else/break lines
 * and the branch that selects between the two load paths are not present in
 * this listing; confirm against the full file.
 */
155 fetch_attr(struct fs_compile_context
*fcc
, struct toy_dst dst
, int slot
)
157 struct toy_compiler
*tc
= &fcc
->tc
;
158 struct toy_dst real_dst
[4];
159 bool is_const
= false;
162 tdst_transpose(dst
, real_dst
);
/* each attribute slot occupies two GRFs */
164 grf
= fcc
->first_attr_grf
+ slot
* 2;
166 switch (fcc
->tgsi
.inputs
[slot
].interp
) {
167 case TGSI_INTERPOLATE_CONSTANT
:
170 case TGSI_INTERPOLATE_LINEAR
:
171 if (fcc
->tgsi
.inputs
[slot
].centroid
)
172 mode
= BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
174 mode
= BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
176 case TGSI_INTERPOLATE_COLOR
:
177 if (fcc
->variant
->u
.fs
.flatshade
) {
182 case TGSI_INTERPOLATE_PERSPECTIVE
:
183 if (fcc
->tgsi
.inputs
[slot
].centroid
)
184 mode
= BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
186 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
189 assert(!"unexpected FS interpolation");
190 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
/* direct-move path (presumably taken when is_const is set -- confirm):
 * one value per channel at byte offsets 3*4 and 7*4 of the two GRFs */
195 struct toy_src a0
[4];
197 a0
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 3 * 4);
198 a0
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 7 * 4);
199 a0
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 3 * 4);
200 a0
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 7 * 4);
202 for (ch
= 0; ch
< 4; ch
++)
203 tc_MOV(tc
, real_dst
[ch
], tsrc_rect(a0
[ch
], TOY_RECT_010
));
/* interpolated path: PLN of each channel's attribute data with the
 * barycentric payload register selected by mode */
206 struct toy_src attr
[4], uv
;
208 attr
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 0);
209 attr
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 4 * 4);
210 attr
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 0);
211 attr
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 4 * 4);
213 uv
= tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].barycentric_interps
[mode
], 0);
215 for (ch
= 0; ch
< 4; ch
++) {
216 tc_add2(tc
, BRW_OPCODE_PLN
, real_dst
[ch
],
217 tsrc_rect(attr
[ch
], TOY_RECT_010
), uv
);
/* FOG inputs: force channels 1..3 to (0, 0, 1) */
221 if (fcc
->tgsi
.inputs
[slot
].semantic_name
== TGSI_SEMANTIC_FOG
) {
222 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
223 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
224 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
/*
 * Lower a TGSI input fetch.
 *
 * Looks up the input slot for idx with toy_tgsi_find_input() and dispatches
 * on the slot's semantic name: POSITION goes to fetch_position(), FACE to
 * fetch_face(), and the remaining visible call is fetch_attr().
 *
 * NOTE(review): the declaration of slot, any handling of a failed lookup,
 * and the break/default lines are not present in this listing.  dim is not
 * referenced by the visible lines.
 */
229 fs_lower_opcode_tgsi_in(struct fs_compile_context
*fcc
,
230 struct toy_dst dst
, int dim
, int idx
)
236 slot
= toy_tgsi_find_input(&fcc
->tgsi
, idx
);
240 switch (fcc
->tgsi
.inputs
[slot
].semantic_name
) {
241 case TGSI_SEMANTIC_POSITION
:
242 fetch_position(fcc
, dst
);
244 case TGSI_SEMANTIC_FACE
:
245 fetch_face(fcc
, dst
);
248 fetch_attr(fcc
, dst
, slot
);
/*
 * Lower an indirectly addressed constant fetch using a sampler LD message.
 *
 * The first channel of idx is moved (with masking disabled) into the first
 * free MRF as the message payload.  The SIMD mode is chosen from the
 * exec_size of that MOV; an unsupported size asserts and writes 0.0 to dst.
 * The message is sent to the sampler with the surface
 * ILO_WM_CONST_SURFACE(dim), and the four returned components, spaced
 * param_size registers apart, are raw-moved to the channels of dst.
 *
 * NOTE(review): the idx parameter declaration, the case labels, the
 * assignments to param_size and the declaration of i are not present in this
 * listing; confirm against the full file.
 */
254 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context
*fcc
,
255 struct toy_dst dst
, int dim
,
258 const struct toy_dst offset
=
259 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
260 struct toy_compiler
*tc
= &fcc
->tc
;
261 unsigned simd_mode
, param_size
;
262 struct toy_inst
*inst
;
263 struct toy_src desc
, real_src
[4];
264 struct toy_dst tmp
, real_dst
[4];
267 tsrc_transpose(idx
, real_src
);
/* message payload: the per-channel index, written regardless of the execution mask */
270 inst
= tc_MOV(tc
, offset
, real_src
[0]);
271 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
273 switch (inst
->exec_size
) {
275 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
279 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
283 assert(!"unsupported execution size");
284 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
289 desc
= tsrc_imm_mdesc_sampler(tc
, param_size
, param_size
* 4, false,
291 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
293 ILO_WM_CONST_SURFACE(dim
));
/* the response needs param_size * 4 registers, so a VRF of that size is allocated */
295 tmp
= tdst(TOY_FILE_VRF
, tc_alloc_vrf(tc
, param_size
* 4), 0);
296 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
297 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
299 tdst_transpose(dst
, real_dst
);
300 for (i
= 0; i
< 4; i
++) {
301 const struct toy_src src
=
302 tsrc_offset(tsrc_from(tmp
), param_size
* i
, 0);
304 /* cast to type D to make sure these are raw moves */
305 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
/*
 * Try to satisfy a constant fetch from the push constant GRFs.
 *
 * Constant idx is located at GRF first_const_grf + idx / 2, byte offset
 * (idx & 1) * 16, i.e. two four-component constants per GRF.  The visible
 * guard tests for: the variant not using the PCB, dim != 0, a non-immediate
 * idx, or a GRF that reaches first_attr_grf.  Otherwise the four components
 * are raw-moved to the channels of dst.
 *
 * The gen6/gen7 lowering functions use the result as a condition, so this
 * presumably returns whether the fetch was handled.
 *
 * NOTE(review): the idx parameter declaration, the declarations of src and
 * i, and the return statements are not present in this listing.
 */
310 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context
*fcc
,
311 struct toy_dst dst
, int dim
,
314 const int grf
= fcc
->first_const_grf
+ idx
.val32
/ 2;
315 const int grf_subreg
= (idx
.val32
& 1) * 16;
317 struct toy_dst real_dst
[4];
320 if (!fcc
->variant
->use_pcb
|| dim
!= 0 || idx
.file
!= TOY_FILE_IMM
||
321 grf
>= fcc
->first_attr_grf
)
324 src
= tsrc_rect(tsrc(TOY_FILE_GRF
, grf
, grf_subreg
), TOY_RECT_010
);
326 tdst_transpose(dst
, real_dst
);
327 for (i
= 0; i
< 4; i
++) {
328 /* cast to type D to make sure these are raw moves */
329 tc_MOV(&fcc
->tc
, tdst_d(real_dst
[i
]), tsrc_d(tsrc_offset(src
, 0, i
)));
/*
 * Lower a direct constant fetch using the Gen6 path.
 *
 * fs_lower_opcode_tgsi_const_pcb() is tried first.  Otherwise a message
 * header is built in the first free MRF by copying r0, idx is written as the
 * global offset at byte offset 2 * 4 of that MRF, and an OWord Block Read is
 * sent to fcc->const_cache for surface ILO_WM_CONST_SURFACE(dim).  The four
 * components of the reply are raw-moved to the channels of dst.
 *
 * NOTE(review): the declarations of desc and i, the assignment of msg_len
 * and the statement under the PCB check are not present in this listing.
 */
336 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context
*fcc
,
337 struct toy_dst dst
, int dim
, struct toy_src idx
)
339 const struct toy_dst header
=
340 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
341 const struct toy_dst global_offset
=
342 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 2 * 4));
343 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
344 struct toy_compiler
*tc
= &fcc
->tc
;
345 unsigned msg_type
, msg_ctrl
, msg_len
;
346 struct toy_inst
*inst
;
348 struct toy_dst tmp
, real_dst
[4];
/* prefer the push constant buffer when it can serve this fetch */
351 if (fs_lower_opcode_tgsi_const_pcb(fcc
, dst
, dim
, idx
))
354 /* set message header */
355 inst
= tc_MOV(tc
, header
, r0
);
356 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
358 /* set global offset */
359 inst
= tc_MOV(tc
, global_offset
, idx
);
360 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
361 inst
->exec_size
= BRW_EXECUTE_1
;
362 inst
->src
[0].rect
= TOY_RECT_010
;
364 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
;
365 msg_ctrl
= BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
<< 8;
368 desc
= tsrc_imm_mdesc_data_port(tc
, false, msg_len
, 1, true, false,
369 msg_type
, msg_ctrl
, ILO_WM_CONST_SURFACE(dim
));
371 tmp
= tc_alloc_tmp(tc
);
373 tc_SEND(tc
, tmp
, tsrc_from(header
), desc
, fcc
->const_cache
);
375 tdst_transpose(dst
, real_dst
);
376 for (i
= 0; i
< 4; i
++) {
377 const struct toy_src src
=
378 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
380 /* cast to type D to make sure these are raw moves */
381 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
/*
 * Lower a direct constant fetch using the Gen7 path.
 *
 * fs_lower_opcode_tgsi_const_pcb() is tried first.  Otherwise idx is
 * replicated into the first free MRF with an 8-wide, mask-disabled MOV and a
 * sampler LD message in SIMD4x2 mode is sent for surface
 * ILO_WM_CONST_SURFACE(dim).  The four components of the reply are raw-moved
 * to the channels of dst.
 *
 * NOTE(review): the declarations of desc and i and the statement under the
 * PCB check are not present in this listing.
 */
386 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context
*fcc
,
387 struct toy_dst dst
, int dim
, struct toy_src idx
)
389 struct toy_compiler
*tc
= &fcc
->tc
;
390 const struct toy_dst offset
=
391 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
393 struct toy_inst
*inst
;
394 struct toy_dst tmp
, real_dst
[4];
/* prefer the push constant buffer when it can serve this fetch */
397 if (fs_lower_opcode_tgsi_const_pcb(fcc
, dst
, dim
, idx
))
401 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
402 * changed from OWord Block Read to ld to increase performance in the
403 * classic driver. Since we use the constant cache instead of the data
404 * cache, I wonder if we still want to follow the classic driver.
/* message payload: the scalar index broadcast across 8 channels */
408 inst
= tc_MOV(tc
, offset
, tsrc_rect(idx
, TOY_RECT_010
));
409 inst
->exec_size
= BRW_EXECUTE_8
;
410 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
412 desc
= tsrc_imm_mdesc_sampler(tc
, 1, 1, false,
413 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
414 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
416 ILO_WM_CONST_SURFACE(dim
));
418 tmp
= tc_alloc_tmp(tc
);
419 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
420 inst
->exec_size
= BRW_EXECUTE_8
;
421 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
423 tdst_transpose(dst
, real_dst
);
424 for (i
= 0; i
< 4; i
++) {
425 const struct toy_src src
=
426 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
428 /* cast to type D to make sure these are raw moves */
429 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
/*
 * Lower a TGSI immediate fetch.
 *
 * The four values of immediate idx are obtained with toy_tgsi_get_imm() and
 * moved, as UD immediates into UD-typed destinations, to the four channels
 * of dst.
 *
 * NOTE(review): the declarations of imm and ch are not present in this
 * listing.
 */
434 fs_lower_opcode_tgsi_imm(struct fs_compile_context
*fcc
,
435 struct toy_dst dst
, int idx
)
438 struct toy_dst real_dst
[4];
441 imm
= toy_tgsi_get_imm(&fcc
->tgsi
, idx
, NULL
);
443 tdst_transpose(dst
, real_dst
);
445 for (ch
= 0; ch
< 4; ch
++)
446 tc_MOV(&fcc
->tc
, tdst_ud(real_dst
[ch
]), tsrc_imm_ud(imm
[ch
]));
/*
 * Lower a TGSI system value fetch.
 *
 * Looks up the system value slot for idx with toy_tgsi_find_system_value()
 * and switches on its semantic name.  The only visible action is to fail
 * the compile with "unhandled system value" and write 0 to dst.
 *
 * NOTE(review): the declaration of slot, any handling of a failed lookup and
 * the default/break lines are not present in this listing, so it is not
 * certain from here which labels reach the tc_fail() -- presumably
 * PRIMID, INSTANCEID, VERTEXID and the default all do; confirm.  dim is not
 * referenced by the visible lines.
 */
450 fs_lower_opcode_tgsi_sv(struct fs_compile_context
*fcc
,
451 struct toy_dst dst
, int dim
, int idx
)
453 struct toy_compiler
*tc
= &fcc
->tc
;
454 const struct toy_tgsi
*tgsi
= &fcc
->tgsi
;
459 slot
= toy_tgsi_find_system_value(tgsi
, idx
);
463 switch (tgsi
->system_values
[slot
].semantic_name
) {
464 case TGSI_SEMANTIC_PRIMID
:
465 case TGSI_SEMANTIC_INSTANCEID
:
466 case TGSI_SEMANTIC_VERTEXID
:
468 tc_fail(tc
, "unhandled system value");
469 tc_MOV(tc
, dst
, tsrc_imm_d(0));
/*
 * Lower a directly addressed TGSI fetch instruction.
 *
 * src[0] and src[1] must be immediates and supply dim and idx.  The opcode
 * selects the helper: TGSI_IN, TGSI_CONST (Gen7 helper when the device gen
 * is at least ILO_GEN(7), otherwise the Gen6 helper), TGSI_SV or TGSI_IMM.
 * An unrecognized opcode fails the compile with "unhandled TGSI fetch".
 * The original instruction is discarded afterwards.
 *
 * NOTE(review): the declarations of dim and idx and the else/break/default
 * lines are not present in this listing.
 */
475 fs_lower_opcode_tgsi_direct(struct fs_compile_context
*fcc
,
476 struct toy_inst
*inst
)
478 struct toy_compiler
*tc
= &fcc
->tc
;
481 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
482 dim
= inst
->src
[0].val32
;
484 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
485 idx
= inst
->src
[1].val32
;
487 switch (inst
->opcode
) {
488 case TOY_OPCODE_TGSI_IN
:
489 fs_lower_opcode_tgsi_in(fcc
, inst
->dst
, dim
, idx
);
491 case TOY_OPCODE_TGSI_CONST
:
/* the constant helpers take the index as a toy_src, not as an int */
492 if (tc
->dev
->gen
>= ILO_GEN(7))
493 fs_lower_opcode_tgsi_const_gen7(fcc
, inst
->dst
, dim
, inst
->src
[1]);
495 fs_lower_opcode_tgsi_const_gen6(fcc
, inst
->dst
, dim
, inst
->src
[1]);
497 case TOY_OPCODE_TGSI_SV
:
498 fs_lower_opcode_tgsi_sv(fcc
, inst
->dst
, dim
, idx
);
500 case TOY_OPCODE_TGSI_IMM
:
502 fs_lower_opcode_tgsi_imm(fcc
, inst
->dst
, idx
);
505 tc_fail(tc
, "unhandled TGSI fetch");
509 tc_discard_inst(tc
, inst
);
/*
 * Lower an indirectly addressed TGSI fetch or store instruction.
 *
 * Operands: src[0] is the immediate register file, src[1] the immediate
 * dimension, src[2] the indirect dimension (asserted to be an immediate and
 * added to dim), src[3] the immediate index and src[4] the indirect index.
 *
 * For INDIRECT_FETCH from TGSI_FILE_CONSTANT, idx is added to the indirect
 * index through a temporary and the fetch is handed to
 * fs_lower_opcode_tgsi_indirect_const().  The other visible outcome is a
 * compile failure with "unhandled TGSI indirection".  The original
 * instruction is discarded afterwards.
 *
 * NOTE(review): the declarations of dim and idx, the condition guarding the
 * tc_ADD, and the break/default lines are not present in this listing.
 */
513 fs_lower_opcode_tgsi_indirect(struct fs_compile_context
*fcc
,
514 struct toy_inst
*inst
)
516 struct toy_compiler
*tc
= &fcc
->tc
;
517 enum tgsi_file_type file
;
519 struct toy_src indirect_dim
, indirect_idx
;
521 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
522 file
= inst
->src
[0].val32
;
524 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
525 dim
= inst
->src
[1].val32
;
526 indirect_dim
= inst
->src
[2];
528 assert(inst
->src
[3].file
== TOY_FILE_IMM
);
529 idx
= inst
->src
[3].val32
;
530 indirect_idx
= inst
->src
[4];
532 /* no dimension indirection */
533 assert(indirect_dim
.file
== TOY_FILE_IMM
);
534 dim
+= indirect_dim
.val32
;
536 switch (inst
->opcode
) {
537 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
538 if (file
== TGSI_FILE_CONSTANT
) {
/* fold the immediate index into the indirect index */
540 struct toy_dst tmp
= tc_alloc_tmp(tc
);
542 tc_ADD(tc
, tmp
, indirect_idx
, tsrc_imm_d(idx
));
543 indirect_idx
= tsrc_from(tmp
);
546 fs_lower_opcode_tgsi_indirect_const(fcc
, inst
->dst
, dim
, indirect_idx
);
550 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
552 tc_fail(tc
, "unhandled TGSI indirection");
556 tc_discard_inst(tc
, inst
);
560 * Emit instructions to move sampling parameters to the message registers.
/*
 * Gen6 layout of sampler message parameters.
 *
 * Parameter p is written to MRF base_mrf + p * param_size (see the
 * SAMPLER_PARAM macro).  In this layout coordinates start at parameter 0 and
 * the extra parameters use fixed slots: bias/LOD or the reference value at
 * slot 4 (reference at 4 and bias/LOD at 5 for the *_COMPARE variants with
 * bias/LOD), derivative pairs from slot 4 on, and for LD the LOD at slot 3
 * and ref_or_si at slot 4.  RESINFO takes only bias_or_lod at slot 0.  An
 * unknown msg_type fails the compile.
 *
 * Returns num_params * param_size, which the caller uses as the message
 * length.
 *
 * NOTE(review): the num_derivs parameter, the declarations of i and
 * num_params, the switch line, several num_params assignments and the
 * break/default lines are not present in this listing.
 */
563 fs_add_sampler_params_gen6(struct toy_compiler
*tc
, int msg_type
,
564 int base_mrf
, int param_size
,
565 struct toy_src
*coords
, int num_coords
,
566 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
567 struct toy_src
*ddx
, struct toy_src
*ddy
,
572 assert(num_coords
<= 4);
573 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
575 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
577 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
578 for (i
= 0; i
< num_coords
; i
++)
579 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
580 num_params
= num_coords
;
582 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
583 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
584 for (i
= 0; i
< num_coords
; i
++)
585 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
586 tc_MOV(tc
, SAMPLER_PARAM(4), bias_or_lod
);
589 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
590 for (i
= 0; i
< num_coords
; i
++)
591 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
592 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
595 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
596 for (i
= 0; i
< num_coords
; i
++)
597 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
/* derivatives follow the four coordinate slots as (ddx, ddy) pairs */
598 for (i
= 0; i
< num_derivs
; i
++) {
599 tc_MOV(tc
, SAMPLER_PARAM(4 + i
* 2), ddx
[i
]);
600 tc_MOV(tc
, SAMPLER_PARAM(5 + i
* 2), ddy
[i
]);
602 num_params
= 4 + num_derivs
* 2;
604 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
605 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
606 for (i
= 0; i
< num_coords
; i
++)
607 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
608 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
609 tc_MOV(tc
, SAMPLER_PARAM(5), bias_or_lod
);
612 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
613 assert(num_coords
<= 3);
/* LD parameters are written through D-typed destinations */
615 for (i
= 0; i
< num_coords
; i
++)
616 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(i
)), coords
[i
]);
617 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(3)), bias_or_lod
);
618 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(4)), ref_or_si
);
621 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
622 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
626 tc_fail(tc
, "unknown sampler opcode");
632 return num_params
* param_size
;
/*
 * Gen7 layout of sampler message parameters.
 *
 * Parameter p is written to MRF base_mrf + p * param_size (see the
 * SAMPLER_PARAM macro).  Unlike the Gen6 layout, the extra parameters come
 * first and the coordinates are packed right after them: bias/LOD or the
 * reference value at slot 0, reference then bias/LOD at slots 0 and 1 for the
 * combined variants, and for DERIVS each coordinate is followed by its ddx
 * and ddy.  For LD the LOD sits between the first coordinate and the rest.
 * RESINFO takes only bias_or_lod at slot 0.  An unknown msg_type fails the
 * compile.
 *
 * Returns num_params * param_size, which the caller uses as the message
 * length.
 *
 * NOTE(review): the num_derivs parameter, the declarations of i and
 * num_params, the switch line, some num_params assignments and the
 * break/default lines are not present in this listing.
 */
636 fs_add_sampler_params_gen7(struct toy_compiler
*tc
, int msg_type
,
637 int base_mrf
, int param_size
,
638 struct toy_src
*coords
, int num_coords
,
639 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
640 struct toy_src
*ddx
, struct toy_src
*ddy
,
645 assert(num_coords
<= 4);
646 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
648 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
650 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
651 for (i
= 0; i
< num_coords
; i
++)
652 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
653 num_params
= num_coords
;
655 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
656 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
657 tc_MOV(tc
, SAMPLER_PARAM(0), bias_or_lod
);
658 for (i
= 0; i
< num_coords
; i
++)
659 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
660 num_params
= 1 + num_coords
;
662 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
663 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
664 for (i
= 0; i
< num_coords
; i
++)
665 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
666 num_params
= 1 + num_coords
;
668 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
/* each coordinate occupies three slots: value, ddx, ddy */
669 for (i
= 0; i
< num_coords
; i
++) {
670 tc_MOV(tc
, SAMPLER_PARAM(i
* 3), coords
[i
]);
671 if (i
< num_derivs
) {
672 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 1), ddx
[i
]);
673 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 2), ddy
[i
]);
/* when there are more coordinates than derivatives, two slots are dropped from the count */
676 num_params
= num_coords
* 3 - ((num_coords
> num_derivs
) ? 2 : 0);
678 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
679 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
680 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
681 tc_MOV(tc
, SAMPLER_PARAM(1), bias_or_lod
);
682 for (i
= 0; i
< num_coords
; i
++)
683 tc_MOV(tc
, SAMPLER_PARAM(2 + i
), coords
[i
]);
684 num_params
= 2 + num_coords
;
686 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
687 assert(num_coords
>= 1 && num_coords
<= 3);
/* order is coords[0], lod, then the remaining coordinates */
689 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), coords
[0]);
690 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1)), bias_or_lod
);
691 for (i
= 1; i
< num_coords
; i
++)
692 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1 + i
)), coords
[i
]);
693 num_params
= 1 + num_coords
;
695 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
696 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
700 tc_fail(tc
, "unknown sampler opcode");
706 return num_params
* param_size
;
710 * Set up message registers and return the message descriptor for sampling.
/*
 * Prepare a TGSI sampling instruction for lowering to a sampler SEND.
 *
 * Steps visible in this function:
 *  - choose the sampler SIMD mode from inst->exec_size;
 *  - get the coordinate count and reference position for the texture target
 *    and split src[0] into per-channel coordinates;
 *  - per opcode, pick the sampler message type and gather bias/LOD,
 *    reference/sample index and derivatives (TXP also projects the
 *    coordinates, TXF optionally adds texel offsets);
 *  - read the sampler index from an immediate source and derive the binding
 *    table index with ILO_WM_TEXTURE_SURFACE();
 *  - for cube targets, scale the first three coordinates by the inverse of
 *    the value selected from their absolute values;
 *  - saturate coordinates flagged in saturate_coords for this sampler;
 *  - write the parameters to MRFs starting at base_mrf using the Gen7 or
 *    Gen6 layout helper.
 *
 * The sampler index is stored through ret_sampler_index when that pointer is
 * non-NULL.  Returns the sampler message descriptor built from the message
 * length, a response length of 4 * param_size, the SIMD mode, message type,
 * sampler index and binding table index.
 *
 * NOTE(review): many lines are not present in this listing, including the
 * case labels of the exec_size switch, the assignments of param_size and
 * sampler_src, the shadow-target conditions, the message-length check and
 * all break/else lines; confirm against the full file before changing logic.
 */
712 static struct toy_src
713 fs_prepare_tgsi_sampling(struct toy_compiler
*tc
, const struct toy_inst
*inst
,
714 int base_mrf
, const uint32_t *saturate_coords
,
715 unsigned *ret_sampler_index
)
717 unsigned simd_mode
, msg_type
, msg_len
, sampler_index
, binding_table_index
;
718 struct toy_src coords
[4], ddx
[4], ddy
[4], bias_or_lod
, ref_or_si
;
719 int num_coords
, ref_pos
, num_derivs
;
720 int sampler_src
, param_size
, i
;
722 switch (inst
->exec_size
) {
724 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
728 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
732 tc_fail(tc
, "unsupported execute size for sampling");
737 num_coords
= tgsi_util_get_texture_coord_dim(inst
->tex
.target
, &ref_pos
);
738 tsrc_transpose(inst
->src
[0], coords
);
739 bias_or_lod
= tsrc_null();
740 ref_or_si
= tsrc_null();
747 * src0 := (x, y, z, w)
752 * For TEX2, TXB2, and TXL2,
754 * src0 := (x, y, z, w)
755 * src1 := (v or bias or lod, ...)
758 * For TEX, TXB, TXL, and TXP,
760 * src0 := (x, y, z, w or bias or lod or projection)
774 * src0 := (x, y, z, w or lod)
777 * State trackers should not generate opcode+texture combinations with
778 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
780 switch (inst
->opcode
) {
781 case TOY_OPCODE_TGSI_TEX
:
785 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
786 ref_or_si
= coords
[ref_pos
];
789 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
792 case TOY_OPCODE_TGSI_TXD
:
794 tc_fail(tc
, "TXD with shadow sampler not supported");
796 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
797 tsrc_transpose(inst
->src
[1], ddx
);
798 tsrc_transpose(inst
->src
[2], ddy
);
799 num_derivs
= num_coords
;
802 case TOY_OPCODE_TGSI_TXP
:
806 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
807 ref_or_si
= coords
[ref_pos
];
810 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
813 /* project the coordinates */
815 struct toy_dst tmp
[4];
817 tc_alloc_tmp4(tc
, tmp
);
/* tmp[3] = INV(w); up to three coordinates are multiplied by it */
819 tc_INV(tc
, tmp
[3], coords
[3]);
820 for (i
= 0; i
< num_coords
&& i
< 3; i
++) {
821 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
822 coords
[i
] = tsrc_from(tmp
[i
]);
826 tc_MUL(tc
, tmp
[ref_pos
], ref_or_si
, tsrc_from(tmp
[3]));
827 ref_or_si
= tsrc_from(tmp
[ref_pos
]);
831 case TOY_OPCODE_TGSI_TXB
:
835 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
836 ref_or_si
= coords
[ref_pos
];
839 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
842 bias_or_lod
= coords
[3];
844 case TOY_OPCODE_TGSI_TXL
:
848 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
849 ref_or_si
= coords
[ref_pos
];
852 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
855 bias_or_lod
= coords
[3];
857 case TOY_OPCODE_TGSI_TXF
:
858 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
860 switch (inst
->tex
.target
) {
861 case TGSI_TEXTURE_2D_MSAA
:
862 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
863 assert(ref_pos
>= 0 && ref_pos
< 4);
864 /* lod is always 0 */
865 bias_or_lod
= tsrc_imm_d(0);
866 ref_or_si
= coords
[ref_pos
];
869 bias_or_lod
= coords
[3];
873 /* offset the coordinates */
874 if (!tsrc_is_null(inst
->tex
.offsets
[0])) {
875 struct toy_dst tmp
[4];
876 struct toy_src offsets
[4];
878 tc_alloc_tmp4(tc
, tmp
);
879 tsrc_transpose(inst
->tex
.offsets
[0], offsets
);
881 for (i
= 0; i
< num_coords
; i
++) {
882 tc_ADD(tc
, tmp
[i
], coords
[i
], offsets
[i
]);
883 coords
[i
] = tsrc_from(tmp
[i
]);
889 case TOY_OPCODE_TGSI_TXQ
:
890 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
892 bias_or_lod
= coords
[0];
894 case TOY_OPCODE_TGSI_TXQ_LZ
:
895 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
899 case TOY_OPCODE_TGSI_TEX2
:
903 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
/* a reference position past src0's four channels is taken from src1 */
906 struct toy_src src1
[4];
907 tsrc_transpose(inst
->src
[1], src1
);
908 ref_or_si
= src1
[ref_pos
- 4];
911 ref_or_si
= coords
[ref_pos
];
915 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
920 case TOY_OPCODE_TGSI_TXB2
:
924 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
925 ref_or_si
= coords
[ref_pos
];
928 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
932 struct toy_src src1
[4];
933 tsrc_transpose(inst
->src
[1], src1
);
934 bias_or_lod
= src1
[0];
939 case TOY_OPCODE_TGSI_TXL2
:
943 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
944 ref_or_si
= coords
[ref_pos
];
947 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
951 struct toy_src src1
[4];
952 tsrc_transpose(inst
->src
[1], src1
);
953 bias_or_lod
= src1
[0];
959 assert(!"unhandled sampling opcode");
/* the sampler index must be an immediate operand */
964 assert(inst
->src
[sampler_src
].file
== TOY_FILE_IMM
);
965 sampler_index
= inst
->src
[sampler_src
].val32
;
966 binding_table_index
= ILO_WM_TEXTURE_SURFACE(sampler_index
);
969 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
971 * "Note that the (cube map) coordinates delivered to the sampling
972 * engine must already have been divided by the component with the
973 * largest absolute value."
975 switch (inst
->tex
.target
) {
976 case TGSI_TEXTURE_CUBE
:
977 case TGSI_TEXTURE_SHADOWCUBE
:
978 case TGSI_TEXTURE_CUBE_ARRAY
:
979 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
980 /* TXQ does not need coordinates */
981 if (num_coords
>= 3) {
982 struct toy_dst tmp
[4];
984 tc_alloc_tmp4(tc
, tmp
);
/* tmp[3] = INV(SEL.GE over |x|, |y|, |z|), then scale x, y, z by it */
986 tc_SEL(tc
, tmp
[3], tsrc_absolute(coords
[0]),
987 tsrc_absolute(coords
[1]), BRW_CONDITIONAL_GE
);
988 tc_SEL(tc
, tmp
[3], tsrc_from(tmp
[3]),
989 tsrc_absolute(coords
[2]), BRW_CONDITIONAL_GE
);
990 tc_INV(tc
, tmp
[3], tsrc_from(tmp
[3]));
992 for (i
= 0; i
< 3; i
++) {
993 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
994 coords
[i
] = tsrc_from(tmp
[i
]);
1001 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
1002 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
1003 * so that sampling outside the border gets the correct colors.
1005 for (i
= 0; i
< MIN2(num_coords
, 3); i
++) {
/* saturate_coords[i] is a per-sampler bitmask for coordinate i */
1008 if (!(saturate_coords
[i
] & (1 << sampler_index
)))
1011 switch (inst
->tex
.target
) {
1012 case TGSI_TEXTURE_RECT
:
1013 case TGSI_TEXTURE_SHADOWRECT
:
1022 struct toy_src min
, max
;
1025 tc_fail(tc
, "GL_CLAMP with rectangle texture unsupported");
1026 tmp
= tc_alloc_tmp(tc
);
1028 /* saturate to [0, width] or [0, height] */
1030 min
= tsrc_imm_f(0.0f
);
1031 max
= tsrc_imm_f(2048.0f
);
1033 tc_SEL(tc
, tmp
, coords
[i
], min
, BRW_CONDITIONAL_G
);
1034 tc_SEL(tc
, tmp
, tsrc_from(tmp
), max
, BRW_CONDITIONAL_L
);
1036 coords
[i
] = tsrc_from(tmp
);
1040 struct toy_inst
*inst2
;
1042 tmp
= tc_alloc_tmp(tc
);
1044 /* saturate to [0.0f, 1.0f] */
1045 inst2
= tc_MOV(tc
, tmp
, coords
[i
]);
1046 inst2
->saturate
= true;
1048 coords
[i
] = tsrc_from(tmp
);
1052 /* set up sampler parameters */
1053 if (tc
->dev
->gen
>= ILO_GEN(7)) {
1054 msg_len
= fs_add_sampler_params_gen7(tc
, msg_type
, base_mrf
, param_size
,
1055 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
1058 msg_len
= fs_add_sampler_params_gen6(tc
, msg_type
, base_mrf
, param_size
,
1059 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
1063 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1065 * "The maximum message length allowed to the sampler is 11. This would
1066 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1070 tc_fail(tc
, "maximum length for messages to the sampler is 11");
1072 if (ret_sampler_index
)
1073 *ret_sampler_index
= sampler_index
;
1075 return tsrc_imm_mdesc_sampler(tc
, msg_len
, 4 * param_size
,
1076 false, simd_mode
, msg_type
, sampler_index
, binding_table_index
);
/*
 * Lower a TGSI sampling instruction to a sampler SEND.
 *
 * fs_prepare_tgsi_sampling() fills the message registers starting at
 * fcc->first_free_mrf and returns the message descriptor.  The instruction
 * is then converted in place to a SEND to BRW_SFID_SAMPLER whose src[0] is
 * the first message register and src[1] the descriptor; the remaining
 * sources are cleared.  The result is written to temporaries first and
 * afterwards copied to the real destination channels through a swizzle:
 * either the sampler view swizzle of the variant or the identity swizzle,
 * with ZERO and ONE producing the constants 0.0 and 1.0.
 *
 * TXF, TXQ and TXQ_LZ clear need_filter.
 *
 * NOTE(review): the declarations of swizzles, i and need_filter, the last
 * argument of the fs_prepare_tgsi_sampling() call, the condition selecting
 * between the two swizzle sources and the line redirecting inst->dst to the
 * temporaries are not present in this listing.
 */
1080 fs_lower_opcode_tgsi_sampling(struct fs_compile_context
*fcc
,
1081 struct toy_inst
*inst
)
1083 struct toy_compiler
*tc
= &fcc
->tc
;
1084 struct toy_dst dst
[4], tmp
[4];
1085 struct toy_src desc
;
1086 unsigned sampler_index
;
1090 desc
= fs_prepare_tgsi_sampling(tc
, inst
,
1091 fcc
->first_free_mrf
,
1092 fcc
->variant
->saturate_tex_coords
,
1095 switch (inst
->opcode
) {
1096 case TOY_OPCODE_TGSI_TXF
:
1097 case TOY_OPCODE_TGSI_TXQ
:
1098 case TOY_OPCODE_TGSI_TXQ_LZ
:
1099 need_filter
= false;
/* rewrite the instruction in place as a SEND to the sampler */
1106 toy_compiler_lower_to_send(tc
, inst
, false, BRW_SFID_SAMPLER
);
1107 inst
->src
[0] = tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0);
1108 inst
->src
[1] = desc
;
1109 for (i
= 2; i
< Elements(inst
->src
); i
++)
1110 inst
->src
[i
] = tsrc_null();
1112 /* write to temps first */
1113 tc_alloc_tmp4(tc
, tmp
);
1114 for (i
= 0; i
< 4; i
++)
1115 tmp
[i
].type
= inst
->dst
.type
;
1116 tdst_transpose(inst
->dst
, dst
);
1119 tc_move_inst(tc
, inst
);
1122 assert(sampler_index
< fcc
->variant
->num_sampler_views
);
1123 swizzles
[0] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].r
;
1124 swizzles
[1] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].g
;
1125 swizzles
[2] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].b
;
1126 swizzles
[3] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].a
;
1129 swizzles
[0] = PIPE_SWIZZLE_RED
;
1130 swizzles
[1] = PIPE_SWIZZLE_GREEN
;
1131 swizzles
[2] = PIPE_SWIZZLE_BLUE
;
1132 swizzles
[3] = PIPE_SWIZZLE_ALPHA
;
1135 /* swizzle the results */
1136 for (i
= 0; i
< 4; i
++) {
1137 switch (swizzles
[i
]) {
1138 case PIPE_SWIZZLE_ZERO
:
1139 tc_MOV(tc
, dst
[i
], tsrc_imm_f(0.0f
));
1141 case PIPE_SWIZZLE_ONE
:
1142 tc_MOV(tc
, dst
[i
], tsrc_imm_f(1.0f
));
/* any other swizzle value indexes the temporaries directly */
1145 tc_MOV(tc
, dst
[i
], tsrc_from(tmp
[swizzles
[i
]]));
/*
 * Lower DDX and DDY to ADD instructions on neighbouring fragments.
 *
 * For each of the four channels, DDX subtracts the element selected with
 * region TOY_RECT_220 from the element one position after it (right minus
 * left).  The other branch uses region TOY_RECT_440 and subtracts that
 * element from the one two positions after it (bottom minus top), which the
 * inline comment describes as an approximation.  The original instruction
 * is discarded afterwards.
 *
 * NOTE(review): the declaration of i and the else line are not present in
 * this listing.
 */
1152 fs_lower_opcode_derivative(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1154 struct toy_dst dst
[4];
1155 struct toy_src src
[4];
1158 tdst_transpose(inst
->dst
, dst
);
1159 tsrc_transpose(inst
->src
[0], src
);
1162 * Every four fragments are from a 2x2 subspan, with
1164 * fragment 1 on the top-left,
1165 * fragment 2 on the top-right,
1166 * fragment 3 on the bottom-left,
1167 * fragment 4 on the bottom-right.
1169 * DDX should thus produce
1171 * dst = src.yyww - src.xxzz
1173 * and DDY should produce
1175 * dst = src.zzww - src.xxyy
1177 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1178 * play with the region parameters.
1180 if (inst
->opcode
== TOY_OPCODE_DDX
) {
1181 for (i
= 0; i
< 4; i
++) {
1182 struct toy_src left
, right
;
1184 left
= tsrc_rect(src
[i
], TOY_RECT_220
);
1185 right
= tsrc_offset(left
, 0, 1);
1187 tc_ADD(tc
, dst
[i
], right
, tsrc_negate(left
));
1191 for (i
= 0; i
< 4; i
++) {
1192 struct toy_src top
, bottom
;
1194 /* approximate with dst = src.zzzz - src.xxxx */
1195 top
= tsrc_rect(src
[i
], TOY_RECT_440
);
1196 bottom
= tsrc_offset(top
, 0, 2);
1198 tc_ADD(tc
, dst
[i
], bottom
, tsrc_negate(top
));
1202 tc_discard_inst(tc
, inst
);
/*
 * Lower FB_WRITE to a SEND to the render cache data port.  No message
 * registers are written here; per the inline comment they were prepared by
 * fs_write_fb(), which is not part of this listing.
 */
1206 fs_lower_opcode_fb_write(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1208 /* fs_write_fb() has set up the message registers */
1209 toy_compiler_lower_to_send(tc
, inst
, true,
1210 GEN6_SFID_DATAPORT_RENDER_CACHE
);
/*
 * Lower KIL (unconditional when src[0] is null, conditional otherwise) by
 * updating the pixel mask kept in the low word of r1.7.
 *
 * Unconditional form: the flag register is set to 0xffff, a CMP of r0 with
 * itself for "not equal" is emitted, and the pixel mask is ANDed with the
 * flag register.
 *
 * Conditional form: for each of the four source channels a CMP against 0.0
 * with condition GE is emitted, followed by the same AND of the flag
 * register into the pixel mask.
 *
 * The ANDs and the flag initialization run with exec_size 1 and masking
 * disabled.  The original instruction is discarded afterwards.
 *
 * NOTE(review): the declaration of i and the else line are not present in
 * this listing.
 */
1214 fs_lower_opcode_kil(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1216 struct toy_dst pixel_mask_dst
;
1217 struct toy_src f0
, pixel_mask
;
1218 struct toy_inst
*tmp
;
1220 /* lower half of r1.7:ud */
1221 pixel_mask_dst
= tdst_uw(tdst(TOY_FILE_GRF
, 1, 7 * 4));
1222 pixel_mask
= tsrc_rect(tsrc_from(pixel_mask_dst
), TOY_RECT_010
);
1224 f0
= tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0)), TOY_RECT_010
);
1226 /* KILL or KILL_IF */
1227 if (tsrc_is_null(inst
->src
[0])) {
1228 struct toy_src dummy
= tsrc_uw(tsrc(TOY_FILE_GRF
, 0, 0));
1229 struct toy_dst f0_dst
= tdst_uw(tdst(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0));
1231 /* create a mask that masks out all pixels */
1232 tmp
= tc_MOV(tc
, f0_dst
, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010
));
1233 tmp
->exec_size
= BRW_EXECUTE_1
;
1234 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
/* comparing a register with itself for NEQ never holds */
1236 tc_CMP(tc
, tdst_null(), dummy
, dummy
, BRW_CONDITIONAL_NEQ
);
1238 /* swapping the two src operands breaks glBitmap()!? */
1239 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1240 tmp
->exec_size
= BRW_EXECUTE_1
;
1241 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1244 struct toy_src src
[4];
1247 tsrc_transpose(inst
->src
[0], src
);
1248 /* mask out killed pixels */
1249 for (i
= 0; i
< 4; i
++) {
1250 tc_CMP(tc
, tdst_null(), src
[i
], tsrc_imm_f(0.0f
),
1251 BRW_CONDITIONAL_GE
);
1253 /* swapping the two src operands breaks glBitmap()!? */
1254 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1255 tmp
->exec_size
= BRW_EXECUTE_1
;
1256 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1260 tc_discard_inst(tc
, inst
);
/*
 * Lower all virtual opcodes in the instruction stream in two passes.
 *
 * The first pass handles the TGSI opcodes (direct fetches, indirect
 * fetches/stores and sampling), because their lowering may emit other
 * virtual opcodes.  The second pass handles the math opcodes through
 * toy_compiler_lower_math(), DDX/DDY, FB_WRITE and KIL; a remaining opcode
 * above 127 fails the compile with "unhandled virtual opcode".
 *
 * NOTE(review): the statements that rewind the instruction iterator before
 * each pass and the break/default lines are not present in this listing.
 */
1264 fs_lower_virtual_opcodes(struct fs_compile_context
*fcc
)
1266 struct toy_compiler
*tc
= &fcc
->tc
;
1267 struct toy_inst
*inst
;
1269 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1271 while ((inst
= tc_next(tc
)) != NULL
) {
1272 switch (inst
->opcode
) {
1273 case TOY_OPCODE_TGSI_IN
:
1274 case TOY_OPCODE_TGSI_CONST
:
1275 case TOY_OPCODE_TGSI_SV
:
1276 case TOY_OPCODE_TGSI_IMM
:
1277 fs_lower_opcode_tgsi_direct(fcc
, inst
);
1279 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
1280 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
1281 fs_lower_opcode_tgsi_indirect(fcc
, inst
);
1283 case TOY_OPCODE_TGSI_TEX
:
1284 case TOY_OPCODE_TGSI_TXB
:
1285 case TOY_OPCODE_TGSI_TXD
:
1286 case TOY_OPCODE_TGSI_TXL
:
1287 case TOY_OPCODE_TGSI_TXP
:
1288 case TOY_OPCODE_TGSI_TXF
:
1289 case TOY_OPCODE_TGSI_TXQ
:
1290 case TOY_OPCODE_TGSI_TXQ_LZ
:
1291 case TOY_OPCODE_TGSI_TEX2
:
1292 case TOY_OPCODE_TGSI_TXB2
:
1293 case TOY_OPCODE_TGSI_TXL2
:
1294 case TOY_OPCODE_TGSI_SAMPLE
:
1295 case TOY_OPCODE_TGSI_SAMPLE_I
:
1296 case TOY_OPCODE_TGSI_SAMPLE_I_MS
:
1297 case TOY_OPCODE_TGSI_SAMPLE_B
:
1298 case TOY_OPCODE_TGSI_SAMPLE_C
:
1299 case TOY_OPCODE_TGSI_SAMPLE_C_LZ
:
1300 case TOY_OPCODE_TGSI_SAMPLE_D
:
1301 case TOY_OPCODE_TGSI_SAMPLE_L
:
1302 case TOY_OPCODE_TGSI_GATHER4
:
1303 case TOY_OPCODE_TGSI_SVIEWINFO
:
1304 case TOY_OPCODE_TGSI_SAMPLE_POS
:
1305 case TOY_OPCODE_TGSI_SAMPLE_INFO
:
1306 fs_lower_opcode_tgsi_sampling(fcc
, inst
);
/* second pass: the remaining, non-TGSI virtual opcodes */
1312 while ((inst
= tc_next(tc
)) != NULL
) {
1313 switch (inst
->opcode
) {
1314 case TOY_OPCODE_INV
:
1315 case TOY_OPCODE_LOG
:
1316 case TOY_OPCODE_EXP
:
1317 case TOY_OPCODE_SQRT
:
1318 case TOY_OPCODE_RSQ
:
1319 case TOY_OPCODE_SIN
:
1320 case TOY_OPCODE_COS
:
1321 case TOY_OPCODE_FDIV
:
1322 case TOY_OPCODE_POW
:
1323 case TOY_OPCODE_INT_DIV_QUOTIENT
:
1324 case TOY_OPCODE_INT_DIV_REMAINDER
:
1325 toy_compiler_lower_math(tc
, inst
);
1327 case TOY_OPCODE_DDX
:
1328 case TOY_OPCODE_DDY
:
1329 fs_lower_opcode_derivative(tc
, inst
);
1331 case TOY_OPCODE_FB_WRITE
:
1332 fs_lower_opcode_fb_write(tc
, inst
);
1334 case TOY_OPCODE_KIL
:
1335 fs_lower_opcode_kil(tc
, inst
);
/* anything still above 127 at this point was not lowered */
1338 if (inst
->opcode
> 127)
1339 tc_fail(tc
, "unhandled virtual opcode");
1346 * Compile the shader.
/*
 * Run the back end of the FS compiler on fcc->tc and store the resulting
 * kernel in fcc->shader (kernel and kernel_size).
 *
 * The visible pipeline is: lower virtual opcodes, legalize for register
 * allocation, optimize, allocate registers starting at fcc->first_free_grf
 * with fcc->num_grf_per_vrf registers per virtual register, legalize for the
 * assembler, and finally assemble.  Failures are reported with ilo_err()
 * together with tc->reason.  With ILO_DEBUG_FS set in ilo_debug, the
 * legalized instructions and the disassembly are printed.
 *
 * NOTE(review): some lines of this function are not visible in the reviewed
 * excerpt (presumably the failure checks, the return statements, and the
 * condition selecting between the assembler and the hand-written microcode
 * path) -- confirm against the full file.
 */
1349 fs_compile(struct fs_compile_context
*fcc
)
1351 struct toy_compiler
*tc
= &fcc
->tc
;
1352 struct ilo_shader
*sh
= fcc
->shader
;
1354 fs_lower_virtual_opcodes(fcc
);
1355 toy_compiler_legalize_for_ra(tc
);
1356 toy_compiler_optimize(tc
);
1357 toy_compiler_allocate_registers(tc
,
1358 fcc
->first_free_grf
,
1360 fcc
->num_grf_per_vrf
);
1361 toy_compiler_legalize_for_asm(tc
);
1364 ilo_err("failed to legalize FS instructions: %s\n", tc
->reason
);
1368 if (ilo_debug
& ILO_DEBUG_FS
) {
1369 ilo_printf("legalized instructions:\n");
1370 toy_compiler_dump(tc
);
/* normal path: let the assembler produce the kernel */
1375 sh
->kernel
= toy_compiler_assemble(tc
, &sh
->kernel_size
);
/* debugging path: use a kernel pasted in by hand instead */
1378 static const uint32_t microcode
[] = {
1379 /* fill in the microcode here */
1382 const bool swap
= true;
1384 sh
->kernel_size
= sizeof(microcode
);
1385 sh
->kernel
= MALLOC(sh
->kernel_size
);
1388 const int num_dwords
= sizeof(microcode
) / 4;
1389 const uint32_t *src
= microcode
;
1390 uint32_t *dst
= (uint32_t *) sh
->kernel
;
/* one instruction is four dwords (16 bytes); i indexes its first dword */
1393 for (i
= 0; i
< num_dwords
; i
+= 4) {
/* reverse the dword order within the instruction */
1395 dst
[i
+ 0] = src
[i
+ 3];
1396 dst
[i
+ 1] = src
[i
+ 2];
1397 dst
[i
+ 2] = src
[i
+ 1];
1398 dst
[i
+ 3] = src
[i
+ 0];
/*
 * Copy the current instruction as is.  The offset by i is required: without
 * it every iteration would copy the first 16 bytes of the microcode onto the
 * first instruction and leave the rest of the kernel uninitialized.
 */
1401 memcpy(dst
+ i
, src
+ i
, 16);
1408 ilo_err("failed to compile FS: %s\n", tc
->reason
);
1412 if (ilo_debug
& ILO_DEBUG_FS
) {
1413 ilo_printf("disassembly:\n");
1414 toy_compiler_disassemble(tc
, sh
->kernel
, sh
->kernel_size
);
1422 * Emit instructions to write the color buffers (and the depth buffer).
/*
 * Emit one TOY_OPCODE_FB_WRITE per color buffer of the variant.
 *
 * The message payload is assembled in MRFs starting at fcc->first_free_mrf:
 * an optional message header copied from r0, four color channels, and, for
 * color buffer 0 when the shader has a POSITION output, the third component
 * of that output.  Each payload piece advances the MRF index by
 * fcc->num_grf_per_vrf.  The descriptor is built with
 * tsrc_imm_mdesc_data_port() and targets ILO_WM_DRAW_SURFACE(cbuf).
 *
 * NOTE(review): several lines of this function are not visible in the
 * reviewed excerpt (among them the assignment of pos_slot, the declaration
 * of slot, and the conditions guarding the alternative branches) -- confirm
 * against the full file.
 */
1425 fs_write_fb(struct fs_compile_context
*fcc
)
1427 struct toy_compiler
*tc
= &fcc
->tc
;
1428 int base_mrf
= fcc
->first_free_mrf
;
1429 const struct toy_dst header
= tdst_ud(tdst(TOY_FILE_MRF
, base_mrf
, 0));
1430 bool header_present
= false;
1431 struct toy_src desc
;
1432 unsigned msg_type
, ctrl
;
1433 int color_slots
[ILO_MAX_DRAW_BUFFERS
], num_cbufs
;
1434 int pos_slot
= -1, cbuf
, i
;
/* -1 marks a color buffer that no shader output writes to */
1436 for (i
= 0; i
< Elements(color_slots
); i
++)
1437 color_slots
[i
] = -1;
/* map each COLOR output to its slot, indexed by semantic index */
1439 for (i
= 0; i
< fcc
->tgsi
.num_outputs
; i
++) {
1440 if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_COLOR
) {
1441 assert(fcc
->tgsi
.outputs
[i
].semantic_index
< Elements(color_slots
));
1442 color_slots
[fcc
->tgsi
.outputs
[i
].semantic_index
] = i
;
1444 else if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1449 num_cbufs
= fcc
->variant
->u
.fs
.num_cbufs
;
1450 /* still need to send EOT (and probably depth) */
1454 /* we need the header to specify the pixel mask or render target */
1455 if (fcc
->tgsi
.uses_kill
|| num_cbufs
> 1) {
1456 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
1457 struct toy_inst
*inst
;
/* copy r0 into the header with the execution mask disabled */
1459 inst
= tc_MOV(tc
, header
, r0
);
1460 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1461 base_mrf
+= fcc
->num_grf_per_vrf
;
1463 /* this is a two-register header */
1464 if (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) {
1465 inst
= tc_MOV(tc
, tdst_offset(header
, 1, 0), tsrc_offset(r0
, 1, 0));
1466 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1467 base_mrf
+= fcc
->num_grf_per_vrf
;
1470 header_present
= true;
/* one render target write per color buffer */
1473 for (cbuf
= 0; cbuf
< num_cbufs
; cbuf
++) {
/* color 0 is broadcast when the shader says it writes all color buffers */
1475 color_slots
[(fcc
->tgsi
.props
.fs_color0_writes_all_cbufs
) ? 0 : cbuf
];
1476 int mrf
= base_mrf
, vrf
;
1477 struct toy_src src
[4];
1480 const unsigned undefined_mask
=
1481 fcc
->tgsi
.outputs
[slot
].undefined_mask
;
1482 const int index
= fcc
->tgsi
.outputs
[slot
].index
;
1484 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1486 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1487 tsrc_transpose(tmp
, src
);
1490 /* use (0, 0, 0, 0) */
1491 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
/* write the four channels, replacing undefined ones with 0.0f */
1494 for (i
= 0; i
< 4; i
++) {
1495 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1497 if (undefined_mask
& (1 << i
))
1498 src
[i
] = tsrc_imm_f(0.0f
);
1500 tc_MOV(tc
, dst
, src
[i
]);
1502 mrf
+= fcc
->num_grf_per_vrf
;
1506 /* use (0, 0, 0, 0) */
1507 for (i
= 0; i
< 4; i
++) {
1508 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1510 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
1511 mrf
+= fcc
->num_grf_per_vrf
;
1515 /* select BLEND_STATE[rt] */
1517 struct toy_inst
*inst
;
/* scalar write of the color buffer index into the header at (0, 2) */
1519 inst
= tc_MOV(tc
, tdst_offset(header
, 0, 2), tsrc_imm_ud(cbuf
));
1520 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1521 inst
->exec_size
= BRW_EXECUTE_1
;
1522 inst
->src
[0].rect
= TOY_RECT_010
;
/* the POSITION output is appended to the first color buffer write only */
1525 if (cbuf
== 0 && pos_slot
>= 0) {
1526 const int index
= fcc
->tgsi
.outputs
[pos_slot
].index
;
1527 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1528 struct toy_src src
[4];
1531 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1533 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1534 tsrc_transpose(tmp
, src
);
1537 /* use (0, 0, 0, 0) */
1538 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
/* only the third component (index 2) of the position is sent */
1542 tc_MOV(tc
, dst
, src
[2]);
1544 mrf
+= fcc
->num_grf_per_vrf
;
/* the message type follows the dispatch width */
1547 msg_type
= (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ?
1548 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
:
1549 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
/* bit 12 is set for the last color buffer only */
1551 ctrl
= (cbuf
== num_cbufs
- 1) << 12 |
/* message length is the number of MRFs used since first_free_mrf */
1554 desc
= tsrc_imm_mdesc_data_port(tc
, cbuf
== num_cbufs
- 1,
1555 mrf
- fcc
->first_free_mrf
, 0,
1556 header_present
, false,
1557 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
,
1558 ctrl
, ILO_WM_DRAW_SURFACE(cbuf
));
1560 tc_add2(tc
, TOY_OPCODE_FB_WRITE
, tdst_null(),
1561 tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0), desc
);
1566 * Set up shader outputs for fixed-function units.
/*
 * Copy the output declarations of the translated TGSI shader into sh->out.
 *
 * sh->out.count is set to tgsi->num_outputs, and for every output the
 * register index, semantic name and semantic index are recorded.
 * sh->out.has_pos is set when any output has TGSI_SEMANTIC_POSITION.
 *
 * sh:   shader whose out member is filled in
 * tgsi: translated TGSI information, read only
 */
1569 fs_setup_shader_out(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
)
1573 sh
->out
.count
= tgsi
->num_outputs
;
1574 for (i
= 0; i
< tgsi
->num_outputs
; i
++) {
1575 sh
->out
.register_indices
[i
] = tgsi
->outputs
[i
].index
;
1576 sh
->out
.semantic_names
[i
] = tgsi
->outputs
[i
].semantic_name
;
1577 sh
->out
.semantic_indices
[i
] = tgsi
->outputs
[i
].semantic_index
;
/* remember that the shader writes a position output */
1579 if (tgsi
->outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
)
1580 sh
->out
.has_pos
= true;
1585 * Set up shader inputs for fixed-function units.
/*
 * Copy the input declarations of the translated TGSI shader into sh->in and
 * derive the interpolation state from them.
 *
 * For every input the semantic name, semantic index, interpolation mode and
 * centroid flag are recorded.  Depending on the interpolation mode, bits are
 * set in sh->in.const_interp_enable (indexed by input) or in
 * sh->in.barycentric_interpolation_mode (indexed by BRW_WM_*_BARYCENTRIC),
 * and sh->in.has_linear_interp is set for linear inputs.
 *
 * NOTE(review): the parameter list continues past the visible part (the
 * caller passes a flatshade flag as third argument), and the statements that
 * end each branch of the switch are not visible in the reviewed excerpt --
 * confirm against the full file.
 */
1588 fs_setup_shader_in(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
,
1593 sh
->in
.count
= tgsi
->num_inputs
;
1594 for (i
= 0; i
< tgsi
->num_inputs
; i
++) {
1595 sh
->in
.semantic_names
[i
] = tgsi
->inputs
[i
].semantic_name
;
1596 sh
->in
.semantic_indices
[i
] = tgsi
->inputs
[i
].semantic_index
;
1597 sh
->in
.interp
[i
] = tgsi
->inputs
[i
].interp
;
1598 sh
->in
.centroid
[i
] = tgsi
->inputs
[i
].centroid
;
/* POSITION and FACE inputs are recognized before the interpolation switch */
1600 if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1601 sh
->in
.has_pos
= true;
1604 else if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_FACE
) {
1608 switch (tgsi
->inputs
[i
].interp
) {
1609 case TGSI_INTERPOLATE_CONSTANT
:
1610 sh
->in
.const_interp_enable
|= 1 << i
;
1612 case TGSI_INTERPOLATE_LINEAR
:
1613 sh
->in
.has_linear_interp
= true;
/* non-perspective barycentrics, sampled at the centroid or the pixel */
1615 if (tgsi
->inputs
[i
].centroid
) {
1616 sh
->in
.barycentric_interpolation_mode
|=
1617 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
1620 sh
->in
.barycentric_interpolation_mode
|=
1621 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
/* NOTE(review): presumably constant only when flat shading -- TODO confirm */
1624 case TGSI_INTERPOLATE_COLOR
:
1626 sh
->in
.const_interp_enable
|= 1 << i
;
1630 case TGSI_INTERPOLATE_PERSPECTIVE
:
/* perspective barycentrics, sampled at the centroid or the pixel */
1631 if (tgsi
->inputs
[i
].centroid
) {
1632 sh
->in
.barycentric_interpolation_mode
|=
1633 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
1636 sh
->in
.barycentric_interpolation_mode
|=
1637 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
/*
 * Assign GRF numbers to the pieces of the thread payload and record them in
 * fcc->payloads[].
 *
 * A running register counter is advanced past the coordinate registers and
 * then, for each entry of fcc->payloads, past every barycentric mode enabled
 * in sh->in.barycentric_interpolation_mode, the source depth and source w
 * (both only when sh->in.has_pos is set) and the position offset.  The
 * register counts depend on fcc->dispatch_mode.  fs_setup() stores the value
 * returned by this function in fcc->first_const_grf.
 *
 * NOTE(review): the declaration and initial value of the counter, some of
 * the guarding conditions and the return statement are not visible in the
 * reviewed excerpt -- confirm against the full file.
 */
1647 fs_setup_payloads(struct fs_compile_context
*fcc
)
1649 const struct ilo_shader
*sh
= fcc
->shader
;
1657 /* r1-r2: coordinates and etc. */
1658 grf
+= (fcc
->dispatch_mode
== GEN6_WM_32_DISPATCH_ENABLE
) ? 2 : 1;
1660 for (i
= 0; i
< Elements(fcc
->payloads
); i
++) {
1663 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1664 for (interp
= 0; interp
< BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
; interp
++) {
1665 if (!(sh
->in
.barycentric_interpolation_mode
& (1 << interp
)))
/* an enabled mode takes 2 registers in SIMD8 dispatch and 4 otherwise */
1668 fcc
->payloads
[i
].barycentric_interps
[interp
] = grf
;
1669 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 2 : 4;
1672 /* r27-r28 or r56-r57: interpolated depth */
1673 if (sh
->in
.has_pos
) {
1674 fcc
->payloads
[i
].source_depth
= grf
;
1675 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1678 /* r29-r30 or r58-r59: interpolated w */
1679 if (sh
->in
.has_pos
) {
1680 fcc
->payloads
[i
].source_w
= grf
;
1681 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1684 /* r31 or r60: position offset */
1686 fcc
->payloads
[i
].pos_offset
= grf
;
/* NOTE(review): presumably only SIMD32 has a second payload set -- confirm */
1690 if (fcc
->dispatch_mode
!= GEN6_WM_32_DISPATCH_ENABLE
)
1698 * Translate the TGSI tokens.
/*
 * Translate the TGSI tokens of a fragment shader into toy instructions.
 *
 * The tokens are handed to toy_compiler_translate_tgsi(), which fills in
 * *tgsi.  A translation failure is reported with ilo_err() together with
 * tc->reason.  With ILO_DEBUG_FS set in ilo_debug, the incoming tokens, the
 * translator state and the translated instructions are dumped.
 *
 * tc:     compiler that receives the translated instructions
 * tokens: TGSI tokens to translate
 * tgsi:   translation result
 *
 * fs_setup() tests the result as a boolean and treats false as failure.
 */
1701 fs_setup_tgsi(struct toy_compiler
*tc
, const struct tgsi_token
*tokens
,
1702 struct toy_tgsi
*tgsi
)
1704 if (ilo_debug
& ILO_DEBUG_FS
) {
1705 ilo_printf("dumping fragment shader\n");
1708 tgsi_dump(tokens
, 0);
1712 toy_compiler_translate_tgsi(tc
, tokens
, false, tgsi
);
1714 ilo_err("failed to translate FS TGSI tokens: %s\n", tc
->reason
);
1718 if (ilo_debug
& ILO_DEBUG_FS
) {
1719 ilo_printf("TGSI translator:\n");
1720 toy_tgsi_dump(tgsi
);
1722 toy_compiler_dump(tc
);
1730 * Set up FS compile context. This includes translating the TGSI tokens.
/*
 * Prepare *fcc for compiling one variant of a fragment shader.
 *
 * The context is zeroed, a new ilo_shader is allocated with CALLOC_STRUCT,
 * the toy compiler is initialized for state->info.dev, and the dispatch mode
 * is fixed to GEN6_WM_8_DISPATCH_ENABLE with a matching instruction
 * template.  The TGSI tokens in state->info.tokens are then translated; on
 * failure the toy compiler is cleaned up again.  Afterwards the shader
 * inputs and outputs are set up and the register layout is computed:
 *
 *  - num_consts is (const_count + 1) / 2 when the variant uses the push
 *    constant buffer and the shader does not address constants indirectly;
 *    cbuf0_size is num_consts units of eight floats
 *  - skip_cbuf0_upload is set when there are no constants at all or when
 *    num_consts is non-zero
 *  - constants start at the GRF returned by fs_setup_payloads(), followed by
 *    two GRFs per shader input, followed by the free GRFs up to r127
 *  - MRFs 1 to 15 are used for messages; on gen7 and later the top 15 GRFs
 *    are taken away from the free range and used in place of the MRFs
 *  - num_grf_per_vrf is 2 for SIMD16 dispatch and 1 otherwise
 *
 * NOTE(review): several lines of this function are not visible in the
 * reviewed excerpt (among them the declaration of num_consts, the allocation
 * check, the limit applied to num_consts and the return statements) --
 * confirm against the full file.
 */
1733 fs_setup(struct fs_compile_context
*fcc
,
1734 const struct ilo_shader_state
*state
,
1735 const struct ilo_shader_variant
*variant
)
1739 memset(fcc
, 0, sizeof(*fcc
));
1741 fcc
->shader
= CALLOC_STRUCT(ilo_shader
);
1745 fcc
->variant
= variant
;
1747 toy_compiler_init(&fcc
->tc
, state
->info
.dev
);
/* only SIMD8 dispatch is selected here */
1749 fcc
->dispatch_mode
= GEN6_WM_8_DISPATCH_ENABLE
;
/* instruction template: execution size and quarter control follow the mode */
1751 fcc
->tc
.templ
.access_mode
= BRW_ALIGN_1
;
1752 if (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) {
1753 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1H
;
1754 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_16
;
1757 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1Q
;
1758 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_8
;
1761 fcc
->tc
.rect_linear_width
= 8;
1764 * The classic driver uses the sampler cache (gen6) or the data cache
1767 fcc
->const_cache
= GEN6_SFID_DATAPORT_CONSTANT_CACHE
;
/* translation failure: undo toy_compiler_init() */
1769 if (!fs_setup_tgsi(&fcc
->tc
, state
->info
.tokens
, &fcc
->tgsi
)) {
1770 toy_compiler_cleanup(&fcc
->tc
);
1775 fs_setup_shader_in(fcc
->shader
, &fcc
->tgsi
, fcc
->variant
->u
.fs
.flatshade
);
1776 fs_setup_shader_out(fcc
->shader
, &fcc
->tgsi
);
/* push constants are used only without indirect constant addressing */
1778 if (fcc
->variant
->use_pcb
&& !fcc
->tgsi
.const_indirect
) {
1779 num_consts
= (fcc
->tgsi
.const_count
+ 1) / 2;
1782 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1784 * "The sum of all four read length fields (each incremented to
1785 * represent the actual read length) must be less than or equal to
1788 * Since we are usually under a high register pressure, do not allow
1798 fcc
->shader
->skip_cbuf0_upload
= (!fcc
->tgsi
.const_count
|| num_consts
);
1799 fcc
->shader
->pcb
.cbuf0_size
= num_consts
* (sizeof(float) * 8);
1801 fcc
->first_const_grf
= fs_setup_payloads(fcc
);
1802 fcc
->first_attr_grf
= fcc
->first_const_grf
+ num_consts
;
1803 fcc
->first_free_grf
= fcc
->first_attr_grf
+ fcc
->shader
->in
.count
* 2;
1804 fcc
->last_free_grf
= 127;
1806 /* m0 is reserved for system routines */
1807 fcc
->first_free_mrf
= 1;
1808 fcc
->last_free_mrf
= 15;
1810 /* instructions are compressed with BRW_EXECUTE_16 */
1811 fcc
->num_grf_per_vrf
=
1812 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ? 2 : 1;
1814 if (fcc
->tc
.dev
->gen
>= ILO_GEN(7)) {
1815 fcc
->last_free_grf
-= 15;
1816 fcc
->first_free_mrf
= fcc
->last_free_grf
+ 1;
1817 fcc
->last_free_mrf
= fcc
->first_free_mrf
+ 14;
1820 fcc
->shader
->in
.start_grf
= fcc
->first_const_grf
;
1821 fcc
->shader
->has_kill
= fcc
->tgsi
.uses_kill
;
1822 fcc
->shader
->dispatch_16
=
1823 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
);
1829 * Compile the fragment shader.
1832 ilo_shader_compile_fs(const struct ilo_shader_state
*state
,
1833 const struct ilo_shader_variant
*variant
)
1835 struct fs_compile_context fcc
;
1837 if (!fs_setup(&fcc
, state
, variant
))
1842 if (!fs_compile(&fcc
)) {
1847 toy_tgsi_cleanup(&fcc
.tgsi
);
1848 toy_compiler_cleanup(&fcc
.tc
);