3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
// Constructor of the NIR fragment-shader translator.
// Passes PIPE_SHADER_FRAGMENT, the selector, the shader info, the NIR scratch
// size and the chip class to the ShaderFromNirProcessor base, then initialises
// the members from the shader key:
//  - m_max_color_exports is key.ps.nr_cbufs clamped to at least 1 (MAX2),
//  - m_two_sided_color mirrors key.ps.color_two_side,
//  - m_apply_sample_mask mirrors key.ps.apply_sample_id_mask.
// NOTE(review): this listing omits some lines (gaps in the embedded
// numbering), e.g. one constructor parameter and some member initialisers.
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader
& nir
,
36 r600_pipe_shader_selector
&sel
,
37 const r600_shader_key
&key
,
38 enum chip_class chip_class
):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT
, sel
, sh
, nir
.scratch_size
, chip_class
, 0),
40 m_max_color_exports(MAX2(key
.ps
.nr_cbufs
,1)),
41 m_max_counted_color_exports(0),
42 m_two_sided_color(key
.ps
.color_two_side
),
43 m_last_pixel_export(nullptr),
45 m_reserved_registers(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
50 m_enable_centroid_interpolators(false),
51 m_apply_sample_mask(key
.ps
.apply_sample_id_mask
)
// Visits every entry of m_interpolator (the loop body is not visible here).
53 for (auto& i
: m_interpolator
) {
// RAT base is the number of colour buffers in the key; the atomic base is
// taken from the key's first atomic counter.
58 sh_info().rat_base
= key
.ps
.nr_cbufs
;
59 sh_info().atomic_base
= key
.ps
.first_atomic_counter
;
// Registers one NIR input variable with the shader I/O bookkeeping.
// Logs the variable, flags es_face for VARYING_SLOT_FACE, then translates the
// varying slot into a (semantic name, semantic index) pair with
// r600_get_varying_semantic() and handles it by semantic name:
//  - POSITION: flags es_pos in m_sv_values,
//  - COLOR: adds a ShaderInputColor; back colour is needed iff two-sided
//    colouring is enabled,
//  - PRIMID: sets gs_prim_id_input and stores the current number of inputs in
//    ps_prim_id_input,
//  - FOG/GENERIC/TEXCOORD/LAYER/PCOORD/VIEWPORT_INDEX/CLIPDIST: adds a
//    ShaderInputVarying when find_varying() does not report an existing one.
// NOTE(review): the switch header, break/return statements and the default
// case are not visible in this listing.
62 bool FragmentShaderFromNir::do_process_inputs(nir_variable
*input
)
64 sfn_log
<< SfnLog::io
<< "Parse input variable "
65 << input
->name
<< " location:" << input
->data
.location
66 << " driver-loc:" << input
->data
.driver_location
67 << " interpolation:" << input
->data
.interpolation
// The front-face varying is served from a system value, not a varying slot.
70 if (input
->data
.location
== VARYING_SLOT_FACE
) {
71 m_sv_values
.set(es_face
);
76 auto semantic
= r600_get_varying_semantic(input
->data
.location
);
77 name
= semantic
.first
;
78 sid
= semantic
.second
;
80 tgsi_semantic sname
= static_cast<tgsi_semantic
>(name
);
83 case TGSI_SEMANTIC_POSITION
: {
84 m_sv_values
.set(es_pos
);
87 case TGSI_SEMANTIC_COLOR
: {
88 m_shaderio
.add_input(new ShaderInputColor(sname
, sid
, input
));
89 m_need_back_color
= m_two_sided_color
;
92 case TGSI_SEMANTIC_PRIMID
:
93 sh_info().gs_prim_id_input
= true;
94 sh_info().ps_prim_id_input
= m_shaderio
.inputs().size();
96 case TGSI_SEMANTIC_FOG
:
97 case TGSI_SEMANTIC_GENERIC
:
98 case TGSI_SEMANTIC_TEXCOORD
:
99 case TGSI_SEMANTIC_LAYER
:
100 case TGSI_SEMANTIC_PCOORD
:
101 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
102 case TGSI_SEMANTIC_CLIPDIST
: {
// Only add the varying when find_varying() does not already report one for
// this semantic and location_frac.
103 if (!m_shaderio
.find_varying(sname
, sid
, input
->data
.location_frac
))
104 m_shaderio
.add_input(new ShaderInputVarying(sname
, sid
, input
));
// Pre-pass over one NIR instruction that records which system values the
// shader reads. For intrinsic instructions the following are flagged in
// m_sv_values: load_front_face -> es_face, load_sample_mask_in ->
// es_sample_mask_in, load_sample_pos -> es_sample_pos, load_sample_id ->
// es_sample_id. interp_deref_at_centroid additionally switches on
// m_enable_centroid_interpolators.
112 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr
*instr
)
114 switch (instr
->type
) {
115 case nir_instr_type_intrinsic
: {
116 nir_intrinsic_instr
*ii
= nir_instr_as_intrinsic(instr
);
117 switch (ii
->intrinsic
) {
118 case nir_intrinsic_load_front_face
:
119 m_sv_values
.set(es_face
);
121 case nir_intrinsic_load_sample_mask_in
:
122 m_sv_values
.set(es_sample_mask_in
);
124 case nir_intrinsic_load_sample_pos
:
125 m_sv_values
.set(es_sample_pos
);
127 case nir_intrinsic_load_sample_id
:
128 m_sv_values
.set(es_sample_id
);
130 case nir_intrinsic_interp_deref_at_centroid
:
131 /* This is not a sysvalue, should go elsewhere */
132 m_enable_centroid_interpolators
= true;
// Lays out the registers that hold preloaded inputs, in this order:
//  1. barycentric interpolators (two per register, i in the odd channel and
//     j in the even channel of each pair),
//  2. the fragment position (when es_pos is flagged),
//  3. the front-face register (when es_face is flagged or back colour is
//     needed), channel 0,
//  4. the sample mask (channel 2; reuses the face register index if one was
//     allocated) and the sample id (channel 3 of its own register index).
// Afterwards back-colour inputs are appended for every colour input when
// two-sided colouring is required, LDS positions are updated and the total is
// handed to set_reserved_registers().
// Must only run once: it asserts that nothing has been reserved yet.
144 bool FragmentShaderFromNir::do_allocate_reserved_registers()
146 assert(!m_reserved_registers
);
148 int face_reg_index
= -1;
149 int sample_id_index
= -1;
150 // enabled interpolators based on inputs
151 for (auto& i
: m_shaderio
.inputs()) {
152 int ij
= i
->ij_index();
154 m_interpolator
[ij
].enabled
= true;
158 /* Lazy, enable both possible interpolators,
159 * TODO: check which ones are really needed */
160 if (m_enable_centroid_interpolators
) {
161 m_interpolator
[2].enabled
= true; /* perspective */
162 m_interpolator
[5].enabled
= true; /* linear */
165 // sort the varying inputs
166 m_shaderio
.sort_varying_inputs();
168 // handle interpolators
170 for (int i
= 0; i
< 6; ++i
) {
171 if (m_interpolator
[i
].enabled
) {
172 sfn_log
<< SfnLog::io
<< "Interpolator " << i
<< " is enabled\n";
174 m_interpolator
[i
].ij_index
= num_baryc
;
// Two interpolators share one register: the register index is
// num_baryc / 2 and the channel pair starts at 0 or 2.
176 unsigned sel
= num_baryc
/ 2;
177 unsigned chan
= 2 * (num_baryc
% 2);
179 auto ip_i
= new GPRValue(sel
, chan
+ 1);
180 ip_i
->set_as_input();
181 m_interpolator
[i
].i
.reset(ip_i
);
182 inject_register(sel
, chan
+ 1, m_interpolator
[i
].i
, false);
184 auto ip_j
= new GPRValue(sel
, chan
);
185 ip_j
->set_as_input();
186 m_interpolator
[i
].j
.reset(ip_j
);
187 inject_register(sel
, chan
, m_interpolator
[i
].j
, false);
// Round up: an odd number of interpolators still occupies a whole register.
192 m_reserved_registers
+= (num_baryc
+ 1) >> 1;
194 if (m_sv_values
.test(es_pos
)) {
195 m_frag_pos_index
= m_reserved_registers
++;
196 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION
, m_frag_pos_index
));
199 // handle system values
200 if (m_sv_values
.test(es_face
) || m_need_back_color
) {
201 face_reg_index
= m_reserved_registers
++;
202 auto ffr
= new GPRValue(face_reg_index
,0);
204 m_front_face_reg
.reset(ffr
);
205 sfn_log
<< SfnLog::io
<< "Set front_face register to " << *m_front_face_reg
<< "\n";
206 inject_register(ffr
->sel(), ffr
->chan(), m_front_face_reg
, false);
208 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE
, face_reg_index
));
212 if (m_sv_values
.test(es_sample_mask_in
)) {
// The sample mask lives in channel 2 of the face register; reserve that
// register now if the face value itself was not requested.
213 if (face_reg_index
< 0)
214 face_reg_index
= m_reserved_registers
++;
216 auto smi
= new GPRValue(face_reg_index
,2);
218 m_sample_mask_reg
.reset(smi
);
219 sfn_log
<< SfnLog::io
<< "Set sample mask in register to " << *m_sample_mask_reg
<< "\n";
220 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
221 sh_info().nsys_inputs
= 1;
222 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK
, face_reg_index
));
// The sample id is also set up when only the sample mask is read.
225 if (m_sv_values
.test(es_sample_id
) ||
226 m_sv_values
.test(es_sample_mask_in
)) {
227 if (sample_id_index
< 0)
228 sample_id_index
= m_reserved_registers
++;
230 auto smi
= new GPRValue(sample_id_index
, 3);
232 m_sample_id_reg
.reset(smi
);
233 sfn_log
<< SfnLog::io
<< "Set sample id register to " << *m_sample_id_reg
<< "\n";
234 sh_info().nsys_inputs
++;
235 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID
, sample_id_index
));
238 // The back color handling is not emitted in the code, so we have
239 // to add the inputs here and later we also need to inject the code to set
241 if (m_need_back_color
) {
// The input count is captured up front so that the back-colour inputs
// appended inside the loop are not visited themselves.
242 size_t ninputs
= m_shaderio
.inputs().size();
243 for (size_t k
= 0; k
< ninputs
; ++k
) {
244 ShaderInput
& i
= m_shaderio
.input(k
);
246 if (i
.name() != TGSI_SEMANTIC_COLOR
)
249 ShaderInputColor
& col
= static_cast<ShaderInputColor
&>(i
);
251 size_t next_pos
= m_shaderio
.size();
252 auto bcol
= new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR
, col
, next_pos
);
253 m_shaderio
.add_input(bcol
);
254 col
.set_back_color(next_pos
);
256 m_shaderio
.set_two_sided();
259 m_shaderio
.update_lds_pos();
261 set_reserved_registers(m_reserved_registers
);
// Emits the code needed at the very start of the shader for preloaded system
// values. When the fragment position is used, a GPRValue is created for each
// of the four channels of register m_frag_pos_index and an in-place
// reciprocal (op1_recip_ieee) is emitted.
// NOTE(review): the statement guarded by the es_face test and any condition
// restricting the reciprocal to a single channel (presumably w) are not
// visible in this listing — confirm against the full source.
266 void FragmentShaderFromNir::emit_shader_start()
268 if (m_sv_values
.test(es_face
))
271 if (m_sv_values
.test(es_pos
)) {
272 for (int i
= 0; i
< 4; ++i
) {
273 auto v
= new GPRValue(m_frag_pos_index
, i
);
275 auto reg
= PValue(v
);
277 emit_instruction(new AluInstruction(op1_recip_ieee
, reg
, reg
, {alu_write
, alu_last_instr
}));
// Handles a store to a fragment shader output variable.
// FRAG_RESULT_COLOR is exported through emit_export_pixel() with the
// "all channels" flag set; FRAG_RESULT_DATA0..DATA7, DEPTH, STENCIL and
// SAMPLE_MASK are exported with the flag cleared. Any other location is
// reported on the error log together with its driver location.
283 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
285 if (out_var
->data
.location
== FRAG_RESULT_COLOR
)
286 return emit_export_pixel(out_var
, instr
, true);
288 if ((out_var
->data
.location
>= FRAG_RESULT_DATA0
&&
289 out_var
->data
.location
<= FRAG_RESULT_DATA7
) ||
290 out_var
->data
.location
== FRAG_RESULT_DEPTH
||
291 out_var
->data
.location
== FRAG_RESULT_STENCIL
||
292 out_var
->data
.location
== FRAG_RESULT_SAMPLE_MASK
)
293 return emit_export_pixel(out_var
, instr
, false);
295 sfn_log
<< SfnLog::err
<< "r600-NIR: Unimplemented store_deref for " <<
296 out_var
->data
.location
<< "(" << out_var
->data
.driver_location
<< ")\n";
// Records one NIR output variable in sh_info().output[driver_location].
// The semantic is obtained from tgsi_get_gl_frag_result_semantic(), and the
// write mask gets one bit per written component, starting at
// data.location_frac. If the variable is FRAG_RESULT_COLOR and the shader's
// outputs_written mask contains that bit, fs_write_all is set.
// NOTE(review): the bodies of the two trailing location checks and the
// remaining arguments of the semantic lookup are not visible in this listing.
300 bool FragmentShaderFromNir::do_process_outputs(nir_variable
*output
)
302 sfn_log
<< SfnLog::instr
<< "Parse output variable "
303 << output
->name
<< " @" << output
->data
.location
304 << "@dl:" << output
->data
.driver_location
<< "\n";
307 r600_shader_io
& io
= sh_info().output
[output
->data
.driver_location
];
308 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result
>( output
->data
.location
),
311 /* Check whether this code has become obsolete by the IO vectorization */
312 unsigned num_components
= 4;
313 unsigned vector_elements
= glsl_get_vector_elements(glsl_without_array(output
->type
));
315 num_components
= vector_elements
;
316 unsigned component
= output
->data
.location_frac
;
// Set one write-mask bit for each component in
// [component, component + num_components).
318 for (unsigned j
= component
; j
< num_components
+ component
; j
++)
319 io
.write_mask
|= 1 << j
;
321 int loc
= output
->data
.location
;
322 if (loc
== FRAG_RESULT_COLOR
&&
323 (m_nir
.info
.outputs_written
& (1ull << loc
))) {
324 sh_info().fs_write_all
= true;
327 if (output
->data
.location
== FRAG_RESULT_COLOR
||
328 (output
->data
.location
>= FRAG_RESULT_DATA0
&&
329 output
->data
.location
<= FRAG_RESULT_DATA7
)) {
332 if (output
->data
.location
== FRAG_RESULT_DEPTH
||
333 output
->data
.location
== FRAG_RESULT_STENCIL
||
334 output
->data
.location
== FRAG_RESULT_SAMPLE_MASK
) {
// Emits load_sample_mask_in restricted to the current sample.
// Two ALU instructions are generated: a left shift (op2_lshl_int) with the
// operands Value::one_i and the sample-id register, followed by an AND
// (op2_and_int) of that result with the preloaded sample mask register.
// Both registers must have been allocated beforehand (asserted).
342 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr
* instr
)
344 auto dest
= from_nir(instr
->dest
, 0);
345 assert(m_sample_id_reg
);
346 assert(m_sample_mask_reg
);
348 emit_instruction(new AluInstruction(op2_lshl_int
, dest
, Value::one_i
, m_sample_id_reg
, EmitInstruction::last_write
));
349 emit_instruction(new AluInstruction(op2_and_int
, dest
, dest
, m_sample_mask_reg
, EmitInstruction::last_write
));
// Dispatches the intrinsics handled by the fragment shader stage.
//  - load_sample_mask_in: emit_load_sample_mask_in() when
//    m_apply_sample_mask is set, otherwise the preloaded sample mask register,
//  - load_sample_id / load_front_face: the matching preloaded register,
//  - interp_deref_at_sample / _offset / _centroid and load_sample_pos: the
//    dedicated emit_* helpers.
// NOTE(review): the default case is not visible in this listing.
353 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr
* instr
)
355 switch (instr
->intrinsic
) {
356 case nir_intrinsic_load_sample_mask_in
:
357 if (m_apply_sample_mask
) {
358 return emit_load_sample_mask_in(instr
);
360 return load_preloaded_value(instr
->dest
, 0, m_sample_mask_reg
);
361 case nir_intrinsic_load_sample_id
:
362 return load_preloaded_value(instr
->dest
, 0, m_sample_id_reg
);
363 case nir_intrinsic_load_front_face
:
364 return load_preloaded_value(instr
->dest
, 0, m_front_face_reg
);
365 case nir_intrinsic_interp_deref_at_sample
:
366 return emit_interp_deref_at_sample(instr
);
367 case nir_intrinsic_interp_deref_at_offset
:
368 return emit_interp_deref_at_offset(instr
);
369 case nir_intrinsic_interp_deref_at_centroid
:
370 return emit_interp_deref_at_centroid(instr
);
371 case nir_intrinsic_load_sample_pos
:
372 return emit_load_sample_pos(instr
);
// Converts the preloaded front-face register in place with op2_setge_dx10
// against Value::zero. m_front_face_loaded is checked first and set before the
// instruction is emitted. Requires m_front_face_reg to be allocated (asserted).
// NOTE(review): the statement guarded by the m_front_face_loaded check is not
// visible in this listing (presumably an early return — confirm).
379 void FragmentShaderFromNir::load_front_face()
381 assert(m_front_face_reg
);
382 if (m_front_face_loaded
)
385 auto ir
= new AluInstruction(op2_setge_dx10
, m_front_face_reg
, m_front_face_reg
,
386 Value::zero
, {alu_write
, alu_last_instr
});
387 m_front_face_loaded
= true;
388 emit_instruction(ir
);
// Emits load_sample_pos as a vertex-cache fetch (vc_fetch) into a destination
// vector sized by the NIR destination's component count. The fetch uses
// fmt_32_32_32_32_float and R600_BUFFER_INFO_CONST_BUFFER, and has the
// vtx_srf_mode flag set before being emitted.
// NOTE(review): most FetchInstruction constructor arguments are not visible in
// this listing; consult the full source before changing this call.
391 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr
* instr
)
393 GPRVector dest
= vec_from_nir(instr
->dest
, nir_dest_num_components(instr
->dest
));
394 auto fetch
= new FetchInstruction(vc_fetch
,
396 fmt_32_32_32_32_float
,
404 R600_BUFFER_INFO_CONST_BUFFER
,
414 fetch
->set_flag(vtx_srf_mode
);
415 emit_instruction(fetch
);
// Emits interpolateAtSample for an input variable.
// Steps visible here:
//  1. fetch a vec4 ("slope") from R600_BUFFER_INFO_CONST_BUFFER, indexed by
//     the sample operand (src[1]),
//  2. query the horizontal and vertical gradients of the input's (j, i)
//     barycentrics into "grad" with two texture instructions,
//  3. combine them with four multiply-adds:
//        slope[0] = grad[0] * slope[2] + j
//        slope[1] = grad[1] * slope[2] + i
//        slope[0] = grad[2] * slope[3] + slope[0]
//        slope[1] = grad[3] * slope[3] + slope[1]
//  4. interpolate the input with a temporary Interpolator built from
//     slope[1] and slope[0].
419 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr
* instr
)
421 GPRVector slope
= get_temp_vec4();
423 auto fetch
= new FetchInstruction(vc_fetch
, no_index_offset
, slope
,
424 from_nir_with_fetch_constant(instr
->src
[1], 0),
425 0, R600_BUFFER_INFO_CONST_BUFFER
, PValue(), bim_none
);
426 fetch
->set_flag(vtx_srf_mode
);
427 emit_instruction(fetch
);
429 GPRVector grad
= get_temp_vec4();
430 auto var
= get_deref_location(instr
->src
[0]);
433 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
434 auto interpolator
= m_interpolator
[io
.ij_index()];
// Channel 7 fills the two unused source slots of the gradient query.
435 PValue
dummy(new GPRValue(interpolator
.i
->sel(), 7));
437 GPRVector
src({interpolator
.j
, interpolator
.i
, dummy
, dummy
});
439 auto tex
= new TexInstruction(TexInstruction::get_gradient_h
, grad
, src
, 0, 0, PValue());
440 tex
->set_dest_swizzle({0,1,7,7});
441 emit_instruction(tex
);
443 tex
= new TexInstruction(TexInstruction::get_gradient_v
, grad
, src
, 0, 0, PValue());
444 tex
->set_dest_swizzle({7,7,0,1});
445 emit_instruction(tex
);
447 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(0), {grad
.reg_i(0), slope
.reg_i(2), interpolator
.j
}, {alu_write
}));
448 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(1), {grad
.reg_i(1), slope
.reg_i(2), interpolator
.i
}, {alu_write
, alu_last_instr
}));
450 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(0), {grad
.reg_i(2), slope
.reg_i(3), slope
.reg_i(0)}, {alu_write
}));
451 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(1), {grad
.reg_i(3), slope
.reg_i(3), slope
.reg_i(1)}, {alu_write
, alu_last_instr
}));
453 Interpolator ip
= {true, 0, slope
.reg_i(1), slope
.reg_i(0)};
455 auto dst
= vec_from_nir(instr
->dest
, 4);
456 int num_components
= instr
->dest
.is_ssa
?
457 instr
->dest
.ssa
.num_components
:
458 instr
->dest
.reg
.reg
->num_components
;
460 load_interpolated(dst
, io
, ip
, num_components
, var
->data
.location_frac
);
// Emits interpolateAtOffset for an input variable.
// The horizontal and vertical gradients of the input's (j, i) barycentrics
// are queried into a temporary register ("help"), with all four
// *_unnormalized flags set on both texture instructions. The offset
// (src[1].x, src[1].y) is then applied with four multiply-adds:
//        help[0] = help[0] * ofs_x + j
//        help[1] = help[1] * ofs_x + i
//        help[0] = help[2] * ofs_y + help[0]
//        help[1] = help[3] * ofs_y + help[1]
// and the input is interpolated with a temporary Interpolator built from
// help[1] and help[0].
465 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr
* instr
)
467 int temp
= allocate_temp_register();
469 GPRVector
help(temp
, {0,1,2,3});
471 auto var
= get_deref_location(instr
->src
[0]);
474 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
475 auto interpolator
= m_interpolator
[io
.ij_index()];
// Channel 7 fills the two unused source slots of the gradient query.
476 PValue
dummy(new GPRValue(interpolator
.i
->sel(), 7));
478 GPRVector
interp({interpolator
.j
, interpolator
.i
, dummy
, dummy
});
480 auto getgradh
= new TexInstruction(TexInstruction::get_gradient_h
, help
, interp
, 0, 0, PValue());
481 getgradh
->set_dest_swizzle({0,1,7,7});
482 getgradh
->set_flag(TexInstruction::x_unnormalized
);
483 getgradh
->set_flag(TexInstruction::y_unnormalized
);
484 getgradh
->set_flag(TexInstruction::z_unnormalized
);
485 getgradh
->set_flag(TexInstruction::w_unnormalized
);
486 emit_instruction(getgradh
);
488 auto getgradv
= new TexInstruction(TexInstruction::get_gradient_v
, help
, interp
, 0, 0, PValue());
489 getgradv
->set_dest_swizzle({7,7,0,1});
490 getgradv
->set_flag(TexInstruction::x_unnormalized
);
491 getgradv
->set_flag(TexInstruction::y_unnormalized
);
492 getgradv
->set_flag(TexInstruction::z_unnormalized
);
493 getgradv
->set_flag(TexInstruction::w_unnormalized
);
494 emit_instruction(getgradv
);
496 PValue ofs_x
= from_nir(instr
->src
[1], 0);
497 PValue ofs_y
= from_nir(instr
->src
[1], 1);
498 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(0), help
.reg_i(0), ofs_x
, interpolator
.j
, {alu_write
}));
499 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(1), help
.reg_i(1), ofs_x
, interpolator
.i
, {alu_write
, alu_last_instr
}));
500 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(0), help
.reg_i(2), ofs_y
, help
.reg_i(0), {alu_write
}));
501 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(1), help
.reg_i(3), ofs_y
, help
.reg_i(1), {alu_write
, alu_last_instr
}));
503 Interpolator ip
= {true, 0, help
.reg_i(1), help
.reg_i(0)};
505 auto dst
= vec_from_nir(instr
->dest
, 4);
506 load_interpolated(dst
, io
, ip
, nir_dest_num_components(instr
->dest
),
507 var
->data
.location_frac
);
// Emits interpolateAtCentroid for an input variable.
// The input is marked as using centroid interpolation, and the interpolator
// slot is chosen from the input's own ij index: slot 5 when that index is
// >= 3, slot 2 otherwise. These are the slots
// do_allocate_reserved_registers() enables when
// m_enable_centroid_interpolators is set; the chosen slot must be enabled
// (asserted).
512 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr
* instr
)
514 auto var
= get_deref_location(instr
->src
[0]);
517 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
518 io
.set_uses_interpolate_at_centroid();
520 int ij_index
= io
.ij_index() >= 3 ? 5 : 2;
521 assert (m_interpolator
[ij_index
].enabled
);
522 auto ip
= m_interpolator
[ij_index
];
524 int num_components
= nir_dest_num_components(instr
->dest
);
526 auto dst
= vec_from_nir(instr
->dest
, 4);
527 load_interpolated(dst
, io
, ip
, num_components
, var
->data
.location_frac
);
// Emits the load of a fragment shader input variable.
//  - VARYING_SLOT_POS: the SSA destination components are bound directly to
//    the preloaded m_frag_pos registers via inject_register(),
//  - VARYING_SLOT_FACE: served from the preloaded front-face register,
//  - everything else: the matching shader input is looked up by driver
//    location and location_frac, its GPR is set to the destination register,
//    and the value is loaded through load_interpolated(). When location_frac
//    is non-zero the loaded channels are moved down so the result starts in
//    slot x. For colour inputs with two-sided colouring, the back colour is
//    loaded into a temporary register and op3_cnde selects per channel
//    between it and the front colour based on m_front_face_reg.
532 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable
*in_var
, nir_intrinsic_instr
* instr
)
534 if (in_var
->data
.location
== VARYING_SLOT_POS
) {
535 assert(instr
->dest
.is_ssa
);
537 for (int i
= 0; i
< instr
->dest
.ssa
.num_components
; ++i
) {
538 inject_register(instr
->dest
.ssa
.index
, i
, m_frag_pos
[i
], true);
543 if (in_var
->data
.location
== VARYING_SLOT_FACE
)
544 return load_preloaded_value(instr
->dest
, 0, m_front_face_reg
);
546 // todo: replace io with ShaderInputVarying
547 auto& io
= m_shaderio
.input(in_var
->data
.driver_location
, in_var
->data
.location_frac
);
548 unsigned num_components
= 4;
551 if (instr
->dest
.is_ssa
) {
552 num_components
= instr
->dest
.ssa
.num_components
;
554 num_components
= instr
->dest
.reg
.reg
->num_components
;
557 auto dst
= vec_from_nir(instr
->dest
, 4);
559 sfn_log
<< SfnLog::io
<< "Set input[" << in_var
->data
.driver_location
560 << "].gpr=" << dst
.sel() << "\n";
562 io
.set_gpr(dst
.sel());
// Inputs that are not interpolated fall back to interpolator slot 0.
564 auto& ip
= io
.interpolate() ? m_interpolator
[io
.ij_index()] : m_interpolator
[0];
566 load_interpolated(dst
, io
, ip
, num_components
, in_var
->data
.location_frac
);
568 /* These results are expected starting in slot x..*/
569 if (in_var
->data
.location_frac
> 0) {
570 int n
= instr
->dest
.is_ssa
? instr
->dest
.ssa
.num_components
:
571 instr
->dest
.reg
.reg
->num_components
;
572 AluInstruction
*ir
= nullptr;
573 for (int i
= 0; i
< n
; ++i
) {
574 ir
= new AluInstruction(op1_mov
, dst
[i
],
575 dst
[i
+ in_var
->data
.location_frac
], {alu_write
});
576 emit_instruction(ir
);
579 ir
->set_flag(alu_last_instr
);
583 if (m_need_back_color
&& io
.name() == TGSI_SEMANTIC_COLOR
) {
585 auto & color_input
= static_cast<ShaderInputColor
&> (io
);
586 auto& bgio
= m_shaderio
.input(color_input
.back_color_input_index());
588 bgio
.set_gpr(allocate_temp_register());
590 GPRVector
bgcol(bgio
.gpr(), {0,1,2,3});
591 load_interpolated(bgcol
, bgio
, ip
, num_components
, 0);
595 AluInstruction
*ir
= nullptr;
596 for (unsigned i
= 0; i
< 4 ; ++i
) {
597 ir
= new AluInstruction(op3_cnde
, dst
[i
], m_front_face_reg
, bgcol
[i
], dst
[i
], {alu_write
});
598 emit_instruction(ir
);
601 ir
->set_flag(alu_last_instr
);
// Loads an input into `dest`, picking the cheapest interpolation sequence for
// the requested component range.
//   dest            destination vector
//   io              shader input being read (provides lds_pos(), ij_index())
//   ip              interpolator supplying the i/j barycentrics
//   num_components  number of components requested
//   start_comp      first component requested
// For interpolated inputs (io.interpolate() > 0):
//   1 component : start 0 -> interp_x, 1 -> interp_xy (write y),
//                 2 -> interp_z, 3 -> interp_zw (write w)
//   2 components: start 0 -> interp_xy, 2 -> interp_zw,
//                 1 -> interp_z followed by interp_xy (write y)
//   3 components from 0: interp_xy followed by interp_z
//   otherwise   : interp_zw and interp_xy, each masked by the full write mask.
// The trailing loop loads all four channels with op1_interp_load_p0
// (presumably the non-interpolated path; the branch keyword is not visible in
// this listing — confirm).
607 bool FragmentShaderFromNir::load_interpolated(GPRVector
&dest
,
608 ShaderInput
& io
, const Interpolator
&ip
,
609 int num_components
, int start_comp
)
611 // replace io with ShaderInputVarying
612 if (io
.interpolate() > 0) {
614 sfn_log
<< SfnLog::io
<< "Using Interpolator " << io
.ij_index() << "\n";
616 if (num_components
== 1) {
617 switch (start_comp
) {
618 case 0: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_x
);
619 case 1: return load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_xy
, 0, 1);
620 case 2: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
);
621 case 3: return load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_zw
, 2, 3);
627 if (num_components
== 2) {
628 switch (start_comp
) {
629 case 0: return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, 0x3);
630 case 2: return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_zw
, 0xc);
631 case 1: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
) &&
632 load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_xy
, 0, 1);
638 if (num_components
== 3 && start_comp
== 0)
639 return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, 0x3) &&
640 load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
);
// One bit per requested component, shifted to the first requested component.
642 int full_write_mask
= ((1 << num_components
) - 1) << start_comp
;
644 bool success
= load_interpolated_two_comp(dest
, io
, ip
, op2_interp_zw
, full_write_mask
& 0xc);
645 success
&= load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, full_write_mask
& 0x3);
649 AluInstruction
*ir
= nullptr;
650 for (unsigned i
= 0; i
< 4 ; ++i
) {
651 ir
= new AluInstruction(op1_interp_load_p0
, dest
[i
],
652 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), i
)),
653 EmitInstruction::write
);
654 emit_instruction(ir
);
656 ir
->set_flag(alu_last_instr
);
// Emits a two-instruction interpolation sequence for `op`. Both instructions
// read the input's parameter (ALU_SRC_PARAM_BASE + io.lds_pos()) and use bank
// swizzle alu_vec_210. The second source alternates between ip.i (even
// iteration) and ip.j (odd iteration). The first instruction carries
// EmitInstruction::write and the second EmitInstruction::last.
// NOTE(review): the declaration of `chan` and the adjustment made for
// op2_interp_z are not visible in this listing.
661 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector
&dest
,
662 ShaderInput
& io
, const Interpolator
& ip
, EAluOp op
)
664 for (unsigned i
= 0; i
< 2 ; ++i
) {
666 if (op
== op2_interp_z
)
670 auto ir
= new AluInstruction(op
, dest
[chan
], i
& 1 ? ip
.j
: ip
.i
,
671 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
672 i
== 0 ? EmitInstruction::write
: EmitInstruction::last
);
673 dest
.pin_to_channel(chan
);
675 ir
->set_bank_swizzle(alu_vec_210
);
676 emit_instruction(ir
);
// Emits a four-instruction interpolation group for `op` (one instruction per
// destination channel). Channel i is written only when bit i of `writemask`
// is set; the other instructions are emitted with EmitInstruction::empty.
// Every destination channel is pinned, each instruction uses bank swizzle
// alu_vec_210, and alu_last_instr is set on `ir` after it has been emitted.
681 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector
&dest
, ShaderInput
& io
,
682 const Interpolator
& ip
, EAluOp op
, int writemask
)
684 AluInstruction
*ir
= nullptr;
685 for (unsigned i
= 0; i
< 4 ; ++i
) {
686 ir
= new AluInstruction(op
, dest
[i
], i
& 1 ? ip
.j
: ip
.i
, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
687 (writemask
& (1 << i
)) ? EmitInstruction::write
: EmitInstruction::empty
);
688 dest
.pin_to_channel(i
);
689 ir
->set_bank_swizzle(alu_vec_210
);
690 emit_instruction(ir
);
692 ir
->set_flag(alu_last_instr
);
// Emits a four-instruction interpolation group for `op` but keeps only one
// result: the instruction for channel `comp` is emitted with
// EmitInstruction::write, all others with EmitInstruction::empty.
// `start` is unused (marked UNUSED). Every destination channel is pinned, each
// instruction uses bank swizzle alu_vec_210, and alu_last_instr is set on `ir`
// after it has been emitted.
696 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector
&dest
,
697 ShaderInput
& io
, const Interpolator
& ip
,
698 EAluOp op
, UNUSED
int start
, int comp
)
700 AluInstruction
*ir
= nullptr;
701 for (int i
= 0; i
< 4 ; ++i
) {
702 ir
= new AluInstruction(op
, dest
[i
], i
& 1 ? ip
.j
: ip
.i
,
703 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
704 i
== comp
? EmitInstruction::write
: EmitInstruction::empty
);
705 ir
->set_bank_swizzle(alu_vec_210
);
706 dest
.pin_to_channel(i
);
707 emit_instruction(ir
);
709 ir
->set_flag(alu_last_instr
);
// Emits the pixel export(s) for a store to a fragment output.
//   out_var      output variable being written
//   instr        the store intrinsic (write mask, src[1] is the value)
//   all_chanels  when true the value is exported to up to
//                m_max_color_exports targets, otherwise to a single one
// Colour outputs (FRAG_RESULT_COLOR, DATA0..DATA7) are exported to location
// driver_location + k - m_depth_exports. Locations at or beyond
// m_max_color_exports are logged as skipped. For each emitted export the
// highest export index, the export count and the 4-bit-per-target export mask
// in sh_info() are updated. Depth, stencil and sample-mask outputs are
// exported to location 61.
// NOTE(review): the swizzle set-up for the depth/stencil/sample-mask cases is
// not visible in this listing.
714 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
, bool all_chanels
)
716 int outputs
= all_chanels
? m_max_color_exports
: 1;
718 std::array
<uint32_t,4> swizzle
;
719 unsigned writemask
= nir_intrinsic_write_mask(instr
);
720 switch (out_var
->data
.location
) {
721 case FRAG_RESULT_DEPTH
:
725 case FRAG_RESULT_STENCIL
:
729 case FRAG_RESULT_SAMPLE_MASK
:
// Identity swizzle for the stored components; 7 for the remaining channels.
734 for (int i
= 0; i
< 4; ++i
) {
735 swizzle
[i
] = (i
< instr
->num_components
) ? i
: 7;
739 auto value
= vec_from_nir_with_fetch_constant(instr
->src
[1], writemask
, swizzle
);
741 set_output(out_var
->data
.driver_location
, value
.sel());
743 if (out_var
->data
.location
== FRAG_RESULT_COLOR
||
744 (out_var
->data
.location
>= FRAG_RESULT_DATA0
&&
745 out_var
->data
.location
<= FRAG_RESULT_DATA7
)) {
746 for (int k
= 0 ; k
< outputs
; ++k
) {
748 unsigned location
= out_var
->data
.driver_location
+ k
- m_depth_exports
;
749 if (location
>= m_max_color_exports
) {
750 sfn_log
<< SfnLog::io
<< "Pixel output " << location
751 << " skipped because we have only " << m_max_color_exports
<< "CBs\n";
755 m_last_pixel_export
= new ExportInstruction(location
, value
, ExportInstruction::et_pixel
);
757 if (sh_info().ps_export_highest
< location
)
758 sh_info().ps_export_highest
= location
;
760 sh_info().nr_ps_color_exports
++;
// Each export target owns four bits of the colour export mask.
762 unsigned mask
= (0xfu
<< (location
* 4));
763 sh_info().ps_color_export_mask
|= mask
;
765 emit_export_instruction(m_last_pixel_export
);
766 ++m_max_counted_color_exports
;
768 } else if (out_var
->data
.location
== FRAG_RESULT_DEPTH
||
769 out_var
->data
.location
== FRAG_RESULT_STENCIL
||
770 out_var
->data
.location
== FRAG_RESULT_SAMPLE_MASK
) {
772 emit_export_instruction(new ExportInstruction(61, value
, ExportInstruction::et_pixel
));
// Final bookkeeping after all instructions have been emitted.
//  - Copies the input table into sh_info(): the input count, then per-input
//    I/O info via set_ioinfo(). An input's ij index outside [0, 6) is mapped
//    to interpolator slot 0 for that call.
//  - Publishes two_side, nlds and the number of colour exports counted.
//  - If no pixel export was emitted, a dummy export to location 0 with all
//    channels masked (swizzle 7) is added.
//  - The last pixel export is marked as last.
//  - With fs_write_all set, nr_ps_max_color_exports is first set to
//    m_max_color_exports and, at the end, to 8.
779 void FragmentShaderFromNir::do_finalize()
781 // update shader io info and set LDS etc.
782 sh_info().ninput
= m_shaderio
.inputs().size();
784 sfn_log
<< SfnLog::io
<< "Have " << sh_info().ninput
<< " inputs\n";
785 for (size_t i
= 0; i
< sh_info().ninput
; ++i
) {
786 int ij_idx
= (m_shaderio
.input(i
).ij_index() < 6 &&
787 m_shaderio
.input(i
).ij_index() >= 0) ? m_shaderio
.input(i
).ij_index() : 0;
788 m_shaderio
.input(i
).set_ioinfo(sh_info().input
[i
], m_interpolator
[ij_idx
].ij_index
);
791 sh_info().two_side
= m_shaderio
.two_sided();
792 sh_info().nlds
= m_shaderio
.nlds();
794 sh_info().nr_ps_max_color_exports
= m_max_counted_color_exports
;
796 if (sh_info().fs_write_all
) {
797 sh_info().nr_ps_max_color_exports
= m_max_color_exports
;
// Guarantees that m_last_pixel_export is non-null before set_last() is called.
800 if (!m_last_pixel_export
) {
801 GPRVector
v(0, {7,7,7,7});
802 m_last_pixel_export
= new ExportInstruction(0, v
, ExportInstruction::et_pixel
);
803 sh_info().nr_ps_color_exports
++;
804 sh_info().ps_color_export_mask
= 0xf;
805 emit_export_instruction(m_last_pixel_export
);
808 m_last_pixel_export
->set_last();
810 if (sh_info().fs_write_all
)
811 sh_info().nr_ps_max_color_exports
= 8;