3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
// Constructor: sets up the fragment-shader specific state on top of
// ShaderFromNirProcessor.
//
// Visible parameters: the NIR shader (only nir.scratch_size is read here),
// the pipe shader selector, the shader key and the chip class.
// NOTE(review): `sh` is forwarded to the base class, but its parameter
// declaration is not visible in this listing - confirm against the header.
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader
& nir
,
36 r600_pipe_shader_selector
&sel
,
37 const r600_shader_key
&key
,
38 enum chip_class chip_class
):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT
, sel
, sh
, nir
.scratch_size
, chip_class
),
// The color export limit is the number of color buffers in the key, but
// never less than one.
40 m_max_color_exports(MAX2(key
.ps
.nr_cbufs
,1)),
41 m_max_counted_color_exports(0),
// Two-sided color handling is requested through the shader key.
42 m_two_sided_color(key
.ps
.color_two_side
),
43 m_last_pixel_export(nullptr),
45 m_reserved_registers(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
50 m_enable_centroid_interpolators(false)
// Walks all interpolator slots.
// NOTE(review): the loop body is not visible in this listing.
52 for (auto& i
: m_interpolator
) {
// rat_base is set to the number of color buffers and atomic_base to the
// first atomic counter, both taken from the shader key.
57 sh_info().rat_base
= key
.ps
.nr_cbufs
;
58 sh_info().atomic_base
= key
.ps
.first_atomic_counter
;
// Registers one NIR input variable with the shader IO bookkeeping.
//
// The variable's name, location, driver location and interpolation mode
// are logged first. Its location is then translated to a TGSI semantic
// and dispatched on:
//  - VARYING_SLOT_FACE marks es_face in m_sv_values,
//  - TGSI_SEMANTIC_POSITION marks es_pos in m_sv_values,
//  - TGSI_SEMANTIC_COLOR adds a ShaderInputColor and requests back-color
//    handling when two-sided color is enabled,
//  - TGSI_SEMANTIC_PRIMID records the primitive-id input in sh_info(),
//  - the generic varying semantics add a ShaderInputVarying unless one is
//    already found for the same name/sid/location_frac.
// NOTE(review): the return statements, the `break`s and the declarations
// of `name` and `sid` are not visible in this listing.
61 bool FragmentShaderFromNir::do_process_inputs(nir_variable
*input
)
63 sfn_log
<< SfnLog::io
<< "Parse input variable "
64 << input
->name
<< " location:" << input
->data
.location
65 << " driver-loc:" << input
->data
.driver_location
66 << " interpolation:" << input
->data
.interpolation
// The front-face input is only recorded as a required system value.
71 if (input
->data
.location
== VARYING_SLOT_FACE
) {
72 m_sv_values
.set(es_face
);
// Translate the gl_varying_slot into a TGSI semantic.
76 tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot
>(input
->data
.location
),
79 /* Work around the mixed tgsi/nir semantic problems, this fixes
80 * dEQP-GLES2.functional.shaders.builtin_variable.pointcoord */
81 if (input
->data
.location
== VARYING_SLOT_PNTC
) {
82 name
= TGSI_SEMANTIC_GENERIC
;
86 tgsi_semantic sname
= static_cast<tgsi_semantic
>(name
);
89 case TGSI_SEMANTIC_POSITION
: {
90 m_sv_values
.set(es_pos
);
93 case TGSI_SEMANTIC_COLOR
: {
94 m_shaderio
.add_input(new ShaderInputColor(sname
, sid
, input
));
// A back color is only needed when two-sided color is enabled.
95 m_need_back_color
= m_two_sided_color
;
// ps_prim_id_input receives the number of inputs registered so far.
// NOTE(review): no statement is visible between this case and the labels
// that follow; confirm whether it falls through to the varying handling.
98 case TGSI_SEMANTIC_PRIMID
:
99 sh_info().gs_prim_id_input
= true;
100 sh_info().ps_prim_id_input
= m_shaderio
.inputs().size();
102 case TGSI_SEMANTIC_FOG
:
103 case TGSI_SEMANTIC_GENERIC
:
104 case TGSI_SEMANTIC_TEXCOORD
:
105 case TGSI_SEMANTIC_LAYER
:
106 case TGSI_SEMANTIC_PCOORD
:
107 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
108 case TGSI_SEMANTIC_CLIPDIST
: {
// Add the varying only if find_varying() reports no existing match.
109 if (!m_shaderio
.find_varying(sname
, sid
, input
->data
.location_frac
))
110 m_shaderio
.add_input(new ShaderInputVarying(sname
, sid
, input
));
// Inspects one NIR instruction and records which preloaded values the
// shader needs.
//
// Only intrinsic instructions are examined in the visible code:
//  - load_front_face     sets es_face in m_sv_values,
//  - load_sample_mask_in sets es_sample_mask_in,
//  - load_sample_id      sets es_sample_id,
//  - interp_deref_at_centroid sets m_enable_centroid_interpolators.
// NOTE(review): default cases and the return value are not visible in
// this listing.
118 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr
*instr
)
120 switch (instr
->type
) {
121 case nir_instr_type_intrinsic
: {
122 nir_intrinsic_instr
*ii
= nir_instr_as_intrinsic(instr
);
123 switch (ii
->intrinsic
) {
124 case nir_intrinsic_load_front_face
:
125 m_sv_values
.set(es_face
);
127 case nir_intrinsic_load_sample_mask_in
:
128 m_sv_values
.set(es_sample_mask_in
);
130 case nir_intrinsic_load_sample_id
:
131 m_sv_values
.set(es_sample_id
);
133 case nir_intrinsic_interp_deref_at_centroid
:
134 /* This is not a sysvalue, should go elsewhere */
135 m_enable_centroid_interpolators
= true;
// Lays out the registers that hold the fragment shader's preloaded
// inputs and registers the matching system-value inputs.
//
// Steps visible in this listing, in order:
//  1. enable every interpolator referenced by an input (via ij_index()),
//     plus slots 2 and 5 when centroid interpolation was requested,
//  2. sort the varying inputs,
//  3. give each enabled interpolator a barycentric index and an i/j
//     register pair, then add the register count to m_reserved_registers,
//  4. reserve registers for the fragment position, front face, sample
//     mask and sample id as flagged in m_sv_values,
//  5. add a back-color input for each color input when needed,
//  6. update the LDS positions and publish the reserved register count.
// Must be called only once: it asserts m_reserved_registers is still zero.
// NOTE(review): the declaration and increment of `num_baryc` and the
// return statement are not visible in this listing.
147 bool FragmentShaderFromNir::allocate_reserved_registers()
149 assert(!m_reserved_registers
);
// -1 means "no register shared by the system values allocated yet".
151 int face_reg_index
= -1;
152 // enabled interpolators based on inputs
153 for (auto& i
: m_shaderio
.inputs()) {
154 int ij
= i
->ij_index();
156 m_interpolator
[ij
].enabled
= true;
160 /* Lazy, enable both possible interpolators,
161 * TODO: check which ones are really needed */
162 if (m_enable_centroid_interpolators
) {
163 m_interpolator
[2].enabled
= true; /* perspective */
164 m_interpolator
[5].enabled
= true; /* linear */
167 // sort the varying inputs
168 m_shaderio
.sort_varying_inputs();
170 // handle interpolators
172 for (int i
= 0; i
< 6; ++i
) {
173 if (m_interpolator
[i
].enabled
) {
174 sfn_log
<< SfnLog::io
<< "Interpolator " << i
<< " is enabled\n";
176 m_interpolator
[i
].ij_index
= num_baryc
;
// Two barycentric pairs fit into one register: the register index is
// num_baryc / 2 and the pair starts at channel 0 or 2.
178 unsigned sel
= num_baryc
/ 2;
179 unsigned chan
= 2 * (num_baryc
% 2);
// The i value uses the upper channel of the pair (chan + 1) ...
181 auto ip_i
= new GPRValue(sel
, chan
+ 1);
182 ip_i
->set_as_input();
183 m_interpolator
[i
].i
.reset(ip_i
);
184 inject_register(sel
, chan
+ 1, m_interpolator
[i
].i
, false);
// ... and the j value the lower channel (chan).
186 auto ip_j
= new GPRValue(sel
, chan
);
187 ip_j
->set_as_input();
188 m_interpolator
[i
].j
.reset(ip_j
);
189 inject_register(sel
, chan
, m_interpolator
[i
].j
, false);
// Number of registers taken by the barycentrics, rounded up.
194 m_reserved_registers
+= (num_baryc
+ 1) >> 1;
// The fragment position gets a register of its own.
196 if (m_sv_values
.test(es_pos
)) {
197 m_frag_pos_index
= m_reserved_registers
++;
198 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION
, m_frag_pos_index
));
201 // handle system values
// The front face value is placed in channel 0 of a newly reserved
// register; it is also needed when back colors have to be selected.
202 if (m_sv_values
.test(es_face
) || m_need_back_color
) {
203 face_reg_index
= m_reserved_registers
++;
204 auto ffr
= new GPRValue(face_reg_index
,0);
206 m_front_face_reg
.reset(ffr
);
207 sfn_log
<< SfnLog::io
<< "Set front_face register to " << *m_front_face_reg
<< "\n";
208 inject_register(ffr
->sel(), ffr
->chan(), m_front_face_reg
, false);
210 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE
, face_reg_index
));
// The sample mask uses channel 2 of the same register as the front face,
// reserving that register here if it has not been allocated yet.
214 if (m_sv_values
.test(es_sample_mask_in
)) {
215 if (face_reg_index
< 0)
216 face_reg_index
= m_reserved_registers
++;
218 auto smi
= new GPRValue(face_reg_index
,2);
220 m_sample_mask_reg
.reset(smi
);
221 sfn_log
<< SfnLog::io
<< "Set sample mask in register to " << *m_sample_mask_reg
<< "\n";
222 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
223 sh_info().nsys_inputs
= 1;
224 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK
, face_reg_index
));
// The sample id uses channel 3 of that same register.
227 if (m_sv_values
.test(es_sample_id
)) {
228 if (face_reg_index
< 0)
229 face_reg_index
= m_reserved_registers
++;
231 auto smi
= new GPRValue(face_reg_index
, 3);
233 m_sample_id_reg
.reset(smi
);
234 sfn_log
<< SfnLog::io
<< "Set sample id register to " << *m_sample_id_reg
<< "\n";
235 sh_info().nsys_inputs
++;
236 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID
, face_reg_index
));
239 // The back color handling is not emitted in the code, so we have
240 // to add the inputs here and later we also need to inject the code to set
242 if (m_need_back_color
) {
// The input count is captured up front because the loop appends inputs.
243 size_t ninputs
= m_shaderio
.inputs().size();
244 for (size_t k
= 0; k
< ninputs
; ++k
) {
245 ShaderInput
& i
= m_shaderio
.input(k
);
// Only color inputs get a back-color companion.
// NOTE(review): the statement guarded by this test is not visible.
247 if (i
.name() != TGSI_SEMANTIC_COLOR
)
250 ShaderInputColor
& col
= static_cast<ShaderInputColor
&>(i
);
// The back color is appended at the current end of the input list and
// the front color remembers that position.
252 size_t next_pos
= m_shaderio
.size();
253 auto bcol
= new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR
, col
, next_pos
);
254 m_shaderio
.add_input(bcol
);
255 col
.set_back_color(next_pos
);
257 m_shaderio
.set_two_sided();
260 m_shaderio
.update_lds_pos();
262 set_reserved_registers(m_reserved_registers
);
// Emits the instructions needed at the start of the shader for the
// preloaded system values flagged in m_sv_values.
//
// When es_pos is set, a GPRValue is created for each of the four channels
// of register m_frag_pos_index, and an op1_recip_ieee instruction that
// reads and writes the same register value is emitted.
// NOTE(review): the statement executed for es_face, any condition
// restricting the reciprocal to particular channels, and the code that
// stores `reg` are not visible in this listing.
267 void FragmentShaderFromNir::emit_shader_start()
269 if (m_sv_values
.test(es_face
))
272 if (m_sv_values
.test(es_pos
)) {
273 for (int i
= 0; i
< 4; ++i
) {
274 auto v
= new GPRValue(m_frag_pos_index
, i
);
276 auto reg
= PValue(v
);
// In-place reciprocal: destination and source are the same value.
278 emit_instruction(new AluInstruction(op1_recip_ieee
, reg
, reg
, {alu_write
, alu_last_instr
}));
// Handles a store to a fragment shader output variable.
//
// FRAG_RESULT_COLOR is exported through emit_export_pixel() with the
// all-channels flag set to true. FRAG_RESULT_DATA0..DATA7,
// FRAG_RESULT_DEPTH and FRAG_RESULT_STENCIL are exported with the flag
// set to false. Any other location is reported on the error log.
// NOTE(review): the return value of the error path is not visible in
// this listing.
284 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
286 if (out_var
->data
.location
== FRAG_RESULT_COLOR
)
287 return emit_export_pixel(out_var
, instr
, true);
289 if ((out_var
->data
.location
>= FRAG_RESULT_DATA0
&&
290 out_var
->data
.location
<= FRAG_RESULT_DATA7
) ||
291 out_var
->data
.location
== FRAG_RESULT_DEPTH
||
292 out_var
->data
.location
== FRAG_RESULT_STENCIL
)
293 return emit_export_pixel(out_var
, instr
, false);
// Unsupported output location: log it with location and driver location.
295 sfn_log
<< SfnLog::err
<< "r600-NIR: Unimplemented store_deref for " <<
296 out_var
->data
.location
<< "(" << out_var
->data
.driver_location
<< ")\n";
// Registers one NIR output variable in sh_info().output.
//
// The variable is logged, the output slot indexed by its driver location
// is fetched, and the location is translated with
// tgsi_get_gl_frag_result_semantic(). The slot's write mask is then
// extended by one bit per component, starting at location_frac. Writing
// FRAG_RESULT_COLOR (when also flagged in outputs_written) sets
// fs_write_all.
// NOTE(review): the remaining arguments of the semantic call, the
// condition around the num_components assignment, the bodies of the two
// final location tests and the return statements are not visible in
// this listing.
300 bool FragmentShaderFromNir::do_process_outputs(nir_variable
*output
)
302 sfn_log
<< SfnLog::instr
<< "Parse output variable "
303 << output
->name
<< " @" << output
->data
.location
304 << "@dl:" << output
->data
.driver_location
<< "\n";
307 r600_shader_io
& io
= sh_info().output
[output
->data
.driver_location
];
308 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result
>( output
->data
.location
),
311 /* Check whether this code has become obsolete by the IO vectorization */
312 unsigned num_components
= 4;
// Element count of the variable's type with any array wrapper removed.
313 unsigned vector_elements
= glsl_get_vector_elements(glsl_without_array(output
->type
));
315 num_components
= vector_elements
;
316 unsigned component
= output
->data
.location_frac
;
// Mark channels [component, component + num_components) as written.
318 for (unsigned j
= component
; j
< num_components
+ component
; j
++)
319 io
.write_mask
|= 1 << j
;
321 int loc
= output
->data
.location
;
322 if (loc
== FRAG_RESULT_COLOR
&&
323 (m_nir
.info
.outputs_written
& (1ull << loc
))) {
324 sh_info().fs_write_all
= true;
// Color outputs (body not visible in this listing).
327 if (output
->data
.location
== FRAG_RESULT_COLOR
||
328 (output
->data
.location
>= FRAG_RESULT_DATA0
&&
329 output
->data
.location
<= FRAG_RESULT_DATA7
)) {
// Depth and stencil outputs (body not visible in this listing).
332 if (output
->data
.location
== FRAG_RESULT_DEPTH
||
333 output
->data
.location
== FRAG_RESULT_STENCIL
) {
// Fragment-shader specific handling of NIR intrinsics.
//
// The sample mask, sample id and front face loads are served from their
// preloaded registers through load_preloaded_value(). The three
// interp_deref_at_* intrinsics are forwarded to their emit helpers.
// Returns the result of the selected handler.
// NOTE(review): the default case is not visible in this listing.
341 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr
* instr
)
343 switch (instr
->intrinsic
) {
344 case nir_intrinsic_load_sample_mask_in
:
345 return load_preloaded_value(instr
->dest
, 0, m_sample_mask_reg
);
346 case nir_intrinsic_load_sample_id
:
347 return load_preloaded_value(instr
->dest
, 0, m_sample_id_reg
);
348 case nir_intrinsic_load_front_face
:
349 return load_preloaded_value(instr
->dest
, 0, m_front_face_reg
);
350 case nir_intrinsic_interp_deref_at_sample
:
351 return emit_interp_deref_at_sample(instr
);
352 case nir_intrinsic_interp_deref_at_offset
:
353 return emit_interp_deref_at_offset(instr
);
354 case nir_intrinsic_interp_deref_at_centroid
:
355 return emit_interp_deref_at_centroid(instr
);
// Emits the instruction that converts the preloaded front-face register
// in place: op2_setge_dx10 with m_front_face_reg as destination and first
// source, compared against Value::zero.
//
// Requires m_front_face_reg to be set (asserted). m_front_face_loaded is
// checked on entry and set to true before the instruction is emitted.
// NOTE(review): the statement guarded by the m_front_face_loaded test is
// not visible in this listing; presumably an early return - confirm.
361 void FragmentShaderFromNir::load_front_face()
363 assert(m_front_face_reg
);
364 if (m_front_face_loaded
)
367 auto ir
= new AluInstruction(op2_setge_dx10
, m_front_face_reg
, m_front_face_reg
,
368 Value::zero
, {alu_write
, alu_last_instr
});
369 m_front_face_loaded
= true;
370 emit_instruction(ir
);
// Emits the code for nir_intrinsic_interp_deref_at_sample.
//
// Visible sequence:
//  1. fetch a vec4 into `slope` from buffer R600_BUFFER_INFO_CONST_BUFFER,
//     addressed by component 0 of instr->src[1]
//     (presumably the position data for the requested sample - confirm),
//  2. query the horizontal and vertical gradients of the input's i/j
//     barycentrics into `grad`,
//  3. combine gradients, fetched values and the original j/i with four
//     multiply-adds to obtain adjusted barycentrics,
//  4. interpolate the input with them through load_interpolated().
// NOTE(review): the return statement is not visible in this listing.
373 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr
* instr
)
375 GPRVector slope
= get_temp_vec4();
377 auto fetch
= new FetchInstruction(vc_fetch
, no_index_offset
, slope
,
378 from_nir_with_fetch_constant(instr
->src
[1], 0),
379 0, R600_BUFFER_INFO_CONST_BUFFER
, PValue(), bim_none
);
380 fetch
->set_flag(vtx_srf_mode
);
381 emit_instruction(fetch
);
383 GPRVector grad
= get_temp_vec4();
// Resolve the variable behind the deref and look up its shader input.
384 auto var
= get_deref_location(instr
->src
[0]);
387 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
388 auto interpolator
= m_interpolator
[io
.ij_index()];
// Placeholder value on channel 7 of the interpolator's register; it
// fills the last two slots of the gradient source vector.
389 PValue
dummy(new GPRValue(interpolator
.i
->sel(), 7));
391 GPRVector
src({interpolator
.j
, interpolator
.i
, dummy
, dummy
});
// The horizontal gradient is written to the first two destination slots.
393 auto tex
= new TexInstruction(TexInstruction::get_gradient_h
, grad
, src
, 0, 0, PValue());
394 tex
->set_dest_swizzle({0,1,7,7});
395 emit_instruction(tex
);
// The vertical gradient is written to the last two destination slots.
397 tex
= new TexInstruction(TexInstruction::get_gradient_v
, grad
, src
, 0, 0, PValue());
398 tex
->set_dest_swizzle({7,7,0,1});
399 emit_instruction(tex
);
// slope[0] = grad[0] * slope[2] + j ; slope[1] = grad[1] * slope[2] + i
401 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(0), {grad
.reg_i(0), slope
.reg_i(2), interpolator
.j
}, {alu_write
}));
402 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(1), {grad
.reg_i(1), slope
.reg_i(2), interpolator
.i
}, {alu_write
, alu_last_instr
}));
// slope[0] += grad[2] * slope[3] ; slope[1] += grad[3] * slope[3]
404 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(0), {grad
.reg_i(2), slope
.reg_i(3), slope
.reg_i(0)}, {alu_write
}));
405 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(1), {grad
.reg_i(3), slope
.reg_i(3), slope
.reg_i(1)}, {alu_write
, alu_last_instr
}));
// Temporary interpolator built from the adjusted values: slope[1] in the
// third field and slope[0] in the fourth.
407 Interpolator ip
= {true, 0, slope
.reg_i(1), slope
.reg_i(0)};
409 auto dst
= vec_from_nir(instr
->dest
, 4);
// The component count comes from the SSA or the register destination.
410 int num_components
= instr
->dest
.is_ssa
?
411 instr
->dest
.ssa
.num_components
:
412 instr
->dest
.reg
.reg
->num_components
;
414 load_interpolated(dst
, io
, ip
, num_components
, var
->data
.location_frac
);
// Emits the code for nir_intrinsic_interp_deref_at_offset.
//
// The horizontal and vertical gradients of the input's i/j barycentrics
// are queried into a temporary register. They are scaled by the x and y
// offsets taken from instr->src[1] and added to the original j/i with
// four multiply-adds. The adjusted values are then used to interpolate
// the input through load_interpolated().
// NOTE(review): the return statement is not visible in this listing.
419 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr
* instr
)
421 int temp
= allocate_temp_register();
// All four channels of the temporary register.
423 GPRVector
help(temp
, {0,1,2,3});
425 auto var
= get_deref_location(instr
->src
[0]);
428 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
429 auto interpolator
= m_interpolator
[io
.ij_index()];
// Placeholder value on channel 7 of the interpolator's register; it
// fills the last two slots of the gradient source vector.
430 PValue
dummy(new GPRValue(interpolator
.i
->sel(), 7));
432 GPRVector
interp({interpolator
.j
, interpolator
.i
, dummy
, dummy
});
// The horizontal gradient goes to the first two slots of `help`; all four
// coordinates are flagged as unnormalized.
434 auto getgradh
= new TexInstruction(TexInstruction::get_gradient_h
, help
, interp
, 0, 0, PValue());
435 getgradh
->set_dest_swizzle({0,1,7,7});
436 getgradh
->set_flag(TexInstruction::x_unnormalized
);
437 getgradh
->set_flag(TexInstruction::y_unnormalized
);
438 getgradh
->set_flag(TexInstruction::z_unnormalized
);
439 getgradh
->set_flag(TexInstruction::w_unnormalized
);
440 emit_instruction(getgradh
);
// The vertical gradient goes to the last two slots of `help`.
442 auto getgradv
= new TexInstruction(TexInstruction::get_gradient_v
, help
, interp
, 0, 0, PValue());
443 getgradv
->set_dest_swizzle({7,7,0,1});
444 getgradv
->set_flag(TexInstruction::x_unnormalized
);
445 getgradv
->set_flag(TexInstruction::y_unnormalized
);
446 getgradv
->set_flag(TexInstruction::z_unnormalized
);
447 getgradv
->set_flag(TexInstruction::w_unnormalized
);
448 emit_instruction(getgradv
);
// The offset is the second intrinsic source; components 0 and 1 are used.
450 PValue ofs_x
= from_nir(instr
->src
[1], 0);
451 PValue ofs_y
= from_nir(instr
->src
[1], 1);
// help[0] = help[0] * ofs_x + j ; help[1] = help[1] * ofs_x + i
452 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(0), help
.reg_i(0), ofs_x
, interpolator
.j
, {alu_write
}));
453 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(1), help
.reg_i(1), ofs_x
, interpolator
.i
, {alu_write
, alu_last_instr
}));
// help[0] += help[2] * ofs_y ; help[1] += help[3] * ofs_y
454 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(0), help
.reg_i(2), ofs_y
, help
.reg_i(0), {alu_write
}));
455 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(1), help
.reg_i(3), ofs_y
, help
.reg_i(1), {alu_write
, alu_last_instr
}));
// Temporary interpolator built from the adjusted values: help[1] in the
// third field and help[0] in the fourth.
457 Interpolator ip
= {true, 0, help
.reg_i(1), help
.reg_i(0)};
459 auto dst
= vec_from_nir(instr
->dest
, 4);
460 load_interpolated(dst
, io
, ip
, nir_dest_num_components(instr
->dest
),
461 var
->data
.location_frac
);
// Emits the code for nir_intrinsic_interp_deref_at_centroid.
//
// The input is flagged as using interpolate-at-centroid and is then
// interpolated with one of the two centroid interpolators: slot 5 when
// the input's own ij index is 3 or above, slot 2 otherwise. The chosen
// interpolator must already be enabled (asserted).
// NOTE(review): the return statement is not visible in this listing.
466 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr
* instr
)
468 auto var
= get_deref_location(instr
->src
[0]);
471 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
472 io
.set_uses_interpolate_at_centroid();
// allocate_reserved_registers() labels slot 2 "perspective" and slot 5
// "linear".
474 int ij_index
= io
.ij_index() >= 3 ? 5 : 2;
475 assert (m_interpolator
[ij_index
].enabled
);
476 auto ip
= m_interpolator
[ij_index
];
478 int num_components
= nir_dest_num_components(instr
->dest
);
480 auto dst
= vec_from_nir(instr
->dest
, 4);
481 load_interpolated(dst
, io
, ip
, num_components
, var
->data
.location_frac
);
// Emits the code for loading a fragment shader input variable.
//
//  - VARYING_SLOT_POS: the destination must be SSA (asserted); each of
//    its components is bound to the matching m_frag_pos entry.
//  - VARYING_SLOT_FACE: served from the preloaded front-face register.
//  - Any other input: interpolated into a vec4 destination with
//    load_interpolated(). When location_frac is non-zero the result is
//    moved down so that it starts at channel 0. When back colors are
//    needed and the input is a color, the back color is interpolated into
//    a temporary register and op3_cnde, conditioned on m_front_face_reg,
//    selects between the two per channel.
// NOTE(review): the return statements, the `else` between the two
// num_components assignments and any guards around the set_flag() calls
// are not visible in this listing.
486 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable
*in_var
, nir_intrinsic_instr
* instr
)
488 if (in_var
->data
.location
== VARYING_SLOT_POS
) {
489 assert(instr
->dest
.is_ssa
);
// Bind each destination component to the preloaded position value.
491 for (int i
= 0; i
< instr
->dest
.ssa
.num_components
; ++i
) {
492 inject_register(instr
->dest
.ssa
.index
, i
, m_frag_pos
[i
], true);
497 if (in_var
->data
.location
== VARYING_SLOT_FACE
)
498 return load_preloaded_value(instr
->dest
, 0, m_front_face_reg
);
500 // todo: replace io with ShaderInputVarying
501 auto& io
= m_shaderio
.input(in_var
->data
.driver_location
, in_var
->data
.location_frac
);
502 unsigned num_components
= 4;
// The component count comes from the SSA or the register destination.
505 if (instr
->dest
.is_ssa
) {
506 num_components
= instr
->dest
.ssa
.num_components
;
508 num_components
= instr
->dest
.reg
.reg
->num_components
;
511 auto dst
= vec_from_nir(instr
->dest
, 4);
513 sfn_log
<< SfnLog::io
<< "Set input[" << in_var
->data
.driver_location
514 << "].gpr=" << dst
.sel() << "\n";
// Record the destination register on the input.
516 io
.set_gpr(dst
.sel());
// Inputs that are not interpolated are paired with interpolator slot 0.
518 auto& ip
= io
.interpolate() ? m_interpolator
[io
.ij_index()] : m_interpolator
[0];
520 load_interpolated(dst
, io
, ip
, num_components
, in_var
->data
.location_frac
);
522 /* These results are expected starting in slot x..*/
523 if (in_var
->data
.location_frac
> 0) {
524 int n
= instr
->dest
.is_ssa
? instr
->dest
.ssa
.num_components
:
525 instr
->dest
.reg
.reg
->num_components
;
526 AluInstruction
*ir
= nullptr;
// Move channel (i + location_frac) down to channel i.
527 for (int i
= 0; i
< n
; ++i
) {
528 ir
= new AluInstruction(op1_mov
, dst
[i
],
529 dst
[i
+ in_var
->data
.location_frac
], {alu_write
});
530 emit_instruction(ir
);
// Flag the most recently emitted move as the last of its group.
533 ir
->set_flag(alu_last_instr
);
537 if (m_need_back_color
&& io
.name() == TGSI_SEMANTIC_COLOR
) {
// Look up the back-color input linked to this front color.
539 auto & color_input
= static_cast<ShaderInputColor
&> (io
);
540 auto& bgio
= m_shaderio
.input(color_input
.back_color_input_index());
542 bgio
.set_gpr(allocate_temp_register());
544 GPRVector
bgcol(bgio
.gpr(), {0,1,2,3});
545 load_interpolated(bgcol
, bgio
, ip
, num_components
, 0);
549 AluInstruction
*ir
= nullptr;
// Per-channel conditional select between bgcol[i] and dst[i], keyed on
// the front-face register; the result overwrites dst[i].
550 for (unsigned i
= 0; i
< 4 ; ++i
) {
551 ir
= new AluInstruction(op3_cnde
, dst
[i
], m_front_face_reg
, bgcol
[i
], dst
[i
], {alu_write
});
552 emit_instruction(ir
);
555 ir
->set_flag(alu_last_instr
);
// Loads the value of input `io` into `dest`, using interpolator `ip`
// where the input is interpolated.
//
// For interpolated inputs the cheapest instruction sequence is chosen
// from the component count and the start component:
//  - 1 component: a one-component load for start 0 and 2, a masked
//    two-component load for start 1 and 3,
//  - 2 components: one xy or zw load for start 0 or 2, a z load plus a
//    masked xy load for start 1,
//  - 3 components starting at 0: an xy load plus a z load,
//  - anything else: a zw and an xy load masked by the write mask built
//    from num_components and start_comp.
// The final loop emits op1_interp_load_p0 for all four channels.
// NOTE(review): the default cases, the return after the masked loads and
// the branch structure around the final loop are not visible in this
// listing; presumably the loop is the non-interpolated path - confirm.
561 bool FragmentShaderFromNir::load_interpolated(GPRVector
&dest
,
562 ShaderInput
& io
, const Interpolator
&ip
,
563 int num_components
, int start_comp
)
565 // replace io with ShaderInputVarying
566 if (io
.interpolate() > 0) {
568 sfn_log
<< SfnLog::io
<< "Using Interpolator " << io
.ij_index() << "\n";
570 if (num_components
== 1) {
571 switch (start_comp
) {
572 case 0: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_x
);
573 case 1: return load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_xy
, 0, 1);
574 case 2: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
);
575 case 3: return load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_zw
, 2, 3);
581 if (num_components
== 2) {
582 switch (start_comp
) {
583 case 0: return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, 0x3);
584 case 2: return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_zw
, 0xc);
585 case 1: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
) &&
586 load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_xy
, 0, 1);
592 if (num_components
== 3 && start_comp
== 0)
593 return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, 0x3) &&
594 load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
);
// General case: one mask bit per component, shifted to the start
// component, then split into the zw (0xc) and xy (0x3) halves.
596 int full_write_mask
= ((1 << num_components
) - 1) << start_comp
;
598 bool success
= load_interpolated_two_comp(dest
, io
, ip
, op2_interp_zw
, full_write_mask
& 0xc);
599 success
&= load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, full_write_mask
& 0x3);
603 AluInstruction
*ir
= nullptr;
// Load every channel from the parameter constant at
// ALU_SRC_PARAM_BASE + lds_pos.
604 for (unsigned i
= 0; i
< 4 ; ++i
) {
605 ir
= new AluInstruction(op1_interp_load_p0
, dest
[i
],
606 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), i
)),
607 EmitInstruction::write
);
608 emit_instruction(ir
);
610 ir
->set_flag(alu_last_instr
);
// Emits a two-instruction interpolation that loads one component of
// input `io` into `dest`.
//
// Two instructions with opcode `op` are emitted. The first reads ip.i and
// carries the write flag set; the second reads ip.j and carries the last
// flag set. Both use channel 0 of the parameter constant at
// ALU_SRC_PARAM_BASE + lds_pos, and bank swizzle alu_vec_210.
// NOTE(review): the computation of `chan` (it depends on whether `op` is
// op2_interp_z) and the return statement are not visible in this listing.
615 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector
&dest
,
616 ShaderInput
& io
, const Interpolator
& ip
, EAluOp op
)
618 for (unsigned i
= 0; i
< 2 ; ++i
) {
620 if (op
== op2_interp_z
)
// Odd slots read j, even slots read i.
624 auto ir
= new AluInstruction(op
, dest
[chan
], i
& 1 ? ip
.j
: ip
.i
,
625 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
626 i
== 0 ? EmitInstruction::write
: EmitInstruction::last
);
627 dest
.pin_to_channel(chan
);
629 ir
->set_bank_swizzle(alu_vec_210
);
630 emit_instruction(ir
);
// Emits a four-slot interpolation group with opcode `op` for input `io`.
//
// One instruction is emitted per channel of `dest`. Only channels whose
// bit is set in `writemask` carry the write flag set; the others get the
// empty flag set. Every destination channel is pinned, and the final
// instruction is flagged as the last of the group.
// NOTE(review): the return statement is not visible in this listing.
635 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector
&dest
, ShaderInput
& io
,
636 const Interpolator
& ip
, EAluOp op
, int writemask
)
638 AluInstruction
*ir
= nullptr;
639 for (unsigned i
= 0; i
< 4 ; ++i
) {
// Odd slots read j, even slots read i; the second source is channel 0 of
// the parameter constant at ALU_SRC_PARAM_BASE + lds_pos.
640 ir
= new AluInstruction(op
, dest
[i
], i
& 1 ? ip
.j
: ip
.i
, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
641 (writemask
& (1 << i
)) ? EmitInstruction::write
: EmitInstruction::empty
);
642 dest
.pin_to_channel(i
);
643 ir
->set_bank_swizzle(alu_vec_210
);
644 emit_instruction(ir
);
646 ir
->set_flag(alu_last_instr
);
// Emits a four-slot interpolation group with opcode `op`, keeping only
// one result channel.
//
// One instruction is emitted per channel of `dest`. Only the instruction
// for channel `comp` carries the write flag set; the others get the empty
// flag set. The `start` parameter is marked UNUSED and is not read. The
// final instruction is flagged as the last of the group.
// NOTE(review): the return statement is not visible in this listing.
650 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector
&dest
,
651 ShaderInput
& io
, const Interpolator
& ip
,
652 EAluOp op
, UNUSED
int start
, int comp
)
654 AluInstruction
*ir
= nullptr;
655 for (int i
= 0; i
< 4 ; ++i
) {
// Odd slots read j, even slots read i; the second source is channel 0 of
// the parameter constant at ALU_SRC_PARAM_BASE + lds_pos.
656 ir
= new AluInstruction(op
, dest
[i
], i
& 1 ? ip
.j
: ip
.i
,
657 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
658 i
== comp
? EmitInstruction::write
: EmitInstruction::empty
);
659 ir
->set_bank_swizzle(alu_vec_210
);
660 dest
.pin_to_channel(i
);
661 emit_instruction(ir
);
663 ir
->set_flag(alu_last_instr
);
// Emits the export instruction(s) for a store to a fragment output.
//
// out_var     - the output variable being written,
// instr       - the store intrinsic; src[1] holds the value and the
//               intrinsic write mask is honoured,
// all_chanels - when true the value is exported once per color export
//               slot (m_max_color_exports), otherwise exactly once.
//
// Color outputs are exported to consecutive locations starting at
// driver_location - m_depth_exports. Locations at or beyond
// m_max_color_exports are logged as skipped. Each color export updates
// the export statistics in sh_info(). Depth and stencil outputs are both
// exported to location 61.
// NOTE(review): the swizzle used for FRAG_RESULT_STENCIL, the statement
// after the "skipped" log message, any m_depth_exports update and the
// return statements are not visible in this listing.
668 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
, bool all_chanels
)
670 int outputs
= all_chanels
? m_max_color_exports
: 1;
672 std::array
<uint32_t,4> swizzle
;
673 unsigned writemask
= nir_intrinsic_write_mask(instr
);
// Non-stencil outputs: identity swizzle for the components present in the
// store, 7 for the remaining channels.
674 if (out_var
->data
.location
!= FRAG_RESULT_STENCIL
) {
675 for (int i
= 0; i
< 4; ++i
) {
676 swizzle
[i
] = (i
< instr
->num_components
) ? i
: 7;
682 GPRVector
*value
= vec_from_nir_with_fetch_constant(instr
->src
[1], writemask
, swizzle
);
684 set_output(out_var
->data
.driver_location
, PValue(value
));
686 if (out_var
->data
.location
== FRAG_RESULT_COLOR
||
687 (out_var
->data
.location
>= FRAG_RESULT_DATA0
&&
688 out_var
->data
.location
<= FRAG_RESULT_DATA7
)) {
689 for (int k
= 0 ; k
< outputs
; ++k
) {
// The color export index is the driver location less m_depth_exports.
691 unsigned location
= out_var
->data
.driver_location
+ k
- m_depth_exports
;
692 if (location
>= m_max_color_exports
) {
693 sfn_log
<< SfnLog::io
<< "Pixel output " << location
694 << " skipped because we have only " << m_max_color_exports
<< "CBs\n";
698 m_last_pixel_export
= new ExportInstruction(location
, *value
, ExportInstruction::et_pixel
);
// Track the highest export location used so far.
700 if (sh_info().ps_export_highest
< location
)
701 sh_info().ps_export_highest
= location
;
703 sh_info().nr_ps_color_exports
++;
// Four mask bits per export location.
705 unsigned mask
= (0xfu
<< (location
* 4));
706 sh_info().ps_color_export_mask
|= mask
;
708 emit_export_instruction(m_last_pixel_export
);
709 ++m_max_counted_color_exports
;
711 } else if (out_var
->data
.location
== FRAG_RESULT_DEPTH
) {
713 emit_export_instruction(new ExportInstruction(61, *value
, ExportInstruction::et_pixel
));
714 } else if (out_var
->data
.location
== FRAG_RESULT_STENCIL
) {
716 emit_export_instruction(new ExportInstruction(61, *value
, ExportInstruction::et_pixel
));
// Finishes the shader: copies the collected input information into
// sh_info(), publishes the export counters and makes sure the shader
// ends with a pixel export flagged as the last one.
//
// If no pixel export was emitted, an export to location 0 is created
// from a vector whose four swizzle entries are all 7.
// nr_ps_max_color_exports is first set to the counted number of color
// exports, then to m_max_color_exports when fs_write_all is set, and
// finally to 8 when fs_write_all is set.
724 void FragmentShaderFromNir::do_finalize()
726 // update shader io info and set LDS etc.
727 sh_info().ninput
= m_shaderio
.inputs().size();
729 sfn_log
<< SfnLog::io
<< "Have " << sh_info().ninput
<< " inputs\n";
730 for (size_t i
= 0; i
< sh_info().ninput
; ++i
) {
// Inputs whose ij index lies outside [0, 6) use interpolator slot 0.
731 int ij_idx
= (m_shaderio
.input(i
).ij_index() < 6 &&
732 m_shaderio
.input(i
).ij_index() >= 0) ? m_shaderio
.input(i
).ij_index() : 0;
733 m_shaderio
.input(i
).set_ioinfo(sh_info().input
[i
], m_interpolator
[ij_idx
].ij_index
);
736 sh_info().two_side
= m_shaderio
.two_sided();
737 sh_info().nlds
= m_shaderio
.nlds();
739 sh_info().nr_ps_max_color_exports
= m_max_counted_color_exports
;
741 if (sh_info().fs_write_all
) {
742 sh_info().nr_ps_max_color_exports
= m_max_color_exports
;
// No pixel export was emitted: add one at location 0.
745 if (!m_last_pixel_export
) {
746 GPRVector
v(0, {7,7,7,7});
747 m_last_pixel_export
= new ExportInstruction(0, v
, ExportInstruction::et_pixel
);
748 sh_info().nr_ps_color_exports
++;
749 sh_info().ps_color_export_mask
= 0xf;
750 emit_export_instruction(m_last_pixel_export
);
753 m_last_pixel_export
->set_last();
// NOTE(review): this overrides the fs_write_all value assigned above
// (m_max_color_exports) with a fixed 8 - confirm that both assignments
// are intended.
755 if (sh_info().fs_write_all
)
756 sh_info().nr_ps_max_color_exports
= 8;