3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
// Constructor. Hands PIPE_SHADER_FRAGMENT, the selector, the shader, the NIR
// scratch size and the chip class to ShaderFromNirProcessor, then seeds the
// fragment-shader specific members from the shader key.
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader
& nir
,
36 r600_pipe_shader_selector
&sel
,
37 const r600_shader_key
&key
,
38 enum chip_class chip_class
):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT
, sel
, sh
, nir
.scratch_size
, chip_class
),
// MAX2(..., 1): at least one color export is assumed even when the key
// reports zero color buffers.
40 m_max_color_exports(MAX2(key
.ps
.nr_cbufs
,1)),
41 m_max_counted_color_exports(0),
// Two-sided color lighting is requested through the shader key.
42 m_two_sided_color(key
.ps
.color_two_side
),
43 m_last_pixel_export(nullptr),
45 m_reserved_registers(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
50 m_enable_centroid_interpolators(false)
// NOTE(review): the per-interpolator initialisation performed inside this
// loop is not shown here; confirm every slot starts out disabled.
52 for (auto& i
: m_interpolator
) {
// rat_base is taken from the number of color buffers in the key,
// atomic_base from the key's first atomic counter.
57 sh_info().rat_base
= key
.ps
.nr_cbufs
;
58 sh_info().atomic_base
= key
.ps
.first_atomic_counter
;
// Registers one NIR input variable of the fragment shader.
// The variable is logged, its varying slot is translated to a TGSI
// semantic name/index pair with r600_get_varying_semantic(), and depending
// on that semantic either a system value is requested (m_sv_values) or a
// new shader input is appended to m_shaderio.
61 bool FragmentShaderFromNir::do_process_inputs(nir_variable
*input
)
63 sfn_log
<< SfnLog::io
<< "Parse input variable "
64 << input
->name
<< " location:" << input
->data
.location
65 << " driver-loc:" << input
->data
.driver_location
66 << " interpolation:" << input
->data
.interpolation
// The face varying is not a regular input: it is only recorded as the
// es_face system value.
69 if (input
->data
.location
== VARYING_SLOT_FACE
) {
70 m_sv_values
.set(es_face
);
// first = semantic name, second = semantic index.
75 auto semantic
= r600_get_varying_semantic(input
->data
.location
);
76 name
= semantic
.first
;
77 sid
= semantic
.second
;
79 tgsi_semantic sname
= static_cast<tgsi_semantic
>(name
);
// Position is recorded as the es_pos system value.
82 case TGSI_SEMANTIC_POSITION
: {
83 m_sv_values
.set(es_pos
);
// Colors get their own input class; a back color is needed exactly when
// two-sided color was requested in the shader key.
86 case TGSI_SEMANTIC_COLOR
: {
87 m_shaderio
.add_input(new ShaderInputColor(sname
, sid
, input
));
88 m_need_back_color
= m_two_sided_color
;
// The primitive id records the current number of inputs as its input index.
// NOTE(review): no statement is visible between this case and the labels
// below; if control is meant to fall through into the generic varying
// handling, confirm that this is intentional and annotated.
91 case TGSI_SEMANTIC_PRIMID
:
92 sh_info().gs_prim_id_input
= true;
93 sh_info().ps_prim_id_input
= m_shaderio
.inputs().size();
95 case TGSI_SEMANTIC_FOG
:
96 case TGSI_SEMANTIC_GENERIC
:
97 case TGSI_SEMANTIC_TEXCOORD
:
98 case TGSI_SEMANTIC_LAYER
:
99 case TGSI_SEMANTIC_PCOORD
:
100 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
101 case TGSI_SEMANTIC_CLIPDIST
: {
// A varying is only added when find_varying() does not already report one
// for this (semantic, index, location_frac) combination.
102 if (!m_shaderio
.find_varying(sname
, sid
, input
->data
.location_frac
))
103 m_shaderio
.add_input(new ShaderInputVarying(sname
, sid
, input
));
// Inspects a single NIR instruction and records what it will require later:
// the front-face, sample-mask-in and sample-id intrinsics set the matching
// bit in m_sv_values, and interp_deref_at_centroid switches on the centroid
// interpolators.
111 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr
*instr
)
113 switch (instr
->type
) {
114 case nir_instr_type_intrinsic
: {
115 nir_intrinsic_instr
*ii
= nir_instr_as_intrinsic(instr
);
116 switch (ii
->intrinsic
) {
117 case nir_intrinsic_load_front_face
:
118 m_sv_values
.set(es_face
);
120 case nir_intrinsic_load_sample_mask_in
:
121 m_sv_values
.set(es_sample_mask_in
);
123 case nir_intrinsic_load_sample_id
:
124 m_sv_values
.set(es_sample_id
);
126 case nir_intrinsic_interp_deref_at_centroid
:
127 /* This is not a sysvalue, should go elsewhere */
128 m_enable_centroid_interpolators
= true;
// Assigns the registers that hold fragment-shader inputs: the barycentric
// i/j pairs of every enabled interpolator, the fragment position, and the
// register shared by front face / sample mask / sample id. It also adds the
// matching system-value inputs and, for two-sided color, the back-color
// inputs to m_shaderio. The final count is passed on through
// set_reserved_registers().
140 bool FragmentShaderFromNir::allocate_reserved_registers()
// Must only run once: nothing may have been reserved yet.
142 assert(!m_reserved_registers
);
// -1 means "no register assigned yet" for the face/sample system values;
// the first of them that is needed allocates it (see the "< 0" checks below).
144 int face_reg_index
= -1;
145 // enabled interpolators based on inputs
146 for (auto& i
: m_shaderio
.inputs()) {
147 int ij
= i
->ij_index();
149 m_interpolator
[ij
].enabled
= true;
153 /* Lazy, enable both possible interpolators,
154 * TODO: check which ones are really needed */
155 if (m_enable_centroid_interpolators
) {
156 m_interpolator
[2].enabled
= true; /* perspective */
157 m_interpolator
[5].enabled
= true; /* linear */
160 // sort the varying inputs
161 m_shaderio
.sort_varying_inputs();
163 // handle interpolators
165 for (int i
= 0; i
< 6; ++i
) {
166 if (m_interpolator
[i
].enabled
) {
167 sfn_log
<< SfnLog::io
<< "Interpolator " << i
<< " is enabled\n";
169 m_interpolator
[i
].ij_index
= num_baryc
;
// Two i/j pairs share one register: pair n is placed in register n / 2 and
// uses channels 0-1 (even n) or 2-3 (odd n). Within a pair, i takes the
// higher channel (chan + 1) and j the lower one (chan).
171 unsigned sel
= num_baryc
/ 2;
172 unsigned chan
= 2 * (num_baryc
% 2);
174 auto ip_i
= new GPRValue(sel
, chan
+ 1);
175 ip_i
->set_as_input();
176 m_interpolator
[i
].i
.reset(ip_i
);
177 inject_register(sel
, chan
+ 1, m_interpolator
[i
].i
, false);
179 auto ip_j
= new GPRValue(sel
, chan
);
180 ip_j
->set_as_input();
181 m_interpolator
[i
].j
.reset(ip_j
);
182 inject_register(sel
, chan
, m_interpolator
[i
].j
, false);
// Number of registers taken by the barycentric pairs, rounded up.
187 m_reserved_registers
+= (num_baryc
+ 1) >> 1;
// The fragment position gets the next free register of its own.
189 if (m_sv_values
.test(es_pos
)) {
190 m_frag_pos_index
= m_reserved_registers
++;
191 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION
, m_frag_pos_index
));
194 // handle system values
// The front face is also needed when back colors have to be selected.
// It is placed in channel 0 of the face/sample register.
195 if (m_sv_values
.test(es_face
) || m_need_back_color
) {
196 face_reg_index
= m_reserved_registers
++;
197 auto ffr
= new GPRValue(face_reg_index
,0);
199 m_front_face_reg
.reset(ffr
);
200 sfn_log
<< SfnLog::io
<< "Set front_face register to " << *m_front_face_reg
<< "\n";
201 inject_register(ffr
->sel(), ffr
->chan(), m_front_face_reg
, false);
203 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE
, face_reg_index
));
// The sample mask uses channel 2 of the same register, allocating the
// register first if the face value did not already do so.
207 if (m_sv_values
.test(es_sample_mask_in
)) {
208 if (face_reg_index
< 0)
209 face_reg_index
= m_reserved_registers
++;
211 auto smi
= new GPRValue(face_reg_index
,2);
213 m_sample_mask_reg
.reset(smi
);
214 sfn_log
<< SfnLog::io
<< "Set sample mask in register to " << *m_sample_mask_reg
<< "\n";
215 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
// NOTE(review): this assigns nsys_inputs while the sample-id branch below
// increments it; confirm nothing else has counted system inputs before here.
216 sh_info().nsys_inputs
= 1;
217 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK
, face_reg_index
));
// The sample id uses channel 3 of the same register.
220 if (m_sv_values
.test(es_sample_id
)) {
221 if (face_reg_index
< 0)
222 face_reg_index
= m_reserved_registers
++;
224 auto smi
= new GPRValue(face_reg_index
, 3);
226 m_sample_id_reg
.reset(smi
);
227 sfn_log
<< SfnLog::io
<< "Set sample id register to " << *m_sample_id_reg
<< "\n";
228 sh_info().nsys_inputs
++;
229 m_shaderio
.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID
, face_reg_index
));
232 // The back color handling is not emitted in the code, so we have
233 // to add the inputs here and later we also need to inject the code to set
235 if (m_need_back_color
) {
// The input count is captured before the loop so that the back-color inputs
// appended inside it are not visited themselves.
236 size_t ninputs
= m_shaderio
.inputs().size();
237 for (size_t k
= 0; k
< ninputs
; ++k
) {
238 ShaderInput
& i
= m_shaderio
.input(k
);
// NOTE(review): the statement guarded by this check is not shown here;
// presumably non-color inputs are skipped, which is what makes the
// static_cast below valid - confirm.
240 if (i
.name() != TGSI_SEMANTIC_COLOR
)
243 ShaderInputColor
& col
= static_cast<ShaderInputColor
&>(i
);
// The back color is appended at the current end of the io list and the
// front color remembers that position.
245 size_t next_pos
= m_shaderio
.size();
246 auto bcol
= new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR
, col
, next_pos
);
247 m_shaderio
.add_input(bcol
);
248 col
.set_back_color(next_pos
);
250 m_shaderio
.set_two_sided();
253 m_shaderio
.update_lds_pos();
255 set_reserved_registers(m_reserved_registers
);
// Emits the code that has to run at the very start of the shader for the
// system values requested earlier (front face, fragment position).
260 void FragmentShaderFromNir::emit_shader_start()
// NOTE(review): the statement guarded by this check is not shown here.
262 if (m_sv_values
.test(es_face
))
265 if (m_sv_values
.test(es_pos
)) {
// One GPRValue per channel of the register reserved for the fragment
// position (m_frag_pos_index).
266 for (int i
= 0; i
< 4; ++i
) {
267 auto v
= new GPRValue(m_frag_pos_index
, i
);
269 auto reg
= PValue(v
);
// In-place reciprocal (op1_recip_ieee) of the position channel.
// NOTE(review): whether this is emitted for every channel or only for one
// is decided by a line that is not shown here - confirm before relying on it.
271 emit_instruction(new AluInstruction(op1_recip_ieee
, reg
, reg
, {alu_write
, alu_last_instr
}));
// Handles a store to a fragment-shader output variable.
// FRAG_RESULT_COLOR is exported with all_chanels = true; FRAG_RESULT_DATA0
// through DATA7, DEPTH and STENCIL are exported with all_chanels = false.
// Any other location is reported through the error log.
// NOTE(review): emit_export_pixel() also contains handling for
// FRAG_RESULT_SAMPLE_MASK, but that location is not routed to it from here;
// confirm whether sample-mask stores are expected to reach this function.
277 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
279 if (out_var
->data
.location
== FRAG_RESULT_COLOR
)
280 return emit_export_pixel(out_var
, instr
, true);
282 if ((out_var
->data
.location
>= FRAG_RESULT_DATA0
&&
283 out_var
->data
.location
<= FRAG_RESULT_DATA7
) ||
284 out_var
->data
.location
== FRAG_RESULT_DEPTH
||
285 out_var
->data
.location
== FRAG_RESULT_STENCIL
)
286 return emit_export_pixel(out_var
, instr
, false);
288 sfn_log
<< SfnLog::err
<< "r600-NIR: Unimplemented store_deref for " <<
289 out_var
->data
.location
<< "(" << out_var
->data
.driver_location
<< ")\n";
// Registers one NIR output variable of the fragment shader: logs it,
// fills the r600_shader_io slot selected by the variable's driver location
// and accumulates the write mask for the components the variable covers.
293 bool FragmentShaderFromNir::do_process_outputs(nir_variable
*output
)
295 sfn_log
<< SfnLog::instr
<< "Parse output variable "
296 << output
->name
<< " @" << output
->data
.location
297 << "@dl:" << output
->data
.driver_location
<< "\n";
// The output slot is indexed by the driver location.
300 r600_shader_io
& io
= sh_info().output
[output
->data
.driver_location
];
// NOTE(review): the remaining arguments of this call are not shown here;
// presumably they receive the semantic name and index for io - confirm.
301 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result
>( output
->data
.location
),
304 /* Check whether this code has become obsolete by the IO vectorization */
305 unsigned num_components
= 4;
306 unsigned vector_elements
= glsl_get_vector_elements(glsl_without_array(output
->type
));
308 num_components
= vector_elements
;
309 unsigned component
= output
->data
.location_frac
;
// Set one write-mask bit per component, starting at location_frac.
311 for (unsigned j
= component
; j
< num_components
+ component
; j
++)
312 io
.write_mask
|= 1 << j
;
// FRAG_RESULT_COLOR being written marks the shader as "write all".
314 int loc
= output
->data
.location
;
315 if (loc
== FRAG_RESULT_COLOR
&&
316 (m_nir
.info
.outputs_written
& (1ull << loc
))) {
317 sh_info().fs_write_all
= true;
// NOTE(review): the bodies of the two classification branches below
// (color outputs, depth/stencil outputs) are not shown here.
320 if (output
->data
.location
== FRAG_RESULT_COLOR
||
321 (output
->data
.location
>= FRAG_RESULT_DATA0
&&
322 output
->data
.location
<= FRAG_RESULT_DATA7
)) {
325 if (output
->data
.location
== FRAG_RESULT_DEPTH
||
326 output
->data
.location
== FRAG_RESULT_STENCIL
) {
// Fragment-shader specific intrinsic dispatch.
// sample_mask_in, sample_id and front_face are served from the registers
// set up for them (component 0 of the destination); the three
// interp_deref_at_* intrinsics are forwarded to their emit helpers.
334 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr
* instr
)
336 switch (instr
->intrinsic
) {
337 case nir_intrinsic_load_sample_mask_in
:
338 return load_preloaded_value(instr
->dest
, 0, m_sample_mask_reg
);
339 case nir_intrinsic_load_sample_id
:
340 return load_preloaded_value(instr
->dest
, 0, m_sample_id_reg
);
341 case nir_intrinsic_load_front_face
:
342 return load_preloaded_value(instr
->dest
, 0, m_front_face_reg
);
343 case nir_intrinsic_interp_deref_at_sample
:
344 return emit_interp_deref_at_sample(instr
);
345 case nir_intrinsic_interp_deref_at_offset
:
346 return emit_interp_deref_at_offset(instr
);
347 case nir_intrinsic_interp_deref_at_centroid
:
348 return emit_interp_deref_at_centroid(instr
);
// Converts the raw front-face register in place: emits
// op2_setge_dx10(front_face, Value::zero) with the front-face register as
// both destination and first source, and remembers in m_front_face_loaded
// that this has been done. Requires the front-face register to exist.
354 void FragmentShaderFromNir::load_front_face()
356 assert(m_front_face_reg
);
// NOTE(review): the statement guarded by this check is not shown here;
// presumably an early return so the conversion is emitted only once - confirm.
357 if (m_front_face_loaded
)
360 auto ir
= new AluInstruction(op2_setge_dx10
, m_front_face_reg
, m_front_face_reg
,
361 Value::zero
, {alu_write
, alu_last_instr
});
362 m_front_face_loaded
= true;
363 emit_instruction(ir
);
// Implements interp_deref_at_sample: builds adjusted barycentrics from the
// input's interpolator, its screen-space gradients and per-sample data
// fetched from a buffer, then interpolates the input with them.
366 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr
* instr
)
// Fetch per-sample data into "slope" from the R600_BUFFER_INFO_CONST_BUFFER
// buffer, addressed by the intrinsic's second source.
// NOTE(review): presumably channels z/w of the fetched vector hold the
// sample offset used below - confirm against the buffer layout.
368 GPRVector slope
= get_temp_vec4();
370 auto fetch
= new FetchInstruction(vc_fetch
, no_index_offset
, slope
,
371 from_nir_with_fetch_constant(instr
->src
[1], 0),
372 0, R600_BUFFER_INFO_CONST_BUFFER
, PValue(), bim_none
);
373 fetch
->set_flag(vtx_srf_mode
);
374 emit_instruction(fetch
);
376 GPRVector grad
= get_temp_vec4();
377 auto var
= get_deref_location(instr
->src
[0]);
// Look up the input and the interpolator assigned to it.
380 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
381 auto interpolator
= m_interpolator
[io
.ij_index()];
// Placeholder value (channel 7) used for the two unused source slots.
382 PValue
dummy(new GPRValue(interpolator
.i
->sel(), 7));
384 GPRVector
src({interpolator
.j
, interpolator
.i
, dummy
, dummy
});
// Horizontal gradient of (j, i) goes to grad channels 0/1, the vertical
// gradient to grad channels 2/3 (see the destination swizzles).
386 auto tex
= new TexInstruction(TexInstruction::get_gradient_h
, grad
, src
, 0, 0, PValue());
387 tex
->set_dest_swizzle({0,1,7,7});
388 emit_instruction(tex
);
390 tex
= new TexInstruction(TexInstruction::get_gradient_v
, grad
, src
, 0, 0, PValue());
391 tex
->set_dest_swizzle({7,7,0,1});
392 emit_instruction(tex
);
// slope[0] = grad[0] * slope[2] + j ;  slope[1] = grad[1] * slope[2] + i
394 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(0), {grad
.reg_i(0), slope
.reg_i(2), interpolator
.j
}, {alu_write
}));
395 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(1), {grad
.reg_i(1), slope
.reg_i(2), interpolator
.i
}, {alu_write
, alu_last_instr
}));
// slope[0] += grad[2] * slope[3] ;  slope[1] += grad[3] * slope[3]
397 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(0), {grad
.reg_i(2), slope
.reg_i(3), slope
.reg_i(0)}, {alu_write
}));
398 emit_instruction(new AluInstruction(op3_muladd
, slope
.reg_i(1), {grad
.reg_i(3), slope
.reg_i(3), slope
.reg_i(1)}, {alu_write
, alu_last_instr
}));
// Temporary interpolator built from the adjusted values: slope[1] is passed
// in the position where the i value was used above, slope[0] in that of j.
400 Interpolator ip
= {true, 0, slope
.reg_i(1), slope
.reg_i(0)};
402 auto dst
= vec_from_nir(instr
->dest
, 4);
// Component count of the destination, for both SSA and register dests.
403 int num_components
= instr
->dest
.is_ssa
?
404 instr
->dest
.ssa
.num_components
:
405 instr
->dest
.reg
.reg
->num_components
;
407 load_interpolated(dst
, io
, ip
, num_components
, var
->data
.location_frac
);
// Implements interp_deref_at_offset: shifts the input's barycentrics by the
// offset given in the intrinsic's second source, using the screen-space
// gradients of the barycentrics, then interpolates the input with them.
412 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr
* instr
)
// Scratch register holding the gradients and later the shifted i/j.
414 int temp
= allocate_temp_register();
416 GPRVector
help(temp
, {0,1,2,3});
418 auto var
= get_deref_location(instr
->src
[0]);
// Look up the input and the interpolator assigned to it.
421 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
422 auto interpolator
= m_interpolator
[io
.ij_index()];
// Placeholder value (channel 7) used for the two unused source slots.
423 PValue
dummy(new GPRValue(interpolator
.i
->sel(), 7));
425 GPRVector
interp({interpolator
.j
, interpolator
.i
, dummy
, dummy
});
// Horizontal gradient of (j, i) goes to help channels 0/1; all four
// coordinate flags are set to unnormalized.
427 auto getgradh
= new TexInstruction(TexInstruction::get_gradient_h
, help
, interp
, 0, 0, PValue());
428 getgradh
->set_dest_swizzle({0,1,7,7});
429 getgradh
->set_flag(TexInstruction::x_unnormalized
);
430 getgradh
->set_flag(TexInstruction::y_unnormalized
);
431 getgradh
->set_flag(TexInstruction::z_unnormalized
);
432 getgradh
->set_flag(TexInstruction::w_unnormalized
);
433 emit_instruction(getgradh
);
// Vertical gradient of (j, i) goes to help channels 2/3.
435 auto getgradv
= new TexInstruction(TexInstruction::get_gradient_v
, help
, interp
, 0, 0, PValue());
436 getgradv
->set_dest_swizzle({7,7,0,1});
437 getgradv
->set_flag(TexInstruction::x_unnormalized
);
438 getgradv
->set_flag(TexInstruction::y_unnormalized
);
439 getgradv
->set_flag(TexInstruction::z_unnormalized
);
440 getgradv
->set_flag(TexInstruction::w_unnormalized
);
441 emit_instruction(getgradv
);
// The offset is read from components 0 and 1 of the second source.
443 PValue ofs_x
= from_nir(instr
->src
[1], 0);
444 PValue ofs_y
= from_nir(instr
->src
[1], 1);
// help[0] = help[0] * ofs_x + j ;  help[1] = help[1] * ofs_x + i
445 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(0), help
.reg_i(0), ofs_x
, interpolator
.j
, {alu_write
}));
446 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(1), help
.reg_i(1), ofs_x
, interpolator
.i
, {alu_write
, alu_last_instr
}));
// help[0] += help[2] * ofs_y ;  help[1] += help[3] * ofs_y
447 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(0), help
.reg_i(2), ofs_y
, help
.reg_i(0), {alu_write
}));
448 emit_instruction(new AluInstruction(op3_muladd
, help
.reg_i(1), help
.reg_i(3), ofs_y
, help
.reg_i(1), {alu_write
, alu_last_instr
}));
// Temporary interpolator built from the shifted values: help[1] is passed
// in the position where the i value was used above, help[0] in that of j.
450 Interpolator ip
= {true, 0, help
.reg_i(1), help
.reg_i(0)};
452 auto dst
= vec_from_nir(instr
->dest
, 4);
453 load_interpolated(dst
, io
, ip
, nir_dest_num_components(instr
->dest
),
454 var
->data
.location_frac
);
// Implements interp_deref_at_centroid: marks the input as using centroid
// interpolation and interpolates it with one of the two centroid
// interpolator slots.
459 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr
* instr
)
461 auto var
= get_deref_location(instr
->src
[0]);
464 auto& io
= m_shaderio
.input(var
->data
.driver_location
, var
->data
.location_frac
);
465 io
.set_uses_interpolate_at_centroid();
// Inputs whose interpolator index is 3 or above use slot 5, all others
// slot 2; these are the two slots that allocate_reserved_registers()
// enables when centroid interpolators are requested.
467 int ij_index
= io
.ij_index() >= 3 ? 5 : 2;
468 assert (m_interpolator
[ij_index
].enabled
);
469 auto ip
= m_interpolator
[ij_index
];
471 int num_components
= nir_dest_num_components(instr
->dest
);
473 auto dst
= vec_from_nir(instr
->dest
, 4);
474 load_interpolated(dst
, io
, ip
, num_components
, var
->data
.location_frac
);
// Handles a load from a fragment-shader input variable.
// Position and face are served from their dedicated registers; every other
// input is loaded through load_interpolated(), shifted down to channel x
// when it starts at a non-zero component, and - for two-sided colors -
// combined with the matching back color.
479 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable
*in_var
, nir_intrinsic_instr
* instr
)
// Position: map each destination component onto the stored position
// register of the same channel. Only SSA destinations are supported here.
481 if (in_var
->data
.location
== VARYING_SLOT_POS
) {
482 assert(instr
->dest
.is_ssa
);
484 for (int i
= 0; i
< instr
->dest
.ssa
.num_components
; ++i
) {
485 inject_register(instr
->dest
.ssa
.index
, i
, m_frag_pos
[i
], true);
// Face: read from the front-face register.
490 if (in_var
->data
.location
== VARYING_SLOT_FACE
)
491 return load_preloaded_value(instr
->dest
, 0, m_front_face_reg
);
493 // todo: replace io with ShaderInputVarying
494 auto& io
= m_shaderio
.input(in_var
->data
.driver_location
, in_var
->data
.location_frac
);
// Component count of the destination, for both SSA and register dests.
495 unsigned num_components
= 4;
498 if (instr
->dest
.is_ssa
) {
499 num_components
= instr
->dest
.ssa
.num_components
;
501 num_components
= instr
->dest
.reg
.reg
->num_components
;
504 auto dst
= vec_from_nir(instr
->dest
, 4);
506 sfn_log
<< SfnLog::io
<< "Set input[" << in_var
->data
.driver_location
507 << "].gpr=" << dst
.sel() << "\n";
// Remember which register the input was loaded into.
509 io
.set_gpr(dst
.sel());
// Interpolated inputs use their own interpolator; for all others slot 0 is
// passed along.
511 auto& ip
= io
.interpolate() ? m_interpolator
[io
.ij_index()] : m_interpolator
[0];
513 load_interpolated(dst
, io
, ip
, num_components
, in_var
->data
.location_frac
);
515 /* These results are expected starting in slot x..*/
// Move each loaded component down by location_frac channels.
516 if (in_var
->data
.location_frac
> 0) {
517 int n
= instr
->dest
.is_ssa
? instr
->dest
.ssa
.num_components
:
518 instr
->dest
.reg
.reg
->num_components
;
519 AluInstruction
*ir
= nullptr;
520 for (int i
= 0; i
< n
; ++i
) {
521 ir
= new AluInstruction(op1_mov
, dst
[i
],
522 dst
[i
+ in_var
->data
.location_frac
], {alu_write
});
523 emit_instruction(ir
);
// The last emitted move gets the last-instruction flag.
526 ir
->set_flag(alu_last_instr
);
// Two-sided color: load the back color into a fresh temporary register with
// the same interpolator, then pick per channel between back and front color
// with op3_cnde on the front-face register.
530 if (m_need_back_color
&& io
.name() == TGSI_SEMANTIC_COLOR
) {
532 auto & color_input
= static_cast<ShaderInputColor
&> (io
);
533 auto& bgio
= m_shaderio
.input(color_input
.back_color_input_index());
535 bgio
.set_gpr(allocate_temp_register());
537 GPRVector
bgcol(bgio
.gpr(), {0,1,2,3});
538 load_interpolated(bgcol
, bgio
, ip
, num_components
, 0);
542 AluInstruction
*ir
= nullptr;
543 for (unsigned i
= 0; i
< 4 ; ++i
) {
544 ir
= new AluInstruction(op3_cnde
, dst
[i
], m_front_face_reg
, bgcol
[i
], dst
[i
], {alu_write
});
545 emit_instruction(ir
);
548 ir
->set_flag(alu_last_instr
);
// Emits the ALU code that loads shader input "io" into "dest".
//   ip             interpolator supplying the i/j values
//   num_components number of components requested
//   start_comp     first component of the input that is requested
// Interpolated inputs are loaded with the op2_interp_* opcodes, choosing the
// cheapest helper for the requested component range; the code at the end
// uses op1_interp_load_p0 for all four channels instead.
554 bool FragmentShaderFromNir::load_interpolated(GPRVector
&dest
,
555 ShaderInput
& io
, const Interpolator
&ip
,
556 int num_components
, int start_comp
)
558 // replace io with ShaderInputVarying
559 if (io
.interpolate() > 0) {
561 sfn_log
<< SfnLog::io
<< "Using Interpolator " << io
.ij_index() << "\n";
// Single component: x and z are loaded with the one-component helper,
// y and w with the two-component opcode that writes only that channel.
563 if (num_components
== 1) {
564 switch (start_comp
) {
565 case 0: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_x
);
566 case 1: return load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_xy
, 0, 1);
567 case 2: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
);
568 case 3: return load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_zw
, 2, 3);
// Two components: xy and zw map directly onto one opcode each (write masks
// 0x3 and 0xc); the yz pair is assembled from a z load plus a y-only load.
574 if (num_components
== 2) {
575 switch (start_comp
) {
576 case 0: return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, 0x3);
577 case 2: return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_zw
, 0xc);
578 case 1: return load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
) &&
579 load_interpolated_two_comp_for_one(dest
, io
, ip
, op2_interp_xy
, 0, 1);
// xyz: an xy load followed by a z load.
585 if (num_components
== 3 && start_comp
== 0)
586 return load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, 0x3) &&
587 load_interpolated_one_comp(dest
, io
, ip
, op2_interp_z
);
// General case: build the mask of requested channels and split it into the
// zw half (0xc) and the xy half (0x3).
589 int full_write_mask
= ((1 << num_components
) - 1) << start_comp
;
591 bool success
= load_interpolated_two_comp(dest
, io
, ip
, op2_interp_zw
, full_write_mask
& 0xc);
592 success
&= load_interpolated_two_comp(dest
, io
, ip
, op2_interp_xy
, full_write_mask
& 0x3);
// NOTE(review): presumably the path for inputs that are not interpolated
// (the branch structure around it is not shown here) - confirm.
// Each channel i is loaded with op1_interp_load_p0 from the parameter
// constant at ALU_SRC_PARAM_BASE + io.lds_pos(); the last instruction gets
// the last-instruction flag.
596 AluInstruction
*ir
= nullptr;
597 for (unsigned i
= 0; i
< 4 ; ++i
) {
598 ir
= new AluInstruction(op1_interp_load_p0
, dest
[i
],
599 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), i
)),
600 EmitInstruction::write
);
601 emit_instruction(ir
);
603 ir
->set_flag(alu_last_instr
);
// Emits the two-instruction sequence for a single-component interpolation
// opcode (called with op2_interp_x or op2_interp_z).
// The first instruction (i == 0) carries the write flag set and takes ip.i as
// its first source, the second carries the last-instruction flag set and
// takes ip.j. Both read the parameter constant at
// ALU_SRC_PARAM_BASE + io.lds_pos().
608 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector
&dest
,
609 ShaderInput
& io
, const Interpolator
& ip
, EAluOp op
)
611 for (unsigned i
= 0; i
< 2 ; ++i
) {
// NOTE(review): the declaration of "chan" and the statement guarded by this
// check are not shown here; presumably op2_interp_z moves the pair to the
// upper two channels - confirm.
613 if (op
== op2_interp_z
)
617 auto ir
= new AluInstruction(op
, dest
[chan
], i
& 1 ? ip
.j
: ip
.i
,
618 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
619 i
== 0 ? EmitInstruction::write
: EmitInstruction::last
);
// Keep the destination value in this exact channel.
620 dest
.pin_to_channel(chan
);
622 ir
->set_bank_swizzle(alu_vec_210
);
623 emit_instruction(ir
);
// Emits a four-instruction group for a two-component interpolation opcode
// (op2_interp_xy or op2_interp_zw).
//   writemask  bit i set => the instruction for channel i writes its result;
//              the other instructions are emitted with the empty flag set.
// Odd channels take ip.j as first source, even channels ip.i. All channels
// of dest are pinned, and the final instruction closes the group.
628 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector
&dest
, ShaderInput
& io
,
629 const Interpolator
& ip
, EAluOp op
, int writemask
)
631 AluInstruction
*ir
= nullptr;
632 for (unsigned i
= 0; i
< 4 ; ++i
) {
633 ir
= new AluInstruction(op
, dest
[i
], i
& 1 ? ip
.j
: ip
.i
, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
634 (writemask
& (1 << i
)) ? EmitInstruction::write
: EmitInstruction::empty
);
635 dest
.pin_to_channel(i
);
636 ir
->set_bank_swizzle(alu_vec_210
);
637 emit_instruction(ir
);
639 ir
->set_flag(alu_last_instr
);
// Emits a four-instruction group for a two-component interpolation opcode
// but keeps only one result: the instruction for channel "comp" is emitted
// with the write flag set, the other three with the empty flag set.
//   start  unused (marked UNUSED)
//   comp   channel whose result is written
// Odd channels take ip.j as first source, even channels ip.i. All channels
// of dest are pinned, and the final instruction closes the group.
643 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector
&dest
,
644 ShaderInput
& io
, const Interpolator
& ip
,
645 EAluOp op
, UNUSED
int start
, int comp
)
647 AluInstruction
*ir
= nullptr;
648 for (int i
= 0; i
< 4 ; ++i
) {
649 ir
= new AluInstruction(op
, dest
[i
], i
& 1 ? ip
.j
: ip
.i
,
650 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE
+ io
.lds_pos(), 0)),
651 i
== comp
? EmitInstruction::write
: EmitInstruction::empty
);
652 ir
->set_bank_swizzle(alu_vec_210
);
653 dest
.pin_to_channel(i
);
654 emit_instruction(ir
);
656 ir
->set_flag(alu_last_instr
);
// Emits the export instruction(s) for a store to a fragment output.
//   out_var      output variable being stored to
//   instr        the store intrinsic; the value is taken from its source 1
//   all_chanels  true: replicate the value to up to m_max_color_exports
//                consecutive color outputs; false: export once
// Color outputs update the pixel-export bookkeeping in sh_info(); depth,
// stencil and sample mask are exported to target 61.
661 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
, bool all_chanels
)
663 int outputs
= all_chanels
? m_max_color_exports
: 1;
665 std::array
<uint32_t,4> swizzle
;
666 unsigned writemask
= nir_intrinsic_write_mask(instr
);
// NOTE(review): the bodies of the stencil and sample-mask cases are not
// shown here.
667 switch (out_var
->data
.location
) {
668 case FRAG_RESULT_STENCIL
:
672 case FRAG_RESULT_SAMPLE_MASK
:
// Swizzle: channel i is taken from source component i while
// i < num_components; the remaining channels get the value 7.
// NOTE(review): this writes debug output to std::cerr unconditionally on
// every export; it looks like leftover debugging and probably belongs behind
// sfn_log (or should be removed) - confirm.
677 std::cerr
<< "Swizzle = ";
678 for (int i
= 0; i
< 4; ++i
) {
679 swizzle
[i
] = (i
< instr
->num_components
) ? i
: 7;
680 std::cerr
<< swizzle
[i
] << ", ";
684 auto value
= vec_from_nir_with_fetch_constant(instr
->src
[1], writemask
, swizzle
);
// Record the register that holds this output.
686 set_output(out_var
->data
.driver_location
, value
.sel());
688 if (out_var
->data
.location
== FRAG_RESULT_COLOR
||
689 (out_var
->data
.location
>= FRAG_RESULT_DATA0
&&
690 out_var
->data
.location
<= FRAG_RESULT_DATA7
)) {
691 for (int k
= 0 ; k
< outputs
; ++k
) {
// Color export index: the driver location plus the replication index, less
// the value of m_depth_exports.
693 unsigned location
= out_var
->data
.driver_location
+ k
- m_depth_exports
;
// Locations at or beyond m_max_color_exports are only logged as skipped.
// NOTE(review): the statement that actually skips the export is not shown here.
694 if (location
>= m_max_color_exports
) {
695 sfn_log
<< SfnLog::io
<< "Pixel output " << location
696 << " skipped because we have only " << m_max_color_exports
<< "CBs\n";
// Remember the most recent pixel export; do_finalize() marks it as last.
700 m_last_pixel_export
= new ExportInstruction(location
, value
, ExportInstruction::et_pixel
);
702 if (sh_info().ps_export_highest
< location
)
703 sh_info().ps_export_highest
= location
;
705 sh_info().nr_ps_color_exports
++;
// Four mask bits per color export location.
707 unsigned mask
= (0xfu
<< (location
* 4));
708 sh_info().ps_color_export_mask
|= mask
;
710 emit_export_instruction(m_last_pixel_export
);
711 ++m_max_counted_color_exports
;
713 } else if (out_var
->data
.location
== FRAG_RESULT_DEPTH
||
714 out_var
->data
.location
== FRAG_RESULT_STENCIL
||
715 out_var
->data
.location
== FRAG_RESULT_SAMPLE_MASK
) {
// Depth, stencil and sample mask all go to export target 61.
717 emit_export_instruction(new ExportInstruction(61, value
, ExportInstruction::et_pixel
));
// Final bookkeeping after all instructions were emitted: copies the input
// description into sh_info(), publishes the two-sided/LDS/export counters,
// makes sure at least one pixel export exists and marks the last one.
724 void FragmentShaderFromNir::do_finalize()
726 // update shader io info and set LDS etc.
727 sh_info().ninput
= m_shaderio
.inputs().size();
729 sfn_log
<< SfnLog::io
<< "Have " << sh_info().ninput
<< " inputs\n";
730 for (size_t i
= 0; i
< sh_info().ninput
; ++i
) {
// Inputs whose interpolator index is outside 0..5 fall back to slot 0 for
// the lookup of the assigned barycentric index.
731 int ij_idx
= (m_shaderio
.input(i
).ij_index() < 6 &&
732 m_shaderio
.input(i
).ij_index() >= 0) ? m_shaderio
.input(i
).ij_index() : 0;
733 m_shaderio
.input(i
).set_ioinfo(sh_info().input
[i
], m_interpolator
[ij_idx
].ij_index
);
736 sh_info().two_side
= m_shaderio
.two_sided();
737 sh_info().nlds
= m_shaderio
.nlds();
// Default: the number of color exports that were actually emitted.
739 sh_info().nr_ps_max_color_exports
= m_max_counted_color_exports
;
// NOTE(review): under fs_write_all the value is set to m_max_color_exports
// here and then to 8 at the end of this function; if both statements run,
// the later assignment wins - confirm which value is intended.
741 if (sh_info().fs_write_all
) {
742 sh_info().nr_ps_max_color_exports
= m_max_color_exports
;
// No pixel export was emitted: add one to location 0 whose four channel
// selectors are all 7, and account for it in the export counters.
745 if (!m_last_pixel_export
) {
746 GPRVector
v(0, {7,7,7,7});
747 m_last_pixel_export
= new ExportInstruction(0, v
, ExportInstruction::et_pixel
);
748 sh_info().nr_ps_color_exports
++;
749 sh_info().ps_color_export_mask
= 0xf;
750 emit_export_instruction(m_last_pixel_export
);
// Flag the final pixel export as the last one.
753 m_last_pixel_export
->set_last();
755 if (sh_info().fs_write_all
)
756 sh_info().nr_ps_max_color_exports
= 8;