r600/sfn: rework getting a vector and uniforms from the value pool
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_shader_fragment.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31
32 namespace r600 {
33
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35 r600_shader& sh,
36 r600_pipe_shader_selector &sel,
37 const r600_shader_key &key,
38 enum chip_class chip_class):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class),
40 m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41 m_max_counted_color_exports(0),
42 m_two_sided_color(key.ps.color_two_side),
43 m_last_pixel_export(nullptr),
44 m_nir(nir),
45 m_reserved_registers(0),
46 m_frag_pos_index(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
49 m_depth_exports(0),
50 m_enable_centroid_interpolators(false)
51 {
52 for (auto& i: m_interpolator) {
53 i.enabled = false;
54 i.ij_index= 0;
55 }
56
57 sh_info().rat_base = key.ps.nr_cbufs;
58 sh_info().atomic_base = key.ps.first_atomic_counter;
59 }
60
61 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
62 {
63 sfn_log << SfnLog::io << "Parse input variable "
64 << input->name << " location:" << input->data.location
65 << " driver-loc:" << input->data.driver_location
66 << " interpolation:" << input->data.interpolation
67 << "\n";
68
69 if (input->data.location == VARYING_SLOT_FACE) {
70 m_sv_values.set(es_face);
71 return true;
72 }
73
74 unsigned name, sid;
75 auto semantic = r600_get_varying_semantic(input->data.location);
76 name = semantic.first;
77 sid = semantic.second;
78
79 tgsi_semantic sname = static_cast<tgsi_semantic>(name);
80
81 switch (sname) {
82 case TGSI_SEMANTIC_POSITION: {
83 m_sv_values.set(es_pos);
84 return true;
85 }
86 case TGSI_SEMANTIC_COLOR: {
87 m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
88 m_need_back_color = m_two_sided_color;
89 return true;
90 }
91 case TGSI_SEMANTIC_PRIMID:
92 sh_info().gs_prim_id_input = true;
93 sh_info().ps_prim_id_input = m_shaderio.inputs().size();
94 /* fallthrough */
95 case TGSI_SEMANTIC_FOG:
96 case TGSI_SEMANTIC_GENERIC:
97 case TGSI_SEMANTIC_TEXCOORD:
98 case TGSI_SEMANTIC_LAYER:
99 case TGSI_SEMANTIC_PCOORD:
100 case TGSI_SEMANTIC_VIEWPORT_INDEX:
101 case TGSI_SEMANTIC_CLIPDIST: {
102 if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
103 m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
104 return true;
105 }
106 default:
107 return false;
108 }
109 }
110
111 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
112 {
113 switch (instr->type) {
114 case nir_instr_type_intrinsic: {
115 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
116 switch (ii->intrinsic) {
117 case nir_intrinsic_load_front_face:
118 m_sv_values.set(es_face);
119 break;
120 case nir_intrinsic_load_sample_mask_in:
121 m_sv_values.set(es_sample_mask_in);
122 break;
123 case nir_intrinsic_load_sample_id:
124 m_sv_values.set(es_sample_id);
125 break;
126 case nir_intrinsic_interp_deref_at_centroid:
127 /* This is not a sysvalue, should go elsewhere */
128 m_enable_centroid_interpolators = true;
129 break;
130 default:
131 ;
132 }
133 }
134 default:
135 ;
136 }
137 return true;
138 }
139
140 bool FragmentShaderFromNir::allocate_reserved_registers()
141 {
142 assert(!m_reserved_registers);
143
144 int face_reg_index = -1;
145 // enabled interpolators based on inputs
146 for (auto& i: m_shaderio.inputs()) {
147 int ij = i->ij_index();
148 if (ij >= 0) {
149 m_interpolator[ij].enabled = true;
150 }
151 }
152
153 /* Lazy, enable both possible interpolators,
154 * TODO: check which ones are really needed */
155 if (m_enable_centroid_interpolators) {
156 m_interpolator[2].enabled = true; /* perspective */
157 m_interpolator[5].enabled = true; /* linear */
158 }
159
160 // sort the varying inputs
161 m_shaderio.sort_varying_inputs();
162
163 // handle interpolators
164 int num_baryc = 0;
165 for (int i = 0; i < 6; ++i) {
166 if (m_interpolator[i].enabled) {
167 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
168
169 m_interpolator[i].ij_index = num_baryc;
170
171 unsigned sel = num_baryc / 2;
172 unsigned chan = 2 * (num_baryc % 2);
173
174 auto ip_i = new GPRValue(sel, chan + 1);
175 ip_i->set_as_input();
176 m_interpolator[i].i.reset(ip_i);
177 inject_register(sel, chan + 1, m_interpolator[i].i, false);
178
179 auto ip_j = new GPRValue(sel, chan);
180 ip_j->set_as_input();
181 m_interpolator[i].j.reset(ip_j);
182 inject_register(sel, chan, m_interpolator[i].j, false);
183
184 ++num_baryc;
185 }
186 }
187 m_reserved_registers += (num_baryc + 1) >> 1;
188
189 if (m_sv_values.test(es_pos)) {
190 m_frag_pos_index = m_reserved_registers++;
191 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
192 }
193
194 // handle system values
195 if (m_sv_values.test(es_face) || m_need_back_color) {
196 face_reg_index = m_reserved_registers++;
197 auto ffr = new GPRValue(face_reg_index,0);
198 ffr->set_as_input();
199 m_front_face_reg.reset(ffr);
200 sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
201 inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
202
203 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
204 load_front_face();
205 }
206
207 if (m_sv_values.test(es_sample_mask_in)) {
208 if (face_reg_index < 0)
209 face_reg_index = m_reserved_registers++;
210
211 auto smi = new GPRValue(face_reg_index,2);
212 smi->set_as_input();
213 m_sample_mask_reg.reset(smi);
214 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
215 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
216 sh_info().nsys_inputs = 1;
217 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
218 }
219
220 if (m_sv_values.test(es_sample_id)) {
221 if (face_reg_index < 0)
222 face_reg_index = m_reserved_registers++;
223
224 auto smi = new GPRValue(face_reg_index, 3);
225 smi->set_as_input();
226 m_sample_id_reg.reset(smi);
227 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
228 sh_info().nsys_inputs++;
229 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, face_reg_index));
230 }
231
232 // The back color handling is not emmited in the code, so we have
233 // to add the inputs here and later we also need to inject the code to set
234 // the right color
235 if (m_need_back_color) {
236 size_t ninputs = m_shaderio.inputs().size();
237 for (size_t k = 0; k < ninputs; ++k) {
238 ShaderInput& i = m_shaderio.input(k);
239
240 if (i.name() != TGSI_SEMANTIC_COLOR)
241 continue;
242
243 ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
244
245 size_t next_pos = m_shaderio.size();
246 auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
247 m_shaderio.add_input(bcol);
248 col.set_back_color(next_pos);
249 }
250 m_shaderio.set_two_sided();
251 }
252
253 m_shaderio.update_lds_pos();
254
255 set_reserved_registers(m_reserved_registers);
256
257 return true;
258 }
259
260 void FragmentShaderFromNir::emit_shader_start()
261 {
262 if (m_sv_values.test(es_face))
263 load_front_face();
264
265 if (m_sv_values.test(es_pos)) {
266 for (int i = 0; i < 4; ++i) {
267 auto v = new GPRValue(m_frag_pos_index, i);
268 v->set_as_input();
269 auto reg = PValue(v);
270 if (i == 3)
271 emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
272 m_frag_pos[i] = reg;
273 }
274 }
275 }
276
277 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
278 {
279 if (out_var->data.location == FRAG_RESULT_COLOR)
280 return emit_export_pixel(out_var, instr, true);
281
282 if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
283 out_var->data.location <= FRAG_RESULT_DATA7) ||
284 out_var->data.location == FRAG_RESULT_DEPTH ||
285 out_var->data.location == FRAG_RESULT_STENCIL)
286 return emit_export_pixel(out_var, instr, false);
287
288 sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
289 out_var->data.location << "(" << out_var->data.driver_location << ")\n";
290 return false;
291 }
292
293 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
294 {
295 sfn_log << SfnLog::instr << "Parse output variable "
296 << output->name << " @" << output->data.location
297 << "@dl:" << output->data.driver_location << "\n";
298
299 ++sh_info().noutput;
300 r600_shader_io& io = sh_info().output[output->data.driver_location];
301 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
302 &io.name, &io.sid);
303
304 /* Check whether this code has become obsolete by the IO vectorization */
305 unsigned num_components = 4;
306 unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
307 if (vector_elements)
308 num_components = vector_elements;
309 unsigned component = output->data.location_frac;
310
311 for (unsigned j = component; j < num_components + component; j++)
312 io.write_mask |= 1 << j;
313
314 int loc = output->data.location;
315 if (loc == FRAG_RESULT_COLOR &&
316 (m_nir.info.outputs_written & (1ull << loc))) {
317 sh_info().fs_write_all = true;
318 }
319
320 if (output->data.location == FRAG_RESULT_COLOR ||
321 (output->data.location >= FRAG_RESULT_DATA0 &&
322 output->data.location <= FRAG_RESULT_DATA7)) {
323 return true;
324 }
325 if (output->data.location == FRAG_RESULT_DEPTH ||
326 output->data.location == FRAG_RESULT_STENCIL) {
327 io.write_mask = 15;
328 return true;
329 }
330
331 return false;
332 }
333
334 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
335 {
336 switch (instr->intrinsic) {
337 case nir_intrinsic_load_sample_mask_in:
338 return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
339 case nir_intrinsic_load_sample_id:
340 return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
341 case nir_intrinsic_load_front_face:
342 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
343 case nir_intrinsic_interp_deref_at_sample:
344 return emit_interp_deref_at_sample(instr);
345 case nir_intrinsic_interp_deref_at_offset:
346 return emit_interp_deref_at_offset(instr);
347 case nir_intrinsic_interp_deref_at_centroid:
348 return emit_interp_deref_at_centroid(instr);
349 default:
350 return false;
351 }
352 }
353
354 void FragmentShaderFromNir::load_front_face()
355 {
356 assert(m_front_face_reg);
357 if (m_front_face_loaded)
358 return;
359
360 auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
361 Value::zero, {alu_write, alu_last_instr});
362 m_front_face_loaded = true;
363 emit_instruction(ir);
364 }
365
366 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
367 {
368 GPRVector slope = get_temp_vec4();
369
370 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
371 from_nir_with_fetch_constant(instr->src[1], 0),
372 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
373 fetch->set_flag(vtx_srf_mode);
374 emit_instruction(fetch);
375
376 GPRVector grad = get_temp_vec4();
377 auto var = get_deref_location(instr->src[0]);
378 assert(var);
379
380 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
381 auto interpolator = m_interpolator[io.ij_index()];
382 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
383
384 GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
385
386 auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
387 tex->set_dest_swizzle({0,1,7,7});
388 emit_instruction(tex);
389
390 tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
391 tex->set_dest_swizzle({7,7,0,1});
392 emit_instruction(tex);
393
394 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
395 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
396
397 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
398 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
399
400 Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
401
402 auto dst = vec_from_nir(instr->dest, 4);
403 int num_components = instr->dest.is_ssa ?
404 instr->dest.ssa.num_components:
405 instr->dest.reg.reg->num_components;
406
407 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
408
409 return true;
410 }
411
412 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
413 {
414 int temp = allocate_temp_register();
415
416 GPRVector help(temp, {0,1,2,3});
417
418 auto var = get_deref_location(instr->src[0]);
419 assert(var);
420
421 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
422 auto interpolator = m_interpolator[io.ij_index()];
423 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
424
425 GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
426
427 auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
428 getgradh->set_dest_swizzle({0,1,7,7});
429 getgradh->set_flag(TexInstruction::x_unnormalized);
430 getgradh->set_flag(TexInstruction::y_unnormalized);
431 getgradh->set_flag(TexInstruction::z_unnormalized);
432 getgradh->set_flag(TexInstruction::w_unnormalized);
433 emit_instruction(getgradh);
434
435 auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
436 getgradv->set_dest_swizzle({7,7,0,1});
437 getgradv->set_flag(TexInstruction::x_unnormalized);
438 getgradv->set_flag(TexInstruction::y_unnormalized);
439 getgradv->set_flag(TexInstruction::z_unnormalized);
440 getgradv->set_flag(TexInstruction::w_unnormalized);
441 emit_instruction(getgradv);
442
443 PValue ofs_x = from_nir(instr->src[1], 0);
444 PValue ofs_y = from_nir(instr->src[1], 1);
445 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
446 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
447 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
448 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
449
450 Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
451
452 auto dst = vec_from_nir(instr->dest, 4);
453 load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
454 var->data.location_frac);
455
456 return true;
457 }
458
459 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
460 {
461 auto var = get_deref_location(instr->src[0]);
462 assert(var);
463
464 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
465 io.set_uses_interpolate_at_centroid();
466
467 int ij_index = io.ij_index() >= 3 ? 5 : 2;
468 assert (m_interpolator[ij_index].enabled);
469 auto ip = m_interpolator[ij_index];
470
471 int num_components = nir_dest_num_components(instr->dest);
472
473 auto dst = vec_from_nir(instr->dest, 4);
474 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
475 return true;
476 }
477
478
479 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
480 {
481 if (in_var->data.location == VARYING_SLOT_POS) {
482 assert(instr->dest.is_ssa);
483
484 for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
485 inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
486 }
487 return true;
488 }
489
490 if (in_var->data.location == VARYING_SLOT_FACE)
491 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
492
493 // todo: replace io with ShaderInputVarying
494 auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
495 unsigned num_components = 4;
496
497
498 if (instr->dest.is_ssa) {
499 num_components = instr->dest.ssa.num_components;
500 } else {
501 num_components = instr->dest.reg.reg->num_components;
502 }
503
504 auto dst = vec_from_nir(instr->dest, 4);
505
506 sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
507 << "].gpr=" << dst.sel() << "\n";
508
509 io.set_gpr(dst.sel());
510
511 auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
512
513 load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
514
515 /* These results are expected starting in slot x..*/
516 if (in_var->data.location_frac > 0) {
517 int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
518 instr->dest.reg.reg->num_components;
519 AluInstruction *ir = nullptr;
520 for (int i = 0; i < n; ++i) {
521 ir = new AluInstruction(op1_mov, dst[i],
522 dst[i + in_var->data.location_frac], {alu_write});
523 emit_instruction(ir);
524 }
525 if (ir)
526 ir->set_flag(alu_last_instr);
527 }
528
529
530 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
531
532 auto & color_input = static_cast<ShaderInputColor&> (io);
533 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
534
535 bgio.set_gpr(allocate_temp_register());
536
537 GPRVector bgcol(bgio.gpr(), {0,1,2,3});
538 load_interpolated(bgcol, bgio, ip, num_components, 0);
539
540 load_front_face();
541
542 AluInstruction *ir = nullptr;
543 for (unsigned i = 0; i < 4 ; ++i) {
544 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
545 emit_instruction(ir);
546 }
547 if (ir)
548 ir->set_flag(alu_last_instr);
549 }
550
551 return true;
552 }
553
554 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
555 ShaderInput& io, const Interpolator &ip,
556 int num_components, int start_comp)
557 {
558 // replace io with ShaderInputVarying
559 if (io.interpolate() > 0) {
560
561 sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
562
563 if (num_components == 1) {
564 switch (start_comp) {
565 case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
566 case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
567 case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
568 case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
569 default:
570 assert(0);
571 }
572 }
573
574 if (num_components == 2) {
575 switch (start_comp) {
576 case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
577 case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
578 case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
579 load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
580 default:
581 assert(0);
582 }
583 }
584
585 if (num_components == 3 && start_comp == 0)
586 return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
587 load_interpolated_one_comp(dest, io, ip, op2_interp_z);
588
589 int full_write_mask = ((1 << num_components) - 1) << start_comp;
590
591 bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
592 success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
593 return success;
594
595 } else {
596 AluInstruction *ir = nullptr;
597 for (unsigned i = 0; i < 4 ; ++i) {
598 ir = new AluInstruction(op1_interp_load_p0, dest[i],
599 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
600 EmitInstruction::write);
601 emit_instruction(ir);
602 }
603 ir->set_flag(alu_last_instr);
604 }
605 return true;
606 }
607
608 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
609 ShaderInput& io, const Interpolator& ip, EAluOp op)
610 {
611 for (unsigned i = 0; i < 2 ; ++i) {
612 int chan = i;
613 if (op == op2_interp_z)
614 chan += 2;
615
616
617 auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
618 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
619 i == 0 ? EmitInstruction::write : EmitInstruction::last);
620 dest.pin_to_channel(chan);
621
622 ir->set_bank_swizzle(alu_vec_210);
623 emit_instruction(ir);
624 }
625 return true;
626 }
627
628 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
629 const Interpolator& ip, EAluOp op, int writemask)
630 {
631 AluInstruction *ir = nullptr;
632 for (unsigned i = 0; i < 4 ; ++i) {
633 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
634 (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
635 dest.pin_to_channel(i);
636 ir->set_bank_swizzle(alu_vec_210);
637 emit_instruction(ir);
638 }
639 ir->set_flag(alu_last_instr);
640 return true;
641 }
642
643 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
644 ShaderInput& io, const Interpolator& ip,
645 EAluOp op, UNUSED int start, int comp)
646 {
647 AluInstruction *ir = nullptr;
648 for (int i = 0; i < 4 ; ++i) {
649 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
650 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
651 i == comp ? EmitInstruction::write : EmitInstruction::empty);
652 ir->set_bank_swizzle(alu_vec_210);
653 dest.pin_to_channel(i);
654 emit_instruction(ir);
655 }
656 ir->set_flag(alu_last_instr);
657 return true;
658 }
659
660
661 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_chanels)
662 {
663 int outputs = all_chanels ? m_max_color_exports : 1;
664
665 std::array<uint32_t,4> swizzle;
666 unsigned writemask = nir_intrinsic_write_mask(instr);
667 switch (out_var->data.location) {
668 case FRAG_RESULT_STENCIL:
669 writemask = 2;
670 swizzle = {7,0,7,7};
671 break;
672 case FRAG_RESULT_SAMPLE_MASK:
673 writemask = 4;
674 swizzle = {7,7,0,7};
675 break;
676 default:
677 std::cerr << "Swizzle = ";
678 for (int i = 0; i < 4; ++i) {
679 swizzle[i] = (i < instr->num_components) ? i : 7;
680 std::cerr << swizzle[i] << ", ";
681 }
682 }
683
684 auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
685
686 set_output(out_var->data.driver_location, value.sel());
687
688 if (out_var->data.location == FRAG_RESULT_COLOR ||
689 (out_var->data.location >= FRAG_RESULT_DATA0 &&
690 out_var->data.location <= FRAG_RESULT_DATA7)) {
691 for (int k = 0 ; k < outputs; ++k) {
692
693 unsigned location = out_var->data.driver_location + k - m_depth_exports;
694 if (location >= m_max_color_exports) {
695 sfn_log << SfnLog::io << "Pixel output " << location
696 << " skipped because we have only " << m_max_color_exports << "CBs\n";
697 continue;
698 }
699
700 m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
701
702 if (sh_info().ps_export_highest < location)
703 sh_info().ps_export_highest = location;
704
705 sh_info().nr_ps_color_exports++;
706
707 unsigned mask = (0xfu << (location * 4));
708 sh_info().ps_color_export_mask |= mask;
709
710 emit_export_instruction(m_last_pixel_export);
711 ++m_max_counted_color_exports;
712 };
713 } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
714 out_var->data.location == FRAG_RESULT_STENCIL ||
715 out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
716 m_depth_exports++;
717 emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
718 } else {
719 return false;
720 }
721 return true;
722 }
723
724 void FragmentShaderFromNir::do_finalize()
725 {
726 // update shader io info and set LDS etc.
727 sh_info().ninput = m_shaderio.inputs().size();
728
729 sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
730 for (size_t i = 0; i < sh_info().ninput; ++i) {
731 int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
732 m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
733 m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
734 }
735
736 sh_info().two_side = m_shaderio.two_sided();
737 sh_info().nlds = m_shaderio.nlds();
738
739 sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
740
741 if (sh_info().fs_write_all) {
742 sh_info().nr_ps_max_color_exports = m_max_color_exports;
743 }
744
745 if (!m_last_pixel_export) {
746 GPRVector v(0, {7,7,7,7});
747 m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
748 sh_info().nr_ps_color_exports++;
749 sh_info().ps_color_export_mask = 0xf;
750 emit_export_instruction(m_last_pixel_export);
751 }
752
753 m_last_pixel_export->set_last();
754
755 if (sh_info().fs_write_all)
756 sh_info().nr_ps_max_color_exports = 8;
757 }
758
759 }