2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50_program.h"
24 #include "nv50_context.h"
26 #include "codegen/nv50_ir_driver.h"
/*
 * Count the bits set in the low nibble of val.
 * Bits 4 and above are ignored; the result is in the range 0..4.
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   const uint32_t nibble = val & 0xf;
   unsigned bits = 0;
   unsigned b;

   for (b = 0; b < 4; ++b)
      bits += (nibble >> b) & 1;

   return bits;
}
/*
 * Assign slot numbers to the inputs, system values and outputs of the
 * program described by info, and mirror the varying layout into the
 * driver-side program reached through info->driverPriv.
 *
 * - Each input's sn/si/mask is copied to prog->in[]; its component mask is
 *   packed into prog->vp.attrs[] (4 bits per input) and every enabled
 *   component receives the next value of the counter n in
 *   info->in[i].slot[c].
 * - INSTANCEID / VERTEXID system values set enable bits in prog->vp.attrs[2];
 *   their slot[0] is then taken from n, vertex id first.
 * - Each output's sn/si/mask is copied to prog->out[] and enabled components
 *   receive slots from n; clip distance, edge flag and back colour outputs
 *   are additionally recorded in prog->vp.
 */
37 nv50_vertprog_assign_slots(struct nv50_ir_prog_info
*info
)
39 struct nv50_program
*prog
= (struct nv50_program
*)info
->driverPriv
;
/* inputs */
43 for (i
= 0; i
< info
->numInputs
; ++i
) {
45 prog
->in
[i
].sn
= info
->in
[i
].sn
;
46 prog
->in
[i
].si
= info
->in
[i
].si
;
48 prog
->in
[i
].mask
= info
->in
[i
].mask
;
/* pack this input's component mask into the attrs[] words, 4 bits per input */
50 prog
->vp
.attrs
[(4 * i
) / 32] |= info
->in
[i
].mask
<< ((4 * i
) % 32);
/* one slot per enabled component */
52 for (c
= 0; c
< 4; ++c
)
53 if (info
->in
[i
].mask
& (1 << c
))
54 info
->in
[i
].slot
[c
] = n
++;
56 prog
->in_nr
= info
->numInputs
;
/* system values: enable the matching built-in attributes in attrs[2] */
58 for (i
= 0; i
< info
->numSysVals
; ++i
) {
59 switch (info
->sv
[i
].sn
) {
60 case TGSI_SEMANTIC_INSTANCEID
:
61 prog
->vp
.attrs
[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID
;
63 case TGSI_SEMANTIC_VERTEXID
:
64 prog
->vp
.attrs
[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID
;
65 prog
->vp
.attrs
[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12
;
71 /* VertexID before InstanceID */
72 if (info
->io
.vertexId
< info
->numSysVals
)
73 info
->sv
[info
->io
.vertexId
].slot
[0] = n
++;
74 if (info
->io
.instanceId
< info
->numSysVals
)
75 info
->sv
[info
->io
.instanceId
].slot
[0] = n
++;
/* outputs */
78 for (i
= 0; i
< info
->numOutputs
; ++i
) {
79 switch (info
->out
[i
].sn
) {
80 case TGSI_SEMANTIC_PSIZE
:
/* clip distance: record the current value of the slot counter */
83 case TGSI_SEMANTIC_CLIPDIST
:
84 prog
->vp
.clpd
[info
->out
[i
].si
] = n
;
/* edge flag and back colour: record the output index */
86 case TGSI_SEMANTIC_EDGEFLAG
:
87 prog
->vp
.edgeflag
= i
;
89 case TGSI_SEMANTIC_BCOLOR
:
90 prog
->vp
.bfc
[info
->out
[i
].si
] = i
;
96 prog
->out
[i
].sn
= info
->out
[i
].sn
;
97 prog
->out
[i
].si
= info
->out
[i
].si
;
99 prog
->out
[i
].mask
= info
->out
[i
].mask
;
/* one slot per enabled component */
101 for (c
= 0; c
< 4; ++c
)
102 if (info
->out
[i
].mask
& (1 << c
))
103 info
->out
[i
].slot
[c
] = n
++;
105 prog
->out_nr
= info
->numOutputs
;
/* if psiz indexes a valid output, replace it with that output's hw value */
108 if (prog
->vp
.psiz
< info
->numOutputs
)
109 prog
->vp
.psiz
= prog
->out
[prog
->vp
.psiz
].hw
;
115 nv50_fragprog_assign_slots(struct nv50_ir_prog_info
*info
)
117 struct nv50_program
*prog
= (struct nv50_program
*)info
->driverPriv
;
123 /* count recorded non-flat inputs */
124 for (m
= 0, i
= 0; i
< info
->numInputs
; ++i
) {
125 switch (info
->in
[i
].sn
) {
126 case TGSI_SEMANTIC_POSITION
:
127 case TGSI_SEMANTIC_FACE
:
130 m
+= info
->in
[i
].flat
? 0 : 1;
134 /* careful: id may be != i in info->in[prog->in[i].id] */
136 /* Fill prog->in[] so that non-flat inputs are first and
137 * kick out special inputs that don't use the RESULT_MAP.
139 for (n
= 0, i
= 0; i
< info
->numInputs
; ++i
) {
140 if (info
->in
[i
].sn
== TGSI_SEMANTIC_POSITION
) {
141 prog
->fp
.interp
|= info
->in
[i
].mask
<< 24;
142 for (c
= 0; c
< 4; ++c
)
143 if (info
->in
[i
].mask
& (1 << c
))
144 info
->in
[i
].slot
[c
] = nintp
++;
146 if (info
->in
[i
].sn
== TGSI_SEMANTIC_FACE
) {
147 info
->in
[i
].slot
[0] = 255;
149 unsigned j
= info
->in
[i
].flat
? m
++ : n
++;
151 if (info
->in
[i
].sn
== TGSI_SEMANTIC_COLOR
)
152 prog
->vp
.bfc
[info
->in
[i
].si
] = j
;
155 prog
->in
[j
].mask
= info
->in
[i
].mask
;
156 prog
->in
[j
].sn
= info
->in
[i
].sn
;
157 prog
->in
[j
].si
= info
->in
[i
].si
;
158 prog
->in
[j
].linear
= info
->in
[i
].linear
;
163 if (!(prog
->fp
.interp
& (8 << 24))) {
165 prog
->fp
.interp
|= 8 << 24;
168 for (i
= 0; i
< prog
->in_nr
; ++i
) {
169 int j
= prog
->in
[i
].id
;
171 prog
->in
[i
].hw
= nintp
;
172 for (c
= 0; c
< 4; ++c
)
173 if (info
->in
[i
].mask
& (1 << c
))
174 info
->in
[j
].slot
[c
] = nintp
++;
176 /* (n == m) if m never increased, i.e. no flat inputs */
177 nflat
= (n
< m
) ? (nintp
- prog
->in
[n
].hw
) : 0;
178 nintp
-= bitcount4(prog
->fp
.interp
>> 24); /* subtract position inputs */
179 nvary
= nintp
- nflat
;
181 prog
->fp
.interp
|= nvary
<< NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT
;
182 prog
->fp
.interp
|= nintp
<< NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT
;
184 /* put front/back colors right after HPOS */
185 prog
->fp
.colors
= 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT
;
186 for (i
= 0; i
< 2; ++i
)
187 if (prog
->vp
.bfc
[i
] < 0x80)
188 prog
->fp
.colors
+= bitcount4(prog
->in
[prog
->vp
.bfc
[i
]].mask
) << 16;
192 if (info
->prop
.fp
.numColourResults
> 1)
193 prog
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS
;
195 for (i
= 0; i
< info
->numOutputs
; ++i
) {
197 prog
->out
[i
].sn
= info
->out
[i
].sn
;
198 prog
->out
[i
].si
= info
->out
[i
].si
;
199 prog
->out
[i
].mask
= info
->out
[i
].mask
;
201 if (i
== info
->io
.fragDepth
|| i
== info
->io
.sampleMask
)
203 prog
->out
[i
].hw
= info
->out
[i
].si
* 4;
205 for (c
= 0; c
< 4; ++c
)
206 info
->out
[i
].slot
[c
] = prog
->out
[i
].hw
+ c
;
208 prog
->max_out
= MAX2(prog
->max_out
, prog
->out
[i
].hw
+ 4);
211 if (info
->io
.sampleMask
< PIPE_MAX_SHADER_OUTPUTS
)
212 info
->out
[info
->io
.sampleMask
].slot
[0] = prog
->max_out
++;
214 if (info
->io
.fragDepth
< PIPE_MAX_SHADER_OUTPUTS
)
215 info
->out
[info
->io
.fragDepth
].slot
[2] = prog
->max_out
++;
/*
 * Dispatch slot assignment on the shader type stored in info->type:
 * vertex and geometry programs both go through nv50_vertprog_assign_slots(),
 * fragment programs through nv50_fragprog_assign_slots(). The callee's
 * return value is passed straight back to the caller.
 */
224 nv50_program_assign_varying_slots(struct nv50_ir_prog_info
*info
)
226 switch (info
->type
) {
227 case PIPE_SHADER_VERTEX
:
228 return nv50_vertprog_assign_slots(info
);
229 case PIPE_SHADER_GEOMETRY
:
230 return nv50_vertprog_assign_slots(info
);
231 case PIPE_SHADER_FRAGMENT
:
232 return nv50_fragprog_assign_slots(info
);
/*
 * Translate prog's TGSI tokens into hardware code for the given chipset.
 *
 * A zero-initialised nv50_ir_prog_info is filled in with the shader type,
 * target chipset, the TGSI token pointer and the slot-assignment callback,
 * then handed to nv50_ir_generate_code(). The resulting code pointer, code
 * size, relocation data and register count are stored in prog; for fragment
 * programs the depth-export and discard control flags are set as well.
 */
239 nv50_program_translate(struct nv50_program
*prog
, uint16_t chipset
)
241 struct nv50_ir_prog_info
*info
;
244 info
= CALLOC_STRUCT(nv50_ir_prog_info
);
/* describe the shader source to the code generator */
248 info
->type
= prog
->type
;
249 info
->target
= chipset
;
250 info
->bin
.sourceRep
= NV50_PROGRAM_IR_TGSI
;
251 info
->bin
.source
= (void *)prog
->pipe
.tokens
;
253 info
->io
.genUserClip
= prog
->vp
.clpd_nr
;
/* callback used by the code generator to assign varying slots */
255 info
->assignSlots
= nv50_program_assign_varying_slots
;
/* Initial value for all special locations is 0x80 -- presumably "not present":
 * nv50_fragprog_assign_slots() only uses bfc[] entries below 0x80.
 */
257 prog
->vp
.bfc
[0] = 0x80;
258 prog
->vp
.bfc
[1] = 0x80;
259 prog
->vp
.clpd
[0] = 0x80;
260 prog
->vp
.clpd
[1] = 0x80;
261 prog
->vp
.psiz
= 0x80;
262 prog
->vp
.edgeflag
= 0x80;
263 prog
->gp
.primid
= 0x80;
/* lets the slot-assignment callbacks get back to prog */
265 info
->driverPriv
= prog
;
/* optimisation level (default 3) and debug flags (default 0) are read from
 * the NV50_PROG_OPTIMIZE and NV50_PROG_DEBUG options
 */
268 info
->optLevel
= debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
269 info
->dbgFlags
= debug_get_num_option("NV50_PROG_DEBUG", 0);
274 ret
= nv50_ir_generate_code(info
);
276 NOUVEAU_ERR("shader translation failed: %i\n", ret
);
/* store the generated binary, its size and relocation data in prog */
279 prog
->code
= info
->bin
.code
;
280 prog
->code_size
= info
->bin
.codeSize
;
281 prog
->fixups
= info
->bin
.relocData
;
/* max_gpr is (maxGPR / 2) + 1, but never less than 4 */
282 prog
->max_gpr
= MAX2(4, (info
->bin
.maxGPR
>> 1) + 1);
/* fragment programs: flag depth export and use of discard */
284 if (prog
->type
== PIPE_SHADER_FRAGMENT
) {
285 if (info
->prop
.fp
.writesDepth
) {
286 prog
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z
;
287 prog
->fp
.flags
[1] = 0x11;
289 if (info
->prop
.fp
.usesDiscard
)
290 prog
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_USES_KIL
;
299 nv50_program_upload_code(struct nv50_context
*nv50
, struct nv50_program
*prog
)
301 struct nouveau_heap
*heap
;
303 uint32_t size
= align(prog
->code_size
, 0x40);
305 switch (prog
->type
) {
306 case PIPE_SHADER_VERTEX
: heap
= nv50
->screen
->vp_code_heap
; break;
307 case PIPE_SHADER_GEOMETRY
: heap
= nv50
->screen
->fp_code_heap
; break;
308 case PIPE_SHADER_FRAGMENT
: heap
= nv50
->screen
->gp_code_heap
; break;
310 assert(!"invalid program type");
314 ret
= nouveau_heap_alloc(heap
, size
, prog
, &prog
->mem
);
316 /* Out of space: evict everything to compactify the code segment, hoping
317 * the working set is much smaller and drifts slowly. Improve me !
320 struct nv50_program
*evict
= heap
->next
->priv
;
322 nouveau_heap_free(&evict
->mem
);
324 debug_printf("WARNING: out of code space, evicting all shaders.\n");
326 prog
->code_base
= prog
->mem
->start
;
329 nv50_ir_relocate_code(prog
->fixups
, prog
->code
, prog
->code_base
, 0, 0);
331 nv50_sifc_linear_u8(&nv50
->base
, nv50
->screen
->code
,
332 (prog
->type
<< NV50_CODE_BO_SIZE_LOG2
) + prog
->code_base
,
333 NOUVEAU_BO_VRAM
, prog
->code_size
, prog
->code
);
335 BEGIN_NV04(nv50
->base
.pushbuf
, NV50_3D(CODE_CB_FLUSH
), 1);
336 PUSH_DATA (nv50
->base
.pushbuf
, 0);
342 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
344 const struct pipe_shader_state pipe
= p
->pipe
;
345 const ubyte type
= p
->type
;
348 nouveau_heap_free(&p
->mem
);
356 memset(p
, 0, sizeof(*p
));