nv50/ir: fix constant folding for OP_MUL subop HIGH
[mesa.git] / src / gallium / drivers / nouveau / nv30 / nv30_vertprog.c
1 /*
2 * Copyright 2012 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs
23 *
24 */
25
26 #include "util/u_dynarray.h"
27 #include "tgsi/tgsi_parse.h"
28
29 #include "nv_object.xml.h"
30 #include "nv30/nv30-40_3d.xml.h"
31 #include "nv30/nv30_context.h"
32 #include "nv30/nvfx_shader.h"
33 #include "nv30/nv30_state.h"
34
35 static void
36 nv30_vertprog_destroy(struct nv30_vertprog *vp)
37 {
38 util_dynarray_fini(&vp->branch_relocs);
39 nouveau_heap_free(&vp->exec);
40 FREE(vp->insns);
41 vp->insns = NULL;
42 vp->nr_insns = 0;
43
44 util_dynarray_fini(&vp->const_relocs);
45 nouveau_heap_free(&vp->data);
46 FREE(vp->consts);
47 vp->consts = NULL;
48 vp->nr_consts = 0;
49
50 vp->translated = FALSE;
51 }
52
53 void
54 nv30_vertprog_validate(struct nv30_context *nv30)
55 {
56 struct nouveau_pushbuf *push = nv30->base.pushbuf;
57 struct nouveau_object *eng3d = nv30->screen->eng3d;
58 struct nv30_vertprog *vp = nv30->vertprog.program;
59 struct nv30_fragprog *fp = nv30->fragprog.program;
60 boolean upload_code = FALSE;
61 boolean upload_data = FALSE;
62 unsigned i;
63
64 if (nv30->dirty & NV30_NEW_FRAGPROG) {
65 if (memcmp(vp->texcoord, fp->texcoord, sizeof(vp->texcoord))) {
66 if (vp->translated)
67 nv30_vertprog_destroy(vp);
68 memcpy(vp->texcoord, fp->texcoord, sizeof(vp->texcoord));
69 }
70 }
71
72 if (nv30->rast && nv30->rast->pipe.clip_plane_enable != vp->enabled_ucps) {
73 vp->enabled_ucps = nv30->rast->pipe.clip_plane_enable;
74 if (vp->translated)
75 nv30_vertprog_destroy(vp);
76 }
77
78 if (!vp->translated) {
79 vp->translated = _nvfx_vertprog_translate(eng3d->oclass, vp);
80 if (!vp->translated) {
81 nv30->draw_flags |= NV30_NEW_VERTPROG;
82 return;
83 }
84 nv30->dirty |= NV30_NEW_VERTPROG;
85 }
86
87 if (!vp->exec) {
88 struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
89 struct nv30_shader_reloc *reloc = vp->branch_relocs.data;
90 unsigned nr_reloc = vp->branch_relocs.size / sizeof(*reloc);
91 uint32_t *inst, target;
92
93 if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
94 while (heap->next && heap->size < vp->nr_insns) {
95 struct nouveau_heap **evict = heap->next->priv;
96 nouveau_heap_free(evict);
97 }
98
99 if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
100 nv30->draw_flags |= NV30_NEW_VERTPROG;
101 return;
102 }
103 }
104
105 if (eng3d->oclass < NV40_3D_CLASS) {
106 while (nr_reloc--) {
107 inst = vp->insns[reloc->location].data;
108 target = vp->exec->start + reloc->target;
109
110 inst[2] &= ~0x000007fc;
111 inst[2] |= target << 2;
112 reloc++;
113 }
114 } else {
115 while (nr_reloc--) {
116 inst = vp->insns[reloc->location].data;
117 target = vp->exec->start + reloc->target;
118
119 inst[2] &= ~0x0000003f;
120 inst[2] |= target >> 3;
121 inst[3] &= ~0xe0000000;
122 inst[3] |= target << 29;
123 reloc++;
124 }
125 }
126
127 upload_code = TRUE;
128 }
129
130 if (vp->nr_consts && !vp->data) {
131 struct nouveau_heap *heap = nv30->screen->vp_data_heap;
132 struct nv30_shader_reloc *reloc = vp->const_relocs.data;
133 unsigned nr_reloc = vp->const_relocs.size / sizeof(*reloc);
134 uint32_t *inst, target;
135
136 if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
137 while (heap->next && heap->size < vp->nr_consts) {
138 struct nv30_vertprog *evp = heap->next->priv;
139 nouveau_heap_free(&evp->data);
140 }
141
142 if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
143 nv30->draw_flags |= NV30_NEW_VERTPROG;
144 return;
145 }
146 }
147
148 if (eng3d->oclass < NV40_3D_CLASS) {
149 while (nr_reloc--) {
150 inst = vp->insns[reloc->location].data;
151 target = vp->data->start + reloc->target;
152
153 inst[1] &= ~0x0007fc000;
154 inst[1] |= (target & 0x1ff) << 14;
155 reloc++;
156 }
157 } else {
158 while (nr_reloc--) {
159 inst = vp->insns[reloc->location].data;
160 target = vp->data->start + reloc->target;
161
162 inst[1] &= ~0x0001ff000;
163 inst[1] |= (target & 0x1ff) << 12;
164 reloc++;
165 }
166 }
167
168 upload_code = TRUE;
169 upload_data = TRUE;
170 }
171
172 if (vp->nr_consts) {
173 struct nv04_resource *res = nv04_resource(nv30->vertprog.constbuf);
174
175 for (i = 0; i < vp->nr_consts; i++) {
176 struct nv30_vertprog_data *data = &vp->consts[i];
177
178 if (data->index < 0) {
179 if (!upload_data)
180 continue;
181 } else {
182 float *constbuf = (float *)res->data;
183 if (!upload_data &&
184 !memcmp(data->value, &constbuf[data->index * 4], 16))
185 continue;
186 memcpy(data->value, &constbuf[data->index * 4], 16);
187 }
188
189 BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
190 PUSH_DATA (push, vp->data->start + i);
191 PUSH_DATAp(push, data->value, 4);
192 }
193 }
194
195 if (upload_code) {
196 BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
197 PUSH_DATA (push, vp->exec->start);
198 for (i = 0; i < vp->nr_insns; i++) {
199 BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
200 PUSH_DATAp(push, vp->insns[i].data, 4);
201 }
202 }
203
204 if (nv30->dirty & (NV30_NEW_VERTPROG | NV30_NEW_FRAGPROG)) {
205 BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
206 PUSH_DATA (push, vp->exec->start);
207 if (eng3d->oclass < NV40_3D_CLASS) {
208 BEGIN_NV04(push, NV30_3D(ENGINE), 1);
209 PUSH_DATA (push, 0x00000013); /* vp instead of ff, somehow */
210 } else {
211 BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
212 PUSH_DATA (push, vp->ir);
213 PUSH_DATA (push, vp->or | fp->vp_or);
214 BEGIN_NV04(push, NV30_3D(ENGINE), 1);
215 PUSH_DATA (push, 0x00000011);
216 }
217 }
218 }
219
220 static void *
221 nv30_vp_state_create(struct pipe_context *pipe,
222 const struct pipe_shader_state *cso)
223 {
224 struct nv30_vertprog *vp = CALLOC_STRUCT(nv30_vertprog);
225 if (!vp)
226 return NULL;
227
228 vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
229 tgsi_scan_shader(vp->pipe.tokens, &vp->info);
230 return vp;
231 }
232
233 static void
234 nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
235 {
236 struct nv30_vertprog *vp = hwcso;
237
238 if (vp->translated)
239 nv30_vertprog_destroy(vp);
240 FREE((void *)vp->pipe.tokens);
241 FREE(vp);
242 }
243
244 static void
245 nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
246 {
247 struct nv30_context *nv30 = nv30_context(pipe);
248
249 nv30->vertprog.program = hwcso;
250 nv30->dirty |= NV30_NEW_VERTPROG;
251 }
252
253 void
254 nv30_vertprog_init(struct pipe_context *pipe)
255 {
256 pipe->create_vs_state = nv30_vp_state_create;
257 pipe->bind_vs_state = nv30_vp_state_bind;
258 pipe->delete_vs_state = nv30_vp_state_delete;
259 }