nvc0: bind images on fragment and compute shaders for Fermi
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_tex.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_resource.h"
25 #include "nvc0/gm107_texture.xml.h"
26 #include "nvc0/nvc0_compute.xml.h"
27 #include "nv50/g80_texture.xml.h"
28 #include "nv50/g80_defs.xml.h"
29
30 #include "util/u_format.h"
31
/* Bit masks applied to the 32-bit per-slot texture handles on NVE4+.
 * The validation code in this file stores the TIC id in the low 20 bits and
 * the TSC id shifted left by 20; OR-ing in one of these masks marks the
 * corresponding half of the handle as having no valid entry bound. */
32 #define NVE4_TIC_ENTRY_INVALID 0x000fffff
33 #define NVE4_TSC_ENTRY_INVALID 0xfff00000
34
35 static inline uint32_t
36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
37 {
38 switch (swz) {
39 case PIPE_SWIZZLE_X : return fmt->tic.src_x;
40 case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
41 case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
42 case PIPE_SWIZZLE_W: return fmt->tic.src_w;
43 case PIPE_SWIZZLE_1:
44 return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
45 case PIPE_SWIZZLE_0:
46 default:
47 return G80_TIC_SOURCE_ZERO;
48 }
49 }
50
51 struct pipe_sampler_view *
52 nvc0_create_sampler_view(struct pipe_context *pipe,
53 struct pipe_resource *res,
54 const struct pipe_sampler_view *templ)
55 {
56 uint32_t flags = 0;
57
58 if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
59 flags |= NV50_TEXVIEW_SCALED_COORDS;
60
61 return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
62 }
63
64 static struct pipe_sampler_view *
65 gm107_create_texture_view(struct pipe_context *pipe,
66 struct pipe_resource *texture,
67 const struct pipe_sampler_view *templ,
68 uint32_t flags,
69 enum pipe_texture_target target)
70 {
71 const struct util_format_description *desc;
72 const struct nvc0_format *fmt;
73 uint64_t address;
74 uint32_t *tic;
75 uint32_t swz[4];
76 uint32_t width, height;
77 uint32_t depth;
78 struct nv50_tic_entry *view;
79 struct nv50_miptree *mt;
80 bool tex_int;
81
82 view = MALLOC_STRUCT(nv50_tic_entry);
83 if (!view)
84 return NULL;
85 mt = nv50_miptree(texture);
86
87 view->pipe = *templ;
88 view->pipe.reference.count = 1;
89 view->pipe.texture = NULL;
90 view->pipe.context = pipe;
91
92 view->id = -1;
93
94 pipe_resource_reference(&view->pipe.texture, texture);
95
96 tic = &view->tic[0];
97
98 desc = util_format_description(view->pipe.format);
99 tex_int = util_format_is_pure_integer(view->pipe.format);
100
101 fmt = &nvc0_format_table[view->pipe.format];
102 swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
103 swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
104 swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
105 swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
106
107 tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
108 tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
109 tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
110 tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
111 tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
112 tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
113 tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
114 tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
115 tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
116
117 address = mt->base.address;
118
119 tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
120 tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
121 tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
122
123 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
124 tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
125
126 if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
127 tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
128 else
129 tic[5] = 0;
130
131 /* check for linear storage type */
132 if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
133 if (texture->target == PIPE_BUFFER) {
134 assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
135 width = view->pipe.u.buf.last_element - view->pipe.u.buf.first_element;
136 address +=
137 view->pipe.u.buf.first_element * desc->block.bits / 8;
138 tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
139 tic[3] |= width >> 16;
140 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
141 tic[4] |= width & 0xffff;
142 } else {
143 assert(!(mt->level[0].pitch & 0x1f));
144 /* must be 2D texture without mip maps */
145 tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;
146 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
147 tic[3] |= mt->level[0].pitch >> 5;
148 tic[4] |= mt->base.base.width0 - 1;
149 tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
150 tic[5] |= mt->base.base.height0 - 1;
151 }
152 tic[1] = address;
153 tic[2] |= address >> 32;
154 tic[6] = 0;
155 tic[7] = 0;
156 return &view->pipe;
157 }
158
159 tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
160 tic[3] |=
161 ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
162 ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
163
164 depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
165
166 if (mt->base.base.array_size > 1) {
167 /* there doesn't seem to be a base layer field in TIC */
168 address += view->pipe.u.tex.first_layer * mt->layer_stride;
169 depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
170 }
171 tic[1] = address;
172 tic[2] |= address >> 32;
173
174 switch (target) {
175 case PIPE_TEXTURE_1D:
176 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
177 break;
178 case PIPE_TEXTURE_2D:
179 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
180 break;
181 case PIPE_TEXTURE_RECT:
182 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
183 break;
184 case PIPE_TEXTURE_3D:
185 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
186 break;
187 case PIPE_TEXTURE_CUBE:
188 depth /= 6;
189 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
190 break;
191 case PIPE_TEXTURE_1D_ARRAY:
192 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
193 break;
194 case PIPE_TEXTURE_2D_ARRAY:
195 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
196 break;
197 case PIPE_TEXTURE_CUBE_ARRAY:
198 depth /= 6;
199 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
200 break;
201 default:
202 unreachable("unexpected/invalid texture target");
203 }
204
205 tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
206 GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
207 GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
208 GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
209
210 if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
211 width = mt->base.base.width0 << mt->ms_x;
212 height = mt->base.base.height0 << mt->ms_y;
213 } else {
214 width = mt->base.base.width0;
215 height = mt->base.base.height0;
216 }
217
218 tic[4] |= width - 1;
219
220 tic[5] |= (height - 1) & 0xffff;
221 tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
222 tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
223
224 /* sampling points: (?) */
225 if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
226 tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
227 tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
228 } else {
229 tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
230 tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
231 }
232
233 tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
234 tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
235
236 return &view->pipe;
237 }
238
239 static struct pipe_sampler_view *
240 gf100_create_texture_view(struct pipe_context *pipe,
241 struct pipe_resource *texture,
242 const struct pipe_sampler_view *templ,
243 uint32_t flags,
244 enum pipe_texture_target target)
245 {
246 const struct util_format_description *desc;
247 const struct nvc0_format *fmt;
248 uint64_t address;
249 uint32_t *tic;
250 uint32_t swz[4];
251 uint32_t width, height;
252 uint32_t depth;
253 uint32_t tex_fmt;
254 struct nv50_tic_entry *view;
255 struct nv50_miptree *mt;
256 bool tex_int;
257
258 view = MALLOC_STRUCT(nv50_tic_entry);
259 if (!view)
260 return NULL;
261 mt = nv50_miptree(texture);
262
263 view->pipe = *templ;
264 view->pipe.reference.count = 1;
265 view->pipe.texture = NULL;
266 view->pipe.context = pipe;
267
268 view->id = -1;
269
270 pipe_resource_reference(&view->pipe.texture, texture);
271
272 tic = &view->tic[0];
273
274 desc = util_format_description(view->pipe.format);
275
276 fmt = &nvc0_format_table[view->pipe.format];
277
278 tex_int = util_format_is_pure_integer(view->pipe.format);
279 tex_fmt = fmt->tic.format & 0x3f;
280
281 swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
282 swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
283 swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
284 swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
285 tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
286 (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
287 (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
288 (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
289 (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
290 (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
291 (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
292 (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
293 (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
294 ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
295
296 address = mt->base.address;
297
298 tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
299
300 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
301 tic[2] |= G80_TIC_2_SRGB_CONVERSION;
302
303 if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
304 tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
305
306 /* check for linear storage type */
307 if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
308 if (texture->target == PIPE_BUFFER) {
309 assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
310 address +=
311 view->pipe.u.buf.first_element * desc->block.bits / 8;
312 tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
313 tic[3] = 0;
314 tic[4] = /* width */
315 view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
316 tic[5] = 0;
317 } else {
318 /* must be 2D texture without mip maps */
319 tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
320 tic[3] = mt->level[0].pitch;
321 tic[4] = mt->base.base.width0;
322 tic[5] = (1 << 16) | mt->base.base.height0;
323 }
324 tic[6] =
325 tic[7] = 0;
326 tic[1] = address;
327 tic[2] |= address >> 32;
328 return &view->pipe;
329 }
330
331 tic[2] |=
332 ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
333 ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
334
335 depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
336
337 if (mt->base.base.array_size > 1) {
338 /* there doesn't seem to be a base layer field in TIC */
339 address += view->pipe.u.tex.first_layer * mt->layer_stride;
340 depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
341 }
342 tic[1] = address;
343 tic[2] |= address >> 32;
344
345 switch (target) {
346 case PIPE_TEXTURE_1D:
347 tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
348 break;
349 case PIPE_TEXTURE_2D:
350 tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
351 break;
352 case PIPE_TEXTURE_RECT:
353 tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
354 break;
355 case PIPE_TEXTURE_3D:
356 tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
357 break;
358 case PIPE_TEXTURE_CUBE:
359 depth /= 6;
360 tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
361 break;
362 case PIPE_TEXTURE_1D_ARRAY:
363 tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
364 break;
365 case PIPE_TEXTURE_2D_ARRAY:
366 tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
367 break;
368 case PIPE_TEXTURE_CUBE_ARRAY:
369 depth /= 6;
370 tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
371 break;
372 default:
373 unreachable("unexpected/invalid texture target");
374 }
375
376 tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
377
378 if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
379 width = mt->base.base.width0 << mt->ms_x;
380 height = mt->base.base.height0 << mt->ms_y;
381 } else {
382 width = mt->base.base.width0;
383 height = mt->base.base.height0;
384 }
385
386 tic[4] = (1 << 31) | width;
387
388 tic[5] = height & 0xffff;
389 tic[5] |= depth << 16;
390 tic[5] |= mt->base.base.last_level << 28;
391
392 /* sampling points: (?) */
393 if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
394 tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
395 else
396 tic[6] = 0x03000000;
397
398 tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
399 tic[7] |= mt->ms_mode << 12;
400
401 return &view->pipe;
402 }
403
404 struct pipe_sampler_view *
405 nvc0_create_texture_view(struct pipe_context *pipe,
406 struct pipe_resource *texture,
407 const struct pipe_sampler_view *templ,
408 uint32_t flags,
409 enum pipe_texture_target target)
410 {
411 if (nvc0_context(pipe)->screen->tic.maxwell)
412 return gm107_create_texture_view(pipe, texture, templ, flags, target);
413 return gf100_create_texture_view(pipe, texture, templ, flags, target);
414 }
415
416 static void
417 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
418 struct nv04_resource *res)
419 {
420 uint64_t address = res->address;
421 if (res->base.target != PIPE_BUFFER)
422 return;
423 address += tic->pipe.u.buf.first_element *
424 util_format_get_blocksize(tic->pipe.format);
425 if (tic->tic[1] == (uint32_t)address &&
426 (tic->tic[2] & 0xff) == address >> 32)
427 return;
428
429 nvc0_screen_tic_unlock(nvc0->screen, tic);
430 tic->id = -1;
431 tic->tic[1] = address;
432 tic->tic[2] &= 0xffffff00;
433 tic->tic[2] |= address >> 32;
434 }
435
436 bool
437 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
438 {
439 uint32_t commands[32];
440 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
441 struct nouveau_bo *txc = nvc0->screen->txc;
442 unsigned i;
443 unsigned n = 0;
444 bool need_flush = false;
445
446 for (i = 0; i < nvc0->num_textures[s]; ++i) {
447 struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
448 struct nv04_resource *res;
449 const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
450
451 if (!tic) {
452 if (dirty)
453 commands[n++] = (i << 1) | 0;
454 continue;
455 }
456 res = nv04_resource(tic->pipe.texture);
457 nvc0_update_tic(nvc0, tic, res);
458
459 if (tic->id < 0) {
460 tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
461
462 PUSH_SPACE(push, 17);
463 BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
464 PUSH_DATAh(push, txc->offset + (tic->id * 32));
465 PUSH_DATA (push, txc->offset + (tic->id * 32));
466 BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
467 PUSH_DATA (push, 32);
468 PUSH_DATA (push, 1);
469 BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
470 PUSH_DATA (push, 0x100111);
471 BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
472 PUSH_DATAp(push, &tic->tic[0], 8);
473
474 need_flush = true;
475 } else
476 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
477 if (unlikely(s == 5))
478 BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
479 else
480 BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
481 PUSH_DATA (push, (tic->id << 4) | 1);
482 NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
483 }
484 nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
485
486 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
487 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
488
489 if (!dirty)
490 continue;
491 commands[n++] = (tic->id << 9) | (i << 1) | 1;
492
493 if (unlikely(s == 5))
494 BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
495 else
496 BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
497 }
498 for (; i < nvc0->state.num_textures[s]; ++i)
499 commands[n++] = (i << 1) | 0;
500
501 nvc0->state.num_textures[s] = nvc0->num_textures[s];
502
503 if (n) {
504 if (unlikely(s == 5))
505 BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
506 else
507 BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
508 PUSH_DATAp(push, commands, n);
509 }
510 nvc0->textures_dirty[s] = 0;
511
512 return need_flush;
513 }
514
515 static bool
516 nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
517 {
518 struct nouveau_bo *txc = nvc0->screen->txc;
519 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
520 unsigned i;
521 bool need_flush = false;
522
523 for (i = 0; i < nvc0->num_textures[s]; ++i) {
524 struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
525 struct nv04_resource *res;
526 const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
527
528 if (!tic) {
529 nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
530 continue;
531 }
532 res = nv04_resource(tic->pipe.texture);
533 nvc0_update_tic(nvc0, tic, res);
534
535 if (tic->id < 0) {
536 tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
537
538 PUSH_SPACE(push, 16);
539 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
540 PUSH_DATAh(push, txc->offset + (tic->id * 32));
541 PUSH_DATA (push, txc->offset + (tic->id * 32));
542 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
543 PUSH_DATA (push, 32);
544 PUSH_DATA (push, 1);
545 BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), 9);
546 PUSH_DATA (push, 0x1001);
547 PUSH_DATAp(push, &tic->tic[0], 8);
548
549 need_flush = true;
550 } else
551 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
552 BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
553 PUSH_DATA (push, (tic->id << 4) | 1);
554 }
555 nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
556
557 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
558 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
559
560 nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
561 nvc0->tex_handles[s][i] |= tic->id;
562 if (dirty)
563 BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
564 }
565 for (; i < nvc0->state.num_textures[s]; ++i) {
566 nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
567 nvc0->textures_dirty[s] |= 1 << i;
568 }
569
570 nvc0->state.num_textures[s] = nvc0->num_textures[s];
571
572 return need_flush;
573 }
574
575 void nvc0_validate_textures(struct nvc0_context *nvc0)
576 {
577 bool need_flush = false;
578 int i;
579
580 for (i = 0; i < 5; i++) {
581 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
582 need_flush |= nve4_validate_tic(nvc0, i);
583 else
584 need_flush |= nvc0_validate_tic(nvc0, i);
585 }
586
587 if (need_flush) {
588 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
589 PUSH_DATA (nvc0->base.pushbuf, 0);
590 }
591 }
592
593 bool
594 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
595 {
596 uint32_t commands[16];
597 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
598 unsigned i;
599 unsigned n = 0;
600 bool need_flush = false;
601
602 for (i = 0; i < nvc0->num_samplers[s]; ++i) {
603 struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
604
605 if (!(nvc0->samplers_dirty[s] & (1 << i)))
606 continue;
607 if (!tsc) {
608 commands[n++] = (i << 4) | 0;
609 continue;
610 }
611 nvc0->seamless_cube_map = tsc->seamless_cube_map;
612 if (tsc->id < 0) {
613 tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
614
615 nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
616 65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
617 32, tsc->tsc);
618 need_flush = true;
619 }
620 nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
621
622 commands[n++] = (tsc->id << 12) | (i << 4) | 1;
623 }
624 for (; i < nvc0->state.num_samplers[s]; ++i)
625 commands[n++] = (i << 4) | 0;
626
627 nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
628
629 if (n) {
630 if (unlikely(s == 5))
631 BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
632 else
633 BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
634 PUSH_DATAp(push, commands, n);
635 }
636 nvc0->samplers_dirty[s] = 0;
637
638 return need_flush;
639 }
640
641 bool
642 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
643 {
644 struct nouveau_bo *txc = nvc0->screen->txc;
645 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
646 unsigned i;
647 bool need_flush = false;
648
649 for (i = 0; i < nvc0->num_samplers[s]; ++i) {
650 struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
651
652 if (!tsc) {
653 nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
654 continue;
655 }
656 if (tsc->id < 0) {
657 tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
658
659 PUSH_SPACE(push, 16);
660 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
661 PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
662 PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
663 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
664 PUSH_DATA (push, 32);
665 PUSH_DATA (push, 1);
666 BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), 9);
667 PUSH_DATA (push, 0x1001);
668 PUSH_DATAp(push, &tsc->tsc[0], 8);
669
670 need_flush = true;
671 }
672 nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
673
674 nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
675 nvc0->tex_handles[s][i] |= tsc->id << 20;
676 }
677 for (; i < nvc0->state.num_samplers[s]; ++i) {
678 nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
679 nvc0->samplers_dirty[s] |= 1 << i;
680 }
681
682 nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
683
684 return need_flush;
685 }
686
687 void nvc0_validate_samplers(struct nvc0_context *nvc0)
688 {
689 bool need_flush = false;
690 int i;
691
692 for (i = 0; i < 5; i++) {
693 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
694 need_flush |= nve4_validate_tsc(nvc0, i);
695 else
696 need_flush |= nvc0_validate_tsc(nvc0, i);
697 }
698
699 if (need_flush) {
700 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
701 PUSH_DATA (nvc0->base.pushbuf, 0);
702 }
703 }
704
705 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
706 * At some point we might want to get a list of the combinations used by a
707 * shader and fill in those entries instead of having it extract the handles.
708 */
709 void
710 nve4_set_tex_handles(struct nvc0_context *nvc0)
711 {
712 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
713 struct nvc0_screen *screen = nvc0->screen;
714 unsigned s;
715
716 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
717 return;
718
719 for (s = 0; s < 5; ++s) {
720 uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
721 if (!dirty)
722 continue;
723 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
724 PUSH_DATA (push, 2048);
725 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
726 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
727 do {
728 int i = ffs(dirty) - 1;
729 dirty &= ~(1 << i);
730
731 BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
732 PUSH_DATA (push, (8 + i) * 4);
733 PUSH_DATA (push, nvc0->tex_handles[s][i]);
734 } while (dirty);
735
736 nvc0->textures_dirty[s] = 0;
737 nvc0->samplers_dirty[s] = 0;
738 }
739 }
740
741
/* Per-format lookup tables used by the surface-info code below; each is
 * indexed by a pipe format. Declared here without initializers so they can
 * be referenced before their contents are given.
 * NOTE(review): the initialized definitions are presumably further down in
 * this file - confirm. */
742 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
743 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
744 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
745
746 static void
747 nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
748 int *depth)
749 {
750 struct nv04_resource *res = nv04_resource(view->resource);
751 int level;
752
753 *width = *height = *depth = 1;
754 if (res->base.target == PIPE_BUFFER) {
755 *width = view->u.buf.last_element - view->u.buf.first_element + 1;
756 return;
757 }
758
759 level = view->u.tex.level;
760 *width = u_minify(view->resource->width0, level);
761 *height = u_minify(view->resource->height0, level);
762 *depth = u_minify(view->resource->depth0, level);
763
764 switch (res->base.target) {
765 case PIPE_TEXTURE_1D_ARRAY:
766 case PIPE_TEXTURE_2D_ARRAY:
767 case PIPE_TEXTURE_CUBE:
768 case PIPE_TEXTURE_CUBE_ARRAY:
769 *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
770 break;
771 case PIPE_TEXTURE_1D:
772 case PIPE_TEXTURE_2D:
773 case PIPE_TEXTURE_RECT:
774 case PIPE_TEXTURE_3D:
775 break;
776 default:
777 assert(!"unexpected texture target");
778 break;
779 }
780 }
781
782 void
783 nve4_set_surface_info(struct nouveau_pushbuf *push,
784 struct pipe_image_view *view,
785 struct nvc0_context *nvc0)
786 {
787 struct nvc0_screen *screen = nvc0->screen;
788 struct nv04_resource *res;
789 uint64_t address;
790 uint32_t *const info = push->cur;
791 int width, height, depth;
792 uint8_t log2cpp;
793
794 if (view && !nve4_su_format_map[view->format])
795 NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
796
797 push->cur += 16;
798
799 if (!view || !nve4_su_format_map[view->format]) {
800 memset(info, 0, 16 * sizeof(*info));
801
802 info[0] = 0xbadf0000;
803 info[1] = 0x80004000;
804 info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
805 screen->lib_code->start;
806 return;
807 }
808 res = nv04_resource(view->resource);
809
810 address = res->address;
811
812 /* get surface dimensions based on the target. */
813 nvc0_get_surface_dims(view, &width, &height, &depth);
814
815 info[8] = width;
816 info[9] = height;
817 info[10] = depth;
818 switch (res->base.target) {
819 case PIPE_TEXTURE_1D_ARRAY:
820 info[11] = 1;
821 break;
822 case PIPE_TEXTURE_2D:
823 case PIPE_TEXTURE_RECT:
824 info[11] = 2;
825 break;
826 case PIPE_TEXTURE_3D:
827 info[11] = 3;
828 break;
829 case PIPE_TEXTURE_2D_ARRAY:
830 case PIPE_TEXTURE_CUBE:
831 case PIPE_TEXTURE_CUBE_ARRAY:
832 info[11] = 4;
833 break;
834 default:
835 info[11] = 0;
836 break;
837 }
838 log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;
839
840 /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
841 * format doesn't mismatch. */
842 info[12] = util_format_get_blocksize(view->format);
843
844 /* limit in bytes for raw access */
845 info[13] = (0x06 << 22) | ((width << log2cpp) - 1);
846
847 info[1] = nve4_su_format_map[view->format];
848
849 #if 0
850 switch (util_format_get_blocksizebits(view->format)) {
851 case 16: info[1] |= 1 << 16; break;
852 case 32: info[1] |= 2 << 16; break;
853 case 64: info[1] |= 3 << 16; break;
854 case 128: info[1] |= 4 << 16; break;
855 default:
856 break;
857 }
858 #else
859 info[1] |= log2cpp << 16;
860 info[1] |= 0x4000;
861 info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
862 #endif
863
864 if (res->base.target == PIPE_BUFFER) {
865 unsigned blocksize = util_format_get_blocksize(view->format);
866
867 address += view->u.buf.first_element * blocksize;
868
869 info[0] = address >> 8;
870 info[2] = width - 1;
871 info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
872 info[3] = 0;
873 info[4] = 0;
874 info[5] = 0;
875 info[6] = 0;
876 info[7] = 0;
877 info[14] = 0;
878 info[15] = 0;
879 } else {
880 struct nv50_miptree *mt = nv50_miptree(&res->base);
881 struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
882 const unsigned z = view->u.tex.first_layer;
883
884 if (z) {
885 if (mt->layout_3d) {
886 address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
887 /* doesn't work if z passes z-tile boundary */
888 if (depth > 1) {
889 pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
890 "3D images are not really supported!");
891 debug_printf("3D images are not really supported!\n");
892 }
893 } else {
894 address += mt->layer_stride * z;
895 }
896 }
897 address += lvl->offset;
898
899 info[0] = address >> 8;
900 info[2] = width - 1;
901 /* NOTE: this is really important: */
902 info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
903 info[3] = (0x88 << 24) | (lvl->pitch / 64);
904 info[4] = height - 1;
905 info[4] |= (lvl->tile_mode & 0x0f0) << 25;
906 info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
907 info[5] = mt->layer_stride >> 8;
908 info[6] = depth - 1;
909 info[6] |= (lvl->tile_mode & 0xf00) << 21;
910 info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
911 info[7] = 0;
912 info[14] = mt->ms_x;
913 info[15] = mt->ms_y;
914 }
915 }
916
917 static inline void
918 nvc0_set_surface_info(struct nouveau_pushbuf *push,
919 struct pipe_image_view *view, uint64_t address,
920 int width, int height, int depth)
921 {
922 struct nv04_resource *res;
923 uint32_t *const info = push->cur;
924
925 push->cur += 16;
926
927 /* Make sure to always initialize the surface information area because it's
928 * used to check if the given image is bound or not. */
929 memset(info, 0, 16 * sizeof(*info));
930
931 if (!view || !view->resource)
932 return;
933 res = nv04_resource(view->resource);
934
935 /* Stick the image dimensions for the imageSize() builtin. */
936 info[8] = width;
937 info[9] = height;
938 info[10] = depth;
939
940 /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
941 * offset and to check if the format doesn't mismatch. */
942 info[12] = util_format_get_blocksize(view->format);
943
944 if (res->base.target == PIPE_BUFFER) {
945 info[0] = address >> 8;
946 info[2] = width;
947 } else {
948 struct nv50_miptree *mt = nv50_miptree(&res->base);
949
950 info[0] = address >> 8;
951 info[2] = width;
952 info[4] = height;
953 info[5] = mt->layer_stride >> 8;
954 info[6] = depth;
955 info[14] = mt->ms_x;
956 info[15] = mt->ms_y;
957 }
958 }
959
960 void
961 nvc0_validate_suf(struct nvc0_context *nvc0, int s)
962 {
963 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
964 struct nvc0_screen *screen = nvc0->screen;
965
966 if (s == 5)
967 nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
968 else
969 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
970
971 for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
972 struct pipe_image_view *view = &nvc0->images[s][i];
973 int width, height, depth;
974 uint64_t address = 0;
975
976 if (s == 5)
977 BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
978 else
979 BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
980
981 if (view->resource) {
982 struct nv04_resource *res = nv04_resource(view->resource);
983 unsigned rt = nvc0_format_table[view->format].rt;
984
985 if (util_format_is_depth_or_stencil(view->format))
986 rt = rt << 12;
987 else
988 rt = (rt << 4) | (0x14 << 12);
989
990 /* get surface dimensions based on the target. */
991 nvc0_get_surface_dims(view, &width, &height, &depth);
992
993 address = res->address;
994 if (res->base.target == PIPE_BUFFER) {
995 unsigned blocksize = util_format_get_blocksize(view->format);
996
997 address += view->u.buf.first_element * blocksize;
998 assert(!(address & 0xff));
999
1000 PUSH_DATAh(push, address);
1001 PUSH_DATA (push, address);
1002 PUSH_DATA (push, align(width * blocksize, 0x100));
1003 PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
1004 PUSH_DATA (push, rt);
1005 PUSH_DATA (push, 0);
1006 } else {
1007 struct nv50_miptree *mt = nv50_miptree(view->resource);
1008 struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1009 const unsigned z = view->u.tex.first_layer;
1010
1011 if (mt->layout_3d) {
1012 address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
1013 if (depth >= 1) {
1014 pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
1015 "3D images are not supported!");
1016 debug_printf("3D images are not supported!\n");
1017 }
1018 } else {
1019 address += mt->layer_stride * z;
1020 }
1021 address += lvl->offset;
1022
1023 PUSH_DATAh(push, address);
1024 PUSH_DATA (push, address);
1025 PUSH_DATA (push, width);
1026 PUSH_DATA (push, height);
1027 PUSH_DATA (push, rt);
1028 PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
1029 }
1030
1031 if (s == 5)
1032 BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
1033 else
1034 BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1035 } else {
1036 PUSH_DATA(push, 0);
1037 PUSH_DATA(push, 0);
1038 PUSH_DATA(push, 0);
1039 PUSH_DATA(push, 0);
1040 PUSH_DATA(push, 0x14000);
1041 PUSH_DATA(push, 0);
1042 }
1043
1044 /* stick surface information into the driver constant buffer */
1045 if (s == 5)
1046 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
1047 else
1048 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1049 PUSH_DATA (push, 2048);
1050 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1051 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1052 if (s == 5)
1053 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
1054 else
1055 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1056 PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1057
1058 nvc0_set_surface_info(push, view, address, width, height, depth);
1059 }
1060 }
1061
1062 static inline void
1063 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
1064 {
1065 nvc0_validate_suf(nvc0, 4);
1066
1067 /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
1068 nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
1069 nvc0->images_dirty[5] |= nvc0->images_valid[5];
1070 }
1071
1072 static inline void
1073 nve4_update_surface_bindings(struct nvc0_context *nvc0)
1074 {
1075 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1076 struct nvc0_screen *screen = nvc0->screen;
1077 int i, j, s;
1078
1079 for (s = 0; s < 5; s++) {
1080 if (!nvc0->images_dirty[s])
1081 continue;
1082
1083 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1084 PUSH_DATA (push, 2048);
1085 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1086 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1087 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16 * NVC0_MAX_IMAGES);
1088 PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(0));
1089
1090 for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
1091 struct pipe_image_view *view = &nvc0->images[s][i];
1092 if (view->resource) {
1093 struct nv04_resource *res = nv04_resource(view->resource);
1094
1095 nve4_set_surface_info(push, view, nvc0);
1096 BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1097 } else {
1098 for (j = 0; j < 16; j++)
1099 PUSH_DATA(push, 0);
1100 }
1101 }
1102 }
1103 }
1104
1105 void
1106 nvc0_validate_surfaces(struct nvc0_context *nvc0)
1107 {
1108 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
1109 nve4_update_surface_bindings(nvc0);
1110 } else {
1111 nvc0_update_surface_bindings(nvc0);
1112 }
1113 }
1114
1115
1116 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
1117 {
1118 [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
1119 [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
1120 [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
1121 [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
1122 [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
1123 [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
1124 [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
1125 [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
1126 [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
1127 [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
1128 [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
1129 [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
1130 [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
1131 [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
1132 [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
1133 [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
1134 [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
1135 [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
1136 [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
1137 [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
1138 [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
1139 [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
1140 [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
1141 [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
1142 [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
1143 [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
1144 [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
1145 [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
1146 [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
1147 [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
1148 [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
1149 [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
1150 [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
1151 [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
1152 [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
1153 [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
1154 [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
1155 [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
1156 [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
1157 };
1158
1159 /* Auxiliary format description values for surface instructions.
1160 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
1161 */
1162 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
1163 {
1164 [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
1165 [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
1166 [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
1167
1168 [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
1169 [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
1170 [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
1171 [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
1172 [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
1173
1174 [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
1175 [PIPE_FORMAT_R32G32_SINT] = 0x3433,
1176 [PIPE_FORMAT_R32G32_UINT] = 0x3433,
1177
1178 [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
1179 [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
1180 [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
1181 [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
1182 [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
1183 [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
1184 [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
1185
1186 [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
1187 [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
1188 [PIPE_FORMAT_R16G16_SINT] = 0x2524,
1189 [PIPE_FORMAT_R16G16_UINT] = 0x2524,
1190 [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
1191
1192 [PIPE_FORMAT_R32_SINT] = 0x2024,
1193 [PIPE_FORMAT_R32_UINT] = 0x2024,
1194 [PIPE_FORMAT_R32_FLOAT] = 0x2024,
1195
1196 [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
1197 [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
1198 [PIPE_FORMAT_R8G8_SINT] = 0x1615,
1199 [PIPE_FORMAT_R8G8_UINT] = 0x1615,
1200
1201 [PIPE_FORMAT_R16_UNORM] = 0x1115,
1202 [PIPE_FORMAT_R16_SNORM] = 0x1115,
1203 [PIPE_FORMAT_R16_SINT] = 0x1115,
1204 [PIPE_FORMAT_R16_UINT] = 0x1115,
1205 [PIPE_FORMAT_R16_FLOAT] = 0x1115,
1206
1207 [PIPE_FORMAT_R8_UNORM] = 0x0206,
1208 [PIPE_FORMAT_R8_SNORM] = 0x0206,
1209 [PIPE_FORMAT_R8_SINT] = 0x0206,
1210 [PIPE_FORMAT_R8_UINT] = 0x0206
1211 };
1212
1213 /* NOTE: These are hardcoded offsets for the shader library.
1214 * TODO: Automate them.
1215 */
1216 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
1217 {
1218 [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
1219 [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
1220 [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
1221 [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
1222 [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
1223 [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
1224 [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
1225 [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
1226 [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
1227 [PIPE_FORMAT_R32G32_SINT] = 0x468,
1228 [PIPE_FORMAT_R32G32_UINT] = 0x468,
1229 [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
1230 [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530,
1231 [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
1232 [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
1233 [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
1234 [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
1235 [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
1236 [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
1237 [PIPE_FORMAT_R16G16_UNORM] = 0x828,
1238 [PIPE_FORMAT_R16G16_SNORM] = 0x890,
1239 [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
1240 [PIPE_FORMAT_R16G16_UINT] = 0x948,
1241 [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
1242 [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
1243 [PIPE_FORMAT_R32_SINT] = 0xa30,
1244 [PIPE_FORMAT_R32_UINT] = 0xa30,
1245 [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
1246 [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
1247 [PIPE_FORMAT_R8G8_UINT] = 0xb48,
1248 [PIPE_FORMAT_R8G8_SINT] = 0xb98,
1249 [PIPE_FORMAT_R16_UNORM] = 0xbe8,
1250 [PIPE_FORMAT_R16_SNORM] = 0xc48,
1251 [PIPE_FORMAT_R16_SINT] = 0xca0,
1252 [PIPE_FORMAT_R16_UINT] = 0xce8,
1253 [PIPE_FORMAT_R16_FLOAT] = 0xd30,
1254 [PIPE_FORMAT_R8_UNORM] = 0xd88,
1255 [PIPE_FORMAT_R8_SNORM] = 0xde0,
1256 [PIPE_FORMAT_R8_SINT] = 0xe38,
1257 [PIPE_FORMAT_R8_UINT] = 0xe88,
1258 [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
1259 };