Merge branch 'master' into crestline
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_cache.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_state.h"
34 #include "brw_aub.h"
35 #include "intel_batchbuffer.h"
36 #include "imports.h"
37
38 /* XXX: Fixme - have to include these to get the sizes of the prog_key
39 * structs:
40 */
41 #include "brw_wm.h"
42 #include "brw_vs.h"
43 #include "brw_clip.h"
44 #include "brw_sf.h"
45 #include "brw_gs.h"
46
47
48 /***********************************************************************
49 * Check cache for uploaded version of struct, else upload new one.
50 * Fail when memory is exhausted.
51 *
52 * XXX: FIXME: Currently search is so slow it would be quicker to
53 * regenerate the data every time...
54 */
55
56 static GLuint hash_key( const void *key, GLuint key_size )
57 {
58 GLuint *ikey = (GLuint *)key;
59 GLuint hash = 0, i;
60
61 assert(key_size % 4 == 0);
62
63 /* I'm sure this can be improved on:
64 */
65 for (i = 0; i < key_size/4; i++)
66 hash ^= ikey[i];
67
68 return hash;
69 }
70
71 static struct brw_cache_item *search_cache( struct brw_cache *cache,
72 GLuint hash,
73 const void *key,
74 GLuint key_size)
75 {
76 struct brw_cache_item *c;
77
78 for (c = cache->items[hash % cache->size]; c; c = c->next) {
79 if (c->hash == hash &&
80 c->key_size == key_size &&
81 memcmp(c->key, key, key_size) == 0)
82 return c;
83 }
84
85 return NULL;
86 }
87
88
89 static void rehash( struct brw_cache *cache )
90 {
91 struct brw_cache_item **items;
92 struct brw_cache_item *c, *next;
93 GLuint size, i;
94
95 size = cache->size * 3;
96 items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items));
97 _mesa_memset(items, 0, size * sizeof(*items));
98
99 for (i = 0; i < cache->size; i++)
100 for (c = cache->items[i]; c; c = next) {
101 next = c->next;
102 c->next = items[c->hash % size];
103 items[c->hash % size] = c;
104 }
105
106 FREE(cache->items);
107 cache->items = items;
108 cache->size = size;
109 }
110
111
112 GLboolean brw_search_cache( struct brw_cache *cache,
113 const void *key,
114 GLuint key_size,
115 void *aux_return,
116 GLuint *offset_return)
117 {
118 struct brw_cache_item *item;
119 GLuint addr = 0;
120 GLuint hash = hash_key(key, key_size);
121
122 item = search_cache(cache, hash, key, key_size);
123
124 if (item) {
125 if (aux_return)
126 *(void **)aux_return = (void *)((char *)item->key + item->key_size);
127
128 *offset_return = addr = item->offset;
129 }
130
131 if (item == NULL || addr != cache->last_addr) {
132 cache->brw->state.dirty.cache |= 1<<cache->id;
133 cache->last_addr = addr;
134 }
135
136 return item != NULL;
137 }
138
139 GLuint brw_upload_cache( struct brw_cache *cache,
140 const void *key,
141 GLuint key_size,
142 const void *data,
143 GLuint data_size,
144 const void *aux,
145 void *aux_return )
146 {
147 GLuint offset;
148 struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
149 GLuint hash = hash_key(key, key_size);
150 void *tmp = _mesa_malloc(key_size + cache->aux_size);
151
152 if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) {
153 /* Should not be possible:
154 */
155 _mesa_printf("brw_pool_alloc failed\n");
156 exit(1);
157 }
158
159 memcpy(tmp, key, key_size);
160
161 if (cache->aux_size)
162 memcpy(tmp+key_size, aux, cache->aux_size);
163
164 item->key = tmp;
165 item->hash = hash;
166 item->key_size = key_size;
167 item->offset = offset;
168 item->data_size = data_size;
169
170 if (++cache->n_items > cache->size * 1.5)
171 rehash(cache);
172
173 hash %= cache->size;
174 item->next = cache->items[hash];
175 cache->items[hash] = item;
176
177 if (aux_return) {
178 assert(cache->aux_size);
179 *(void **)aux_return = (void *)((char *)item->key + item->key_size);
180 }
181
182 if (INTEL_DEBUG & DEBUG_STATE)
183 _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n",
184 cache->name,
185 data_size,
186 cache->pool->buffer,
187 offset);
188
189 /* Copy data to the buffer:
190 */
191 bmBufferSubDataAUB(&cache->brw->intel,
192 cache->pool->buffer,
193 offset,
194 data_size,
195 data,
196 cache->aub_type,
197 cache->aub_sub_type);
198
199
200 cache->brw->state.dirty.cache |= 1<<cache->id;
201 cache->last_addr = offset;
202
203 return offset;
204 }
205
206 /* This doesn't really work with aux data. Use search/upload instead
207 */
208 GLuint brw_cache_data_sz(struct brw_cache *cache,
209 const void *data,
210 GLuint data_size)
211 {
212 GLuint addr;
213
214 if (!brw_search_cache(cache, data, data_size, NULL, &addr)) {
215 addr = brw_upload_cache(cache,
216 data, data_size,
217 data, data_size,
218 NULL, NULL);
219 }
220
221 return addr;
222 }
223
224 GLuint brw_cache_data(struct brw_cache *cache,
225 const void *data)
226 {
227 return brw_cache_data_sz(cache, data, cache->key_size);
228 }
229
230
231
232
233
234 static void brw_init_cache( struct brw_context *brw,
235 const char *name,
236 GLuint id,
237 GLuint key_size,
238 GLuint aux_size,
239 GLuint aub_type,
240 GLuint aub_sub_type )
241 {
242 struct brw_cache *cache = &brw->cache[id];
243 cache->brw = brw;
244 cache->id = id;
245 cache->name = name;
246 cache->items = NULL;
247
248 cache->size = 7;
249 cache->n_items = 0;
250 cache->items = (struct brw_cache_item **)
251 _mesa_calloc(cache->size *
252 sizeof(struct brw_cache_item));
253
254
255 cache->key_size = key_size;
256 cache->aux_size = aux_size;
257 cache->aub_type = aub_type;
258 cache->aub_sub_type = aub_sub_type;
259 switch (aub_type) {
260 case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
261 case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
262 default: assert(0); break;
263 }
264 }
265
266 void brw_init_caches( struct brw_context *brw )
267 {
268
269 brw_init_cache(brw,
270 "CC_VP",
271 BRW_CC_VP,
272 sizeof(struct brw_cc_viewport),
273 0,
274 DW_GENERAL_STATE,
275 DWGS_COLOR_CALC_VIEWPORT_STATE);
276
277 brw_init_cache(brw,
278 "CC_UNIT",
279 BRW_CC_UNIT,
280 sizeof(struct brw_cc_unit_state),
281 0,
282 DW_GENERAL_STATE,
283 DWGS_COLOR_CALC_STATE);
284
285 brw_init_cache(brw,
286 "WM_PROG",
287 BRW_WM_PROG,
288 sizeof(struct brw_wm_prog_key),
289 sizeof(struct brw_wm_prog_data),
290 DW_GENERAL_STATE,
291 DWGS_KERNEL_INSTRUCTIONS);
292
293 brw_init_cache(brw,
294 "SAMPLER_DEFAULT_COLOR",
295 BRW_SAMPLER_DEFAULT_COLOR,
296 sizeof(struct brw_sampler_default_color),
297 0,
298 DW_GENERAL_STATE,
299 DWGS_SAMPLER_DEFAULT_COLOR);
300
301 brw_init_cache(brw,
302 "SAMPLER",
303 BRW_SAMPLER,
304 0, /* variable key/data size */
305 0,
306 DW_GENERAL_STATE,
307 DWGS_SAMPLER_STATE);
308
309 brw_init_cache(brw,
310 "WM_UNIT",
311 BRW_WM_UNIT,
312 sizeof(struct brw_wm_unit_state),
313 0,
314 DW_GENERAL_STATE,
315 DWGS_WINDOWER_IZ_STATE);
316
317 brw_init_cache(brw,
318 "SF_PROG",
319 BRW_SF_PROG,
320 sizeof(struct brw_sf_prog_key),
321 sizeof(struct brw_sf_prog_data),
322 DW_GENERAL_STATE,
323 DWGS_KERNEL_INSTRUCTIONS);
324
325 brw_init_cache(brw,
326 "SF_VP",
327 BRW_SF_VP,
328 sizeof(struct brw_sf_viewport),
329 0,
330 DW_GENERAL_STATE,
331 DWGS_STRIPS_FANS_VIEWPORT_STATE);
332
333 brw_init_cache(brw,
334 "SF_UNIT",
335 BRW_SF_UNIT,
336 sizeof(struct brw_sf_unit_state),
337 0,
338 DW_GENERAL_STATE,
339 DWGS_STRIPS_FANS_STATE);
340
341 brw_init_cache(brw,
342 "VS_UNIT",
343 BRW_VS_UNIT,
344 sizeof(struct brw_vs_unit_state),
345 0,
346 DW_GENERAL_STATE,
347 DWGS_VERTEX_SHADER_STATE);
348
349 brw_init_cache(brw,
350 "VS_PROG",
351 BRW_VS_PROG,
352 sizeof(struct brw_vs_prog_key),
353 sizeof(struct brw_vs_prog_data),
354 DW_GENERAL_STATE,
355 DWGS_KERNEL_INSTRUCTIONS);
356
357 brw_init_cache(brw,
358 "CLIP_UNIT",
359 BRW_CLIP_UNIT,
360 sizeof(struct brw_clip_unit_state),
361 0,
362 DW_GENERAL_STATE,
363 DWGS_CLIPPER_STATE);
364
365 brw_init_cache(brw,
366 "CLIP_PROG",
367 BRW_CLIP_PROG,
368 sizeof(struct brw_clip_prog_key),
369 sizeof(struct brw_clip_prog_data),
370 DW_GENERAL_STATE,
371 DWGS_KERNEL_INSTRUCTIONS);
372
373 brw_init_cache(brw,
374 "GS_UNIT",
375 BRW_GS_UNIT,
376 sizeof(struct brw_gs_unit_state),
377 0,
378 DW_GENERAL_STATE,
379 DWGS_GEOMETRY_SHADER_STATE);
380
381 brw_init_cache(brw,
382 "GS_PROG",
383 BRW_GS_PROG,
384 sizeof(struct brw_gs_prog_key),
385 sizeof(struct brw_gs_prog_data),
386 DW_GENERAL_STATE,
387 DWGS_KERNEL_INSTRUCTIONS);
388
389 brw_init_cache(brw,
390 "SS_SURFACE",
391 BRW_SS_SURFACE,
392 sizeof(struct brw_surface_state),
393 0,
394 DW_SURFACE_STATE,
395 DWSS_SURFACE_STATE);
396
397 brw_init_cache(brw,
398 "SS_SURF_BIND",
399 BRW_SS_SURF_BIND,
400 sizeof(struct brw_surface_binding_table),
401 0,
402 DW_SURFACE_STATE,
403 DWSS_BINDING_TABLE_STATE);
404 }
405
406
407 /* When we lose hardware context, need to invalidate the surface cache
408 * as these structs must be explicitly re-uploaded. They are subject
409 * to fixup by the memory manager as they contain absolute agp
410 * offsets, so we need to ensure there is a fresh version of the
411 * struct available to receive the fixup.
412 *
413 * XXX: Need to ensure that there aren't two versions of a surface or
414 * bufferobj with different backing data active in the same buffer at
415 * once? Otherwise the cache could confuse them. Maybe better not to
416 * cache at all?
417 *
418 * --> Isn't this the same as saying need to ensure batch is flushed
419 * before new data is uploaded to an existing buffer? We
420 * already try to make sure of that.
421 */
422 static void clear_cache( struct brw_cache *cache )
423 {
424 struct brw_cache_item *c, *next;
425 GLuint i;
426
427 for (i = 0; i < cache->size; i++) {
428 for (c = cache->items[i]; c; c = next) {
429 next = c->next;
430 free((void *)c->key);
431 free(c);
432 }
433 cache->items[i] = NULL;
434 }
435
436 cache->n_items = 0;
437 }
438
439 void brw_clear_all_caches( struct brw_context *brw )
440 {
441 GLint i;
442
443 if (INTEL_DEBUG & DEBUG_STATE)
444 _mesa_printf("%s\n", __FUNCTION__);
445
446 for (i = 0; i < BRW_MAX_CACHE; i++)
447 clear_cache(&brw->cache[i]);
448
449 if (brw->curbe.last_buf) {
450 _mesa_free(brw->curbe.last_buf);
451 brw->curbe.last_buf = NULL;
452 }
453
454 brw->state.dirty.mesa |= ~0;
455 brw->state.dirty.brw |= ~0;
456 brw->state.dirty.cache |= ~0;
457 }
458
459
460
461
462
463 void brw_destroy_caches( struct brw_context *brw )
464 {
465 GLuint i;
466
467 for (i = 0; i < BRW_MAX_CACHE; i++)
468 clear_cache(&brw->cache[i]);
469 }