nouveau: Make the state cache hierarchical.
author Stephane Marchesin <marchesin@icps.u-strasbg.fr>
Sun, 14 Jan 2007 20:17:08 +0000 (21:17 +0100)
committer Stephane Marchesin <marchesin@icps.u-strasbg.fr>
Sun, 14 Jan 2007 20:17:08 +0000 (21:17 +0100)
src/mesa/drivers/dri/nouveau/nouveau_fifo.h
src/mesa/drivers/dri/nouveau/nouveau_state_cache.c
src/mesa/drivers/dri/nouveau/nouveau_state_cache.h

index 05d00d47690d31fc00530d238422003e3c002920..9056bfb2557a273c1ec8540c847b583370a47a88 100644 (file)
@@ -31,6 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "nouveau_context.h"
 #include "nouveau_ctrlreg.h"
+#include "nouveau_state_cache.h"
 
 //#define NOUVEAU_RING_DEBUG
 //#define NOUVEAU_STATE_CACHE_DISABLE
@@ -114,6 +115,7 @@ extern void nouveau_state_cache_init(nouveauContextPtr nmesa);
 #define OUT_RING_CACHE(n) do {                                                                 \
        if (nmesa->state_cache.atoms[nmesa->state_cache.current_pos].value!=(n))        {       \
                nmesa->state_cache.atoms[nmesa->state_cache.current_pos].dirty=1;               \
+               nmesa->state_cache.hdirty[nmesa->state_cache.current_pos/NOUVEAU_STATE_CACHE_HIER_SIZE]=1;              \
                nmesa->state_cache.atoms[nmesa->state_cache.current_pos].value=(n);             \
        }                                                                                       \
        nmesa->state_cache.current_pos++;                                                       \
@@ -122,6 +124,7 @@ extern void nouveau_state_cache_init(nouveauContextPtr nmesa);
 #define OUT_RING_CACHEf(n) do {                                                                        \
        if ((*(float*)(&nmesa->state_cache.atoms[nmesa->state_cache.current_pos].value))!=(n)){ \
                nmesa->state_cache.atoms[nmesa->state_cache.current_pos].dirty=1;               \
+               nmesa->state_cache.hdirty[nmesa->state_cache.current_pos/NOUVEAU_STATE_CACHE_HIER_SIZE]=1;              \
                (*(float*)(&nmesa->state_cache.atoms[nmesa->state_cache.current_pos].value))=(n);\
        }                                                                                       \
        nmesa->state_cache.current_pos++;                                                       \
index 36f0c1024b61eab06ef3b4e77b49f1181f6d836f..cb4b9d30270d34f2ebf977e2e957ee041c4dccdb 100644 (file)
@@ -25,6 +25,8 @@ void nouveau_state_cache_flush(nouveauContextPtr nmesa)
        do
        {
                // jump to a dirty state
+               while((nmesa->state_cache.hdirty[i/NOUVEAU_STATE_CACHE_HIER_SIZE]==0)&&(i<NOUVEAU_STATE_CACHE_ENTRIES))
+                       i=(i&~(NOUVEAU_STATE_CACHE_HIER_SIZE-1))+NOUVEAU_STATE_CACHE_HIER_SIZE;
                while((nmesa->state_cache.atoms[i].dirty==0)&&(i<NOUVEAU_STATE_CACHE_ENTRIES))
                        i++;
 
@@ -42,11 +44,14 @@ void nouveau_state_cache_flush(nouveauContextPtr nmesa)
                        {
                                OUT_RING(nmesa->state_cache.atoms[i+j].value);
                                nmesa->state_cache.atoms[i+j].dirty=0;
+                               if ((i+j)%NOUVEAU_STATE_CACHE_HIER_SIZE==0)
+                                       nmesa->state_cache.hdirty[(i+j)/NOUVEAU_STATE_CACHE_HIER_SIZE-1]=0;
                        }
                        i+=run;
                }
        }
        while(i<NOUVEAU_STATE_CACHE_ENTRIES);
+       nmesa->state_cache.hdirty[NOUVEAU_STATE_CACHE_HIER_SIZE/NOUVEAU_STATE_CACHE_HIER_SIZE-1]=0;
 }
 
 
index 24882748468e5d79261774a1b615baaea0f5e985..5f9d426450bbfc692c35faebfe177596c04cd513 100644 (file)
@@ -5,6 +5,10 @@
 #include "mtypes.h"
 
 #define NOUVEAU_STATE_CACHE_ENTRIES 2048
+// size of a dirty requests block
+// you can play with that and tune the value to increase/decrease performance
+// but keep it a power of 2 !
+#define NOUVEAU_STATE_CACHE_HIER_SIZE  32
 
 typedef struct nouveau_state_atom_t{
        uint32_t value;
@@ -14,8 +18,10 @@ typedef struct nouveau_state_atom_t{
 typedef struct nouveau_state_cache_t{
        nouveau_state_atom atoms[NOUVEAU_STATE_CACHE_ENTRIES];
        uint32_t current_pos;
+       // hierarchical dirty flags
+       uint8_t hdirty[NOUVEAU_STATE_CACHE_ENTRIES/NOUVEAU_STATE_CACHE_HIER_SIZE];
        // master dirty flag
-       uint32_t dirty;
+       uint8_t dirty;
 }nouveau_state_cache;