changes to make smp work in linux
author Ali Saidi <saidi@eecs.umich.edu>
Wed, 1 Sep 2004 04:23:00 +0000 (00:23 -0400)
committer Ali Saidi <saidi@eecs.umich.edu>
Wed, 1 Sep 2004 04:23:00 +0000 (00:23 -0400)
console/console.c:
    Remove Printed SimOS references and replace with M5
    Rework the SMP stuff, so we don't trash any stacks, or what we
    thought were stacks, but are actually other people's memory.
console/dbmentry.s:
    add a carefully crafted piece of assembly that doesn't use the stack,
    so we don't clobber anything in the time between when we are spinning
    and when the OS tells us to go.
palcode/platform_m5.s:
    add/fix code for IPI, multiprocessor interrupts (DIR), and initial
    bootstrapping of the cpu

system/alpha/console/console.c
system/alpha/console/dbmentry.s
system/alpha/palcode/platform_m5.s

index 12db3d9f557b2bac83a396cb91654020984c756f..a82e695e17072d9c7a4becd4aaf4e9a364da0e53 100644 (file)
@@ -152,19 +152,12 @@ main(int argc, char **argv)
 
 
    InitConsole();
-   printf("SimOS console \n");
+   printf("M5 console\n");
    /*
     * get configuration from backdoor
     */
    simosConf.last_offset = k1Conf->last_offset;
-   printf(" Got simosConfiguration %d \n",simosConf.last_offset);
-
-/*   for (i=1;i<=simosConf.last_offset/4;i++) {
-      ui *k1ptr = (ui*)k1Conf + i;
-      ui *ksegptr = (ui*)(&simosConf.last_offset)+i;
-      *ksegptr = *k1ptr;
-
-   }*/
+   printf("Got Configuration %d \n",simosConf.last_offset);
 
     simosConf.last_offset = k1Conf->last_offset;
     simosConf.version = k1Conf->version;
@@ -186,7 +179,7 @@ main(int argc, char **argv)
     simosConf.bootStrapCPU = k1Conf->bootStrapCPU;
 
    if (simosConf.version != ALPHA_ACCESS_VERSION)  {
-      panic("Console version mismatch. Console expects %d. SimOS has %d \n",
+      panic("Console version mismatch. Console expects %d. has %d \n",
             ALPHA_ACCESS_VERSION,simosConf.version);
    }
 
@@ -227,8 +220,8 @@ struct rpb xxm_rpb = {
 #if 0
    0x12,               /* 050: system type - masquarade as some random 21064 */
 #endif
-   0, /* masquerade a Tsunami RGD */
-   (1<<10),            /* 058: system variation */
+   0, /* OVERRIDDEN */
+   (1<<10),            /* 058: system variation OVERRIDDEN */
    'c'|('o'<<8)|('o'<<16)|('l'<< 24),          /* 060: system revision */
    1024*4096,          /* 068: scaled interval clock intr freq  OVERRIDEN*/
    0,                  /* 070: cycle counter frequency */
@@ -268,17 +261,17 @@ ul xxm_tbb[] = { 0x1e1e1e1e1e1e1e1e, 0x1e1e1e1e1e1e1e1e, 0x1e1e1e1e1e1e1e1e, 0x1
                  0x1e1e1e1e1e1e1e1e, 0x1e1e1e1e1e1e1e1e, 0x1e1e1e1e1e1e1e1e, 0x1e1e1e1e1e1e1e1e};
 
 struct rpb_percpu xxm_rpb_percpu = {
-   {0,0,0,0,0,0,0,{0,0},{0,0,0,0,0,0,0,0}},                            /* 000: boot/restart HWPCB */
+   {0,0,0,0,0,0,1,{0,0},{0,0,0,0,0,0,0,0}},                            /* 000: boot/restart HWPCB */
    (STATE_PA | STATE_PP | STATE_CV | STATE_PV | STATE_PMV | STATE_PL),         /* 080: per-cpu state bits */
    0xc000,                             /* 088: palcode memory length */
    0x2000,                             /* 090: palcode scratch length */
    0x4000,                             /* 098: phys addr of palcode mem space */
    0x2000,                             /* 0A0: phys addr of palcode scratch space */
    (2 << 16) | (5 << 8) | 1,           /* 0A8: PALcode rev required */
-   5 | (2L  << 32),                            /* 0B0: processor type */
+   11 | (2L  << 32),                           /* 0B0: processor type */
    7,                                  /* 0B8: processor variation */
-   'D'|('a'<<8)|('v'<<16)|('e'<<24),   /* 0C0: processor revision */
-   {'D','a','v','e','C','o','n','r','o','y',0,0,0,0,0,0},      /* 0C8: proc serial num: 10 ascii chars */
+   'M'|('5'<<8)|('A'<<16)|('0'<<24),   /* 0C0: processor revision */
+   {'M','5','/','A','l','p','h','a','0','0','0','0','0','0','0','0'},  /* 0C8: proc serial num: 10 ascii chars */
    0,                                  /* 0D8: phys addr of logout area */
    0,                                  /* 0E0: length in bytes of logout area */
    0,                                  /* 0E8: halt pcb base */
@@ -795,7 +788,7 @@ unixBoot(int go, int argc, char **argv)
 
   rpb_name = (char *) ROUNDUP8(((ul)rpb_lurt) + sizeof(xxm_lurt));
   rpb_dsr->rpb_sysname_off = ((ul) rpb_name) - (ul) rpb_dsr;
-#define THENAME "             SimOS ALPHA/EV5"
+#define THENAME "             M5/Alpha       "
   sum = sizeof(THENAME);
   bcopy(THENAME, rpb_name, sum);
   *(ul *)rpb_name = sizeof(THENAME); /* put in length field */
@@ -1246,6 +1239,9 @@ void SlaveCmd(int cpu, struct rpb_percpu *my_rpb)
 /*   extern void palJToSlave[]; */
    extern unsigned int palJToSlave[];
 
+   SpinLock(&theLock);
+   printf("Slave CPU %d console command %s", cpu,my_rpb->rpb_iccb.iccb_rxbuf);
+   SpinUnlock(&theLock);
 
    my_rpb->rpb_state |= STATE_BIP;
    my_rpb->rpb_state &= ~STATE_RC;
@@ -1264,6 +1260,8 @@ void SlaveCmd(int cpu, struct rpb_percpu *my_rpb)
           rpb->rpb_restart_pv,
           rpb->rpb_vptb,
           KSEG_TO_PHYS(my_rpb));
+
+   panic("SlaveCmd returned \n");
 }
 
 void SlaveLoop( int cpu)
@@ -1272,7 +1270,6 @@ void SlaveLoop( int cpu)
    struct rpb_percpu *my_rpb = (struct rpb_percpu*)
       ((ul)rpb_percpu + size*cpu);
 
-
    SpinLock(&theLock);
    if (cpu==0) {
       panic("CPU 0 entering slaveLoop. Reenetering the console. HOSED \n");
@@ -1280,19 +1277,10 @@ void SlaveLoop( int cpu)
       printf("Entering slaveloop for cpu %d my_rpb=%x \n",cpu,my_rpb);
    }
    SpinUnlock(&theLock);
-   while(1) {
-      int i;
-      for (i=0; i < 1000000 ; i++) {
-         if (my_rpb->rpb_iccb.iccb_rxlen) {
-            SpinLock(&theLock);
-            printf("Slave CPU %d console command %s",
-                   cpu,my_rpb->rpb_iccb.iccb_rxbuf);
-            SpinUnlock(&theLock);
-            SlaveCmd(cpu,my_rpb);
-            panic("SlaveCmd returned \n");
-         }
-      }
-      printf("*");
-   }
-}
 
+   // swap the processor's context to the one in the
+   // rpb_percpu struct very carefully (i.e. no stack usage)
+   // so that linux knows which processor ends up in __smp_callin
+   // and we don't trash any data in the process
+   SlaveSpin(cpu,my_rpb,&my_rpb->rpb_iccb.iccb_rxlen);
+}
index d8741e658af3b5518b6dbf23298fec15440a44ef..7a718928b670c97918856ce7e774ee3dc6bd43ca 100644 (file)
@@ -209,5 +209,34 @@ SpinLock:
 2:
        br      zero,1b
        .end    SpinLock
+
+        .globl loadContext
+       .ent    loadContext 2
+loadContext:
+       .option O1
+       .frame  sp, 0, ra          # frame size 0
+       call_pal PAL_SWPCTX_ENTRY  # invoke the PALcode swap-context entry
+       ret     zero, (ra)         # return to the caller through ra
+       .end    loadContext
+
        
-       
+       .globl  SlaveSpin          # Very carefully spin wait 
+       .ent    SlaveSpin 2        # and swap context without
+SlaveSpin:                         # using any stack space
+       .option O1
+       .frame  sp, 0, ra          # frame size 0
+        mov a0, t0                 # cpu number 
+        mov a1, t1                 # cpu rpb pointer (virtual)
+        mov a2, t2                 # what to spin on
+       
+test:   ldl  t3, 0(t2)             # load the longword being watched
+        beq  t3, test              # keep polling while it is still zero
+        zapnot t1,0x1f,a0          # make rpb physical 
+       call_pal PAL_SWPCTX_ENTRY  # switch to pcb
+        mov t0, a0                 # setup args for SlaveCmd
+        mov t1, a1                 # second arg: the virtual rpb pointer saved on entry
+        jsr SlaveCmd               # call SlaveCmd
+       ret     zero, (ra)         # Should never be reached
+       .end    SlaveSpin
+
+
index ce7a9fedb9d5cfbb8f53e94ada8b8720d959ace5..acf02e69eb361cdc39c4d1227bc0c0a9227ed110 100644 (file)
@@ -432,20 +432,26 @@ EXPORT(sys_wripir)
 // Convert the processor number to a CPU mask
 //--
 
-       and     r16,0xf, r14            // mask the top stuff (16 CPUs supported)
+       and     r16,0x3, r14            // mask the top stuff (4 CPUs supported)
        bis     r31,0x1,r16             // get a one
        sll     r16,r14,r14             // shift the bit to the right place
+        sll     r14,12,r14
+        
 
 //++
 // Build the Broadcast Space base address
 //--
-       lda     r13,0xff8e(r31)         // Load the upper address bits
-       sll     r13,24,r13              // shift them to the top
+        lda     r16,0xf01(r31)
+        sll     r16,32,r16
+        ldah    r13,0xa0(r31)
+        sll    r13,8,r13
+        bis    r16,r13,r16     
+        lda     r16,0x0080(r16)
   
 //++
 // Send out the IP Intr
 //--
-       stqp    r14, 0x40(r13)  // Write to TLIPINTR reg WAS  TLSB_TLIPINTR_OFFSET  
+       stqp    r14, 0(r16)     // Tsunami MISC Register
        wmb                             // Push out the store
 
        hw_rei
@@ -737,20 +743,21 @@ EXPORT(sys_interrupt)
 //-
        ALIGN_BRANCH
 sys_int_23:
-       Read_TLINTRSUMx(r13,r10,r14)            // read the right TLINTRSUMx
-       srl     r13, 22, r13                    // shift down to examine IPL17
-
-       Intr_Find_TIOP(r13,r14)
-       beq     r14, 1f
-
-       Get_TLSB_Node_Address(r14,r10)
-       lda     r10, 0xac0(r10) // Get base TLILID address
+        or      r31,0,r16                       // IPI interrupt A0 = 0
+        lda     r12,0xf01(r31)                   // build up an address for the MISC register
+        sll     r12,16,r12
+        lda     r12,0xa000(r12)                   
+        sll     r12,16,r12                       
+        lda     r12,0x080(r12)                  
 
-       ldlp    r13, 0(r10)                     // Read the TLILID register
-       bne     r13, pal_post_dev_interrupt
+        ldq_p   r10,0(r12)                       // read misc register
+        and     r10,0x3,r10                     // isolate CPUID
+        or      r31,0x1,r14                     // load r14 with bit to clear
+        sll     r14,r10,r14                       // left shift by CPU ID
+        sll     r14,8,r14
+        stq_p   r14, 0(r12)                       // clear the rtc interrupt
 
-1:     lda     r16, osfint_c_passrel(r31)      // passive release
-       br      r31, pal_post_interrupt         // 
+       br      r31, pal_post_interrupt         // Notify the OS 
 
 
        ALIGN_BRANCH
@@ -764,7 +771,7 @@ sys_int_22:
 
         ldq_p   r10,0(r12)                       // read misc register
         and     r10,0x3,r10                     // isolate CPUID
-        or      r31,0x10,r14                     // load r9 with bit to clear
+        or      r31,0x10,r14                     // load r14 with bit to clear
         sll     r14,r10,r14                       // left shift by CPU ID
         stq_p   r14, 0(r12)                       // clear the rtc interrupt
          
@@ -811,10 +818,10 @@ sys_int_21:
     lda     r12,0x0080(r12)
     ldqp    r13, 0(r12)                       // read the MISC register for CPUID
         
-    and     r13,0x1,r14                      // grab LSB and shift left 2
-    sll     r14,2,r14
-    and     r13,0x2,r10                      // grabl LSB+1 and shift left 5
-    sll     r10,5,r10
+    and     r13,0x1,r14                      // grab LSB and shift left 6
+    sll     r14,6,r14
+    and     r13,0x2,r10                      // grabl LSB+1 and shift left 9
+    sll     r10,9,r10
     
     mskbl   r12,0,r12                         // calculate DIRn address
     lda     r13,0x280(r31)
@@ -1479,8 +1486,20 @@ sys_reset:
        mtpr    r1, pt_scbb             // load scbb
        mtpr    r31, pt_prbr            // clear out prbr
 #ifdef SIMOS
+        // yes, this is ugly, but you figure out a better
+        // way to get the address of the kludge_initial_pcbb 
+        // in r1 with an uncooperative assembler --ali
+        br     r1, kludge_getpcb_addr
+        br     r31, kludge_initial_pcbb     
+kludge_getpcb_addr:
+        ldqp   r19, 0(r1)
+        sll    r19, 44, r19
+        srl    r19, 44, r19
+        mulq   r19,4,r19
+        addq   r19, r1, r1 
+        addq   r1,4,r1
 //        or      zero,kludge_initial_pcbb,r1
-        GET_ADDR(r1, (kludge_initial_pcbb-pal_base), r1)
+//        GET_ADDR(r1, (kludge_initial_pcbb-pal_base), r1)
 #else
        mfpr    r1, pal_base
 //orig sget_addr r1, (kludge_initial_pcbb-pal$base), r1, verify=0// get address for temp pcbb
@@ -2598,7 +2617,7 @@ check_done:                               // do these now and return
 // .sbttl KLUDGE_INITIAL_PCBB - PCB for Boot use only
 
         ALIGN_128
-
+.globl kludge_initial_pcbb
 kludge_initial_pcbb:                   // PCB is 128 bytes long
 //     .repeat 16
 //     .quad   0