/* * Copyright (c) 2004 Tim Kelly/Dialectronics * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * Product contains software written by Dialectronics.com for use * by operating systems running on PowerPC processors. The currently * supported version may be found at * http://www.dialectronics.com/PowerPC/code/L2Config.c * 4. Inclusion of L2 read/write algorithm in otherwise original and/or * derived source must contain these conditions, as this algorithm * constitutes the core value to this software, regardless of extent * of modifications to balance of this software. * 5. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include #include #include #include #include #include #include #include #define MPC601 1 #define MPC603 3 #define MPC604 4 #define MPC603e 6 #define MPC603ev 7 #define MPC750 8 #define MPC604ev 9 #define MPC7400 12 #define IBM750FX 0x7000 #define MPC7410 0x800c #define MPC7450 0x8000 #define MPC7455 0x8001 #define L2CR 1017 #define L2CR_L2E 0x80000000 /* 0: L2 enable */ #define L2CR_L2PE 0x40000000 /* 1: L2 data parity enable */ #define L2CR_L2SIZ 0x30000000 /* 2-3: L2 size */ #define L2SIZ_RESERVED 0x00000000 /* on G3 */ #define L2SIZ_256K 0x10000000 #define L2SIZ_512K 0x20000000 #define L2SIZ_1M 0x30000000 #define L2SIZ_2M 0x00000000 /* on G4 */ #define L2CR_L2CLK 0x0e000000 /* 4-6: L2 clock ratio */ #define L2CLK_DIS 0x00000000 /* disable L2 clock */ #define L2CLK_10 0x02000000 /* core clock / 1 */ #define L2CLK_15 0x04000000 /* / 1.5 */ #define L2CLK_20 0x08000000 /* / 2 */ #define L2CLK_25 0x0a000000 /* / 2.5 */ #define L2CLK_30 0x0c000000 /* / 3 */ #define L2CLK_40 0x0e000000 /* / 4 */ #define L2CR_L2RAM 0x01800000 /* 7-8: L2 RAM type */ #define L2RAM_FT_BURST 0x00000000 #define L2RAM_PL_BURST 0x01000000 #define L2RAM_PL_LATE 0x01800000 #define L2CR_L2DO 0x00400000 /* 9: L2 data-only. Setting this bit disables instruction caching. */ #define L2CR_L2I 0x00200000 /* 10: L2 global invalidate. */ #define L2CR_L2CTL 0x00100000 /* 11: L2 RAM control (ZZ enable). Enables automatic operation of the L2ZZ (low-power mode) signal. */ #define L2CR_L2WT 0x00080000 /* 12: L2 write-through. */ #define L2CR_L2TS 0x00040000 /* 13: L2 test support. */ #define L2CR_L2OH 0x00030000 /* 14-15: L2 output hold. */ #define L2CR_OH05 0x00000000 /* 0.5 nS */ #define L2CR_OH10 0x00010000 /* 1.0 nS */ #define L2CR_OHLTE 0x00020000 /* recommended late write hold */ #define L2CR_OHLNG 0x00030000 /* longest output hold */ #define L2CR_L2SL 0x00008000 /* 16: L2 DLL slow. */ #define L2CR_L2DF 0x00004000 /* 17: L2 differential clock. */ #define L2CR_L2BYP 0x00002000 /* 18: L2 DLL bypass. */ #define L2CR_L2IP 0x00000001 /* 31: L2 global invalidate in progress (read only). */ // function prototypes u_int32_t ConfigPPCCache(u_int32_t cpu); u_int32_t L2Config(u_int32_t cpu, u_int32_t useTestSettings); u_int32_t EnableL2Cache(u_int32_t cpu, u_int32_t l2_value); u_int32_t ChangeL2Setting(u_int32_t cpu, u_int32_t l2_value); u_int32_t AutosizeL2Cache(u_int32_t cpu, u_int32_t l2_value); void bootloader_delay(u_int32_t cycles); // need prototype for determineL2CacheSize #define L2_DISABLED 0x00000000 #define L2_TRYTOSTART 0x00000001 #define L2_ENABLED 0x00000002 #define L2_DETECTSIZE 0x00000004 // get passed the CPU type and return errors // assumes L1 enabled already and returns with -1 // if L2 already on, 0 if bad l2_value u_int32_t ConfigPPCCache(u_int32_t cpu) { u_int32_t l2cr; int err = -1; if ((cpu < MPC750) || (cpu == MPC604ev)) { printf("no backside L2 cache present...\n"); return 0; } l2cr = ppc_mfl2cr(); printf("\nl2cr value before: %x\n", l2cr); if ((l2cr & L2CR_L2E) == 0) { printf("L2 currently disabled, trying...\n"); err = L2Config(cpu, L2_DISABLED | L2_TRYTOSTART); } else { // fall through returns -1 printf("L2 enabled, testing cache size...\n"); l2cr = L2Config(cpu, l2cr); } return err; } // gets passed the CPU and returns an appropriate L2 value u_int32_t L2Config(u_int32_t cpu, u_int32_t directions) { int l2_value = 0, retSize = 0, x = 0; u_int32_t testSetting = 0, retVal = 0; if (directions & L2CR_L2E) l2_value = directions; else { // generally expect to try to enable it... // ???...need to determine which cpus need long hold times switch (cpu) { // these have four possible cache sizes case MPC7400: testSetting |= L2RAM_PL_BURST | L2CLK_20; break; // these have 256k L2 and 1M or 2M L3 caches // L2 appears to be late write case MPC7410: case MPC7450: case MPC7455: // all prefer 2:1 clock divider // however, they may not like CTL and WT testSetting |= L2RAM_PL_LATE | L2CR_OHLTE | L2CLK_20; // L3 configuration handled elsewhere break; case IBM750FX: // 512k L2? testSetting |= L2RAM_PL_BURST | L2CLK_20 | L2CR_L2CTL | L2CR_L2WT; break; case MPC750: // these have four possible cache sizes // configure conservatively: // pipelined synchronous burst, nap (ZZ) control, write through to bus // no parity, data + instruction caching and 0.5 nS hold set by zero value // some G3's like 1:1.5 clock divider, but not all testSetting |= L2RAM_PL_BURST | L2CLK_20 | L2CR_L2CTL | L2CR_L2WT; break; } // switch (cpu) printf("turning on base settings (%x)...\n", testSetting); // this will handle turning it on for the first time // can not be called once already on retVal = EnableL2Cache(cpu, testSetting); l2_value = retVal; } // else of if (directions & L2_L2E) printf("autosizing...\n"); retSize = AutosizeL2Cache(cpu, l2_value); if (directions & L2CR_L2E) testSetting = l2_value; else { switch(retSize) { case 0x00200000: if ((cpu < MPC7400) || (cpu == IBM750FX)) printf("2M backside cache reported for pre-G4 cpu!!!"); retVal |= L2SIZ_2M; break; case 0x00100000: retVal |= L2SIZ_1M; break; case 0x00080000: retVal |= L2SIZ_512K; break; case 0x00040000: retVal |= L2SIZ_256K; // no default: - memory size was cleared above } retVal = ChangeL2Setting(cpu, retVal); printf("L2 enabled with final setting: %x\n", retVal); } return retVal; } // gets passed an L2 value to enable the cache with // should only be called when cache is off u_int32_t EnableL2Cache(u_int32_t cpu, u_int32_t l2cr) { u_int32_t retVal = 0, x = 0; __asm __volatile ("mfspr %0, 1017" : "=r"(x)); if (x & L2CR_L2E) { printf("EnableL2Cache called when L2 cache already on!!!\n,"); printf("Current L2 value: %x\n", x); return x; } l2cr &= ~L2CR_L2E; // just to be sure... retVal = ChangeL2Setting(cpu, l2cr); if (retVal == l2cr) { /* Enable L2 cache. */ l2cr |= L2CR_L2E; // turn on officially retVal = ChangeL2Setting(cpu, l2cr); } else printf("initial load of settings returned L2CR value: %x...L2 is not on.\n", retVal); return retVal; } // handles the steps before and after changing a setting // if off before, will return in off condition u_int32_t ChangeL2Setting(u_int32_t cpu, u_int32_t l2cr) { u_int32_t origL2CR = l2cr; u_int32_t retVal = 0, x = 0; // sync before changing setting // to ensure no access while changing switch (cpu) { case (MPC7400): case (MPC7410): case (MPC7450): case (MPC7455): // G4's can be on or off for invalidate (???) // but if on, do not turn off // G4's need to end any stream touch instructions __asm __volatile(".long 0x7c00066c"); // dssall opcode break; case (MPC750): // G3's need to be turned off to invalidate // l2cr &= ~L2CR_L2E; break; } __asm __volatile ("sync"); __asm __volatile ("mtspr 1017,%0" :: "r"(l2cr)); // no need to wait for DLL clock to be stable // according to Motorola and IBM documentation // global invalidate _always_ takes longer than // DLL stabilization /* Invalidate all L2 contents. */ l2cr |= L2CR_L2I; __asm __volatile ("mtspr 1017,%0" :: "r"(l2cr)); do { __asm __volatile ("mfspr %0, 1017" : "=r"(x)); } while (x & L2CR_L2IP); // post invalidate processing switch (cpu) { case (MPC7400): case (MPC7410): case (MPC7450): case (MPC7455): break; case (MPC750): break; } l2cr = origL2CR; l2cr &= ~L2CR_L2I; // just to be sure... __asm __volatile ("mtspr 1017,%0" :: "r"(l2cr)); __asm __volatile ("sync"); // and recover the value again so that // return value is from the L2CR // and not from how we set it __asm __volatile ("mfspr %0, 1017" : "=r"(l2cr)); return l2cr; } // called to autodetect cache size u_int32_t AutosizeL2Cache(u_int32_t cpu, u_int32_t l2cr) { u_int32_t maxSize, testSetting = l2cr; int cacheSize; void* memStart; // we're going to need some clean memory, could see 2M caches // &1M should be clear, align in OF 1.0.5 doesn't work, blah maxSize = 0x00200000; memStart = (void*)OF_claim(maxSize>>1, maxSize, maxSize>>1); if (memStart == (void*)-1) return 0; printf("memory claimed (0x%x at 0x%x)...\n", maxSize, memStart); // turn on test mode testSetting |= L2CR_L2TS | L2CR_L2DO; if (cpu > MPC604ev) testSetting |= L2SIZ_2M; // zero = 2M, N/C if already on else testSetting |= L2SIZ_1M; printf("memory test setting: %x...\n",testSetting); testSetting = ChangeL2Setting(cpu, testSetting); cacheSize = __determineL2CacheSize(memStart, maxSize); // restore original setting testSetting = ChangeL2Setting(cpu, l2cr); printf("measured memory size: %x...\n", cacheSize); // free up earlier memory OF_release(memStart, maxSize); printf("memory released...\n"); return (u_int32_t)cacheSize; } /* */ /* determineL2CacheSize */ /* */ asm(" .text .globl __determineL2CacheSize __determineL2CacheSize: /* r3 has our start address, r4 our baseline size */ /* we'll use r3, r6 and r7 for changing values, */ /* but r4 and r5 are static, and r8 stores HID0 */ xor 0, 0, 0 /* clear r0 (use for 0) */ add 5, 3, 4 /* set up end address in r5 */ mr 4, 3 /* set up our start address in r4 */ /* flush L1, do a global invalidate of L1 /* turn on data cache only in L1, save original in r8 */ mr 3, 5 /* initialize end address in r3 */ /* by starting high and going low */ /* we can minimize effects of using */ /* same address space for flushing */ /* and testing (most of 2M pushed out) */ sync isync loop: cmpw 3, 4 ble nextStep lwz 6, 0(3) subi 3, 3, 0x04 /* don't know cache line size */ b loop nextStep: /* global (flash) invalidate */ mfspr 8, 1008 ori 3, 8, 0x4400 sync mtspr 1008, 3 isync sync mr 3, 4 /* use r3 for storing the value */ /* (which is our start address) */ l2SzWriteLoop: dcbz 0, 3 /* zero out the cache line */ stw 3, 0(3) /* write our address to a cache line */ dcbf 0, 3 /* flush cache to L2 */ addi 3, 3, 0x20 /* Increase the address */ cmp 0x0, 0x0, 3, 5 /* index less than end address? */ blt l2SzWriteLoop readBackCache: /* begin at end address */ /* caches run in aligned values */ /* (256k, 512k, 1M, 2M, et al) */ /* so we check backwards from the end */ /* reading on possible boundaries */ /* minus 4 bytes relative to end address */ /* (r5 - 4, r5 - 256k - 4, et al) */ /* basically this checks for a match at */ /* the first spot in a next larger size cache */ /* i.e., matches @ 256k+4 => 512k cache */ /* the index is (upper limit - boundary - 4)*/ /* keep running until we get a cache miss */ /* on a boundary */ /* */ /* a miss at first pass indicates 0k cache */ /* r4 still contains start address */ /* this sets up our boundary pre-first pass */ li 7, 0x01 /* set r7 = 1 */ slwi 7, 7, 0x11 /* 2^17 = 128k */ /* r5 is upper limit so reduce one index value */ subi 3, 5, 0x20 nextBoundary: lwz 6, 0(3) /* Load the word from the cache line */ dcbi 0, 3 /* invalidate the cache line */ cmp 0x0, 0x0, 3, 6 /* our address should be stored at our address */ bne cacheMiss slwi 7, 7, 0x01 /* bit shift to double r7 value (1 to the left) */ sub 3, 5, 7 /* subtract the boundary from end address */ cmp 0x0, 0x0, 3, 4 /* have to check boundary vs. start address */ blt cacheMiss /* out of memory space, last hit means full size*/ /* subi 3, 3, 0x04*/ /* one word shy of boundary */ b nextBoundary cacheMiss: /* r7 is the last boundary that hit */ /* r7 + 4 missed an index */ srwi 3, 7, 0x013 /* drop 128k */ slwi 7, 3, 0x012 /* x 256k */ /* return L1 to original setting */ /* globally flush to prevent coherency issues */ mr 3, 5 isync sync loop2: cmpw 3, 4 ble nextStep2 lwz 6, 0(3) subi 3, 3, 0x04 /* don't know cache line size */ b loop2 nextStep2: sync mtspr 1008, 8 isync sync mr 3, 7 blr "); void bootloader_delay(u_int32_t time) { #define FREEZE (1<<31) #define COUNT_CYCLES 1 uint32_t cycles = 0; ppc_mtmmcr0(FREEZE); ppc_mtpmc2(0); ppc_mtmmcr0(COUNT_CYCLES); while (cycles < time) { cycles = ppc_mfpmc2(); } ppc_mtmmcr0(FREEZE); return; } // handles the steps before and after changing a setting /* u_int32_t ChangeL2Setting(u_int32_t cpu, u_int32_t l2cr) { u_int32_t retVal; // sync before changing setting // to ensure no access while changing //asm volatile ("isync"); asm volatile ("sync"); __asm __volatile ("mtspr 1017,%0" :: "r"(l2cr)); // Wait for L2 clock to be stable (640 L2 clocks). // we do this because global invalidate can not be done once // cache is turned on bootloader_delay(640); // sync after changing setting //__asm __volatile ("isync"); __asm __volatile ("sync"); // and recover the value again so that // return value is from the L2CR // and not from how we set it __asm __volatile ("mfspr %0, 1017" : "=r"(retVal)); return retVal; } */