585fa72493
On the 34K the redundant cache operations were causing excessive stalls resulting in realtime code running on the second VPE missing its deadline. For all other platforms this patch is just a significant performance improvment as illustrated by below benchmark numbers. Processor, Processes - times in microseconds - smaller is better ------------------------------------------------------------------------------ Host OS Mhz null null open slct sig sig fork exec sh call I/O stat clos TCP inst hndl proc proc proc --------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 25Kf 2.6.18-rc4 533 0.49 1.16 7.57 33.4 30.5 1.34 12.4 5497 17.K 54.K 25Kf 2.6.18-rc4-p 533 0.49 1.16 6.68 23.0 30.7 1.36 8.55 5030 16.K 48.K 4Kc 2.6.18-rc4 80 4.21 15.0 131. 289. 261. 16.5 258. 18.K 70.K 227K 4Kc 2.6.18-rc4-p 80 4.34 13.1 128. 285. 262. 18.2 258. 12.K 52.K 176K 34Kc 2.6.18-rc4 40 5.01 14.0 61.6 90.0 477. 17.9 94.7 29.K 108K 342K 34Kc 2.6.18-rc4-p 40 4.98 13.9 61.2 89.7 475. 17.6 93.7 8758 44.K 158K BCM1480 2.6.18-rc4 700 0.28 0.60 3.68 5.92 16.0 0.78 5.08 931. 3163 15.K BCM1480 2.6.18-rc4-p 700 0.28 0.61 3.65 5.85 16.0 0.79 5.20 395. 1464 8385 TX49-16K 2.6.18-rc3 197 0.73 2.41 19.0 37.8 82.9 2.94 17.5 4438 14.K 56.K TX49-16K 2.6.18-rc3-p 197 0.73 2.40 19.9 36.3 82.9 2.94 23.4 2577 9103 38.K TX49-32K 2.6.18-rc3 396 0.36 1.19 6.80 11.8 41.0 1.46 8.17 2738 8465 32.K TX49-32K 2.6.18-rc3-p 396 0.36 1.19 6.82 10.2 41.0 1.46 8.18 1330 4638 18.K Original patch by me with enhancements by Atsushi Nemoto. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
155 lines
3.7 KiB
C
155 lines
3.7 KiB
C
/*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*
|
|
* Copyright (C) 1994 - 2003 by Ralf Baechle
|
|
*/
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/mm.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/cpu.h>
|
|
#include <asm/cpu-features.h>
|
|
|
|
/* Cache operations. */
|
|
void (*flush_cache_all)(void);
|
|
void (*__flush_cache_all)(void);
|
|
void (*flush_cache_mm)(struct mm_struct *mm);
|
|
void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
|
|
unsigned long end);
|
|
void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
|
|
unsigned long pfn);
|
|
void (*flush_icache_range)(unsigned long start, unsigned long end);
|
|
void (*__flush_icache_page)(struct vm_area_struct *vma, struct page *page);
|
|
|
|
/* MIPS specific cache operations */
|
|
void (*flush_cache_sigtramp)(unsigned long addr);
|
|
void (*local_flush_data_cache_page)(void * addr);
|
|
void (*flush_data_cache_page)(unsigned long addr);
|
|
void (*flush_icache_all)(void);
|
|
|
|
EXPORT_SYMBOL(flush_data_cache_page);
|
|
|
|
#ifdef CONFIG_DMA_NONCOHERENT
|
|
|
|
/* DMA cache operations. */
|
|
void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
|
|
void (*_dma_cache_wback)(unsigned long start, unsigned long size);
|
|
void (*_dma_cache_inv)(unsigned long start, unsigned long size);
|
|
|
|
EXPORT_SYMBOL(_dma_cache_wback_inv);
|
|
EXPORT_SYMBOL(_dma_cache_wback);
|
|
EXPORT_SYMBOL(_dma_cache_inv);
|
|
|
|
#endif /* CONFIG_DMA_NONCOHERENT */
|
|
|
|
/*
|
|
* We could optimize the case where the cache argument is not BCACHE but
|
|
* that seems very atypical use ...
|
|
*/
|
|
asmlinkage int sys_cacheflush(unsigned long addr,
|
|
unsigned long bytes, unsigned int cache)
|
|
{
|
|
if (bytes == 0)
|
|
return 0;
|
|
if (!access_ok(VERIFY_WRITE, (void __user *) addr, bytes))
|
|
return -EFAULT;
|
|
|
|
flush_icache_range(addr, addr + bytes);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void __flush_dcache_page(struct page *page)
|
|
{
|
|
struct address_space *mapping = page_mapping(page);
|
|
unsigned long addr;
|
|
|
|
if (PageHighMem(page))
|
|
return;
|
|
if (mapping && !mapping_mapped(mapping)) {
|
|
SetPageDcacheDirty(page);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* We could delay the flush for the !page_mapping case too. But that
|
|
* case is for exec env/arg pages and those are %99 certainly going to
|
|
* get faulted into the tlb (and thus flushed) anyways.
|
|
*/
|
|
addr = (unsigned long) page_address(page);
|
|
flush_data_cache_page(addr);
|
|
}
|
|
|
|
EXPORT_SYMBOL(__flush_dcache_page);
|
|
|
|
void __update_cache(struct vm_area_struct *vma, unsigned long address,
|
|
pte_t pte)
|
|
{
|
|
struct page *page;
|
|
unsigned long pfn, addr;
|
|
int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc;
|
|
|
|
pfn = pte_pfn(pte);
|
|
if (unlikely(!pfn_valid(pfn)))
|
|
return;
|
|
page = pfn_to_page(pfn);
|
|
if (page_mapping(page) && Page_dcache_dirty(page)) {
|
|
addr = (unsigned long) page_address(page);
|
|
if (exec || pages_do_alias(addr, address & PAGE_MASK))
|
|
flush_data_cache_page(addr);
|
|
ClearPageDcacheDirty(page);
|
|
}
|
|
}
|
|
|
|
#define __weak __attribute__((weak))
|
|
|
|
static char cache_panic[] __initdata = "Yeee, unsupported cache architecture.";
|
|
|
|
void __init cpu_cache_init(void)
|
|
{
|
|
if (cpu_has_3k_cache) {
|
|
extern void __weak r3k_cache_init(void);
|
|
|
|
r3k_cache_init();
|
|
return;
|
|
}
|
|
if (cpu_has_6k_cache) {
|
|
extern void __weak r6k_cache_init(void);
|
|
|
|
r6k_cache_init();
|
|
return;
|
|
}
|
|
if (cpu_has_4k_cache) {
|
|
extern void __weak r4k_cache_init(void);
|
|
|
|
r4k_cache_init();
|
|
return;
|
|
}
|
|
if (cpu_has_8k_cache) {
|
|
extern void __weak r8k_cache_init(void);
|
|
|
|
r8k_cache_init();
|
|
return;
|
|
}
|
|
if (cpu_has_tx39_cache) {
|
|
extern void __weak tx39_cache_init(void);
|
|
|
|
tx39_cache_init();
|
|
return;
|
|
}
|
|
if (cpu_has_sb1_cache) {
|
|
extern void __weak sb1_cache_init(void);
|
|
|
|
sb1_cache_init();
|
|
return;
|
|
}
|
|
|
|
panic(cache_panic);
|
|
}
|