comparison src/os/linux/vm/os_linux.cpp @ 3286:139667d9836a

7034464: Support transparent large pages on Linux
Summary: Support transparent huge pages on Linux, available since kernel 2.6.38
Reviewed-by: iveresov, ysr
Contributed-by: aph@redhat.com
author iveresov
date Wed, 20 Apr 2011 17:12:04 -0700
parents da091bb67459
children 7f3faf7159fd
comparison
equal deleted inserted replaced
3285:49a67202bc67 3286:139667d9836a
2463 uintptr_t res = (uintptr_t) ::mmap(addr, size, prot, 2463 uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
2464 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); 2464 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
2465 return res != (uintptr_t) MAP_FAILED; 2465 return res != (uintptr_t) MAP_FAILED;
2466 } 2466 }
2467 2467
2468 // Define MAP_HUGETLB here so we can build HotSpot on old systems.
2469 #ifndef MAP_HUGETLB
2470 #define MAP_HUGETLB 0x40000
2471 #endif
2472
2473 // Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
2474 #ifndef MADV_HUGEPAGE
2475 #define MADV_HUGEPAGE 14
2476 #endif
2477
2468 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, 2478 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
2469 bool exec) { 2479 bool exec) {
2480 if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
2481 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
2482 uintptr_t res =
2483 (uintptr_t) ::mmap(addr, size, prot,
2484 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
2485 -1, 0);
2486 return res != (uintptr_t) MAP_FAILED;
2487 }
2488
2470 return commit_memory(addr, size, exec); 2489 return commit_memory(addr, size, exec);
2471 } 2490 }
2472 2491
2473 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { } 2492 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
2493 if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
2494 // We don't check the return value: madvise(MADV_HUGEPAGE) may not
2495 // be supported or the memory may already be backed by huge pages.
2496 ::madvise(addr, bytes, MADV_HUGEPAGE);
2497 }
2498 }
2474 2499
2475 void os::free_memory(char *addr, size_t bytes) { 2500 void os::free_memory(char *addr, size_t bytes) {
2476 ::mmap(addr, bytes, PROT_READ | PROT_WRITE, 2501 ::madvise(addr, bytes, MADV_DONTNEED);
2477 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
2478 } 2502 }
2479 2503
2480 void os::numa_make_global(char *addr, size_t bytes) { 2504 void os::numa_make_global(char *addr, size_t bytes) {
2481 Linux::numa_interleave_memory(addr, bytes); 2505 Linux::numa_interleave_memory(addr, bytes);
2482 } 2506 }
2814 return linux_mprotect(addr, size, PROT_NONE); 2838 return linux_mprotect(addr, size, PROT_NONE);
2815 } 2839 }
2816 2840
2817 bool os::unguard_memory(char* addr, size_t size) { 2841 bool os::unguard_memory(char* addr, size_t size) {
2818 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); 2842 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
2843 }
2844
2845 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
2846 bool result = false;
2847 void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
2848 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
2849 -1, 0);
2850
2851 if (p != (void *) -1) {
2852 // We don't know if this really is a huge page or not.
2853 FILE *fp = fopen("/proc/self/maps", "r");
2854 if (fp) {
2855 while (!feof(fp)) {
2856 char chars[257];
2857 long x = 0;
2858 if (fgets(chars, sizeof(chars), fp)) {
2859 if (sscanf(chars, "%lx-%*lx", &x) == 1
2860 && x == (long)p) {
2861 if (strstr (chars, "hugepage")) {
2862 result = true;
2863 break;
2864 }
2865 }
2866 }
2867 }
2868 fclose(fp);
2869 }
2870 munmap (p, page_size);
2871 if (result)
2872 return true;
2873 }
2874
2875 if (warn) {
2876 warning("HugeTLBFS is not supported by the operating system.");
2877 }
2878
2879 return result;
2819 } 2880 }
2820 2881
2821 /* 2882 /*
2822 * Set the coredump_filter bits to include largepages in core dump (bit 6) 2883 * Set the coredump_filter bits to include largepages in core dump (bit 6)
2823 * 2884 *
2858 // Large page support 2919 // Large page support
2859 2920
2860 static size_t _large_page_size = 0; 2921 static size_t _large_page_size = 0;
2861 2922
2862 bool os::large_page_init() { 2923 bool os::large_page_init() {
2863 if (!UseLargePages) return false; 2924 if (!UseLargePages) {
2925 UseHugeTLBFS = false;
2926 UseSHM = false;
2927 return false;
2928 }
2929
2930 if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
2931 // Our user has not expressed a preference, so we'll try both.
2932 UseHugeTLBFS = UseSHM = true;
2933 }
2864 2934
2865 if (LargePageSizeInBytes) { 2935 if (LargePageSizeInBytes) {
2866 _large_page_size = LargePageSizeInBytes; 2936 _large_page_size = LargePageSizeInBytes;
2867 } else { 2937 } else {
2868 // large_page_size on Linux is used to round up heap size. x86 uses either 2938 // large_page_size on Linux is used to round up heap size. x86 uses either
2903 } 2973 }
2904 fclose(fp); 2974 fclose(fp);
2905 } 2975 }
2906 } 2976 }
2907 2977
2978 // print a warning on failure only if UseHugeTLBFS was explicitly specified on the command line
2979 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
2980
2908 const size_t default_page_size = (size_t)Linux::page_size(); 2981 const size_t default_page_size = (size_t)Linux::page_size();
2909 if (_large_page_size > default_page_size) { 2982 if (_large_page_size > default_page_size) {
2910 _page_sizes[0] = _large_page_size; 2983 _page_sizes[0] = _large_page_size;
2911 _page_sizes[1] = default_page_size; 2984 _page_sizes[1] = default_page_size;
2912 _page_sizes[2] = 0; 2985 _page_sizes[2] = 0;
2913 } 2986 }
2914 2987
2988 UseHugeTLBFS = UseHugeTLBFS &&
2989 Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
2990
2991 if (UseHugeTLBFS)
2992 UseSHM = false;
2993
2994 UseLargePages = UseHugeTLBFS || UseSHM;
2995
2915 set_coredump_filter(); 2996 set_coredump_filter();
2916 2997
2917 // Large page support is available on 2.6 or newer kernel, some vendors 2998 // Large page support is available on 2.6 or newer kernel, some vendors
2918 // (e.g. Redhat) have backported it to their 2.4 based distributions. 2999 // (e.g. Redhat) have backported it to their 2.4 based distributions.
2919 // We optimistically assume the support is available. If later it turns out 3000 // We optimistically assume the support is available. If later it turns out
2926 #endif 3007 #endif
2927 3008
2928 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { 3009 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
2929 // "exec" is passed in but not used. Creating the shared image for 3010 // "exec" is passed in but not used. Creating the shared image for
2930 // the code cache doesn't have an SHM_X executable permission to check. 3011 // the code cache doesn't have an SHM_X executable permission to check.
2931 assert(UseLargePages, "only for large pages"); 3012 assert(UseLargePages && UseSHM, "only for SHM large pages");
2932 3013
2933 key_t key = IPC_PRIVATE; 3014 key_t key = IPC_PRIVATE;
2934 char *addr; 3015 char *addr;
2935 3016
2936 bool warn_on_failure = UseLargePages && 3017 bool warn_on_failure = UseLargePages &&
2993 3074
2994 size_t os::large_page_size() { 3075 size_t os::large_page_size() {
2995 return _large_page_size; 3076 return _large_page_size;
2996 } 3077 }
2997 3078
2998 // Linux does not support anonymous mmap with large page memory. The only way 3079 // HugeTLBFS allows an application to commit large page memory on demand;
2999 // to reserve large page memory without file backing is through SysV shared 3080 // with SysV SHM the entire memory region must be allocated as shared
3000 // memory API. The entire memory region is committed and pinned upfront. 3081 // memory.
3001 // Hopefully this will change in the future...
3002 bool os::can_commit_large_page_memory() { 3082 bool os::can_commit_large_page_memory() {
3003 return false; 3083 return UseHugeTLBFS;
3004 } 3084 }
3005 3085
3006 bool os::can_execute_large_page_memory() { 3086 bool os::can_execute_large_page_memory() {
3007 return false; 3087 return UseHugeTLBFS;
3008 } 3088 }
3009 3089
3010 // Reserve memory at an arbitrary address, only if that area is 3090 // Reserve memory at an arbitrary address, only if that area is
3011 // available (and not reserved for something else). 3091 // available (and not reserved for something else).
3012 3092