comparison src/os/linux/vm/os_linux.cpp @ 12355:cefad50507d8

Merge with hs25-b53
author Gilles Duboscq <duboscq@ssw.jku.at>
date Fri, 11 Oct 2013 10:38:03 +0200
parents 6b0fd0964b87 899ecf76b570
children 096c224171c4
comparison
equal deleted inserted replaced
12058:ccb4f2af2319 12355:cefad50507d8
129 bool os::Linux::_is_floating_stack = false; 129 bool os::Linux::_is_floating_stack = false;
130 bool os::Linux::_is_NPTL = false; 130 bool os::Linux::_is_NPTL = false;
131 bool os::Linux::_supports_fast_thread_cpu_time = false; 131 bool os::Linux::_supports_fast_thread_cpu_time = false;
132 const char * os::Linux::_glibc_version = NULL; 132 const char * os::Linux::_glibc_version = NULL;
133 const char * os::Linux::_libpthread_version = NULL; 133 const char * os::Linux::_libpthread_version = NULL;
134 pthread_condattr_t os::Linux::_condattr[1];
134 135
135 static jlong initial_time_count=0; 136 static jlong initial_time_count=0;
136 137
137 static int clock_tics_per_sec = 100; 138 static int clock_tics_per_sec = 100;
138 139
1397 struct timespec tp; 1398 struct timespec tp;
1398 if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 && 1399 if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 &&
1399 clock_gettime_func(CLOCK_MONOTONIC, &tp) == 0) { 1400 clock_gettime_func(CLOCK_MONOTONIC, &tp) == 0) {
1400 // yes, monotonic clock is supported 1401 // yes, monotonic clock is supported
1401 _clock_gettime = clock_gettime_func; 1402 _clock_gettime = clock_gettime_func;
1403 return;
1402 } else { 1404 } else {
1403 // close librt if there is no monotonic clock 1405 // close librt if there is no monotonic clock
1404 dlclose(handle); 1406 dlclose(handle);
1405 } 1407 }
1406 } 1408 }
1407 } 1409 }
1410 warning("No monotonic clock was available - timed services may " \
1411 "be adversely affected if the time-of-day clock changes");
1408 } 1412 }
1409 1413
1410 #ifndef SYS_clock_getres 1414 #ifndef SYS_clock_getres
1411 1415
1412 #if defined(IA32) || defined(AMD64) 1416 #if defined(IA32) || defined(AMD64)
2163 2167
2164 os::Linux::print_full_memory_info(st); 2168 os::Linux::print_full_memory_info(st);
2165 } 2169 }
2166 2170
2167 // Try to identify popular distros. 2171 // Try to identify popular distros.
2168 // Most Linux distributions have /etc/XXX-release file, which contains 2172 // Most Linux distributions have a /etc/XXX-release file, which contains
2169 // the OS version string. Some have more than one /etc/XXX-release file 2173 // the OS version string. Newer Linux distributions have a /etc/lsb-release
2170 // (e.g. Mandrake has both /etc/mandrake-release and /etc/redhat-release.), 2174 // file that also contains the OS version string. Some have more than one
2171 // so the order is important. 2175 // /etc/XXX-release file (e.g. Mandrake has both /etc/mandrake-release and
2176 // /etc/redhat-release.), so the order is important.
2177 // Any Linux that is based on Redhat (i.e. Oracle, Mandrake, Sun JDS...) have
2178 // their own specific XXX-release file as well as a redhat-release file.
2179 // Because of this the XXX-release file needs to be searched for before the
2180 // redhat-release file.
2181 // Since Red Hat has a lsb-release file that is not very descriptive the
2182 // search for redhat-release needs to be before lsb-release.
2183 // Since the lsb-release file is the new standard it needs to be searched
2184 // before the older style release files.
2185 // Searching system-release (Red Hat) and os-release (other Linuxes) is the
2186 // next to last resort. The os-release file is a new standard that contains
2187 // distribution information and the system-release file seems to be an old
2188 // standard that has been replaced by the lsb-release and os-release files.
2189 // Searching for the debian_version file is the last resort. It contains
2190 // an informative string like "6.0.6" or "wheezy/sid". Because of this
2191 // "Debian " is printed before the contents of the debian_version file.
2172 void os::Linux::print_distro_info(outputStream* st) { 2192 void os::Linux::print_distro_info(outputStream* st) {
2173 if (!_print_ascii_file("/etc/mandrake-release", st) && 2193 if (!_print_ascii_file("/etc/oracle-release", st) &&
2174 !_print_ascii_file("/etc/sun-release", st) && 2194 !_print_ascii_file("/etc/mandriva-release", st) &&
2175 !_print_ascii_file("/etc/redhat-release", st) && 2195 !_print_ascii_file("/etc/mandrake-release", st) &&
2176 !_print_ascii_file("/etc/SuSE-release", st) && 2196 !_print_ascii_file("/etc/sun-release", st) &&
2177 !_print_ascii_file("/etc/turbolinux-release", st) && 2197 !_print_ascii_file("/etc/redhat-release", st) &&
2178 !_print_ascii_file("/etc/gentoo-release", st) && 2198 !_print_ascii_file("/etc/lsb-release", st) &&
2179 !_print_ascii_file("/etc/debian_version", st) && 2199 !_print_ascii_file("/etc/SuSE-release", st) &&
2180 !_print_ascii_file("/etc/ltib-release", st) && 2200 !_print_ascii_file("/etc/turbolinux-release", st) &&
2181 !_print_ascii_file("/etc/angstrom-version", st)) { 2201 !_print_ascii_file("/etc/gentoo-release", st) &&
2182 st->print("Linux"); 2202 !_print_ascii_file("/etc/ltib-release", st) &&
2183 } 2203 !_print_ascii_file("/etc/angstrom-version", st) &&
2184 st->cr(); 2204 !_print_ascii_file("/etc/system-release", st) &&
2205 !_print_ascii_file("/etc/os-release", st)) {
2206
2207 if (file_exists("/etc/debian_version")) {
2208 st->print("Debian ");
2209 _print_ascii_file("/etc/debian_version", st);
2210 } else {
2211 st->print("Linux");
2212 }
2213 }
2214 st->cr();
2185 } 2215 }
2186 2216
2187 void os::Linux::print_libversion_info(outputStream* st) { 2217 void os::Linux::print_libversion_info(outputStream* st) {
2188 // libc, pthread 2218 // libc, pthread
2189 st->print("libc:"); 2219 st->print("libc:");
2718 #define MADV_HUGEPAGE 14 2748 #define MADV_HUGEPAGE 14
2719 #endif 2749 #endif
2720 2750
2721 int os::Linux::commit_memory_impl(char* addr, size_t size, 2751 int os::Linux::commit_memory_impl(char* addr, size_t size,
2722 size_t alignment_hint, bool exec) { 2752 size_t alignment_hint, bool exec) {
2723 int err; 2753 int err = os::Linux::commit_memory_impl(addr, size, exec);
2724 if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
2725 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
2726 uintptr_t res =
2727 (uintptr_t) ::mmap(addr, size, prot,
2728 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
2729 -1, 0);
2730 if (res != (uintptr_t) MAP_FAILED) {
2731 if (UseNUMAInterleaving) {
2732 numa_make_global(addr, size);
2733 }
2734 return 0;
2735 }
2736
2737 err = errno; // save errno from mmap() call above
2738
2739 if (!recoverable_mmap_error(err)) {
2740 // However, it is not clear that this loss of our reserved mapping
2741 // happens with large pages on Linux or that we cannot recover
2742 // from the loss. For now, we just issue a warning and we don't
2743 // call vm_exit_out_of_memory(). This issue is being tracked by
2744 // JBS-8007074.
2745 warn_fail_commit_memory(addr, size, alignment_hint, exec, err);
2746 // vm_exit_out_of_memory(size, OOM_MMAP_ERROR,
2747 // "committing reserved memory.");
2748 }
2749 // Fall through and try to use small pages
2750 }
2751
2752 err = os::Linux::commit_memory_impl(addr, size, exec);
2753 if (err == 0) { 2754 if (err == 0) {
2754 realign_memory(addr, size, alignment_hint); 2755 realign_memory(addr, size, alignment_hint);
2755 } 2756 }
2756 return err; 2757 return err;
2757 } 2758 }
2772 vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg); 2773 vm_exit_out_of_memory(size, OOM_MMAP_ERROR, mesg);
2773 } 2774 }
2774 } 2775 }
2775 2776
2776 void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) { 2777 void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
2777 if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { 2778 if (UseTransparentHugePages && alignment_hint > (size_t)vm_page_size()) {
2778 // We don't check the return value: madvise(MADV_HUGEPAGE) may not 2779 // We don't check the return value: madvise(MADV_HUGEPAGE) may not
2779 // be supported or the memory may already be backed by huge pages. 2780 // be supported or the memory may already be backed by huge pages.
2780 ::madvise(addr, bytes, MADV_HUGEPAGE); 2781 ::madvise(addr, bytes, MADV_HUGEPAGE);
2781 } 2782 }
2782 } 2783 }
2785 // This method works by doing an mmap over an existing mapping and effectively discarding 2786 // This method works by doing an mmap over an existing mapping and effectively discarding
2786 // the existing pages. However it won't work for SHM-based large pages that cannot be 2787 // the existing pages. However it won't work for SHM-based large pages that cannot be
2787 // uncommitted at all. We don't do anything in this case to avoid creating a segment with 2788 // uncommitted at all. We don't do anything in this case to avoid creating a segment with
2788 // small pages on top of the SHM segment. This method always works for small pages, so we 2789 // small pages on top of the SHM segment. This method always works for small pages, so we
2789 // allow that in any case. 2790 // allow that in any case.
2790 if (alignment_hint <= (size_t)os::vm_page_size() || !UseSHM) { 2791 if (alignment_hint <= (size_t)os::vm_page_size() || can_commit_large_page_memory()) {
2791 commit_memory(addr, bytes, alignment_hint, !ExecMem); 2792 commit_memory(addr, bytes, alignment_hint, !ExecMem);
2792 } 2793 }
2793 } 2794 }
2794 2795
2795 void os::numa_make_global(char *addr, size_t bytes) { 2796 void os::numa_make_global(char *addr, size_t bytes) {
2796 Linux::numa_interleave_memory(addr, bytes); 2797 Linux::numa_interleave_memory(addr, bytes);
2797 } 2798 }
2798 2799
2800 // Define for numa_set_bind_policy(int). Setting the argument to 0 will set the
2801 // bind policy to MPOL_PREFERRED for the current thread.
2802 #define USE_MPOL_PREFERRED 0
2803
2799 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { 2804 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
2805 // To make NUMA and large pages more robust when both enabled, we need to ease
2806 // the requirements on where the memory should be allocated. MPOL_BIND is the
2807 // default policy and it will force memory to be allocated on the specified
2808 // node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on
2809 // the specified node, but will not force it. Using this policy will prevent
2810 // getting SIGBUS when trying to allocate large pages on NUMA nodes with no
2811 // free large pages.
2812 Linux::numa_set_bind_policy(USE_MPOL_PREFERRED);
2800 Linux::numa_tonode_memory(addr, bytes, lgrp_hint); 2813 Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
2801 } 2814 }
2802 2815
2803 bool os::numa_topology_changed() { return false; } 2816 bool os::numa_topology_changed() { return false; }
2804 2817
2896 libnuma_dlsym(handle, "numa_available"))); 2909 libnuma_dlsym(handle, "numa_available")));
2897 set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t, 2910 set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
2898 libnuma_dlsym(handle, "numa_tonode_memory"))); 2911 libnuma_dlsym(handle, "numa_tonode_memory")));
2899 set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t, 2912 set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
2900 libnuma_dlsym(handle, "numa_interleave_memory"))); 2913 libnuma_dlsym(handle, "numa_interleave_memory")));
2914 set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
2915 libnuma_dlsym(handle, "numa_set_bind_policy")));
2901 2916
2902 2917
2903 if (numa_available() != -1) { 2918 if (numa_available() != -1) {
2904 set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes")); 2919 set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
2905 // Create a cpu -> node mapping 2920 // Create a cpu -> node mapping
2962 os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus; 2977 os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
2963 os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; 2978 os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
2964 os::Linux::numa_available_func_t os::Linux::_numa_available; 2979 os::Linux::numa_available_func_t os::Linux::_numa_available;
2965 os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; 2980 os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
2966 os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory; 2981 os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
2982 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
2967 unsigned long* os::Linux::_numa_all_nodes; 2983 unsigned long* os::Linux::_numa_all_nodes;
2968 2984
2969 bool os::pd_uncommit_memory(char* addr, size_t size) { 2985 bool os::pd_uncommit_memory(char* addr, size_t size) {
2970 uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE, 2986 uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
2971 MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0); 2987 MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
2972 return res != (uintptr_t) MAP_FAILED; 2988 return res != (uintptr_t) MAP_FAILED;
2973 } 2989 }
2990
2991 static
2992 address get_stack_commited_bottom(address bottom, size_t size) {
2993 address nbot = bottom;
2994 address ntop = bottom + size;
2995
2996 size_t page_sz = os::vm_page_size();
2997 unsigned pages = size / page_sz;
2998
2999 unsigned char vec[1];
3000 unsigned imin = 1, imax = pages + 1, imid;
3001 int mincore_return_value;
3002
3003 while (imin < imax) {
3004 imid = (imax + imin) / 2;
3005 nbot = ntop - (imid * page_sz);
3006
3007 // Use a trick with mincore to check whether the page is mapped or not.
3008 // mincore sets vec to 1 if page resides in memory and to 0 if page
3009 // is swapped out, but if the page we are asking for is unmapped
3010 // it returns -1,ENOMEM
3011 mincore_return_value = mincore(nbot, page_sz, vec);
3012
3013 if (mincore_return_value == -1) {
3014 // Page is not mapped, go up
3015 // to find first mapped page
3016 if (errno != EAGAIN) {
3017 assert(errno == ENOMEM, "Unexpected mincore errno");
3018 imax = imid;
3019 }
3020 } else {
3021 // Page is mapped, go down
3022 // to find first not mapped page
3023 imin = imid + 1;
3024 }
3025 }
3026
3027 nbot = nbot + page_sz;
3028
3029 // Adjust stack bottom one page up if last checked page is not mapped
3030 if (mincore_return_value == -1) {
3031 nbot = nbot + page_sz;
3032 }
3033
3034 return nbot;
3035 }
3036
2974 3037
2975 // Linux uses a growable mapping for the stack, and if the mapping for 3038 // Linux uses a growable mapping for the stack, and if the mapping for
2976 // the stack guard pages is not removed when we detach a thread the 3039 // the stack guard pages is not removed when we detach a thread the
2977 // stack cannot grow beyond the pages where the stack guard was 3040 // stack cannot grow beyond the pages where the stack guard was
2978 // mapped. If at some point later in the process the stack expands to 3041 // mapped. If at some point later in the process the stack expands to
2984 // so if the stack mapping has already grown beyond the guard pages at 3047 // so if the stack mapping has already grown beyond the guard pages at
2985 // the time we create them, we have to truncate the stack mapping. 3048 // the time we create them, we have to truncate the stack mapping.
2986 // So, we need to know the extent of the stack mapping when 3049 // So, we need to know the extent of the stack mapping when
2987 // create_stack_guard_pages() is called. 3050 // create_stack_guard_pages() is called.
2988 3051
2989 // Find the bounds of the stack mapping. Return true for success.
2990 //
2991 // We only need this for stacks that are growable: at the time of 3052 // We only need this for stacks that are growable: at the time of
2992 // writing thread stacks don't use growable mappings (i.e. those 3053 // writing thread stacks don't use growable mappings (i.e. those
2993 // created with MAP_GROWSDOWN), and aren't marked "[stack]", so this 3054 // created with MAP_GROWSDOWN), and aren't marked "[stack]", so this
2994 // only applies to the main thread. 3055 // only applies to the main thread.
2995 3056
2996 static
2997 bool get_stack_bounds(uintptr_t *bottom, uintptr_t *top) {
2998
2999 char buf[128];
3000 int fd, sz;
3001
3002 if ((fd = ::open("/proc/self/maps", O_RDONLY)) < 0) {
3003 return false;
3004 }
3005
3006 const char kw[] = "[stack]";
3007 const int kwlen = sizeof(kw)-1;
3008
3009 // Address part of /proc/self/maps couldn't be more than 128 bytes
3010 while ((sz = os::get_line_chars(fd, buf, sizeof(buf))) > 0) {
3011 if (sz > kwlen && ::memcmp(buf+sz-kwlen, kw, kwlen) == 0) {
3012 // Extract addresses
3013 if (sscanf(buf, "%" SCNxPTR "-%" SCNxPTR, bottom, top) == 2) {
3014 uintptr_t sp = (uintptr_t) __builtin_frame_address(0);
3015 if (sp >= *bottom && sp <= *top) {
3016 ::close(fd);
3017 return true;
3018 }
3019 }
3020 }
3021 }
3022
3023 ::close(fd);
3024 return false;
3025 }
3026
3027
3028 // If the (growable) stack mapping already extends beyond the point 3057 // If the (growable) stack mapping already extends beyond the point
3029 // where we're going to put our guard pages, truncate the mapping at 3058 // where we're going to put our guard pages, truncate the mapping at
3030 // that point by munmap()ping it. This ensures that when we later 3059 // that point by munmap()ping it. This ensures that when we later
3031 // munmap() the guard pages we don't leave a hole in the stack 3060 // munmap() the guard pages we don't leave a hole in the stack
3032 // mapping. This only affects the main/initial thread, but guard 3061 // mapping. This only affects the main/initial thread
3033 // against future OS changes 3062
3034 bool os::pd_create_stack_guard_pages(char* addr, size_t size) { 3063 bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
3035 uintptr_t stack_extent, stack_base; 3064
3036 bool chk_bounds = NOT_DEBUG(os::Linux::is_initial_thread()) DEBUG_ONLY(true); 3065 if (os::Linux::is_initial_thread()) {
3037 if (chk_bounds && get_stack_bounds(&stack_extent, &stack_base)) { 3066 // As we manually grow stack up to bottom inside create_attached_thread(),
3038 assert(os::Linux::is_initial_thread(), 3067 // it's likely that os::Linux::initial_thread_stack_bottom is mapped and
3039 "growable stack in non-initial thread"); 3068 // we don't need to do anything special.
3040 if (stack_extent < (uintptr_t)addr) 3069 // Check it first, before calling heavy function.
3041 ::munmap((void*)stack_extent, (uintptr_t)addr - stack_extent); 3070 uintptr_t stack_extent = (uintptr_t) os::Linux::initial_thread_stack_bottom();
3071 unsigned char vec[1];
3072
3073 if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) {
3074 // Fallback to slow path on all errors, including EAGAIN
3075 stack_extent = (uintptr_t) get_stack_commited_bottom(
3076 os::Linux::initial_thread_stack_bottom(),
3077 (size_t)addr - stack_extent);
3078 }
3079
3080 if (stack_extent < (uintptr_t)addr) {
3081 ::munmap((void*)stack_extent, (uintptr_t)(addr - stack_extent));
3082 }
3042 } 3083 }
3043 3084
3044 return os::commit_memory(addr, size, !ExecMem); 3085 return os::commit_memory(addr, size, !ExecMem);
3045 } 3086 }
3046 3087
3047 // If this is a growable mapping, remove the guard pages entirely by 3088 // If this is a growable mapping, remove the guard pages entirely by
3048 // munmap()ping them. If not, just call uncommit_memory(). This only 3089 // munmap()ping them. If not, just call uncommit_memory(). This only
3049 // affects the main/initial thread, but guard against future OS changes 3090 // affects the main/initial thread, but guard against future OS changes
3091 // It's safe to always unmap guard pages for initial thread because we
3092 // always place it right after end of the mapped region
3093
3050 bool os::remove_stack_guard_pages(char* addr, size_t size) { 3094 bool os::remove_stack_guard_pages(char* addr, size_t size) {
3051 uintptr_t stack_extent, stack_base; 3095 uintptr_t stack_extent, stack_base;
3052 bool chk_bounds = NOT_DEBUG(os::Linux::is_initial_thread()) DEBUG_ONLY(true); 3096
3053 if (chk_bounds && get_stack_bounds(&stack_extent, &stack_base)) { 3097 if (os::Linux::is_initial_thread()) {
3054 assert(os::Linux::is_initial_thread(),
3055 "growable stack in non-initial thread");
3056
3057 return ::munmap(addr, size) == 0; 3098 return ::munmap(addr, size) == 0;
3058 } 3099 }
3059 3100
3060 return os::uncommit_memory(addr, size); 3101 return os::uncommit_memory(addr, size);
3061 } 3102 }
3155 3196
3156 bool os::unguard_memory(char* addr, size_t size) { 3197 bool os::unguard_memory(char* addr, size_t size) {
3157 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); 3198 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
3158 } 3199 }
3159 3200
3201 bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) {
3202 bool result = false;
3203 void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
3204 MAP_ANONYMOUS|MAP_PRIVATE,
3205 -1, 0);
3206 if (p != MAP_FAILED) {
3207 void *aligned_p = align_ptr_up(p, page_size);
3208
3209 result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
3210
3211 munmap(p, page_size * 2);
3212 }
3213
3214 if (warn && !result) {
3215 warning("TransparentHugePages is not supported by the operating system.");
3216 }
3217
3218 return result;
3219 }
3220
3160 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { 3221 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
3161 bool result = false; 3222 bool result = false;
3162 void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE, 3223 void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
3163 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, 3224 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
3164 -1, 0); 3225 -1, 0);
3165 3226
3166 if (p != MAP_FAILED) { 3227 if (p != MAP_FAILED) {
3167 // We don't know if this really is a huge page or not. 3228 // We don't know if this really is a huge page or not.
3168 FILE *fp = fopen("/proc/self/maps", "r"); 3229 FILE *fp = fopen("/proc/self/maps", "r");
3169 if (fp) { 3230 if (fp) {
3180 } 3241 }
3181 } 3242 }
3182 } 3243 }
3183 fclose(fp); 3244 fclose(fp);
3184 } 3245 }
3185 munmap (p, page_size); 3246 munmap(p, page_size);
3186 if (result) 3247 }
3187 return true; 3248
3188 } 3249 if (warn && !result) {
3189
3190 if (warn) {
3191 warning("HugeTLBFS is not supported by the operating system."); 3250 warning("HugeTLBFS is not supported by the operating system.");
3192 } 3251 }
3193 3252
3194 return result; 3253 return result;
3195 } 3254 }
3233 3292
3234 // Large page support 3293 // Large page support
3235 3294
3236 static size_t _large_page_size = 0; 3295 static size_t _large_page_size = 0;
3237 3296
3238 void os::large_page_init() { 3297 size_t os::Linux::find_large_page_size() {
3239 if (!UseLargePages) { 3298 size_t large_page_size = 0;
3240 UseHugeTLBFS = false; 3299
3241 UseSHM = false; 3300 // large_page_size on Linux is used to round up heap size. x86 uses either
3242 return; 3301 // 2M or 4M page, depending on whether PAE (Physical Address Extensions)
3243 } 3302 // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use
3244 3303 // page as large as 256M.
3245 if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) { 3304 //
3246 // If UseLargePages is specified on the command line try both methods, 3305 // Here we try to figure out page size by parsing /proc/meminfo and looking
3247 // if it's default, then try only HugeTLBFS. 3306 // for a line with the following format:
3248 if (FLAG_IS_DEFAULT(UseLargePages)) { 3307 // Hugepagesize: 2048 kB
3249 UseHugeTLBFS = true; 3308 //
3250 } else { 3309 // If we can't determine the value (e.g. /proc is not mounted, or the text
3251 UseHugeTLBFS = UseSHM = true; 3310 // format has been changed), we'll use the largest page size supported by
3252 } 3311 // the processor.
3253 }
3254
3255 if (LargePageSizeInBytes) {
3256 _large_page_size = LargePageSizeInBytes;
3257 } else {
3258 // large_page_size on Linux is used to round up heap size. x86 uses either
3259 // 2M or 4M page, depending on whether PAE (Physical Address Extensions)
3260 // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use
3261 // page as large as 256M.
3262 //
3263 // Here we try to figure out page size by parsing /proc/meminfo and looking
3264 // for a line with the following format:
3265 // Hugepagesize: 2048 kB
3266 //
3267 // If we can't determine the value (e.g. /proc is not mounted, or the text
3268 // format has been changed), we'll use the largest page size supported by
3269 // the processor.
3270 3312
3271 #ifndef ZERO 3313 #ifndef ZERO
3272 _large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) 3314 large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
3273 ARM_ONLY(2 * M) PPC_ONLY(4 * M); 3315 ARM_ONLY(2 * M) PPC_ONLY(4 * M);
3274 #endif // ZERO 3316 #endif // ZERO
3275 3317
3276 FILE *fp = fopen("/proc/meminfo", "r"); 3318 FILE *fp = fopen("/proc/meminfo", "r");
3277 if (fp) { 3319 if (fp) {
3278 while (!feof(fp)) { 3320 while (!feof(fp)) {
3279 int x = 0; 3321 int x = 0;
3280 char buf[16]; 3322 char buf[16];
3281 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { 3323 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
3282 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { 3324 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
3283 _large_page_size = x * K; 3325 large_page_size = x * K;
3284 break; 3326 break;
3285 } 3327 }
3286 } else { 3328 } else {
3287 // skip to next line 3329 // skip to next line
3288 for (;;) { 3330 for (;;) {
3289 int ch = fgetc(fp); 3331 int ch = fgetc(fp);
3290 if (ch == EOF || ch == (int)'\n') break; 3332 if (ch == EOF || ch == (int)'\n') break;
3291 }
3292 } 3333 }
3293 } 3334 }
3294 fclose(fp); 3335 }
3295 } 3336 fclose(fp);
3296 } 3337 }
3297 3338
3298 // print a warning if any large page related flag is specified on command line 3339 if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
3299 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); 3340 warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
3300 3341 SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
3342 proper_unit_for_byte_size(large_page_size));
3343 }
3344
3345 return large_page_size;
3346 }
3347
3348 size_t os::Linux::setup_large_page_size() {
3349 _large_page_size = Linux::find_large_page_size();
3301 const size_t default_page_size = (size_t)Linux::page_size(); 3350 const size_t default_page_size = (size_t)Linux::page_size();
3302 if (_large_page_size > default_page_size) { 3351 if (_large_page_size > default_page_size) {
3303 _page_sizes[0] = _large_page_size; 3352 _page_sizes[0] = _large_page_size;
3304 _page_sizes[1] = default_page_size; 3353 _page_sizes[1] = default_page_size;
3305 _page_sizes[2] = 0; 3354 _page_sizes[2] = 0;
3306 } 3355 }
3307 UseHugeTLBFS = UseHugeTLBFS && 3356
3308 Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size); 3357 return _large_page_size;
3309 3358 }
3310 if (UseHugeTLBFS) 3359
3360 bool os::Linux::setup_large_page_type(size_t page_size) {
3361 if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
3362 FLAG_IS_DEFAULT(UseSHM) &&
3363 FLAG_IS_DEFAULT(UseTransparentHugePages)) {
3364 // If UseLargePages is specified on the command line try all methods,
3365 // if it's default, then try only UseTransparentHugePages.
3366 if (FLAG_IS_DEFAULT(UseLargePages)) {
3367 UseTransparentHugePages = true;
3368 } else {
3369 UseHugeTLBFS = UseTransparentHugePages = UseSHM = true;
3370 }
3371 }
3372
3373 if (UseTransparentHugePages) {
3374 bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
3375 if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
3376 UseHugeTLBFS = false;
3377 UseSHM = false;
3378 return true;
3379 }
3380 UseTransparentHugePages = false;
3381 }
3382
3383 if (UseHugeTLBFS) {
3384 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
3385 if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
3386 UseSHM = false;
3387 return true;
3388 }
3389 UseHugeTLBFS = false;
3390 }
3391
3392 return UseSHM;
3393 }
3394
3395 void os::large_page_init() {
3396 if (!UseLargePages) {
3397 UseHugeTLBFS = false;
3398 UseTransparentHugePages = false;
3311 UseSHM = false; 3399 UseSHM = false;
3312 3400 return;
3313 UseLargePages = UseHugeTLBFS || UseSHM; 3401 }
3402
3403 size_t large_page_size = Linux::setup_large_page_size();
3404 UseLargePages = Linux::setup_large_page_type(large_page_size);
3314 3405
3315 set_coredump_filter(); 3406 set_coredump_filter();
3316 } 3407 }
3317 3408
3318 #ifndef SHM_HUGETLB 3409 #ifndef SHM_HUGETLB
3319 #define SHM_HUGETLB 04000 3410 #define SHM_HUGETLB 04000
3320 #endif 3411 #endif
3321 3412
3322 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { 3413 char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3323 // "exec" is passed in but not used. Creating the shared image for 3414 // "exec" is passed in but not used. Creating the shared image for
3324 // the code cache doesn't have an SHM_X executable permission to check. 3415 // the code cache doesn't have an SHM_X executable permission to check.
3325 assert(UseLargePages && UseSHM, "only for SHM large pages"); 3416 assert(UseLargePages && UseSHM, "only for SHM large pages");
3417 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3418
3419 if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
3420 return NULL; // Fallback to small pages.
3421 }
3326 3422
3327 key_t key = IPC_PRIVATE; 3423 key_t key = IPC_PRIVATE;
3328 char *addr; 3424 char *addr;
3329 3425
3330 bool warn_on_failure = UseLargePages && 3426 bool warn_on_failure = UseLargePages &&
3331 (!FLAG_IS_DEFAULT(UseLargePages) || 3427 (!FLAG_IS_DEFAULT(UseLargePages) ||
3428 !FLAG_IS_DEFAULT(UseSHM) ||
3332 !FLAG_IS_DEFAULT(LargePageSizeInBytes) 3429 !FLAG_IS_DEFAULT(LargePageSizeInBytes)
3333 ); 3430 );
3334 char msg[128]; 3431 char msg[128];
3335 3432
3336 // Create a large shared memory region to attach to based on size. 3433 // Create a large shared memory region to attach to based on size.
3374 warning(msg); 3471 warning(msg);
3375 } 3472 }
3376 return NULL; 3473 return NULL;
3377 } 3474 }
3378 3475
3379 if ((addr != NULL) && UseNUMAInterleaving) {
3380 numa_make_global(addr, bytes);
3381 }
3382
3383 // The memory is committed
3384 MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, mtNone, CALLER_PC);
3385
3386 return addr; 3476 return addr;
3387 } 3477 }
3388 3478
3479 static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) {
3480 assert(error == ENOMEM, "Only expect to fail if no memory is available");
3481
3482 bool warn_on_failure = UseLargePages &&
3483 (!FLAG_IS_DEFAULT(UseLargePages) ||
3484 !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
3485 !FLAG_IS_DEFAULT(LargePageSizeInBytes));
3486
3487 if (warn_on_failure) {
3488 char msg[128];
3489 jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
3490 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
3491 warning(msg);
3492 }
3493 }
3494
3495 char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) {
3496 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3497 assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
3498 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3499
3500 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3501 char* addr = (char*)::mmap(req_addr, bytes, prot,
3502 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
3503 -1, 0);
3504
3505 if (addr == MAP_FAILED) {
3506 warn_on_large_pages_failure(req_addr, bytes, errno);
3507 return NULL;
3508 }
3509
3510 assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
3511
3512 return addr;
3513 }
3514
3515 char* os::Linux::reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3516 size_t large_page_size = os::large_page_size();
3517
3518 assert(bytes >= large_page_size, "Shouldn't allocate large pages for small sizes");
3519
3520 // Allocate small pages.
3521
3522 char* start;
3523 if (req_addr != NULL) {
3524 assert(is_ptr_aligned(req_addr, alignment), "Must be");
3525 assert(is_size_aligned(bytes, alignment), "Must be");
3526 start = os::reserve_memory(bytes, req_addr);
3527 assert(start == NULL || start == req_addr, "Must be");
3528 } else {
3529 start = os::reserve_memory_aligned(bytes, alignment);
3530 }
3531
3532 if (start == NULL) {
3533 return NULL;
3534 }
3535
3536 assert(is_ptr_aligned(start, alignment), "Must be");
3537
3538 // os::reserve_memory_special will record this memory area.
3539 // Need to release it here to prevent overlapping reservations.
3540 MemTracker::record_virtual_memory_release((address)start, bytes);
3541
3542 char* end = start + bytes;
3543
3544 // Find the regions of the allocated chunk that can be promoted to large pages.
3545 char* lp_start = (char*)align_ptr_up(start, large_page_size);
3546 char* lp_end = (char*)align_ptr_down(end, large_page_size);
3547
3548 size_t lp_bytes = lp_end - lp_start;
3549
3550 assert(is_size_aligned(lp_bytes, large_page_size), "Must be");
3551
3552 if (lp_bytes == 0) {
3553 // The mapped region doesn't even span the start and the end of a large page.
3554 // Fall back to allocate a non-special area.
3555 ::munmap(start, end - start);
3556 return NULL;
3557 }
3558
3559 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3560
3561
3562 void* result;
3563
3564 if (start != lp_start) {
3565 result = ::mmap(start, lp_start - start, prot,
3566 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
3567 -1, 0);
3568 if (result == MAP_FAILED) {
3569 ::munmap(lp_start, end - lp_start);
3570 return NULL;
3571 }
3572 }
3573
3574 result = ::mmap(lp_start, lp_bytes, prot,
3575 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_HUGETLB,
3576 -1, 0);
3577 if (result == MAP_FAILED) {
3578 warn_on_large_pages_failure(req_addr, bytes, errno);
3579 // If the mmap above fails, the large pages region will be unmapped and we
3580 // have regions before and after with small pages. Release these regions.
3581 //
3582 // | mapped | unmapped | mapped |
3583 // ^ ^ ^ ^
3584 // start lp_start lp_end end
3585 //
3586 ::munmap(start, lp_start - start);
3587 ::munmap(lp_end, end - lp_end);
3588 return NULL;
3589 }
3590
3591 if (lp_end != end) {
3592 result = ::mmap(lp_end, end - lp_end, prot,
3593 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
3594 -1, 0);
3595 if (result == MAP_FAILED) {
3596 ::munmap(start, lp_end - start);
3597 return NULL;
3598 }
3599 }
3600
3601 return start;
3602 }
3603
3604 char* os::Linux::reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3605 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3606 assert(is_ptr_aligned(req_addr, alignment), "Must be");
3607 assert(is_power_of_2(alignment), "Must be");
3608 assert(is_power_of_2(os::large_page_size()), "Must be");
3609 assert(bytes >= os::large_page_size(), "Shouldn't allocate large pages for small sizes");
3610
3611 if (is_size_aligned(bytes, os::large_page_size()) && alignment <= os::large_page_size()) {
3612 return reserve_memory_special_huge_tlbfs_only(bytes, req_addr, exec);
3613 } else {
3614 return reserve_memory_special_huge_tlbfs_mixed(bytes, alignment, req_addr, exec);
3615 }
3616 }
3617
3618 char* os::reserve_memory_special(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3619 assert(UseLargePages, "only for large pages");
3620
3621 char* addr;
3622 if (UseSHM) {
3623 addr = os::Linux::reserve_memory_special_shm(bytes, alignment, req_addr, exec);
3624 } else {
3625 assert(UseHugeTLBFS, "must be");
3626 addr = os::Linux::reserve_memory_special_huge_tlbfs(bytes, alignment, req_addr, exec);
3627 }
3628
3629 if (addr != NULL) {
3630 if (UseNUMAInterleaving) {
3631 numa_make_global(addr, bytes);
3632 }
3633
3634 // The memory is committed
3635 MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, mtNone, CALLER_PC);
3636 }
3637
3638 return addr;
3639 }
3640
3641 bool os::Linux::release_memory_special_shm(char* base, size_t bytes) {
3642 // detaching the SHM segment will also delete it, see reserve_memory_special_shm()
3643 return shmdt(base) == 0;
3644 }
3645
3646 bool os::Linux::release_memory_special_huge_tlbfs(char* base, size_t bytes) {
3647 return pd_release_memory(base, bytes);
3648 }
3649
3389 bool os::release_memory_special(char* base, size_t bytes) { 3650 bool os::release_memory_special(char* base, size_t bytes) {
3651 assert(UseLargePages, "only for large pages");
3652
3390 MemTracker::Tracker tkr = MemTracker::get_virtual_memory_release_tracker(); 3653 MemTracker::Tracker tkr = MemTracker::get_virtual_memory_release_tracker();
3391 // detaching the SHM segment will also delete it, see reserve_memory_special() 3654
3392 int rslt = shmdt(base); 3655 bool res;
3393 if (rslt == 0) { 3656 if (UseSHM) {
3657 res = os::Linux::release_memory_special_shm(base, bytes);
3658 } else {
3659 assert(UseHugeTLBFS, "must be");
3660 res = os::Linux::release_memory_special_huge_tlbfs(base, bytes);
3661 }
3662
3663 if (res) {
3394 tkr.record((address)base, bytes); 3664 tkr.record((address)base, bytes);
3395 return true;
3396 } else { 3665 } else {
3397 tkr.discard(); 3666 tkr.discard();
3398 return false; 3667 }
3399 } 3668
3669 return res;
3400 } 3670 }
3401 3671
3402 size_t os::large_page_size() { 3672 size_t os::large_page_size() {
3403 return _large_page_size; 3673 return _large_page_size;
3404 } 3674 }
3405 3675
// With SysV SHM the entire memory region must be allocated as shared
// memory.
// HugeTLBFS allows application to commit large page memory on demand.
// However, when committing memory with HugeTLBFS fails, the region
// that was supposed to be committed will lose the old reservation
// and allow other threads to steal that memory region. Because of this
// behavior we can't commit HugeTLBFS memory.
3409 bool os::can_commit_large_page_memory() { 3683 bool os::can_commit_large_page_memory() {
3410 return UseHugeTLBFS; 3684 return UseTransparentHugePages;
3411 } 3685 }
3412 3686
3413 bool os::can_execute_large_page_memory() { 3687 bool os::can_execute_large_page_memory() {
3414 return UseHugeTLBFS; 3688 return UseTransparentHugePages || UseHugeTLBFS;
3415 } 3689 }
3416 3690
// Reserve memory at an arbitrary address, only if that area is
// available (and not reserved for something else).
3419 3693
4463 // main_thread points to the aboriginal thread 4737 // main_thread points to the aboriginal thread
4464 Linux::_main_thread = pthread_self(); 4738 Linux::_main_thread = pthread_self();
4465 4739
4466 Linux::clock_init(); 4740 Linux::clock_init();
4467 initial_time_count = os::elapsed_counter(); 4741 initial_time_count = os::elapsed_counter();
4742
4743 // pthread_condattr initialization for monotonic clock
4744 int status;
4745 pthread_condattr_t* _condattr = os::Linux::condAttr();
4746 if ((status = pthread_condattr_init(_condattr)) != 0) {
4747 fatal(err_msg("pthread_condattr_init: %s", strerror(status)));
4748 }
4749 // Only set the clock if CLOCK_MONOTONIC is available
4750 if (Linux::supports_monotonic_clock()) {
4751 if ((status = pthread_condattr_setclock(_condattr, CLOCK_MONOTONIC)) != 0) {
4752 if (status == EINVAL) {
4753 warning("Unable to use monotonic clock with relative timed-waits" \
4754 " - changes to the time-of-day clock may have adverse affects");
4755 } else {
4756 fatal(err_msg("pthread_condattr_setclock: %s", strerror(status)));
4757 }
4758 }
4759 }
4760 // else it defaults to CLOCK_REALTIME
4761
4468 pthread_mutex_init(&dl_mutex, NULL); 4762 pthread_mutex_init(&dl_mutex, NULL);
4469 4763
4470 // If the pagesize of the VM is greater than 8K determine the appropriate 4764 // If the pagesize of the VM is greater than 8K determine the appropriate
4471 // number of initial guard pages. The user can change this with the 4765 // number of initial guard pages. The user can change this with the
4472 // command line arguments, if needed. 4766 // command line arguments, if needed.
4508 #ifndef PRODUCT 4802 #ifndef PRODUCT
4509 if(Verbose && PrintMiscellaneous) 4803 if(Verbose && PrintMiscellaneous)
4510 tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page); 4804 tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page);
4511 #endif 4805 #endif
4512 } 4806 }
4513
4514 os::large_page_init();
4515 4807
4516 // initialize suspend/resume support - must do this before signal_sets_init() 4808 // initialize suspend/resume support - must do this before signal_sets_init()
4517 if (SR_initialize() != 0) { 4809 if (SR_initialize() != 0) {
4518 perror("SR_initialize failed"); 4810 perror("SR_initialize failed");
4519 return JNI_ERR; 4811 return JNI_ERR;
4545 JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes, 4837 JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4546 vm_page_size())); 4838 vm_page_size()));
4547 4839
4548 Linux::capture_initial_stack(JavaThread::stack_size_at_create()); 4840 Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4549 4841
4842 #if defined(IA32)
4843 workaround_expand_exec_shield_cs_limit();
4844 #endif
4845
4550 Linux::libpthread_init(); 4846 Linux::libpthread_init();
4551 if (PrintMiscellaneous && (Verbose || WizardMode)) { 4847 if (PrintMiscellaneous && (Verbose || WizardMode)) {
4552 tty->print_cr("[HotSpot is running with %s, %s(%s)]\n", 4848 tty->print_cr("[HotSpot is running with %s, %s(%s)]\n",
4553 Linux::glibc_version(), Linux::libpthread_version(), 4849 Linux::glibc_version(), Linux::libpthread_version(),
4554 Linux::is_floating_stack() ? "floating stack" : "fixed stack"); 4850 Linux::is_floating_stack() ? "floating stack" : "fixed stack");
4561 if ((Linux::numa_max_node() < 1)) { 4857 if ((Linux::numa_max_node() < 1)) {
4562 // There's only one node(they start from 0), disable NUMA. 4858 // There's only one node(they start from 0), disable NUMA.
4563 UseNUMA = false; 4859 UseNUMA = false;
4564 } 4860 }
4565 } 4861 }
4566 // With SHM large pages we cannot uncommit a page, so there's not way 4862 // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4567 // we can make the adaptive lgrp chunk resizing work. If the user specified 4863 // we can make the adaptive lgrp chunk resizing work. If the user specified
4568 // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and 4864 // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4569 // disable adaptive resizing. 4865 // disable adaptive resizing.
4570 if (UseNUMA && UseLargePages && UseSHM) { 4866 if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4571 if (!FLAG_IS_DEFAULT(UseNUMA)) { 4867 if (FLAG_IS_DEFAULT(UseNUMA)) {
4572 if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) { 4868 UseNUMA = false;
4869 } else {
4870 if (FLAG_IS_DEFAULT(UseLargePages) &&
4871 FLAG_IS_DEFAULT(UseSHM) &&
4872 FLAG_IS_DEFAULT(UseHugeTLBFS)) {
4573 UseLargePages = false; 4873 UseLargePages = false;
4574 } else { 4874 } else {
4575 warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing"); 4875 warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, disabling adaptive resizing");
4576 UseAdaptiveSizePolicy = false; 4876 UseAdaptiveSizePolicy = false;
4577 UseAdaptiveNUMAChunkSizing = false; 4877 UseAdaptiveNUMAChunkSizing = false;
4578 } 4878 }
4579 } else {
4580 UseNUMA = false;
4581 } 4879 }
4582 } 4880 }
4583 if (!UseNUMA && ForceNUMA) { 4881 if (!UseNUMA && ForceNUMA) {
4584 UseNUMA = true; 4882 UseNUMA = true;
4585 } 4883 }
// abstime will be the absolute timeout time
// TODO: replace compute_abstime() with unpackTime()
5273 5571
5274 static struct timespec* compute_abstime(timespec* abstime, jlong millis) { 5572 static struct timespec* compute_abstime(timespec* abstime, jlong millis) {
5275 if (millis < 0) millis = 0; 5573 if (millis < 0) millis = 0;
5276 struct timeval now; 5574
5277 int status = gettimeofday(&now, NULL);
5278 assert(status == 0, "gettimeofday");
5279 jlong seconds = millis / 1000; 5575 jlong seconds = millis / 1000;
5280 millis %= 1000; 5576 millis %= 1000;
5281 if (seconds > 50000000) { // see man cond_timedwait(3T) 5577 if (seconds > 50000000) { // see man cond_timedwait(3T)
5282 seconds = 50000000; 5578 seconds = 50000000;
5283 } 5579 }
5284 abstime->tv_sec = now.tv_sec + seconds; 5580
5285 long usec = now.tv_usec + millis * 1000; 5581 if (os::Linux::supports_monotonic_clock()) {
5286 if (usec >= 1000000) { 5582 struct timespec now;
5287 abstime->tv_sec += 1; 5583 int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
5288 usec -= 1000000; 5584 assert_status(status == 0, status, "clock_gettime");
5289 } 5585 abstime->tv_sec = now.tv_sec + seconds;
5290 abstime->tv_nsec = usec * 1000; 5586 long nanos = now.tv_nsec + millis * NANOSECS_PER_MILLISEC;
5587 if (nanos >= NANOSECS_PER_SEC) {
5588 abstime->tv_sec += 1;
5589 nanos -= NANOSECS_PER_SEC;
5590 }
5591 abstime->tv_nsec = nanos;
5592 } else {
5593 struct timeval now;
5594 int status = gettimeofday(&now, NULL);
5595 assert(status == 0, "gettimeofday");
5596 abstime->tv_sec = now.tv_sec + seconds;
5597 long usec = now.tv_usec + millis * 1000;
5598 if (usec >= 1000000) {
5599 abstime->tv_sec += 1;
5600 usec -= 1000000;
5601 }
5602 abstime->tv_nsec = usec * 1000;
5603 }
5291 return abstime; 5604 return abstime;
5292 } 5605 }
5293 5606
5294 5607
5295 // Test-and-clear _Event, always leaves _Event set to 0, returns immediately. 5608 // Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
5377 5690
5378 while (_Event < 0) { 5691 while (_Event < 0) {
5379 status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst); 5692 status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst);
5380 if (status != 0 && WorkAroundNPTLTimedWaitHang) { 5693 if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5381 pthread_cond_destroy (_cond); 5694 pthread_cond_destroy (_cond);
5382 pthread_cond_init (_cond, NULL) ; 5695 pthread_cond_init (_cond, os::Linux::condAttr()) ;
5383 } 5696 }
5384 assert_status(status == 0 || status == EINTR || 5697 assert_status(status == 0 || status == EINTR ||
5385 status == ETIME || status == ETIMEDOUT, 5698 status == ETIME || status == ETIMEDOUT,
5386 status, "cond_timedwait"); 5699 status, "cond_timedwait");
5387 if (!FilterSpuriousWakeups) break ; // previous semantics 5700 if (!FilterSpuriousWakeups) break ; // previous semantics
5478 * years from "now". 5791 * years from "now".
5479 */ 5792 */
5480 5793
5481 static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) { 5794 static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) {
5482 assert (time > 0, "convertTime"); 5795 assert (time > 0, "convertTime");
5483 5796 time_t max_secs = 0;
5484 struct timeval now; 5797
5485 int status = gettimeofday(&now, NULL); 5798 if (!os::Linux::supports_monotonic_clock() || isAbsolute) {
5486 assert(status == 0, "gettimeofday"); 5799 struct timeval now;
5487 5800 int status = gettimeofday(&now, NULL);
5488 time_t max_secs = now.tv_sec + MAX_SECS; 5801 assert(status == 0, "gettimeofday");
5489 5802
5490 if (isAbsolute) { 5803 max_secs = now.tv_sec + MAX_SECS;
5491 jlong secs = time / 1000; 5804
5492 if (secs > max_secs) { 5805 if (isAbsolute) {
5493 absTime->tv_sec = max_secs; 5806 jlong secs = time / 1000;
5494 } 5807 if (secs > max_secs) {
5495 else { 5808 absTime->tv_sec = max_secs;
5496 absTime->tv_sec = secs; 5809 } else {
5497 } 5810 absTime->tv_sec = secs;
5498 absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC; 5811 }
5499 } 5812 absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC;
5500 else { 5813 } else {
5814 jlong secs = time / NANOSECS_PER_SEC;
5815 if (secs >= MAX_SECS) {
5816 absTime->tv_sec = max_secs;
5817 absTime->tv_nsec = 0;
5818 } else {
5819 absTime->tv_sec = now.tv_sec + secs;
5820 absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000;
5821 if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
5822 absTime->tv_nsec -= NANOSECS_PER_SEC;
5823 ++absTime->tv_sec; // note: this must be <= max_secs
5824 }
5825 }
5826 }
5827 } else {
5828 // must be relative using monotonic clock
5829 struct timespec now;
5830 int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now);
5831 assert_status(status == 0, status, "clock_gettime");
5832 max_secs = now.tv_sec + MAX_SECS;
5501 jlong secs = time / NANOSECS_PER_SEC; 5833 jlong secs = time / NANOSECS_PER_SEC;
5502 if (secs >= MAX_SECS) { 5834 if (secs >= MAX_SECS) {
5503 absTime->tv_sec = max_secs; 5835 absTime->tv_sec = max_secs;
5504 absTime->tv_nsec = 0; 5836 absTime->tv_nsec = 0;
5505 } 5837 } else {
5506 else {
5507 absTime->tv_sec = now.tv_sec + secs; 5838 absTime->tv_sec = now.tv_sec + secs;
5508 absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000; 5839 absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_nsec;
5509 if (absTime->tv_nsec >= NANOSECS_PER_SEC) { 5840 if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
5510 absTime->tv_nsec -= NANOSECS_PER_SEC; 5841 absTime->tv_nsec -= NANOSECS_PER_SEC;
5511 ++absTime->tv_sec; // note: this must be <= max_secs 5842 ++absTime->tv_sec; // note: this must be <= max_secs
5512 } 5843 }
5513 } 5844 }
5583 5914
5584 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */); 5915 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5585 jt->set_suspend_equivalent(); 5916 jt->set_suspend_equivalent();
5586 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self() 5917 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5587 5918
5919 assert(_cur_index == -1, "invariant");
5588 if (time == 0) { 5920 if (time == 0) {
5589 status = pthread_cond_wait (_cond, _mutex) ; 5921 _cur_index = REL_INDEX; // arbitrary choice when not timed
5922 status = pthread_cond_wait (&_cond[_cur_index], _mutex) ;
5590 } else { 5923 } else {
5591 status = os::Linux::safe_cond_timedwait (_cond, _mutex, &absTime) ; 5924 _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5925 status = os::Linux::safe_cond_timedwait (&_cond[_cur_index], _mutex, &absTime) ;
5592 if (status != 0 && WorkAroundNPTLTimedWaitHang) { 5926 if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5593 pthread_cond_destroy (_cond) ; 5927 pthread_cond_destroy (&_cond[_cur_index]) ;
5594 pthread_cond_init (_cond, NULL); 5928 pthread_cond_init (&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5595 } 5929 }
5596 } 5930 }
5931 _cur_index = -1;
5597 assert_status(status == 0 || status == EINTR || 5932 assert_status(status == 0 || status == EINTR ||
5598 status == ETIME || status == ETIMEDOUT, 5933 status == ETIME || status == ETIMEDOUT,
5599 status, "cond_timedwait"); 5934 status, "cond_timedwait");
5600 5935
5601 #ifdef ASSERT 5936 #ifdef ASSERT
5620 status = pthread_mutex_lock(_mutex); 5955 status = pthread_mutex_lock(_mutex);
5621 assert (status == 0, "invariant") ; 5956 assert (status == 0, "invariant") ;
5622 s = _counter; 5957 s = _counter;
5623 _counter = 1; 5958 _counter = 1;
5624 if (s < 1) { 5959 if (s < 1) {
5625 if (WorkAroundNPTLTimedWaitHang) { 5960 // thread might be parked
5626 status = pthread_cond_signal (_cond) ; 5961 if (_cur_index != -1) {
5627 assert (status == 0, "invariant") ; 5962 // thread is definitely parked
5963 if (WorkAroundNPTLTimedWaitHang) {
5964 status = pthread_cond_signal (&_cond[_cur_index]);
5965 assert (status == 0, "invariant");
5628 status = pthread_mutex_unlock(_mutex); 5966 status = pthread_mutex_unlock(_mutex);
5629 assert (status == 0, "invariant") ; 5967 assert (status == 0, "invariant");
5630 } else { 5968 } else {
5631 status = pthread_mutex_unlock(_mutex); 5969 status = pthread_mutex_unlock(_mutex);
5632 assert (status == 0, "invariant") ; 5970 assert (status == 0, "invariant");
5633 status = pthread_cond_signal (_cond) ; 5971 status = pthread_cond_signal (&_cond[_cur_index]);
5634 assert (status == 0, "invariant") ; 5972 assert (status == 0, "invariant");
5635 } 5973 }
5974 } else {
5975 pthread_mutex_unlock(_mutex);
5976 assert (status == 0, "invariant") ;
5977 }
5636 } else { 5978 } else {
5637 pthread_mutex_unlock(_mutex); 5979 pthread_mutex_unlock(_mutex);
5638 assert (status == 0, "invariant") ; 5980 assert (status == 0, "invariant") ;
5639 } 5981 }
5640 } 5982 }
5846 new MemNotifyThread(fd); 6188 new MemNotifyThread(fd);
5847 } 6189 }
5848 } 6190 }
5849 6191
5850 #endif // JAVASE_EMBEDDED 6192 #endif // JAVASE_EMBEDDED
6193
6194
6195 /////////////// Unit tests ///////////////
6196
6197 #ifndef PRODUCT
6198
// Logging helper for the unit tests below: prints (and flushes) only when
// -XX:+VerboseInternalVMTests is enabled.
#define test_log(...)                   \
  do {                                  \
    if (VerboseInternalVMTests) {       \
      tty->print_cr(__VA_ARGS__);       \
      tty->flush();                     \
    }                                   \
  } while (false)
6206
6207 class TestReserveMemorySpecial : AllStatic {
6208 public:
6209 static void small_page_write(void* addr, size_t size) {
6210 size_t page_size = os::vm_page_size();
6211
6212 char* end = (char*)addr + size;
6213 for (char* p = (char*)addr; p < end; p += page_size) {
6214 *p = 1;
6215 }
6216 }
6217
6218 static void test_reserve_memory_special_huge_tlbfs_only(size_t size) {
6219 if (!UseHugeTLBFS) {
6220 return;
6221 }
6222
6223 test_log("test_reserve_memory_special_huge_tlbfs_only(" SIZE_FORMAT ")", size);
6224
6225 char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false);
6226
6227 if (addr != NULL) {
6228 small_page_write(addr, size);
6229
6230 os::Linux::release_memory_special_huge_tlbfs(addr, size);
6231 }
6232 }
6233
6234 static void test_reserve_memory_special_huge_tlbfs_only() {
6235 if (!UseHugeTLBFS) {
6236 return;
6237 }
6238
6239 size_t lp = os::large_page_size();
6240
6241 for (size_t size = lp; size <= lp * 10; size += lp) {
6242 test_reserve_memory_special_huge_tlbfs_only(size);
6243 }
6244 }
6245
6246 static void test_reserve_memory_special_huge_tlbfs_mixed(size_t size, size_t alignment) {
6247 if (!UseHugeTLBFS) {
6248 return;
6249 }
6250
6251 test_log("test_reserve_memory_special_huge_tlbfs_mixed(" SIZE_FORMAT ", " SIZE_FORMAT ")",
6252 size, alignment);
6253
6254 assert(size >= os::large_page_size(), "Incorrect input to test");
6255
6256 char* addr = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false);
6257
6258 if (addr != NULL) {
6259 small_page_write(addr, size);
6260
6261 os::Linux::release_memory_special_huge_tlbfs(addr, size);
6262 }
6263 }
6264
6265 static void test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(size_t size) {
6266 size_t lp = os::large_page_size();
6267 size_t ag = os::vm_allocation_granularity();
6268
6269 for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
6270 test_reserve_memory_special_huge_tlbfs_mixed(size, alignment);
6271 }
6272 }
6273
6274 static void test_reserve_memory_special_huge_tlbfs_mixed() {
6275 size_t lp = os::large_page_size();
6276 size_t ag = os::vm_allocation_granularity();
6277
6278 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp);
6279 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + ag);
6280 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + lp / 2);
6281 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2);
6282 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + ag);
6283 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 - ag);
6284 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + lp / 2);
6285 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10);
6286 test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10 + lp / 2);
6287 }
6288
6289 static void test_reserve_memory_special_huge_tlbfs() {
6290 if (!UseHugeTLBFS) {
6291 return;
6292 }
6293
6294 test_reserve_memory_special_huge_tlbfs_only();
6295 test_reserve_memory_special_huge_tlbfs_mixed();
6296 }
6297
6298 static void test_reserve_memory_special_shm(size_t size, size_t alignment) {
6299 if (!UseSHM) {
6300 return;
6301 }
6302
6303 test_log("test_reserve_memory_special_shm(" SIZE_FORMAT ", " SIZE_FORMAT ")", size, alignment);
6304
6305 char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false);
6306
6307 if (addr != NULL) {
6308 assert(is_ptr_aligned(addr, alignment), "Check");
6309 assert(is_ptr_aligned(addr, os::large_page_size()), "Check");
6310
6311 small_page_write(addr, size);
6312
6313 os::Linux::release_memory_special_shm(addr, size);
6314 }
6315 }
6316
6317 static void test_reserve_memory_special_shm() {
6318 size_t lp = os::large_page_size();
6319 size_t ag = os::vm_allocation_granularity();
6320
6321 for (size_t size = ag; size < lp * 3; size += ag) {
6322 for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) {
6323 test_reserve_memory_special_shm(size, alignment);
6324 }
6325 }
6326 }
6327
6328 static void test() {
6329 test_reserve_memory_special_huge_tlbfs();
6330 test_reserve_memory_special_shm();
6331 }
6332 };
6333
6334 void TestReserveMemorySpecial_test() {
6335 TestReserveMemorySpecial::test();
6336 }
6337
6338 #endif