Mercurial > hg > graal-jvmci-8
comparison src/os/linux/vm/os_linux.cpp @ 24007:776cb7cbe2e4
8017629: G1: UseSHM in combination with a G1HeapRegionSize > os::large_page_size() falls back to use small pages
Reviewed-by: pliden, sjohanss, stuefe
author | stefank |
---|---|
date | Mon, 25 Apr 2016 11:36:14 +0200 |
parents | 44c8fe602a5e |
children | 9b69cec6d01b e21dd2c95cf0 |
Comparison legend: equal, deleted, inserted, replaced
24006:44c8fe602a5e | 24007:776cb7cbe2e4 |
---|---|
3045 } | 3045 } |
3046 | 3046 |
3047 return addr == MAP_FAILED ? NULL : addr; | 3047 return addr == MAP_FAILED ? NULL : addr; |
3048 } | 3048 } |
3049 | 3049 |
3050 // Don't update _highest_vm_reserved_address, because there might be memory | |
3051 // regions above addr + size. If so, releasing a memory region only creates | |
3052 // a hole in the address space, it doesn't help prevent heap-stack collision. | |
3053 // | |
3054 static int anon_munmap(char * addr, size_t size) { | |
3055 return ::munmap(addr, size) == 0; | |
3056 } | |
3057 | |
3058 char* os::pd_reserve_memory(size_t bytes, char* requested_addr, | |
3059 size_t alignment_hint) { | |
3060 return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); | |
3061 } | |
3062 | |
3063 bool os::pd_release_memory(char* addr, size_t size) { | |
3064 return anon_munmap(addr, size); | |
3065 } | |
3066 | |
3067 static address highest_vm_reserved_address() { | |
3068 return _highest_vm_reserved_address; | |
3069 } | |
3070 | |
3071 static bool linux_mprotect(char* addr, size_t size, int prot) { | |
3072 // Linux wants the mprotect address argument to be page aligned. | |
3073 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); | |
3074 | |
3075 // According to SUSv3, mprotect() should only be used with mappings | |
3076 // established by mmap(), and mmap() always maps whole pages. Unaligned | |
3077 // 'addr' likely indicates problem in the VM (e.g. trying to change | |
3078 // protection of malloc'ed or statically allocated memory). Check the | |
3079 // caller if you hit this assert. | |
3080 assert(addr == bottom, "sanity check"); | |
3081 | |
3082 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); | |
3083 return ::mprotect(bottom, size, prot) == 0; | |
3084 } | |
3085 | |
3086 // Set protections specified | |
3087 bool os::protect_memory(char* addr, size_t bytes, ProtType prot, | |
3088 bool is_committed) { | |
3089 unsigned int p = 0; | |
3090 switch (prot) { | |
3091 case MEM_PROT_NONE: p = PROT_NONE; break; | |
3092 case MEM_PROT_READ: p = PROT_READ; break; | |
3093 case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; | |
3094 case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; | |
3095 default: | |
3096 ShouldNotReachHere(); | |
3097 } | |
3098 // is_committed is unused. | |
3099 return linux_mprotect(addr, bytes, p); | |
3100 } | |
3101 | |
3102 bool os::guard_memory(char* addr, size_t size) { | |
3103 return linux_mprotect(addr, size, PROT_NONE); | |
3104 } | |
3105 | |
3106 bool os::unguard_memory(char* addr, size_t size) { | |
3107 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); | |
3108 } | |
3109 | |
3110 bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { | |
3111 bool result = false; | |
3112 void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, | |
3113 MAP_ANONYMOUS|MAP_PRIVATE, | |
3114 -1, 0); | |
3115 if (p != MAP_FAILED) { | |
3116 void *aligned_p = align_ptr_up(p, page_size); | |
3117 | |
3118 result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; | |
3119 | |
3120 munmap(p, page_size * 2); | |
3121 } | |
3122 | |
3123 if (warn && !result) { | |
3124 warning("TransparentHugePages is not supported by the operating system."); | |
3125 } | |
3126 | |
3127 return result; | |
3128 } | |
3129 | |
3130 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { | |
3131 bool result = false; | |
3132 void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, | |
3133 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, | |
3134 -1, 0); | |
3135 | |
3136 if (p != MAP_FAILED) { | |
3137 // We don't know if this really is a huge page or not. | |
3138 FILE *fp = fopen("/proc/self/maps", "r"); | |
3139 if (fp) { | |
3140 while (!feof(fp)) { | |
3141 char chars[257]; | |
3142 long x = 0; | |
3143 if (fgets(chars, sizeof(chars), fp)) { | |
3144 if (sscanf(chars, "%lx-%*x", &x) == 1 | |
3145 && x == (long)p) { | |
3146 if (strstr (chars, "hugepage")) { | |
3147 result = true; | |
3148 break; | |
3149 } | |
3150 } | |
3151 } | |
3152 } | |
3153 fclose(fp); | |
3154 } | |
3155 munmap(p, page_size); | |
3156 } | |
3157 | |
3158 if (warn && !result) { | |
3159 warning("HugeTLBFS is not supported by the operating system."); | |
3160 } | |
3161 | |
3162 return result; | |
3163 } | |
3164 | |
3165 /* | |
3166 * Set the coredump_filter bits to include largepages in core dump (bit 6) | |
3167 * | |
3168 * From the coredump_filter documentation: | |
3169 * | |
3170 * - (bit 0) anonymous private memory | |
3171 * - (bit 1) anonymous shared memory | |
3172 * - (bit 2) file-backed private memory | |
3173 * - (bit 3) file-backed shared memory | |
3174 * - (bit 4) ELF header pages in file-backed private memory areas (it is | |
3175 * effective only if the bit 2 is cleared) | |
3176 * - (bit 5) hugetlb private memory | |
3177 * - (bit 6) hugetlb shared memory | |
3178 */ | |
3179 static void set_coredump_filter(void) { | |
3180 FILE *f; | |
3181 long cdm; | |
3182 | |
3183 if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { | |
3184 return; | |
3185 } | |
3186 | |
3187 if (fscanf(f, "%lx", &cdm) != 1) { | |
3188 fclose(f); | |
3189 return; | |
3190 } | |
3191 | |
3192 rewind(f); | |
3193 | |
3194 if ((cdm & LARGEPAGES_BIT) == 0) { | |
3195 cdm |= LARGEPAGES_BIT; | |
3196 fprintf(f, "%#lx", cdm); | |
3197 } | |
3198 | |
3199 fclose(f); | |
3200 } | |
3201 | |
3202 // Large page support | |
3203 | |
3204 static size_t _large_page_size = 0; | |
3205 | |
3206 size_t os::Linux::find_large_page_size() { | |
3207 size_t large_page_size = 0; | |
3208 | |
3209 // large_page_size on Linux is used to round up heap size. x86 uses either | |
3210 // 2M or 4M page, depending on whether PAE (Physical Address Extensions) | |
3211 // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use | |
3212 // page as large as 256M. | |
3213 // | |
3214 // Here we try to figure out page size by parsing /proc/meminfo and looking | |
3215 // for a line with the following format: | |
3216 // Hugepagesize: 2048 kB | |
3217 // | |
3218 // If we can't determine the value (e.g. /proc is not mounted, or the text | |
3219 // format has been changed), we'll use the largest page size supported by | |
3220 // the processor. | |
3221 | |
3222 #ifndef ZERO | |
3223 large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) | |
3224 ARM_ONLY(2 * M) PPC_ONLY(4 * M); | |
3225 #endif // ZERO | |
3226 | |
3227 FILE *fp = fopen("/proc/meminfo", "r"); | |
3228 if (fp) { | |
3229 while (!feof(fp)) { | |
3230 int x = 0; | |
3231 char buf[16]; | |
3232 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { | |
3233 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { | |
3234 large_page_size = x * K; | |
3235 break; | |
3236 } | |
3237 } else { | |
3238 // skip to next line | |
3239 for (;;) { | |
3240 int ch = fgetc(fp); | |
3241 if (ch == EOF || ch == (int)'\n') break; | |
3242 } | |
3243 } | |
3244 } | |
3245 fclose(fp); | |
3246 } | |
3247 | |
3248 if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { | |
3249 warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " | |
3250 SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), | |
3251 proper_unit_for_byte_size(large_page_size)); | |
3252 } | |
3253 | |
3254 return large_page_size; | |
3255 } | |
3256 | |
3257 size_t os::Linux::setup_large_page_size() { | |
3258 _large_page_size = Linux::find_large_page_size(); | |
3259 const size_t default_page_size = (size_t)Linux::page_size(); | |
3260 if (_large_page_size > default_page_size) { | |
3261 _page_sizes[0] = _large_page_size; | |
3262 _page_sizes[1] = default_page_size; | |
3263 _page_sizes[2] = 0; | |
3264 } | |
3265 | |
3266 return _large_page_size; | |
3267 } | |
3268 | |
3269 bool os::Linux::setup_large_page_type(size_t page_size) { | |
3270 if (FLAG_IS_DEFAULT(UseHugeTLBFS) && | |
3271 FLAG_IS_DEFAULT(UseSHM) && | |
3272 FLAG_IS_DEFAULT(UseTransparentHugePages)) { | |
3273 | |
3274 // The type of large pages has not been specified by the user. | |
3275 | |
3276 // Try UseHugeTLBFS and then UseSHM. | |
3277 UseHugeTLBFS = UseSHM = true; | |
3278 | |
3279 // Don't try UseTransparentHugePages since there are known | |
3280 // performance issues with it turned on. This might change in the future. | |
3281 UseTransparentHugePages = false; | |
3282 } | |
3283 | |
3284 if (UseTransparentHugePages) { | |
3285 bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); | |
3286 if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { | |
3287 UseHugeTLBFS = false; | |
3288 UseSHM = false; | |
3289 return true; | |
3290 } | |
3291 UseTransparentHugePages = false; | |
3292 } | |
3293 | |
3294 if (UseHugeTLBFS) { | |
3295 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); | |
3296 if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { | |
3297 UseSHM = false; | |
3298 return true; | |
3299 } | |
3300 UseHugeTLBFS = false; | |
3301 } | |
3302 | |
3303 return UseSHM; | |
3304 } | |
3305 | |
3306 void os::large_page_init() { | |
3307 if (!UseLargePages && | |
3308 !UseTransparentHugePages && | |
3309 !UseHugeTLBFS && | |
3310 !UseSHM) { | |
3311 // Not using large pages. | |
3312 return; | |
3313 } | |
3314 | |
3315 if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { | |
3316 // The user explicitly turned off large pages. | |
3317 // Ignore the rest of the large pages flags. | |
3318 UseTransparentHugePages = false; | |
3319 UseHugeTLBFS = false; | |
3320 UseSHM = false; | |
3321 return; | |
3322 } | |
3323 | |
3324 size_t large_page_size = Linux::setup_large_page_size(); | |
3325 UseLargePages = Linux::setup_large_page_type(large_page_size); | |
3326 | |
3327 set_coredump_filter(); | |
3328 } | |
3329 | |
3330 #ifndef SHM_HUGETLB | |
3331 #define SHM_HUGETLB 04000 | |
3332 #endif | |
3333 | |
3334 char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { | |
3335 // "exec" is passed in but not used. Creating the shared image for | |
3336 // the code cache doesn't have an SHM_X executable permission to check. | |
3337 assert(UseLargePages && UseSHM, "only for SHM large pages"); | |
3338 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); | |
3339 | |
3340 if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) { | |
3341 return NULL; // Fallback to small pages. | |
3342 } | |
3343 | |
3344 key_t key = IPC_PRIVATE; | |
3345 char *addr; | |
3346 | |
3347 bool warn_on_failure = UseLargePages && | |
3348 (!FLAG_IS_DEFAULT(UseLargePages) || | |
3349 !FLAG_IS_DEFAULT(UseSHM) || | |
3350 !FLAG_IS_DEFAULT(LargePageSizeInBytes) | |
3351 ); | |
3352 char msg[128]; | |
3353 | |
3354 // Create a large shared memory region to attach to based on size. | |
3355 // Currently, size is the total size of the heap | |
3356 int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); | |
3357 if (shmid == -1) { | |
3358 // Possible reasons for shmget failure: | |
3359 // 1. shmmax is too small for Java heap. | |
3360 // > check shmmax value: cat /proc/sys/kernel/shmmax | |
3361 // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax | |
3362 // 2. not enough large page memory. | |
3363 // > check available large pages: cat /proc/meminfo | |
3364 // > increase amount of large pages: | |
3365 // echo new_value > /proc/sys/vm/nr_hugepages | |
3366 // Note 1: different Linux may use different name for this property, | |
3367 // e.g. on Redhat AS-3 it is "hugetlb_pool". | |
3368 // Note 2: it's possible there's enough physical memory available but | |
3369 // they are so fragmented after a long run that they can't | |
3370 // coalesce into large pages. Try to reserve large pages when | |
3371 // the system is still "fresh". | |
3372 if (warn_on_failure) { | |
3373 jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno); | |
3374 warning("%s", msg); | |
3375 } | |
3376 return NULL; | |
3377 } | |
3378 | |
3379 // attach to the region | |
3380 addr = (char*)shmat(shmid, req_addr, 0); | |
3381 int err = errno; | |
3382 | |
3383 // Remove shmid. If shmat() is successful, the actual shared memory segment | |
3384 // will be deleted when it's detached by shmdt() or when the process | |
3385 // terminates. If shmat() is not successful this will remove the shared | |
3386 // segment immediately. | |
3387 shmctl(shmid, IPC_RMID, NULL); | |
3388 | |
3389 if ((intptr_t)addr == -1) { | |
3390 if (warn_on_failure) { | |
3391 jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err); | |
3392 warning("%s", msg); | |
3393 } | |
3394 return NULL; | |
3395 } | |
3396 | |
3397 return addr; | |
3398 } | |
3399 | |
3400 static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { | |
3401 assert(error == ENOMEM, "Only expect to fail if no memory is available"); | |
3402 | |
3403 bool warn_on_failure = UseLargePages && | |
3404 (!FLAG_IS_DEFAULT(UseLargePages) || | |
3405 !FLAG_IS_DEFAULT(UseHugeTLBFS) || | |
3406 !FLAG_IS_DEFAULT(LargePageSizeInBytes)); | |
3407 | |
3408 if (warn_on_failure) { | |
3409 char msg[128]; | |
3410 jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " | |
3411 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); | |
3412 warning("%s", msg); | |
3413 } | |
3414 } | |
3415 | |
3416 char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { | |
3417 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); | |
3418 assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); | |
3419 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); | |
3420 | |
3421 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; | |
3422 char* addr = (char*)::mmap(req_addr, bytes, prot, | |
3423 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, | |
3424 -1, 0); | |
3425 | |
3426 if (addr == MAP_FAILED) { | |
3427 warn_on_large_pages_failure(req_addr, bytes, errno); | |
3428 return NULL; | |
3429 } | |
3430 | |
3431 assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); | |
3432 | |
3433 return addr; | |
3434 } | |
3435 | |
3436 // Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed(). | |
3437 // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address | 3050 // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address |
3438 // (req_addr != NULL) or with a given alignment. | 3051 // (req_addr != NULL) or with a given alignment. |
3439 // - bytes shall be a multiple of alignment. | 3052 // - bytes shall be a multiple of alignment. |
3440 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment. | 3053 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment. |
3441 // - alignment sets the alignment at which memory shall be allocated. | 3054 // - alignment sets the alignment at which memory shall be allocated. |
3472 } | 3085 } |
3473 start = start_aligned; | 3086 start = start_aligned; |
3474 } | 3087 } |
3475 } | 3088 } |
3476 return start; | 3089 return start; |
3477 | 3090 } |
3091 | |
3092 // Don't update _highest_vm_reserved_address, because there might be memory | |
3093 // regions above addr + size. If so, releasing a memory region only creates | |
3094 // a hole in the address space, it doesn't help prevent heap-stack collision. | |
3095 // | |
3096 static int anon_munmap(char * addr, size_t size) { | |
3097 return ::munmap(addr, size) == 0; | |
3098 } | |
3099 | |
3100 char* os::pd_reserve_memory(size_t bytes, char* requested_addr, | |
3101 size_t alignment_hint) { | |
3102 return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); | |
3103 } | |
3104 | |
3105 bool os::pd_release_memory(char* addr, size_t size) { | |
3106 return anon_munmap(addr, size); | |
3107 } | |
3108 | |
3109 static address highest_vm_reserved_address() { | |
3110 return _highest_vm_reserved_address; | |
3111 } | |
3112 | |
3113 static bool linux_mprotect(char* addr, size_t size, int prot) { | |
3114 // Linux wants the mprotect address argument to be page aligned. | |
3115 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); | |
3116 | |
3117 // According to SUSv3, mprotect() should only be used with mappings | |
3118 // established by mmap(), and mmap() always maps whole pages. Unaligned | |
3119 // 'addr' likely indicates problem in the VM (e.g. trying to change | |
3120 // protection of malloc'ed or statically allocated memory). Check the | |
3121 // caller if you hit this assert. | |
3122 assert(addr == bottom, "sanity check"); | |
3123 | |
3124 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); | |
3125 return ::mprotect(bottom, size, prot) == 0; | |
3126 } | |
3127 | |
3128 // Set protections specified | |
3129 bool os::protect_memory(char* addr, size_t bytes, ProtType prot, | |
3130 bool is_committed) { | |
3131 unsigned int p = 0; | |
3132 switch (prot) { | |
3133 case MEM_PROT_NONE: p = PROT_NONE; break; | |
3134 case MEM_PROT_READ: p = PROT_READ; break; | |
3135 case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; | |
3136 case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; | |
3137 default: | |
3138 ShouldNotReachHere(); | |
3139 } | |
3140 // is_committed is unused. | |
3141 return linux_mprotect(addr, bytes, p); | |
3142 } | |
3143 | |
3144 bool os::guard_memory(char* addr, size_t size) { | |
3145 return linux_mprotect(addr, size, PROT_NONE); | |
3146 } | |
3147 | |
3148 bool os::unguard_memory(char* addr, size_t size) { | |
3149 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); | |
3150 } | |
3151 | |
3152 bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { | |
3153 bool result = false; | |
3154 void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, | |
3155 MAP_ANONYMOUS|MAP_PRIVATE, | |
3156 -1, 0); | |
3157 if (p != MAP_FAILED) { | |
3158 void *aligned_p = align_ptr_up(p, page_size); | |
3159 | |
3160 result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; | |
3161 | |
3162 munmap(p, page_size * 2); | |
3163 } | |
3164 | |
3165 if (warn && !result) { | |
3166 warning("TransparentHugePages is not supported by the operating system."); | |
3167 } | |
3168 | |
3169 return result; | |
3170 } | |
3171 | |
3172 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { | |
3173 bool result = false; | |
3174 void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, | |
3175 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, | |
3176 -1, 0); | |
3177 | |
3178 if (p != MAP_FAILED) { | |
3179 // We don't know if this really is a huge page or not. | |
3180 FILE *fp = fopen("/proc/self/maps", "r"); | |
3181 if (fp) { | |
3182 while (!feof(fp)) { | |
3183 char chars[257]; | |
3184 long x = 0; | |
3185 if (fgets(chars, sizeof(chars), fp)) { | |
3186 if (sscanf(chars, "%lx-%*x", &x) == 1 | |
3187 && x == (long)p) { | |
3188 if (strstr (chars, "hugepage")) { | |
3189 result = true; | |
3190 break; | |
3191 } | |
3192 } | |
3193 } | |
3194 } | |
3195 fclose(fp); | |
3196 } | |
3197 munmap(p, page_size); | |
3198 } | |
3199 | |
3200 if (warn && !result) { | |
3201 warning("HugeTLBFS is not supported by the operating system."); | |
3202 } | |
3203 | |
3204 return result; | |
3205 } | |
3206 | |
3207 /* | |
3208 * Set the coredump_filter bits to include largepages in core dump (bit 6) | |
3209 * | |
3210 * From the coredump_filter documentation: | |
3211 * | |
3212 * - (bit 0) anonymous private memory | |
3213 * - (bit 1) anonymous shared memory | |
3214 * - (bit 2) file-backed private memory | |
3215 * - (bit 3) file-backed shared memory | |
3216 * - (bit 4) ELF header pages in file-backed private memory areas (it is | |
3217 * effective only if the bit 2 is cleared) | |
3218 * - (bit 5) hugetlb private memory | |
3219 * - (bit 6) hugetlb shared memory | |
3220 */ | |
3221 static void set_coredump_filter(void) { | |
3222 FILE *f; | |
3223 long cdm; | |
3224 | |
3225 if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { | |
3226 return; | |
3227 } | |
3228 | |
3229 if (fscanf(f, "%lx", &cdm) != 1) { | |
3230 fclose(f); | |
3231 return; | |
3232 } | |
3233 | |
3234 rewind(f); | |
3235 | |
3236 if ((cdm & LARGEPAGES_BIT) == 0) { | |
3237 cdm |= LARGEPAGES_BIT; | |
3238 fprintf(f, "%#lx", cdm); | |
3239 } | |
3240 | |
3241 fclose(f); | |
3242 } | |
3243 | |
3244 // Large page support | |
3245 | |
3246 static size_t _large_page_size = 0; | |
3247 | |
3248 size_t os::Linux::find_large_page_size() { | |
3249 size_t large_page_size = 0; | |
3250 | |
3251 // large_page_size on Linux is used to round up heap size. x86 uses either | |
3252 // 2M or 4M page, depending on whether PAE (Physical Address Extensions) | |
3253 // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use | |
3254 // page as large as 256M. | |
3255 // | |
3256 // Here we try to figure out page size by parsing /proc/meminfo and looking | |
3257 // for a line with the following format: | |
3258 // Hugepagesize: 2048 kB | |
3259 // | |
3260 // If we can't determine the value (e.g. /proc is not mounted, or the text | |
3261 // format has been changed), we'll use the largest page size supported by | |
3262 // the processor. | |
3263 | |
3264 #ifndef ZERO | |
3265 large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) | |
3266 ARM_ONLY(2 * M) PPC_ONLY(4 * M); | |
3267 #endif // ZERO | |
3268 | |
3269 FILE *fp = fopen("/proc/meminfo", "r"); | |
3270 if (fp) { | |
3271 while (!feof(fp)) { | |
3272 int x = 0; | |
3273 char buf[16]; | |
3274 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { | |
3275 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { | |
3276 large_page_size = x * K; | |
3277 break; | |
3278 } | |
3279 } else { | |
3280 // skip to next line | |
3281 for (;;) { | |
3282 int ch = fgetc(fp); | |
3283 if (ch == EOF || ch == (int)'\n') break; | |
3284 } | |
3285 } | |
3286 } | |
3287 fclose(fp); | |
3288 } | |
3289 | |
3290 if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { | |
3291 warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " | |
3292 SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), | |
3293 proper_unit_for_byte_size(large_page_size)); | |
3294 } | |
3295 | |
3296 return large_page_size; | |
3297 } | |
3298 | |
3299 size_t os::Linux::setup_large_page_size() { | |
3300 _large_page_size = Linux::find_large_page_size(); | |
3301 const size_t default_page_size = (size_t)Linux::page_size(); | |
3302 if (_large_page_size > default_page_size) { | |
3303 _page_sizes[0] = _large_page_size; | |
3304 _page_sizes[1] = default_page_size; | |
3305 _page_sizes[2] = 0; | |
3306 } | |
3307 | |
3308 return _large_page_size; | |
3309 } | |
3310 | |
3311 bool os::Linux::setup_large_page_type(size_t page_size) { | |
3312 if (FLAG_IS_DEFAULT(UseHugeTLBFS) && | |
3313 FLAG_IS_DEFAULT(UseSHM) && | |
3314 FLAG_IS_DEFAULT(UseTransparentHugePages)) { | |
3315 | |
3316 // The type of large pages has not been specified by the user. | |
3317 | |
3318 // Try UseHugeTLBFS and then UseSHM. | |
3319 UseHugeTLBFS = UseSHM = true; | |
3320 | |
3321 // Don't try UseTransparentHugePages since there are known | |
3322 // performance issues with it turned on. This might change in the future. | |
3323 UseTransparentHugePages = false; | |
3324 } | |
3325 | |
3326 if (UseTransparentHugePages) { | |
3327 bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); | |
3328 if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { | |
3329 UseHugeTLBFS = false; | |
3330 UseSHM = false; | |
3331 return true; | |
3332 } | |
3333 UseTransparentHugePages = false; | |
3334 } | |
3335 | |
3336 if (UseHugeTLBFS) { | |
3337 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); | |
3338 if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { | |
3339 UseSHM = false; | |
3340 return true; | |
3341 } | |
3342 UseHugeTLBFS = false; | |
3343 } | |
3344 | |
3345 return UseSHM; | |
3346 } | |
3347 | |
3348 void os::large_page_init() { | |
3349 if (!UseLargePages && | |
3350 !UseTransparentHugePages && | |
3351 !UseHugeTLBFS && | |
3352 !UseSHM) { | |
3353 // Not using large pages. | |
3354 return; | |
3355 } | |
3356 | |
3357 if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { | |
3358 // The user explicitly turned off large pages. | |
3359 // Ignore the rest of the large pages flags. | |
3360 UseTransparentHugePages = false; | |
3361 UseHugeTLBFS = false; | |
3362 UseSHM = false; | |
3363 return; | |
3364 } | |
3365 | |
3366 size_t large_page_size = Linux::setup_large_page_size(); | |
3367 UseLargePages = Linux::setup_large_page_type(large_page_size); | |
3368 | |
3369 set_coredump_filter(); | |
3370 } | |
3371 | |
3372 #ifndef SHM_HUGETLB | |
3373 #define SHM_HUGETLB 04000 | |
3374 #endif | |
3375 | |
3376 #define shm_warning_format(format, ...) \ | |
3377 do { \ | |
3378 if (UseLargePages && \ | |
3379 (!FLAG_IS_DEFAULT(UseLargePages) || \ | |
3380 !FLAG_IS_DEFAULT(UseSHM) || \ | |
3381 !FLAG_IS_DEFAULT(LargePageSizeInBytes))) { \ | |
3382 warning(format, __VA_ARGS__); \ | |
3383 } \ | |
3384 } while (0) | |
3385 | |
3386 #define shm_warning(str) shm_warning_format("%s", str) | |
3387 | |
3388 #define shm_warning_with_errno(str) \ | |
3389 do { \ | |
3390 int err = errno; \ | |
3391 shm_warning_format(str " (error = %d)", err); \ | |
3392 } while (0) | |
3393 | |
3394 static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) { | |
3395 assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment"); | |
3396 | |
3397 if (!is_size_aligned(alignment, SHMLBA)) { | |
3398 assert(false, "Code below assumes that alignment is at least SHMLBA aligned"); | |
3399 return NULL; | |
3400 } | |
3401 | |
3402 // To ensure that we get 'alignment' aligned memory from shmat, | |
3403 // we pre-reserve aligned virtual memory and then attach to that. | |
3404 | |
3405 char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL); | |
3406 if (pre_reserved_addr == NULL) { | |
3407 // Couldn't pre-reserve aligned memory. | |
3408 shm_warning("Failed to pre-reserve aligned memory for shmat."); | |
3409 return NULL; | |
3410 } | |
3411 | |
3412 // SHM_REMAP is needed to allow shmat to map over an existing mapping. | |
3413 char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP); | |
3414 | |
3415 if ((intptr_t)addr == -1) { | |
3416 int err = errno; | |
3417 shm_warning_with_errno("Failed to attach shared memory."); | |
3418 | |
3419 assert(err != EACCES, "Unexpected error"); | |
3420 assert(err != EIDRM, "Unexpected error"); | |
3421 assert(err != EINVAL, "Unexpected error"); | |
3422 | |
3423 // Since we don't know if the kernel unmapped the pre-reserved memory area | |
3424 // we can't unmap it, since that would potentially unmap memory that was | |
3425 // mapped from other threads. | |
3426 return NULL; | |
3427 } | |
3428 | |
3429 return addr; | |
3430 } | |
3431 | |
3432 static char* shmat_at_address(int shmid, char* req_addr) { | |
3433 if (!is_ptr_aligned(req_addr, SHMLBA)) { | |
3434 assert(false, "Requested address needs to be SHMLBA aligned"); | |
3435 return NULL; | |
3436 } | |
3437 | |
3438 char* addr = (char*)shmat(shmid, req_addr, 0); | |
3439 | |
3440 if ((intptr_t)addr == -1) { | |
3441 shm_warning_with_errno("Failed to attach shared memory."); | |
3442 return NULL; | |
3443 } | |
3444 | |
3445 return addr; | |
3446 } | |
3447 | |
3448 static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) { | |
3449 // If a req_addr has been provided, we assume that the caller has already aligned the address. | |
3450 if (req_addr != NULL) { | |
3451 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size"); | |
3452 assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment"); | |
3453 return shmat_at_address(shmid, req_addr); | |
3454 } | |
3455 | |
3456 // Since shmid has been setup with SHM_HUGETLB, shmat will automatically | |
3457 // return large page size aligned memory addresses when req_addr == NULL. | |
3458 // However, if the alignment is larger than the large page size, we have | |
3459 // to manually ensure that the memory returned is 'alignment' aligned. | |
3460 if (alignment > os::large_page_size()) { | |
3461 assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size"); | |
3462 return shmat_with_alignment(shmid, bytes, alignment); | |
3463 } else { | |
3464 return shmat_at_address(shmid, NULL); | |
3465 } | |
3466 } | |
3467 | |
3468 char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { | |
3469 // "exec" is passed in but not used. Creating the shared image for | |
3470 // the code cache doesn't have an SHM_X executable permission to check. | |
3471 assert(UseLargePages && UseSHM, "only for SHM large pages"); | |
3472 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); | |
3473 assert(is_ptr_aligned(req_addr, alignment), "Unaligned address"); | |
3474 | |
3475 if (!is_size_aligned(bytes, os::large_page_size())) { | |
3476 return NULL; // Fallback to small pages. | |
3477 } | |
3478 | |
3479 // Create a large shared memory region to attach to based on size. | |
3480 // Currently, size is the total size of the heap. | |
3481 int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); | |
3482 if (shmid == -1) { | |
3483 // Possible reasons for shmget failure: | |
3484 // 1. shmmax is too small for Java heap. | |
3485 // > check shmmax value: cat /proc/sys/kernel/shmmax | |
3486 // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax | |
3487 // 2. not enough large page memory. | |
3488 // > check available large pages: cat /proc/meminfo | |
3489 // > increase amount of large pages: | |
3490 // echo new_value > /proc/sys/vm/nr_hugepages | |
3491 // Note 1: different Linux may use different name for this property, | |
3492 // e.g. on Redhat AS-3 it is "hugetlb_pool". | |
3493 // Note 2: it's possible there's enough physical memory available but | |
3494 // they are so fragmented after a long run that they can't | |
3495 // coalesce into large pages. Try to reserve large pages when | |
3496 // the system is still "fresh". | |
3497 shm_warning_with_errno("Failed to reserve shared memory."); | |
3498 return NULL; | |
3499 } | |
3500 | |
3501 // Attach to the region. | |
3502 char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr); | |
3503 | |
3504 // Remove shmid. If shmat() is successful, the actual shared memory segment | |
3505 // will be deleted when it's detached by shmdt() or when the process | |
3506 // terminates. If shmat() is not successful this will remove the shared | |
3507 // segment immediately. | |
3508 shmctl(shmid, IPC_RMID, NULL); | |
3509 | |
3510 return addr; | |
3511 } | |
3512 | |
3513 static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { | |
3514 assert(error == ENOMEM, "Only expect to fail if no memory is available"); | |
3515 | |
3516 bool warn_on_failure = UseLargePages && | |
3517 (!FLAG_IS_DEFAULT(UseLargePages) || | |
3518 !FLAG_IS_DEFAULT(UseHugeTLBFS) || | |
3519 !FLAG_IS_DEFAULT(LargePageSizeInBytes)); | |
3520 | |
3521 if (warn_on_failure) { | |
3522 char msg[128]; | |
3523 jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " | |
3524 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); | |
3525 warning("%s", msg); | |
3526 } | |
3527 } | |
3528 | |
3529 char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { | |
3530 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); | |
3531 assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); | |
3532 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); | |
3533 | |
3534 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; | |
3535 char* addr = (char*)::mmap(req_addr, bytes, prot, | |
3536 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, | |
3537 -1, 0); | |
3538 | |
3539 if (addr == MAP_FAILED) { | |
3540 warn_on_large_pages_failure(req_addr, bytes, errno); | |
3541 return NULL; | |
3542 } | |
3543 | |
3544 assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); | |
3545 | |
3546 return addr; | |
3478 } | 3547 } |
3479 | 3548 |
3480 // Reserve memory using mmap(MAP_HUGETLB). | 3549 // Reserve memory using mmap(MAP_HUGETLB). |
3481 // - bytes shall be a multiple of alignment. | 3550 // - bytes shall be a multiple of alignment. |
3482 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment. | 3551 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment. |