comparison src/os/linux/vm/os_linux.cpp @ 24007:776cb7cbe2e4

8017629: G1: UseSHM in combination with a G1HeapRegionSize > os::large_page_size() falls back to use small pages
Reviewed-by: pliden, sjohanss, stuefe
author stefank
date Mon, 25 Apr 2016 11:36:14 +0200
parents 44c8fe602a5e
children 9b69cec6d01b e21dd2c95cf0
3045 3045 }
3046 3046
3047 3047 return addr == MAP_FAILED ? NULL : addr;
3048 3048 }
3049 3049
3050 // Don't update _highest_vm_reserved_address, because there might be memory
3051 // regions above addr + size. If so, releasing a memory region only creates
3052 // a hole in the address space; it doesn't help prevent heap-stack collision.
3053 //
3054 static int anon_munmap(char * addr, size_t size) {
3055 return ::munmap(addr, size) == 0;
3056 }
3057
3058 char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
3059 size_t alignment_hint) {
3060 return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
3061 }
3062
3063 bool os::pd_release_memory(char* addr, size_t size) {
3064 return anon_munmap(addr, size);
3065 }
3066
3067 static address highest_vm_reserved_address() {
3068 return _highest_vm_reserved_address;
3069 }
3070
3071 static bool linux_mprotect(char* addr, size_t size, int prot) {
3072 // Linux wants the mprotect address argument to be page aligned.
3073 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
3074
3075 // According to SUSv3, mprotect() should only be used with mappings
3076 // established by mmap(), and mmap() always maps whole pages. Unaligned
3077 // 'addr' likely indicates a problem in the VM (e.g. trying to change
3078 // protection of malloc'ed or statically allocated memory). Check the
3079 // caller if you hit this assert.
3080 assert(addr == bottom, "sanity check");
3081
3082 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
3083 return ::mprotect(bottom, size, prot) == 0;
3084 }
3085
3086 // Set protections specified
3087 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
3088 bool is_committed) {
3089 unsigned int p = 0;
3090 switch (prot) {
3091 case MEM_PROT_NONE: p = PROT_NONE; break;
3092 case MEM_PROT_READ: p = PROT_READ; break;
3093 case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break;
3094 case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
3095 default:
3096 ShouldNotReachHere();
3097 }
3098 // is_committed is unused.
3099 return linux_mprotect(addr, bytes, p);
3100 }
3101
3102 bool os::guard_memory(char* addr, size_t size) {
3103 return linux_mprotect(addr, size, PROT_NONE);
3104 }
3105
3106 bool os::unguard_memory(char* addr, size_t size) {
3107 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
3108 }
3109
3110 bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) {
3111 bool result = false;
3112 void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
3113 MAP_ANONYMOUS|MAP_PRIVATE,
3114 -1, 0);
3115 if (p != MAP_FAILED) {
3116 void *aligned_p = align_ptr_up(p, page_size);
3117
3118 result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
3119
3120 munmap(p, page_size * 2);
3121 }
3122
3123 if (warn && !result) {
3124 warning("TransparentHugePages is not supported by the operating system.");
3125 }
3126
3127 return result;
3128 }
3129
3130 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
3131 bool result = false;
3132 void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
3133 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
3134 -1, 0);
3135
3136 if (p != MAP_FAILED) {
3137 // We don't know if this really is a huge page or not.
3138 FILE *fp = fopen("/proc/self/maps", "r");
3139 if (fp) {
3140 while (!feof(fp)) {
3141 char chars[257];
3142 long x = 0;
3143 if (fgets(chars, sizeof(chars), fp)) {
3144 if (sscanf(chars, "%lx-%*x", &x) == 1
3145 && x == (long)p) {
3146 if (strstr (chars, "hugepage")) {
3147 result = true;
3148 break;
3149 }
3150 }
3151 }
3152 }
3153 fclose(fp);
3154 }
3155 munmap(p, page_size);
3156 }
3157
3158 if (warn && !result) {
3159 warning("HugeTLBFS is not supported by the operating system.");
3160 }
3161
3162 return result;
3163 }
3164
3165 /*
3166 * Set the coredump_filter bits to include largepages in core dump (bit 6)
3167 *
3168 * From the coredump_filter documentation:
3169 *
3170 * - (bit 0) anonymous private memory
3171 * - (bit 1) anonymous shared memory
3172 * - (bit 2) file-backed private memory
3173 * - (bit 3) file-backed shared memory
3174 * - (bit 4) ELF header pages in file-backed private memory areas (it is
3175 * effective only if the bit 2 is cleared)
3176 * - (bit 5) hugetlb private memory
3177 * - (bit 6) hugetlb shared memory
3178 */
3179 static void set_coredump_filter(void) {
3180 FILE *f;
3181 long cdm;
3182
3183 if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
3184 return;
3185 }
3186
3187 if (fscanf(f, "%lx", &cdm) != 1) {
3188 fclose(f);
3189 return;
3190 }
3191
3192 rewind(f);
3193
3194 if ((cdm & LARGEPAGES_BIT) == 0) {
3195 cdm |= LARGEPAGES_BIT;
3196 fprintf(f, "%#lx", cdm);
3197 }
3198
3199 fclose(f);
3200 }
3201
3202 // Large page support
3203
3204 static size_t _large_page_size = 0;
3205
3206 size_t os::Linux::find_large_page_size() {
3207 size_t large_page_size = 0;
3208
3209 // large_page_size on Linux is used to round up heap size. x86 uses either
3210 // 2M or 4M pages, depending on whether PAE (Physical Address Extensions)
3211 // mode is enabled. AMD64/EM64T uses 2M pages in 64-bit mode. IA64 can use
3212 // pages as large as 256M.
3213 //
3214 // Here we try to figure out page size by parsing /proc/meminfo and looking
3215 // for a line with the following format:
3216 // Hugepagesize: 2048 kB
3217 //
3218 // If we can't determine the value (e.g. /proc is not mounted, or the text
3219 // format has been changed), we'll use the largest page size supported by
3220 // the processor.
3221
3222 #ifndef ZERO
3223 large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
3224 ARM_ONLY(2 * M) PPC_ONLY(4 * M);
3225 #endif // ZERO
3226
3227 FILE *fp = fopen("/proc/meminfo", "r");
3228 if (fp) {
3229 while (!feof(fp)) {
3230 int x = 0;
3231 char buf[16];
3232 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
3233 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
3234 large_page_size = x * K;
3235 break;
3236 }
3237 } else {
3238 // skip to next line
3239 for (;;) {
3240 int ch = fgetc(fp);
3241 if (ch == EOF || ch == (int)'\n') break;
3242 }
3243 }
3244 }
3245 fclose(fp);
3246 }
3247
3248 if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
3249 warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
3250 SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
3251 proper_unit_for_byte_size(large_page_size));
3252 }
3253
3254 return large_page_size;
3255 }
3256
3257 size_t os::Linux::setup_large_page_size() {
3258 _large_page_size = Linux::find_large_page_size();
3259 const size_t default_page_size = (size_t)Linux::page_size();
3260 if (_large_page_size > default_page_size) {
3261 _page_sizes[0] = _large_page_size;
3262 _page_sizes[1] = default_page_size;
3263 _page_sizes[2] = 0;
3264 }
3265
3266 return _large_page_size;
3267 }
3268
3269 bool os::Linux::setup_large_page_type(size_t page_size) {
3270 if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
3271 FLAG_IS_DEFAULT(UseSHM) &&
3272 FLAG_IS_DEFAULT(UseTransparentHugePages)) {
3273
3274 // The type of large pages has not been specified by the user.
3275
3276 // Try UseHugeTLBFS and then UseSHM.
3277 UseHugeTLBFS = UseSHM = true;
3278
3279 // Don't try UseTransparentHugePages since there are known
3280 // performance issues with it turned on. This might change in the future.
3281 UseTransparentHugePages = false;
3282 }
3283
3284 if (UseTransparentHugePages) {
3285 bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
3286 if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
3287 UseHugeTLBFS = false;
3288 UseSHM = false;
3289 return true;
3290 }
3291 UseTransparentHugePages = false;
3292 }
3293
3294 if (UseHugeTLBFS) {
3295 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
3296 if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
3297 UseSHM = false;
3298 return true;
3299 }
3300 UseHugeTLBFS = false;
3301 }
3302
3303 return UseSHM;
3304 }
3305
3306 void os::large_page_init() {
3307 if (!UseLargePages &&
3308 !UseTransparentHugePages &&
3309 !UseHugeTLBFS &&
3310 !UseSHM) {
3311 // Not using large pages.
3312 return;
3313 }
3314
3315 if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) {
3316 // The user explicitly turned off large pages.
3317 // Ignore the rest of the large pages flags.
3318 UseTransparentHugePages = false;
3319 UseHugeTLBFS = false;
3320 UseSHM = false;
3321 return;
3322 }
3323
3324 size_t large_page_size = Linux::setup_large_page_size();
3325 UseLargePages = Linux::setup_large_page_type(large_page_size);
3326
3327 set_coredump_filter();
3328 }
3329
3330 #ifndef SHM_HUGETLB
3331 #define SHM_HUGETLB 04000
3332 #endif
3333
3334 char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3335 // "exec" is passed in but not used. Creating the shared image for
3336 // the code cache doesn't have an SHM_X executable permission to check.
3337 assert(UseLargePages && UseSHM, "only for SHM large pages");
3338 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3339
3340 if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
3341 return NULL; // Fallback to small pages.
3342 }
3343
3344 key_t key = IPC_PRIVATE;
3345 char *addr;
3346
3347 bool warn_on_failure = UseLargePages &&
3348 (!FLAG_IS_DEFAULT(UseLargePages) ||
3349 !FLAG_IS_DEFAULT(UseSHM) ||
3350 !FLAG_IS_DEFAULT(LargePageSizeInBytes)
3351 );
3352 char msg[128];
3353
3354 // Create a large shared memory region to attach to based on size.
3355 // Currently, size is the total size of the heap
3356 int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
3357 if (shmid == -1) {
3358 // Possible reasons for shmget failure:
3359 // 1. shmmax is too small for Java heap.
3360 // > check shmmax value: cat /proc/sys/kernel/shmmax
3361 // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
3362 // 2. not enough large page memory.
3363 // > check available large pages: cat /proc/meminfo
3364 // > increase amount of large pages:
3365 // echo new_value > /proc/sys/vm/nr_hugepages
3366 // Note 1: different Linux distributions may use different names for this
3367 // property, e.g. on Redhat AS-3 it is "hugetlb_pool".
3368 // Note 2: it's possible there's enough physical memory available but
3369 // it is so fragmented after a long run that it can't
3370 // coalesce into large pages. Try to reserve large pages when
3371 // the system is still "fresh".
3372 if (warn_on_failure) {
3373 jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
3374 warning("%s", msg);
3375 }
3376 return NULL;
3377 }
3378
3379 // attach to the region
3380 addr = (char*)shmat(shmid, req_addr, 0);
3381 int err = errno;
3382
3383 // Remove shmid. If shmat() is successful, the actual shared memory segment
3384 // will be deleted when it's detached by shmdt() or when the process
3385 // terminates. If shmat() is not successful this will remove the shared
3386 // segment immediately.
3387 shmctl(shmid, IPC_RMID, NULL);
3388
3389 if ((intptr_t)addr == -1) {
3390 if (warn_on_failure) {
3391 jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
3392 warning("%s", msg);
3393 }
3394 return NULL;
3395 }
3396
3397 return addr;
3398 }
3399
3400 static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) {
3401 assert(error == ENOMEM, "Only expect to fail if no memory is available");
3402
3403 bool warn_on_failure = UseLargePages &&
3404 (!FLAG_IS_DEFAULT(UseLargePages) ||
3405 !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
3406 !FLAG_IS_DEFAULT(LargePageSizeInBytes));
3407
3408 if (warn_on_failure) {
3409 char msg[128];
3410 jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
3411 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
3412 warning("%s", msg);
3413 }
3414 }
3415
3416 char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) {
3417 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3418 assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
3419 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3420
3421 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3422 char* addr = (char*)::mmap(req_addr, bytes, prot,
3423 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
3424 -1, 0);
3425
3426 if (addr == MAP_FAILED) {
3427 warn_on_large_pages_failure(req_addr, bytes, errno);
3428 return NULL;
3429 }
3430
3431 assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
3432
3433 return addr;
3434 }
3435
3436 // Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed().
3437 3050 // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
3438 3051 // (req_addr != NULL) or with a given alignment.
3439 3052 // - bytes shall be a multiple of alignment.
3440 3053 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment.
3441 3054 // - alignment sets the alignment at which memory shall be allocated.
3472 3085 }
3473 3086 start = start_aligned;
3474 3087 }
3475 3088 }
3476 3089 return start;
3477 3090 }
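The body of anon_mmap_aligned() (old lines 3442-3471, new lines 3055-3084) is collapsed in this comparison because it is unchanged. The sketch below illustrates the over-reserve-and-trim idea that the visible tail implies: pre-reserve bytes plus alignment with PROT_NONE, round the start address up, and munmap the excess head and tail. It is an editorial illustration under those assumptions, not the HotSpot code; the helper name reserve_aligned() is made up.

#include <sys/mman.h>
#include <stdint.h>
#include <stddef.h>

// Reserve 'bytes' of address space aligned to 'alignment' (a power of two and a
// multiple of the page size) without committing backing memory. 'bytes' is
// assumed to be a multiple of the page size as well.
static char* reserve_aligned(size_t bytes, size_t alignment) {
  size_t extra = bytes + alignment;  // over-reserve so an aligned range must fit
  char* start = (char*) ::mmap(NULL, extra, PROT_NONE,
                               MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (start == MAP_FAILED) {
    return NULL;
  }
  char* aligned = (char*) (((uintptr_t) start + alignment - 1) & ~(uintptr_t)(alignment - 1));
  char* end = start + extra;
  if (aligned > start) {
    ::munmap(start, aligned - start);                    // trim the unaligned head
  }
  if (aligned + bytes < end) {
    ::munmap(aligned + bytes, end - (aligned + bytes));  // trim the unused tail
  }
  return aligned;                                        // [aligned, aligned + bytes) remains
}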
3091
3092 // Don't update _highest_vm_reserved_address, because there might be memory
3093 // regions above addr + size. If so, releasing a memory region only creates
3094 // a hole in the address space; it doesn't help prevent heap-stack collision.
3095 //
3096 static int anon_munmap(char * addr, size_t size) {
3097 return ::munmap(addr, size) == 0;
3098 }
3099
3100 char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
3101 size_t alignment_hint) {
3102 return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
3103 }
3104
3105 bool os::pd_release_memory(char* addr, size_t size) {
3106 return anon_munmap(addr, size);
3107 }
3108
3109 static address highest_vm_reserved_address() {
3110 return _highest_vm_reserved_address;
3111 }
3112
3113 static bool linux_mprotect(char* addr, size_t size, int prot) {
3114 // Linux wants the mprotect address argument to be page aligned.
3115 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
3116
3117 // According to SUSv3, mprotect() should only be used with mappings
3118 // established by mmap(), and mmap() always maps whole pages. Unaligned
3119 // 'addr' likely indicates a problem in the VM (e.g. trying to change
3120 // protection of malloc'ed or statically allocated memory). Check the
3121 // caller if you hit this assert.
3122 assert(addr == bottom, "sanity check");
3123
3124 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
3125 return ::mprotect(bottom, size, prot) == 0;
3126 }
3127
3128 // Set protections specified
3129 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
3130 bool is_committed) {
3131 unsigned int p = 0;
3132 switch (prot) {
3133 case MEM_PROT_NONE: p = PROT_NONE; break;
3134 case MEM_PROT_READ: p = PROT_READ; break;
3135 case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break;
3136 case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
3137 default:
3138 ShouldNotReachHere();
3139 }
3140 // is_committed is unused.
3141 return linux_mprotect(addr, bytes, p);
3142 }
3143
3144 bool os::guard_memory(char* addr, size_t size) {
3145 return linux_mprotect(addr, size, PROT_NONE);
3146 }
3147
3148 bool os::unguard_memory(char* addr, size_t size) {
3149 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
3150 }
3151
3152 bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) {
3153 bool result = false;
3154 void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
3155 MAP_ANONYMOUS|MAP_PRIVATE,
3156 -1, 0);
3157 if (p != MAP_FAILED) {
3158 void *aligned_p = align_ptr_up(p, page_size);
3159
3160 result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
3161
3162 munmap(p, page_size * 2);
3163 }
3164
3165 if (warn && !result) {
3166 warning("TransparentHugePages is not supported by the operating system.");
3167 }
3168
3169 return result;
3170 }
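A note on the probe above (an editorial reading, not a comment from the source): an anonymous mmap without MAP_HUGETLB is only guaranteed to be small-page aligned, so the code maps 2 * page_size and rounds the pointer up with align_ptr_up() to guarantee that one fully aligned region of page_size bytes lies inside the mapping. madvise(MADV_HUGEPAGE) is then tried on that region, and its success or failure is what decides whether transparent huge pages are reported as supported.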
3171
3172 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
3173 bool result = false;
3174 void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
3175 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
3176 -1, 0);
3177
3178 if (p != MAP_FAILED) {
3179 // We don't know if this really is a huge page or not.
3180 FILE *fp = fopen("/proc/self/maps", "r");
3181 if (fp) {
3182 while (!feof(fp)) {
3183 char chars[257];
3184 long x = 0;
3185 if (fgets(chars, sizeof(chars), fp)) {
3186 if (sscanf(chars, "%lx-%*x", &x) == 1
3187 && x == (long)p) {
3188 if (strstr (chars, "hugepage")) {
3189 result = true;
3190 break;
3191 }
3192 }
3193 }
3194 }
3195 fclose(fp);
3196 }
3197 munmap(p, page_size);
3198 }
3199
3200 if (warn && !result) {
3201 warning("HugeTLBFS is not supported by the operating system.");
3202 }
3203
3204 return result;
3205 }
3206
3207 /*
3208 * Set the coredump_filter bits to include largepages in core dump (bit 6)
3209 *
3210 * From the coredump_filter documentation:
3211 *
3212 * - (bit 0) anonymous private memory
3213 * - (bit 1) anonymous shared memory
3214 * - (bit 2) file-backed private memory
3215 * - (bit 3) file-backed shared memory
3216 * - (bit 4) ELF header pages in file-backed private memory areas (it is
3217 * effective only if the bit 2 is cleared)
3218 * - (bit 5) hugetlb private memory
3219 * - (bit 6) hugetlb shared memory
3220 */
3221 static void set_coredump_filter(void) {
3222 FILE *f;
3223 long cdm;
3224
3225 if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
3226 return;
3227 }
3228
3229 if (fscanf(f, "%lx", &cdm) != 1) {
3230 fclose(f);
3231 return;
3232 }
3233
3234 rewind(f);
3235
3236 if ((cdm & LARGEPAGES_BIT) == 0) {
3237 cdm |= LARGEPAGES_BIT;
3238 fprintf(f, "%#lx", cdm);
3239 }
3240
3241 fclose(f);
3242 }
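As a worked example (assuming LARGEPAGES_BIT is defined elsewhere in this file as 1 << 6, i.e. 0x40, matching the "hugetlb shared memory" bit listed above): if /proc/self/coredump_filter currently reads 0x33, the function rewrites it as 0x73; a value that already has bit 6 set is left untouched.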
3243
3244 // Large page support
3245
3246 static size_t _large_page_size = 0;
3247
3248 size_t os::Linux::find_large_page_size() {
3249 size_t large_page_size = 0;
3250
3251 // large_page_size on Linux is used to round up heap size. x86 uses either
3252 // 2M or 4M pages, depending on whether PAE (Physical Address Extensions)
3253 // mode is enabled. AMD64/EM64T uses 2M pages in 64-bit mode. IA64 can use
3254 // pages as large as 256M.
3255 //
3256 // Here we try to figure out page size by parsing /proc/meminfo and looking
3257 // for a line with the following format:
3258 // Hugepagesize: 2048 kB
3259 //
3260 // If we can't determine the value (e.g. /proc is not mounted, or the text
3261 // format has been changed), we'll use the largest page size supported by
3262 // the processor.
3263
3264 #ifndef ZERO
3265 large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
3266 ARM_ONLY(2 * M) PPC_ONLY(4 * M);
3267 #endif // ZERO
3268
3269 FILE *fp = fopen("/proc/meminfo", "r");
3270 if (fp) {
3271 while (!feof(fp)) {
3272 int x = 0;
3273 char buf[16];
3274 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
3275 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
3276 large_page_size = x * K;
3277 break;
3278 }
3279 } else {
3280 // skip to next line
3281 for (;;) {
3282 int ch = fgetc(fp);
3283 if (ch == EOF || ch == (int)'\n') break;
3284 }
3285 }
3286 }
3287 fclose(fp);
3288 }
3289
3290 if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
3291 warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
3292 SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
3293 proper_unit_for_byte_size(large_page_size));
3294 }
3295
3296 return large_page_size;
3297 }
3298
3299 size_t os::Linux::setup_large_page_size() {
3300 _large_page_size = Linux::find_large_page_size();
3301 const size_t default_page_size = (size_t)Linux::page_size();
3302 if (_large_page_size > default_page_size) {
3303 _page_sizes[0] = _large_page_size;
3304 _page_sizes[1] = default_page_size;
3305 _page_sizes[2] = 0;
3306 }
3307
3308 return _large_page_size;
3309 }
3310
3311 bool os::Linux::setup_large_page_type(size_t page_size) {
3312 if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
3313 FLAG_IS_DEFAULT(UseSHM) &&
3314 FLAG_IS_DEFAULT(UseTransparentHugePages)) {
3315
3316 // The type of large pages has not been specified by the user.
3317
3318 // Try UseHugeTLBFS and then UseSHM.
3319 UseHugeTLBFS = UseSHM = true;
3320
3321 // Don't try UseTransparentHugePages since there are known
3322 // performance issues with it turned on. This might change in the future.
3323 UseTransparentHugePages = false;
3324 }
3325
3326 if (UseTransparentHugePages) {
3327 bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
3328 if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
3329 UseHugeTLBFS = false;
3330 UseSHM = false;
3331 return true;
3332 }
3333 UseTransparentHugePages = false;
3334 }
3335
3336 if (UseHugeTLBFS) {
3337 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
3338 if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
3339 UseSHM = false;
3340 return true;
3341 }
3342 UseHugeTLBFS = false;
3343 }
3344
3345 return UseSHM;
3346 }
3347
3348 void os::large_page_init() {
3349 if (!UseLargePages &&
3350 !UseTransparentHugePages &&
3351 !UseHugeTLBFS &&
3352 !UseSHM) {
3353 // Not using large pages.
3354 return;
3355 }
3356
3357 if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) {
3358 // The user explicitly turned off large pages.
3359 // Ignore the rest of the large pages flags.
3360 UseTransparentHugePages = false;
3361 UseHugeTLBFS = false;
3362 UseSHM = false;
3363 return;
3364 }
3365
3366 size_t large_page_size = Linux::setup_large_page_size();
3367 UseLargePages = Linux::setup_large_page_type(large_page_size);
3368
3369 set_coredump_filter();
3370 }
3371
3372 #ifndef SHM_HUGETLB
3373 #define SHM_HUGETLB 04000
3374 #endif
3375
3376 #define shm_warning_format(format, ...) \
3377 do { \
3378 if (UseLargePages && \
3379 (!FLAG_IS_DEFAULT(UseLargePages) || \
3380 !FLAG_IS_DEFAULT(UseSHM) || \
3381 !FLAG_IS_DEFAULT(LargePageSizeInBytes))) { \
3382 warning(format, __VA_ARGS__); \
3383 } \
3384 } while (0)
3385
3386 #define shm_warning(str) shm_warning_format("%s", str)
3387
3388 #define shm_warning_with_errno(str) \
3389 do { \
3390 int err = errno; \
3391 shm_warning_format(str " (error = %d)", err); \
3392 } while (0)
3393
3394 static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) {
3395 assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment");
3396
3397 if (!is_size_aligned(alignment, SHMLBA)) {
3398 assert(false, "Code below assumes that alignment is at least SHMLBA aligned");
3399 return NULL;
3400 }
3401
3402 // To ensure that we get 'alignment' aligned memory from shmat,
3403 // we pre-reserve aligned virtual memory and then attach to that.
3404
3405 char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL);
3406 if (pre_reserved_addr == NULL) {
3407 // Couldn't pre-reserve aligned memory.
3408 shm_warning("Failed to pre-reserve aligned memory for shmat.");
3409 return NULL;
3410 }
3411
3412 // SHM_REMAP is needed to allow shmat to map over an existing mapping.
3413 char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP);
3414
3415 if ((intptr_t)addr == -1) {
3416 int err = errno;
3417 shm_warning_with_errno("Failed to attach shared memory.");
3418
3419 assert(err != EACCES, "Unexpected error");
3420 assert(err != EIDRM, "Unexpected error");
3421 assert(err != EINVAL, "Unexpected error");
3422
3423 // Since we don't know if the kernel unmapped the pre-reserved memory area
3424 // we can't unmap it, since that would potentially unmap memory that was
3425 // mapped from other threads.
3426 return NULL;
3427 }
3428
3429 return addr;
3430 }
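To make the pre-reserve-then-attach idea described in the comment above concrete outside HotSpot, here is a minimal standalone sketch using plain SysV and POSIX calls. It is illustrative only: the function name is made up, error handling is minimal, and the head/tail trimming that anon_mmap_aligned() performs is omitted.

#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stddef.h>

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

// Attach a hugetlb-backed SysV segment at an 'alignment'-aligned address.
// 'bytes' must be a multiple of the huge page size; huge pages must be configured.
static void* attach_huge_shm_aligned(size_t bytes, size_t alignment) {
  int shmid = ::shmget(IPC_PRIVATE, bytes, SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid == -1) {
    return NULL;
  }

  void* addr = NULL;
  // Pre-reserve enough address space to contain an aligned attach point.
  size_t extra = bytes + alignment;
  char* start = (char*) ::mmap(NULL, extra, PROT_NONE,
                               MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (start != MAP_FAILED) {
    char* aligned = (char*) (((uintptr_t) start + alignment - 1) & ~(uintptr_t)(alignment - 1));
    // SHM_REMAP (Linux-specific) lets shmat() map the segment over the existing
    // reservation instead of failing because the range is already in use.
    addr = ::shmat(shmid, aligned, SHM_REMAP);
    if (addr == (void*) -1) {
      addr = NULL;  // as in the code above, don't unmap the reservation here
    }
  }

  // Mark the segment for removal; it persists until the last detach or process exit.
  ::shmctl(shmid, IPC_RMID, NULL);
  return addr;
}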
3431
3432 static char* shmat_at_address(int shmid, char* req_addr) {
3433 if (!is_ptr_aligned(req_addr, SHMLBA)) {
3434 assert(false, "Requested address needs to be SHMLBA aligned");
3435 return NULL;
3436 }
3437
3438 char* addr = (char*)shmat(shmid, req_addr, 0);
3439
3440 if ((intptr_t)addr == -1) {
3441 shm_warning_with_errno("Failed to attach shared memory.");
3442 return NULL;
3443 }
3444
3445 return addr;
3446 }
3447
3448 static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) {
3449 // If a req_addr has been provided, we assume that the caller has already aligned the address.
3450 if (req_addr != NULL) {
3451 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size");
3452 assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment");
3453 return shmat_at_address(shmid, req_addr);
3454 }
3455
3456 // Since shmid has been setup with SHM_HUGETLB, shmat will automatically
3457 // return large page size aligned memory addresses when req_addr == NULL.
3458 // However, if the alignment is larger than the large page size, we have
3459 // to manually ensure that the memory returned is 'alignment' aligned.
3460 if (alignment > os::large_page_size()) {
3461 assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size");
3462 return shmat_with_alignment(shmid, bytes, alignment);
3463 } else {
3464 return shmat_at_address(shmid, NULL);
3465 }
3466 }
3467
3468 char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) {
3469 // "exec" is passed in but not used. Creating the shared image for
3470 // the code cache doesn't have an SHM_X executable permission to check.
3471 assert(UseLargePages && UseSHM, "only for SHM large pages");
3472 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3473 assert(is_ptr_aligned(req_addr, alignment), "Unaligned address");
3474
3475 if (!is_size_aligned(bytes, os::large_page_size())) {
3476 return NULL; // Fallback to small pages.
3477 }
3478
3479 // Create a large shared memory region to attach to based on size.
3480 // Currently, size is the total size of the heap.
3481 int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
3482 if (shmid == -1) {
3483 // Possible reasons for shmget failure:
3484 // 1. shmmax is too small for Java heap.
3485 // > check shmmax value: cat /proc/sys/kernel/shmmax
3486 // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
3487 // 2. not enough large page memory.
3488 // > check available large pages: cat /proc/meminfo
3489 // > increase amount of large pages:
3490 // echo new_value > /proc/sys/vm/nr_hugepages
3491 // Note 1: different Linux distributions may use different names for this
3492 // property, e.g. on Redhat AS-3 it is "hugetlb_pool".
3493 // Note 2: it's possible there's enough physical memory available but
3494 // it is so fragmented after a long run that it can't
3495 // coalesce into large pages. Try to reserve large pages when
3496 // the system is still "fresh".
3497 shm_warning_with_errno("Failed to reserve shared memory.");
3498 return NULL;
3499 }
3500
3501 // Attach to the region.
3502 char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr);
3503
3504 // Remove shmid. If shmat() is successful, the actual shared memory segment
3505 // will be deleted when it's detached by shmdt() or when the process
3506 // terminates. If shmat() is not successful this will remove the shared
3507 // segment immediately.
3508 shmctl(shmid, IPC_RMID, NULL);
3509
3510 return addr;
3511 }
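As a usage illustration (the numbers are an example, assuming a 2 MB default large page size): running with -XX:+UseG1GC -XX:+UseLargePages -XX:+UseSHM -XX:G1HeapRegionSize=8m ends up calling this function with an alignment of 8 MB, which is larger than os::large_page_size(). In the replaced version of this function that combination was rejected by the 'alignment > os::large_page_size()' check and fell back to small pages, which is the behaviour described in the synopsis; here it is handled by shmat_large_pages() via shmat_with_alignment().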
3512
3513 static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) {
3514 assert(error == ENOMEM, "Only expect to fail if no memory is available");
3515
3516 bool warn_on_failure = UseLargePages &&
3517 (!FLAG_IS_DEFAULT(UseLargePages) ||
3518 !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
3519 !FLAG_IS_DEFAULT(LargePageSizeInBytes));
3520
3521 if (warn_on_failure) {
3522 char msg[128];
3523 jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
3524 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
3525 warning("%s", msg);
3526 }
3527 }
3528
3529 char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) {
3530 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
3531 assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
3532 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
3533
3534 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
3535 char* addr = (char*)::mmap(req_addr, bytes, prot,
3536 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
3537 -1, 0);
3538
3539 if (addr == MAP_FAILED) {
3540 warn_on_large_pages_failure(req_addr, bytes, errno);
3541 return NULL;
3542 }
3543
3544 assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
3545
3546 return addr;
3478 3547 }
3479 3548
3480 3549 // Reserve memory using mmap(MAP_HUGETLB).
3481 3550 // - bytes shall be a multiple of alignment.
3482 3551 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment.