Skip to content
/ server Public
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cmake/os/Windows.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,3 @@ include(${_SCRIPT_DIR}/WindowsCache.cmake)
# this is out of place, not really a system check
set(FN_NO_CASE_SENSE 1)
set(USE_SYMDIR 1)
set(HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT 1)

5 changes: 0 additions & 5 deletions config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -446,11 +446,6 @@
/* This should mean case insensitive file system */
#cmakedefine FN_NO_CASE_SENSE 1

/* Whether an anonymous private mapping is inaccessible after
madvise(MADV_DONTNEED) or madvise(MADV_FREE) or similar has been invoked;
this is the case with Microsoft Windows VirtualFree(MEM_DECOMMIT) */
#cmakedefine HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT 1

#cmakedefine HAVE_CHARSET_armscii8 1
#cmakedefine HAVE_CHARSET_ascii 1
#cmakedefine HAVE_CHARSET_big5 1
Expand Down
9 changes: 7 additions & 2 deletions include/my_virtual_mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,15 @@
extern "C" {
#endif

# ifdef _WIN32
#ifdef _WIN32
char *my_virtual_mem_reserve(size_t *size);
# endif
#endif
#ifdef __linux__
char *my_virtual_mem_commit(char *ptr, size_t size, my_bool commit);
#else
char *my_virtual_mem_commit(char *ptr, size_t size);
# define my_virtual_mem_commit(ptr, size, c) my_virtual_mem_commit(ptr, size)
#endif
void my_virtual_mem_decommit(char *ptr, size_t size);
void my_virtual_mem_release(char *ptr, size_t size);

Expand Down
15 changes: 0 additions & 15 deletions mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,6 @@
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
@@ -227,11 +227,11 @@
SESSION_VALUE NULL
DEFAULT_VALUE 0
VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT Maximum innodb_buffer_pool_size
NUMERIC_MIN_VALUE 0
-NUMERIC_MAX_VALUE 18446744073701163008
-NUMERIC_BLOCK_SIZE 8388608
+NUMERIC_MAX_VALUE 4292870144
+NUMERIC_BLOCK_SIZE 2097152
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
@@ -239,7 +239,7 @@
SESSION_VALUE NULL
DEFAULT_VALUE 0
Expand Down
14 changes: 2 additions & 12 deletions mysql-test/suite/sys_vars/r/sysvars_innodb.result
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ variable_name not in (
'innodb_numa_interleave', # only available WITH_NUMA
'innodb_evict_tables_on_commit_debug', # one may want to override this
'innodb_use_native_aio', # default value depends on OS
'innodb_buffer_pool_commit', # only available on Linux
'innodb_buffer_pool_size_max', # default value depends on OS
'innodb_log_file_buffering', # only available on Linux and Windows
'innodb_linux_aio', # existence depends on OS
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
Expand Down Expand Up @@ -224,18 +226,6 @@ NUMERIC_BLOCK_SIZE 8388608
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_BUFFER_POOL_SIZE_MAX
SESSION_VALUE NULL
DEFAULT_VALUE 0
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Maximum innodb_buffer_pool_size
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 18446744073701163008
NUMERIC_BLOCK_SIZE 8388608
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_BUF_DUMP_STATUS_FREQUENCY
SESSION_VALUE NULL
DEFAULT_VALUE 0
Expand Down
2 changes: 2 additions & 0 deletions mysql-test/suite/sys_vars/t/sysvars_innodb.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP
'innodb_numa_interleave', # only available WITH_NUMA
'innodb_evict_tables_on_commit_debug', # one may want to override this
'innodb_use_native_aio', # default value depends on OS
'innodb_buffer_pool_commit', # only available on Linux
'innodb_buffer_pool_size_max', # default value depends on OS
'innodb_log_file_buffering', # only available on Linux and Windows
'innodb_linux_aio', # existence depends on OS
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
Expand Down
10 changes: 9 additions & 1 deletion mysys/my_largepage.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,21 @@ char *my_large_virtual_alloc(size_t *size)
DBUG_RETURN(ptr);
}
}

my_use_large_pages= FALSE;
}

# ifdef _AIX
/* On IBM AIX, my_virtual_mem_commit() relies on mprotect(2) rather than
a subsequent mmap(2) with MAP_FIXED. */
ptr= mmap(NULL, *size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | OS_MAP_ANON, -1, 0);
# else
ptr= mmap(NULL, *size, PROT_NONE, MAP_PRIVATE | OS_MAP_ANON, -1, 0);
# endif
if (ptr == MAP_FAILED)
{
my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size);
my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size);
ptr= NULL;
}

Expand Down
24 changes: 4 additions & 20 deletions mysys/my_virtual_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static my_bool is_memory_committed(char *ptr, size_t size)
}
#endif

char *my_virtual_mem_commit(char *ptr, size_t size)
char *my_virtual_mem_commit(char *ptr, size_t size, my_bool commit)
{
DBUG_ASSERT(ptr);
#ifdef _WIN32
Expand Down Expand Up @@ -103,8 +103,8 @@ char *my_virtual_mem_commit(char *ptr, size_t size)
# else
void *p= 0;
const int flags=
# ifdef MAP_POPULATE
MAP_POPULATE |
# ifdef __linux__
(commit ? MAP_POPULATE : 0) |
# endif
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
p= mmap(ptr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
Expand All @@ -128,9 +128,6 @@ void my_virtual_mem_decommit(char *ptr, size_t size)
{
#ifdef _WIN32
DBUG_ASSERT(is_memory_committed(ptr, size));
# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
# error "VirtualFree(MEM_DECOMMIT) will not allow subsequent reads!"
# endif
if (!my_use_large_pages)
{
if (!VirtualFree(ptr, size, MEM_DECOMMIT))
Expand All @@ -141,19 +138,6 @@ void my_virtual_mem_decommit(char *ptr, size_t size)
}
}
#else
const int prot=
# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
/*
In InnoDB, buf_pool_t::page_guess() may dereference pointers to
this, assuming that either the original contents or zeroed
contents are available.
*/
PROT_READ
# else
/* We will explicitly mark the memory inaccessible. */
PROT_NONE
# endif
;
# ifdef _AIX
disclaim(ptr, size, DISCLAIM_ZEROMEM);
# elif defined __linux__ || defined __osf__
Expand All @@ -172,7 +156,7 @@ void my_virtual_mem_decommit(char *ptr, size_t size)
# else
# warning "Do not know how to decommit memory"
# endif
if (mprotect(ptr, size, prot))
if (mprotect(ptr, size, PROT_NONE))
{
my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno);
DBUG_ASSERT(0);
Expand Down
35 changes: 4 additions & 31 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1374,8 +1374,8 @@ bool buf_pool_t::create() noexcept
#ifdef UNIV_PFS_MEMORY
PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, actual_size, &owner);
#endif
#ifdef _WIN32
if (!my_virtual_mem_commit(memory, actual_size))
#ifndef _AIX
if (!my_virtual_mem_commit(memory, actual_size, commit))
{
my_virtual_mem_release(memory_unaligned, size_unaligned);
memory= nullptr;
Expand Down Expand Up @@ -1860,12 +1860,6 @@ inline void buf_pool_t::shrunk(size_t size, size_t reduced) noexcept
ut_ad(size + reduced == size_in_bytes);
size_in_bytes_requested= size;
size_in_bytes= size;
# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
/* Only page_guess() may read this memory, which after
my_virtual_mem_decommit() may be zeroed out or preserve its original
contents. Try to catch any unintended reads outside page_guess(). */
MEM_UNDEFINED(memory + size, size_in_bytes_max - size);
# else
for (size_t n= page_hash.pad(page_hash.n_cells), i= 0; i < n;
i+= page_hash.ELEMENTS_PER_LATCH + 1)
{
Expand All @@ -1876,7 +1870,6 @@ inline void buf_pool_t::shrunk(size_t size, size_t reduced) noexcept
guess before we invoke my_virtual_mem_decommit() below. */
latch.unlock();
}
# endif
my_virtual_mem_decommit(memory + size, reduced);
#ifdef UNIV_PFS_MEMORY
PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, reduced, owner);
Expand Down Expand Up @@ -1935,7 +1928,7 @@ ATTRIBUTE_COLD void buf_pool_t::resize(size_t size, THD *thd) noexcept

if (n_blocks_removed <= 0)
{
if (!my_virtual_mem_commit(memory + old_size, size - old_size))
if (!my_virtual_mem_commit(memory + old_size, size - old_size, commit))
{
mysql_mutex_unlock(&mutex);
sql_print_error("InnoDB: Cannot commit innodb_buffer_pool_size=%zum;"
Expand Down Expand Up @@ -2837,34 +2830,14 @@ uint32_t buf_pool_t::page_guess(buf_block_t *b, page_hash_latch &latch,
const page_id_t id) noexcept
{
transactional_shared_lock_guard<page_hash_latch> g{latch};
#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
/* shrunk() and my_virtual_mem_decommit() could retain the original
contents of the virtual memory range or zero it out immediately or
with a delay. Any zeroing out may lead to a false positive for
b->page.id() == id but never for b->page.state(). At the time of
the shrunk() call, shrink() and buf_LRU_block_free_non_file_page()
should guarantee that b->page.state() is equal to
buf_page_t::NOT_USED (0) for all to-be-freed blocks. */
#else
/* shrunk() made the memory inaccessible. */
if (UNIV_UNLIKELY(reinterpret_cast<char*>(b) >= memory + size_in_bytes))
/* shrunk() made the memory inaccessible. */
return 0;
#endif
const page_id_t block_id{b->page.id()};
#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
/* shrunk() may have invoked MEM_UNDEFINED() on this memory to be able
to catch any unintended access elsewhere in our code. */
MEM_MAKE_DEFINED(&block_id, sizeof block_id);
#endif

if (id == block_id)
{
uint32_t state= b->page.state();
#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
/* shrunk() may have invoked MEM_UNDEFINED() on this memory to be able
to catch any unintended access elsewhere in our code. */
MEM_MAKE_DEFINED(&state, sizeof state);
#endif
/* Ignore guesses that point to read-fixed blocks. We can only
avoid a race condition by looking up the block via page_hash. */
if ((state >= buf_page_t::FREED && state < buf_page_t::READ_FIX) ||
Expand Down
38 changes: 33 additions & 5 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3661,6 +3661,13 @@ static void innodb_buffer_pool_size_update(THD* thd,st_mysql_sys_var*,void*,
buf_pool.resize(*static_cast<const size_t*>(save), thd);
}

#ifdef __linux__
/** innodb_buffer_pool_commit: boolean system variable that is only
compiled in on Linux. Its value is stored directly in buf_pool.commit;
the last macro argument (FALSE) is the default, and both callback
slots are NULL. */
static MYSQL_SYSVAR_BOOL(buffer_pool_commit, buf_pool.commit,
PLUGIN_VAR_NOCMDARG,
"Whether to disable Linux overcommit for innodb_buffer_pool_size",
NULL, NULL, FALSE);
#endif

static MYSQL_SYSVAR_SIZE_T(buffer_pool_size, buf_pool.size_in_bytes_requested,
PLUGIN_VAR_RQCMDARG,
"The size of the memory buffer InnoDB uses to cache data"
Expand All @@ -3687,10 +3694,29 @@ static MYSQL_SYSVAR_SIZE_T(buffer_pool_size_auto_min,
innodb_buffer_pool_extent_size);
#endif

#if SIZEOF_SIZE_T < 8 || defined _AIX || defined HAVE_valgrind
/* In constrained environments, innodb_buffer_pool_size_max
will default to the initial innodb_buffer_pool_size, that is,
by default, it will not be possible to increase innodb_buffer_pool_size.

In MemorySanitizer and possibly Valgrind memcheck, any virtual memory
allocation would be backed by one or more copies of shadow bits of the
same size that could be allocated and initialized even for dummy
mappings created by mmap(2) with PROT_NONE. We do not want significant
overhead beyond the actual innodb_buffer_pool_size. */
static constexpr size_t innodb_buffer_pool_size_max_default{0},
innodb_buffer_pool_size_max_minimum{0};
#else
/* Everywhere else (size_t of at least 8 bytes, not AIX, not
HAVE_valgrind): the default is 8ULL << 40 bytes, and the smallest
permitted value is one innodb_buffer_pool_extent_size. */
static constexpr size_t innodb_buffer_pool_size_max_default{8ULL << 40},// 8TiB
innodb_buffer_pool_size_max_minimum{innodb_buffer_pool_extent_size};
#endif

static MYSQL_SYSVAR_SIZE_T(buffer_pool_size_max, buf_pool.size_in_bytes_max,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Maximum innodb_buffer_pool_size",
nullptr, nullptr, 0, 0,
nullptr, nullptr,
innodb_buffer_pool_size_max_default,
innodb_buffer_pool_size_max_minimum,
size_t(-ssize_t(innodb_buffer_pool_extent_size)),
innodb_buffer_pool_extent_size);

Expand Down Expand Up @@ -3782,11 +3808,10 @@ static int innodb_init_params()
min= ut_calc_align<size_t>
(buf_pool.blocks_in_bytes(BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4),
1U << 20);
size_t innodb_buffer_pool_size= buf_pool.size_in_bytes_requested;
const size_t innodb_buffer_pool_size= buf_pool.size_in_bytes_requested;

/* With large pages, buffer pool can't grow or shrink. */
if (!buf_pool.size_in_bytes_max || my_use_large_pages ||
innodb_buffer_pool_size > buf_pool.size_in_bytes_max)
if (innodb_buffer_pool_size > buf_pool.size_in_bytes_max ||
my_use_large_pages /* large_pages=ON fixes innodb_buffer_pool_size */)
buf_pool.size_in_bytes_max= ut_calc_align(innodb_buffer_pool_size,
innodb_buffer_pool_extent_size);

Expand Down Expand Up @@ -19916,6 +19941,9 @@ static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tabl

static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(autoextend_increment),
#ifdef __linux__
MYSQL_SYSVAR(buffer_pool_commit),
#endif
MYSQL_SYSVAR(buffer_pool_size),
#if defined __linux__ || !defined DBUG_OFF
MYSQL_SYSVAR(buffer_pool_size_auto_min),
Expand Down
4 changes: 4 additions & 0 deletions storage/innobase/include/buf0buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,10 @@ class buf_pool_t
to read this for heuristic
purposes without holding any
mutex or latch */
#ifdef __linux__
/** whether to use MAP_POPULATE for the allocation */
my_bool commit;
#endif
/** Cleared when buf_LRU_get_free_block() fails.
Set whenever the free list grows, along with a broadcast of done_free.
Protected by buf_pool.mutex. */
Expand Down