diff --git a/README.md b/README.md index 8d393ba8..048df80e 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ These instructions will get you a copy of the project up and running on your loc * automake 1.13.4 or later * autoconf 2.69 or later * libtool 2.4.2 or later - * fuse 2.6.0 or later + * fuse3 3.4.0 or later (default), or fuse 2.6.0 or later with `--with-fuse2` * uuid 1.36 or later (Linux) * libxml-2.0 2.6.16 or later * net-snmp 5.3 or later @@ -198,8 +198,28 @@ make install `./configure --help` shows various options for build and install. +On Linux the build uses libfuse 3 (package `libfuse3-dev` on Debian/Ubuntu, +`fuse3-devel` on Fedora/RHEL). Pass `--with-fuse2` to build against the +legacy libfuse 2 API instead; macOS, FreeBSD, and NetBSD currently use the +libfuse 2 API by default. On macOS with macFUSE 5 or later, which ships a +libfuse 3, the FUSE 3 build can be selected with `--with-fuse2=no` +(autotools) or `-DLTFS_WITH_FUSE2=OFF` (CMake). + +FUSE 3 builds negotiate request sizes up to 1 MiB (tunable with +`-o max_write=`), serve directory listings through readdirplus, and +support `-o direct_io` to bypass the kernel page cache entirely so large +archive jobs do not fill it (mmap does not work on files opened this way). + In some systems, you might need `sudo ldconfig -v` after `make install` to load the shared libraries correctly. +## Running the test suite + +`make check` runs integration tests against a tape emulated in a local +directory by the file backend; no tape hardware is required. The tests need +a Linux host with `/dev/fuse` and are skipped elsewhere. On macOS, +`tests/run-in-docker.sh [configure-options...]` builds and runs the suite +inside an Ubuntu container. + #### Parameter settings of the sg driver LTFS uses the sg driver by default. You can improve reliability to change parameters of the sg driver below. 
diff --git a/configure.ac b/configure.ac index f28d2b28..58f15391 100644 --- a/configure.ac +++ b/configure.ac @@ -286,7 +286,55 @@ fi dnl dnl Check for FUSE, libuuid, and libxml2 dnl -PKG_CHECK_MODULES([FUSE_MODULE], [fuse >= 2.6.0]) +dnl libfuse 3 is preferred on Linux, falling back to libfuse 2 when the +dnl fuse3 development files are not installed. --with-fuse2 forces the +dnl legacy libfuse 2 API; it is also the default on the other platforms +dnl until their fuse3 stacks are verified. +dnl +AC_ARG_WITH([fuse2], + [AS_HELP_STRING([--with-fuse2], + [build against libfuse 2 instead of libfuse 3])], + [with_fuse2=$withval], + [with_fuse2=default]) +if test "x${with_fuse2}" = "xdefault" && test "x${host_linux}" != "xyes" +then + with_fuse2=yes +fi + +if test "x${with_fuse2}" = "xyes" +then + PKG_CHECK_MODULES([FUSE_MODULE], [fuse >= 2.6.0]) +elif test "x${with_fuse2}" = "xno" +then + PKG_CHECK_MODULES([FUSE_MODULE], [fuse3 >= 3.4.0]) +else + PKG_CHECK_MODULES([FUSE_MODULE], [fuse3 >= 3.4.0], + [with_fuse2=no], + [PKG_CHECK_MODULES([FUSE_MODULE], [fuse >= 2.6.0], [with_fuse2=yes])]) +fi +AC_MSG_CHECKING([for the FUSE API version to use]) +if test "x${with_fuse2}" = "xyes" +then + AC_MSG_RESULT([2]) +else + AC_MSG_RESULT([3]) +fi + +dnl +dnl struct fuse_file_info.parallel_direct_writes appeared in libfuse 3.15; +dnl detect the member instead of relying on version numbers. 
+dnl +if test "x${with_fuse2}" != "xyes" +then + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS ${FUSE_MODULE_CFLAGS} -DFUSE_USE_VERSION=31" + AC_CHECK_MEMBER([struct fuse_file_info.parallel_direct_writes], + [AC_DEFINE([HAVE_FUSE_PARALLEL_DIRECT_WRITES], [1], + [Define to 1 if struct fuse_file_info has parallel_direct_writes])], + [], + [[#include <fuse.h>]]) + CFLAGS="$SAVE_CFLAGS" +fi PKG_CHECK_MODULES([LIBXML2_MODULE], [libxml-2.0 >= 2.6.16]) if test "x${host_mac}" = "xyes" @@ -458,6 +506,10 @@ dnl Update flags dnl Sets CFLAGS to force optimization and debugging options, which isn't quite kosher dnl AM_CPPFLAGS="-D_GNU_SOURCE -I\$(top_srcdir)/src -DLTFS_CONFIG_FILE='\"${sysconfdir}/ltfs.conf\"' -DLTFS_BASE_DIR='\"${prefix}\"'" +if test "x${with_fuse2}" != "xyes" +then + AM_CPPFLAGS="${AM_CPPFLAGS} -DHAVE_FUSE3" +fi AM_CFLAGS="-Wall -Wsign-compare -fsigned-char ${FUSE_MODULE_CFLAGS} ${UUID_MODULE_CFLAGS} ${LIBXML2_MODULE_CFLAGS} ${ICU_MODULE_CFLAGS} ${SNMP_ENABLE} ${SNMP_MODULE_CFLAGS}" if test "x$use_fast" = "xyes" diff --git a/messages/bin_ltfs/root.txt b/messages/bin_ltfs/root.txt index 3a9cd000..e42105d7 100644 --- a/messages/bin_ltfs/root.txt +++ b/messages/bin_ltfs/root.txt @@ -154,7 +154,9 @@ root:table { 14115E:string { "Invalid scsi_append_only_mode option: %s." } 14116E:string { "This medium is not supported (%d)." } 14123W:string { "The main function of FUSE returned error (%d)." } - + 14124I:string { "FUSE maximum request size is %u KiB." } + 14125W:string { "The max_write option is ignored when built against FUSE 2." 
} + // 14150 - 14199 are reserved for LE+ // Help messages @@ -244,5 +246,7 @@ root:table { // Reserved 14466I 14467I:string { " -o syslogtrace Enable diagnostic output to stderr and syslog(same as verbose=303)" } // Reserved 14468I + 14469I:string { " -o max_write= Maximum size of a FUSE request in bytes (FUSE 3 builds only, default: 1048576)" } + 14470I:string { " -o direct_io Bypass the kernel page cache for all file I/O (disables mmap)" } } } diff --git a/src/iosched/unified.c b/src/iosched/unified.c index f492e12f..71f9dcc1 100644 --- a/src/iosched/unified.c +++ b/src/iosched/unified.c @@ -1790,7 +1790,11 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, if (new_req->offset + new_req->count > dpr->file_size) dpr->file_size = new_req->offset + new_req->count; - return (ssize_t)count; + /* Only copy_count bytes were stored (one cache block at most); the + * caller's append loop must advance by that, not the full count, or + * everything past the first block of a larger-than-blocksize write is + * silently dropped. */ + return (ssize_t)copy_count; } /** diff --git a/src/libltfs/ltfs.h b/src/libltfs/ltfs.h index 5a792f55..53226ccd 100644 --- a/src/libltfs/ltfs.h +++ b/src/libltfs/ltfs.h @@ -210,6 +210,11 @@ struct device_data; * or a negative value on error. */ typedef int (*ltfs_dir_filler) (void *buf, const char *name, void *priv); +/* Directory listing callback that also receives the entry's attributes. + * attr may be NULL when the backing store yields names only. */ +typedef int (*ltfs_dir_filler_attr) (void *buf, const char *name, + const struct dentry_attr *attr, void *priv); + /** * All capacities are relative to filesystem block size. 
*/ diff --git a/src/libltfs/ltfs_fsops.c b/src/libltfs/ltfs_fsops.c index 7b2aa4d4..a2fa5369 100644 --- a/src/libltfs/ltfs_fsops.c +++ b/src/libltfs/ltfs_fsops.c @@ -1414,6 +1414,96 @@ int ltfs_fsops_readdir(struct dentry *d, void *buf, ltfs_dir_filler filler, void return ret; } +/* Copy a child's attributes without taking the volume lock again; + * the caller already holds it (same fields as ltfs_fsops_getattr). */ +static void _fsops_child_attr(struct dentry *child, struct dentry_attr *attr, + struct ltfs_volume *vol) +{ + acquireread_mrsw(&child->meta_lock); + + if (child->isslink) + attr->size = strlen(child->target.name); + else + attr->size = child->size; + + attr->alloc_size = child->realsize; + attr->blocksize = vol->label->blocksize; + attr->uid = child->uid; + attr->nlink = child->link_count; + attr->create_time = child->creation_time; + attr->access_time = child->access_time; + attr->modify_time = child->modify_time; + attr->change_time = child->change_time; + attr->backup_time = child->backup_time; + attr->readonly = child->readonly; + attr->isdir = child->isdir; + attr->isslink = child->isslink; + + releaseread_mrsw(&child->meta_lock); + + if (! child->isdir && ! child->isslink && iosched_initialized(vol)) + attr->size = iosched_get_filesize(child, vol); +} + +int ltfs_fsops_readdir_attr(struct dentry *d, void *buf, ltfs_dir_filler_attr filler, + void *filler_priv, struct ltfs_volume *vol) +{ + int ret = 0; + struct name_list *entry, *tmp; + struct dentry_attr attr; + + CHECK_ARG_NULL(d, -LTFS_NULL_ARG); + CHECK_ARG_NULL(filler, -LTFS_NULL_ARG); + CHECK_ARG_NULL(vol, -LTFS_NULL_ARG); + + if (! 
d->isdir) + return -LTFS_ISFILE; + + ret = ltfs_get_volume_lock(false, vol); + if (ret < 0) + return ret; + + acquireread_mrsw(&d->contents_lock); + if (dcache_initialized(vol)) { + /* The dentry cache yields names only */ + int i; + char **namelist = NULL; + ret = dcache_readdir(d, false, (void ***) &namelist, vol); + if (ret == 0 && namelist) { + for (i=0; namelist[i]; ++i) { + ret = filler(buf, namelist[i], NULL, filler_priv); + if (ret < 0) + break; + } + for (i=0; namelist[i]; ++i) + free(namelist[i]); + free(namelist); + } + } else { + if (HASH_COUNT(d->child_list) != 0) { + HASH_SORT(d->child_list, fs_hash_sort_by_uid); + HASH_ITER(hh, d->child_list, entry, tmp) { + _fsops_child_attr(entry->d, &attr, vol); + ret = filler(buf, entry->d->platform_safe_name, &attr, filler_priv); + if (ret < 0) + break; + } + } + } + releaseread_mrsw(&d->contents_lock); + + /* Update access time */ + if (ret == 0) { + acquirewrite_mrsw(&d->meta_lock); + get_current_timespec(&d->access_time); + releasewrite_mrsw(&d->meta_lock); + ltfs_set_index_dirty(true, true, vol->index); + } + + releaseread_mrsw(&vol->lock); + return ret; +} + int _ltfs_fsops_read_direntry(struct dentry *d, struct ltfs_direntry *dirent, unsigned long index, bool root, struct ltfs_volume *vol) { diff --git a/src/libltfs/ltfs_fsops.h b/src/libltfs/ltfs_fsops.h index cfdea5f3..25b1663e 100644 --- a/src/libltfs/ltfs_fsops.h +++ b/src/libltfs/ltfs_fsops.h @@ -324,6 +324,13 @@ int ltfs_fsops_removexattr(const char *path, const char *name, ltfs_file_id *id, int ltfs_fsops_readdir(struct dentry *d, void *buf, ltfs_dir_filler filler, void *filler_priv, struct ltfs_volume *vol); +/** + * List a directory like ltfs_fsops_readdir, passing each entry's attributes + * to the filler as well. attr is NULL when the backing store yields names only. + */ +int ltfs_fsops_readdir_attr(struct dentry *d, void *buf, ltfs_dir_filler_attr filler, + void *filler_priv, struct ltfs_volume *vol); + /** * Get an entry in the directory. 
* It does get the "." and ".." entries only when d is specified non volume root directory. diff --git a/src/libltfs/ltfs_fuse_version.h b/src/libltfs/ltfs_fuse_version.h index 0b11382c..46240510 100644 --- a/src/libltfs/ltfs_fuse_version.h +++ b/src/libltfs/ltfs_fuse_version.h @@ -50,6 +50,18 @@ #ifndef __ltfs_fuse_version_h__ #define __ltfs_fuse_version_h__ +/* HAVE_FUSE3 is set on the compiler command line by configure + * (default on Linux; --with-fuse2 selects the libfuse 2 API). */ +#ifdef HAVE_FUSE3 +#define FUSE_USE_VERSION 31 +/* macFUSE 5's libfuse3 defaults to Darwin-specific operation signatures + * (struct fuse_darwin_attr, struct statfs, ...). Request the upstream- + * compatible API instead; the library exports both symbol flavors. */ +#ifdef __APPLE__ +#define FUSE_DARWIN_ENABLE_EXTENSIONS 0 +#endif +#else #define FUSE_USE_VERSION 26 +#endif #endif /* __ltfs_fuse_version_h__ */ diff --git a/src/libltfs/xattr.h b/src/libltfs/xattr.h index 66f82fe2..fd1785ab 100644 --- a/src/libltfs/xattr.h +++ b/src/libltfs/xattr.h @@ -66,7 +66,9 @@ extern "C" { #include "libltfs/arch/freebsd/xattr.h" #endif -#include "fuse.h" +#include "libltfs/ltfs_fuse_version.h" +#include <fuse.h> + #include "ltfs.h" #define LTFS_PRIVATE_PREFIX "ltfs." diff --git a/src/ltfs_fuse.c b/src/ltfs_fuse.c index db4d3a5c..c3d72b97 100644 --- a/src/ltfs_fuse.c +++ b/src/ltfs_fuse.c @@ -84,6 +84,58 @@ static struct fuse_context *context; #define FUSE_REQ_ENTER(r) REQ_NUMBER(REQ_STAT_ENTER, REQ_FUSE, r) #define FUSE_REQ_EXIT(r) REQ_NUMBER(REQ_STAT_EXIT, REQ_FUSE, r) +/* The FUSE 3 directory filler takes an extra flags argument. 
*/ +#ifdef HAVE_FUSE3 +#define LTFS_FILL(filler, buf, name, st, off) (filler)(buf, name, st, off, 0) +#else +#define LTFS_FILL(filler, buf, name, st, off) (filler)(buf, name, st, off) +#endif + +#ifdef HAVE_FUSE3 +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) +#endif +#endif + +/* Handle-based variants; on FUSE 3 they are reached through getattr/truncate. */ +int ltfs_fuse_fgetattr(const char *path, struct stat *stbuf, struct fuse_file_info *fi); +int ltfs_fuse_ftruncate(const char *path, off_t length, struct fuse_file_info *fi); + +/* The fuse2 macFUSE API adds a position argument to the xattr handlers; the + * fuse3 build uses the upstream signatures (Darwin extensions disabled in + * ltfs_fuse_version.h). */ +#if defined(__APPLE__) && !defined(HAVE_FUSE3) +#define LTFS_XATTR_POSITION 1 +#endif + +#if !defined(__APPLE__) && FUSE_VERSION > 27 +/* Per-open cache policy. With -o direct_io every read and write bypasses + * the kernel page cache: requests arrive at the application's I/O size + * (up to the negotiated maximum) and stream straight to the daemon, at + * the cost of mmap support and kernel readahead. Otherwise the page + * cache is used and kept across opens (the daemon is the only writer). + * keep_cache must never be set while another open of the same file uses + * direct_io; the policy is mount-wide, so the modes cannot mix. */ +static void _ltfs_fuse_set_cache_flags(struct fuse_file_info *fi, struct ltfs_fuse_data *priv) +{ + if (priv->direct_io) { + fi->direct_io = 1; + fi->keep_cache = 0; +#ifdef HAVE_FUSE_PARALLEL_DIRECT_WRITES + /* Writes are serialized further down; this only removes the + * kernel-side exclusive lock for non-extending direct writes. 
*/ + fi->parallel_direct_writes = 1; +#endif + } else { + fi->direct_io = 0; + fi->keep_cache = 1; + } +} +#endif + struct ltfs_file_handle *_new_ltfs_file_handle(struct file_info *fi) { int ret; @@ -278,7 +330,7 @@ int ltfs_fuse_fgetattr(const char *path, struct stat *stbuf, struct fuse_file_in return errormap_fuse_error(ret); } -int ltfs_fuse_getattr(const char *path, struct stat *stbuf) +static int _ltfs_fuse_getattr_path(const char *path, struct stat *stbuf) { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; struct dentry_attr attr; @@ -299,6 +351,20 @@ int ltfs_fuse_getattr(const char *path, struct stat *stbuf) return errormap_fuse_error(ret); } +#ifdef HAVE_FUSE3 +int ltfs_fuse_getattr(const char *path, struct stat *stbuf, struct fuse_file_info *fi) +{ + if (fi) + return ltfs_fuse_fgetattr(path, stbuf, fi); + return _ltfs_fuse_getattr_path(path, stbuf); +} +#else +int ltfs_fuse_getattr(const char *path, struct stat *stbuf) +{ + return _ltfs_fuse_getattr_path(path, stbuf); +} +#endif + int ltfs_fuse_access(const char *path, int mode) { @@ -408,10 +474,7 @@ int ltfs_fuse_open(const char *path, struct fuse_file_info *fi) fi->direct_io = 1; fi->keep_cache = 0; #else - /* cannot set keep cache if any process has the file open with direct_io set! so only - * set it on newer FUSE versions, where we don't use direct_io. 
*/ - fi->direct_io = 0; - fi->keep_cache = 1; + _ltfs_fuse_set_cache_flags(fi, priv); #endif #endif @@ -591,7 +654,11 @@ int ltfs_fuse_flush(const char *path, struct fuse_file_info *fi) return errormap_fuse_error(ret); } +#ifdef HAVE_FUSE3 +int ltfs_fuse_utimens(const char *path, const struct timespec ts[2], struct fuse_file_info *fi) +#else int ltfs_fuse_utimens(const char *path, const struct timespec ts[2]) +#endif { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; struct ltfs_timespec tsTmp[2]; @@ -603,13 +670,28 @@ int ltfs_fuse_utimens(const char *path, const struct timespec ts[2]) tsTmp[0] = ltfs_timespec_from_timespec(&ts[0]); tsTmp[1] = ltfs_timespec_from_timespec(&ts[1]); - ltfsmsg(LTFS_DEBUG, 14038D, path); - ret = ltfs_fsops_utimens_path(path, tsTmp, &id, priv->data); + id.uid = 0; + id.ino = 0; + +#ifdef HAVE_FUSE3 + /* With nullpath_ok set, FUSE 3 may pass a NULL path for a handle-based + * call on an open (possibly unlinked) file; operate on the handle. */ + if (fi) { + struct ltfs_file_handle *file = FILEHANDLE_TO_STRUCT(fi->fh); + ltfsmsg(LTFS_DEBUG, 14038D, _dentry_name(path, file->file_info)); + ret = ltfs_fsops_utimens(file->file_info->dentry_handle, tsTmp, priv->data); + id.uid = ((struct dentry *)(file->file_info->dentry_handle))->uid; + } else +#endif + { + ltfsmsg(LTFS_DEBUG, 14038D, path); + ret = ltfs_fsops_utimens_path(path, tsTmp, &id, priv->data); + } ltfs_request_trace(FUSE_REQ_EXIT(REQ_UTIMENS), ret, id.uid); if (ret) - ltfsmsg(LTFS_ERR, 10020E, "utimens", path, 0, 0); + ltfsmsg(LTFS_ERR, 10020E, "utimens", path ? path : "(fh)", 0, 0); return errormap_fuse_error(ret); } @@ -618,7 +700,11 @@ int ltfs_fuse_utimens(const char *path, const struct timespec ts[2]) * Change the mode of a file or directory. Since LTFS does not support full Unix permissions, * this function just sets or clears the read-only flag. 
*/ +#ifdef HAVE_FUSE3 +int ltfs_fuse_chmod(const char *path, mode_t mode, struct fuse_file_info *fi) +#else int ltfs_fuse_chmod(const char *path, mode_t mode) +#endif { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; ltfs_file_id id; @@ -627,13 +713,28 @@ int ltfs_fuse_chmod(const char *path, mode_t mode) ltfs_request_trace(FUSE_REQ_ENTER(REQ_CHMOD), (uint64_t)mode, 0); - ltfsmsg(LTFS_DEBUG, 14039D, path); - ret = ltfs_fsops_set_readonly_path(path, new_readonly, &id, priv->data); + id.uid = 0; + id.ino = 0; + +#ifdef HAVE_FUSE3 + /* With nullpath_ok set, FUSE 3 may pass a NULL path for a handle-based + * call on an open (possibly unlinked) file; operate on the handle. */ + if (fi) { + struct ltfs_file_handle *file = FILEHANDLE_TO_STRUCT(fi->fh); + ltfsmsg(LTFS_DEBUG, 14039D, _dentry_name(path, file->file_info)); + ret = ltfs_fsops_set_readonly(file->file_info->dentry_handle, new_readonly, priv->data); + id.uid = ((struct dentry *)(file->file_info->dentry_handle))->uid; + } else +#endif + { + ltfsmsg(LTFS_DEBUG, 14039D, path); + ret = ltfs_fsops_set_readonly_path(path, new_readonly, &id, priv->data); + } ltfs_request_trace(FUSE_REQ_EXIT(REQ_CHMOD), ret, id.uid); if (ret) - ltfsmsg(LTFS_ERR, 10020E, "chmod", path, mode, 0); + ltfsmsg(LTFS_ERR, 10020E, "chmod", path ? path : "(fh)", mode, 0); return errormap_fuse_error(ret); } @@ -642,7 +743,11 @@ int ltfs_fuse_chmod(const char *path, mode_t mode) * Set ownership of a file or directory. Succeeds, but has no effect: user/group are * controlled by mount-time options uid and gid. 
*/ +#ifdef HAVE_FUSE3 +int ltfs_fuse_chown(const char *path, uid_t user, gid_t group, struct fuse_file_info *fi) +#else int ltfs_fuse_chown(const char *path, uid_t user, gid_t group) +#endif { ltfs_request_trace(FUSE_REQ_ENTER(REQ_CHOWN), ((uint64_t)user << 32) + group, 0); ltfs_request_trace(FUSE_REQ_EXIT(REQ_CHOWN), 0, 0); @@ -711,10 +816,7 @@ int ltfs_fuse_create(const char *path, mode_t mode, struct fuse_file_info *fi) fi->direct_io = 1; fi->keep_cache = 0; #else - /* cannot set keep cache if any process has the file open with direct_io set! so only - * set it on newer FUSE versions, where we don't use direct_io. */ - fi->direct_io = 0; - fi->keep_cache = 1; + _ltfs_fuse_set_cache_flags(fi, priv); #endif #endif @@ -746,7 +848,7 @@ int ltfs_fuse_mkdir(const char *path, mode_t mode) return errormap_fuse_error(ret); } -int ltfs_fuse_truncate(const char *path, off_t length) +static int _ltfs_fuse_truncate_path(const char *path, off_t length) { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; ltfs_file_id id; @@ -763,6 +865,20 @@ int ltfs_fuse_truncate(const char *path, off_t length) return errormap_fuse_error(ret); } +#ifdef HAVE_FUSE3 +int ltfs_fuse_truncate(const char *path, off_t length, struct fuse_file_info *fi) +{ + if (fi) + return ltfs_fuse_ftruncate(path, length, fi); + return _ltfs_fuse_truncate_path(path, length); +} +#else +int ltfs_fuse_truncate(const char *path, off_t length) +{ + return _ltfs_fuse_truncate_path(path, length); +} +#endif + int ltfs_fuse_ftruncate(const char *path, off_t length, struct fuse_file_info *fi) { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; @@ -815,12 +931,30 @@ int ltfs_fuse_rmdir(const char *path) return errormap_fuse_error(ret); } +#ifdef HAVE_FUSE3 +int ltfs_fuse_rename(const char *from, const char *to, unsigned int flags) +#else int ltfs_fuse_rename(const char *from, const char *to) +#endif { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; ltfs_file_id id; int ret; 
+#ifdef HAVE_FUSE3 + /* LTFS cannot swap two dentries atomically, and unknown flags + * must be rejected rather than ignored. */ + if (flags & ~(unsigned int)RENAME_NOREPLACE) + return -EINVAL; + if (flags & RENAME_NOREPLACE) { + struct dentry_attr attr; + ltfs_file_id existing_id; + + if (ltfs_fsops_getattr_path(to, &attr, &existing_id, priv->data) == 0) + return -EEXIST; + } +#endif + ltfs_request_trace(FUSE_REQ_ENTER(REQ_RENAME), 0, 0); ltfsmsg(LTFS_DEBUG, 14046D, from, to); @@ -853,9 +987,58 @@ int _ltfs_fuse_filldir(void *buf, const char *name, void *priv) return ret; } - ret = filler(buf, new_name, NULL, 0); + ret = LTFS_FILL(filler, buf, new_name, NULL, 0); +#else + ret = LTFS_FILL(filler, buf, name, NULL, 0); +#endif + + free(new_name); + if (ret) + return -ENOBUFS; + return 0; +} + +#ifdef HAVE_FUSE3 +/* Context for _ltfs_fuse_filldir_plus */ +struct ltfs_fuse_fill_plus { + fuse_fill_dir_t filler; + struct ltfs_fuse_data *priv; +}; + +/* readdirplus filler: hand the entry's attributes to the kernel so it can + * prefill its inode cache and no getattr round trip is needed per entry. */ +static int _ltfs_fuse_filldir_plus(void *buf, const char *name, + const struct dentry_attr *attr, void *vpriv) +{ + struct ltfs_fuse_fill_plus *fill = vpriv; + struct stat st; + char *new_name; + int ret; + + if (! 
attr) + return _ltfs_fuse_filldir(buf, name, fill->filler); + + memset(&st, 0, sizeof(st)); + _ltfs_fuse_attr_to_stat(&st, (struct dentry_attr *)attr, fill->priv); + + ret = pathname_unformat(name, &new_name); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 14027E, "unformat", ret); + return ret; + } + +#ifdef __APPLE__ + free(new_name); + + ret = pathname_nfd_normalize(name, &new_name); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 14027E, "nfd", ret); + return ret; + } + + ret = fill->filler(buf, new_name, &st, 0, FUSE_FILL_DIR_PLUS); #else - ret = filler(buf, name, NULL, 0); + ret = fill->filler(buf, name, &st, 0, FUSE_FILL_DIR_PLUS); #endif free(new_name); @@ -863,9 +1046,15 @@ int _ltfs_fuse_filldir(void *buf, const char *name, void *priv) return -ENOBUFS; return 0; } +#endif /* HAVE_FUSE3 */ +#ifdef HAVE_FUSE3 +int ltfs_fuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi, enum fuse_readdir_flags flags) +#else int ltfs_fuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) +#endif { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; struct ltfs_file_handle *file = FILEHANDLE_TO_STRUCT(fi->fh); @@ -875,17 +1064,25 @@ int ltfs_fuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler, ltfsmsg(LTFS_DEBUG, 14047D, _dentry_name(path, file->file_info)); - if (filler(buf, ".", NULL, 0)) { + if (LTFS_FILL(filler, buf, ".", NULL, 0)) { /* No buffer space */ ltfsmsg(LTFS_DEBUG, 14026D); return -ENOBUFS; } - if (filler(buf, "..", NULL, 0)) { + if (LTFS_FILL(filler, buf, "..", NULL, 0)) { /* No buffer space */ ltfsmsg(LTFS_DEBUG, 14026D); return -ENOBUFS; } +#ifdef HAVE_FUSE3 + if (flags & FUSE_READDIR_PLUS) { + struct ltfs_fuse_fill_plus fill = { .filler = filler, .priv = priv }; + + ret = ltfs_fsops_readdir_attr(file->file_info->dentry_handle, buf, + _ltfs_fuse_filldir_plus, &fill, priv->data); + } else +#endif ret = 
ltfs_fsops_readdir(file->file_info->dentry_handle, buf, _ltfs_fuse_filldir, filler, priv->data); @@ -945,13 +1142,13 @@ int ltfs_fuse_read(const char *path, char *buf, size_t size, off_t offset, struc return errormap_fuse_error(ret); } -#ifdef __APPLE__ +#ifdef LTFS_XATTR_POSITION int ltfs_fuse_setxattr(const char *path, const char *name, const char *value, size_t size, int flags, uint32_t position) #else int ltfs_fuse_setxattr(const char *path, const char *name, const char *value, size_t size, int flags) -#endif /* __APPLE__ */ +#endif /* LTFS_XATTR_POSITION */ { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; ltfs_file_id id; @@ -965,14 +1162,14 @@ int ltfs_fuse_setxattr(const char *path, const char *name, const char *value, si * on OS X, and we have no resource forks * TODO: is it correct to behave this way? */ -#ifdef __APPLE__ +#ifdef LTFS_XATTR_POSITION if (position) { /* Position argument must be zero */ ltfsmsg(LTFS_ERR, 14023E); ltfs_request_trace(FUSE_REQ_EXIT(REQ_SETXATTR), -EINVAL, 0); return -EINVAL; } -#endif /* __APPLE__ */ +#endif /* LTFS_XATTR_POSITION */ ret = ltfs_fsops_setxattr(path, name, value, size, flags, &id, priv->data); @@ -981,12 +1178,12 @@ int ltfs_fuse_setxattr(const char *path, const char *name, const char *value, si return errormap_fuse_error(ret); } -#ifdef __APPLE__ +#ifdef LTFS_XATTR_POSITION int ltfs_fuse_getxattr(const char *path, const char *name, char *value, size_t size, uint32_t position) #else int ltfs_fuse_getxattr(const char *path, const char *name, char *value, size_t size) -#endif /* __APPLE__ */ +#endif /* LTFS_XATTR_POSITION */ { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; ltfs_file_id id; @@ -1000,7 +1197,7 @@ int ltfs_fuse_getxattr(const char *path, const char *name, char *value, size_t s * on OS X, and we have no resource forks * TODO: is it correct to behave this way? 
*/ -#ifdef __APPLE__ +#ifdef LTFS_XATTR_POSITION if (position) { /* Position argument must be zero */ ltfsmsg(LTFS_ERR, 14024E); @@ -1014,7 +1211,7 @@ int ltfs_fuse_getxattr(const char *path, const char *name, char *value, size_t s ltfs_request_trace(FUSE_REQ_EXIT(REQ_GETXATTR), -LTFS_NO_XATTR, 0); return errormap_fuse_error(-LTFS_NO_XATTR); } -#endif /* __APPLE__ */ +#endif /* LTFS_XATTR_POSITION */ ret = ltfs_fsops_getxattr(path, name, value, size, &id, priv->data); @@ -1061,13 +1258,43 @@ int ltfs_fuse_removexattr(const char *path, const char *name) * Mount the filesystem. This function assumes a volume has been * allocated and ltfs_mount has been called; it just does some secondary setup. */ +#ifdef HAVE_FUSE3 +void * ltfs_fuse_mount(struct fuse_conn_info *conn, struct fuse_config *cfg) +#else void * ltfs_fuse_mount(struct fuse_conn_info *conn) +#endif { struct ltfs_fuse_data *priv = fuse_get_context()->private_data; struct statvfs *stats = &priv->fs_stats; ltfs_request_trace(FUSE_REQ_ENTER(REQ_MOUNT), 0, 0); +#ifdef HAVE_FUSE3 + /* Options that were passed as -o arguments on FUSE 2. + * use_ino: pass LTFS UIDs through as inode numbers (needs 64-bit ino_t). + * hard_remove: unlink files instead of renaming them to .fuse_hidden. + * nullpath_ok: handle-based operations may receive a NULL path. */ + if (sizeof(ino_t) >= 8) + cfg->use_ino = 1; + cfg->hard_remove = 1; + cfg->nullpath_ok = 1; + + /* Tape reads must stay ordered; FUSE 3 enables asynchronous reads by + * default (the -o sync_read mount option was removed). */ + conn->want &= ~FUSE_CAP_ASYNC_READ; + + /* Request sizes up to max_write (libfuse >= 3.6 negotiates the + * matching max_pages with the kernel). Read requests are bounded by + * the same page limit. 
*/ + conn->max_write = priv->fuse_max_write; + ltfsmsg(LTFS_INFO, 14124I, (unsigned int)(conn->max_write / 1024)); + + /* Always use readdirplus, not only when the kernel heuristic asks + * for it: attributes come from the in-memory index, so handing them + * out with the listing is free and avoids a getattr per entry. */ + conn->want &= ~FUSE_CAP_READDIRPLUS_AUTO; +#endif + if (priv->pid_orig != getpid()) { /* * Reopen device when LTFS was forked in fuse_main(). @@ -1206,7 +1433,9 @@ struct fuse_operations ltfs_ops = { .init = ltfs_fuse_mount, .destroy = ltfs_fuse_umount, .getattr = ltfs_fuse_getattr, +#ifndef HAVE_FUSE3 .fgetattr = ltfs_fuse_fgetattr, +#endif .access = ltfs_fuse_access, .statfs = ltfs_fuse_statfs, .open = ltfs_fuse_open, @@ -1218,7 +1447,9 @@ struct fuse_operations ltfs_ops = { .chown = ltfs_fuse_chown, .create = ltfs_fuse_create, .truncate = ltfs_fuse_truncate, +#ifndef HAVE_FUSE3 .ftruncate = ltfs_fuse_ftruncate, +#endif .unlink = ltfs_fuse_unlink, .rename = ltfs_fuse_rename, .mkdir = ltfs_fuse_mkdir, @@ -1235,7 +1466,9 @@ struct fuse_operations ltfs_ops = { .removexattr = ltfs_fuse_removexattr, .symlink = ltfs_fuse_symlink, .readlink = ltfs_fuse_readlink, +#ifndef HAVE_FUSE3 #if FUSE_VERSION >= 28 .flag_nullpath_ok = 1, #endif +#endif }; diff --git a/src/ltfs_fuse.h b/src/ltfs_fuse.h index 3cf79f17..5de0bd36 100644 --- a/src/ltfs_fuse.h +++ b/src/ltfs_fuse.h @@ -68,6 +68,10 @@ extern "C" { #include "libltfs/plugin.h" #include "libltfs/uthash.h" +/* Default and minimum for the -o max_write option (FUSE 3 builds) */ +#define LTFS_FUSE_MAX_WRITE_DEFAULT (1UL << 20) +#define LTFS_FUSE_MAX_WRITE_MIN (128UL << 10) + struct ltfs_fuse_data { bool first_parsing_pass; /**< Just looking for a config file? 
If so, don't print help */ @@ -131,6 +135,8 @@ struct ltfs_fuse_data { char *symlink_str; /**< Symbolic Link type fetched by option (live or posix)*/ char *str_append_only_mode; /**< option sting of scsi_append_only_mode */ int append_only_mode; /**< Use append-only mode */ + unsigned long fuse_max_write; /**< Maximum size of a FUSE request in bytes (FUSE 3) */ + int direct_io; /**< Bypass the kernel page cache for all file I/O */ bool advanced_help; /**< Include standard FUSE options on --help? */ diff --git a/src/main.c b/src/main.c index 533ec70d..77fb62e7 100644 --- a/src/main.c +++ b/src/main.c @@ -60,6 +60,9 @@ #include #include "ltfs_fuse.h" +#ifdef HAVE_FUSE3 +#include <fuse_lowlevel.h> /* fuse_parse_cmdline, struct fuse_cmdline_opts */ +#endif #include "libltfs/ltfs.h" #include "ltfs_copyright.h" #include "libltfs/pathname.h" @@ -136,6 +139,9 @@ static struct fuse_opt ltfs_options[] = { LTFS_OPT("capture_index", capture_index, 1), LTFS_OPT("symlink_type=%s", symlink_str, 0), LTFS_OPT("scsi_append_only_mode=%s", str_append_only_mode, 0), + LTFS_OPT("max_write=%lu", fuse_max_write, 0), + LTFS_OPT("direct_io", direct_io, 1), + LTFS_OPT("nodirect_io", direct_io, 0), LTFS_OPT_KEY("-a", KEY_ADVANCED_HELP), FUSE_OPT_KEY("-h", KEY_HELP), FUSE_OPT_KEY("--help", KEY_HELP), @@ -174,6 +180,8 @@ void single_drive_advanced_usage(const char *default_driver, struct ltfs_fuse_da ltfsresult(14448I); /* -o release_device */ ltfsresult(14456I); /* -o capture_index */ ltfsresult(14463I); /* -o scsi_append_only_mode= */ + ltfsresult(14469I); /* -o max_write= */ + ltfsresult(14470I); /* -o direct_io */ ltfsresult(14406I); /* -a */ /* TODO: future use for WORM */ /* set worm rollback flag and rollback_str by this option */ @@ -746,6 +754,10 @@ int main(int argc, char **argv) } } +#ifndef HAVE_FUSE3 + /* On FUSE 3 these are set through struct fuse_config in the init + * callback (ltfs_fuse_mount); the mount options no longer exist. 
*/ + /* Unlink objects from the file system instead of having them renamed to .fuse_hidden */ ret = fuse_opt_add_arg(&args, "-ohard_remove"); if (ret < 0) { @@ -761,6 +773,7 @@ int main(int argc, char **argv) ltfsmsg(LTFS_ERR, 14001E, "sync_read", ret); return 1; } +#endif #ifdef __APPLE__ /* Change MacFUSE timeout from 60 secs to 3100 secs (41mins) */ @@ -787,14 +800,17 @@ int main(int argc, char **argv) } #endif +#ifndef HAVE_FUSE3 #if FUSE_VERSION >= 28 - /* For FUSE 2.8 or higher, automatically enable big_writes */ + /* For FUSE 2.8 or higher, automatically enable big_writes. + * FUSE 3 removed the option; large writes are always enabled. */ ret = fuse_opt_add_arg(&args, "-obig_writes"); if (ret < 0) { /* Could not enable FUSE option */ ltfsmsg(LTFS_ERR, 14001E, "big_writes", ret); return 1; } +#endif #endif /* Set up permissions based on mount options and current user information */ @@ -971,8 +987,22 @@ int single_drive_main(struct fuse_args *args, struct ltfs_fuse_data *priv) ltfsmsg(LTFS_INFO, 14095I); } +#ifdef HAVE_FUSE3 + /* Maximum FUSE request size; the kernel rounds it to whole pages and + * caps it (1 MiB unless raised via fs.fuse.max_pages_limit). */ + if (priv->fuse_max_write == 0) + priv->fuse_max_write = LTFS_FUSE_MAX_WRITE_DEFAULT; + else if (priv->fuse_max_write < LTFS_FUSE_MAX_WRITE_MIN) + priv->fuse_max_write = LTFS_FUSE_MAX_WRITE_MIN; +#else + if (priv->fuse_max_write != 0) + ltfsmsg(LTFS_WARN, 14125W); +#endif + +#ifndef HAVE_FUSE3 /* If the local inode space is big enough, have FUSE pass through our UIDs as inode - * numbers instead of generating its own. */ + * numbers instead of generating its own. On FUSE 3 this is set through + * struct fuse_config in the init callback. 
*/ if (sizeof(ino_t) >= 8) { ret = fuse_opt_add_arg(args, "-ouse_ino"); if (ret < 0) { @@ -981,6 +1011,7 @@ int single_drive_main(struct fuse_args *args, struct ltfs_fuse_data *priv) return 1; } } +#endif /* Set file system name to "ltfs:devname" in case FUSE doesn't pick it up */ snprintf(fsname, sizeof(fsname), "-ofsname=ltfs:%s", priv->devname); @@ -1223,7 +1254,17 @@ int single_drive_main(struct fuse_args *args, struct ltfs_fuse_data *priv) for ( i=0; i<args->argc; i++) { fuse_opt_add_arg(&tmpa, args->argv[i]); } +#ifdef HAVE_FUSE3 + { + struct fuse_cmdline_opts cmdline_opts; + + ret = fuse_parse_cmdline(&tmpa, &cmdline_opts); + if (ret == 0) + mountpoint = cmdline_opts.mountpoint; + } +#else ret = fuse_parse_cmdline( &tmpa, &mountpoint, NULL, NULL); +#endif fuse_opt_free_args(&tmpa); if (ret < 0 || mountpoint == NULL) { ltfsmsg(LTFS_ERR, 14094E, ret); diff --git a/tests/t/10-request-size.sh b/tests/t/10-request-size.sh new file mode 100755 index 00000000..86f7e3f8 --- /dev/null +++ b/tests/t/10-request-size.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# Verify the FUSE request sizes that reach the daemon. FUSE 3 builds +# negotiate 1 MiB requests (max_write/max_pages); FUSE 2 is limited to +# 128 KiB with big_writes. +. "${top_srcdir}/tests/lib/harness.sh" + +# DEBUG3 logging prints "FUSE write '...' 
(offset=..., count=...)" +LTFS_MOUNT_OPTS="-o verbose=6" + +ltfs_setup + +dd if=/dev/zero of="$MNT/big" bs=1M count=8 conv=fsync status=none + +# O_DIRECT reads bypass the readahead window, so the application's +# request size reaches the daemon (split at the negotiated maximum) +dd if="$MNT/big" of=/dev/null bs=1M iflag=direct status=none \ + || skip "O_DIRECT reads not supported on this kernel" + +ltfs_finish + +max_req() { + grep "FUSE $1" "$WORK/ltfs.log" | grep -o 'count=[0-9]*' | \ + cut -d= -f2 | sort -n | tail -1 +} + +write_max=$(max_req write) +read_max=$(max_req read) +echo "largest write request: ${write_max:-none}, largest read request: ${read_max:-none}" + +[ -n "$write_max" ] || fail "no write requests logged" + +if ltfs_is_fuse3; then + [ "$write_max" -ge 524288 ] || fail "write requests capped at $write_max bytes" + [ "$read_max" -ge 524288 ] || fail "read requests capped at $read_max bytes" +else + # big_writes raises the FUSE 2 limit to 128 KiB + [ "$write_max" -ge 65536 ] || fail "write requests capped at $write_max bytes" +fi + +echo "PASS" diff --git a/tests/t/11-direct-io.sh b/tests/t/11-direct-io.sh new file mode 100755 index 00000000..fabf01e0 --- /dev/null +++ b/tests/t/11-direct-io.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# -o direct_io: data integrity without the page cache, large requests +# even for buffered application I/O, and graceful mmap failure. +. 
"${top_srcdir}/tests/lib/harness.sh" + +HELPER="$top_builddir/tests/helpers/fsops_helper" + +LTFS_MOUNT_OPTS="-o direct_io -o verbose=6" + +ltfs_setup + +# Data integrity through the direct path, including odd sizes +dd if=/dev/urandom of="$WORK/data" bs=37k count=9 status=none +cp "$WORK/data" "$MNT/data" +cmp -s "$WORK/data" "$MNT/data" || fail "data mismatch while mounted" + +# mmap is not available on direct-I/O files; it must fail cleanly +out=$("$HELPER" mmap "$MNT/data" 4096) && fail "mmap unexpectedly succeeded" +echo "mmap failed as expected: $out" + +# Buffered writes from the application reach the daemon at the +# application's block size (no page-cache splitting) +dd if=/dev/zero of="$MNT/big" bs=1M count=4 status=none + +ltfs_remount +cmp -s "$WORK/data" "$MNT/data" || fail "data mismatch after remount" + +ltfs_finish + +if ltfs_is_fuse3; then + write_max=$(grep "FUSE write" "$WORK/ltfs.log" | grep -o 'count=[0-9]*' | \ + cut -d= -f2 | sort -n | tail -1) + echo "largest write request: $write_max" + [ "$write_max" -ge 524288 ] || fail "direct writes capped at $write_max bytes" +fi + +echo "PASS" diff --git a/tests/t/12-readdirplus.sh b/tests/t/12-readdirplus.sh new file mode 100755 index 00000000..216eef96 --- /dev/null +++ b/tests/t/12-readdirplus.sh @@ -0,0 +1,55 @@ +#!/bin/sh +# readdirplus: listing a directory must return correct attributes and, +# on FUSE 3, must not trigger a getattr request per entry. +. 
"${top_srcdir}/tests/lib/harness.sh" + +LTFS_MOUNT_OPTS="-o verbose=6" + +NFILES=100 + +ltfs_setup + +mkdir "$MNT/big" +i=0 +while [ $i -lt $NFILES ]; do + head -c $((i + 1)) /dev/zero >"$MNT/big/f$i" + i=$((i + 1)) +done + +# Remount so the listing below runs against a cold kernel cache +ltfs_remount + +# Attributes reported by the listing must match per-file stat +ls -l "$MNT/big" >"$WORK/listing" +for n in 0 57 99; do + ls_size=$(awk -v f="f$n" '$NF == f {print $5}' "$WORK/listing") + [ "$ls_size" = "$((n + 1))" ] || fail "listing reports size $ls_size for f$n" + stat_size=$(stat -c %s "$MNT/big/f$n") + [ "$stat_size" = "$((n + 1))" ] || fail "stat reports size $stat_size for f$n" +done + +ltfs_finish + +# "FUSE getattr/fgetattr" debug lines from the remounted instance show how +# many attribute requests the listing needed +getattrs=$(grep -c "FUSE f*getattr" "$WORK/ltfs-remount.log" || true) +echo "getattr requests during ls -l of $NFILES files: $getattrs" + +if ltfs_is_fuse3; then + # readdirplus delivers attributes with the listing; without it the + # kernel issues one getattr (via lookup) per entry. The prefill is + # only effective with libfuse >= 3.17 (verified there; libfuse 3.14 + # never sends READDIRPLUS to the high-level API), so the strict + # assertion is gated on the runtime library version. + ver=$(fusermount3 -V 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) + maj=${ver%%.*} + min=${ver#*.} + if [ "${maj:-0}" -gt 3 ] || { [ "${maj:-0}" -eq 3 ] && [ "${min:-0}" -ge 17 ]; }; then + [ "$getattrs" -lt $((NFILES / 2)) ] \ + || fail "expected readdirplus to suppress per-entry getattr, saw $getattrs" + else + echo "libfuse ${ver:-unknown}: readdirplus prefill not asserted (verified on >= 3.17)" + fi +fi + +echo "PASS"