diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index f28a747e82c4..7e8d1bafb0f3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -145,22 +145,18 @@ Once everything is in good shape and the details have been worked out you can re Any required reviews can then be finalized and the pull request merged. #### Tests and Benchmarks -* Every pull request will by tested by the buildbot on multiple platforms by running the [zfs-tests.sh and zloop.sh]( +* Every pull request is tested using a GitHub Actions workflow on multiple platforms by running the [zfs-tests.sh and zloop.sh]( https://openzfs.github.io/openzfs-docs/Developer%20Resources/Building%20ZFS.html#running-zloop-sh-and-zfs-tests-sh) test suites. +`.github/workflows/scripts/generate-ci-type.py` is used to determine whether the pull request is non-behavioral, i.e., it does not change the behavior of any code, configuration or tests. If so, the CI will run on fewer platforms and only essential sanity tests will run. You can always override this by adding a `ZFS-CI-Type` line to your commit message: + * If your last commit (or `HEAD` in git terms) contains a line `ZFS-CI-Type: quick`, quick mode is forced regardless of what files are changed. + * Otherwise, if any commit in a PR contains a line `ZFS-CI-Type: full`, full mode is forced. * To verify your changes conform to the [style guidelines]( https://github.com/openzfs/zfs/blob/master/.github/CONTRIBUTING.md#style-guides ), please run `make checkstyle` and resolve any warnings. -* Static code analysis of each pull request is performed by the buildbot; run `make lint` to check your changes. -* Test cases should be provided when appropriate. -This includes making sure new features have adequate code coverage. +* Code analysis is performed by [CodeQL](https://codeql.github.com/) for each pull request. +* Test cases should be provided when appropriate. This includes making sure new features have adequate code coverage. 
* If your pull request improves performance, please include some benchmarks. -* The pull request must pass all required [ZFS -Buildbot](http://build.zfsonlinux.org/) builders before -being accepted. If you are experiencing intermittent TEST -builder failures, you may be experiencing a [test suite -issue](https://github.com/openzfs/zfs/issues?q=is%3Aissue+is%3Aopen+label%3A%22Type%3A+Test+Suite%22). -There are also various [buildbot options](https://openzfs.github.io/openzfs-docs/Developer%20Resources/Buildbot%20Options.html) -to control how changes are tested. +* The pull request must pass all CI checks before being accepted. ### Testing All help is appreciated! If you're in a position to run the latest code diff --git a/.github/workflows/build-dependencies.txt b/.github/workflows/build-dependencies.txt deleted file mode 100644 index 73921865c42a..000000000000 --- a/.github/workflows/build-dependencies.txt +++ /dev/null @@ -1,57 +0,0 @@ -acl -alien -attr -autoconf -bc -build-essential -curl -dbench -debhelper-compat -dh-python -dkms -fakeroot -fio -gdb -gdebi -git -ksh -lcov -libacl1-dev -libaio-dev -libattr1-dev -libblkid-dev -libcurl4-openssl-dev -libdevmapper-dev -libelf-dev -libffi-dev -libmount-dev -libpam0g-dev -libselinux1-dev -libssl-dev -libtool -libudev-dev -linux-headers-generic -lsscsi -mdadm -nfs-kernel-server -pamtester -parted -po-debconf -python3 -python3-all-dev -python3-cffi -python3-dev -python3-packaging -python3-pip -python3-setuptools -python3-sphinx -rng-tools-debian -rsync -samba -sysstat -uuid-dev -watchdog -wget -xfslibs-dev -xz-utils -zlib1g-dev diff --git a/.github/workflows/checkstyle-dependencies.txt b/.github/workflows/checkstyle-dependencies.txt deleted file mode 100644 index 879d3dbc5553..000000000000 --- a/.github/workflows/checkstyle-dependencies.txt +++ /dev/null @@ -1,5 +0,0 @@ -cppcheck -devscripts -mandoc -pax-utils -shellcheck diff --git a/.github/workflows/checkstyle.yaml b/.github/workflows/checkstyle.yaml index 
abcb358fc04f..b34ca1302873 100644 --- a/.github/workflows/checkstyle.yaml +++ b/.github/workflows/checkstyle.yaml @@ -4,6 +4,10 @@ on: push: pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + jobs: checkstyle: runs-on: ubuntu-22.04 @@ -13,15 +17,11 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies run: | - # https://github.com/orgs/community/discussions/47863 - sudo apt-mark hold grub-efi-amd64-signed - sudo apt-get update --fix-missing - sudo apt-get upgrade - sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/build-dependencies.txt apt-get install -qq - sudo xargs --arg-file=${{ github.workspace }}/.github/workflows/checkstyle-dependencies.txt apt-get install -qq - sudo python3 -m pip install --quiet flake8 - sudo apt-get clean - + # for x in lxd core20 snapd; do sudo snap remove $x; done + sudo apt-get purge -y snapd google-chrome-stable firefox + ONLY_DEPS=1 .github/workflows/scripts/qemu-3-deps.sh ubuntu22 + sudo apt-get install -y cppcheck devscripts mandoc pax-utils shellcheck + sudo python -m pipx install --quiet flake8 # confirm that the tools are installed # the build system doesn't fail when they are not checkbashisms --version @@ -31,8 +31,13 @@ jobs: shellcheck --version - name: Prepare run: | + sed -i '/DEBUG_CFLAGS="-Werror"/s/^/#/' config/zfs-build.m4 ./autogen.sh + - name: Configure + run: | ./configure + - name: Make + run: | make -j$(nproc) --no-print-directory --silent - name: Checkstyle run: | diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index e015b2cb71d9..2656a20fea0d 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -4,6 +4,10 @@ on: push: pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -27,15 +31,15 @@ jobs: uses: actions/checkout@v4 - name: 
Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: config-file: .github/codeql-${{ matrix.language }}.yml languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scripts/README.md b/.github/workflows/scripts/README.md new file mode 100644 index 000000000000..811d23d93875 --- /dev/null +++ b/.github/workflows/scripts/README.md @@ -0,0 +1,14 @@ + +Workflow for each operating system: +- install qemu on the github runner +- download current cloud image of operating system +- start and init that image via cloud-init +- install dependencies and poweroff system +- start system and build openzfs and then poweroff again +- clone build system and start 2 instances of it +- run the functional tests, which complete in around 3h +- when tests are done, prepare the log files +- show detailed results for each system +- in the end, generate the job summary + +/TR 14.09.2024 diff --git a/.github/workflows/scripts/generate-ci-type.py b/.github/workflows/scripts/generate-ci-type.py new file mode 100755 index 000000000000..943aae254469 --- /dev/null +++ b/.github/workflows/scripts/generate-ci-type.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 + +""" +Determine the CI type based on the change list and commit message. + +Prints "quick" if (explicitly requested by user): +- the *last* commit message contains 'ZFS-CI-Type: quick' +or if (heuristics): +- the files changed are not in the list of specified directories, and +- all commit messages do not contain 'ZFS-CI-Type: full' + +Otherwise prints "full". +""" + +import sys +import subprocess +import re + +""" +Patterns of files that are not considered to trigger full CI. 
+Note: not using pathlib.Path.match() because it does not support '**' +""" +FULL_RUN_IGNORE_REGEX = list(map(re.compile, [ + r'.*\.md', + r'.*\.gitignore' +])) + +""" +Patterns of files that are considered to trigger full CI. +""" +FULL_RUN_REGEX = list(map(re.compile, [ + r'cmd.*', + r'configs/.*', + r'META', + r'.*\.am', + r'.*\.m4', + r'autogen\.sh', + r'configure\.ac', + r'copy-builtin', + r'contrib', + r'etc', + r'include', + r'lib/.*', + r'module/.*', + r'scripts/.*', + r'tests/.*', + r'udev/.*' +])) + +if __name__ == '__main__': + + prog = sys.argv[0] + + if len(sys.argv) != 3: + print(f'Usage: {prog} <head_ref> <base_ref>') + sys.exit(1) + + head, base = sys.argv[1:3] + + def output_type(type, reason): + print(f'{prog}: will run {type} CI: {reason}', file=sys.stderr) + print(type) + sys.exit(0) + + # check last (HEAD) commit message + last_commit_message_raw = subprocess.run([ + 'git', 'show', '-s', '--format=%B', 'HEAD' + ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + for line in last_commit_message_raw.stdout.decode().splitlines(): + if line.strip().lower() == 'zfs-ci-type: quick': + output_type('quick', f'explicitly requested by HEAD commit {head}') + + # check all commit messages + all_commit_message_raw = subprocess.run([ + 'git', 'show', '-s', + '--format=ZFS-CI-Commit: %H%n%B', f'{head}...{base}' + ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + all_commit_message = all_commit_message_raw.stdout.decode().splitlines() + + commit_ref = head + for line in all_commit_message: + if line.startswith('ZFS-CI-Commit:'): + commit_ref = line[len('ZFS-CI-Commit:'):].strip() + if line.strip().lower() == 'zfs-ci-type: full': + output_type('full', f'explicitly requested by commit {commit_ref}') + + # check changed files + changed_files_raw = subprocess.run([ + 'git', 'diff', '--name-only', head, base + ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + changed_files = changed_files_raw.stdout.decode().splitlines() + + for f in 
changed_files: + for r in FULL_RUN_IGNORE_REGEX: + if r.match(f): + break + else: + for r in FULL_RUN_REGEX: + if r.match(f): + output_type( + 'full', + f'changed file "{f}" matches pattern "{r.pattern}"' + ) + + # catch-all + output_type('quick', 'no changed file matches full CI patterns') diff --git a/.github/workflows/scripts/generate-summary.sh b/.github/workflows/scripts/generate-summary.sh deleted file mode 100755 index 5f19aa4081fc..000000000000 --- a/.github/workflows/scripts/generate-summary.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env bash - -# for runtime reasons we split functional testings into N parts -# - use a define to check for missing tarfiles -FUNCTIONAL_PARTS="4" - -ZTS_REPORT="tests/test-runner/bin/zts-report.py" -chmod +x $ZTS_REPORT - -function output() { - echo -e $* >> Summary.md -} - -function error() { - output ":bangbang: $* :bangbang:\n" -} - -# this function generates the real summary -# - expects a logfile "log" in current directory -function generate() { - # we issued some error already - test ! -s log && return - - # for overview and zts-report - cat log | grep '^Test' > list - - # error details - awk '/\[FAIL\]|\[KILLED\]/{ show=1; print; next; } - /\[SKIP\]|\[PASS\]/{ show=0; } show' log > err - - # summary of errors - if [ -s err ]; then - output "
"
-    $ZTS_REPORT --no-maybes ./list >> Summary.md
-    output "
" - - # generate seperate error logfile - ERRLOGS=$((ERRLOGS+1)) - errfile="err-$ERRLOGS.md" - echo -e "\n## $headline (debugging)\n" >> $errfile - echo "
Error Listing - with dmesg and dbgmsg
" >> $errfile
-    dd if=err bs=999k count=1 >> $errfile
-    echo "
" >> $errfile - else - output "All tests passed :thumbsup:" - fi - - output "
Full Listing
"
-  cat list >> Summary.md
-  output "
" - - # remove tmp files - rm -f err list log -} - -# check tarfiles and untar -function check_tarfile() { - if [ -f "$1" ]; then - tar xf "$1" || error "Tarfile $1 returns some error" - else - error "Tarfile $1 not found" - fi -} - -# check logfile and concatenate test results -function check_logfile() { - if [ -f "$1" ]; then - cat "$1" >> log - else - error "Logfile $1 not found" - fi -} - -# sanity -function summarize_s() { - headline="$1" - output "\n## $headline\n" - rm -rf testfiles - check_tarfile "$2/sanity.tar" - check_logfile "testfiles/log" - generate -} - -# functional -function summarize_f() { - headline="$1" - output "\n## $headline\n" - rm -rf testfiles - for i in $(seq 1 $FUNCTIONAL_PARTS); do - tarfile="$2-part$i/part$i.tar" - check_tarfile "$tarfile" - check_logfile "testfiles/log" - done - generate -} - -# https://docs.github.com/en/enterprise-server@3.6/actions/using-workflows/workflow-commands-for-github-actions#step-isolation-and-limits -# Job summaries are isolated between steps and each step is restricted to a maximum size of 1MiB. -# [ ] can not show all error findings here -# [x] split files into smaller ones and create additional steps - -ERRLOGS=0 -if [ ! -f Summary/Summary.md ]; then - # first call, we do the default summary (~500k) - echo -n > Summary.md - summarize_s "Sanity Tests Ubuntu 22.04" Logs-22.04-sanity - summarize_f "Functional Tests Ubuntu 22.04" Logs-22.04-functional - - cat Summary.md >> $GITHUB_STEP_SUMMARY - mkdir -p Summary - mv *.md Summary -else - # here we get, when errors where returned in first call - test -f Summary/err-$1.md && cat Summary/err-$1.md >> $GITHUB_STEP_SUMMARY -fi - -exit 0 diff --git a/.github/workflows/scripts/merge_summary.awk b/.github/workflows/scripts/merge_summary.awk new file mode 100755 index 000000000000..2b00d00226c9 --- /dev/null +++ b/.github/workflows/scripts/merge_summary.awk @@ -0,0 +1,109 @@ +#!/bin/awk -f +# +# Merge multiple ZTS tests results summaries into a single summary. 
This is +# needed when you're running different parts of ZTS on different tests +# runners or VMs. +# +# Usage: +# +# ./merge_summary.awk summary1.txt [summary2.txt] [summary3.txt] ... +# +# or: +# +# cat summary*.txt | ./merge_summary.awk +# +BEGIN { + i=-1 + pass=0 + fail=0 + skip=0 + state="" + cl=0 + el=0 + upl=0 + ul=0 + + # Total seconds of tests runtime + total=0; +} + +# Skip empty lines +/^\s*$/{next} + +# Skip Configuration and Test lines +/^Test:/{state=""; next} +/Configuration/{state="";next} + +# When we see "test-runner.py" stop saving config lines, and +# save test runner lines +/test-runner.py/{state="testrunner"; runner=runner$0"\n"; next} + +# We need to differentiate the PASS counts from test result lines that start +# with PASS, like: +# +# PASS mv_files/setup +# +# Use state="pass_count" to differentiate +# +/Results Summary/{state="pass_count"; next} +/PASS/{ if (state=="pass_count") {pass += $2}} +/FAIL/{ if (state=="pass_count") {fail += $2}} +/SKIP/{ if (state=="pass_count") {skip += $2}} +/Running Time/{ + state=""; + running[i]=$3; + split($3, arr, ":") + total += arr[1] * 60 * 60; + total += arr[2] * 60; + total += arr[3] + next; +} + +/Tests with results other than PASS that are expected/{state="expected_lines"; next} +/Tests with result of PASS that are unexpected/{state="unexpected_pass_lines"; next} +/Tests with results other than PASS that are unexpected/{state="unexpected_lines"; next} +{ + if (state == "expected_lines") { + expected_lines[el] = $0 + el++ + } + + if (state == "unexpected_pass_lines") { + unexpected_pass_lines[upl] = $0 + upl++ + } + if (state == "unexpected_lines") { + unexpected_lines[ul] = $0 + ul++ + } +} + +# Reproduce summary +END { + print runner; + print "\nResults Summary" + print "PASS\t"pass + print "FAIL\t"fail + print "SKIP\t"skip + print "" + print "Running Time:\t"strftime("%T", total, 1) + if (pass+fail+skip > 0) { + percent_passed=(pass/(pass+fail+skip) * 100) + } + printf "Percent 
passed:\t%3.2f%", percent_passed + + print "\n\nTests with results other than PASS that are expected:" + asort(expected_lines, sorted) + for (j in sorted) + print sorted[j] + + print "\n\nTests with result of PASS that are unexpected:" + asort(unexpected_pass_lines, sorted) + for (j in sorted) + print sorted[j] + + print "\n\nTests with results other than PASS that are unexpected:" + asort(unexpected_lines, sorted) + for (j in sorted) + print sorted[j] +} diff --git a/.github/workflows/scripts/qemu-1-setup.sh b/.github/workflows/scripts/qemu-1-setup.sh new file mode 100755 index 000000000000..ebd80a2f98c1 --- /dev/null +++ b/.github/workflows/scripts/qemu-1-setup.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +###################################################################### +# 1) setup qemu instance on action runner +###################################################################### + +set -eu + +# install needed packages +export DEBIAN_FRONTEND="noninteractive" +sudo apt-get -y update +sudo apt-get install -y axel cloud-image-utils daemonize guestfs-tools \ + ksmtuned virt-manager linux-modules-extra-$(uname -r) zfsutils-linux + +# generate ssh keys +rm -f ~/.ssh/id_ed25519 +ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -q -N "" + +# we expect RAM shortage +cat << EOF | sudo tee /etc/ksmtuned.conf > /dev/null +# https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/7/html/virtualization_tuning_and_optimization_guide/chap-ksm +KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. 
+KSM_SLEEP_MSEC=10 +KSM_NPAGES_BOOST=300 +KSM_NPAGES_DECAY=-50 +KSM_NPAGES_MIN=64 +KSM_NPAGES_MAX=2048 + +KSM_THRES_COEF=25 +KSM_THRES_CONST=2048 + +LOGFILE=/var/log/ksmtuned.log +DEBUG=1 +EOF +sudo systemctl restart ksm +sudo systemctl restart ksmtuned + +# not needed +sudo systemctl stop docker.socket +sudo systemctl stop multipathd.socket + +# remove default swapfile and /mnt +sudo swapoff -a +sudo umount -l /mnt +DISK="/dev/disk/cloud/azure_resource-part1" +sudo sed -e "s|^$DISK.*||g" -i /etc/fstab +sudo wipefs -aq $DISK +sudo systemctl daemon-reload + +sudo modprobe loop +sudo modprobe zfs + +# partition the disk as needed +DISK="/dev/disk/cloud/azure_resource" +sudo sgdisk --zap-all $DISK +sudo sgdisk -p \ + -n 1:0:+16G -c 1:"swap" \ + -n 2:0:0 -c 2:"tests" \ +$DISK +sync +sleep 1 + +# swap with same size as RAM +sudo mkswap $DISK-part1 +sudo swapon $DISK-part1 + +# 60GB data disk +SSD1="$DISK-part2" + +# 10GB data disk on ext4 +sudo fallocate -l 10G /test.ssd1 +SSD2=$(sudo losetup -b 4096 -f /test.ssd1 --show) + +# adjust zfs module parameter and create pool +exec 1>/dev/null +ARC_MIN=$((1024*1024*256)) +ARC_MAX=$((1024*1024*512)) +echo $ARC_MIN | sudo tee /sys/module/zfs/parameters/zfs_arc_min +echo $ARC_MAX | sudo tee /sys/module/zfs/parameters/zfs_arc_max +echo 1 | sudo tee /sys/module/zfs/parameters/zvol_use_blk_mq +sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 \ + -O relatime=off -O atime=off -O xattr=sa -O compression=lz4 \ + -O mountpoint=/mnt/tests + +# no need for some scheduler +for i in /sys/block/s*/queue/scheduler; do + echo "none" | sudo tee $i > /dev/null +done diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh new file mode 100755 index 000000000000..923c38c0f937 --- /dev/null +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -0,0 +1,213 @@ +#!/usr/bin/env bash + +###################################################################### +# 2) start qemu with some operating system, init via 
cloud-init +###################################################################### + +set -eu + +# short name used in zfs-qemu.yml +OS="$1" + +# OS variant (virt-install --os-variant list) +OSv=$OS + +# compressed with .zst extension +REPO="https://github.com/mcmilk/openzfs-freebsd-images" +FREEBSD="$REPO/releases/download/v2024-09-16" +URLzs="" + +# Ubuntu mirrors +#UBMIRROR="https://cloud-images.ubuntu.com" +#UBMIRROR="https://mirrors.cloud.tencent.com/ubuntu-cloud-images" +UBMIRROR="https://mirror.citrahost.com/ubuntu-cloud-images" + +# default nic model for vm's +NIC="virtio" + +case "$OS" in + almalinux8) + OSNAME="AlmaLinux 8" + URL="https://repo.almalinux.org/almalinux/8/cloud/x86_64/images/AlmaLinux-8-GenericCloud-latest.x86_64.qcow2" + ;; + almalinux9) + OSNAME="AlmaLinux 9" + URL="https://repo.almalinux.org/almalinux/9/cloud/x86_64/images/AlmaLinux-9-GenericCloud-latest.x86_64.qcow2" + ;; + archlinux) + OSNAME="Archlinux" + URL="https://geo.mirror.pkgbuild.com/images/latest/Arch-Linux-x86_64-cloudimg.qcow2" + # dns sometimes fails with that url, so pin it (append: keep the existing hosts entries) + echo "89.187.191.12 geo.mirror.pkgbuild.com" | sudo tee -a /etc/hosts > /dev/null + ;; + centos-stream9) + OSNAME="CentOS Stream 9" + URL="https://cloud.centos.org/centos/9-stream/x86_64/images/CentOS-Stream-GenericCloud-9-latest.x86_64.qcow2" + ;; + debian11) + OSNAME="Debian 11" + URL="https://cloud.debian.org/images/cloud/bullseye/latest/debian-11-generic-amd64.qcow2" + ;; + debian12) + OSNAME="Debian 12" + URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" + ;; + fedora39) + OSNAME="Fedora 39" + OSv="fedora39" + URL="https://download.fedoraproject.org/pub/fedora/linux/releases/39/Cloud/x86_64/images/Fedora-Cloud-Base-39-1.5.x86_64.qcow2" + ;; + fedora40) + OSNAME="Fedora 40" + OSv="fedora39" + URL="https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2" + ;; + freebsd13r) + OSNAME="FreeBSD 
13.4-RELEASE" + OSv="freebsd13.0" + URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst" + BASH="/usr/local/bin/bash" + NIC="rtl8139" + ;; + freebsd13) + OSNAME="FreeBSD 13.4-STABLE" + OSv="freebsd13.0" + URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst" + BASH="/usr/local/bin/bash" + NIC="rtl8139" + ;; + freebsd14r) + OSNAME="FreeBSD 14.1-RELEASE" + OSv="freebsd14.0" + URLzs="$FREEBSD/amd64-freebsd-14.1-RELEASE.qcow2.zst" + BASH="/usr/local/bin/bash" + ;; + freebsd14) + OSNAME="FreeBSD 14.1-STABLE" + OSv="freebsd14.0" + URLzs="$FREEBSD/amd64-freebsd-14.1-STABLE.qcow2.zst" + BASH="/usr/local/bin/bash" + ;; + freebsd15) + OSNAME="FreeBSD 15.0-CURRENT" + OSv="freebsd14.0" + URLzs="$FREEBSD/amd64-freebsd-15.0-CURRENT.qcow2.zst" + BASH="/usr/local/bin/bash" + ;; + tumbleweed) + OSNAME="openSUSE Tumbleweed" + OSv="opensusetumbleweed" + MIRROR="http://opensuse-mirror-gce-us.susecloud.net" + URL="$MIRROR/tumbleweed/appliances/openSUSE-MicroOS.x86_64-OpenStack-Cloud.qcow2" + ;; + ubuntu20) + OSNAME="Ubuntu 20.04" + OSv="ubuntu20.04" + URL="$UBMIRROR/focal/current/focal-server-cloudimg-amd64.img" + ;; + ubuntu22) + OSNAME="Ubuntu 22.04" + OSv="ubuntu22.04" + URL="$UBMIRROR/jammy/current/jammy-server-cloudimg-amd64.img" + ;; + ubuntu24) + OSNAME="Ubuntu 24.04" + OSv="ubuntu24.04" + URL="$UBMIRROR/noble/current/noble-server-cloudimg-amd64.img" + ;; + *) + echo "Wrong value for OS variable!" 
+ exit 111 + ;; +esac + +# environment file +ENV="/var/tmp/env.txt" +echo "ENV=$ENV" >> $ENV + +# result path +echo 'RESPATH="/var/tmp/test_results"' >> $ENV + +# FreeBSD 13 has problems with: e1000+virtio +echo "NIC=$NIC" >> $ENV + +# freebsd15 -> used in zfs-qemu.yml +echo "OS=$OS" >> $ENV + +# freebsd14.0 -> used for virt-install +echo "OSv=\"$OSv\"" >> $ENV + +# FreeBSD 15 (Current) -> used for summary +echo "OSNAME=\"$OSNAME\"" >> $ENV + +sudo mkdir -p "/mnt/tests" +sudo chown -R $(whoami) /mnt/tests + +# we are downloading via axel, curl and wget are mostly slower and +# require more return value checking +IMG="/mnt/tests/cloudimg.qcow2" +if [ ! -z "$URLzs" ]; then + echo "Loading image $URLzs ..." + time axel -q -o "$IMG.zst" "$URLzs" + zstd -q -d --rm "$IMG.zst" +else + echo "Loading image $URL ..." + time axel -q -o "$IMG" "$URL" +fi + +DISK="/dev/zvol/zpool/openzfs" +FORMAT="raw" +sudo zfs create -ps -b 64k -V 80g zpool/openzfs +while true; do test -b $DISK && break; sleep 1; done +echo "Importing VM image to zvol..." 
+sudo qemu-img dd -f qcow2 -O raw if=$IMG of=$DISK bs=4M +rm -f $IMG + +PUBKEY=$(cat ~/.ssh/id_ed25519.pub) +cat <<EOF > /tmp/user-data +#cloud-config + +fqdn: $OS + +users: +- name: root + shell: $BASH +- name: zfs + sudo: ALL=(ALL) NOPASSWD:ALL + shell: $BASH + ssh_authorized_keys: + - $PUBKEY + +growpart: + mode: auto + devices: ['/'] + ignore_growroot_disabled: false +EOF + +sudo virsh net-update default add ip-dhcp-host \ + "" --live --config + +sudo virt-install \ + --os-variant $OSv \ + --name "openzfs" \ + --cpu host-passthrough \ + --virt-type=kvm --hvm \ + --vcpus=4,sockets=1 \ + --memory $((1024*12)) \ + --memballoon model=virtio \ + --graphics none \ + --network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \ + --cloud-init user-data=/tmp/user-data \ + --disk $DISK,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ + --import --noautoconsole >/dev/null + +# in case the directory isn't there already +mkdir -p $HOME/.ssh + +cat <<EOF >> $HOME/.ssh/config +# no questions please +StrictHostKeyChecking no + +# small timeout, used in while loops later +ConnectTimeout 1 +EOF diff --git a/.github/workflows/scripts/qemu-3-deps.sh b/.github/workflows/scripts/qemu-3-deps.sh new file mode 100755 index 000000000000..a2fb5e38249a --- /dev/null +++ b/.github/workflows/scripts/qemu-3-deps.sh @@ -0,0 +1,221 @@ +#!/usr/bin/env bash + +###################################################################### +# 3) install dependencies for compiling and loading +###################################################################### + +set -eu + +function archlinux() { + echo "##[group]Running pacman -Syu" + sudo btrfs filesystem resize max / + sudo pacman -Syu --noconfirm + echo "##[endgroup]" + + echo "##[group]Install Development Tools" + sudo pacman -Sy --noconfirm base-devel bc cpio dhclient dkms fakeroot \ + fio gdb inetutils jq less linux linux-headers lsscsi nfs-utils parted \ + pax perf python-packaging python-setuptools qemu-guest-agent ksh samba \ + sysstat 
rng-tools rsync wget xxhash + echo "##[endgroup]" +} + +function debian() { + export DEBIAN_FRONTEND="noninteractive" + + echo "##[group]Running apt-get update+upgrade" + sudo apt-get update -y + sudo apt-get upgrade -y + echo "##[endgroup]" + + echo "##[group]Install Development Tools" + sudo apt-get install -y \ + acl alien attr autoconf bc cpio curl dbench dh-python dkms fakeroot \ + fio gdb gdebi git ksh lcov isc-dhcp-client jq libacl1-dev libaio-dev \ + libattr1-dev libblkid-dev libcurl4-openssl-dev libdevmapper-dev libelf-dev \ + libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev libtool \ + libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ + lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ + python3-cffi python3-dev python3-distlib python3-packaging \ + python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ + rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev + echo "##[endgroup]" +} + +function freebsd() { + export ASSUME_ALWAYS_YES="YES" + + echo "##[group]Install Development Tools" + sudo pkg install -y autoconf automake autotools base64 checkbashisms fio \ + gdb gettext gettext-runtime git gmake gsed jq ksh93 lcov libtool lscpu \ + pkgconf python python3 pamtester pamtester qemu-guest-agent rsync xxhash + sudo pkg install -xy \ + '^samba4[[:digit:]]+$' \ + '^py3[[:digit:]]+-cffi$' \ + '^py3[[:digit:]]+-sysctl$' \ + '^py3[[:digit:]]+-packaging$' + echo "##[endgroup]" +} + +# common packages for: almalinux, centos, redhat +function rhel() { + echo "##[group]Running dnf update" + echo "max_parallel_downloads=10" | sudo -E tee -a /etc/dnf/dnf.conf + sudo dnf clean all + sudo dnf update -y --setopt=fastestmirror=1 --refresh + echo "##[endgroup]" + + echo "##[group]Install Development Tools" + sudo dnf group install -y "Development Tools" + sudo dnf install -y \ + acl attr bc bzip2 curl dbench dkms elfutils-libelf-devel fio gdb git \ + jq kernel-rpm-macros ksh libacl-devel 
libaio-devel libargon2-devel \ + libattr-devel libblkid-devel libcurl-devel libffi-devel ncompress \ + libselinux-devel libtirpc-devel libtool libudev-devel libuuid-devel \ + lsscsi mdadm nfs-utils openssl-devel pam-devel pamtester parted perf \ + python3 python3-cffi python3-devel python3-packaging kernel-devel \ + python3-setuptools qemu-guest-agent rng-tools rpcgen rpm-build rsync \ + samba sysstat systemd watchdog wget xfsprogs-devel xxhash zlib-devel + echo "##[endgroup]" +} + +function tumbleweed() { + echo "##[group]Running zypper is TODO!" + sleep 23456 + echo "##[endgroup]" +} + +# Install dependencies +case "$1" in + almalinux8) + echo "##[group]Enable epel and powertools repositories" + sudo dnf config-manager -y --set-enabled powertools + sudo dnf install -y epel-release + echo "##[endgroup]" + rhel + echo "##[group]Install kernel-abi-whitelists" + sudo dnf install -y kernel-abi-whitelists + echo "##[endgroup]" + ;; + almalinux9|centos-stream9) + echo "##[group]Enable epel and crb repositories" + sudo dnf config-manager -y --set-enabled crb + sudo dnf install -y epel-release + echo "##[endgroup]" + rhel + echo "##[group]Install kernel-abi-stablelists" + sudo dnf install -y kernel-abi-stablelists + echo "##[endgroup]" + ;; + archlinux) + archlinux + ;; + debian*) + debian + echo "##[group]Install Debian specific" + sudo apt-get install -yq linux-perf dh-sequence-dkms + echo "##[endgroup]" + ;; + fedora*) + rhel + ;; + freebsd*) + freebsd + ;; + tumbleweed) + tumbleweed + ;; + ubuntu*) + debian + echo "##[group]Install Ubuntu specific" + sudo apt-get install -yq linux-tools-common libtirpc-dev \ + linux-modules-extra-$(uname -r) + if [ "$1" != "ubuntu20" ]; then + sudo apt-get install -yq dh-sequence-dkms + fi + echo "##[endgroup]" + echo "##[group]Delete Ubuntu OpenZFS modules" + for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done + echo "##[endgroup]" + ;; +esac + +# This script is used for checkstyle + zloop deps also. 
+# Install only the needed packages and exit - when used this way. +test -z "${ONLY_DEPS:-}" || exit 0 + +# Start services +echo "##[group]Enable services" +case "$1" in + freebsd*) + # add virtio things + echo 'virtio_load="YES"' | sudo -E tee -a /boot/loader.conf + for i in balloon blk console random scsi; do + echo "virtio_${i}_load=\"YES\"" | sudo -E tee -a /boot/loader.conf + done + echo "fdescfs /dev/fd fdescfs rw 0 0" | sudo -E tee -a /etc/fstab + sudo -E mount /dev/fd + sudo -E touch /etc/zfs/exports + sudo -E sysrc mountd_flags="/etc/zfs/exports" + echo '[global]' | sudo -E tee /usr/local/etc/smb4.conf >/dev/null + sudo -E service nfsd enable + sudo -E service qemu-guest-agent enable + sudo -E service samba_server enable + ;; + debian*|ubuntu*) + sudo -E systemctl enable nfs-kernel-server + sudo -E systemctl enable qemu-guest-agent + sudo -E systemctl enable smbd + ;; + *) + # All other linux distros + sudo -E systemctl enable nfs-server + sudo -E systemctl enable qemu-guest-agent + sudo -E systemctl enable smb + ;; +esac +echo "##[endgroup]" + +# Setup Kernel cmdline +CMDLINE="console=tty0 console=ttyS0,115200n8" +CMDLINE="$CMDLINE selinux=0" +CMDLINE="$CMDLINE random.trust_cpu=on" +CMDLINE="$CMDLINE no_timer_check" +case "$1" in + almalinux*|centos*|fedora*) + GRUB_CFG="/boot/grub2/grub.cfg" + GRUB_MKCONFIG="grub2-mkconfig" + CMDLINE="$CMDLINE biosdevname=0 net.ifnames=0" + echo 'GRUB_SERIAL_COMMAND="serial --speed=115200"' \ + | sudo tee -a /etc/default/grub >/dev/null + ;; + ubuntu24) + GRUB_CFG="/boot/grub/grub.cfg" + GRUB_MKCONFIG="grub-mkconfig" + echo 'GRUB_DISABLE_OS_PROBER="false"' \ + | sudo tee -a /etc/default/grub >/dev/null + ;; + *) + GRUB_CFG="/boot/grub/grub.cfg" + GRUB_MKCONFIG="grub-mkconfig" + ;; +esac + +case "$1" in + archlinux|freebsd*) + true + ;; + *) + echo "##[group]Edit kernel cmdline" + sudo sed -i -e '/^GRUB_CMDLINE_LINUX/d' /etc/default/grub || true + echo "GRUB_CMDLINE_LINUX=\"$CMDLINE\"" \ + | sudo tee -a /etc/default/grub 
>/dev/null + sudo $GRUB_MKCONFIG -o $GRUB_CFG + echo "##[endgroup]" + ;; +esac + +# reset cloud-init configuration and poweroff +sudo cloud-init clean --logs +sleep 2 && sudo poweroff & +exit 0 diff --git a/.github/workflows/scripts/qemu-4-build.sh b/.github/workflows/scripts/qemu-4-build.sh new file mode 100755 index 000000000000..1051ee1f4deb --- /dev/null +++ b/.github/workflows/scripts/qemu-4-build.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +###################################################################### +# 4) configure and build openzfs modules +###################################################################### + +set -eu + +function run() { + LOG="/var/tmp/build-stderr.txt" + echo "****************************************************" + echo "$(date) ($*)" + echo "****************************************************" + ($@ || echo $? > /tmp/rv) 3>&1 1>&2 2>&3 | stdbuf -eL -oL tee -a $LOG + if [ -f /tmp/rv ]; then + RV=$(cat /tmp/rv) + echo "****************************************************" + echo "exit with value=$RV ($*)" + echo "****************************************************" + echo 1 > /var/tmp/build-exitcode.txt + exit $RV + fi +} + +function freebsd() { + export MAKE="gmake" + echo "##[group]Autogen.sh" + run ./autogen.sh + echo "##[endgroup]" + + echo "##[group]Configure" + run ./configure \ + --prefix=/usr/local \ + --with-libintl-prefix=/usr/local \ + --enable-pyzfs \ + --enable-debug \ + --enable-debuginfo + echo "##[endgroup]" + + echo "##[group]Build" + run gmake -j$(sysctl -n hw.ncpu) + echo "##[endgroup]" + + echo "##[group]Install" + run sudo gmake install + echo "##[endgroup]" +} + +function linux() { + echo "##[group]Autogen.sh" + run ./autogen.sh + echo "##[endgroup]" + + echo "##[group]Configure" + run ./configure \ + --prefix=/usr \ + --enable-pyzfs \ + --enable-debug \ + --enable-debuginfo + echo "##[endgroup]" + + echo "##[group]Build" + run make -j$(nproc) + echo "##[endgroup]" + + echo "##[group]Install" + run sudo 
make install + echo "##[endgroup]" +} + +function rpm_build_and_install() { + EXTRA_CONFIG="${1:-}" + echo "##[group]Autogen.sh" + run ./autogen.sh + echo "##[endgroup]" + + echo "##[group]Configure" + run ./configure --enable-debug --enable-debuginfo $EXTRA_CONFIG + echo "##[endgroup]" + + echo "##[group]Build" + run make pkg-kmod pkg-utils + echo "##[endgroup]" + + echo "##[group]Install" + run sudo dnf -y --skip-broken localinstall $(ls *.rpm | grep -v src.rpm) + echo "##[endgroup]" + +} + +function deb_build_and_install() { +echo "##[group]Autogen.sh" + run ./autogen.sh + echo "##[endgroup]" + + echo "##[group]Configure" + run ./configure \ + --prefix=/usr \ + --enable-pyzfs \ + --enable-debug \ + --enable-debuginfo + echo "##[endgroup]" + + echo "##[group]Build" + run make native-deb-kmod native-deb-utils + echo "##[endgroup]" + + echo "##[group]Install" + # Do kmod install. Note that when you build the native debs, the + # packages themselves are placed in parent directory '../' rather than + # in the source directory like the rpms are. 
+ run sudo apt-get -y install $(find ../ | grep -E '\.deb$' \ + | grep -Ev 'dkms|dracut') + echo "##[endgroup]" +} + +# Debug: show kernel cmdline +if [ -f /proc/cmdline ] ; then + cat /proc/cmdline || true +fi + +# save some sysinfo +uname -a > /var/tmp/uname.txt + +cd $HOME/zfs +export PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin" + +# build +case "$1" in + freebsd*) + freebsd + ;; + alma*|centos*) + rpm_build_and_install "--with-spec=redhat" + ;; + fedora*) + rpm_build_and_install + ;; + debian*|ubuntu*) + deb_build_and_install + ;; + *) + linux + ;; +esac + +# building the zfs module was ok +echo 0 > /var/tmp/build-exitcode.txt + +# reset cloud-init configuration and poweroff +sudo cloud-init clean --logs +sync && sleep 2 && sudo poweroff & +exit 0 diff --git a/.github/workflows/scripts/qemu-5-setup.sh b/.github/workflows/scripts/qemu-5-setup.sh new file mode 100755 index 000000000000..7acb67a27920 --- /dev/null +++ b/.github/workflows/scripts/qemu-5-setup.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash + +###################################################################### +# 5) start test machines and load openzfs module +###################################################################### + +set -eu + +# read our defined variables +source /var/tmp/env.txt + +# wait for poweroff to succeed +PID=$(pidof /usr/bin/qemu-system-x86_64) +tail --pid=$PID -f /dev/null +sudo virsh undefine openzfs + +# definitions of per operating system +case "$OS" in + freebsd*) + VMs=2 + CPU=3 + RAM=6 + ;; + *) + VMs=2 + CPU=3 + RAM=7 + ;; +esac + +# this can be different for each distro +echo "VMs=$VMs" >> $ENV + +# create snapshot we can clone later +sudo zfs snapshot zpool/openzfs@now + +# setup the testing vm's +PUBKEY=$(cat ~/.ssh/id_ed25519.pub) +for i in $(seq 1 $VMs); do + + echo "Creating disk for vm$i..." 
+ DISK="/dev/zvol/zpool/vm$i" + FORMAT="raw" + sudo zfs clone zpool/openzfs@now zpool/vm$i + sudo zfs create -ps -b 64k -V 80g zpool/vm$i-2 + + cat < /tmp/user-data +#cloud-config + +fqdn: vm$i + +users: +- name: root + shell: $BASH +- name: zfs + sudo: ALL=(ALL) NOPASSWD:ALL + shell: $BASH + ssh_authorized_keys: + - $PUBKEY + +growpart: + mode: auto + devices: ['/'] + ignore_growroot_disabled: false +EOF + + sudo virsh net-update default add ip-dhcp-host \ + "" --live --config + + sudo virt-install \ + --os-variant $OSv \ + --name "vm$i" \ + --cpu host-passthrough \ + --virt-type=kvm --hvm \ + --vcpus=$CPU,sockets=1 \ + --memory $((1024*RAM)) \ + --memballoon model=virtio \ + --graphics none \ + --cloud-init user-data=/tmp/user-data \ + --network bridge=virbr0,model=$NIC,mac="52:54:00:83:79:0$i" \ + --disk $DISK,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ + --disk $DISK-2,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ + --import --noautoconsole >/dev/null +done + +# check the memory state from time to time +cat < cronjob.sh +# $OS +exec 1>>/var/tmp/stats.txt +exec 2>&1 +echo "*******************************************************" +date +uptime +free -m +df -h /mnt/tests +zfs list +EOF +sudo chmod +x cronjob.sh +sudo mv -f cronjob.sh /root/cronjob.sh +echo '*/5 * * * * /root/cronjob.sh' > crontab.txt +sudo crontab crontab.txt +rm crontab.txt + +# check if the machines are okay +echo "Waiting for vm's to come up... (${VMs}x CPU=$CPU RAM=$RAM)" +for i in $(seq 1 $VMs); do + while true; do + ssh 2>/dev/null zfs@192.168.122.1$i "uname -a" && break + done +done +echo "All $VMs VMs are up now." 
+ +# Save the VM's serial output (ttyS0) to /var/tmp/console.txt +# - ttyS0 on the VM corresponds to a local /dev/pty/N entry +# - use 'virsh ttyconsole' to lookup the /dev/pty/N entry +for i in $(seq 1 $VMs); do + mkdir -p $RESPATH/vm$i + read "pty" <<< $(sudo virsh ttyconsole vm$i) + sudo nohup bash -c "cat $pty > $RESPATH/vm$i/console.txt" & +done +echo "Console logging for ${VMs}x $OS started." diff --git a/.github/workflows/scripts/qemu-6-tests.sh b/.github/workflows/scripts/qemu-6-tests.sh new file mode 100755 index 000000000000..2f023198bbf6 --- /dev/null +++ b/.github/workflows/scripts/qemu-6-tests.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash + +###################################################################### +# 6) load openzfs module and run the tests +# +# called on runner: qemu-6-tests.sh +# called on qemu-vm: qemu-6-tests.sh $OS $2/$3 +###################################################################### + +set -eu + +function prefix() { + ID="$1" + LINE="$2" + CURRENT=$(date +%s) + TSSTART=$(cat /tmp/tsstart) + DIFF=$((CURRENT-TSSTART)) + H=$((DIFF/3600)) + DIFF=$((DIFF-(H*3600))) + M=$((DIFF/60)) + S=$((DIFF-(M*60))) + + CTR=$(cat /tmp/ctr) + echo $LINE| grep -q "^Test[: ]" && CTR=$((CTR+1)) && echo $CTR > /tmp/ctr + + BASE="$HOME/work/zfs/zfs" + COLOR="$BASE/scripts/zfs-tests-color.sh" + CLINE=$(echo $LINE| grep "^Test[ :]" | sed -e 's|/usr/local|/usr|g' \ + | sed -e 's| /usr/share/zfs/zfs-tests/tests/| |g' | $COLOR) + if [ -z "$CLINE" ]; then + printf "vm${ID}: %s\n" "$LINE" + else + # [vm2: 00:15:54 256] Test: functional/checksum/setup (run as root) [00:00] [PASS] + printf "[vm${ID}: %02d:%02d:%02d %4d] %s\n" \ + "$H" "$M" "$S" "$CTR" "$CLINE" + fi +} + +# called directly on the runner +if [ -z ${1:-} ]; then + cd "/var/tmp" + source env.txt + SSH=$(which ssh) + TESTS='$HOME/zfs/.github/workflows/scripts/qemu-6-tests.sh' + echo 0 > /tmp/ctr + date "+%s" > /tmp/tsstart + + for i in $(seq 1 $VMs); do + IP="192.168.122.1$i" + daemonize -c /var/tmp 
-p vm${i}.pid -o vm${i}log.txt -- \ + $SSH zfs@$IP $TESTS $OS $i $VMs $CI_TYPE + # handly line by line and add info prefix + stdbuf -oL tail -fq vm${i}log.txt \ + | while read -r line; do prefix "$i" "$line"; done & + echo $! > vm${i}log.pid + # don't mix up the initial --- Configuration --- part + sleep 0.13 + done + + # wait for all vm's to finish + for i in $(seq 1 $VMs); do + tail --pid=$(cat vm${i}.pid) -f /dev/null + pid=$(cat vm${i}log.pid) + rm -f vm${i}log.pid + kill $pid + done + + exit 0 +fi + +# this part runs inside qemu vm +export PATH="$PATH:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/sbin:/usr/local/bin" +case "$1" in + freebsd*) + sudo kldstat -n zfs 2>/dev/null && sudo kldunload zfs + sudo -E ./zfs/scripts/zfs.sh + TDIR="/usr/local/share/zfs" + ;; + *) + # use xfs @ /var/tmp for all distros + sudo mv -f /var/tmp/*.txt /tmp + sudo mkfs.xfs -fq /dev/vdb + sudo mount -o noatime /dev/vdb /var/tmp + sudo chmod 1777 /var/tmp + sudo mv -f /tmp/*.txt /var/tmp + sudo -E modprobe zfs + TDIR="/usr/share/zfs" + ;; +esac + +# run functional testings and save exitcode +cd /var/tmp +TAGS=$2/$3 +if [ "$4" == "quick" ]; then + export RUNFILES="sanity.run" +fi +sudo dmesg -c > dmesg-prerun.txt +mount > mount.txt +df -h > df-prerun.txt +$TDIR/zfs-tests.sh -vK -s 3GB -T $TAGS +RV=$? 
+df -h > df-postrun.txt +echo $RV > tests-exitcode.txt +sync +exit 0 diff --git a/.github/workflows/scripts/qemu-7-prepare.sh b/.github/workflows/scripts/qemu-7-prepare.sh new file mode 100755 index 000000000000..a5fbd7213161 --- /dev/null +++ b/.github/workflows/scripts/qemu-7-prepare.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash + +###################################################################### +# 7) prepare output of the results +# - this script pre-creates all needed logfiles for later summary +###################################################################### + +set -eu + +# read our defined variables +cd /var/tmp +source env.txt + +mkdir -p $RESPATH + +# check if building the module has failed +if [ -z ${VMs:-} ]; then + cd $RESPATH + echo ":exclamation: ZFS module didn't build successfully :exclamation:" \ + | tee summary.txt | tee /tmp/summary.txt + cp /var/tmp/*.txt . + tar cf /tmp/qemu-$OS.tar -C $RESPATH -h . || true + exit 0 +fi + +# build was okay +BASE="$HOME/work/zfs/zfs" +MERGE="$BASE/.github/workflows/scripts/merge_summary.awk" + +# catch result files of testings (vm's should be there) +for i in $(seq 1 $VMs); do + rsync -arL zfs@192.168.122.1$i:$RESPATH/current $RESPATH/vm$i || true + scp zfs@192.168.122.1$i:"/var/tmp/*.txt" $RESPATH/vm$i || true +done +cp -f /var/tmp/*.txt $RESPATH || true +cd $RESPATH + +# prepare result files for summary +for i in $(seq 1 $VMs); do + file="vm$i/build-stderr.txt" + test -s $file && mv -f $file build-stderr.txt + + file="vm$i/build-exitcode.txt" + test -s $file && mv -f $file build-exitcode.txt + + file="vm$i/uname.txt" + test -s $file && mv -f $file uname.txt + + file="vm$i/tests-exitcode.txt" + if [ ! 
-s $file ]; then + # XXX - add some tests for kernel panic's here + # tail -n 80 vm$i/console.txt | grep XYZ + echo 1 > $file + fi + rv=$(cat vm$i/tests-exitcode.txt) + test $rv != 0 && touch /tmp/have_failed_tests + + file="vm$i/current/log" + if [ -s $file ]; then + cat $file >> log + awk '/\[FAIL\]|\[KILLED\]/{ show=1; print; next; }; \ + /\[SKIP\]|\[PASS\]/{ show=0; } show' \ + $file > /tmp/vm${i}dbg.txt + fi + + file="vm${i}log.txt" + fileC="/tmp/vm${i}log.txt" + if [ -s $file ]; then + cat $file >> summary + cat $file | $BASE/scripts/zfs-tests-color.sh > $fileC + fi +done + +# create summary of tests +if [ -s summary ]; then + $MERGE summary | grep -v '^/' > summary.txt + $MERGE summary | $BASE/scripts/zfs-tests-color.sh > /tmp/summary.txt + rm -f summary +else + touch summary.txt /tmp/summary.txt +fi + +# create file for debugging +if [ -s log ]; then + awk '/\[FAIL\]|\[KILLED\]/{ show=1; print; next; }; \ + /\[SKIP\]|\[PASS\]/{ show=0; } show' \ + log > summary-failure-logs.txt + rm -f log +else + touch summary-failure-logs.txt +fi + +# create debug overview for failed tests +cat summary.txt \ + | awk '/\(expected PASS\)/{ if ($1!="SKIP") print $2; next; } show' \ + | while read t; do + cat summary-failure-logs.txt \ + | awk '$0~/Test[: ]/{ show=0; } $0~v{ show=1; } show' v="$t" \ + > /tmp/fail.txt + SIZE=$(stat --printf="%s" /tmp/fail.txt) + SIZE=$((SIZE/1024)) + # Test Summary: + echo "##[group]$t ($SIZE KiB)" >> /tmp/failed.txt + cat /tmp/fail.txt | $BASE/scripts/zfs-tests-color.sh >> /tmp/failed.txt + echo "##[endgroup]" >> /tmp/failed.txt + # Job Summary: + echo -e "\n
\n$t ($SIZE KiB)
" >> failed.txt
+  cat /tmp/fail.txt >> failed.txt
+  echo "
" >> failed.txt +done + +if [ -e /tmp/have_failed_tests ]; then + echo ":warning: Some tests failed!" >> failed.txt +else + echo ":thumbsup: All tests passed." >> failed.txt +fi + +if [ ! -s uname.txt ]; then + echo ":interrobang: Panic - where is my uname.txt?" > uname.txt +fi + +# artifact ready now +tar cf /tmp/qemu-$OS.tar -C $RESPATH -h . || true diff --git a/.github/workflows/scripts/qemu-8-summary.sh b/.github/workflows/scripts/qemu-8-summary.sh new file mode 100755 index 000000000000..26dbab28323b --- /dev/null +++ b/.github/workflows/scripts/qemu-8-summary.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +###################################################################### +# 8) show colored output of results +###################################################################### + +set -eu + +# read our defined variables +source /var/tmp/env.txt +cd $RESPATH + +# helper function for showing some content with headline +function showfile() { + content=$(dd if=$1 bs=1024 count=400k 2>/dev/null) + if [ -z "$2" ]; then + group1="" + group2="" + else + SIZE=$(stat --printf="%s" "$file") + SIZE=$((SIZE/1024)) + group1="##[group]$2 ($SIZE KiB)" + group2="##[endgroup]" + fi +cat < tmp$$ +$group1 +$content +$group2 +EOF + cat tmp$$ + rm -f tmp$$ +} + +# overview +cat /tmp/summary.txt +echo "" + +if [ -f /tmp/have_failed_tests -a -s /tmp/failed.txt ]; then + echo "Debuginfo of failed tests:" + cat /tmp/failed.txt + echo "" + cat /tmp/summary.txt | grep -v '^/' + echo "" +fi + +echo -e "\nFull logs for download:\n $1\n" + +for i in $(seq 1 $VMs); do + rv=$(cat vm$i/tests-exitcode.txt) + + if [ $rv = 0 ]; then + vm="vm$i" + else + vm="vm$i" + fi + + file="vm$i/dmesg-prerun.txt" + test -s "$file" && showfile "$file" "$vm: dmesg kernel" + + file="/tmp/vm${i}log.txt" + test -s "$file" && showfile "$file" "$vm: test results" + + file="vm$i/console.txt" + test -s "$file" && showfile "$file" "$vm: serial console" + + file="/tmp/vm${i}dbg.txt" + test -s "$file" && showfile 
"$file" "$vm: failure logfile" +done + +test -f /tmp/have_failed_tests && exit 1 +exit 0 diff --git a/.github/workflows/scripts/qemu-9-summary-page.sh b/.github/workflows/scripts/qemu-9-summary-page.sh new file mode 100755 index 000000000000..cae287ac1073 --- /dev/null +++ b/.github/workflows/scripts/qemu-9-summary-page.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +###################################################################### +# 9) generate github summary page of all the testings +###################################################################### + +set -eu + +function output() { + echo -e $* >> "out-$logfile.md" +} + +function outfile() { + test -s "$1" || return + cat "$1" >> "out-$logfile.md" +} + +function outfile_plain() { + test -s "$1" || return + output "
"
+  cat "$1" >> "out-$logfile.md"
+  output "
" +} + +function send2github() { + test -f "$1" || exit 0 + dd if="$1" bs=1023k count=1 >> $GITHUB_STEP_SUMMARY +} + +# https://docs.github.com/en/enterprise-server@3.6/actions/using-workflows/workflow-commands-for-github-actions#step-isolation-and-limits +# Job summaries are isolated between steps and each step is restricted to a maximum size of 1MiB. +# [ ] can not show all error findings here +# [x] split files into smaller ones and create additional steps + +# first call, generate all summaries +if [ ! -f out-1.md ]; then + logfile="1" + for tarfile in Logs-functional-*/qemu-*.tar; do + rm -rf vm* *.txt + if [ ! -s "$tarfile" ]; then + output "\n## Functional Tests: unknown\n" + output ":exclamation: Tarfile $tarfile is empty :exclamation:" + continue + fi + tar xf "$tarfile" + test -s env.txt || continue + source env.txt + output "\n## Functional Tests: $OSNAME\n" + outfile_plain uname.txt + outfile_plain summary.txt + outfile failed.txt + logfile=$((logfile+1)) + done + send2github out-1.md +else + send2github out-$1.md +fi diff --git a/.github/workflows/scripts/setup-dependencies.sh b/.github/workflows/scripts/setup-dependencies.sh deleted file mode 100755 index b40f9290f914..000000000000 --- a/.github/workflows/scripts/setup-dependencies.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env bash - -set -eu - -function prerun() { - echo "::group::Install build dependencies" - # remove snap things, update+upgrade will be faster then - for x in lxd core20 snapd; do sudo snap remove $x; done - sudo apt-get purge snapd google-chrome-stable firefox - # https://github.com/orgs/community/discussions/47863 - sudo apt-get remove grub-efi-amd64-bin grub-efi-amd64-signed shim-signed --allow-remove-essential - sudo apt-get update - sudo apt upgrade - sudo xargs --arg-file=.github/workflows/build-dependencies.txt apt-get install -qq - sudo apt-get clean - sudo dmesg -c > /var/tmp/dmesg-prerun - echo "::endgroup::" -} - -function mod_build() { - echo "::group::Generate debian 
packages" - ./autogen.sh - ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan - make --no-print-directory --silent native-deb-utils native-deb-kmod - mv ../*.deb . - rm ./openzfs-zfs-dracut*.deb ./openzfs-zfs-dkms*.deb - echo "$ImageOS-$ImageVersion" > tests/ImageOS.txt - echo "::endgroup::" -} - -function mod_install() { - # install the pre-built module only on the same runner image - MOD=`cat tests/ImageOS.txt` - if [ "$MOD" != "$ImageOS-$ImageVersion" ]; then - rm -f *.deb - mod_build - fi - - echo "::group::Install and load modules" - # don't use kernel-shipped zfs modules - sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf - sudo apt-get install --fix-missing ./*.deb - - # Native Debian packages enable and start the services - # Stop zfs-zed daemon, as it may interfere with some ZTS test cases - sudo systemctl stop zfs-zed - sudo depmod -a - sudo modprobe zfs - sudo dmesg - sudo dmesg -c > /var/tmp/dmesg-module-load - echo "::endgroup::" - - echo "::group::Report CPU information" - lscpu - cat /proc/spl/kstat/zfs/chksum_bench - echo "::endgroup::" - - echo "::group::Optimize storage for ZFS testings" - # remove swap and umount fast storage - # 89GiB -> rootfs + bootfs with ~80MB/s -> don't care - # 64GiB -> /mnt with 420MB/s -> new testing ssd - sudo swapoff -a - - # this one is fast and mounted @ /mnt - # -> we reformat with ext4 + move it to /var/tmp - DEV="/dev/disk/azure/resource-part1" - sudo umount /mnt - sudo mkfs.ext4 -O ^has_journal -F $DEV - sudo mount -o noatime,barrier=0 $DEV /var/tmp - sudo chmod 1777 /var/tmp - - # disk usage afterwards - sudo df -h / - sudo df -h /var/tmp - sudo fstrim -a - echo "::endgroup::" -} - -case "$1" in - build) - prerun - mod_build - ;; - tests) - prerun - mod_install - ;; -esac diff --git a/.github/workflows/scripts/setup-functional.sh b/.github/workflows/scripts/setup-functional.sh deleted file mode 100755 index 08c4d872abdf..000000000000 --- 
a/.github/workflows/scripts/setup-functional.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -set -eu - -TDIR="/usr/share/zfs/zfs-tests/tests/functional" -echo -n "TODO=" -case "$1" in - part1) - # ~1h 20m - echo "cli_root" - ;; - part2) - # ~1h - ls $TDIR|grep '^[a-m]'|grep -v "cli_root"|xargs|tr -s ' ' ',' - ;; - part3) - # ~1h - ls $TDIR|grep '^[n-qs-z]'|xargs|tr -s ' ' ',' - ;; - part4) - # ~1h - ls $TDIR|grep '^r'|xargs|tr -s ' ' ',' - ;; -esac diff --git a/.github/workflows/zfs-linux-tests.yml b/.github/workflows/zfs-linux-tests.yml deleted file mode 100644 index 74f94ab8f2a4..000000000000 --- a/.github/workflows/zfs-linux-tests.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: zfs-linux-tests - -on: - workflow_call: - inputs: - os: - description: 'The ubuntu version: 20.02 or 22.04' - required: true - type: string - -jobs: - - zloop: - runs-on: ubuntu-${{ inputs.os }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v4 - with: - name: modules-${{ inputs.os }} - - name: Install modules - run: | - tar xzf modules-${{ inputs.os }}.tgz - .github/workflows/scripts/setup-dependencies.sh tests - - name: Tests - timeout-minutes: 30 - run: | - sudo mkdir -p /var/tmp/zloop - # run for 10 minutes or at most 2 iterations for a maximum runner - # time of 20 minutes. 
- sudo /usr/share/zfs/zloop.sh -t 600 -I 2 -l -m1 -- -T 120 -P 60 - - name: Prepare artifacts - if: failure() - run: | - sudo chmod +r -R /var/tmp/zloop/ - - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Zloop-logs-${{ inputs.os }} - path: | - /var/tmp/zloop/*/ - !/var/tmp/zloop/*/vdev/ - retention-days: 14 - if-no-files-found: ignore - - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Zloop-files-${{ inputs.os }} - path: | - /var/tmp/zloop/*/vdev/ - retention-days: 14 - if-no-files-found: ignore - - sanity: - runs-on: ubuntu-${{ inputs.os }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v4 - with: - name: modules-${{ inputs.os }} - - name: Install modules - run: | - tar xzf modules-${{ inputs.os }}.tgz - .github/workflows/scripts/setup-dependencies.sh tests - - name: Tests - timeout-minutes: 60 - shell: bash - run: | - set -o pipefail - /usr/share/zfs/zfs-tests.sh -vKR -s 3G -r sanity | scripts/zfs-tests-color.sh - - name: Prepare artifacts - if: success() || failure() - run: | - RESPATH="/var/tmp/test_results" - mv -f $RESPATH/current $RESPATH/testfiles - tar cf $RESPATH/sanity.tar -h -C $RESPATH testfiles - - uses: actions/upload-artifact@v4 - if: success() || failure() - with: - name: Logs-${{ inputs.os }}-sanity - path: /var/tmp/test_results/sanity.tar - if-no-files-found: ignore - - functional: - runs-on: ubuntu-${{ inputs.os }} - strategy: - fail-fast: false - matrix: - tests: [ part1, part2, part3, part4 ] - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v4 - with: - name: modules-${{ inputs.os }} - - name: Install modules - run: | - tar xzf modules-${{ inputs.os }}.tgz - .github/workflows/scripts/setup-dependencies.sh tests - - name: Setup tests - run: | - .github/workflows/scripts/setup-functional.sh ${{ matrix.tests }} >> $GITHUB_ENV - - name: Tests - 
timeout-minutes: 120 - shell: bash - run: | - set -o pipefail - /usr/share/zfs/zfs-tests.sh -vKR -s 3G -T ${{ env.TODO }} | scripts/zfs-tests-color.sh - - name: Prepare artifacts - if: success() || failure() - run: | - RESPATH="/var/tmp/test_results" - mv -f $RESPATH/current $RESPATH/testfiles - tar cf $RESPATH/${{ matrix.tests }}.tar -h -C $RESPATH testfiles - - uses: actions/upload-artifact@v4 - if: success() || failure() - with: - name: Logs-${{ inputs.os }}-functional-${{ matrix.tests }} - path: /var/tmp/test_results/${{ matrix.tests }}.tar - if-no-files-found: ignore diff --git a/.github/workflows/zfs-linux.yml b/.github/workflows/zfs-linux.yml deleted file mode 100644 index 13111f6d9847..000000000000 --- a/.github/workflows/zfs-linux.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: zfs-linux - -on: - push: - pull_request: - -jobs: - - build: - name: Build - strategy: - fail-fast: false - matrix: - os: [22.04] - runs-on: ubuntu-${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Build modules - run: .github/workflows/scripts/setup-dependencies.sh build - - name: Prepare modules upload - run: tar czf modules-${{ matrix.os }}.tgz *.deb .github tests/test-runner tests/ImageOS.txt - - uses: actions/upload-artifact@v4 - with: - name: modules-${{ matrix.os }} - path: modules-${{ matrix.os }}.tgz - retention-days: 14 - - testings: - name: Testing - strategy: - fail-fast: false - matrix: - os: [22.04] - needs: build - uses: ./.github/workflows/zfs-linux-tests.yml - with: - os: ${{ matrix.os }} - - cleanup: - if: always() - name: Cleanup - runs-on: ubuntu-22.04 - needs: testings - steps: - - uses: actions/download-artifact@v4 - - name: Generating summary - run: | - tar xzf modules-22.04/modules-22.04.tgz .github tests - .github/workflows/scripts/generate-summary.sh - # up to 4 steps, each can have 1 MiB output (for debugging log files) - - name: Summary for errors #1 - run: 
.github/workflows/scripts/generate-summary.sh 1 - - name: Summary for errors #2 - run: .github/workflows/scripts/generate-summary.sh 2 - - name: Summary for errors #3 - run: .github/workflows/scripts/generate-summary.sh 3 - - name: Summary for errors #4 - run: .github/workflows/scripts/generate-summary.sh 4 - - uses: actions/upload-artifact@v4 - with: - name: Summary Files - path: Summary/ diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml new file mode 100644 index 000000000000..8922701f9899 --- /dev/null +++ b/.github/workflows/zfs-qemu.yml @@ -0,0 +1,175 @@ +name: zfs-qemu + +on: + push: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + test-config: + name: Setup + runs-on: ubuntu-24.04 + outputs: + test_os: ${{ steps.os.outputs.os }} + ci_type: ${{ steps.os.outputs.ci_type }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generate OS config and CI type + id: os + run: | + FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13", "freebsd13r", "freebsd14", "freebsd14r", "ubuntu20", "ubuntu22", "ubuntu24"]' + QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13", "freebsd14", "ubuntu24"]' + # determine CI type when running on PR + ci_type="full" + if ${{ github.event_name == 'pull_request' }}; then + head=${{ github.event.pull_request.head.sha }} + base=${{ github.event.pull_request.base.sha }} + ci_type=$(python3 .github/workflows/scripts/generate-ci-type.py $head $base) + fi + if [ "$ci_type" == "quick" ]; then + os_selection="$QUICK_OS" + else + os_selection="$FULL_OS" + fi + os_json=$(echo ${os_selection} | jq -c) + echo "os=$os_json" >> $GITHUB_OUTPUT + echo "ci_type=$ci_type" >> $GITHUB_OUTPUT + + qemu-vm: + name: qemu-x86 + needs: [ test-config ] + strategy: + fail-fast: false + matrix: + # all: + # os: [almalinux8, almalinux9, 
archlinux, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24] + # openzfs: + # os: [almalinux8, almalinux9, centos-stream9, debian11, debian12, fedora39, fedora40, freebsd13, freebsd13r, freebsd14, freebsd14r, ubuntu20, ubuntu22, ubuntu24] + os: ${{ fromJson(needs.test-config.outputs.test_os) }} + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Setup QEMU + timeout-minutes: 10 + run: .github/workflows/scripts/qemu-1-setup.sh + + - name: Start build machine + timeout-minutes: 10 + run: .github/workflows/scripts/qemu-2-start.sh ${{ matrix.os }} + + - name: Install dependencies + timeout-minutes: 20 + run: | + echo "Install dependencies in QEMU machine" + IP=192.168.122.10 + while pidof /usr/bin/qemu-system-x86_64 >/dev/null; do + ssh 2>/dev/null zfs@$IP "uname -a" && break + done + scp .github/workflows/scripts/qemu-3-deps.sh zfs@$IP:qemu-3-deps.sh + PID=`pidof /usr/bin/qemu-system-x86_64` + ssh zfs@$IP '$HOME/qemu-3-deps.sh' ${{ matrix.os }} + # wait for poweroff to succeed + tail --pid=$PID -f /dev/null + sleep 5 # avoid this: "error: Domain is already active" + rm -f $HOME/.ssh/known_hosts + + - name: Build modules + timeout-minutes: 30 + run: | + echo "Build modules in QEMU machine" + sudo virsh start openzfs + IP=192.168.122.10 + while pidof /usr/bin/qemu-system-x86_64 >/dev/null; do + ssh 2>/dev/null zfs@$IP "uname -a" && break + done + rsync -ar $HOME/work/zfs/zfs zfs@$IP:./ + ssh zfs@$IP '$HOME/zfs/.github/workflows/scripts/qemu-4-build.sh' ${{ matrix.os }} + + - name: Setup testing machines + timeout-minutes: 5 + run: .github/workflows/scripts/qemu-5-setup.sh + + - name: Run tests + timeout-minutes: 270 + run: .github/workflows/scripts/qemu-6-tests.sh + env: + CI_TYPE: ${{ needs.test-config.outputs.ci_type }} + + - name: Prepare artifacts + if: always() + timeout-minutes: 10 + run: 
.github/workflows/scripts/qemu-7-prepare.sh + + - uses: actions/upload-artifact@v4 + id: artifact-upload + if: always() + with: + name: Logs-functional-${{ matrix.os }} + path: /tmp/qemu-${{ matrix.os }}.tar + if-no-files-found: ignore + + - name: Test Summary + if: always() + run: .github/workflows/scripts/qemu-8-summary.sh '${{ steps.artifact-upload.outputs.artifact-url }}' + + cleanup: + if: always() + name: Cleanup + runs-on: ubuntu-latest + needs: [ qemu-vm ] + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + - uses: actions/download-artifact@v4 + - name: Generating summary + run: .github/workflows/scripts/qemu-9-summary-page.sh + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 2 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 3 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 4 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 5 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 6 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 7 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 8 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 9 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 10 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 11 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 12 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 13 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 14 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 15 + - name: Generating summary... 
+ run: .github/workflows/scripts/qemu-9-summary-page.sh 16 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 17 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 18 + - name: Generating summary... + run: .github/workflows/scripts/qemu-9-summary-page.sh 19 + - uses: actions/upload-artifact@v4 + with: + name: Summary Files + path: out-* diff --git a/.github/workflows/zloop.yml b/.github/workflows/zloop.yml new file mode 100644 index 000000000000..90d93c48e4bd --- /dev/null +++ b/.github/workflows/zloop.yml @@ -0,0 +1,77 @@ +name: zloop + +on: + push: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + zloop: + runs-on: ubuntu-24.04 + env: + TEST_DIR: /var/tmp/zloop + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Install dependencies + run: | + sudo apt-get purge -y snapd google-chrome-stable firefox + ONLY_DEPS=1 .github/workflows/scripts/qemu-3-deps.sh ubuntu24 + - name: Autogen.sh + run: | + sed -i '/DEBUG_CFLAGS="-Werror"/s/^/#/' config/zfs-build.m4 + ./autogen.sh + - name: Configure + run: | + ./configure --prefix=/usr --enable-debug --enable-debuginfo \ + --enable-asan --enable-ubsan \ + --enable-debug-kmem --enable-debug-kmem-tracking + - name: Make + run: | + make -j$(nproc) + - name: Install + run: | + sudo make install + sudo depmod + sudo modprobe zfs + - name: Tests + run: | + sudo mkdir -p $TEST_DIR + # run for 10 minutes or at most 6 iterations for a maximum runner + # time of 60 minutes. 
+ sudo /usr/share/zfs/zloop.sh -t 600 -I 6 -l -m 1 -- -T 120 -P 60 + - name: Prepare artifacts + if: failure() + run: | + sudo chmod +r -R $TEST_DIR/ + - name: Ztest log + if: failure() + run: | + grep -B10 -A1000 'ASSERT' $TEST_DIR/*/ztest.out || tail -n 1000 $TEST_DIR/*/ztest.out + - name: Gdb log + if: failure() + run: | + sed -n '/Backtraces (full)/q;p' $TEST_DIR/*/ztest.gdb + - name: Zdb log + if: failure() + run: | + cat $TEST_DIR/*/ztest.zdb + - uses: actions/upload-artifact@v4 + if: failure() + with: + name: Logs + path: | + /var/tmp/zloop/*/ + !/var/tmp/zloop/*/vdev/ + if-no-files-found: ignore + - uses: actions/upload-artifact@v4 + if: failure() + with: + name: Pool files + path: | + /var/tmp/zloop/*/vdev/ + if-no-files-found: ignore diff --git a/.mailmap b/.mailmap index 7e17d82d7352..5aa1eef464d9 100644 --- a/.mailmap +++ b/.mailmap @@ -208,6 +208,7 @@ Vaibhav Bhanawat <88050553+vaibhav-delphix@users. Violet Purcell <66446404+vimproved@users.noreply.github.com> Vipin Kumar Verma <75025470+vermavipinkumar@users.noreply.github.com> Wolfgang Bumiller +XDTG <35128600+XDTG@users.noreply.github.com> xtouqh <72357159+xtouqh@users.noreply.github.com> Yuri Pankov <113725409+yuripv@users.noreply.github.com> Yuri Pankov <82001006+yuripv@users.noreply.github.com> diff --git a/AUTHORS b/AUTHORS index 09814b435311..6a5cc088e651 100644 --- a/AUTHORS +++ b/AUTHORS @@ -248,6 +248,7 @@ CONTRIBUTORS: Gian-Carlo DeFazio Gionatan Danti Giuseppe Di Natale + Gleb Smirnoff Glenn Washburn glibg10b gofaster @@ -565,6 +566,7 @@ CONTRIBUTORS: Sen Haerens Serapheim Dimitropoulos Seth Forshee + Seth Hoffert Seth Troisi Shaan Nobee Shampavman @@ -598,6 +600,7 @@ CONTRIBUTORS: Tamas TEVESZ Teodor Spæren TerraTech + Theera K. 
Thijs Cramer Thomas Bertschinger Thomas Geppert @@ -648,6 +651,7 @@ CONTRIBUTORS: Windel Bouwman Wojciech Małota-Wójcik Wolfgang Bumiller + XDTG Xin Li Xinliang Liu xtouqh diff --git a/META b/META index 76ca22cbae00..dfe0f8a2d030 100644 --- a/META +++ b/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.10 -Linux-Minimum: 3.10 +Linux-Maximum: 6.11 +Linux-Minimum: 4.18 diff --git a/cmd/arc_summary b/cmd/arc_summary index 2d0bfddd93a9..c24d400fa39a 100755 --- a/cmd/arc_summary +++ b/cmd/arc_summary @@ -566,7 +566,7 @@ def section_arc(kstats_dict): l2_hdr_size = arc_stats['l2_hdr_size'] abd_chunk_waste_size = arc_stats['abd_chunk_waste_size'] - prt_1('ARC structal breakdown (current size):', f_bytes(arc_size)) + prt_1('ARC structural breakdown (current size):', f_bytes(arc_size)) prt_i2('Compressed size:', f_perc(compressed_size, arc_size), f_bytes(compressed_size)) prt_i2('Overhead size:', diff --git a/cmd/arcstat.in b/cmd/arcstat.in index c4f10a1d6d3b..5016e16d0685 100755 --- a/cmd/arcstat.in +++ b/cmd/arcstat.in @@ -152,6 +152,7 @@ cols = { "l2asize": [7, 1024, "Actual (compressed) size of the L2ARC"], "l2size": [6, 1024, "Size of the L2ARC"], "l2bytes": [7, 1024, "Bytes read per second from the L2ARC"], + "l2wbytes": [8, 1024, "Bytes written per second to the L2ARC"], "grow": [4, 1000, "ARC grow disabled"], "need": [5, 1024, "ARC reclaim need"], "free": [5, 1024, "ARC free memory"], @@ -169,6 +170,83 @@ cols = { "zactive": [7, 1000, "zfetch prefetches active per second"], } +# ARC structural breakdown from arc_summary +structfields = { + "cmp": ["compressed", "Compressed"], + "ovh": ["overhead", "Overhead"], + "bon": ["bonus", "Bonus"], + "dno": ["dnode", "Dnode"], + "dbu": ["dbuf", "Dbuf"], + "hdr": ["hdr", "Header"], + "l2h": ["l2_hdr", "L2 header"], + "abd": ["abd_chunk_waste", "ABD chunk waste"], +} +structstats = { # size stats + "percent": "size", # percentage of this value + "sz": ["_size", "size"], +} + +# 
ARC types breakdown from arc_summary +typefields = { + "data": ["data", "ARC data"], + "meta": ["metadata", "ARC metadata"], +} +typestats = { # size stats + "percent": "cachessz", # percentage of this value + "tg": ["_target", "target"], + "sz": ["_size", "size"], +} + +# ARC states breakdown from arc_summary +statefields = { + "ano": ["anon", "Anonymous"], + "mfu": ["mfu", "MFU"], + "mru": ["mru", "MRU"], + "unc": ["uncached", "Uncached"], +} +targetstats = { + "percent": "cachessz", # percentage of this value + "fields": ["mfu", "mru"], # only applicable to these fields + "tg": ["_target", "target"], + "dt": ["_data_target", "data target"], + "mt": ["_metadata_target", "metadata target"], +} +statestats = { # size stats + "percent": "cachessz", # percentage of this value + "sz": ["_size", "size"], + "da": ["_data", "data size"], + "me": ["_metadata", "metadata size"], + "ed": ["_evictable_data", "evictable data size"], + "em": ["_evictable_metadata", "evictable metadata size"], +} +ghoststats = { + "fields": ["mfu", "mru"], # only applicable to these fields + "gsz": ["_ghost_size", "ghost size"], + "gd": ["_ghost_data", "ghost data size"], + "gm": ["_ghost_metadata", "ghost metadata size"], +} + +# fields and stats +fieldstats = [ + [structfields, structstats], + [typefields, typestats], + [statefields, targetstats, statestats, ghoststats], +] +for fs in fieldstats: + fields, stats = fs[0], fs[1:] + for field, fieldval in fields.items(): + for group in stats: + for stat, statval in group.items(): + if stat in ["fields", "percent"] or \ + ("fields" in group and field not in group["fields"]): + continue + colname = field + stat + coldesc = fieldval[1] + " " + statval[1] + cols[colname] = [len(colname), 1024, coldesc] + if "percent" in group: + cols[colname + "%"] = [len(colname) + 1, 100, \ + coldesc + " percentage"] + v = {} hdr = ["time", "read", "ddread", "ddh%", "dmread", "dmh%", "pread", "ph%", "size", "c", "avail"] @@ -286,6 +364,29 @@ def snap_stats(): 
kstat_update() cur = kstat + + # fill in additional values from arc_summary + cur["caches_size"] = caches_size = cur["anon_data"]+cur["anon_metadata"]+\ + cur["mfu_data"]+cur["mfu_metadata"]+cur["mru_data"]+cur["mru_metadata"]+\ + cur["uncached_data"]+cur["uncached_metadata"] + s = 4294967296 + pd = cur["pd"] + pm = cur["pm"] + meta = cur["meta"] + v = (s-int(pd))*(s-int(meta))/s + cur["mfu_data_target"] = v / 65536 * caches_size / 65536 + v = (s-int(pm))*int(meta)/s + cur["mfu_metadata_target"] = v / 65536 * caches_size / 65536 + v = int(pd)*(s-int(meta))/s + cur["mru_data_target"] = v / 65536 * caches_size / 65536 + v = int(pm)*int(meta)/s + cur["mru_metadata_target"] = v / 65536 * caches_size / 65536 + + cur["data_target"] = cur["mfu_data_target"] + cur["mru_data_target"] + cur["metadata_target"] = cur["mfu_metadata_target"] + cur["mru_metadata_target"] + cur["mfu_target"] = cur["mfu_data_target"] + cur["mfu_metadata_target"] + cur["mru_target"] = cur["mru_data_target"] + cur["mru_metadata_target"] + for key in cur: if re.match(key, "class"): continue @@ -295,31 +396,34 @@ def snap_stats(): d[key] = cur[key] +def isint(num): + if isinstance(num, float): + return num.is_integer() + if isinstance(num, int): + return True + return False + + def prettynum(sz, scale, num=0): - suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'] + suffix = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'] index = 0 - save = 0 # Special case for date field if scale == -1: return "%s" % num - # Rounding error, return 0 - elif 0 < num < 1: - num = 0 - - while abs(num) > scale and index < 5: - save = num - num = num / scale - index += 1 - - if index == 0: - return "%*d" % (sz, num) + if scale != 100: + while abs(num) > scale and index < 5: + num = num / scale + index += 1 - if abs(save / scale) < 10: - return "%*.1f%s" % (sz - 1, num, suffix[index]) + width = sz - (0 if index == 0 else 1) + intlen = len("%.0f" % num) # %.0f rounds to nearest int + if sint == 1 and isint(num) or width < intlen + 2: + 
decimal = 0 else: - return "%*d%s" % (sz - 1, num, suffix[index]) + decimal = 1 + return "%*.*f%s" % (width, decimal, num, suffix[index]) def print_values(): @@ -509,131 +613,148 @@ def calculate(): v = dict() v["time"] = time.strftime("%H:%M:%S", time.localtime()) - v["hits"] = d["hits"] // sint - v["iohs"] = d["iohits"] // sint - v["miss"] = d["misses"] // sint + v["hits"] = d["hits"] / sint + v["iohs"] = d["iohits"] / sint + v["miss"] = d["misses"] / sint v["read"] = v["hits"] + v["iohs"] + v["miss"] - v["hit%"] = 100 * v["hits"] // v["read"] if v["read"] > 0 else 0 - v["ioh%"] = 100 * v["iohs"] // v["read"] if v["read"] > 0 else 0 + v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0 + v["ioh%"] = 100 * v["iohs"] / v["read"] if v["read"] > 0 else 0 v["miss%"] = 100 - v["hit%"] - v["ioh%"] if v["read"] > 0 else 0 - v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) // sint - v["dioh"] = (d["demand_data_iohits"] + d["demand_metadata_iohits"]) // sint - v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) // sint + v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint + v["dioh"] = (d["demand_data_iohits"] + d["demand_metadata_iohits"]) / sint + v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint v["dread"] = v["dhit"] + v["dioh"] + v["dmis"] - v["dh%"] = 100 * v["dhit"] // v["dread"] if v["dread"] > 0 else 0 - v["di%"] = 100 * v["dioh"] // v["dread"] if v["dread"] > 0 else 0 + v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0 + v["di%"] = 100 * v["dioh"] / v["dread"] if v["dread"] > 0 else 0 v["dm%"] = 100 - v["dh%"] - v["di%"] if v["dread"] > 0 else 0 - v["ddhit"] = d["demand_data_hits"] // sint - v["ddioh"] = d["demand_data_iohits"] // sint - v["ddmis"] = d["demand_data_misses"] // sint + v["ddhit"] = d["demand_data_hits"] / sint + v["ddioh"] = d["demand_data_iohits"] / sint + v["ddmis"] = d["demand_data_misses"] / sint v["ddread"] = v["ddhit"] + v["ddioh"] + 
v["ddmis"] - v["ddh%"] = 100 * v["ddhit"] // v["ddread"] if v["ddread"] > 0 else 0 - v["ddi%"] = 100 * v["ddioh"] // v["ddread"] if v["ddread"] > 0 else 0 + v["ddh%"] = 100 * v["ddhit"] / v["ddread"] if v["ddread"] > 0 else 0 + v["ddi%"] = 100 * v["ddioh"] / v["ddread"] if v["ddread"] > 0 else 0 v["ddm%"] = 100 - v["ddh%"] - v["ddi%"] if v["ddread"] > 0 else 0 - v["dmhit"] = d["demand_metadata_hits"] // sint - v["dmioh"] = d["demand_metadata_iohits"] // sint - v["dmmis"] = d["demand_metadata_misses"] // sint + v["dmhit"] = d["demand_metadata_hits"] / sint + v["dmioh"] = d["demand_metadata_iohits"] / sint + v["dmmis"] = d["demand_metadata_misses"] / sint v["dmread"] = v["dmhit"] + v["dmioh"] + v["dmmis"] - v["dmh%"] = 100 * v["dmhit"] // v["dmread"] if v["dmread"] > 0 else 0 - v["dmi%"] = 100 * v["dmioh"] // v["dmread"] if v["dmread"] > 0 else 0 + v["dmh%"] = 100 * v["dmhit"] / v["dmread"] if v["dmread"] > 0 else 0 + v["dmi%"] = 100 * v["dmioh"] / v["dmread"] if v["dmread"] > 0 else 0 v["dmm%"] = 100 - v["dmh%"] - v["dmi%"] if v["dmread"] > 0 else 0 - v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) // sint + v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint v["pioh"] = (d["prefetch_data_iohits"] + - d["prefetch_metadata_iohits"]) // sint + d["prefetch_metadata_iohits"]) / sint v["pmis"] = (d["prefetch_data_misses"] + - d["prefetch_metadata_misses"]) // sint + d["prefetch_metadata_misses"]) / sint v["pread"] = v["phit"] + v["pioh"] + v["pmis"] - v["ph%"] = 100 * v["phit"] // v["pread"] if v["pread"] > 0 else 0 - v["pi%"] = 100 * v["pioh"] // v["pread"] if v["pread"] > 0 else 0 + v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0 + v["pi%"] = 100 * v["pioh"] / v["pread"] if v["pread"] > 0 else 0 v["pm%"] = 100 - v["ph%"] - v["pi%"] if v["pread"] > 0 else 0 - v["pdhit"] = d["prefetch_data_hits"] // sint - v["pdioh"] = d["prefetch_data_iohits"] // sint - v["pdmis"] = d["prefetch_data_misses"] // sint + 
v["pdhit"] = d["prefetch_data_hits"] / sint + v["pdioh"] = d["prefetch_data_iohits"] / sint + v["pdmis"] = d["prefetch_data_misses"] / sint v["pdread"] = v["pdhit"] + v["pdioh"] + v["pdmis"] - v["pdh%"] = 100 * v["pdhit"] // v["pdread"] if v["pdread"] > 0 else 0 - v["pdi%"] = 100 * v["pdioh"] // v["pdread"] if v["pdread"] > 0 else 0 + v["pdh%"] = 100 * v["pdhit"] / v["pdread"] if v["pdread"] > 0 else 0 + v["pdi%"] = 100 * v["pdioh"] / v["pdread"] if v["pdread"] > 0 else 0 v["pdm%"] = 100 - v["pdh%"] - v["pdi%"] if v["pdread"] > 0 else 0 - v["pmhit"] = d["prefetch_metadata_hits"] // sint - v["pmioh"] = d["prefetch_metadata_iohits"] // sint - v["pmmis"] = d["prefetch_metadata_misses"] // sint + v["pmhit"] = d["prefetch_metadata_hits"] / sint + v["pmioh"] = d["prefetch_metadata_iohits"] / sint + v["pmmis"] = d["prefetch_metadata_misses"] / sint v["pmread"] = v["pmhit"] + v["pmioh"] + v["pmmis"] - v["pmh%"] = 100 * v["pmhit"] // v["pmread"] if v["pmread"] > 0 else 0 - v["pmi%"] = 100 * v["pmioh"] // v["pmread"] if v["pmread"] > 0 else 0 + v["pmh%"] = 100 * v["pmhit"] / v["pmread"] if v["pmread"] > 0 else 0 + v["pmi%"] = 100 * v["pmioh"] / v["pmread"] if v["pmread"] > 0 else 0 v["pmm%"] = 100 - v["pmh%"] - v["pmi%"] if v["pmread"] > 0 else 0 v["mhit"] = (d["prefetch_metadata_hits"] + - d["demand_metadata_hits"]) // sint + d["demand_metadata_hits"]) / sint v["mioh"] = (d["prefetch_metadata_iohits"] + - d["demand_metadata_iohits"]) // sint + d["demand_metadata_iohits"]) / sint v["mmis"] = (d["prefetch_metadata_misses"] + - d["demand_metadata_misses"]) // sint + d["demand_metadata_misses"]) / sint v["mread"] = v["mhit"] + v["mioh"] + v["mmis"] - v["mh%"] = 100 * v["mhit"] // v["mread"] if v["mread"] > 0 else 0 - v["mi%"] = 100 * v["mioh"] // v["mread"] if v["mread"] > 0 else 0 + v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0 + v["mi%"] = 100 * v["mioh"] / v["mread"] if v["mread"] > 0 else 0 v["mm%"] = 100 - v["mh%"] - v["mi%"] if v["mread"] > 0 else 0 
v["arcsz"] = cur["size"] v["size"] = cur["size"] v["c"] = cur["c"] - v["mfu"] = d["mfu_hits"] // sint - v["mru"] = d["mru_hits"] // sint - v["mrug"] = d["mru_ghost_hits"] // sint - v["mfug"] = d["mfu_ghost_hits"] // sint - v["unc"] = d["uncached_hits"] // sint - v["eskip"] = d["evict_skip"] // sint - v["el2skip"] = d["evict_l2_skip"] // sint - v["el2cach"] = d["evict_l2_cached"] // sint - v["el2el"] = d["evict_l2_eligible"] // sint - v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint - v["el2mru"] = d["evict_l2_eligible_mru"] // sint - v["el2inel"] = d["evict_l2_ineligible"] // sint - v["mtxmis"] = d["mutex_miss"] // sint + v["mfu"] = d["mfu_hits"] / sint + v["mru"] = d["mru_hits"] / sint + v["mrug"] = d["mru_ghost_hits"] / sint + v["mfug"] = d["mfu_ghost_hits"] / sint + v["unc"] = d["uncached_hits"] / sint + v["eskip"] = d["evict_skip"] / sint + v["el2skip"] = d["evict_l2_skip"] / sint + v["el2cach"] = d["evict_l2_cached"] / sint + v["el2el"] = d["evict_l2_eligible"] / sint + v["el2mfu"] = d["evict_l2_eligible_mfu"] / sint + v["el2mru"] = d["evict_l2_eligible_mru"] / sint + v["el2inel"] = d["evict_l2_ineligible"] / sint + v["mtxmis"] = d["mutex_miss"] / sint v["ztotal"] = (d["zfetch_hits"] + d["zfetch_future"] + d["zfetch_stride"] + - d["zfetch_past"] + d["zfetch_misses"]) // sint - v["zhits"] = d["zfetch_hits"] // sint - v["zahead"] = (d["zfetch_future"] + d["zfetch_stride"]) // sint - v["zpast"] = d["zfetch_past"] // sint - v["zmisses"] = d["zfetch_misses"] // sint - v["zmax"] = d["zfetch_max_streams"] // sint - v["zfuture"] = d["zfetch_future"] // sint - v["zstride"] = d["zfetch_stride"] // sint - v["zissued"] = d["zfetch_io_issued"] // sint - v["zactive"] = d["zfetch_io_active"] // sint + d["zfetch_past"] + d["zfetch_misses"]) / sint + v["zhits"] = d["zfetch_hits"] / sint + v["zahead"] = (d["zfetch_future"] + d["zfetch_stride"]) / sint + v["zpast"] = d["zfetch_past"] / sint + v["zmisses"] = d["zfetch_misses"] / sint + v["zmax"] = d["zfetch_max_streams"] / sint + 
v["zfuture"] = d["zfetch_future"] / sint + v["zstride"] = d["zfetch_stride"] / sint + v["zissued"] = d["zfetch_io_issued"] / sint + v["zactive"] = d["zfetch_io_active"] / sint + + # ARC structural breakdown, ARC types breakdown, ARC states breakdown + v["cachessz"] = cur["caches_size"] + for fs in fieldstats: + fields, stats = fs[0], fs[1:] + for field, fieldval in fields.items(): + for group in stats: + for stat, statval in group.items(): + if stat in ["fields", "percent"] or \ + ("fields" in group and field not in group["fields"]): + continue + colname = field + stat + v[colname] = cur[fieldval[0] + statval[0]] + if "percent" in group: + v[colname + "%"] = 100 * v[colname] / \ + v[group["percent"]] if v[group["percent"]] > 0 else 0 if l2exist: - v["l2hits"] = d["l2_hits"] // sint - v["l2miss"] = d["l2_misses"] // sint + v["l2hits"] = d["l2_hits"] / sint + v["l2miss"] = d["l2_misses"] / sint v["l2read"] = v["l2hits"] + v["l2miss"] - v["l2hit%"] = 100 * v["l2hits"] // v["l2read"] if v["l2read"] > 0 else 0 + v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0 v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0 v["l2asize"] = cur["l2_asize"] v["l2size"] = cur["l2_size"] - v["l2bytes"] = d["l2_read_bytes"] // sint + v["l2bytes"] = d["l2_read_bytes"] / sint + v["l2wbytes"] = d["l2_write_bytes"] / sint v["l2pref"] = cur["l2_prefetch_asize"] v["l2mfu"] = cur["l2_mfu_asize"] v["l2mru"] = cur["l2_mru_asize"] v["l2data"] = cur["l2_bufc_data_asize"] v["l2meta"] = cur["l2_bufc_metadata_asize"] - v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"] - v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"] - v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"] - v["l2data%"] = 100 * v["l2data"] // v["l2asize"] - v["l2meta%"] = 100 * v["l2meta"] // v["l2asize"] + v["l2pref%"] = 100 * v["l2pref"] / v["l2asize"] + v["l2mfu%"] = 100 * v["l2mfu"] / v["l2asize"] + v["l2mru%"] = 100 * v["l2mru"] / v["l2asize"] + v["l2data%"] = 100 * v["l2data"] / v["l2asize"] + v["l2meta%"] = 
100 * v["l2meta"] / v["l2asize"] v["grow"] = 0 if cur["arc_no_grow"] else 1 v["need"] = cur["arc_need_free"] diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 8e3b6972ae04..2c136bacb1a8 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1117,7 +1117,7 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size) { (void) data, (void) size; zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attrp = zap_attribute_long_alloc(); void *prop; unsigned i; @@ -1125,53 +1125,54 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size) (void) printf("\n"); for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_retrieve(&zc, attrp) == 0; zap_cursor_advance(&zc)) { boolean_t key64 = !!(zap_getflags(zc.zc_zap) & ZAP_FLAG_UINT64_KEY); if (key64) (void) printf("\t\t0x%010lx = ", - *(uint64_t *)attr.za_name); + *(uint64_t *)attrp->za_name); else - (void) printf("\t\t%s = ", attr.za_name); + (void) printf("\t\t%s = ", attrp->za_name); - if (attr.za_num_integers == 0) { + if (attrp->za_num_integers == 0) { (void) printf("\n"); continue; } - prop = umem_zalloc(attr.za_num_integers * - attr.za_integer_length, UMEM_NOFAIL); + prop = umem_zalloc(attrp->za_num_integers * + attrp->za_integer_length, UMEM_NOFAIL); if (key64) (void) zap_lookup_uint64(os, object, - (const uint64_t *)attr.za_name, 1, - attr.za_integer_length, attr.za_num_integers, + (const uint64_t *)attrp->za_name, 1, + attrp->za_integer_length, attrp->za_num_integers, prop); else - (void) zap_lookup(os, object, attr.za_name, - attr.za_integer_length, attr.za_num_integers, + (void) zap_lookup(os, object, attrp->za_name, + attrp->za_integer_length, attrp->za_num_integers, prop); - if (attr.za_integer_length == 1 && !key64) { - if (strcmp(attr.za_name, + if (attrp->za_integer_length == 1 && !key64) { + if (strcmp(attrp->za_name, DSL_CRYPTO_KEY_MASTER_KEY) == 0 || - strcmp(attr.za_name, + strcmp(attrp->za_name, DSL_CRYPTO_KEY_HMAC_KEY) == 0 || - strcmp(attr.za_name, 
DSL_CRYPTO_KEY_IV) == 0 || - strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 || - strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) { + strcmp(attrp->za_name, DSL_CRYPTO_KEY_IV) == 0 || + strcmp(attrp->za_name, DSL_CRYPTO_KEY_MAC) == 0 || + strcmp(attrp->za_name, + DMU_POOL_CHECKSUM_SALT) == 0) { uint8_t *u8 = prop; - for (i = 0; i < attr.za_num_integers; i++) { + for (i = 0; i < attrp->za_num_integers; i++) { (void) printf("%02x", u8[i]); } } else { (void) printf("%s", (char *)prop); } } else { - for (i = 0; i < attr.za_num_integers; i++) { - switch (attr.za_integer_length) { + for (i = 0; i < attrp->za_num_integers; i++) { + switch (attrp->za_integer_length) { case 1: (void) printf("%u ", ((uint8_t *)prop)[i]); @@ -1192,9 +1193,11 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size) } } (void) printf("\n"); - umem_free(prop, attr.za_num_integers * attr.za_integer_length); + umem_free(prop, + attrp->za_num_integers * attrp->za_integer_length); } zap_cursor_fini(&zc); + zap_attribute_free(attrp); } static void @@ -1295,26 +1298,27 @@ dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) { (void) data, (void) size; zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attrp = zap_attribute_alloc(); dump_zap_stats(os, object); (void) printf("\n"); for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_retrieve(&zc, attrp) == 0; zap_cursor_advance(&zc)) { - (void) printf("\t\t%s = ", attr.za_name); - if (attr.za_num_integers == 0) { + (void) printf("\t\t%s = ", attrp->za_name); + if (attrp->za_num_integers == 0) { (void) printf("\n"); continue; } (void) printf(" %llx : [%d:%d:%d]\n", - (u_longlong_t)attr.za_first_integer, - (int)ATTR_LENGTH(attr.za_first_integer), - (int)ATTR_BSWAP(attr.za_first_integer), - (int)ATTR_NUM(attr.za_first_integer)); + (u_longlong_t)attrp->za_first_integer, + (int)ATTR_LENGTH(attrp->za_first_integer), + (int)ATTR_BSWAP(attrp->za_first_integer), + 
(int)ATTR_NUM(attrp->za_first_integer)); } zap_cursor_fini(&zc); + zap_attribute_free(attrp); } static void @@ -1322,7 +1326,7 @@ dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) { (void) data, (void) size; zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attrp = zap_attribute_alloc(); uint16_t *layout_attrs; unsigned i; @@ -1330,29 +1334,30 @@ dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) (void) printf("\n"); for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_retrieve(&zc, attrp) == 0; zap_cursor_advance(&zc)) { - (void) printf("\t\t%s = [", attr.za_name); - if (attr.za_num_integers == 0) { + (void) printf("\t\t%s = [", attrp->za_name); + if (attrp->za_num_integers == 0) { (void) printf("\n"); continue; } - VERIFY(attr.za_integer_length == 2); - layout_attrs = umem_zalloc(attr.za_num_integers * - attr.za_integer_length, UMEM_NOFAIL); + VERIFY(attrp->za_integer_length == 2); + layout_attrs = umem_zalloc(attrp->za_num_integers * + attrp->za_integer_length, UMEM_NOFAIL); - VERIFY(zap_lookup(os, object, attr.za_name, - attr.za_integer_length, - attr.za_num_integers, layout_attrs) == 0); + VERIFY(zap_lookup(os, object, attrp->za_name, + attrp->za_integer_length, + attrp->za_num_integers, layout_attrs) == 0); - for (i = 0; i != attr.za_num_integers; i++) + for (i = 0; i != attrp->za_num_integers; i++) (void) printf(" %d ", (int)layout_attrs[i]); (void) printf("]\n"); umem_free(layout_attrs, - attr.za_num_integers * attr.za_integer_length); + attrp->za_num_integers * attrp->za_integer_length); } zap_cursor_fini(&zc); + zap_attribute_free(attrp); } static void @@ -1360,7 +1365,7 @@ dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) { (void) data, (void) size; zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attrp = zap_attribute_long_alloc(); const char *typenames[] = { /* 0 */ "not specified", /* 1 */ "FIFO", @@ -1384,13 +1389,14 @@ 
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) (void) printf("\n"); for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_retrieve(&zc, attrp) == 0; zap_cursor_advance(&zc)) { (void) printf("\t\t%s = %lld (type: %s)\n", - attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer), - typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]); + attrp->za_name, ZFS_DIRENT_OBJ(attrp->za_first_integer), + typenames[ZFS_DIRENT_TYPE(attrp->za_first_integer)]); } zap_cursor_fini(&zc); + zap_attribute_free(attrp); } static int @@ -2155,23 +2161,25 @@ dump_brt(spa_t *spa) continue; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { uint64_t refcnt; VERIFY0(zap_lookup_uint64(brt->brt_mos, brtvd->bv_mos_entries, - (const uint64_t *)za.za_name, 1, - za.za_integer_length, za.za_num_integers, &refcnt)); + (const uint64_t *)za->za_name, 1, + za->za_integer_length, za->za_num_integers, + &refcnt)); - uint64_t offset = *(const uint64_t *)za.za_name; + uint64_t offset = *(const uint64_t *)za->za_name; snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid, (u_longlong_t)offset); printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt); } zap_cursor_fini(&zc); + zap_attribute_free(za); } } @@ -2953,28 +2961,30 @@ static void dump_bookmarks(objset_t *os, int verbosity) { zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attrp; dsl_dataset_t *ds = dmu_objset_ds(os); dsl_pool_t *dp = spa_get_dsl(os->os_spa); objset_t *mos = os->os_spa->spa_meta_objset; if (verbosity < 4) return; + attrp = zap_attribute_alloc(); dsl_pool_config_enter(dp, FTAG); for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj); - zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_retrieve(&zc, attrp) == 0; zap_cursor_advance(&zc)) { char osname[ZFS_MAX_DATASET_NAME_LEN]; 
char buf[ZFS_MAX_DATASET_NAME_LEN]; int len; dmu_objset_name(os, osname); len = snprintf(buf, sizeof (buf), "%s#%s", osname, - attr.za_name); + attrp->za_name); VERIFY3S(len, <, ZFS_MAX_DATASET_NAME_LEN); (void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6); } zap_cursor_fini(&zc); dsl_pool_config_exit(dp, FTAG); + zap_attribute_free(attrp); } static void @@ -6857,18 +6867,19 @@ iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg) ASSERT0(err); zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attrp = zap_attribute_alloc(); dsl_deadlist_t ll; /* NULL out os prior to dsl_deadlist_open in case it's garbage */ ll.dl_os = NULL; for (zap_cursor_init(&zc, mos, zap_obj); - zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_retrieve(&zc, attrp) == 0; (void) zap_cursor_advance(&zc)) { - dsl_deadlist_open(&ll, mos, attr.za_first_integer); + dsl_deadlist_open(&ll, mos, attrp->za_first_integer); func(&ll, arg); dsl_deadlist_close(&ll); } zap_cursor_fini(&zc); + zap_attribute_free(attrp); } static int @@ -8082,13 +8093,14 @@ static void errorlog_count_refd(objset_t *mos, uint64_t errlog) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, errlog); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - mos_obj_refd(za.za_first_integer); + mos_obj_refd(za->za_first_integer); } zap_cursor_fini(&zc); + zap_attribute_free(za); } static int @@ -8788,7 +8800,7 @@ zdb_read_block(char *thing, spa_t *spa) void *lbuf, *buf; char *s, *p, *dup, *flagstr, *sizes, *tmp = NULL; const char *vdev, *errmsg = NULL; - int i, error; + int i, len, error; boolean_t borrowed = B_FALSE, found = B_FALSE; dup = strdup(thing); @@ -8816,7 +8828,8 @@ zdb_read_block(char *thing, spa_t *spa) for (s = strtok_r(flagstr, ":", &tmp); s != NULL; s = strtok_r(NULL, ":", &tmp)) { - for (i = 0; i < strlen(flagstr); i++) { + len = strlen(flagstr); + for (i = 0; i < len; i++) 
{ int bit = flagbits[(uchar_t)flagstr[i]]; if (bit == 0) { @@ -9086,13 +9099,14 @@ zdb_embedded_block(char *thing) static boolean_t zdb_numeric(char *str) { - int i = 0; + int i = 0, len; - if (strlen(str) == 0) + len = strlen(str); + if (len == 0) return (B_FALSE); if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0) i = 2; - for (; i < strlen(str); i++) { + for (; i < len; i++) { if (!isxdigit(str[i])) return (B_FALSE); } diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index e3caaeb70e14..80d81c1154ae 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -64,7 +64,8 @@ static void zil_prt_rec_create(zilog_t *zilog, int txtype, const void *arg) { (void) zilog; - const lr_create_t *lr = arg; + const lr_create_t *lrc = arg; + const _lr_create_t *lr = &lrc->lr_create; time_t crtime = lr->lr_crtime[0]; char *name, *link; lr_attr_t *lrattr; @@ -121,7 +122,8 @@ static void zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg) { (void) zilog, (void) txtype; - const lr_rename_t *lr = arg; + const lr_rename_t *lrr = arg; + const _lr_rename_t *lr = &lrr->lr_rename; char *snm = (char *)(lr + 1); char *tnm = snm + strlen(snm) + 1; diff --git a/cmd/zed/agents/zfs_diagnosis.c b/cmd/zed/agents/zfs_diagnosis.c index e35cd0756c60..b45b89af430f 100644 --- a/cmd/zed/agents/zfs_diagnosis.c +++ b/cmd/zed/agents/zfs_diagnosis.c @@ -71,6 +71,7 @@ typedef struct zfs_case_data { uint64_t zc_ena; uint64_t zc_pool_guid; uint64_t zc_vdev_guid; + uint64_t zc_parent_guid; int zc_pool_state; char zc_serd_checksum[MAX_SERDLEN]; char zc_serd_io[MAX_SERDLEN]; @@ -181,10 +182,10 @@ zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp) } /* - * count other unique slow-io cases in a pool + * Return count of other unique SERD cases under same vdev parent */ static uint_t -zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case) +zfs_other_serd_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case) { zfs_case_t *zcp; uint_t cases = 0; @@ -206,10 +207,32 @@ 
zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case) for (zcp = uu_list_first(zfs_cases); zcp != NULL; zcp = uu_list_next(zfs_cases, zcp)) { - if (zcp->zc_data.zc_pool_guid == zfs_case->zc_pool_guid && - zcp->zc_data.zc_vdev_guid != zfs_case->zc_vdev_guid && - zcp->zc_data.zc_serd_slow_io[0] != '\0' && - fmd_serd_active(hdl, zcp->zc_data.zc_serd_slow_io)) { + zfs_case_data_t *zcd = &zcp->zc_data; + + /* + * must be same pool and parent vdev but different leaf vdev + */ + if (zcd->zc_pool_guid != zfs_case->zc_pool_guid || + zcd->zc_parent_guid != zfs_case->zc_parent_guid || + zcd->zc_vdev_guid == zfs_case->zc_vdev_guid) { + continue; + } + + /* + * Check if there is another active serd case besides zfs_case + * + * Only one serd engine will be assigned to the case + */ + if (zcd->zc_serd_checksum[0] == zfs_case->zc_serd_checksum[0] && + fmd_serd_active(hdl, zcd->zc_serd_checksum)) { + cases++; + } + if (zcd->zc_serd_io[0] == zfs_case->zc_serd_io[0] && + fmd_serd_active(hdl, zcd->zc_serd_io)) { + cases++; + } + if (zcd->zc_serd_slow_io[0] == zfs_case->zc_serd_slow_io[0] && + fmd_serd_active(hdl, zcd->zc_serd_slow_io)) { cases++; } } @@ -502,6 +525,34 @@ zfs_ereport_when(fmd_hdl_t *hdl, nvlist_t *nvl, er_timeval_t *when) } } +/* + * Record the specified event in the SERD engine and return a + * boolean value indicating whether or not the engine fired as + * the result of inserting this event. + * + * When the pool has similar active cases on other vdevs, then + * the fired state is disregarded and the case is retired. 
+ */ +static int +zfs_fm_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep, + zfs_case_t *zcp, const char *err_type) +{ + int fired = fmd_serd_record(hdl, name, ep); + int peers = 0; + + if (fired && (peers = zfs_other_serd_cases(hdl, &zcp->zc_data)) > 0) { + fmd_hdl_debug(hdl, "pool %llu is tracking %d other %s cases " + "-- skip faulting the vdev %llu", + (u_longlong_t)zcp->zc_data.zc_pool_guid, + peers, err_type, + (u_longlong_t)zcp->zc_data.zc_vdev_guid); + zfs_case_retire(hdl, zcp); + fired = 0; + } + + return (fired); +} + /* * Main fmd entry point. */ @@ -510,7 +561,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) { zfs_case_t *zcp, *dcp; int32_t pool_state; - uint64_t ena, pool_guid, vdev_guid; + uint64_t ena, pool_guid, vdev_guid, parent_guid; uint64_t checksum_n, checksum_t; uint64_t io_n, io_t; er_timeval_t pool_load; @@ -600,6 +651,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) vdev_guid = 0; + if (nvlist_lookup_uint64(nvl, + FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, &parent_guid) != 0) + parent_guid = 0; if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0) ena = 0; @@ -710,6 +764,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) data.zc_ena = ena; data.zc_pool_guid = pool_guid; data.zc_vdev_guid = vdev_guid; + data.zc_parent_guid = parent_guid; data.zc_pool_state = (int)pool_state; fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data)); @@ -872,8 +927,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) SEC2NSEC(io_t)); zfs_case_serialize(zcp); } - if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep)) + if (zfs_fm_serd_record(hdl, zcp->zc_data.zc_serd_io, + ep, zcp, "io error")) { checkremove = B_TRUE; + } } else if (fmd_nvl_class_match(hdl, nvl, ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) { uint64_t slow_io_n, slow_io_t; @@ 
-899,25 +956,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) } /* Pass event to SERD engine and see if this triggers */ if (zcp->zc_data.zc_serd_slow_io[0] != '\0' && - fmd_serd_record(hdl, zcp->zc_data.zc_serd_slow_io, - ep)) { - /* - * Ignore a slow io diagnosis when other - * VDEVs in the pool show signs of being slow. - */ - if (zfs_other_slow_cases(hdl, &zcp->zc_data)) { - zfs_case_retire(hdl, zcp); - fmd_hdl_debug(hdl, "pool %llu has " - "multiple slow io cases -- skip " - "degrading vdev %llu", - (u_longlong_t) - zcp->zc_data.zc_pool_guid, - (u_longlong_t) - zcp->zc_data.zc_vdev_guid); - } else { - zfs_case_solve(hdl, zcp, - "fault.fs.zfs.vdev.slow_io"); - } + zfs_fm_serd_record(hdl, + zcp->zc_data.zc_serd_slow_io, ep, zcp, "slow io")) { + zfs_case_solve(hdl, zcp, + "fault.fs.zfs.vdev.slow_io"); } } else if (fmd_nvl_class_match(hdl, nvl, ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) { @@ -968,8 +1010,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) SEC2NSEC(checksum_t)); zfs_case_serialize(zcp); } - if (fmd_serd_record(hdl, - zcp->zc_data.zc_serd_checksum, ep)) { + if (zfs_fm_serd_record(hdl, + zcp->zc_data.zc_serd_checksum, ep, zcp, + "checksum")) { zfs_case_solve(hdl, zcp, "fault.fs.zfs.vdev.checksum"); } diff --git a/cmd/zhack.c b/cmd/zhack.c index f15a6ece538c..f297afb65d47 100644 --- a/cmd/zhack.c +++ b/cmd/zhack.c @@ -203,26 +203,27 @@ static void dump_obj(objset_t *os, uint64_t obj, const char *name) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_long_alloc(); (void) printf("%s_obj:\n", name); for (zap_cursor_init(&zc, os, obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - if (za.za_integer_length == 8) { - ASSERT(za.za_num_integers == 1); + if (za->za_integer_length == 8) { + ASSERT(za->za_num_integers == 1); (void) printf("\t%s = %llu\n", - za.za_name, (u_longlong_t)za.za_first_integer); + 
za->za_name, (u_longlong_t)za->za_first_integer); } else { - ASSERT(za.za_integer_length == 1); + ASSERT(za->za_integer_length == 1); char val[1024]; - VERIFY(zap_lookup(os, obj, za.za_name, + VERIFY(zap_lookup(os, obj, za->za_name, 1, sizeof (val), val) == 0); - (void) printf("\t%s = %s\n", za.za_name, val); + (void) printf("\t%s = %s\n", za->za_name, val); } } zap_cursor_fini(&zc); + zap_attribute_free(za); } static void diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am index d08b8e1791b6..2f962408e5a3 100644 --- a/cmd/zpool/Makefile.am +++ b/cmd/zpool/Makefile.am @@ -140,12 +140,14 @@ dist_zpoolcompat_DATA = \ %D%/compatibility.d/freebsd-11.2 \ %D%/compatibility.d/freebsd-11.3 \ %D%/compatibility.d/freenas-9.10.2 \ - %D%/compatibility.d/grub2 \ + %D%/compatibility.d/grub2-2.06 \ + %D%/compatibility.d/grub2-2.12 \ %D%/compatibility.d/openzfs-2.0-freebsd \ %D%/compatibility.d/openzfs-2.0-linux \ %D%/compatibility.d/openzfs-2.1-freebsd \ %D%/compatibility.d/openzfs-2.1-linux \ %D%/compatibility.d/openzfs-2.2 \ + %D%/compatibility.d/openzfs-2.3 \ %D%/compatibility.d/openzfsonosx-1.7.0 \ %D%/compatibility.d/openzfsonosx-1.8.1 \ %D%/compatibility.d/openzfsonosx-1.9.3 \ @@ -171,6 +173,7 @@ zpoolcompatlinks = \ "freebsd-11.3 freebsd-12.2" \ "freebsd-11.3 freebsd-12.3" \ "freebsd-11.3 freebsd-12.4" \ + "grub2-2.12 grub2" \ "openzfs-2.1-freebsd freebsd-13.0" \ "openzfs-2.1-freebsd freebsd-13.1" \ "openzfs-2.1-freebsd freebsd-13.2" \ @@ -182,7 +185,9 @@ zpoolcompatlinks = \ "zol-0.8 ubuntu-20.04" \ "openzfs-2.1-linux ubuntu-22.04" \ "openzfs-2.2 openzfs-2.2-linux" \ - "openzfs-2.2 openzfs-2.2-freebsd" + "openzfs-2.2 openzfs-2.2-freebsd" \ + "openzfs-2.3 openzfs-2.3-linux" \ + "openzfs-2.3 openzfs-2.3-freebsd" zpoolconfdir = $(sysconfdir)/zfs/zpool.d INSTALL_DATA_HOOKS += zpool-install-data-hook diff --git a/cmd/zpool/compatibility.d/grub2-2.06 b/cmd/zpool/compatibility.d/grub2-2.06 new file mode 100644 index 000000000000..ea7468dbcc78 --- /dev/null +++ 
b/cmd/zpool/compatibility.d/grub2-2.06 @@ -0,0 +1,23 @@ +# Features which are supported by GRUB2 versions prior to v2.12. +# +# GRUB is not able to detect ZFS pool if snapshot of top level boot pool +# is created. This issue is observed with GRUB versions before v2.12 if +# extensible_dataset feature is enabled on ZFS boot pool. +# +# This file lists all read-only compatible features except +# extensible_dataset and any other feature that depends on it. +# +allocation_classes +async_destroy +block_cloning +device_rebuild +embedded_data +empty_bpobj +enabled_txg +hole_birth +log_spacemap +lz4_compress +resilver_defer +spacemap_histogram +spacemap_v2 +zpool_checkpoint diff --git a/cmd/zpool/compatibility.d/grub2 b/cmd/zpool/compatibility.d/grub2-2.12 similarity index 81% rename from cmd/zpool/compatibility.d/grub2 rename to cmd/zpool/compatibility.d/grub2-2.12 index 6d60e643593b..86cce6338bb8 100644 --- a/cmd/zpool/compatibility.d/grub2 +++ b/cmd/zpool/compatibility.d/grub2-2.12 @@ -1,4 +1,4 @@ -# Features which are supported by GRUB2 +# Features which are supported by GRUB2 versions from v2.12 onwards. 
allocation_classes async_destroy block_cloning diff --git a/cmd/zpool/compatibility.d/openzfs-2.3 b/cmd/zpool/compatibility.d/openzfs-2.3 new file mode 100644 index 000000000000..d0620988098c --- /dev/null +++ b/cmd/zpool/compatibility.d/openzfs-2.3 @@ -0,0 +1,45 @@ +# Features supported by OpenZFS 2.3 on Linux and FreeBSD +allocation_classes +async_destroy +blake3 +block_cloning +bookmark_v2 +bookmark_written +bookmarks +device_rebuild +device_removal +draid +edonr +embedded_data +empty_bpobj +enabled_txg +encryption +extensible_dataset +fast_dedup +filesystem_limits +head_errlog +hole_birth +large_blocks +large_dnode +large_microzap +livelist +log_spacemap +longname +lz4_compress +multi_vdev_crash_dump +obsolete_counts +project_quota +raidz_expansion +redacted_datasets +redaction_bookmarks +redaction_list_spill +resilver_defer +sha512 +skein +spacemap_histogram +spacemap_v2 +userobj_accounting +vdev_zaps_v2 +zilsaxattr +zpool_checkpoint +zstd_compress diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 349c208c521b..aa7da68aa683 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -522,7 +522,7 @@ get_usage(zpool_help_t idx) return (gettext("\tstatus [--power] [-j [--json-int, " "--json-flat-vdevs, ...\n" "\t --json-pool-key-guid]] [-c [script1,script2,...]] " - "[-DegiLpPstvx] ...\n" + "[-dDegiLpPstvx] ...\n" "\t [-T d|u] [pool] [interval [count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" @@ -2602,6 +2602,7 @@ typedef struct status_cbdata { boolean_t cb_print_unhealthy; boolean_t cb_print_status; boolean_t cb_print_slow_ios; + boolean_t cb_print_dio_verify; boolean_t cb_print_vdev_init; boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; @@ -2879,7 +2880,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, uint_t c, i, vsc, children; pool_scan_stat_t *ps = NULL; vdev_stat_t *vs; - char rbuf[6], wbuf[6], cbuf[6]; + char rbuf[6], wbuf[6], cbuf[6], dbuf[6]; char *vname; uint64_t 
notpresent; spare_cbdata_t spare_cb; @@ -2997,6 +2998,17 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, printf(" %5s", "-"); } } + if (VDEV_STAT_VALID(vs_dio_verify_errors, vsc) && + cb->cb_print_dio_verify) { + zfs_nicenum(vs->vs_dio_verify_errors, dbuf, + sizeof (dbuf)); + + if (cb->cb_literal) + printf(" %5llu", + (u_longlong_t)vs->vs_dio_verify_errors); + else + printf(" %5s", dbuf); + } } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, @@ -10873,6 +10885,10 @@ status_callback(zpool_handle_t *zhp, void *data) printf_color(ANSI_BOLD, " %5s", gettext("POWER")); } + if (cbp->cb_print_dio_verify) { + printf_color(ANSI_BOLD, " %5s", gettext("DIO")); + } + if (cbp->vcdl != NULL) print_cmd_columns(cbp->vcdl, 0); @@ -10921,10 +10937,11 @@ status_callback(zpool_handle_t *zhp, void *data) } /* - * zpool status [-c [script1,script2,...]] [-DegiLpPstvx] [--power] [-T d|u] ... - * [pool] [interval [count]] + * zpool status [-c [script1,script2,...]] [-dDegiLpPstvx] [--power] ... + * [-T d|u] [pool] [interval [count]] * * -c CMD For each vdev, run command CMD + * -d Display Direct I/O write verify errors * -D Display dedup status (undocumented) * -e Display only unhealthy vdevs * -g Display guid for individual vdev name. 
@@ -10967,7 +10984,7 @@ zpool_do_status(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, "c:jDegiLpPstT:vx", long_options, + while ((c = getopt_long(argc, argv, "c:jdDegiLpPstT:vx", long_options, NULL)) != -1) { switch (c) { case 'c': @@ -10994,6 +11011,9 @@ zpool_do_status(int argc, char **argv) } cmd = optarg; break; + case 'd': + cb.cb_print_dio_verify = B_TRUE; + break; case 'D': if (++cb.cb_dedup_stats > 2) cb.cb_dedup_stats = 2; diff --git a/cmd/zstream/zstream_recompress.c b/cmd/zstream/zstream_recompress.c index ae2c56320b2a..586ac5623aa5 100644 --- a/cmd/zstream/zstream_recompress.c +++ b/cmd/zstream/zstream_recompress.c @@ -288,7 +288,8 @@ zstream_do_recompress(int argc, char *argv[]) abd_t *pabd = abd_get_from_buf_struct(&abd, buf, bufsz); size_t csize = zio_compress_data(ctype, &dabd, - &pabd, drrw->drr_logical_size, level); + &pabd, drrw->drr_logical_size, + drrw->drr_logical_size, level); size_t rounded = P2ROUNDUP(csize, SPA_MINBLOCKSIZE); if (rounded >= drrw->drr_logical_size) { diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c index dccd325d4cfa..51266b0b6629 100644 --- a/cmd/zstream/zstream_redup.c +++ b/cmd/zstream/zstream_redup.c @@ -132,7 +132,7 @@ static void rdt_insert(redup_table_t *rdt, uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset) { - uint64_t ch = cityhash4(guid, object, offset, 0); + uint64_t ch = cityhash3(guid, object, offset); uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits); redup_entry_t **rdepp; @@ -152,7 +152,7 @@ rdt_lookup(redup_table_t *rdt, uint64_t guid, uint64_t object, uint64_t offset, uint64_t *stream_offsetp) { - uint64_t ch = cityhash4(guid, object, offset, 0); + uint64_t ch = cityhash3(guid, object, offset); uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits); for (redup_entry_t *rde = rdt->redup_hash_array[hashcode]; diff --git a/cmd/ztest.c b/cmd/ztest.c index ce031632e758..523f280aae1a 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c 
@@ -685,15 +685,17 @@ static int str2shift(const char *buf) { const char *ends = "BKMGTPEZ"; - int i; + int i, len; if (buf[0] == '\0') return (0); - for (i = 0; i < strlen(ends); i++) { + + len = strlen(ends); + for (i = 0; i < len; i++) { if (toupper(buf[0]) == ends[i]) break; } - if (i == strlen(ends)) { + if (i == len) { (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); usage(B_FALSE); @@ -1940,7 +1942,7 @@ ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj, static void ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) { - char *name = (void *)(lr + 1); /* name follows lr */ + char *name = (char *)&lr->lr_data[0]; /* name follows lr */ size_t namesize = strlen(name) + 1; itx_t *itx; @@ -1948,7 +1950,7 @@ ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) return; itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); - memcpy(&itx->itx_lr + 1, &lr->lr_common + 1, + memcpy(&itx->itx_lr + 1, &lr->lr_create.lr_common + 1, sizeof (*lr) + namesize - sizeof (lr_t)); zil_itx_assign(zd->zd_zilog, itx, tx); @@ -1957,7 +1959,7 @@ ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) static void ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) { - char *name = (void *)(lr + 1); /* name follows lr */ + char *name = (char *)&lr->lr_data[0]; /* name follows lr */ size_t namesize = strlen(name) + 1; itx_t *itx; @@ -2043,8 +2045,9 @@ static int ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; - lr_create_t *lr = arg2; - char *name = (void *)(lr + 1); /* name follows lr */ + lr_create_t *lrc = arg2; + _lr_create_t *lr = &lrc->lr_create; + char *name = (char *)&lrc->lr_data[0]; /* name follows lr */ objset_t *os = zd->zd_os; ztest_block_tag_t *bbt; dmu_buf_t *db; @@ -2122,7 +2125,7 @@ ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap) VERIFY0(zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, &lr->lr_foid, tx)); - (void) 
ztest_log_create(zd, tx, lr); + (void) ztest_log_create(zd, tx, lrc); dmu_tx_commit(tx); @@ -2134,7 +2137,7 @@ ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; lr_remove_t *lr = arg2; - char *name = (void *)(lr + 1); /* name follows lr */ + char *name = (char *)&lr->lr_data[0]; /* name follows lr */ objset_t *os = zd->zd_os; dmu_object_info_t doi; dmu_tx_t *tx; @@ -2188,9 +2191,9 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) ztest_ds_t *zd = arg1; lr_write_t *lr = arg2; objset_t *os = zd->zd_os; - void *data = lr + 1; /* data follows lr */ + uint8_t *data = &lr->lr_data[0]; /* data follows lr */ uint64_t offset, length; - ztest_block_tag_t *bt = data; + ztest_block_tag_t *bt = (ztest_block_tag_t *)data; ztest_block_tag_t *bbt; uint64_t gen, txg, lrtxg, crtxg; dmu_object_info_t doi; @@ -2262,6 +2265,13 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) if (ztest_random(4) != 0) { int prefetch = ztest_random(2) ? DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. 
+ */ + if (ztest_random(4) == 0) + prefetch |= DMU_DIRECTIO; + ztest_block_tag_t rbt; VERIFY(dmu_read(os, lr->lr_foid, offset, @@ -2642,7 +2652,8 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) continue; } - lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); + lr_create_t *lrc = ztest_lr_alloc(sizeof (*lrc), od->od_name); + _lr_create_t *lr = &lrc->lr_create; lr->lr_doid = od->od_dir; lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ @@ -2726,7 +2737,7 @@ ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, lr->lr_blkoff = 0; BP_ZERO(&lr->lr_blkptr); - memcpy(lr + 1, data, size); + memcpy(&lr->lr_data[0], data, size); error = ztest_replay_write(zd, lr, B_FALSE); @@ -2813,6 +2824,13 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) enum ztest_io_type io_type; uint64_t blocksize; void *data; + uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. + */ + if (ztest_random(4) == 0) + dmu_read_flags |= DMU_DIRECTIO; VERIFY0(dmu_object_info(zd->zd_os, object, &doi)); blocksize = doi.doi_data_block_size; @@ -2878,7 +2896,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) (void) pthread_rwlock_unlock(&ztest_name_lock); VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, - DMU_READ_NO_PREFETCH)); + dmu_read_flags)); (void) ztest_write(zd, object, offset, blocksize, data); break; @@ -5045,6 +5063,13 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) uint64_t stride = 123456789ULL; uint64_t width = 40; int free_percent = 5; + uint32_t dmu_read_flags = DMU_READ_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. + */ + if (ztest_random(4) == 0) + dmu_read_flags |= DMU_DIRECTIO; /* * This test uses two objects, packobj and bigobj, that are always @@ -5123,10 +5148,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) * Read the current contents of our objects. 
*/ error = dmu_read(os, packobj, packoff, packsize, packbuf, - DMU_READ_PREFETCH); + dmu_read_flags); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, - DMU_READ_PREFETCH); + dmu_read_flags); ASSERT0(error); /* @@ -5244,9 +5269,9 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_read(os, packobj, packoff, - packsize, packcheck, DMU_READ_PREFETCH)); + packsize, packcheck, dmu_read_flags)); VERIFY0(dmu_read(os, bigobj, bigoff, - bigsize, bigcheck, DMU_READ_PREFETCH)); + bigsize, bigcheck, dmu_read_flags)); ASSERT0(memcmp(packbuf, packcheck, packsize)); ASSERT0(memcmp(bigbuf, bigcheck, bigsize)); @@ -5336,6 +5361,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) dmu_buf_t *bonus_db; arc_buf_t **bigbuf_arcbufs; dmu_object_info_t doi; + uint32_t dmu_read_flags = DMU_READ_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. + */ + if (ztest_random(4) == 0) + dmu_read_flags |= DMU_DIRECTIO; size = sizeof (ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); @@ -5466,10 +5498,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) */ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, - packsize, packbuf, DMU_READ_PREFETCH); + packsize, packbuf, dmu_read_flags); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, - bigbuf, DMU_READ_PREFETCH); + bigbuf, dmu_read_flags); ASSERT0(error); } compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, @@ -5529,9 +5561,9 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_read(os, packobj, packoff, - packsize, packcheck, DMU_READ_PREFETCH)); + packsize, packcheck, dmu_read_flags)); VERIFY0(dmu_read(os, bigobj, bigoff, - bigsize, bigcheck, DMU_READ_PREFETCH)); + bigsize, bigcheck, dmu_read_flags)); ASSERT0(memcmp(packbuf, packcheck, packsize)); ASSERT0(memcmp(bigbuf, bigcheck, bigsize)); diff --git 
a/config/ax_compare_version.m4 b/config/ax_compare_version.m4 new file mode 100644 index 000000000000..ffb4997e8b14 --- /dev/null +++ b/config/ax_compare_version.m4 @@ -0,0 +1,177 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_compare_version.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) +# +# DESCRIPTION +# +# This macro compares two version strings. Due to the various number of +# minor-version numbers that can exist, and the fact that string +# comparisons are not compatible with numeric comparisons, this is not +# necessarily trivial to do in a autoconf script. This macro makes doing +# these comparisons easy. +# +# The six basic comparisons are available, as well as checking equality +# limited to a certain number of minor-version levels. +# +# The operator OP determines what type of comparison to do, and can be one +# of: +# +# eq - equal (test A == B) +# ne - not equal (test A != B) +# le - less than or equal (test A <= B) +# ge - greater than or equal (test A >= B) +# lt - less than (test A < B) +# gt - greater than (test A > B) +# +# Additionally, the eq and ne operator can have a number after it to limit +# the test to that number of minor versions. +# +# eq0 - equal up to the length of the shorter version +# ne0 - not equal up to the length of the shorter version +# eqN - equal up to N sub-version levels +# neN - not equal up to N sub-version levels +# +# When the condition is true, shell commands ACTION-IF-TRUE are run, +# otherwise shell commands ACTION-IF-FALSE are run. The environment +# variable 'ax_compare_version' is always set to either 'true' or 'false' +# as well. +# +# Examples: +# +# AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8]) +# AX_COMPARE_VERSION([3.15],[lt],[3.15.8]) +# +# would both be true. 
+# +# AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8]) +# AX_COMPARE_VERSION([3.15],[gt],[3.15.8]) +# +# would both be false. +# +# AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8]) +# +# would be true because it is only comparing two minor versions. +# +# AX_COMPARE_VERSION([3.15.7],[eq0],[3.15]) +# +# would be true because it is only comparing the lesser number of minor +# versions of the two values. +# +# Note: The characters that separate the version numbers do not matter. An +# empty string is the same as version 0. OP is evaluated by autoconf, not +# configure, so must be a string, not a variable. +# +# The author would like to acknowledge Guido Draheim whose advice about +# the m4_case and m4_ifvaln functions make this macro only include the +# portions necessary to perform the specific comparison specified by the +# OP argument in the final configure script. +# +# LICENSE +# +# Copyright (c) 2008 Tim Toolan +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 13 + +dnl ######################################################################### +AC_DEFUN([AX_COMPARE_VERSION], [ + AC_REQUIRE([AC_PROG_AWK]) + + # Used to indicate true or false condition + ax_compare_version=false + + # Convert the two version strings to be compared into a format that + # allows a simple string comparison. The end result is that a version + # string of the form 1.12.5-r617 will be converted to the form + # 0001001200050617. In other words, each number is zero padded to four + # digits, and non digits are removed. 
+ AS_VAR_PUSHDEF([A],[ax_compare_version_A]) + A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ + -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ + -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ + -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ + -e 's/[[^0-9]]//g'` + + AS_VAR_PUSHDEF([B],[ax_compare_version_B]) + B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ + -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ + -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ + -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ + -e 's/[[^0-9]]//g'` + + dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary + dnl # then the first line is used to determine if the condition is true. + dnl # The sed right after the echo is to remove any indented white space. + m4_case(m4_tolower($2), + [lt],[ + ax_compare_version=`echo "x$A +x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"` + ], + [gt],[ + ax_compare_version=`echo "x$A +x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"` + ], + [le],[ + ax_compare_version=`echo "x$A +x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"` + ], + [ge],[ + ax_compare_version=`echo "x$A +x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"` + ],[ + dnl Split the operator from the subversion count if present. + m4_bmatch(m4_substr($2,2), + [0],[ + # A count of zero means use the length of the shorter version. + # Determine the number of characters in A and B. + ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'` + ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'` + + # Set A to no more than B's length and B to no more than A's length. 
+ A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"` + B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"` + ], + [[0-9]+],[ + # A count greater than zero means use only that many subversions + A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` + B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` + ], + [.+],[ + AC_WARNING( + [invalid OP numeric parameter: $2]) + ],[]) + + # Pad zeros at end of numbers to make same length. + ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`" + B="$B`echo $A | sed 's/./0/g'`" + A="$ax_compare_version_tmp_A" + + # Check for equality or inequality as necessary. + m4_case(m4_tolower(m4_substr($2,0,2)), + [eq],[ + test "x$A" = "x$B" && ax_compare_version=true + ], + [ne],[ + test "x$A" != "x$B" && ax_compare_version=true + ],[ + AC_WARNING([invalid OP parameter: $2]) + ]) + ]) + + AS_VAR_POPDEF([A])dnl + AS_VAR_POPDEF([B])dnl + + dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE. + if test "$ax_compare_version" = "true" ; then + m4_ifvaln([$4],[$4],[:])dnl + m4_ifvaln([$5],[else $5])dnl + fi +]) dnl AX_COMPARE_VERSION diff --git a/config/intlmacosx.m4 b/config/intlmacosx.m4 deleted file mode 100644 index 30e6f50e0ac6..000000000000 --- a/config/intlmacosx.m4 +++ /dev/null @@ -1,72 +0,0 @@ -# intlmacosx.m4 serial 6 (gettext-0.20) -dnl Copyright (C) 2004-2014, 2016, 2019 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. -dnl -dnl This file can be used in projects which are not available under -dnl the GNU General Public License or the GNU Library General Public -dnl License but which still want to provide support for the GNU gettext -dnl functionality. 
-dnl Please note that the actual code of the GNU gettext library is covered -dnl by the GNU Library General Public License, and the rest of the GNU -dnl gettext package is covered by the GNU General Public License. -dnl They are *not* in the public domain. - -dnl Checks for special options needed on Mac OS X. -dnl Defines INTL_MACOSX_LIBS. -AC_DEFUN([gt_INTL_MACOSX], -[ - dnl Check for API introduced in Mac OS X 10.4. - AC_CACHE_CHECK([for CFPreferencesCopyAppValue], - [gt_cv_func_CFPreferencesCopyAppValue], - [gt_save_LIBS="$LIBS" - LIBS="$LIBS -Wl,-framework -Wl,CoreFoundation" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[CFPreferencesCopyAppValue(NULL, NULL)]])], - [gt_cv_func_CFPreferencesCopyAppValue=yes], - [gt_cv_func_CFPreferencesCopyAppValue=no]) - LIBS="$gt_save_LIBS"]) - if test $gt_cv_func_CFPreferencesCopyAppValue = yes; then - AC_DEFINE([HAVE_CFPREFERENCESCOPYAPPVALUE], [1], - [Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in the CoreFoundation framework.]) - fi - dnl Check for API introduced in Mac OS X 10.5. 
- AC_CACHE_CHECK([for CFLocaleCopyCurrent], [gt_cv_func_CFLocaleCopyCurrent], - [gt_save_LIBS="$LIBS" - LIBS="$LIBS -Wl,-framework -Wl,CoreFoundation" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[CFLocaleCopyCurrent();]])], - [gt_cv_func_CFLocaleCopyCurrent=yes], - [gt_cv_func_CFLocaleCopyCurrent=no]) - LIBS="$gt_save_LIBS"]) - if test $gt_cv_func_CFLocaleCopyCurrent = yes; then - AC_DEFINE([HAVE_CFLOCALECOPYCURRENT], [1], - [Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the CoreFoundation framework.]) - fi - AC_CACHE_CHECK([for CFLocaleCopyPreferredLanguages], [gt_cv_func_CFLocaleCopyPreferredLanguages], - [gt_save_LIBS="$LIBS" - LIBS="$LIBS -Wl,-framework -Wl,CoreFoundation" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[CFLocaleCopyPreferredLanguages();]])], - [gt_cv_func_CFLocaleCopyPreferredLanguages=yes], - [gt_cv_func_CFLocaleCopyPreferredLanguages=no]) - LIBS="$gt_save_LIBS"]) - if test $gt_cv_func_CFLocaleCopyPreferredLanguages = yes; then - AC_DEFINE([HAVE_CFLOCALECOPYPREFERREDLANGUAGES], [1], - [Define to 1 if you have the Mac OS X function CFLocaleCopyPreferredLanguages in the CoreFoundation framework.]) - fi - INTL_MACOSX_LIBS= - if test $gt_cv_func_CFPreferencesCopyAppValue = yes \ - || test $gt_cv_func_CFLocaleCopyCurrent = yes \ - || test $gt_cv_func_CFLocaleCopyPreferredLanguages = yes; then - INTL_MACOSX_LIBS="-Wl,-framework -Wl,CoreFoundation" - fi - AC_SUBST([INTL_MACOSX_LIBS]) -]) diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4 index 3ae5dc6b6dbc..3dbd97948189 100644 --- a/config/kernel-acl.m4 +++ b/config/kernel-acl.m4 @@ -1,112 +1,3 @@ -dnl # -dnl # Check if posix_acl_release can be used from a ZFS_META_LICENSED -dnl # module. 
The is_owner_or_cap macro was replaced by -dnl # inode_owner_or_capable -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE], [ - ZFS_LINUX_TEST_SRC([posix_acl_release], [ - #include - #include - #include - ], [ - struct posix_acl *tmp = posix_acl_alloc(1, 0); - posix_acl_release(tmp); - ], [], [ZFS_META_LICENSE]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [ - AC_MSG_CHECKING([whether posix_acl_release() is available]) - ZFS_LINUX_TEST_RESULT([posix_acl_release], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_RELEASE, 1, - [posix_acl_release() is available]) - - AC_MSG_CHECKING([whether posix_acl_release() is GPL-only]) - ZFS_LINUX_TEST_RESULT([posix_acl_release_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_RELEASE_GPL_ONLY, 1, - [posix_acl_release() is GPL-only]) - ]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.14 API change, -dnl # set_cached_acl() and forget_cached_acl() changed from inline to -dnl # EXPORT_SYMBOL. In the former case, they may not be usable because of -dnl # posix_acl_release. In the latter case, we can always use them. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE], [ - ZFS_LINUX_TEST_SRC([set_cached_acl], [ - #include - #include - #include - ], [ - struct inode *ip = NULL; - struct posix_acl *acl = posix_acl_alloc(1, 0); - set_cached_acl(ip, ACL_TYPE_ACCESS, acl); - forget_cached_acl(ip, ACL_TYPE_ACCESS); - ], [], [ZFS_META_LICENSE]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [ - AC_MSG_CHECKING([whether set_cached_acl() is usable]) - ZFS_LINUX_TEST_RESULT([set_cached_acl_license], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_CACHED_ACL_USABLE, 1, - [set_cached_acl() is usable]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.1 API change, -dnl # posix_acl_chmod() was added as the preferred interface. 
-dnl # -dnl # 3.14 API change, -dnl # posix_acl_chmod() was changed to __posix_acl_chmod() -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD], [ - ZFS_LINUX_TEST_SRC([posix_acl_chmod], [ - #include - #include - ],[ - posix_acl_chmod(NULL, 0, 0) - ]) - - ZFS_LINUX_TEST_SRC([__posix_acl_chmod], [ - #include - #include - ],[ - __posix_acl_chmod(NULL, 0, 0) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ - AC_MSG_CHECKING([whether __posix_acl_chmod exists]) - ZFS_LINUX_TEST_RESULT([__posix_acl_chmod], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE___POSIX_ACL_CHMOD, 1, - [__posix_acl_chmod() exists]) - ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether posix_acl_chmod exists]) - ZFS_LINUX_TEST_RESULT([posix_acl_chmod], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1, - [posix_acl_chmod() exists]) - ],[ - ZFS_LINUX_TEST_ERROR([posix_acl_chmod()]) - ]) - ]) -]) - dnl # dnl # 3.1 API change, dnl # posix_acl_equiv_mode now wants an umode_t instead of a mode_t @@ -130,34 +21,6 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ ]) ]) -dnl # -dnl # 4.8 API change, -dnl # The function posix_acl_valid now must be passed a namespace. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS], [ - ZFS_LINUX_TEST_SRC([posix_acl_valid_with_ns], [ - #include - #include - ],[ - struct user_namespace *user_ns = NULL; - const struct posix_acl *acl = NULL; - int error; - - error = posix_acl_valid(user_ns, acl); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ - AC_MSG_CHECKING([whether posix_acl_valid() wants user namespace]) - ZFS_LINUX_TEST_RESULT([posix_acl_valid_with_ns], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_VALID_WITH_NS, 1, - [posix_acl_valid() wants user namespace]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 3.1 API change, dnl # Check if inode_operations contains the function get_acl @@ -226,9 +89,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ ]) ]) -dnl # -dnl # 3.14 API change, -dnl # Check if inode_operations contains the function set_acl dnl # dnl # 5.12 API change, dnl # set_acl() added a user_namespace* parameter first @@ -290,106 +150,35 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ - AC_MSG_CHECKING([whether iops->set_acl() exists]) + AC_MSG_CHECKING([whether iops->set_acl() with 4 args exists]) ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1, [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1, [iops->set_acl() takes 4 args, arg2 is struct dentry *]) ],[ - ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ - AC_MSG_RESULT(yes) - 
AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args]) - ],[ - ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14]) - ]) + AC_MSG_RESULT(no) ]) ]) ]) ]) -dnl # -dnl # 4.7 API change, -dnl # The kernel get_acl will now check cache before calling i_op->get_acl and -dnl # do set_cached_acl after that, so i_op->get_acl don't need to do that -dnl # anymore. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE], [ - ZFS_LINUX_TEST_SRC([get_acl_handle_cache], [ - #include - ],[ - void *sentinel __attribute__ ((unused)) = - uncached_acl_sentinel(NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE], [ - AC_MSG_CHECKING([whether uncached_acl_sentinel() exists]) - ZFS_LINUX_TEST_RESULT([get_acl_handle_cache], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_GET_ACL_HANDLE_CACHE, 1, - [uncached_acl_sentinel() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.16 kernel: check if struct posix_acl acl.a_refcount is a refcount_t. -dnl # It's an atomic_t on older kernels. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT], [ - ZFS_LINUX_TEST_SRC([acl_refcount], [ - #include - #include - #include - ],[ - struct posix_acl acl; - refcount_t *r __attribute__ ((unused)) = &acl.a_refcount; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_ACL_HAS_REFCOUNT], [ - AC_MSG_CHECKING([whether posix_acl has refcount_t]) - ZFS_LINUX_TEST_RESULT([acl_refcount], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ACL_REFCOUNT, 1, [posix_acl has refcount_t]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL], [ - ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE - ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE - ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T - ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL - ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE - ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT ]) AC_DEFUN([ZFS_AC_KERNEL_ACL], [ - ZFS_AC_KERNEL_POSIX_ACL_RELEASE - ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE - ZFS_AC_KERNEL_POSIX_ACL_CHMOD ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T - ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL - ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE - ZFS_AC_KERNEL_ACL_HAS_REFCOUNT ]) diff --git a/config/kernel-aio-fsync.m4 b/config/kernel-aio-fsync.m4 deleted file mode 100644 index b4dbf29ba781..000000000000 --- a/config/kernel-aio-fsync.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # Linux 4.9-rc5+ ABI, removal of the .aio_fsync field -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_AIO_FSYNC], [ - ZFS_LINUX_TEST_SRC([aio_fsync], [ - #include - - static const struct file_operations - fops __attribute__ ((unused)) = { - .aio_fsync = NULL, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_AIO_FSYNC], [ - AC_MSG_CHECKING([whether fops->aio_fsync() exists]) - ZFS_LINUX_TEST_RESULT([aio_fsync], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_AIO_FSYNC, 1, [fops->aio_fsync() exists]) - ],[ - AC_MSG_RESULT(no) - 
]) -]) diff --git a/config/kernel-assign_str.m4 b/config/kernel-assign_str.m4 new file mode 100644 index 000000000000..cf4b00e7cbf2 --- /dev/null +++ b/config/kernel-assign_str.m4 @@ -0,0 +1,62 @@ +dnl # +dnl # 6.10 kernel, check number of args of __assign_str() for trace: +dnl +dnl # 6.10+: one arg +dnl # 6.9 and older: two args +dnl # +dnl # More specifically, this will test to see if __assign_str() takes one +dnl # arg. If __assign_str() takes two args, or is not defined, then +dnl # HAVE_1ARG_ASSIGN_STR will not be set. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_1ARG_ASSIGN_STR], [ + AC_MSG_CHECKING([whether __assign_str() has one arg]) + ZFS_LINUX_TRY_COMPILE_HEADER([ + #include + MODULE_LICENSE("$ZFS_META_LICENSE"); + + #define CREATE_TRACE_POINTS + #include "conftest.h" + ],[ + trace_zfs_autoconf_event_one("1"); + trace_zfs_autoconf_event_two("2"); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_1ARG_ASSIGN_STR, 1, + [__assign_str() has one arg]) + ],[ + AC_MSG_RESULT(no) + ],[ + #if !defined(_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ) + #define _CONFTEST_H + + #undef TRACE_SYSTEM + #define TRACE_SYSTEM zfs + #include + + DECLARE_EVENT_CLASS(zfs_autoconf_event_class, + TP_PROTO(char *string), + TP_ARGS(string), + TP_STRUCT__entry( + __string(str, string) + ), + TP_fast_assign( + __assign_str(str); + ), + TP_printk("str = %s", __get_str(str)) + ); + + #define DEFINE_AUTOCONF_EVENT(name) \ + DEFINE_EVENT(zfs_autoconf_event_class, name, \ + TP_PROTO(char * str), \ + TP_ARGS(str)) + DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_one); + DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_two); + + #endif /* _CONFTEST_H */ + + #undef TRACE_INCLUDE_PATH + #define TRACE_INCLUDE_PATH . + #define TRACE_INCLUDE_FILE conftest + #include + ]) +]) diff --git a/config/kernel-bdi.m4 b/config/kernel-bdi.m4 deleted file mode 100644 index 9758863a9cbf..000000000000 --- a/config/kernel-bdi.m4 +++ /dev/null @@ -1,81 +0,0 @@ -dnl # -dnl # Check available BDI interfaces. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BDI], [ - ZFS_LINUX_TEST_SRC([super_setup_bdi_name], [ - #include - struct super_block sb; - ], [ - char *name = "bdi"; - atomic_long_t zfs_bdi_seq; - int error __attribute__((unused)); - atomic_long_set(&zfs_bdi_seq, 0); - error = - super_setup_bdi_name(&sb, "%.28s-%ld", name, - atomic_long_inc_return(&zfs_bdi_seq)); - ]) - - ZFS_LINUX_TEST_SRC([bdi_setup_and_register], [ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name); - ]) - - ZFS_LINUX_TEST_SRC([bdi_setup_and_register_3args], [ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - unsigned int cap = BDI_CAP_MAP_COPY; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name, cap); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BDI], [ - dnl # - dnl # 4.12, super_setup_bdi_name() introduced. - dnl # - AC_MSG_CHECKING([whether super_setup_bdi_name() exists]) - ZFS_LINUX_TEST_RESULT_SYMBOL([super_setup_bdi_name], - [super_setup_bdi_name], [fs/super.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SUPER_SETUP_BDI_NAME, 1, - [super_setup_bdi_name() exits]) - ], [ - AC_MSG_RESULT(no) - - dnl # - dnl # 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. - dnl # - AC_MSG_CHECKING( - [whether bdi_setup_and_register() wants 2 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([bdi_setup_and_register], - [bdi_setup_and_register], [mm/backing-dev.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_BDI_SETUP_AND_REGISTER, 1, - [bdi_setup_and_register() wants 2 args]) - ], [ - AC_MSG_RESULT(no) - - dnl # - dnl # 2.6.34 - 3.19, bdi_setup_and_register() - dnl # takes 3 arguments. 
- dnl # - AC_MSG_CHECKING( - [whether bdi_setup_and_register() wants 3 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL( - [bdi_setup_and_register_3args], - [bdi_setup_and_register], [mm/backing-dev.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_3ARGS_BDI_SETUP_AND_REGISTER, 1, - [bdi_setup_and_register() wants 3 args]) - ], [ - ZFS_LINUX_TEST_ERROR([bdi_setup]) - ]) - ]) - ]) -]) diff --git a/config/kernel-bio.m4 b/config/kernel-bio.m4 index b22c1a3de7e1..8afc9c59ddad 100644 --- a/config/kernel-bio.m4 +++ b/config/kernel-bio.m4 @@ -1,81 +1,3 @@ -dnl # -dnl # 2.6.36 API change, -dnl # REQ_FAILFAST_{DEV|TRANSPORT|DRIVER} -dnl # REQ_DISCARD -dnl # REQ_FLUSH -dnl # -dnl # 4.8 - 4.9 API, -dnl # REQ_FLUSH was renamed to REQ_PREFLUSH -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_REQ], [ - ZFS_LINUX_TEST_SRC([req_failfast_mask], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_FAILFAST_MASK; - ]) - - ZFS_LINUX_TEST_SRC([req_discard], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_DISCARD; - ]) - - ZFS_LINUX_TEST_SRC([req_flush], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_FLUSH; - ]) - - ZFS_LINUX_TEST_SRC([req_preflush], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_PREFLUSH; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_FAILFAST_MASK], [ - AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined]) - ZFS_LINUX_TEST_RESULT([req_failfast_mask], [ - AC_MSG_RESULT(yes) - ],[ - ZFS_LINUX_TEST_ERROR([REQ_FAILFAST_MASK]) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_DISCARD], [ - AC_MSG_CHECKING([whether REQ_DISCARD is defined]) - ZFS_LINUX_TEST_RESULT([req_discard], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_DISCARD, 1, [REQ_DISCARD is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_FLUSH], [ - AC_MSG_CHECKING([whether REQ_FLUSH is defined]) - ZFS_LINUX_TEST_RESULT([req_flush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_FLUSH, 1, [REQ_FLUSH is defined]) - ],[ - 
AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_PREFLUSH], [ - AC_MSG_CHECKING([whether REQ_PREFLUSH is defined]) - ZFS_LINUX_TEST_RESULT([req_preflush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_PREFLUSH, 1, [REQ_PREFLUSH is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # Linux 4.8 API, dnl # @@ -84,31 +6,6 @@ dnl # checking the bio->bi_rw flags. The following checks are used to dnl # detect if a specific operation is supported. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [ - ZFS_LINUX_TEST_SRC([req_op_discard], [ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_DISCARD; - ]) - - ZFS_LINUX_TEST_SRC([req_op_secure_erase], [ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; - ]) - - ZFS_LINUX_TEST_SRC([req_op_flush], [ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_FLUSH; - ]) - - ZFS_LINUX_TEST_SRC([bio_bi_opf], [ - #include - ],[ - struct bio bio __attribute__ ((unused)); - bio.bi_opf = 0; - ]) - ZFS_LINUX_TEST_SRC([bio_set_op_attrs], [ #include ],[ @@ -117,47 +14,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_DISCARD], [ - AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) - ZFS_LINUX_TEST_RESULT([req_op_discard], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_DISCARD, 1, [REQ_OP_DISCARD is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_SECURE_ERASE], [ - AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined]) - ZFS_LINUX_TEST_RESULT([req_op_secure_erase], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1, - [REQ_OP_SECURE_ERASE is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_FLUSH], [ - AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined]) - ZFS_LINUX_TEST_RESULT([req_op_flush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, [REQ_OP_FLUSH is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ - AC_MSG_CHECKING([whether 
bio->bi_opf is defined]) - ZFS_LINUX_TEST_RESULT([bio_bi_opf], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_OP_ATTRS], [ AC_MSG_CHECKING([whether bio_set_op_attrs is available]) ZFS_LINUX_TEST_RESULT([bio_set_op_attrs], [ @@ -210,127 +66,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO], [ ]) AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [ - AC_MSG_CHECKING([whether bio_set_dev() is available]) - ZFS_LINUX_TEST_RESULT([bio_set_dev], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() is available]) - - AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) - ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, - [bio_set_dev() GPL-only]) - ]) - - AC_MSG_CHECKING([whether bio_set_dev() is a macro]) - ZFS_LINUX_TEST_RESULT([bio_set_dev_macro], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV_MACRO, 1, - [bio_set_dev() is a macro]) - ],[ - AC_MSG_RESULT(no) - ]) - ],[ + AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) + ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [ AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.3 API change -dnl # Error argument dropped from bio_endio in favor of newly introduced -dnl # bio->bi_error. This also replaces bio->bi_flags value BIO_UPTODATE. -dnl # Introduced by torvalds/linux@4246a0b63bd8f56a1469b12eafeb875b1041a451 -dnl # ("block: add a bi_error field to struct bio"). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [ - ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [ - #include - static void wanted_end_io(struct bio *bio) { return; } - bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io; - ], []) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ - AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg]) - ZFS_LINUX_TEST_RESULT([bio_end_io_t_args], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_1ARG_BIO_END_IO_T, 1, - [bio_end_io_t wants 1 arg]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.13 API change -dnl # The bio->bi_error field was replaced with bio->bi_status which is an -dnl # enum which describes all possible error types. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BI_STATUS], [ - ZFS_LINUX_TEST_SRC([bio_bi_status], [ - #include - ], [ - struct bio bio __attribute__ ((unused)); - blk_status_t status __attribute__ ((unused)) = BLK_STS_OK; - bio.bi_status = status; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [ - AC_MSG_CHECKING([whether bio->bi_status exists]) - ZFS_LINUX_TEST_RESULT([bio_bi_status], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_BI_STATUS, 1, [bio->bi_status exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.14 API change, -dnl # Immutable biovecs. A number of fields of struct bio are moved to -dnl # struct bvec_iter. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER], [ - ZFS_LINUX_TEST_SRC([bio_bvec_iter], [ - #include ],[ - struct bio bio; - bio.bi_iter.bi_sector = 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [ - AC_MSG_CHECKING([whether bio has bi_iter]) - ZFS_LINUX_TEST_RESULT([bio_bvec_iter], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_BVEC_ITER, 1, [bio has bi_iter]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.8 API change -dnl # The rw argument has been removed from submit_bio/submit_bio_wait. -dnl # Callers are now expected to set bio->bi_rw instead of passing it in. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO], [ - ZFS_LINUX_TEST_SRC([submit_bio], [ - #include - ],[ - struct bio *bio = NULL; - (void) submit_bio(bio); + AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, + [bio_set_dev() GPL-only]) ]) -]) -AC_DEFUN([ZFS_AC_KERNEL_BIO_SUBMIT_BIO], [ - AC_MSG_CHECKING([whether submit_bio() wants 1 arg]) - ZFS_LINUX_TEST_RESULT([submit_bio], [ + AC_MSG_CHECKING([whether bio_set_dev() is a macro]) + ZFS_LINUX_TEST_RESULT([bio_set_dev_macro], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_1ARG_SUBMIT_BIO, 1, [submit_bio() wants 1 arg]) + AC_DEFINE(HAVE_BIO_SET_DEV_MACRO, 1, + [bio_set_dev() is a macro]) ],[ AC_MSG_RESULT(no) ]) @@ -449,31 +198,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID], [ ]) ]) -dnl # -dnl # Linux 5.16 API -dnl # -dnl # The Linux 5.16 API moved struct blkcg_gq into linux/blk-cgroup.h, which -dnl # has been around since 2015. This test looks for the presence of that -dnl # header, so that it can be conditionally included where it exists, but -dnl # still be backward compatible with kernels that pre-date its introduction. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER], [ - ZFS_LINUX_TEST_SRC([blk_cgroup_header], [ - #include - ], []) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_CGROUP_HEADER], [ - AC_MSG_CHECKING([whether linux/blk-cgroup.h exists]) - ZFS_LINUX_TEST_RESULT([blk_cgroup_header],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_LINUX_BLK_CGROUP_HEADER, 1, - [linux/blk-cgroup.h exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # Linux 5.18 API dnl # @@ -510,43 +234,22 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO_ALLOC_4ARG], [ ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [ - ZFS_AC_KERNEL_SRC_REQ ZFS_AC_KERNEL_SRC_BIO_OPS ZFS_AC_KERNEL_SRC_BIO_SET_DEV - ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS - ZFS_AC_KERNEL_SRC_BIO_BI_STATUS - ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER - ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO ZFS_AC_KERNEL_SRC_BIO_CURRENT_BIO_LIST ZFS_AC_KERNEL_SRC_BLKG_TRYGET ZFS_AC_KERNEL_SRC_BIO_BDEV_DISK ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO - ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG ]) AC_DEFUN([ZFS_AC_KERNEL_BIO], [ - ZFS_AC_KERNEL_BIO_REQ_FAILFAST_MASK - ZFS_AC_KERNEL_BIO_REQ_DISCARD - ZFS_AC_KERNEL_BIO_REQ_FLUSH - ZFS_AC_KERNEL_BIO_REQ_PREFLUSH - - ZFS_AC_KERNEL_BIO_REQ_OP_DISCARD - ZFS_AC_KERNEL_BIO_REQ_OP_SECURE_ERASE - ZFS_AC_KERNEL_BIO_REQ_OP_FLUSH - ZFS_AC_KERNEL_BIO_BI_OPF ZFS_AC_KERNEL_BIO_SET_OP_ATTRS - ZFS_AC_KERNEL_BIO_SET_DEV - ZFS_AC_KERNEL_BIO_END_IO_T_ARGS - ZFS_AC_KERNEL_BIO_BI_STATUS - ZFS_AC_KERNEL_BIO_BVEC_ITER - ZFS_AC_KERNEL_BIO_SUBMIT_BIO ZFS_AC_KERNEL_BIO_CURRENT_BIO_LIST ZFS_AC_KERNEL_BLKG_TRYGET ZFS_AC_KERNEL_BIO_BDEV_DISK ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID - ZFS_AC_KERNEL_BLK_CGROUP_HEADER ZFS_AC_KERNEL_BIO_ALLOC_4ARG ]) diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index a064140f337a..cd2b143e89a0 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -161,7 +161,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ dnl # dnl # 5.19: 
bdev_max_secure_erase_sectors() available dnl # 4.8: blk_queue_secure_erase() available -dnl # 2.6.36: blk_queue_secdiscard() available dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ ZFS_LINUX_TEST_SRC([bdev_max_secure_erase_sectors], [ @@ -182,16 +181,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ memset(q, 0, sizeof(r)); value = blk_queue_secure_erase(q); ],[-Wframe-larger-than=8192]) - - ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [ - #include - ],[ - struct request_queue r; - struct request_queue *q = &r; - int value __attribute__ ((unused)); - memset(q, 0, sizeof(r)); - value = blk_queue_secdiscard(q); - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ @@ -209,137 +198,11 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1, [blk_queue_secure_erase() is available]) ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether blk_queue_secdiscard() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1, - [blk_queue_secdiscard() is available]) - ],[ - ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase]) - ]) + ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase]) ]) ]) ]) -dnl # -dnl # 4.16 API change, -dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [ - ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [ - #include - #include - ],[ - struct request_queue *q = NULL; - blk_queue_flag_set(0, q); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [ - AC_MSG_CHECKING([whether blk_queue_flag_set() exists]) - ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1, - [blk_queue_flag_set() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [ - ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [ - #include - #include - ],[ - struct request_queue *q = NULL; - blk_queue_flag_clear(0, q); 
- ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [ - AC_MSG_CHECKING([whether blk_queue_flag_clear() exists]) - ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1, - [blk_queue_flag_clear() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 2.6.36 API change, -dnl # Added blk_queue_flush() interface, while the previous interface -dnl # was available to all the new one is GPL-only. Thus in addition to -dnl # detecting if this function is available we determine if it is -dnl # GPL-only. If the GPL-only interface is there we implement our own -dnl # compatibility function, otherwise we use the function. The hope -dnl # is that long term this function will be opened up. -dnl # -dnl # 4.7 API change, -dnl # Replace blk_queue_flush with blk_queue_write_cache -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [ - ZFS_LINUX_TEST_SRC([blk_queue_flush], [ - #include - ], [ - struct request_queue *q __attribute__ ((unused)) = NULL; - (void) blk_queue_flush(q, REQ_FLUSH); - ], [], [ZFS_META_LICENSE]) - - ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [ - #include - #include - ], [ - struct request_queue *q __attribute__ ((unused)) = NULL; - blk_queue_write_cache(q, true, true); - ], [], [ZFS_META_LICENSE]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ - AC_MSG_CHECKING([whether blk_queue_flush() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_flush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1, - [blk_queue_flush() is available]) - - AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only]) - ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1, - [blk_queue_flush() is GPL-only]) - ]) - ],[ - AC_MSG_RESULT(no) - ]) - - dnl # - dnl # 4.7 API change - dnl # Replace blk_queue_flush with blk_queue_write_cache - dnl # - AC_MSG_CHECKING([whether blk_queue_write_cache() exists]) - 
ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1, - [blk_queue_write_cache() exists]) - - AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only]) - ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1, - [blk_queue_write_cache() is GPL-only]) - ]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 2.6.34 API change dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors(). @@ -385,24 +248,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ ]) ]) -dnl # -dnl # See if kernel supports block multi-queue and blk_status_t. -dnl # blk_status_t represents the new status codes introduced in the 4.13 -dnl # kernel patch: -dnl # -dnl # block: introduce new block status code type -dnl # -dnl # We do not currently support the "old" block multi-queue interfaces from -dnl # prior kernels. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [ - ZFS_LINUX_TEST_SRC([blk_mq], [ - #include - ], [ - struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0}; - (void) blk_mq_alloc_tag_set(&tag_set); - return BLK_STS_OK; - ], []) +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ_RQ_HCTX], [ ZFS_LINUX_TEST_SRC([blk_mq_rq_hctx], [ #include #include @@ -413,18 +259,11 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [ ], []) ]) -AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [ - AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available]) - ZFS_LINUX_TEST_RESULT([blk_mq], [ +AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ_RQ_HCTX], [ + AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request]) + ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available]) - AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request]) - ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue 
hardware context is cached in struct request]) - ], [ - AC_MSG_RESULT(no) - ]) + AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue hardware context is cached in struct request]) ], [ AC_MSG_RESULT(no) ]) @@ -437,12 +276,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE - ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET - ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR - ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_SRC_BLK_MQ + ZFS_AC_KERNEL_SRC_BLK_MQ_RQ_HCTX ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ @@ -452,10 +288,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD ZFS_AC_KERNEL_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR - ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_BLK_MQ + ZFS_AC_KERNEL_BLK_MQ_RQ_HCTX ]) diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4 index 4f60f96acb56..83190c6fbe3f 100644 --- a/config/kernel-blkdev.m4 +++ b/config/kernel-blkdev.m4 @@ -396,7 +396,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_INVALIDATE_BDEV], [ dnl # dnl # 5.11 API, lookup_bdev() takes dev_t argument. dnl # 2.6.27 API, lookup_bdev() was first exported. -dnl # 4.4.0-6.21 API, lookup_bdev() on Ubuntu takes mode argument. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV], [ ZFS_LINUX_TEST_SRC([lookup_bdev_devt], [ @@ -418,15 +417,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV], [ bdev = lookup_bdev(path); ]) - - ZFS_LINUX_TEST_SRC([lookup_bdev_mode], [ - #include - ], [ - struct block_device *bdev __attribute__ ((unused)); - const char path[] = "/example/path"; - - bdev = lookup_bdev(path, FMODE_READ); - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV], [ @@ -446,17 +436,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV], [ AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1, [lookup_bdev() wants 1 arg]) ], [ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether lookup_bdev() wants mode arg]) - ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_mode], - [lookup_bdev], [fs/block_dev.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MODE_LOOKUP_BDEV, 1, - [lookup_bdev() wants mode arg]) - ], [ - ZFS_LINUX_TEST_ERROR([lookup_bdev()]) - ]) + ZFS_LINUX_TEST_ERROR([lookup_bdev()]) ]) ]) ]) diff --git a/config/kernel-clear-inode.m4 b/config/kernel-clear-inode.m4 deleted file mode 100644 index 3f454d7ec0d3..000000000000 --- a/config/kernel-clear-inode.m4 +++ /dev/null @@ -1,39 +0,0 @@ -dnl # -dnl # 3.5.0 API change -dnl # torvalds/linux@dbd5768f87ff6fb0a4fe09c4d7b6c4a24de99430 and -dnl # torvalds/linux@7994e6f7254354e03028a11f98a27bd67dace9f1 reworked -dnl # where inode_sync_wait() is called. -dnl # -dnl # Prior to these changes it would occur in end_writeback() but due -dnl # to various issues (described in the above commits) it has been -dnl # moved to evict(). This changes the ordering is which sync occurs -dnl # but otherwise doesn't impact the zpl implementation. -dnl # -dnl # The major impact here is the renaming of end_writeback() to -dnl # clear_inode(). However, care must be taken when detecting this -dnl # API change because as recently as 2.6.35 there was a clear_inode() -dnl # function. However, it was made obsolete by the evict_inode() API -dnl # change at the same time. 
-dnl # -dnl # Therefore, to ensure we have the correct API we only allow the -dnl # clear_inode() compatibility code to be defined iff the evict_inode() -dnl # functionality is also detected. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CLEAR_INODE], [ - ZFS_LINUX_TEST_SRC([clear_inode], [ - #include - ], [ - clear_inode(NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CLEAR_INODE], [ - AC_MSG_CHECKING([whether clear_inode() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([clear_inode], - [clear_inode], [fs/inode.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CLEAR_INODE, 1, [clear_inode() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-config-defined.m4 b/config/kernel-config-defined.m4 index 54837d728341..83c40fa6cd8e 100644 --- a/config/kernel-config-defined.m4 +++ b/config/kernel-config-defined.m4 @@ -4,21 +4,6 @@ dnl # detected at configure time and cause a build failure. Otherwise dnl # modules may be successfully built that behave incorrectly. dnl # AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEFINED], [ - AS_IF([test "x$cross_compiling" != xyes], [ - AC_RUN_IFELSE([ - AC_LANG_PROGRAM([ - #include "$LINUX/include/linux/license.h" - ], [ - return !license_is_gpl_compatible( - "$ZFS_META_LICENSE"); - ]) - ], [ - AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1], - [Define to 1 if GPL-only symbols can be used]) - ], [ - ]) - ]) - ZFS_AC_KERNEL_SRC_CONFIG_MODULES ZFS_AC_KERNEL_SRC_CONFIG_BLOCK ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4 deleted file mode 100644 index ab7d9c5cedba..000000000000 --- a/config/kernel-current-time.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 4.9, current_time() added -dnl # 4.18, return type changed from timespec to timespec64 -dnl # -dnl # Note that we don't care about the return type in this check. If we have -dnl # to implement a fallback, we'll know we're <4.9, which was timespec. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [ - ZFS_LINUX_TEST_SRC([current_time], [ - #include - ], [ - struct inode ip __attribute__ ((unused)); - (void) current_time(&ip); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], [ - AC_MSG_CHECKING([whether current_time() exists]) - ZFS_LINUX_TEST_RESULT_SYMBOL([current_time], - [current_time], [fs/inode.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-dentry-alias.m4 b/config/kernel-dentry-alias.m4 deleted file mode 100644 index f0ddb8d010b0..000000000000 --- a/config/kernel-dentry-alias.m4 +++ /dev/null @@ -1,30 +0,0 @@ -dnl # -dnl # 3.18 API change -dnl # Dentry aliases are in d_u struct dentry member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [ - ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [ - #include - #include - #include - ], [ - struct inode *inode __attribute__ ((unused)) = NULL; - struct dentry *dentry __attribute__ ((unused)) = NULL; - hlist_for_each_entry(dentry, &inode->i_dentry, - d_u.d_alias) { - d_drop(dentry); - } - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [ - AC_MSG_CHECKING([whether dentry aliases are in d_u member]) - ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1, - [dentry aliases are in d_u member]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - diff --git a/config/kernel-dentry-operations.m4 b/config/kernel-dentry-operations.m4 index 500f61e26aee..aa5a9f2aff39 100644 --- a/config/kernel-dentry-operations.m4 +++ b/config/kernel-dentry-operations.m4 @@ -1,26 +1,3 @@ -dnl # -dnl # 3.4.0 API change -dnl # Added d_make_root() to replace previous d_alloc_root() function. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_D_MAKE_ROOT], [ - ZFS_LINUX_TEST_SRC([d_make_root], [ - #include - ], [ - d_make_root(NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_D_MAKE_ROOT], [ - AC_MSG_CHECKING([whether d_make_root() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([d_make_root], - [d_make_root], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_MAKE_ROOT, 1, [d_make_root() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 2.6.28 API change dnl # Added d_obtain_alias() helper function. @@ -43,31 +20,6 @@ AC_DEFUN([ZFS_AC_KERNEL_D_OBTAIN_ALIAS], [ ]) ]) -dnl # -dnl # 2.6.12 API change -dnl # d_prune_aliases() helper function available. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES], [ - ZFS_LINUX_TEST_SRC([d_prune_aliases], [ - #include - ], [ - struct inode *ip = NULL; - d_prune_aliases(ip); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES], [ - AC_MSG_CHECKING([whether d_prune_aliases() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([d_prune_aliases], - [d_prune_aliases], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1, - [d_prune_aliases() is available]) - ], [ - ZFS_LINUX_TEST_ERROR([d_prune_aliases()]) - ]) -]) - dnl # dnl # 2.6.38 API change dnl # Added d_set_d_op() helper function. 
@@ -90,101 +42,14 @@ AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [ ]) ]) -dnl # -dnl # 3.6 API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA], [ - ZFS_LINUX_TEST_SRC([dentry_operations_revalidate], [ - #include - #include - - static int revalidate (struct dentry *dentry, - struct nameidata *nidata) { return 0; } - - static const struct dentry_operations - dops __attribute__ ((unused)) = { - .d_revalidate = revalidate, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [ - AC_MSG_CHECKING([whether dops->d_revalidate() takes struct nameidata]) - ZFS_LINUX_TEST_RESULT([dentry_operations_revalidate], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_REVALIDATE_NAMEIDATA, 1, - [dops->d_revalidate() operation takes nameidata]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 2.6.30 API change -dnl # The 'struct dentry_operations' was constified in the dentry structure. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS], [ - ZFS_LINUX_TEST_SRC([dentry_operations_const], [ - #include - - const struct dentry_operations test_d_op = { - .d_revalidate = NULL, - }; - ],[ - struct dentry d __attribute__ ((unused)); - d.d_op = &test_d_op; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [ - AC_MSG_CHECKING([whether dentry uses const struct dentry_operations]) - ZFS_LINUX_TEST_RESULT([dentry_operations_const], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1, - [dentry uses const struct dentry_operations]) - ],[ - ZFS_LINUX_TEST_ERROR([const dentry_operations]) - ]) -]) - -dnl # -dnl # 2.6.38 API change -dnl # Added sb->s_d_op default dentry_operations member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_S_D_OP], [ - ZFS_LINUX_TEST_SRC([super_block_s_d_op], [ - #include - ],[ - struct super_block sb __attribute__ ((unused)); - sb.s_d_op = NULL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_S_D_OP], [ - AC_MSG_CHECKING([whether super_block has s_d_op]) - ZFS_LINUX_TEST_RESULT([super_block_s_d_op], [ - AC_MSG_RESULT(yes) - ], [ - 
ZFS_LINUX_TEST_ERROR([super_block s_d_op]) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [ - ZFS_AC_KERNEL_SRC_D_MAKE_ROOT ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS - ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES ZFS_AC_KERNEL_SRC_D_SET_D_OP - ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA - ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS ZFS_AC_KERNEL_SRC_S_D_OP ]) AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [ - ZFS_AC_KERNEL_D_MAKE_ROOT ZFS_AC_KERNEL_D_OBTAIN_ALIAS - ZFS_AC_KERNEL_D_PRUNE_ALIASES ZFS_AC_KERNEL_D_SET_D_OP - ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA - ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS ZFS_AC_KERNEL_S_D_OP ]) diff --git a/config/kernel-dirty-inode.m4 b/config/kernel-dirty-inode.m4 deleted file mode 100644 index 2ef8658748ca..000000000000 --- a/config/kernel-dirty-inode.m4 +++ /dev/null @@ -1,29 +0,0 @@ -dnl # -dnl # 3.0 API change -dnl # The sops->dirty_inode() callbacks were updated to take a flags -dnl # argument. This allows the greater control over whether the -dnl # filesystem needs to push out a transaction or not. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_DIRTY_INODE], [ - ZFS_LINUX_TEST_SRC([dirty_inode_with_flags], [ - #include - - static void dirty_inode(struct inode *a, int b) { return; } - - static const struct super_operations - sops __attribute__ ((unused)) = { - .dirty_inode = dirty_inode, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE], [ - AC_MSG_CHECKING([whether sops->dirty_inode() wants flags]) - ZFS_LINUX_TEST_RESULT([dirty_inode_with_flags], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_DIRTY_INODE_WITH_FLAGS, 1, - [sops->dirty_inode() wants flags]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-encode-fh-inode.m4 b/config/kernel-encode-fh-inode.m4 deleted file mode 100644 index b3ec040b5e95..000000000000 --- a/config/kernel-encode-fh-inode.m4 +++ /dev/null @@ -1,27 +0,0 @@ -dnl # -dnl # 3.5.0 API change -dnl # torvalds/linux@b0b0382bb4904965a9e9fca77ad87514dfda0d1c changed the -dnl # ->encode_fh() callback to pass the child inode and its parents inode -dnl # rather than a dentry and a boolean saying whether we want the parent. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE], [ - ZFS_LINUX_TEST_SRC([export_operations_encode_fh], [ - #include - static int encode_fh(struct inode *inode, __u32 *fh, int *max_len, - struct inode *parent) { return 0; } - static struct export_operations eops __attribute__ ((unused))={ - .encode_fh = encode_fh, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE], [ - AC_MSG_CHECKING([whether eops->encode_fh() wants inode]) - ZFS_LINUX_TEST_RESULT([export_operations_encode_fh], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ENCODE_FH_WITH_INODE, 1, - [eops->encode_fh() wants child and parent inodes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-evict-inode.m4 b/config/kernel-evict-inode.m4 deleted file mode 100644 index 87082c9a2839..000000000000 --- a/config/kernel-evict-inode.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 2.6.36 API change -dnl # The sops->delete_inode() and sops->clear_inode() callbacks have -dnl # replaced by a single sops->evict_inode() callback. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_EVICT_INODE], [ - ZFS_LINUX_TEST_SRC([evict_inode], [ - #include - static void evict_inode (struct inode * t) { return; } - static struct super_operations sops __attribute__ ((unused)) = { - .evict_inode = evict_inode, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_EVICT_INODE], [ - AC_MSG_CHECKING([whether sops->evict_inode() exists]) - ZFS_LINUX_TEST_RESULT([evict_inode], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_EVICT_INODE, 1, [sops->evict_inode() exists]) - ],[ - ZFS_LINUX_TEST_ERROR([evict_inode]) - ]) -]) diff --git a/config/kernel-fadvise.m4 b/config/kernel-fadvise.m4 deleted file mode 100644 index 08912de16ed8..000000000000 --- a/config/kernel-fadvise.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # Linux 4.19 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FADVISE], [ - ZFS_LINUX_TEST_SRC([file_fadvise], [ - #include - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fadvise = NULL, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FADVISE], [ - AC_MSG_CHECKING([whether fops->fadvise() exists]) - ZFS_LINUX_TEST_RESULT([file_fadvise], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_FADVISE, 1, [fops->fadvise() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-fallocate.m4 b/config/kernel-fallocate.m4 deleted file mode 100644 index 95186dada453..000000000000 --- a/config/kernel-fallocate.m4 +++ /dev/null @@ -1,44 +0,0 @@ -dnl # -dnl # Linux 2.6.38 - 3.x API -dnl # The fallocate callback was moved from the inode_operations -dnl # structure to the file_operations structure. 
-dnl # -dnl # -dnl # Linux 3.15+ -dnl # fallocate learned a new flag, FALLOC_FL_ZERO_RANGE -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [ - ZFS_LINUX_TEST_SRC([file_fallocate], [ - #include - - static long test_fallocate(struct file *file, int mode, - loff_t offset, loff_t len) { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fallocate = test_fallocate, - }; - ], []) - ZFS_LINUX_TEST_SRC([falloc_fl_zero_range], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = FALLOC_FL_ZERO_RANGE; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [ - AC_MSG_CHECKING([whether fops->fallocate() exists]) - ZFS_LINUX_TEST_RESULT([file_fallocate], [ - AC_MSG_RESULT(yes) - AC_MSG_CHECKING([whether FALLOC_FL_ZERO_RANGE exists]) - ZFS_LINUX_TEST_RESULT([falloc_fl_zero_range], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FALLOC_FL_ZERO_RANGE, 1, [FALLOC_FL_ZERO_RANGE is defined]) - ],[ - AC_MSG_RESULT(no) - ]) - ],[ - ZFS_LINUX_TEST_ERROR([file_fallocate]) - ]) -]) diff --git a/config/kernel-file-dentry.m4 b/config/kernel-file-dentry.m4 deleted file mode 100644 index 9cb5869c3821..000000000000 --- a/config/kernel-file-dentry.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 4.1 API change -dnl # struct access file->f_path.dentry was replaced by accessor function -dnl # since fix torvalds/linux@4bacc9c9234c ("overlayfs: Make f_path always -dnl # point to the overlay and f_inode to the underlay"). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_DENTRY], [ - ZFS_LINUX_TEST_SRC([file_dentry], [ - #include - ],[ - struct file *f = NULL; - file_dentry(f); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [ - AC_MSG_CHECKING([whether file_dentry() is available]) - ZFS_LINUX_TEST_RESULT([file_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_DENTRY, 1, [file_dentry() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-file-inode.m4 b/config/kernel-file-inode.m4 deleted file mode 100644 index 00a3621657ad..000000000000 --- a/config/kernel-file-inode.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 3.19 API change -dnl # struct access f->f_dentry->d_inode was replaced by accessor function -dnl # file_inode(f) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_INODE], [ - ZFS_LINUX_TEST_SRC([file_inode], [ - #include - ],[ - struct file *f = NULL; - file_inode(f); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FILE_INODE], [ - AC_MSG_CHECKING([whether file_inode() is available]) - ZFS_LINUX_TEST_RESULT([file_inode], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_INODE, 1, [file_inode() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-file.m4 b/config/kernel-file.m4 new file mode 100644 index 000000000000..31252544c745 --- /dev/null +++ b/config/kernel-file.m4 @@ -0,0 +1,40 @@ +dnl # +dnl # 6.12 removed f_version from struct file +dnl # +dnl # The test source declares a struct file with a designated initializer +dnl # for f_version, so it can only compile while that member exists. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_F_VERSION], [ + ZFS_LINUX_TEST_SRC([file_f_version], [ + #include <linux/fs.h> + + static const struct file f __attribute__((unused)) = { + .f_version = 0, + }; + ]) +]) + +dnl # +dnl # Report the file_f_version result: the first branch prints yes and +dnl # defines HAVE_FILE_F_VERSION, the second branch prints no. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_FILE_F_VERSION], [ + AC_MSG_CHECKING([whether file->f_version exists]) + ZFS_LINUX_TEST_RESULT([file_f_version], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FILE_F_VERSION, 1, + [file->f_version exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +dnl # Groups the struct file result checks; currently only f_version. +AC_DEFUN([ZFS_AC_KERNEL_FILE], [ + ZFS_AC_KERNEL_FILE_F_VERSION +]) + +dnl # Groups the struct file test sources; currently only f_version. +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE], [ + ZFS_AC_KERNEL_SRC_FILE_F_VERSION +]) diff --git 
a/config/kernel-filemap.m4 b/config/kernel-filemap.m4 deleted file mode 100644 index 0b7da828d299..000000000000 --- a/config/kernel-filemap.m4 +++ /dev/null @@ -1,27 +0,0 @@ -dnl # -dnl # filemap_range_has_page was not available till 4.13 -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [ - ZFS_LINUX_TEST_SRC([filemap_range_has_page], [ - #include - #include - ],[ - struct address_space *mapping = NULL; - loff_t lstart = 0; - loff_t lend = 0; - bool ret __attribute__ ((unused)); - - ret = filemap_range_has_page(mapping, lstart, lend); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [ - AC_MSG_CHECKING([whether filemap_range_has_page() is available]) - ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1, - [filemap_range_has_page() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-fsync.m4 b/config/kernel-fsync.m4 deleted file mode 100644 index c155f8af81a8..000000000000 --- a/config/kernel-fsync.m4 +++ /dev/null @@ -1,53 +0,0 @@ -dnl # -dnl # Check file_operations->fsync interface. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ - ZFS_LINUX_TEST_SRC([fsync_without_dentry], [ - #include - - static int test_fsync(struct file *f, int x) { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fsync = test_fsync, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([fsync_range], [ - #include - - static int test_fsync(struct file *f, loff_t a, loff_t b, int c) - { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fsync = test_fsync, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FSYNC], [ - dnl # - dnl # Linux 2.6.35 - Linux 3.0 API - dnl # - AC_MSG_CHECKING([whether fops->fsync() wants no dentry]) - ZFS_LINUX_TEST_RESULT([fsync_without_dentry], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1, - [fops->fsync() without dentry]) - ],[ - AC_MSG_RESULT([no]) - - dnl # - dnl # Linux 3.1 - 3.x API - dnl # - AC_MSG_CHECKING([whether fops->fsync() wants range]) - ZFS_LINUX_TEST_RESULT([fsync_range], [ - AC_MSG_RESULT([range]) - AC_DEFINE(HAVE_FSYNC_RANGE, 1, - [fops->fsync() with range]) - ],[ - ZFS_LINUX_TEST_ERROR([fops->fsync]) - ]) - ]) -]) diff --git a/config/kernel-generic_fillattr.m4 b/config/kernel-generic_fillattr.m4 index f5323f0dcb9f..d355f9006bd3 100644 --- a/config/kernel-generic_fillattr.m4 +++ b/config/kernel-generic_fillattr.m4 @@ -48,12 +48,16 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [ AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK, 1, [generic_fillattr requires struct mnt_idmap* and u32 request_mask]) ],[ + AC_MSG_RESULT([no]) + AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*]) ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, [generic_fillattr requires struct mnt_idmap*]) ],[ + AC_MSG_RESULT([no]) + AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ AC_MSG_RESULT([yes]) diff --git 
a/config/kernel-generic_io_acct.m4 b/config/kernel-generic_io_acct.m4 index a6a109004294..da92aad058cb 100644 --- a/config/kernel-generic_io_acct.m4 +++ b/config/kernel-generic_io_acct.m4 @@ -49,18 +49,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ bio_end_io_acct(bio, start_time); ]) - ZFS_LINUX_TEST_SRC([generic_acct_3args], [ - #include - - void (*generic_start_io_acct_f)(int, unsigned long, - struct hd_struct *) = &generic_start_io_acct; - void (*generic_end_io_acct_f)(int, struct hd_struct *, - unsigned long) = &generic_end_io_acct; - ], [ - generic_start_io_acct(0, 0, NULL); - generic_end_io_acct(0, NULL, 0); - ]) - ZFS_LINUX_TEST_SRC([generic_acct_4args], [ #include @@ -138,23 +126,6 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ [generic_*_io_acct() 4 arg available]) ], [ AC_MSG_RESULT(no) - - dnl # - dnl # 3.19 API addition - dnl # - dnl # torvalds/linux@394ffa50 allows us to increment - dnl # iostat counters without generic_make_request(). - dnl # - AC_MSG_CHECKING( - [whether generic_*_io_acct wants 3 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], - [generic_start_io_acct], [block/bio.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, - [generic_*_io_acct() 3 arg available]) - ], [ - AC_MSG_RESULT(no) - ]) ]) ]) ]) diff --git a/config/kernel-generic_readlink.m4 b/config/kernel-generic_readlink.m4 deleted file mode 100644 index a7a33b408abd..000000000000 --- a/config/kernel-generic_readlink.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 4.10 API -dnl # -dnl # NULL inode_operations.readlink implies generic_readlink(), which -dnl # has been made static. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL], [ - ZFS_LINUX_TEST_SRC([generic_readlink_global], [ - #include - ],[ - int i __attribute__ ((unused)); - i = generic_readlink(NULL, NULL, 0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL], [ - AC_MSG_CHECKING([whether generic_readlink is global]) - ZFS_LINUX_TEST_RESULT([generic_readlink_global], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_GENERIC_READLINK, 1, - [generic_readlink is global]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-genhd-flags.m4 b/config/kernel-genhd-flags.m4 index af6a8a086bc9..60cc3173397c 100644 --- a/config/kernel-genhd-flags.m4 +++ b/config/kernel-genhd-flags.m4 @@ -17,12 +17,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENHD_FLAGS], [ ], [ int flags __attribute__ ((unused)) = GENHD_FL_NO_PART; ]) - - ZFS_LINUX_TEST_SRC([genhd_fl_no_part_scan], [ - #include - ], [ - int flags __attribute__ ((unused)) = GENHD_FL_NO_PART_SCAN; - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [ @@ -30,29 +24,18 @@ AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [ AC_MSG_CHECKING([whether GENHD_FL_EXT_DEVT flag is available]) ZFS_LINUX_TEST_RESULT([genhd_fl_ext_devt], [ AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, GENHD_FL_EXT_DEVT, + AC_DEFINE(HAVE_GENHD_FL_EXT_DEVT, 1, [GENHD_FL_EXT_DEVT flag is available]) ], [ AC_MSG_RESULT(no) - AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, 0, - [GENHD_FL_EXT_DEVT flag is not available]) ]) AC_MSG_CHECKING([whether GENHD_FL_NO_PART flag is available]) ZFS_LINUX_TEST_RESULT([genhd_fl_no_part], [ AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART, + AC_DEFINE(HAVE_GENHD_FL_NO_PART, 1, [GENHD_FL_NO_PART flag is available]) ], [ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether GENHD_FL_NO_PART_SCAN flag is available]) - ZFS_LINUX_TEST_RESULT([genhd_fl_no_part_scan], [ - AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART_SCAN, - [GENHD_FL_NO_PART_SCAN flag is available]) - ], [ - 
ZFS_LINUX_TEST_ERROR([GENHD_FL_NO_PART|GENHD_FL_NO_PART_SCAN]) - ]) ]) ]) diff --git a/config/kernel-get-link.m4 b/config/kernel-get-link.m4 deleted file mode 100644 index 1f8f5b0c8b72..000000000000 --- a/config/kernel-get-link.m4 +++ /dev/null @@ -1,104 +0,0 @@ -dnl # -dnl # Supported get_link() interfaces checked newest to oldest. -dnl # Note this interface used to be named follow_link. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ - ZFS_LINUX_TEST_SRC([inode_operations_get_link], [ - #include - static const char *get_link(struct dentry *de, struct inode *ip, - struct delayed_call *done) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .get_link = get_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_get_link_cookie], [ - #include - static const char *get_link(struct dentry *de, struct - inode *ip, void **cookie) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .get_link = get_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_follow_link], [ - #include - static const char *follow_link(struct dentry *de, - void **cookie) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .follow_link = follow_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_follow_link_nameidata], [ - #include - static void *follow_link(struct dentry *de, struct - nameidata *nd) { return (void *)NULL; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .follow_link = follow_link, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [ - dnl # - dnl # 4.5 API change - dnl # The get_link interface has added a delayed done call and - dnl # used it to retire the put_link() interface. 
- dnl # - AC_MSG_CHECKING([whether iops->get_link() passes delayed]) - ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_LINK_DELAYED, 1, [iops->get_link() delayed]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 4.5 API change - dnl # The follow_link() interface has been replaced by - dnl # get_link() which behaves the same as before except: - dnl # - An inode is passed as a separate argument - dnl # - When called in RCU mode a NULL dentry is passed. - dnl # - AC_MSG_CHECKING([whether iops->get_link() passes cookie]) - ZFS_LINUX_TEST_RESULT([inode_operations_get_link_cookie], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_LINK_COOKIE, 1, - [iops->get_link() cookie]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 4.2 API change - dnl # This kernel retired the nameidata structure. - dnl # - AC_MSG_CHECKING( - [whether iops->follow_link() passes cookie]) - ZFS_LINUX_TEST_RESULT([inode_operations_follow_link], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FOLLOW_LINK_COOKIE, 1, - [iops->follow_link() cookie]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 2.6.32 API - dnl # - AC_MSG_CHECKING( - [whether iops->follow_link() passes nameidata]) - ZFS_LINUX_TEST_RESULT( - [inode_operations_follow_link_nameidata],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FOLLOW_LINK_NAMEIDATA, 1, - [iops->follow_link() nameidata]) - ],[ - ZFS_LINUX_TEST_ERROR([get_link]) - ]) - ]) - ]) - ]) -]) diff --git a/config/kernel-global_page_state.m4 b/config/kernel-global_page_state.m4 deleted file mode 100644 index 76f2bba202a1..000000000000 --- a/config/kernel-global_page_state.m4 +++ /dev/null @@ -1,128 +0,0 @@ -dnl # -dnl # 4.8 API change -dnl # -dnl # 75ef71840539 mm, vmstat: add infrastructure for per-node vmstats -dnl # 599d0c954f91 mm, vmscan: move LRU lists to node -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE], [ - ZFS_LINUX_TEST_SRC([global_node_page_state], [ - #include - #include - ],[ - (void) global_node_page_state(0); - ]) -]) - 
-AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE], [ - AC_MSG_CHECKING([whether global_node_page_state() exists]) - ZFS_LINUX_TEST_RESULT([global_node_page_state], [ - AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1, - [global_node_page_state() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.14 API change -dnl # -dnl # c41f012ade0b mm: rename global_page_state to global_zone_page_state -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE], [ - ZFS_LINUX_TEST_SRC([global_zone_page_state], [ - #include - #include - ],[ - (void) global_zone_page_state(0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE], [ - AC_MSG_CHECKING([whether global_zone_page_state() exists]) - ZFS_LINUX_TEST_RESULT([global_zone_page_state], [ - AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GLOBAL_ZONE_PAGE_STATE, 1, - [global_zone_page_state() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # Create a define and autoconf variable for an enum member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_ENUM_MEMBER], [ - AC_MSG_CHECKING([whether enum $2 contains $1]) - AS_IF([AC_TRY_COMMAND( - "${srcdir}/scripts/enum-extract.pl" "$2" "$3" | grep -Eqx $1)],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1, - [enum $2 contains $1]) - m4_join([_], [ZFS_ENUM], m4_toupper($2), $1)=1 - ],[ - AC_MSG_RESULT([no]) - ]) -]) - -dnl # -dnl # Sanity check helpers -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR],[ - AC_MSG_RESULT(no) - AC_MSG_RESULT([$1 in either node_stat_item or zone_stat_item: $2]) - ZFS_LINUX_TEST_ERROR([global page state]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [ - enum_check_a="m4_join([_], [$ZFS_ENUM_NODE_STAT_ITEM], $1)" - enum_check_b="m4_join([_], [$ZFS_ENUM_ZONE_STAT_ITEM], $1)" - AS_IF([test -n "$enum_check_a" -a -n "$enum_check_b"],[ - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR([$1], [DUPLICATE]) - ]) - AS_IF([test -z "$enum_check_a" -a -z "$enum_check_b"],[ - 
ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR([$1], [NOT FOUND]) - ]) -]) - -dnl # -dnl # Ensure the config tests are finding one and only one of each enum. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY], [ - AC_MSG_CHECKING([whether global_page_state enums are sane]) - - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_FILE_PAGES]) - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_INACTIVE_ANON]) - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_INACTIVE_FILE]) - - AC_MSG_RESULT(yes) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE], [ - ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE - ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE -]) - -dnl # -dnl # enum members in which we're interested -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE], [ - ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE - ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE - - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], - [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], - [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], - [node_stat_item], [$LINUX/include/linux/mmzone.h]) - - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], - [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], - [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], - [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - - ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY -]) diff --git a/config/kernel-group-info.m4 b/config/kernel-group-info.m4 deleted file mode 100644 index 6941d62da017..000000000000 --- a/config/kernel-group-info.m4 +++ /dev/null @@ -1,22 +0,0 @@ -dnl # -dnl # 4.9 API change -dnl # group_info changed from 2d array via >blocks to 1d array via ->gid -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GROUP_INFO_GID], [ - ZFS_LINUX_TEST_SRC([group_info_gid], [ - #include - ],[ - struct group_info gi __attribute__ ((unused)) = {}; - gi.gid[0] = KGIDT_INIT(0); - ]) -]) - 
-AC_DEFUN([ZFS_AC_KERNEL_GROUP_INFO_GID], [ - AC_MSG_CHECKING([whether group_info->gid exists]) - ZFS_LINUX_TEST_RESULT([group_info_gid], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-hotplug.m4 b/config/kernel-hotplug.m4 deleted file mode 100644 index e796a6d2e8e8..000000000000 --- a/config/kernel-hotplug.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 4.6 API change -dnl # Added CPU hotplug APIs -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HOTPLUG], [ - ZFS_LINUX_TEST_SRC([cpu_hotplug], [ - #include - ],[ - enum cpuhp_state state = CPUHP_ONLINE; - int (*fp)(unsigned int, struct hlist_node *) = NULL; - cpuhp_state_add_instance_nocalls(0, (struct hlist_node *)NULL); - cpuhp_state_remove_instance_nocalls(0, (struct hlist_node *)NULL); - cpuhp_setup_state_multi(state, "", fp, fp); - cpuhp_remove_multi_state(0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CPU_HOTPLUG], [ - AC_MSG_CHECKING([whether CPU hotplug APIs exist]) - ZFS_LINUX_TEST_RESULT([cpu_hotplug], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CPU_HOTPLUG, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-in-compat-syscall.m4 b/config/kernel-in-compat-syscall.m4 deleted file mode 100644 index baaac8c4fda2..000000000000 --- a/config/kernel-in-compat-syscall.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 4.5 API change -dnl # Added in_compat_syscall() which can be overridden on a per- -dnl # architecture basis. Prior to this is_compat_task() was the -dnl # provided interface. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL], [ - ZFS_LINUX_TEST_SRC([in_compat_syscall], [ - #include - ],[ - in_compat_syscall(); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ - AC_MSG_CHECKING([whether in_compat_syscall() is available]) - ZFS_LINUX_TEST_RESULT([in_compat_syscall], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IN_COMPAT_SYSCALL, 1, - [in_compat_syscall() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4 index 5f7ce1ad9a5d..73b8213109fb 100644 --- a/config/kernel-inode-getattr.m4 +++ b/config/kernel-inode-getattr.m4 @@ -57,20 +57,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ .getattr = test_getattr, }; ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_getattr_vfsmount], [ - #include - - static int test_getattr( - struct vfsmount *mnt, struct dentry *d, - struct kstat *k) - { return 0; } - - static const struct inode_operations - iops __attribute__ ((unused)) = { - .getattr = test_getattr, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ @@ -105,18 +91,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ [iops->getattr() takes a path]) ],[ AC_MSG_RESULT(no) - - dnl # - dnl # Kernel < 4.11 test - dnl # - AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) - ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, - [iops->getattr() takes a vfsmount]) - ],[ - AC_MSG_RESULT(no) - ]) ]) ]) ]) diff --git a/config/kernel-inode-lock.m4 b/config/kernel-inode-lock.m4 deleted file mode 100644 index 5eb04af78771..000000000000 --- a/config/kernel-inode-lock.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 4.7 API change -dnl # i_mutex is changed to i_rwsem. Instead of directly using -dnl # i_mutex/i_rwsem, we should use inode_lock() and inode_lock_shared() -dnl # We test inode_lock_shared because inode_lock is introduced earlier. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_LOCK], [ - ZFS_LINUX_TEST_SRC([inode_lock], [ - #include - ],[ - struct inode *inode = NULL; - inode_lock_shared(inode); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_INODE_LOCK], [ - AC_MSG_CHECKING([whether inode_lock_shared() exists]) - ZFS_LINUX_TEST_RESULT([inode_lock], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_LOCK_SHARED, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-permission.m4 b/config/kernel-inode-permission.m4 index f7fc16439093..286f73bb047e 100644 --- a/config/kernel-inode-permission.m4 +++ b/config/kernel-inode-permission.m4 @@ -42,6 +42,8 @@ AC_DEFUN([ZFS_AC_KERNEL_PERMISSION], [ AC_DEFINE(HAVE_IOPS_PERMISSION_IDMAP, 1, [iops->permission() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether iops->permission() takes struct user_namespace*]) ZFS_LINUX_TEST_RESULT([permission_userns], [ AC_MSG_RESULT(yes) diff --git a/config/kernel-inode-set-flags.m4 b/config/kernel-inode-set-flags.m4 deleted file mode 100644 index 133f666a9517..000000000000 --- a/config/kernel-inode-set-flags.m4 +++ /dev/null @@ -1,22 +0,0 @@ -dnl # -dnl # 3.15 API change -dnl # inode_set_flags introduced to set i_flags -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS], [ - ZFS_LINUX_TEST_SRC([inode_set_flags], [ - #include - ],[ - struct inode inode; - inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [ - AC_MSG_CHECKING([whether inode_set_flags() exists]) - ZFS_LINUX_TEST_RESULT([inode_set_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_SET_FLAGS, 1, [inode_set_flags() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-set-iversion.m4 b/config/kernel-inode-set-iversion.m4 deleted file mode 100644 index dd415de324a7..000000000000 --- a/config/kernel-inode-set-iversion.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 4.16 API change -dnl # inode_set_iversion introduced to set i_version -dnl # 
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION], [ - ZFS_LINUX_TEST_SRC([inode_set_iversion], [ - #include - ],[ - struct inode inode; - inode_set_iversion(&inode, 1); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_IVERSION], [ - AC_MSG_CHECKING([whether inode_set_iversion() exists]) - ZFS_LINUX_TEST_RESULT([inode_set_iversion], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_SET_IVERSION, 1, - [inode_set_iversion() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-setattr.m4 b/config/kernel-inode-setattr.m4 index 69289e897be6..9a12acc95a3f 100644 --- a/config/kernel-inode-setattr.m4 +++ b/config/kernel-inode-setattr.m4 @@ -37,19 +37,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ .setattr = test_setattr, }; ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_setattr], [ - #include - - static int test_setattr( - struct dentry *de, struct iattr *ia) - { return 0; } - - static const struct inode_operations - iops __attribute__ ((unused)) = { - .setattr = test_setattr, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [ @@ -73,15 +60,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [ [iops->setattr() takes struct user_namespace*]) ],[ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether iops->setattr() exists]) - ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOPS_SETATTR, 1, - [iops->setattr() exists]) - ],[ - AC_MSG_RESULT(no) - ]) ]) ]) ]) diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4 index 4d861596ed0b..59988e937929 100644 --- a/config/kernel-inode-times.m4 +++ b/config/kernel-inode-times.m4 @@ -14,20 +14,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ ts = timestamp_truncate(ts, &ip); ]) - dnl # - dnl # 4.18 API change - dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64. 
- dnl # - ZFS_LINUX_TEST_SRC([inode_times], [ - #include - ],[ - struct inode ip; - struct timespec ts; - - memset(&ip, 0, sizeof(ip)); - ts = ip.i_mtime; - ]) - dnl # dnl # 6.6 API change dnl # i_ctime no longer directly accessible, must use @@ -106,15 +92,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ AC_MSG_RESULT(no) ]) - AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) - ZFS_LINUX_TEST_RESULT([inode_times], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1, - [inode->i_*time's are timespec64]) - ]) - AC_MSG_CHECKING([whether inode_get_ctime() exists]) ZFS_LINUX_TEST_RESULT([inode_get_ctime], [ AC_MSG_RESULT(yes) diff --git a/config/kernel-kmem-cache.m4 b/config/kernel-kmem-cache.m4 deleted file mode 100644 index 0e9fe9eb2a90..000000000000 --- a/config/kernel-kmem-cache.m4 +++ /dev/null @@ -1,41 +0,0 @@ -dnl # -dnl # grsecurity API change, -dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by -dnl # kmem_cache_create_usercopy(). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY], [ - ZFS_LINUX_TEST_SRC([kmem_cache_create_usercopy], [ - #include - static void ctor(void *foo) { /* fake ctor */ } - ],[ - struct kmem_cache *skc_linux_cache; - const char *name = "test"; - size_t size = 4096; - size_t align = 8; - unsigned long flags = 0; - size_t useroffset = 0; - size_t usersize = size - useroffset; - - skc_linux_cache = kmem_cache_create_usercopy( - name, size, align, flags, useroffset, usersize, ctor); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ - AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists]) - ZFS_LINUX_TEST_RESULT([kmem_cache_create_usercopy], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_USERCOPY, 1, - [kmem_cache_create_usercopy() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE], [ - ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY -]) - -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE], [ - ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY -]) diff --git a/config/kernel-kmem.m4 b/config/kernel-kmem.m4 index 03c2a41fbdb2..f1c0d24125ce 100644 --- a/config/kernel-kmem.m4 +++ b/config/kernel-kmem.m4 @@ -57,31 +57,6 @@ AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [ AC_MSG_RESULT([$enable_debug_kmem_tracking]) ]) -dnl # -dnl # 4.12 API, -dnl # Added kvmalloc allocation strategy -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KVMALLOC], [ - ZFS_LINUX_TEST_SRC([kvmalloc], [ - #include - #include - ],[ - void *p __attribute__ ((unused)); - - p = kvmalloc(0, GFP_KERNEL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KVMALLOC], [ - AC_MSG_CHECKING([whether kvmalloc(ptr, flags) is available]) - ZFS_LINUX_TEST_RESULT([kvmalloc], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KVMALLOC, 1, [kvmalloc exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 5.8 API, dnl # __vmalloc PAGE_KERNEL removal @@ -106,4 +81,4 @@ AC_DEFUN([ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL], [ AC_MSG_RESULT(no) ]) ]) -- \ No newline at end of file +- diff --git 
a/config/kernel-kstrtoul.m4 b/config/kernel-kstrtoul.m4 deleted file mode 100644 index 8e4b542978a9..000000000000 --- a/config/kernel-kstrtoul.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 2.6.39 API change -dnl # Added kstrtoul() -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KSTRTOUL], [ - ZFS_LINUX_TEST_SRC([kstrtoul], [ - #include - ],[ - int ret __attribute__ ((unused)) = kstrtoul(NULL, 10, NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KSTRTOUL], [ - AC_MSG_CHECKING([whether kstrtoul() exists]) - ZFS_LINUX_TEST_RESULT([kstrtoul], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KSTRTOUL, 1, [kstrtoul() exists]) - ],[ - ZFS_LINUX_TEST_ERROR([kstrtoul()]) - ]) -]) diff --git a/config/kernel-kthread.m4 b/config/kernel-kthread.m4 index f5b824d7947a..4d580efead6b 100644 --- a/config/kernel-kthread.m4 +++ b/config/kernel-kthread.m4 @@ -15,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ +AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL], [ dnl # dnl # 5.17 API: enum pid_type * as new 4th dequeue_signal() argument, dnl # 5768d8906bc23d512b1a736c1e198aa833a6daa4 ("signal: Requeue signals in the appropriate queue") @@ -23,12 +23,24 @@ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info); dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); dnl # + dnl # 6.12 API: first arg struct task_struct* removed + dnl # int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); + dnl # AC_MSG_CHECKING([whether dequeue_signal() takes 4 arguments]) - ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal], [ + ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_4arg], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DEQUEUE_SIGNAL_4ARG, 1, [dequeue_signal() takes 4 arguments]) + AC_DEFINE(HAVE_DEQUEUE_SIGNAL_4ARG, 1, + [dequeue_signal() takes 4 arguments]) ], [ AC_MSG_RESULT(no) + 
AC_MSG_CHECKING([whether dequeue_signal() takes a task argument]) + ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_3arg_task], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DEQUEUE_SIGNAL_3ARG_TASK, 1, + [dequeue_signal() takes a task argument]) + ], [ + AC_MSG_RESULT(no) + ]) ]) ]) @@ -43,8 +55,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ - ZFS_LINUX_TEST_SRC([kthread_dequeue_signal], [ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL], [ + ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_3arg_task], [ + #include + ], [ + struct task_struct *task = NULL; + sigset_t *mask = NULL; + kernel_siginfo_t *info = NULL; + int error __attribute__ ((unused)); + + error = dequeue_signal(task, mask, info); + ]) + + ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_4arg], [ #include ], [ struct task_struct *task = NULL; @@ -59,10 +82,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD], [ ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT - ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG + ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD], [ ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT - ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG + ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL ]) diff --git a/config/kernel-ktime.m4 b/config/kernel-ktime.m4 deleted file mode 100644 index 64c3b5f90328..000000000000 --- a/config/kernel-ktime.m4 +++ /dev/null @@ -1,55 +0,0 @@ -dnl # -dnl # 4.18: ktime_get_coarse_real_ts64() replaces current_kernel_time64(). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64], [ - ZFS_LINUX_TEST_SRC([ktime_get_coarse_real_ts64], [ - #include - ], [ - struct timespec64 ts; - ktime_get_coarse_real_ts64(&ts); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], [ - AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) - ZFS_LINUX_TEST_RESULT([ktime_get_coarse_real_ts64], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, - [ktime_get_coarse_real_ts64() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.18: ktime_get_raw_ts64() replaces getrawmonotonic64(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_RAW_TS64], [ - ZFS_LINUX_TEST_SRC([ktime_get_raw_ts64], [ - #include - ], [ - struct timespec64 ts; - ktime_get_raw_ts64(&ts); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_RAW_TS64], [ - AC_MSG_CHECKING([whether ktime_get_raw_ts64() exists]) - ZFS_LINUX_TEST_RESULT([ktime_get_raw_ts64], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_RAW_TS64, 1, - [ktime_get_raw_ts64() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME], [ - ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64 - ZFS_AC_KERNEL_SRC_KTIME_GET_RAW_TS64 -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME], [ - ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64 - ZFS_AC_KERNEL_KTIME_GET_RAW_TS64 -]) diff --git a/config/kernel-lseek-execute.m4 b/config/kernel-lseek-execute.m4 deleted file mode 100644 index 652f611f8da4..000000000000 --- a/config/kernel-lseek-execute.m4 +++ /dev/null @@ -1,27 +0,0 @@ -dnl # -dnl # 3.11 API change -dnl # lseek_execute helper exported -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE], [ - ZFS_LINUX_TEST_SRC([lseek_execute], [ - #include - ], [ - struct file *fp __attribute__ ((unused)) = NULL; - struct inode *ip __attribute__ ((unused)) = NULL; - loff_t offset __attribute__ ((unused)) = 0; - loff_t maxsize __attribute__ ((unused)) = 0; - - lseek_execute(fp, ip, offset, maxsize); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], [ - 
AC_MSG_CHECKING([whether lseek_execute() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([lseek_execute], - [lseek_exclusive], [fs/read_write.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_LSEEK_EXECUTE, 1, [lseek_execute() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 index 4c54bdd6d4a2..66d6a18cd976 100644 --- a/config/kernel-make-request-fn.m4 +++ b/config/kernel-make-request-fn.m4 @@ -2,14 +2,6 @@ dnl # dnl # Check for make_request_fn interface. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ - ZFS_LINUX_TEST_SRC([make_request_fn_void], [ - #include - static void make_request(struct request_queue *q, - struct bio *bio) { return; } - ],[ - blk_queue_make_request(NULL, &make_request); - ]) - ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [ #include static blk_qc_t make_request(struct request_queue *q, @@ -197,36 +189,20 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ AC_MSG_RESULT(no) dnl # - dnl # Linux 3.2 API Change - dnl # make_request_fn returns void. + dnl # Linux 4.4 API Change + dnl # make_request_fn returns blk_qc_t. dnl # AC_MSG_CHECKING( - [whether make_request_fn() returns void]) - ZFS_LINUX_TEST_RESULT([make_request_fn_void], [ + [whether make_request_fn() returns blk_qc_t]) + ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [ AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, void, + AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, [make_request_fn() return type]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1, - [Noting that make_request_fn() returns void]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, + [Noting that make_request_fn() ] + [returns blk_qc_t]) ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # Linux 4.4 API Change - dnl # make_request_fn returns blk_qc_t. 
- dnl # - AC_MSG_CHECKING( - [whether make_request_fn() returns blk_qc_t]) - ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, - [make_request_fn() return type]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, - [Noting that make_request_fn() ] - [returns blk_qc_t]) - ],[ - ZFS_LINUX_TEST_ERROR([make_request_fn]) - ]) + ZFS_LINUX_TEST_ERROR([make_request_fn]) ]) ]) ]) diff --git a/config/kernel-mkdir.m4 b/config/kernel-mkdir.m4 index 367f100094d3..8e084443c7b4 100644 --- a/config/kernel-mkdir.m4 +++ b/config/kernel-mkdir.m4 @@ -68,6 +68,8 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1, [iops->mkdir() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + dnl # dnl # 5.12 API change dnl # The struct user_namespace arg was added as the first argument to @@ -80,15 +82,6 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ [iops->mkdir() takes struct user_namespace*]) ],[ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether iops->mkdir() takes umode_t]) - ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MKDIR_UMODE_T, 1, - [iops->mkdir() takes umode_t]) - ],[ - ZFS_LINUX_TEST_ERROR([mkdir()]) - ]) ]) ]) ]) diff --git a/config/kernel-mm-page-flags.m4 b/config/kernel-mm-page-flags.m4 new file mode 100644 index 000000000000..b1277118305d --- /dev/null +++ b/config/kernel-mm-page-flags.m4 @@ -0,0 +1,24 @@ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_FLAG_ERROR], [ + ZFS_LINUX_TEST_SRC([mm_page_flag_error], [ + #include + + static enum pageflags + test_flag __attribute__((unused)) = PG_error; + ]) +]) +AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_FLAG_ERROR], [ + AC_MSG_CHECKING([whether PG_error flag is available]) + ZFS_LINUX_TEST_RESULT([mm_page_flag_error], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MM_PAGE_FLAG_ERROR, 1, [PG_error flag is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS], [ + ZFS_AC_KERNEL_SRC_MM_PAGE_FLAG_ERROR +]) 
+AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_FLAGS], [ + ZFS_AC_KERNEL_MM_PAGE_FLAG_ERROR +]) diff --git a/config/kernel-mm-pagemap.m4 b/config/kernel-mm-pagemap.m4 index 466b6fa07d9a..def6f5f4b3aa 100644 --- a/config/kernel-mm-pagemap.m4 +++ b/config/kernel-mm-pagemap.m4 @@ -21,8 +21,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING], [ ZFS_LINUX_TEST_SRC([page_mapping], [ #include ],[ - struct page *p = NULL; - struct address_space *m = page_mapping(NULL); + struct address_space *m; + m = page_mapping(NULL); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_MAPPING], [ diff --git a/config/kernel-percpu.m4 b/config/kernel-percpu.m4 index 5125dd5c5bb8..12e81892cb6b 100644 --- a/config/kernel-percpu.m4 +++ b/config/kernel-percpu.m4 @@ -1,55 +1,3 @@ -dnl # -dnl # 3.18 API change, -dnl # The function percpu_counter_init now must be passed a GFP mask. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT], [ - ZFS_LINUX_TEST_SRC([percpu_counter_init_with_gfp], [ - #include - #include - ],[ - struct percpu_counter counter; - int error; - - error = percpu_counter_init(&counter, 0, GFP_KERNEL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [ - AC_MSG_CHECKING([whether percpu_counter_init() wants gfp_t]) - ZFS_LINUX_TEST_RESULT([percpu_counter_init_with_gfp], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PERCPU_COUNTER_INIT_WITH_GFP, 1, - [percpu_counter_init() wants gfp_t]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.13 API change, -dnl # __percpu_counter_add() was renamed to percpu_counter_add_batch(). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH], [ - ZFS_LINUX_TEST_SRC([percpu_counter_add_batch], [ - #include - ],[ - struct percpu_counter counter; - - percpu_counter_add_batch(&counter, 1, 1); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH], [ - AC_MSG_CHECKING([whether percpu_counter_add_batch() is defined]) - ZFS_LINUX_TEST_RESULT([percpu_counter_add_batch], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1, - [percpu_counter_add_batch() is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 5.10 API change, dnl # The "count" was moved into ref->data, from ref @@ -75,13 +23,9 @@ AC_DEFUN([ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA], [ ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [ - ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT - ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH ZFS_AC_KERNEL_SRC_PERCPU_REF_COUNT_IN_DATA ]) AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [ - ZFS_AC_KERNEL_PERCPU_COUNTER_INIT - ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA ]) diff --git a/config/kernel-put-link.m4 b/config/kernel-put-link.m4 deleted file mode 100644 index 8ab318cbff8c..000000000000 --- a/config/kernel-put-link.m4 +++ /dev/null @@ -1,61 +0,0 @@ -dnl # -dnl # Supported symlink APIs -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ - ZFS_LINUX_TEST_SRC([put_link_cookie], [ - #include - static void put_link(struct inode *ip, void *cookie) - { return; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .put_link = put_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([put_link_nameidata], [ - #include - static void put_link(struct dentry *de, struct - nameidata *nd, void *ptr) { return; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .put_link = put_link, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_PUT_LINK], [ - dnl # - dnl # 4.5 API change - dnl # get_link() uses delayed done, there is no put_link() interface. 
- dnl # This check initially uses the inode_operations_get_link result - dnl # - ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [ - AC_DEFINE(HAVE_PUT_LINK_DELAYED, 1, [iops->put_link() delayed]) - ],[ - dnl # - dnl # 4.2 API change - dnl # This kernel retired the nameidata structure. - dnl # - AC_MSG_CHECKING([whether iops->put_link() passes cookie]) - ZFS_LINUX_TEST_RESULT([put_link_cookie], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PUT_LINK_COOKIE, 1, - [iops->put_link() cookie]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 2.6.32 API - dnl # - AC_MSG_CHECKING( - [whether iops->put_link() passes nameidata]) - ZFS_LINUX_TEST_RESULT([put_link_nameidata], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PUT_LINK_NAMEIDATA, 1, - [iops->put_link() nameidata]) - ],[ - ZFS_LINUX_TEST_ERROR([put_link]) - ]) - ]) - ]) -]) diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index ce881502d1b1..1c47222bdc30 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -1,23 +1,4 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ - dnl # - dnl # 3.9 (to 4.9) API change, - dnl # - dnl # A new version of iops->rename() was added (rename2) that takes a flag - dnl # argument (to support renameat2). However this separate function was - dnl # merged back into iops->rename() in Linux 4.9. - dnl # - ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ - #include - static int rename2_fn(struct inode *sip, struct dentry *sdp, - struct inode *tip, struct dentry *tdp, - unsigned int flags) { return 0; } - - static const struct inode_operations - iops __attribute__ ((unused)) = { - .rename2 = rename2_fn, - }; - ],[]) - dnl # dnl # 4.9 API change, dnl # @@ -36,24 +17,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ }; ],[]) - dnl # - dnl # EL7 compatibility - dnl # - dnl # EL7 has backported renameat2 support, but it's done by defining a - dnl # separate iops wrapper structure that takes the .renameat2 function. 
- dnl # - ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ - #include - static int rename2_fn(struct inode *sip, struct dentry *sdp, - struct inode *tip, struct dentry *tdp, - unsigned int flags) { return 0; } - - static const struct inode_operations_wrapper - iops __attribute__ ((unused)) = { - .rename2 = rename2_fn, - }; - ],[]) - dnl # dnl # 5.12 API change, dnl # @@ -95,6 +58,8 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1, [iops->rename() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*]) ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [ AC_MSG_RESULT(yes) @@ -103,30 +68,13 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether iops->rename2() exists]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [ + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists]) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) ],[ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether iops->rename() wants flags]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, - [iops->rename() wants flags]) - ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()]) - ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1, - [struct inode_operations_wrapper takes .rename2()]) - ],[ - AC_MSG_RESULT(no) - ]) - ]) ]) ]) ]) diff --git a/config/kernel-rw.m4 b/config/kernel-rw.m4 deleted file mode 100644 index 85b47d5c6fc2..000000000000 --- a/config/kernel-rw.m4 +++ /dev/null @@ -1,69 +0,0 @@ -dnl # -dnl # 4.14 API change -dnl # kernel_write() which was introduced in 3.9 was updated to take -dnl # the offset as a 
pointer which is needed by vn_rdwr(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITE], [ - ZFS_LINUX_TEST_SRC([kernel_write], [ - #include - ],[ - struct file *file = NULL; - const void *buf = NULL; - size_t count = 0; - loff_t *pos = NULL; - ssize_t ret; - - ret = kernel_write(file, buf, count, pos); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ - AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer]) - ZFS_LINUX_TEST_RESULT([kernel_write], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1, - [kernel_write() take loff_t pointer]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.14 API change -dnl # kernel_read() which has existed for forever was updated to take -dnl # the offset as a pointer which is needed by vn_rdwr(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_READ], [ - ZFS_LINUX_TEST_SRC([kernel_read], [ - #include - ],[ - struct file *file = NULL; - void *buf = NULL; - size_t count = 0; - loff_t *pos = NULL; - ssize_t ret; - - ret = kernel_read(file, buf, count, pos); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_READ], [ - AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer]) - ZFS_LINUX_TEST_RESULT([kernel_read], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_READ_PPOS, 1, - [kernel_read() take loff_t pointer]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_RW], [ - ZFS_AC_KERNEL_SRC_WRITE - ZFS_AC_KERNEL_SRC_READ -]) - -AC_DEFUN([ZFS_AC_KERNEL_RW], [ - ZFS_AC_KERNEL_WRITE - ZFS_AC_KERNEL_READ -]) diff --git a/config/kernel-rwsem.m4 b/config/kernel-rwsem.m4 deleted file mode 100644 index d3a64a8efa19..000000000000 --- a/config/kernel-rwsem.m4 +++ /dev/null @@ -1,60 +0,0 @@ -dnl # -dnl # 3.16 API Change -dnl # -dnl # rwsem-spinlock "->activity" changed to "->count" -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY], [ - ZFS_LINUX_TEST_SRC([rwsem_activity], [ - #include - ],[ - struct rw_semaphore dummy_semaphore __attribute__ ((unused)); - dummy_semaphore.activity = 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ACTIVITY], [ - 
AC_MSG_CHECKING([whether struct rw_semaphore has member activity]) - ZFS_LINUX_TEST_RESULT([rwsem_activity], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1, - [struct rw_semaphore has member activity]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.8 API Change -dnl # -dnl # rwsem "->count" changed to atomic_long_t type -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT], [ - ZFS_LINUX_TEST_SRC([rwsem_atomic_long_count], [ - #include - ],[ - DECLARE_RWSEM(dummy_semaphore); - (void) atomic_long_read(&dummy_semaphore.count); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [ - AC_MSG_CHECKING( - [whether struct rw_semaphore has atomic_long_t member count]) - ZFS_LINUX_TEST_RESULT([rwsem_atomic_long_count], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RWSEM_ATOMIC_LONG_COUNT, 1, - [struct rw_semaphore has atomic_long_t member count]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM], [ - ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY - ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT -]) - -AC_DEFUN([ZFS_AC_KERNEL_RWSEM], [ - ZFS_AC_KERNEL_RWSEM_ACTIVITY - ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT -]) diff --git a/config/kernel-sched.m4 b/config/kernel-sched.m4 index 17e49fbdf472..8ef4cc6ee4cc 100644 --- a/config/kernel-sched.m4 +++ b/config/kernel-sched.m4 @@ -20,63 +20,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], [ ]) ]) -dnl # -dnl # 4.11 API change, -dnl # Moved things from linux/sched.h to linux/sched/signal.h -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER], [ - ZFS_LINUX_TEST_SRC([sched_signal_header], [ - #include - #include - ],[ - return 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER], [ - AC_MSG_CHECKING([whether header linux/sched/signal.h exists]) - ZFS_LINUX_TEST_RESULT([sched_signal_header], [ - AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, - [linux/sched/signal.h exists]) - AC_MSG_RESULT(yes) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.19 API change -dnl # The io_schedule_timeout() function is 
present in all 2.6.32 kernels -dnl # but it was not exported until Linux 3.19. The RHEL 7.x kernels which -dnl # are based on a 3.10 kernel do export this symbol. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT], [ - ZFS_LINUX_TEST_SRC([io_schedule_timeout], [ - #include - ], [ - (void) io_schedule_timeout(1); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT], [ - AC_MSG_CHECKING([whether io_schedule_timeout() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([io_schedule_timeout], - [io_schedule_timeout], [], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IO_SCHEDULE_TIMEOUT, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED], [ ZFS_AC_KERNEL_SRC_SCHED_RT_HEADER - ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER - ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT ]) AC_DEFUN([ZFS_AC_KERNEL_SCHED], [ ZFS_AC_KERNEL_SCHED_RT_HEADER - ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER - ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT ]) diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4 index e02d6263e9c9..b10ddafc054b 100644 --- a/config/kernel-setattr-prepare.m4 +++ b/config/kernel-setattr-prepare.m4 @@ -51,6 +51,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [ AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1, [setattr_prepare() accepts mnt_idmap]) ], [ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*]) ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns], [setattr_prepare], [fs/attr.c], [ diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 6580b08d5ff2..c4258f4e40d6 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -58,31 +58,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ ]) dnl # -dnl # 3.12 API change -dnl # The nid member was added to struct shrink_control to support -dnl # NUMA-aware shrinkers. 
+dnl # 6.0 API change +dnl # register_shrinker() becomes a var-arg function that takes +dnl # a printf-style format string as args > 0 dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID], [ - ZFS_LINUX_TEST_SRC([shrink_control_nid], [ - #include - ],[ - struct shrink_control sc __attribute__ ((unused)); - unsigned long scnidsize __attribute__ ((unused)) = - sizeof(sc.nid); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ - AC_MSG_CHECKING([whether shrink_control has nid]) - ZFS_LINUX_TEST_RESULT([shrink_control_nid], [ - AC_MSG_RESULT(yes) - AC_DEFINE(SHRINK_CONTROL_HAS_NID, 1, - [struct shrink_control has nid]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [ #include @@ -98,30 +77,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ - ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ - #include - static int shrinker_cb(struct shrinker *shrink, - struct shrink_control *sc) { return 0; } - ],[ - struct shrinker cache_shrinker = { - .shrink = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ]) - - ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ - #include - static unsigned long shrinker_cb(struct shrinker *shrink, - struct shrink_control *sc) { return 0; } +AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SHRINKER_VARARG],[ + AC_MSG_CHECKING([whether new var-arg register_shrinker() exists]) + ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1, + [register_shrinker is vararg]) ],[ - struct shrinker cache_shrinker = { - .count_objects = shrinker_cb, - .scan_objects = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); + AC_MSG_RESULT(no) ]) ]) @@ -144,117 +107,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ 
- dnl # - dnl # 6.0 API change - dnl # register_shrinker() becomes a var-arg function that takes - dnl # a printf-style format string as args > 0 - dnl # - AC_MSG_CHECKING([whether new var-arg register_shrinker() exists]) - ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [ +AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_REGISTER], [ + AC_MSG_CHECKING([whether shrinker_register() exists]) + ZFS_LINUX_TEST_RESULT([shrinker_register], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1, - [register_shrinker is vararg]) - - dnl # We assume that the split shrinker callback exists if the - dnl # vararg register_shrinker() exists, because the latter is - dnl # a much more recent addition, and the macro test for the - dnl # var-arg version only works if the callback is split - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) - ],[ + AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, [shrinker_register exists]) + ], [ AC_MSG_RESULT(no) - dnl # - dnl # 3.0 - 3.11 API change - dnl # cs->shrink(struct shrinker *, struct shrink_control *sc) - dnl # - AC_MSG_CHECKING([whether new 2-argument shrinker exists]) - ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1, - [new shrinker callback wants 2 args]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 3.12 API change, - dnl # cs->shrink() is logically split in to - dnl # cs->count_objects() and cs->scan_objects() - dnl # - AC_MSG_CHECKING( - [whether cs->count_objects callback exists]) - ZFS_LINUX_TEST_RESULT( - [shrinker_cb_shrink_control_split],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) - ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING( - [whether shrinker_register exists]) - ZFS_LINUX_TEST_RESULT([shrinker_register], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, - [shrinker_register exists]) - - dnl # We assume that the split shrinker - dnl # callback exists if - dnl # shrinker_register() 
exists, - dnl # because the latter is a much more - dnl # recent addition, and the macro - dnl # test for shrinker_register() only - dnl # works if the callback is split - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, - 1, [cs->count_objects exists]) - ],[ - AC_MSG_RESULT(no) - ZFS_LINUX_TEST_ERROR([shrinker]) - ]) - ]) - ]) - ]) -]) - -dnl # -dnl # 2.6.39 API change, -dnl # Shrinker adjust to use common shrink_control structure. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT], [ - ZFS_LINUX_TEST_SRC([shrink_control_struct], [ - #include - ],[ - struct shrink_control sc __attribute__ ((unused)); - - sc.nr_to_scan = 0; - sc.gfp_mask = GFP_KERNEL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ - AC_MSG_CHECKING([whether struct shrink_control exists]) - ZFS_LINUX_TEST_RESULT([shrink_control_struct], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1, - [struct shrink_control exists]) - ],[ - ZFS_LINUX_TEST_ERROR([shrink_control]) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR - ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID - ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK - ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK - ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID - ZFS_AC_KERNEL_SHRINKER_CALLBACK - ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT + ZFS_AC_KERNEL_REGISTER_SHRINKER_VARARG + ZFS_AC_KERNEL_SHRINKER_REGISTER ]) diff --git a/config/kernel-signal-stop.m4 b/config/kernel-signal-stop.m4 deleted file mode 100644 index 6cb86e7c4cde..000000000000 --- a/config/kernel-signal-stop.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 4.4 API change -dnl # Added kernel_signal_stop -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SIGNAL_STOP], [ - ZFS_LINUX_TEST_SRC([signal_stop], [ - #include - ],[ - kernel_signal_stop(); - ]) -]) - 
-AC_DEFUN([ZFS_AC_KERNEL_SIGNAL_STOP], [ - AC_MSG_CHECKING([whether signal_stop() exists]) - ZFS_LINUX_TEST_RESULT([signal_stop], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SIGNAL_STOP, 1, [signal_stop() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-special-state.m4 b/config/kernel-special-state.m4 deleted file mode 100644 index aa60aabebc43..000000000000 --- a/config/kernel-special-state.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 4.17 API change -dnl # Added set_special_state() function -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE], [ - ZFS_LINUX_TEST_SRC([set_special_state], [ - #include - ],[ - set_special_state(TASK_STOPPED); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SET_SPECIAL_STATE], [ - AC_MSG_CHECKING([whether set_special_state() exists]) - ZFS_LINUX_TEST_RESULT([set_special_state], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_SPECIAL_STATE, 1, [set_special_state() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-strlcpy.m4 b/config/kernel-strlcpy.m4 index c31cf52d78b0..d50b0035e9d9 100644 --- a/config/kernel-strlcpy.m4 +++ b/config/kernel-strlcpy.m4 @@ -1,6 +1,5 @@ dnl # -dnl # 6.8.x replaced strlcpy with strscpy. Check for both so we can provide -dnl # appropriate fallbacks. +dnl # 6.8 removed strlcpy. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_STRLCPY], [ ZFS_LINUX_TEST_SRC([kernel_has_strlcpy], [ @@ -13,17 +12,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_STRLCPY], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SRC_STRSCPY], [ - ZFS_LINUX_TEST_SRC([kernel_has_strscpy], [ - #include - ], [ - const char *src = "goodbye"; - char dst[32]; - ssize_t len; - len = strscpy(dst, src, sizeof (dst)); - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_STRLCPY], [ AC_MSG_CHECKING([whether strlcpy() exists]) ZFS_LINUX_TEST_RESULT([kernel_has_strlcpy], [ @@ -34,14 +22,3 @@ AC_DEFUN([ZFS_AC_KERNEL_STRLCPY], [ AC_MSG_RESULT([no]) ]) ]) - -AC_DEFUN([ZFS_AC_KERNEL_STRSCPY], [ - AC_MSG_CHECKING([whether strscpy() exists]) - ZFS_LINUX_TEST_RESULT([kernel_has_strscpy], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_KERNEL_STRSCPY, 1, - [strscpy() exists]) - ], [ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-super-userns.m4 b/config/kernel-super-userns.m4 deleted file mode 100644 index 1ad35f2d19ba..000000000000 --- a/config/kernel-super-userns.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 4.8 API change -dnl # struct user_namespace was added to struct super_block as -dnl # super->s_user_ns member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_USER_NS], [ - ZFS_LINUX_TEST_SRC([super_user_ns], [ - #include - #include - ], [ - struct super_block super; - super.s_user_ns = (struct user_namespace *)NULL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [ - AC_MSG_CHECKING([whether super_block->s_user_ns exists]) - ZFS_LINUX_TEST_RESULT([super_user_ns], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SUPER_USER_NS, 1, - [super_block->s_user_ns exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-symlink.m4 b/config/kernel-symlink.m4 index 804fceab28f0..fb6d23f61cbf 100644 --- a/config/kernel-symlink.m4 +++ b/config/kernel-symlink.m4 @@ -41,6 +41,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [ AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1, [iops->symlink() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether 
iops->symlink() takes struct user_namespace*]) ZFS_LINUX_TEST_RESULT([symlink_userns], [ AC_MSG_RESULT(yes) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 deleted file mode 100644 index c710e804be0b..000000000000 --- a/config/kernel-timer.m4 +++ /dev/null @@ -1,75 +0,0 @@ -dnl # 4.14-rc3 API change -dnl # https://lwn.net/Articles/735887/ -dnl # -dnl # Check if timer_list.func get passed a timer_list or an unsigned long -dnl # (older kernels). Also sanity check the from_timer() and timer_setup() -dnl # macros are available as well, since they will be used in the same newer -dnl # kernels that support the new timer_list.func signature. -dnl # -dnl # Also check for the existence of flags in struct timer_list, they were -dnl # added in 4.1-rc8 via 0eeda71bc30d. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ - ZFS_LINUX_TEST_SRC([timer_setup], [ - #include - - struct my_task_timer { - struct timer_list timer; - int data; - }; - - static void task_expire(struct timer_list *tl) - { - struct my_task_timer *task_timer = - from_timer(task_timer, tl, timer); - task_timer->data = 42; - } - ],[ - struct my_task_timer task_timer; - timer_setup(&task_timer.timer, task_expire, 0); - ]) - - ZFS_LINUX_TEST_SRC([timer_list_function], [ - #include - static void task_expire(struct timer_list *tl) {} - ],[ - struct timer_list tl; - tl.function = task_expire; - ]) - - ZFS_LINUX_TEST_SRC([timer_list_flags], [ - #include - ],[ - struct timer_list tl; - tl.flags = 2; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ - AC_MSG_CHECKING([whether timer_setup() is available]) - ZFS_LINUX_TEST_RESULT([timer_setup], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_TIMER_SETUP, 1, - [timer_setup() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether timer function expects timer_list]) - ZFS_LINUX_TEST_RESULT([timer_list_function], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1, - [timer_list.function gets a timer_list]) - ],[ - 
AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether struct timer_list has flags]) - ZFS_LINUX_TEST_RESULT([timer_list_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_TIMER_LIST_FLAGS, 1, - [struct timer_list has a flags member]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 index 7439514186e4..a711d67ed558 100644 --- a/config/kernel-tmpfile.m4 +++ b/config/kernel-tmpfile.m4 @@ -59,23 +59,19 @@ AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ AC_MSG_CHECKING([whether i_op->tmpfile() exists]) ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap]) ], [ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) ],[ ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11]) diff --git a/config/kernel-user-ns-inum.m4 b/config/kernel-user-ns-inum.m4 deleted file mode 100644 index 2207a4aa6921..000000000000 --- a/config/kernel-user-ns-inum.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 3.18 API change -dnl # struct user_namespace inum moved from .proc_inum to .ns.inum. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [ - ZFS_LINUX_TEST_SRC([user_ns_common_inum], [ - #include - ], [ - struct user_namespace uns; - uns.ns.inum = 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [ - AC_MSG_CHECKING([whether user_namespace->ns.inum exists]) - ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1, - [user_namespace->ns.inum exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-userns-capabilities.m4 b/config/kernel-userns-capabilities.m4 index 026503623a2b..f4e24fb1606a 100644 --- a/config/kernel-userns-capabilities.m4 +++ b/config/kernel-userns-capabilities.m4 @@ -19,33 +19,6 @@ AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ ]) ]) -dnl # -dnl # 4.10 API change -dnl # has_capability() was exported. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_HAS_CAPABILITY], [ - ZFS_LINUX_TEST_SRC([has_capability], [ - #include - ],[ - struct task_struct *task = NULL; - int cap = 0; - bool result __attribute__ ((unused)); - - result = has_capability(task, cap); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_HAS_CAPABILITY], [ - AC_MSG_CHECKING([whether has_capability() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([has_capability], - [has_capability], [kernel/capability.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_HAS_CAPABILITY, 1, [has_capability() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 2.6.39 API change dnl # struct user_namespace was added to struct cred_t as cred->user_ns member diff --git a/config/kernel-vfs-direct_IO.m4 b/config/kernel-vfs-direct_IO.m4 index 7b7b91f979f9..17605a13fdef 100644 --- a/config/kernel-vfs-direct_IO.m4 +++ b/config/kernel-vfs-direct_IO.m4 @@ -1,5 +1,5 @@ dnl # -dnl # Check for direct IO interfaces. +dnl # Check for Direct I/O interfaces. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter], [ @@ -25,31 +25,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ .direct_IO = test_direct_IO, }; ],[]) - - ZFS_LINUX_TEST_SRC([direct_io_iter_rw_offset], [ - #include - - static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, - struct iov_iter *iter, loff_t offset) { return 0; } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .direct_IO = test_direct_IO, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([direct_io_iovec], [ - #include - - static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) { return 0; } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .direct_IO = test_direct_IO, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ @@ -76,34 +51,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ ],[ AC_MSG_RESULT([no]) - - dnl # - dnl # Linux 3.16.x API change - dnl # - AC_MSG_CHECKING( - [whether aops->direct_IO() uses rw and offset]) - ZFS_LINUX_TEST_RESULT([direct_io_iter_rw_offset], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET, 1, - [aops->direct_IO() uses iov_iter with ] - [rw and offset]) - ],[ - AC_MSG_RESULT([no]) - - dnl # - dnl # Ancient Linux API (predates git) - dnl # - AC_MSG_CHECKING( - [whether aops->direct_IO() uses iovec]) - ZFS_LINUX_TEST_RESULT([direct_io_iovec], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1, - [aops->direct_IO() uses iovec]) - ],[ - ZFS_LINUX_TEST_ERROR([direct IO]) - AC_MSG_RESULT([no]) - ]) - ]) + ZFS_LINUX_TEST_ERROR([Direct I/O]) ]) ]) ]) diff --git a/config/kernel-vfs-extended-file_range.m4 b/config/kernel-vfs-extended-file_range.m4 deleted file mode 100644 index a2622313129e..000000000000 --- a/config/kernel-vfs-extended-file_range.m4 +++ /dev/null @@ -1,50 +0,0 @@ -dnl # -dnl # EL7 have backported copy_file_range and clone_file_range and -dnl # 
added them to an "extended" file_operations struct. -dnl # -dnl # We're testing for both functions in one here, because they will only -dnl # ever appear together and we don't want to match a similar method in -dnl # some future vendor kernel. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND], [ - ZFS_LINUX_TEST_SRC([vfs_file_operations_extend], [ - #include - - static ssize_t test_copy_file_range(struct file *src_file, - loff_t src_off, struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) { - (void) src_file; (void) src_off; - (void) dst_file; (void) dst_off; - (void) len; (void) flags; - return (0); - } - - static int test_clone_file_range(struct file *src_file, - loff_t src_off, struct file *dst_file, loff_t dst_off, - u64 len) { - (void) src_file; (void) src_off; - (void) dst_file; (void) dst_off; - (void) len; - return (0); - } - - static const struct file_operations_extend - fops __attribute__ ((unused)) = { - .kabi_fops = {}, - .copy_file_range = test_copy_file_range, - .clone_file_range = test_clone_file_range, - }; - ],[]) -]) -AC_DEFUN([ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND], [ - AC_MSG_CHECKING([whether file_operations_extend takes \ -.copy_file_range() and .clone_file_range()]) - ZFS_LINUX_TEST_RESULT([vfs_file_operations_extend], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_FILE_OPERATIONS_EXTEND, 1, - [file_operations_extend takes .copy_file_range() - and .clone_file_range()]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-vfs-file_range.m4 b/config/kernel-vfs-file_range.m4 index 8a5cbe2eeeed..936f7b4eba4c 100644 --- a/config/kernel-vfs-file_range.m4 +++ b/config/kernel-vfs-file_range.m4 @@ -19,36 +19,6 @@ dnl # dnl # 6.8: generic_copy_file_range() removed, replaced by dnl # splice_copy_file_range() dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [ - ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [ - #include - - static ssize_t test_copy_file_range(struct file *src_file, - loff_t src_off, 
struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) { - (void) src_file; (void) src_off; - (void) dst_file; (void) dst_off; - (void) len; (void) flags; - return (0); - } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .copy_file_range = test_copy_file_range, - }; - ],[]) -]) -AC_DEFUN([ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE], [ - AC_MSG_CHECKING([whether fops->copy_file_range() is available]) - ZFS_LINUX_TEST_RESULT([vfs_copy_file_range], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_COPY_FILE_RANGE, 1, - [fops->copy_file_range() is available]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE], [ ZFS_LINUX_TEST_SRC([generic_copy_file_range], [ #include diff --git a/config/kernel-vfs-getattr.m4 b/config/kernel-vfs-getattr.m4 deleted file mode 100644 index eb07853cc4b9..000000000000 --- a/config/kernel-vfs-getattr.m4 +++ /dev/null @@ -1,86 +0,0 @@ -dnl # -dnl # 4.11 API, a528d35e@torvalds/linux -dnl # vfs_getattr(const struct path *p, struct kstat *s, u32 m, unsigned int f) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS], [ - ZFS_LINUX_TEST_SRC([vfs_getattr_4args], [ - #include - ],[ - vfs_getattr((const struct path *)NULL, - (struct kstat *)NULL, - (u32)0, - (unsigned int)0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_4ARGS], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 4 args]) - ZFS_LINUX_TEST_RESULT([vfs_getattr_4args], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_4ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 4 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.9 API -dnl # vfs_getattr(struct path *p, struct kstat *s) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS], [ - ZFS_LINUX_TEST_SRC([vfs_getattr_2args], [ - #include - ],[ - vfs_getattr((struct path *) NULL, - (struct kstat *)NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_2ARGS], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 2 args]) - ZFS_LINUX_TEST_RESULT([vfs_getattr_2args], [ - 
AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 2 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # <3.9 API -dnl # vfs_getattr(struct vfsmount *v, struct dentry *d, struct kstat *k) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS], [ - ZFS_LINUX_TEST_SRC([vfs_getattr_3args], [ - #include - ],[ - vfs_getattr((struct vfsmount *)NULL, - (struct dentry *)NULL, - (struct kstat *)NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_3ARGS], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 3 args]) - ZFS_LINUX_TEST_RESULT([vfs_getattr_3args], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_3ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 3 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR], [ - ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS - ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS - ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR], [ - ZFS_AC_KERNEL_VFS_GETATTR_4ARGS - ZFS_AC_KERNEL_VFS_GETATTR_2ARGS - ZFS_AC_KERNEL_VFS_GETATTR_3ARGS -]) diff --git a/config/kernel-vfs-invalidate_folio.m4 b/config/kernel-vfs-invalidate_folio.m4 new file mode 100644 index 000000000000..61a5c8478af1 --- /dev/null +++ b/config/kernel-vfs-invalidate_folio.m4 @@ -0,0 +1,33 @@ +dnl # +dnl # Linux 5.18 uses invalidate_folio in lieu of invalidate_page +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_INVALIDATE_FOLIO], [ + ZFS_LINUX_TEST_SRC([vfs_has_invalidate_folio], [ + #include + + static void + test_invalidate_folio(struct folio *folio, size_t offset, + size_t len) { + (void) folio; (void) offset; (void) len; + return; + } + + static const struct address_space_operations + aops __attribute__ ((unused)) = { + .invalidate_folio = test_invalidate_folio, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_INVALIDATE_FOLIO], [ + dnl # + dnl # Linux 5.18 uses invalidate_folio in lieu of invalidate_page + dnl # + AC_MSG_CHECKING([whether invalidate_folio exists]) + ZFS_LINUX_TEST_RESULT([vfs_has_invalidate_folio], [ + 
AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_INVALIDATE_FOLIO, 1, [invalidate_folio exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4 index ff560ff3eef0..ed7961a9e9dd 100644 --- a/config/kernel-vfs-iov_iter.m4 +++ b/config/kernel-vfs-iov_iter.m4 @@ -2,44 +2,6 @@ dnl # dnl # Check for available iov_iter functionality. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ - ZFS_LINUX_TEST_SRC([iov_iter_types], [ - #include - #include - ],[ - int type __attribute__ ((unused)) = ITER_KVEC; - ]) - - ZFS_LINUX_TEST_SRC([iov_iter_advance], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t advance = 512; - - iov_iter_advance(&iter, advance); - ]) - - ZFS_LINUX_TEST_SRC([iov_iter_revert], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t revert = 512; - - iov_iter_revert(&iter, revert); - ]) - - ZFS_LINUX_TEST_SRC([iov_iter_fault_in_readable], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t size = 512; - int error __attribute__ ((unused)); - - error = iov_iter_fault_in_readable(&iter, size); - ]) - ZFS_LINUX_TEST_SRC([fault_in_iov_iter_readable], [ #include #include @@ -51,38 +13,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ error = fault_in_iov_iter_readable(&iter, size); ]) - ZFS_LINUX_TEST_SRC([iov_iter_count], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t bytes __attribute__ ((unused)); - - bytes = iov_iter_count(&iter); - ]) - - ZFS_LINUX_TEST_SRC([copy_to_iter], [ - #include + ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [ #include - ],[ + ], [ struct iov_iter iter = { 0 }; - char buf[512] = { 0 }; - size_t size = 512; - size_t bytes __attribute__ ((unused)); + struct page **pages = NULL; + size_t maxsize = 4096; + unsigned maxpages = 1; + size_t start; + size_t ret __attribute__ ((unused)); - bytes = copy_to_iter((const void *)&buf, size, &iter); + ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages, + 
&start); ]) - ZFS_LINUX_TEST_SRC([copy_from_iter], [ - #include + ZFS_LINUX_TEST_SRC([iov_iter_get_pages], [ #include - ],[ + ], [ struct iov_iter iter = { 0 }; - char buf[512] = { 0 }; - size_t size = 512; - size_t bytes __attribute__ ((unused)); + struct page **pages = NULL; + size_t maxsize = 4096; + unsigned maxpages = 1; + size_t start; + size_t ret __attribute__ ((unused)); - bytes = copy_from_iter((void *)&buf, size, &iter); + ret = iov_iter_get_pages(&iter, pages, maxsize, maxpages, + &start); ]) ZFS_LINUX_TEST_SRC([iov_iter_type], [ @@ -105,85 +61,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ enable_vfs_iov_iter="yes" - AC_MSG_CHECKING([whether iov_iter types are available]) - ZFS_LINUX_TEST_RESULT([iov_iter_types], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_TYPES, 1, - [iov_iter types are available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether iov_iter_advance() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_advance], [ + AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) + ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_ADVANCE, 1, - [iov_iter_advance() is available]) + AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1, + [fault_in_iov_iter_readable() is available]) ],[ AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" ]) - AC_MSG_CHECKING([whether iov_iter_revert() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_revert], [ + dnl # + dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_get_pages2(). 
+ dnl # + AC_MSG_CHECKING([whether iov_iter_get_pages2() is available]) + ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_REVERT, 1, - [iov_iter_revert() is available]) - ],[ + AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1, + [iov_iter_get_pages2() is available]) + ], [ AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether iov_iter_fault_in_readable() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_fault_in_readable], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_FAULT_IN_READABLE, 1, - [iov_iter_fault_in_readable() is available]) - ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) - ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ + AC_MSG_CHECKING([whether iov_iter_get_pages() is available]) + ZFS_LINUX_TEST_RESULT([iov_iter_get_pages], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1, - [fault_in_iov_iter_readable() is available]) - ],[ + AC_DEFINE(HAVE_IOV_ITER_GET_PAGES, 1, + [iov_iter_get_pages() is available]) + ], [ AC_MSG_RESULT(no) enable_vfs_iov_iter="no" ]) ]) - AC_MSG_CHECKING([whether iov_iter_count() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_count], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_COUNT, 1, - [iov_iter_count() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether copy_to_iter() is available]) - ZFS_LINUX_TEST_RESULT([copy_to_iter], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COPY_TO_ITER, 1, - [copy_to_iter() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether copy_from_iter() is available]) - ZFS_LINUX_TEST_RESULT([copy_from_iter], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COPY_FROM_ITER, 1, - [copy_from_iter() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - dnl # dnl # This checks for iov_iter_type() in linux/uio.h. 
It is not dnl # required, however, and the module will compiled without it diff --git a/config/kernel-vfs-iterate.m4 b/config/kernel-vfs-iterate.m4 deleted file mode 100644 index 2e396daa1c0f..000000000000 --- a/config/kernel-vfs-iterate.m4 +++ /dev/null @@ -1,83 +0,0 @@ -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ - ZFS_LINUX_TEST_SRC([file_operations_iterate_shared], [ - #include - static int iterate(struct file *filp, struct dir_context * context) - { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .iterate_shared = iterate, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([file_operations_iterate], [ - #include - static int iterate(struct file *filp, - struct dir_context *context) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .iterate = iterate, - }; - - #if defined(FMODE_KABI_ITERATE) - #error "RHEL 7.5, FMODE_KABI_ITERATE interface" - #endif - ],[]) - - ZFS_LINUX_TEST_SRC([file_operations_readdir], [ - #include - static int readdir(struct file *filp, void *entry, - filldir_t func) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .readdir = readdir, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ - dnl # - dnl # 4.7 API change - dnl # - AC_MSG_CHECKING([whether fops->iterate_shared() is available]) - ZFS_LINUX_TEST_RESULT([file_operations_iterate_shared], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_ITERATE_SHARED, 1, - [fops->iterate_shared() is available]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 3.11 API change - dnl # - dnl # RHEL 7.5 compatibility; the fops.iterate() method was - dnl # added to the file_operations structure but in order to - dnl # maintain KABI compatibility all callers must set - dnl # FMODE_KABI_ITERATE which is checked in iterate_dir(). - dnl # When detected ignore this interface and fallback to - dnl # to using fops.readdir() to retain KABI compatibility. 
- dnl # - AC_MSG_CHECKING([whether fops->iterate() is available]) - ZFS_LINUX_TEST_RESULT([file_operations_iterate], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_ITERATE, 1, - [fops->iterate() is available]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # readdir interface introduced - dnl # - AC_MSG_CHECKING([whether fops->readdir() is available]) - ZFS_LINUX_TEST_RESULT([file_operations_readdir], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_READDIR, 1, - [fops->readdir() is available]) - ],[ - ZFS_LINUX_TEST_ERROR([vfs_iterate]) - ]) - ]) - ]) -]) diff --git a/config/kernel-vfs-release_folio.m4 b/config/kernel-vfs-release_folio.m4 new file mode 100644 index 000000000000..f31db5677fd3 --- /dev/null +++ b/config/kernel-vfs-release_folio.m4 @@ -0,0 +1,32 @@ +dnl # +dnl # Linux 5.19 uses release_folio in lieu of releasepage +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RELEASE_FOLIO], [ + ZFS_LINUX_TEST_SRC([vfs_has_release_folio], [ + #include + + static bool + test_release_folio(struct folio *folio, gfp_t gfp) { + (void) folio; (void) gfp; + return (0); + } + + static const struct address_space_operations + aops __attribute__ ((unused)) = { + .release_folio = test_release_folio, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_RELEASE_FOLIO], [ + dnl # + dnl # Linux 5.19 uses release_folio in lieu of releasepage + dnl # + AC_MSG_CHECKING([whether release_folio exists]) + ZFS_LINUX_TEST_RESULT([vfs_has_release_folio], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_RELEASE_FOLIO, 1, [release_folio exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel-vfs-rw-iterate.m4 b/config/kernel-vfs-rw-iterate.m4 deleted file mode 100644 index cb20ed03099a..000000000000 --- a/config/kernel-vfs-rw-iterate.m4 +++ /dev/null @@ -1,80 +0,0 @@ -dnl # -dnl # Linux 3.16 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE], [ - ZFS_LINUX_TEST_SRC([file_operations_rw], [ - #include - - static ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) - { return 0; } - static 
ssize_t test_write(struct kiocb *kiocb, struct iov_iter *from) - { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .read_iter = test_read, - .write_iter = test_write, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([new_sync_rw], [ - #include - ],[ - ssize_t ret __attribute__ ((unused)); - struct file *filp = NULL; - char __user *rbuf = NULL; - const char __user *wbuf = NULL; - size_t len = 0; - loff_t ppos; - - ret = new_sync_read(filp, rbuf, len, &ppos); - ret = new_sync_write(filp, wbuf, len, &ppos); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], [ - AC_MSG_CHECKING([whether fops->read/write_iter() are available]) - ZFS_LINUX_TEST_RESULT([file_operations_rw], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_RW_ITERATE, 1, - [fops->read/write_iter() are available]) - - dnl # - dnl # Linux 4.1 API - dnl # - AC_MSG_CHECKING([whether new_sync_read/write() are available]) - ZFS_LINUX_TEST_RESULT([new_sync_rw], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_NEW_SYNC_READ, 1, - [new_sync_read()/new_sync_write() are available]) - ],[ - AC_MSG_RESULT(no) - ]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # Linux 4.1.x API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS], [ - ZFS_LINUX_TEST_SRC([generic_write_checks], [ - #include - ],[ - struct kiocb *iocb = NULL; - struct iov_iter *iov = NULL; - generic_write_checks(iocb, iov); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS], [ - AC_MSG_CHECKING([whether generic_write_checks() takes kiocb]) - ZFS_LINUX_TEST_RESULT([generic_write_checks], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_WRITE_CHECKS_KIOCB, 1, - [generic_write_checks() takes kiocb]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-wait.m4 b/config/kernel-wait.m4 deleted file mode 100644 index 0414242bf6d4..000000000000 --- a/config/kernel-wait.m4 +++ /dev/null @@ -1,99 +0,0 @@ -dnl # -dnl # 4.13 API change -dnl # Renamed struct wait_queue -> struct wait_queue_entry. -dnl # -dnl # N.B. 
The type check is performed before all other checks -dnl # since ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY depends on -dnl # HAVE_WAIT_QUEUE_ENTRY_T being set in confdefs.h. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T], [ - AC_MSG_CHECKING([whether wait_queue_entry_t exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - wait_queue_entry_t *entry __attribute__ ((unused)); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1, - [wait_queue_entry_t exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.17 API change, -dnl # wait_on_bit() no longer requires an action argument. The former -dnl # "wait_on_bit" interface required an 'action' function to be provided -dnl # which does the actual waiting. There were over 20 such functions in the -dnl # kernel, many of them identical, though most cases can be satisfied by one -dnl # of just two functions: one which uses io_schedule() and one which just -dnl # uses schedule(). This API change was made to consolidate all of those -dnl # redundant wait functions. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_ON_BIT], [ - ZFS_LINUX_TEST_SRC([wait_on_bit], [ - #include - ],[ - int (*action)(void *) = NULL; - wait_on_bit(NULL, 0, action, 0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WAIT_ON_BIT], [ - AC_MSG_CHECKING([whether wait_on_bit() takes an action]) - ZFS_LINUX_TEST_RESULT([wait_on_bit], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.13 API change -dnl # Renamed wait_queue_head::task_list -> wait_queue_head::head -dnl # Renamed wait_queue_entry::task_list -> wait_queue_entry::entry -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY], [ - ZFS_LINUX_TEST_SRC([wait_queue_head_entry], [ - #include - - #ifdef HAVE_WAIT_QUEUE_ENTRY_T - typedef wait_queue_head_t spl_wait_queue_head_t; - typedef wait_queue_entry_t spl_wait_queue_entry_t; - #else - typedef wait_queue_head_t spl_wait_queue_head_t; - typedef wait_queue_t spl_wait_queue_entry_t; - #endif - ],[ - spl_wait_queue_head_t wq_head; - spl_wait_queue_entry_t wq_entry; - struct list_head *head __attribute__ ((unused)); - struct list_head *entry __attribute__ ((unused)); - - head = &wq_head.head; - entry = &wq_entry.entry; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ - AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist]) - ZFS_LINUX_TEST_RESULT([wait_queue_head_entry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_QUEUE_HEAD_ENTRY, 1, - [wq_head->head and wq_entry->entry exist]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT], [ - ZFS_AC_KERNEL_SRC_WAIT_ON_BIT - ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY -]) - -AC_DEFUN([ZFS_AC_KERNEL_WAIT], [ - ZFS_AC_KERNEL_WAIT_ON_BIT - ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY -]) diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4 index 32f58c70a500..d933cff7a4b9 100644 --- a/config/kernel-xattr-handler.m4 +++ b/config/kernel-xattr-handler.m4 @@ -34,73 +34,11 @@ 
AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ ]) dnl # -dnl # 4.5 API change, -dnl # struct xattr_handler added new member "name". -dnl # xattr_handler which matches to whole name rather than prefix should use -dnl # "name" instead of "prefix", e.g. "system.posix_acl_access" +dnl # Android API change, +dnl # The xattr_handler->get() callback was +dnl # changed to take dentry, inode and flags. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME], [ - ZFS_LINUX_TEST_SRC([xattr_handler_name], [ - #include - - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ - AC_MSG_CHECKING([whether xattr_handler has name]) - ZFS_LINUX_TEST_RESULT([xattr_handler_name], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_HANDLER_NAME, 1, - [xattr_handler has name]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # Supported xattr handler get() interfaces checked newest to oldest. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ - ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode], [ - #include - - static int get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_get_xattr_handler], [ - #include - - static int get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry], [ - #include - - static int get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int handler_flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[]) - 
+AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [ #include @@ -115,63 +53,16 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ],[]) ]) -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ - dnl # - dnl # 4.7 API change, - dnl # The xattr_handler->get() callback was changed to take both - dnl # dentry and inode. - dnl # - AC_MSG_CHECKING([whether xattr_handler->get() wants dentry and inode]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode], [ +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS], [ + AC_MSG_RESULT(no) + AC_MSG_CHECKING( + [whether xattr_handler->get() wants dentry and inode and flags]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE, 1, - [xattr_handler->get() wants both dentry and inode]) + AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1, + [xattr_handler->get() wants dentry and inode and flags]) ],[ - dnl # - dnl # 4.4 API change, - dnl # The xattr_handler->get() callback was changed to take a - dnl # attr_handler, and handler_flags argument was removed and - dnl # should be accessed by handler->flags. - dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->get() wants xattr_handler]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_xattr_handler], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_HANDLER, 1, - [xattr_handler->get() wants xattr_handler]) - ],[ - dnl # - dnl # 2.6.33 API change, - dnl # The xattr_handler->get() callback was changed - dnl # to take a dentry instead of an inode, and a - dnl # handler_flags argument was added. 
- dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->get() wants dentry]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_DENTRY, 1, - [xattr_handler->get() wants dentry]) - ],[ - dnl # - dnl # Android API change, - dnl # The xattr_handler->get() callback was - dnl # changed to take dentry, inode and flags. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->get() wants dentry and inode and flags]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1, - [xattr_handler->get() wants dentry and inode and flags]) - ],[ - ZFS_LINUX_TEST_ERROR([xattr get()]) - ]) - ]) - ]) ]) ]) @@ -222,31 +113,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ .set = set, }; ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_set_xattr_handler], [ - #include - - static int set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry], [ - #include - - static int set(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ @@ -264,6 +130,7 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1, [xattr_handler->set() takes mnt_idmap]) ], [ + AC_MSG_RESULT(no) AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace]) ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [ AC_MSG_RESULT(yes) @@ -282,152 +149,12 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1, [xattr_handler->set() wants 
both dentry and inode]) ],[ - dnl # - dnl # 4.4 API change, - dnl # The xattr_handler->set() callback was changed to take a - dnl # xattr_handler, and handler_flags argument was removed and - dnl # should be accessed by handler->flags. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->set() wants xattr_handler]) - ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1, - [xattr_handler->set() wants xattr_handler]) - ],[ - dnl # - dnl # 2.6.33 API change, - dnl # The xattr_handler->set() callback was changed - dnl # to take a dentry instead of an inode, and a - dnl # handler_flags argument was added. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->set() wants dentry]) - ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1, - [xattr_handler->set() wants dentry]) - ],[ - ZFS_LINUX_TEST_ERROR([xattr set()]) - ]) - ]) + ZFS_LINUX_TEST_ERROR([xattr set()]) ]) ]) ]) ]) -dnl # -dnl # Supported xattr handler list() interfaces checked newest to oldest. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ - ZFS_LINUX_TEST_SRC([xattr_handler_list_simple], [ - #include - - static bool list(struct dentry *dentry) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_list_xattr_handler], [ - #include - - static size_t list(const struct xattr_handler *handler, - struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_list_dentry], [ - #include - - static size_t list(struct dentry *dentry, - char *list, size_t list_size, - const char *name, size_t name_len, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ - dnl # 4.5 API change, - dnl # The xattr_handler->list() callback was changed to take only a - dnl # dentry and it only needs to return if it's accessible. - AC_MSG_CHECKING([whether xattr_handler->list() wants simple]) - ZFS_LINUX_TEST_RESULT([xattr_handler_list_simple], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_LIST_SIMPLE, 1, - [xattr_handler->list() wants simple]) - ],[ - dnl # - dnl # 4.4 API change, - dnl # The xattr_handler->list() callback was changed to take a - dnl # xattr_handler, and handler_flags argument was removed - dnl # and should be accessed by handler->flags. 
- dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->list() wants xattr_handler]) - ZFS_LINUX_TEST_RESULT([xattr_handler_list_xattr_handler], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_LIST_HANDLER, 1, - [xattr_handler->list() wants xattr_handler]) - ],[ - dnl # - dnl # 2.6.33 API change, - dnl # The xattr_handler->list() callback was changed - dnl # to take a dentry instead of an inode, and a - dnl # handler_flags argument was added. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->list() wants dentry]) - ZFS_LINUX_TEST_RESULT([xattr_handler_list_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_LIST_DENTRY, 1, - [xattr_handler->list() wants dentry]) - ],[ - ZFS_LINUX_TEST_ERROR([xattr list()]) - ]) - ]) - ]) -]) - -dnl # -dnl # 3.7 API change, -dnl # The posix_acl_{from,to}_xattr functions gained a new -dnl # parameter: user_ns -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS], [ - ZFS_LINUX_TEST_SRC([posix_acl_from_xattr_userns], [ - #include - #include - #include - ],[ - posix_acl_from_xattr(&init_user_ns, NULL, 0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ - AC_MSG_CHECKING([whether posix_acl_from_xattr() needs user_ns]) - ZFS_LINUX_TEST_RESULT([posix_acl_from_xattr_userns], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_FROM_XATTR_USERNS, 1, - [posix_acl_from_xattr() needs user_ns]) - ],[ - ZFS_LINUX_TEST_ERROR([posix_acl_from_xattr()]) - ]) -]) - dnl # dnl # 4.9 API change, dnl # iops->{set,get,remove}xattr and generic_{set,get,remove}xattr are @@ -445,33 +172,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR], [ ],[]) ]) -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_SETXATTR], [ - AC_MSG_CHECKING([whether generic_setxattr() exists]) - ZFS_LINUX_TEST_RESULT([have_generic_setxattr], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_SETXATTR, 1, - [generic_setxattr() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR], [ 
ZFS_AC_KERNEL_SRC_CONST_XATTR_HANDLER - ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME - ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET - ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST - ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS - ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR ]) AC_DEFUN([ZFS_AC_KERNEL_XATTR], [ ZFS_AC_KERNEL_CONST_XATTR_HANDLER - ZFS_AC_KERNEL_XATTR_HANDLER_NAME - ZFS_AC_KERNEL_XATTR_HANDLER_GET + ZFS_AC_KERNEL_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS ZFS_AC_KERNEL_XATTR_HANDLER_SET - ZFS_AC_KERNEL_XATTR_HANDLER_LIST - ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS - ZFS_AC_KERNEL_GENERIC_SETXATTR ]) diff --git a/config/kernel-zlib.m4 b/config/kernel-zlib.m4 deleted file mode 100644 index 752d388389cb..000000000000 --- a/config/kernel-zlib.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 2.6.39 API compat, -dnl -dnl # The function zlib_deflate_workspacesize() now take 2 arguments. -dnl # This was done to avoid always having to allocate the maximum size -dnl # workspace (268K). The caller can now specific the windowBits and -dnl # memLevel compression parameters to get a smaller workspace. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [ - ZFS_LINUX_TEST_SRC([2args_zlib_deflate_workspacesize], [ - #include - ],[ - return zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [ - AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args]) - ZFS_LINUX_TEST_RESULT([2args_zlib_deflate_workspacesize], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1, - [zlib_deflate_workspacesize() wants 2 args]) - ],[ - ZFS_LINUX_TEST_ERROR([zlib_deflate_workspacesize()]) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 4d471358d242..556df58082f9 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -14,7 +14,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ dnl # Sequential ZFS_LINUX_TRY_COMPILE tests ZFS_AC_KERNEL_FPU_HEADER ZFS_AC_KERNEL_OBJTOOL_HEADER - ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T ZFS_AC_KERNEL_MISC_MINOR ZFS_AC_KERNEL_DECLARE_EVENT_CLASS @@ -39,26 +38,13 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TYPES ZFS_AC_KERNEL_SRC_OBJTOOL - ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE ZFS_AC_KERNEL_SRC_PDE_DATA - ZFS_AC_KERNEL_SRC_FALLOCATE - ZFS_AC_KERNEL_SRC_FADVISE ZFS_AC_KERNEL_SRC_GENERIC_FADVISE - ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE - ZFS_AC_KERNEL_SRC_RWSEM ZFS_AC_KERNEL_SRC_SCHED ZFS_AC_KERNEL_SRC_USLEEP_RANGE - ZFS_AC_KERNEL_SRC_KMEM_CACHE - ZFS_AC_KERNEL_SRC_KVMALLOC ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL - ZFS_AC_KERNEL_SRC_WAIT ZFS_AC_KERNEL_SRC_INODE_TIMES - ZFS_AC_KERNEL_SRC_INODE_LOCK - ZFS_AC_KERNEL_SRC_GROUP_INFO_GID - ZFS_AC_KERNEL_SRC_RW - ZFS_AC_KERNEL_SRC_TIMER_SETUP - ZFS_AC_KERNEL_SRC_SUPER_USER_NS ZFS_AC_KERNEL_SRC_PROC_OPERATIONS ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_SRC_BIO @@ -67,63 +53,42 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_GENHD_FLAGS ZFS_AC_KERNEL_SRC_REVALIDATE_DISK ZFS_AC_KERNEL_SRC_GET_DISK_RO - 
ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_SRC_XATTR ZFS_AC_KERNEL_SRC_ACL ZFS_AC_KERNEL_SRC_INODE_SETATTR ZFS_AC_KERNEL_SRC_INODE_GETATTR - ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS - ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION ZFS_AC_KERNEL_SRC_SHOW_OPTIONS - ZFS_AC_KERNEL_SRC_FILE_INODE - ZFS_AC_KERNEL_SRC_FILE_DENTRY - ZFS_AC_KERNEL_SRC_FSYNC - ZFS_AC_KERNEL_SRC_AIO_FSYNC - ZFS_AC_KERNEL_SRC_EVICT_INODE - ZFS_AC_KERNEL_SRC_DIRTY_INODE ZFS_AC_KERNEL_SRC_SHRINKER ZFS_AC_KERNEL_SRC_MKDIR ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS ZFS_AC_KERNEL_SRC_CREATE ZFS_AC_KERNEL_SRC_PERMISSION - ZFS_AC_KERNEL_SRC_GET_LINK - ZFS_AC_KERNEL_SRC_PUT_LINK ZFS_AC_KERNEL_SRC_TMPFILE ZFS_AC_KERNEL_SRC_AUTOMOUNT - ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE ZFS_AC_KERNEL_SRC_COMMIT_METADATA - ZFS_AC_KERNEL_SRC_CLEAR_INODE ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED - ZFS_AC_KERNEL_SRC_DENTRY - ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT - ZFS_AC_KERNEL_SRC_BDI ZFS_AC_KERNEL_SRC_SET_NLINK ZFS_AC_KERNEL_SRC_SGET - ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO - ZFS_AC_KERNEL_SRC_VFS_GETATTR + ZFS_AC_KERNEL_SRC_VFS_RELEASE_FOLIO + ZFS_AC_KERNEL_SRC_VFS_INVALIDATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS - ZFS_AC_KERNEL_SRC_VFS_ITERATE ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS - ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE - ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER - ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE - ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND 
ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_SRC_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE @@ -134,36 +99,25 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_KUIDGID_T ZFS_AC_KERNEL_SRC_KUID_HELPERS ZFS_AC_KERNEL_SRC_RENAME - ZFS_AC_KERNEL_SRC_CURRENT_TIME - ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES - ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL - ZFS_AC_KERNEL_SRC_KTIME ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES - ZFS_AC_KERNEL_SRC_KSTRTOUL ZFS_AC_KERNEL_SRC_PERCPU - ZFS_AC_KERNEL_SRC_CPU_HOTPLUG ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR ZFS_AC_KERNEL_SRC_MKNOD ZFS_AC_KERNEL_SRC_SYMLINK ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS - ZFS_AC_KERNEL_SRC_SIGNAL_STOP ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SYSFS - ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_STRLCPY - ZFS_AC_KERNEL_SRC_STRSCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC - ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID - ZFS_AC_KERNEL_SRC_FILEMAP ZFS_AC_KERNEL_SRC_WRITEPAGE_T ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE @@ -171,8 +125,10 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_SYNC_BDEV + ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING + ZFS_AC_KERNEL_SRC_FILE case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -194,26 +150,13 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TYPES ZFS_AC_KERNEL_ACCESS_OK_TYPE - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_OBJTOOL ZFS_AC_KERNEL_PDE_DATA - ZFS_AC_KERNEL_FALLOCATE - ZFS_AC_KERNEL_FADVISE ZFS_AC_KERNEL_GENERIC_FADVISE - ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE - 
ZFS_AC_KERNEL_RWSEM ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE - ZFS_AC_KERNEL_KMEM_CACHE - ZFS_AC_KERNEL_KVMALLOC ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL - ZFS_AC_KERNEL_WAIT ZFS_AC_KERNEL_INODE_TIMES - ZFS_AC_KERNEL_INODE_LOCK - ZFS_AC_KERNEL_GROUP_INFO_GID - ZFS_AC_KERNEL_RW - ZFS_AC_KERNEL_TIMER_SETUP - ZFS_AC_KERNEL_SUPER_USER_NS ZFS_AC_KERNEL_PROC_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BIO @@ -222,63 +165,42 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_GENHD_FLAGS ZFS_AC_KERNEL_REVALIDATE_DISK ZFS_AC_KERNEL_GET_DISK_RO - ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_XATTR ZFS_AC_KERNEL_ACL ZFS_AC_KERNEL_INODE_SETATTR ZFS_AC_KERNEL_INODE_GETATTR - ZFS_AC_KERNEL_INODE_SET_FLAGS - ZFS_AC_KERNEL_INODE_SET_IVERSION ZFS_AC_KERNEL_SHOW_OPTIONS - ZFS_AC_KERNEL_FILE_INODE - ZFS_AC_KERNEL_FILE_DENTRY - ZFS_AC_KERNEL_FSYNC - ZFS_AC_KERNEL_AIO_FSYNC - ZFS_AC_KERNEL_EVICT_INODE - ZFS_AC_KERNEL_DIRTY_INODE ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR ZFS_AC_KERNEL_LOOKUP_FLAGS ZFS_AC_KERNEL_CREATE ZFS_AC_KERNEL_PERMISSION - ZFS_AC_KERNEL_GET_LINK - ZFS_AC_KERNEL_PUT_LINK ZFS_AC_KERNEL_TMPFILE ZFS_AC_KERNEL_AUTOMOUNT - ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE ZFS_AC_KERNEL_COMMIT_METADATA - ZFS_AC_KERNEL_CLEAR_INODE ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED - ZFS_AC_KERNEL_DENTRY - ZFS_AC_KERNEL_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT - ZFS_AC_KERNEL_BDI ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_SGET - ZFS_AC_KERNEL_LSEEK_EXECUTE ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO - ZFS_AC_KERNEL_VFS_GETATTR + ZFS_AC_KERNEL_VFS_RELEASE_FOLIO + ZFS_AC_KERNEL_VFS_INVALIDATE_FOLIO ZFS_AC_KERNEL_VFS_FSYNC_2ARGS - ZFS_AC_KERNEL_VFS_ITERATE ZFS_AC_KERNEL_VFS_DIRECT_IO ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS - ZFS_AC_KERNEL_VFS_RW_ITERATE - 
ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_VFS_IOV_ITER - ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE - ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_FOLLOW_DOWN_ONE @@ -289,36 +211,25 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_RENAME - ZFS_AC_KERNEL_CURRENT_TIME - ZFS_AC_KERNEL_USERNS_CAPABILITIES - ZFS_AC_KERNEL_IN_COMPAT_SYSCALL - ZFS_AC_KERNEL_KTIME ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES - ZFS_AC_KERNEL_KSTRTOUL ZFS_AC_KERNEL_PERCPU - ZFS_AC_KERNEL_CPU_HOTPLUG ZFS_AC_KERNEL_GENERIC_FILLATTR ZFS_AC_KERNEL_MKNOD ZFS_AC_KERNEL_SYMLINK ZFS_AC_KERNEL_BIO_MAX_SEGS - ZFS_AC_KERNEL_SIGNAL_STOP ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SYSFS - ZFS_AC_KERNEL_SET_SPECIAL_STATE ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_STRLCPY - ZFS_AC_KERNEL_STRSCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC - ZFS_AC_KERNEL_USER_NS_COMMON_INUM ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID - ZFS_AC_KERNEL_FILEMAP ZFS_AC_KERNEL_WRITEPAGE_T ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE @@ -326,8 +237,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_SYNC_BDEV + ZFS_AC_KERNEL_MM_PAGE_FLAGS ZFS_AC_KERNEL_MM_PAGE_SIZE ZFS_AC_KERNEL_MM_PAGE_MAPPING + ZFS_AC_KERNEL_1ARG_ASSIGN_STR + ZFS_AC_KERNEL_FILE case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE @@ -511,13 +425,39 @@ AC_DEFUN([ZFS_AC_KERNEL], [ AC_MSG_RESULT([$kernsrcver]) - AS_VERSION_COMPARE([$kernsrcver], [$ZFS_META_KVER_MIN], [ - AC_MSG_ERROR([ + 
AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MIN], [], [ + AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. *** The minimum supported kernel version is $ZFS_META_KVER_MIN. ]) ]) + AC_ARG_ENABLE([linux-experimental], + AS_HELP_STRING([--enable-linux-experimental], + [Allow building against some unsupported kernel versions])) + + AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MAX], [ + AX_COMPARE_VERSION([$kernsrcver], [eq2], [$ZFS_META_KVER_MAX], [ + kern_max_version_ok=yes + ], [ + kern_max_version_ok=no + ]) + ], [ + kern_max_version_ok=yes + ]) + + AS_IF([test "x$kern_max_version_ok" != "xyes"], [ + AS_IF([test "x$enable_linux_experimental" = "xyes"], [ + AC_DEFINE(HAVE_LINUX_EXPERIMENTAL, 1, + [building against unsupported kernel version]) + ], [ + AC_MSG_ERROR([ + *** Cannot build against kernel version $kernsrcver. + *** The maximum supported kernel version is $ZFS_META_KVER_MAX. + ]) + ]) + ]) + LINUX=${kernelsrc} LINUX_OBJ=${kernelbuild} LINUX_VERSION=${kernsrcver} @@ -527,6 +467,30 @@ AC_DEFUN([ZFS_AC_KERNEL], [ AC_SUBST(LINUX_VERSION) ]) +AC_DEFUN([ZFS_AC_KERNEL_VERSION_WARNING], [ + AS_IF([test "x$enable_linux_experimental" = "xyes" && \ + test "x$kern_max_version_ok" != "xyes"], [ + AC_MSG_WARN([ + + You are building OpenZFS against Linux version $kernsrcver. + + This combination is considered EXPERIMENTAL by the OpenZFS project. + Even if it appears to build and run correctly, there may be bugs that + can cause SERIOUS DATA LOSS. + + YOU HAVE BEEN WARNED! + + If you choose to continue, we'd appreciate if you could report your + results on the OpenZFS issue tracker at: + + https://github.com/openzfs/zfs/issues/new + + Your feedback will help us prepare a new OpenZFS release that supports + this version of Linux.
+ ]) + ]) +]) + dnl # dnl # Detect the QAT module to be built against, QAT provides hardware dnl # acceleration for data compression: diff --git a/configure.ac b/configure.ac index 45de74006346..46077debb415 100644 --- a/configure.ac +++ b/configure.ac @@ -89,3 +89,5 @@ AC_CONFIG_FILES([ AC_OUTPUT + +ZFS_AC_KERNEL_VERSION_WARNING diff --git a/contrib/debian/not-installed b/contrib/debian/not-installed index ad14776f3b7e..88557f76fcae 100644 --- a/contrib/debian/not-installed +++ b/contrib/debian/not-installed @@ -1,5 +1,4 @@ usr/bin/arc_summary.py -usr/share/zfs/enum-extract.pl usr/share/zfs/zfs-helpers.sh etc/default/zfs etc/init.d diff --git a/contrib/debian/rules.in b/contrib/debian/rules.in index 38c7038a701f..b0d4e1cb16d6 100755 --- a/contrib/debian/rules.in +++ b/contrib/debian/rules.in @@ -86,7 +86,6 @@ override_dh_auto_install: @# Install the DKMS source. @# We only want the files needed to build the modules install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \ - '$(CURDIR)/scripts/enum-extract.pl' \ '$(CURDIR)/scripts/dkms.postbuild' $(foreach file,$(DKMSFILES),mv '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/$(file)' '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)' || exit 1;) diff --git a/include/cityhash.h b/include/cityhash.h index 3b2d1e84b5b3..346fb673a07d 100644 --- a/include/cityhash.h +++ b/include/cityhash.h @@ -32,6 +32,13 @@ extern "C" { #endif +/* + * Define 1/2/3-argument specialized versions of cityhash4, which can reduce + * instruction count (especially multiplication) on some 32-bit arches. 
+ */ +_SYS_CITYHASH_H uint64_t cityhash1(uint64_t); +_SYS_CITYHASH_H uint64_t cityhash2(uint64_t, uint64_t); +_SYS_CITYHASH_H uint64_t cityhash3(uint64_t, uint64_t, uint64_t); _SYS_CITYHASH_H uint64_t cityhash4(uint64_t, uint64_t, uint64_t, uint64_t); #ifdef __cplusplus diff --git a/include/os/freebsd/spl/sys/mutex.h b/include/os/freebsd/spl/sys/mutex.h index 8cfe56c75309..bbff9fe80389 100644 --- a/include/os/freebsd/spl/sys/mutex.h +++ b/include/os/freebsd/spl/sys/mutex.h @@ -70,4 +70,5 @@ typedef enum { #define mutex_exit(lock) sx_xunlock(lock) #define mutex_owned(lock) sx_xlocked(lock) #define mutex_owner(lock) sx_xholder(lock) + #endif /* _OPENSOLARIS_SYS_MUTEX_H_ */ diff --git a/include/os/freebsd/spl/sys/param.h b/include/os/freebsd/spl/sys/param.h index 92724e332d68..96440dce03bb 100644 --- a/include/os/freebsd/spl/sys/param.h +++ b/include/os/freebsd/spl/sys/param.h @@ -33,6 +33,7 @@ #include #include_next #define PAGESIZE PAGE_SIZE +#define PAGESHIFT PAGE_SHIFT #define ptob(x) ((uint64_t)(x) << PAGE_SHIFT) #ifdef _KERNEL #include diff --git a/include/os/freebsd/spl/sys/simd.h b/include/os/freebsd/spl/sys/simd.h index 4560bb05e978..6bc46755c4e3 100644 --- a/include/os/freebsd/spl/sys/simd.h +++ b/include/os/freebsd/spl/sys/simd.h @@ -50,4 +50,7 @@ #define kfpu_fini() do {} while (0) #endif +#define simd_stat_init() 0 +#define simd_stat_fini() 0 + #endif diff --git a/include/os/freebsd/spl/sys/uio.h b/include/os/freebsd/spl/sys/uio.h index b9d41903ea63..2bd5bdb80d98 100644 --- a/include/os/freebsd/spl/sys/uio.h +++ b/include/os/freebsd/spl/sys/uio.h @@ -34,13 +34,30 @@ #include_next #include #include +#include + +/* + * uio_extflg: extended flags + */ +#define UIO_DIRECT 0x0001 /* Direct I/O request */ typedef struct iovec iovec_t; typedef enum uio_seg zfs_uio_seg_t; typedef enum uio_rw zfs_uio_rw_t; +/* + * This structure is used when doing Direct I/O.
+ */ +typedef struct { + vm_page_t *pages; + int npages; +} zfs_uio_dio_t; + typedef struct zfs_uio { struct uio *uio; + offset_t uio_soffset; + uint16_t uio_extflg; + zfs_uio_dio_t uio_dio; } zfs_uio_t; #define GET_UIO_STRUCT(u) (u)->uio @@ -52,6 +69,7 @@ typedef struct zfs_uio { #define zfs_uio_iovbase(u, idx) GET_UIO_STRUCT(u)->uio_iov[(idx)].iov_base #define zfs_uio_td(u) GET_UIO_STRUCT(u)->uio_td #define zfs_uio_rw(u) GET_UIO_STRUCT(u)->uio_rw +#define zfs_uio_soffset(u) (u)->uio_soffset #define zfs_uio_fault_disable(u, set) #define zfs_uio_prefaultpages(size, u) (0) @@ -61,6 +79,13 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) zfs_uio_offset(uio) = off; } +static inline void +zfs_uio_setsoffset(zfs_uio_t *uio, offset_t off) +{ + ASSERT3U(zfs_uio_offset(uio), ==, off); + zfs_uio_soffset(uio) = off; +} + static inline void zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { @@ -71,7 +96,11 @@ zfs_uio_advance(zfs_uio_t *uio, ssize_t size) static __inline void zfs_uio_init(zfs_uio_t *uio, struct uio *uio_s) { - GET_UIO_STRUCT(uio) = uio_s; + memset(uio, 0, sizeof (zfs_uio_t)); + if (uio_s != NULL) { + GET_UIO_STRUCT(uio) = uio_s; + zfs_uio_soffset(uio) = uio_s->uio_offset; + } } int zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio); diff --git a/include/os/freebsd/zfs/sys/abd_os.h b/include/os/freebsd/zfs/sys/abd_os.h index 57122ee83e8d..be825b3b8a43 100644 --- a/include/os/freebsd/zfs/sys/abd_os.h +++ b/include/os/freebsd/zfs/sys/abd_os.h @@ -26,10 +26,15 @@ #ifndef _ABD_OS_H #define _ABD_OS_H +#include +#include + #ifdef __cplusplus extern "C" { #endif +struct abd; + struct abd_scatter { uint_t abd_offset; void *abd_chunks[1]; /* actually variable-length */ @@ -37,8 +42,14 @@ struct abd_scatter { struct abd_linear { void *abd_buf; +#if defined(_KERNEL) + struct sf_buf *sf; /* for LINEAR_PAGE FreeBSD */ +#endif }; +__attribute__((malloc)) +struct abd *abd_alloc_from_pages(vm_page_t *, unsigned long, uint64_t); + #ifdef __cplusplus } #endif 
diff --git a/include/os/freebsd/zfs/sys/zfs_ctldir.h b/include/os/freebsd/zfs/sys/zfs_ctldir.h index 14d75df33df0..f0a98a7def8d 100644 --- a/include/os/freebsd/zfs/sys/zfs_ctldir.h +++ b/include/os/freebsd/zfs/sys/zfs_ctldir.h @@ -40,7 +40,7 @@ extern "C" { ((zdp)->z_zfsvfs->z_ctldir != NULL)) #define zfs_show_ctldir(zdp) \ (zfs_has_ctldir(zdp) && \ - ((zdp)->z_zfsvfs->z_show_ctldir)) + ((zdp)->z_zfsvfs->z_show_ctldir == ZFS_SNAPDIR_VISIBLE)) void zfsctl_create(zfsvfs_t *); void zfsctl_destroy(zfsvfs_t *); diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h index 3e54f3e846f7..b7cbdc736d21 100644 --- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h +++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h @@ -76,13 +76,14 @@ struct zfsvfs { list_t z_all_znodes; /* all vnodes in the fs */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ struct zfsctl_root *z_ctldir; /* .zfs directory pointer */ - boolean_t z_show_ctldir; /* expose .zfs in the root dir */ + uint_t z_show_ctldir; /* how to expose .zfs in the root dir */ boolean_t z_issnap; /* true if this is a snapshot */ boolean_t z_use_fuids; /* version allows fuids */ boolean_t z_replay; /* set during ZIL replay */ boolean_t z_use_sa; /* version allow system attributes */ boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */ boolean_t z_use_namecache; /* make use of FreeBSD name cache */ + boolean_t z_longname; /* Dataset supports long names */ uint8_t z_xattr; /* xattr type in use */ uint64_t z_version; /* ZPL version */ uint64_t z_shares_dir; /* hidden shares dir */ diff --git a/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/include/os/freebsd/zfs/sys/zfs_znode_impl.h index 050fc3036f87..3727dca0f8d7 100644 --- a/include/os/freebsd/zfs/sys/zfs_znode_impl.h +++ b/include/os/freebsd/zfs/sys/zfs_znode_impl.h @@ -179,7 +179,7 @@ extern void zfs_znode_free(struct znode *); extern zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE]; extern int 
zfs_znode_parent_and_name(struct znode *zp, struct znode **dzpp, - char *buf); + char *buf, uint64_t buflen); extern int zfs_rlimit_fsize(off_t fsize); #ifdef __cplusplus diff --git a/include/os/linux/Makefile.am b/include/os/linux/Makefile.am index 9100aebb541e..b7bdd892ec1d 100644 --- a/include/os/linux/Makefile.am +++ b/include/os/linux/Makefile.am @@ -8,7 +8,6 @@ kernel_linux_HEADERS = \ %D%/kernel/linux/mm_compat.h \ %D%/kernel/linux/mod_compat.h \ %D%/kernel/linux/page_compat.h \ - %D%/kernel/linux/percpu_compat.h \ %D%/kernel/linux/simd.h \ %D%/kernel/linux/simd_aarch64.h \ %D%/kernel/linux/simd_arm.h \ diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index c2e818b4d4ee..d96708c600ac 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -35,50 +35,19 @@ #include #include /* for SECTOR_* */ #include - -#ifdef HAVE_BLK_MQ #include -#endif - -#ifndef HAVE_BLK_QUEUE_FLAG_SET -static inline void -blk_queue_flag_set(unsigned int flag, struct request_queue *q) -{ - queue_flag_set(flag, q); -} -#endif - -#ifndef HAVE_BLK_QUEUE_FLAG_CLEAR -static inline void -blk_queue_flag_clear(unsigned int flag, struct request_queue *q) -{ - queue_flag_clear(flag, q); -} -#endif /* * 6.11 API * Setting the flush flags directly is no longer possible; flush flags are set * on the queue_limits structure and passed to blk_disk_alloc(). In this case * we remove this function entirely. - * - * 4.7 API, - * The blk_queue_write_cache() interface has replaced blk_queue_flush() - * interface. However, the new interface is GPL-only thus we implement - * our own trivial wrapper when the GPL-only version is detected. - * - * 2.6.36 - 4.6 API, - * The blk_queue_flush() interface has replaced blk_queue_ordered() - * interface. However, while the old interface was available to all the - * new one is GPL-only. 
Thus if the GPL-only version is detected we - * implement our own trivial helper. */ #if !defined(HAVE_BLK_ALLOC_DISK_2ARG) || \ !defined(HAVE_BLKDEV_QUEUE_LIMITS_FEATURES) static inline void blk_queue_set_write_cache(struct request_queue *q, bool on) { -#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) if (on) { blk_queue_flag_set(QUEUE_FLAG_WC, q); blk_queue_flag_set(QUEUE_FLAG_FUA, q); @@ -86,18 +55,6 @@ blk_queue_set_write_cache(struct request_queue *q, bool on) blk_queue_flag_clear(QUEUE_FLAG_WC, q); blk_queue_flag_clear(QUEUE_FLAG_FUA, q); } -#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE) - blk_queue_write_cache(q, on, on); -#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) - if (on) - q->flush_flags |= REQ_FLUSH | REQ_FUA; - else - q->flush_flags &= ~(REQ_FLUSH | REQ_FUA); -#elif defined(HAVE_BLK_QUEUE_FLUSH) - blk_queue_flush(q, on ? (REQ_FLUSH | REQ_FUA) : 0); -#else -#error "Unsupported kernel" -#endif } #endif /* !HAVE_BLK_ALLOC_DISK_2ARG || !HAVE_BLKDEV_QUEUE_LIMITS_FEATURES */ @@ -143,7 +100,6 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages) #endif } -#ifdef HAVE_BIO_BVEC_ITER #define BIO_BI_SECTOR(bio) (bio)->bi_iter.bi_sector #define BIO_BI_SIZE(bio) (bio)->bi_iter.bi_size #define BIO_BI_IDX(bio) (bio)->bi_iter.bi_idx @@ -151,15 +107,6 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages) #define bio_for_each_segment4(bv, bvp, b, i) \ bio_for_each_segment((bv), (b), (i)) typedef struct bvec_iter bvec_iterator_t; -#else -#define BIO_BI_SECTOR(bio) (bio)->bi_sector -#define BIO_BI_SIZE(bio) (bio)->bi_size -#define BIO_BI_IDX(bio) (bio)->bi_idx -#define BIO_BI_SKIP(bio) (0) -#define bio_for_each_segment4(bv, bvp, b, i) \ - bio_for_each_segment((bvp), (b), (i)) -typedef int bvec_iterator_t; -#endif static inline void bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev, @@ -200,7 +147,6 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev, #define DISK_NAME_LEN 32 #endif /* 
DISK_NAME_LEN */ -#ifdef HAVE_BIO_BI_STATUS static inline int bi_status_to_errno(blk_status_t status) { @@ -274,42 +220,6 @@ errno_to_bi_status(int error) return (BLK_STS_IOERR); } } -#endif /* HAVE_BIO_BI_STATUS */ - -/* - * 4.3 API change - * The bio_endio() prototype changed slightly. These are helper - * macro's to ensure the prototype and invocation are handled. - */ -#ifdef HAVE_1ARG_BIO_END_IO_T -#ifdef HAVE_BIO_BI_STATUS -#define BIO_END_IO_ERROR(bio) bi_status_to_errno(bio->bi_status) -#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x) -#define BIO_END_IO(bio, error) bio_set_bi_status(bio, error) -static inline void -bio_set_bi_status(struct bio *bio, int error) -{ - ASSERT3S(error, <=, 0); - bio->bi_status = errno_to_bi_status(-error); - bio_endio(bio); -} -#else -#define BIO_END_IO_ERROR(bio) (-(bio->bi_error)) -#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x) -#define BIO_END_IO(bio, error) bio_set_bi_error(bio, error) -static inline void -bio_set_bi_error(struct bio *bio, int error) -{ - ASSERT3S(error, <=, 0); - bio->bi_error = error; - bio_endio(bio); -} -#endif /* HAVE_BIO_BI_STATUS */ - -#else -#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x, int z) -#define BIO_END_IO(bio, error) bio_endio(bio, error); -#endif /* HAVE_1ARG_BIO_END_IO_T */ /* * 5.15 MACRO, @@ -337,6 +247,19 @@ zfs_check_disk_status(struct block_device *bdev) #endif } +/* + * 5.17 API change + * + * GENHD_FL_EXT_DEVT flag removed + * GENHD_FL_NO_PART_SCAN renamed GENHD_FL_NO_PART + */ +#ifndef HAVE_GENHD_FL_EXT_DEVT +#define GENHD_FL_EXT_DEVT (0) +#endif +#ifndef HAVE_GENHD_FL_NO_PART +#define GENHD_FL_NO_PART (GENHD_FL_NO_PART_SCAN) +#endif + /* * 4.1 API, * 3.10.0 CentOS 7.x API, @@ -399,9 +322,6 @@ zfs_check_media_change(struct block_device *bdev) * The function was exported for use, prior to this it existed but the * symbol was not exported. 
* - * 4.4.0-6.21 API change for Ubuntu - * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions. - * * 5.11 API change * Changed to take a dev_t argument which is set on success and return a * non-zero error code on failure. @@ -419,15 +339,6 @@ vdev_lookup_bdev(const char *path, dev_t *dev) *dev = bdev->bd_dev; bdput(bdev); - return (0); -#elif defined(HAVE_MODE_LOOKUP_BDEV) - struct block_device *bdev = lookup_bdev(path, FMODE_READ); - if (IS_ERR(bdev)) - return (PTR_ERR(bdev)); - - *dev = bdev->bd_dev; - bdput(bdev); - return (0); #else #error "Unsupported kernel" @@ -447,56 +358,24 @@ vdev_lookup_bdev(const char *path, dev_t *dev) static inline void bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags) { -#if defined(HAVE_BIO_BI_OPF) bio->bi_opf = rw | flags; -#else - bio->bi_rw |= rw | flags; -#endif /* HAVE_BIO_BI_OPF */ } #endif /* * bio_set_flush - Set the appropriate flags in a bio to guarantee * data are on non-volatile media on completion. - * - * 2.6.37 - 4.8 API, - * Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a - * replacement for WRITE_BARRIER to allow expressing richer semantics - * to the block layer. It's up to the block layer to implement the - * semantics correctly. Use the WRITE_FLUSH_FUA flag combination. - * - * 4.8 - 4.9 API, - * REQ_FLUSH was renamed to REQ_PREFLUSH. For consistency with previous - * OpenZFS releases, prefer the WRITE_FLUSH_FUA flag set if it's available. - * - * 4.10 API, - * The read/write flags and their modifiers, including WRITE_FLUSH, - * WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in - * torvalds/linux@70fd7614 and replaced by direct flag modification - * of the REQ_ flags in bio->bi_opf. Use REQ_PREFLUSH. 
*/ static inline void bio_set_flush(struct bio *bio) { -#if defined(HAVE_REQ_PREFLUSH) /* >= 4.10 */ bio_set_op_attrs(bio, 0, REQ_PREFLUSH | REQ_OP_WRITE); -#elif defined(WRITE_FLUSH_FUA) /* >= 2.6.37 and <= 4.9 */ - bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA); -#else -#error "Allowing the build will cause bio_set_flush requests to be ignored." -#endif } /* * 4.8 API, * REQ_OP_FLUSH * - * 4.8-rc0 - 4.8-rc1, - * REQ_PREFLUSH - * - * 2.6.36 - 4.7 API, - * REQ_FLUSH - * * in all cases but may have a performance impact for some kernels. It * has the advantage of minimizing kernel specific changes in the zvol code. * @@ -504,77 +383,40 @@ bio_set_flush(struct bio *bio) static inline boolean_t bio_is_flush(struct bio *bio) { -#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF) - return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH)); -#elif defined(HAVE_REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF) - return (bio->bi_opf & REQ_PREFLUSH); -#elif defined(HAVE_REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF) - return (bio->bi_rw & REQ_PREFLUSH); -#elif defined(HAVE_REQ_FLUSH) - return (bio->bi_rw & REQ_FLUSH); -#else -#error "Unsupported kernel" -#endif + return (bio_op(bio) == REQ_OP_FLUSH); } /* * 4.8 API, * REQ_FUA flag moved to bio->bi_opf - * - * 2.6.x - 4.7 API, - * REQ_FUA */ static inline boolean_t bio_is_fua(struct bio *bio) { -#if defined(HAVE_BIO_BI_OPF) return (bio->bi_opf & REQ_FUA); -#elif defined(REQ_FUA) - return (bio->bi_rw & REQ_FUA); -#else -#error "Allowing the build will cause fua requests to be ignored." -#endif } /* * 4.8 API, * REQ_OP_DISCARD * - * 2.6.36 - 4.7 API, - * REQ_DISCARD - * * In all cases the normal I/O path is used for discards. The only * difference is how the kernel tags individual I/Os as discards. 
*/ static inline boolean_t bio_is_discard(struct bio *bio) { -#if defined(HAVE_REQ_OP_DISCARD) return (bio_op(bio) == REQ_OP_DISCARD); -#elif defined(HAVE_REQ_DISCARD) - return (bio->bi_rw & REQ_DISCARD); -#else -#error "Unsupported kernel" -#endif } /* * 4.8 API, * REQ_OP_SECURE_ERASE - * - * 2.6.36 - 4.7 API, - * REQ_SECURE */ static inline boolean_t bio_is_secure_erase(struct bio *bio) { -#if defined(HAVE_REQ_OP_SECURE_ERASE) return (bio_op(bio) == REQ_OP_SECURE_ERASE); -#elif defined(REQ_SECURE) - return (bio->bi_rw & REQ_SECURE); -#else - return (0); -#endif } /* @@ -615,9 +457,6 @@ bdev_discard_supported(struct block_device *bdev) * * 4.8 API, * blk_queue_secure_erase() - * - * 2.6.36 - 4.7 API, - * blk_queue_secdiscard() */ static inline boolean_t bdev_secure_discard_supported(struct block_device *bdev) @@ -626,8 +465,6 @@ bdev_secure_discard_supported(struct block_device *bdev) return (!!bdev_max_secure_erase_sectors(bdev)); #elif defined(HAVE_BLK_QUEUE_SECURE_ERASE) return (!!blk_queue_secure_erase(bdev_get_queue(bdev))); -#elif defined(HAVE_BLK_QUEUE_SECDISCARD) - return (!!blk_queue_secdiscard(bdev_get_queue(bdev))); #else #error "Unsupported kernel" #endif @@ -657,10 +494,6 @@ blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)), return (disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio))); #elif defined(HAVE_BIO_IO_ACCT) return (bio_start_io_acct(bio)); -#elif defined(HAVE_GENERIC_IO_ACCT_3ARG) - unsigned long start_time = jiffies; - generic_start_io_acct(rw, bio_sectors(bio), &disk->part0); - return (start_time); #elif defined(HAVE_GENERIC_IO_ACCT_4ARG) unsigned long start_time = jiffies; generic_start_io_acct(q, rw, bio_sectors(bio), &disk->part0); @@ -685,8 +518,6 @@ blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)), disk_end_io_acct(disk, bio_op(bio), start_time); #elif defined(HAVE_BIO_IO_ACCT) bio_end_io_acct(bio, start_time); -#elif defined(HAVE_GENERIC_IO_ACCT_3ARG) - generic_end_io_acct(rw, 
&disk->part0, start_time); #elif defined(HAVE_GENERIC_IO_ACCT_4ARG) generic_end_io_acct(q, rw, &disk->part0, start_time); #endif @@ -718,7 +549,6 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id) static inline int io_data_dir(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) { if (op_is_write(req_op(rq))) { return (WRITE); @@ -726,57 +556,38 @@ io_data_dir(struct bio *bio, struct request *rq) return (READ); } } -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_data_dir(bio)); } static inline int io_is_flush(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (req_op(rq) == REQ_OP_FLUSH); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_flush(bio)); } static inline int io_is_discard(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (req_op(rq) == REQ_OP_DISCARD); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_discard(bio)); } static inline int io_is_secure_erase(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (req_op(rq) == REQ_OP_SECURE_ERASE); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_secure_erase(bio)); } static inline int io_is_fua(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (rq->cmd_flags & REQ_FUA); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_fua(bio)); } @@ -784,36 +595,24 @@ io_is_fua(struct bio *bio, struct request *rq) static inline uint64_t io_offset(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (blk_rq_pos(rq) << 9); -#else - ASSERT3P(rq, ==, NULL); -#endif return (BIO_BI_SECTOR(bio) << 9); } static inline uint64_t io_size(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (blk_rq_bytes(rq)); -#else - ASSERT3P(rq, ==, NULL); -#endif return (BIO_BI_SIZE(bio)); } static inline int io_has_data(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) 
return (bio_has_data(rq->bio)); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_has_data(bio)); } #endif /* _ZFS_BLKDEV_H */ diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h index ab1711b99f3f..de533a5fd28b 100644 --- a/include/os/linux/kernel/linux/dcache_compat.h +++ b/include/os/linux/kernel/linux/dcache_compat.h @@ -31,13 +31,7 @@ #define dname(dentry) ((char *)((dentry)->d_name.name)) #define dlen(dentry) ((int)((dentry)->d_name.len)) -#ifndef HAVE_D_MAKE_ROOT -#define d_make_root(inode) d_alloc_root(inode) -#endif /* HAVE_D_MAKE_ROOT */ - -#ifdef HAVE_DENTRY_D_U_ALIASES #define d_alias d_u.d_alias -#endif /* * Starting from Linux 5.13, flush_dcache_page() becomes an inline function diff --git a/include/os/linux/kernel/linux/kmap_compat.h b/include/os/linux/kernel/linux/kmap_compat.h index fb59c5f0267c..8f9ddca2211b 100644 --- a/include/os/linux/kernel/linux/kmap_compat.h +++ b/include/os/linux/kernel/linux/kmap_compat.h @@ -38,6 +38,8 @@ #define zfs_kmap_local(page) kmap_atomic(page) #define zfs_kunmap_local(addr) kunmap_atomic(addr) #endif +#define zfs_kmap(page) kmap(page) +#define zfs_kunmap(page) kunmap(page) /* 5.0 API change - no more 'type' argument for access_ok() */ #ifdef HAVE_ACCESS_OK_TYPE diff --git a/include/os/linux/kernel/linux/mm_compat.h b/include/os/linux/kernel/linux/mm_compat.h index 817f6df422de..4cdc0e099994 100644 --- a/include/os/linux/kernel/linux/mm_compat.h +++ b/include/os/linux/kernel/linux/mm_compat.h @@ -40,4 +40,18 @@ #define page_mapping(p) folio_mapping(page_folio(p)) #endif +/* + * 6.12 removed PG_error, SetPageError and ClearPageError, with no direct + * replacement, because page writeback errors are recorded elsewhere. Since we + * only use the page cache to assist with mmap(), never directly backing it + * with IO, it shouldn't be possible for this condition to occur on our pages + * anyway, even if this is the right way to report it. 
So it should be safe + * to remove, but for avoidance of doubt, we make it a no-op on 6.12 and leave + * it for everything else. + */ +#ifndef HAVE_MM_PAGE_FLAG_ERROR +#define SetPageError(p) do {} while (0) +#define ClearPageError(p) do {} while (0) +#endif + #endif /* _ZFS_MM_COMPAT_H */ diff --git a/include/os/linux/kernel/linux/page_compat.h b/include/os/linux/kernel/linux/page_compat.h index bd6cb398b0c0..963b96ba6351 100644 --- a/include/os/linux/kernel/linux/page_compat.h +++ b/include/os/linux/kernel/linux/page_compat.h @@ -1,67 +1,11 @@ #ifndef _ZFS_PAGE_COMPAT_H #define _ZFS_PAGE_COMPAT_H -/* - * We have various enum members moving between two separate enum types, - * and accessed by different functions at various times. Centralise the - * insanity. - * - * < v4.8: all enums in zone_stat_item, via global_page_state() - * v4.8: some enums moved to node_stat_item, global_node_page_state() introduced - * v4.13: some enums moved from zone_stat_item to node_state_item - * v4.14: global_page_state() rename to global_zone_page_state() - * - * The defines used here are created by config/kernel-global_page_state.m4 - */ - /* * Create our own accessor functions to follow the Linux API changes */ -#if defined(ZFS_GLOBAL_ZONE_PAGE_STATE) - -/* global_zone_page_state() introduced */ -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES) -#define nr_file_pages() global_node_page_state(NR_FILE_PAGES) -#else -#define nr_file_pages() global_zone_page_state(NR_FILE_PAGES) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON) -#define nr_inactive_anon_pages() global_node_page_state(NR_INACTIVE_ANON) -#else -#define nr_inactive_anon_pages() global_zone_page_state(NR_INACTIVE_ANON) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE) -#define nr_inactive_file_pages() global_node_page_state(NR_INACTIVE_FILE) -#else -#define nr_inactive_file_pages() global_zone_page_state(NR_INACTIVE_FILE) -#endif - -#elif defined(ZFS_GLOBAL_NODE_PAGE_STATE) - -/* 
global_node_page_state() introduced */ -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES) #define nr_file_pages() global_node_page_state(NR_FILE_PAGES) -#else -#define nr_file_pages() global_page_state(NR_FILE_PAGES) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON) #define nr_inactive_anon_pages() global_node_page_state(NR_INACTIVE_ANON) -#else -#define nr_inactive_anon_pages() global_page_state(NR_INACTIVE_ANON) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE) #define nr_inactive_file_pages() global_node_page_state(NR_INACTIVE_FILE) -#else -#define nr_inactive_file_pages() global_page_state(NR_INACTIVE_FILE) -#endif - -#else - -/* global_page_state() only */ -#define nr_file_pages() global_page_state(NR_FILE_PAGES) -#define nr_inactive_anon_pages() global_page_state(NR_INACTIVE_ANON) -#define nr_inactive_file_pages() global_page_state(NR_INACTIVE_FILE) - -#endif /* ZFS_GLOBAL_ZONE_PAGE_STATE */ #endif /* _ZFS_PAGE_COMPAT_H */ diff --git a/include/os/linux/kernel/linux/simd.h b/include/os/linux/kernel/linux/simd.h index f4376b218a55..e7d7a4f2255e 100644 --- a/include/os/linux/kernel/linux/simd.h +++ b/include/os/linux/kernel/linux/simd.h @@ -45,4 +45,8 @@ #define kfpu_fini() ((void) 0) #endif + +void simd_stat_init(void); +void simd_stat_fini(void); + #endif /* _LINUX_SIMD_H */ diff --git a/include/os/linux/kernel/linux/simd_aarch64.h b/include/os/linux/kernel/linux/simd_aarch64.h index 123a0c72bc6a..e580fbe23ea5 100644 --- a/include/os/linux/kernel/linux/simd_aarch64.h +++ b/include/os/linux/kernel/linux/simd_aarch64.h @@ -56,17 +56,7 @@ #include #include #include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) #include -#else -#define sys_reg(op0, op1, crn, crm, op2) ( \ - ((op0) << Op0_shift) | \ - ((op1) << Op1_shift) | \ - ((crn) << CRn_shift) | \ - ((crm) << CRm_shift) | \ - ((op2) << Op2_shift)) -#endif #define ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) diff --git 
a/include/os/linux/kernel/linux/simd_powerpc.h b/include/os/linux/kernel/linux/simd_powerpc.h index a649790ce50f..d9ccacee1ab9 100644 --- a/include/os/linux/kernel/linux/simd_powerpc.h +++ b/include/os/linux/kernel/linux/simd_powerpc.h @@ -56,16 +56,10 @@ #include #include #include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) #include -#else -#include -#endif #define kfpu_allowed() 1 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) #ifdef CONFIG_ALTIVEC #define ENABLE_KERNEL_ALTIVEC enable_kernel_altivec(); #define DISABLE_KERNEL_ALTIVEC disable_kernel_altivec(); @@ -101,11 +95,6 @@ DISABLE_KERNEL_ALTIVEC \ preempt_enable(); \ } -#else -/* seems that before 4.5 no-one bothered */ -#define kfpu_begin() -#define kfpu_end() preempt_enable() -#endif /* Linux version >= 4.5 */ #define kfpu_init() 0 #define kfpu_fini() ((void) 0) diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index aea8bd5ed22c..3ed456dbf4c8 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -32,82 +32,6 @@ #include #include -/* - * 2.6.34 - 3.19, bdi_setup_and_register() takes 3 arguments. - * 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. - * 4.12 - x.y, super_setup_bdi_name() new interface. 
- */ -#if defined(HAVE_SUPER_SETUP_BDI_NAME) -extern atomic_long_t zfs_bdi_seq; - -static inline int -zpl_bdi_setup(struct super_block *sb, char *name) -{ - return super_setup_bdi_name(sb, "%.28s-%ld", name, - atomic_long_inc_return(&zfs_bdi_seq)); -} -static inline void -zpl_bdi_destroy(struct super_block *sb) -{ -} -#elif defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER) -static inline int -zpl_bdi_setup(struct super_block *sb, char *name) -{ - struct backing_dev_info *bdi; - int error; - - bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP); - error = bdi_setup_and_register(bdi, name); - if (error) { - kmem_free(bdi, sizeof (struct backing_dev_info)); - return (error); - } - - sb->s_bdi = bdi; - - return (0); -} -static inline void -zpl_bdi_destroy(struct super_block *sb) -{ - struct backing_dev_info *bdi = sb->s_bdi; - - bdi_destroy(bdi); - kmem_free(bdi, sizeof (struct backing_dev_info)); - sb->s_bdi = NULL; -} -#elif defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER) -static inline int -zpl_bdi_setup(struct super_block *sb, char *name) -{ - struct backing_dev_info *bdi; - int error; - - bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP); - error = bdi_setup_and_register(bdi, name, BDI_CAP_MAP_COPY); - if (error) { - kmem_free(sb->s_bdi, sizeof (struct backing_dev_info)); - return (error); - } - - sb->s_bdi = bdi; - - return (0); -} -static inline void -zpl_bdi_destroy(struct super_block *sb) -{ - struct backing_dev_info *bdi = sb->s_bdi; - - bdi_destroy(bdi); - kmem_free(bdi, sizeof (struct backing_dev_info)); - sb->s_bdi = NULL; -} -#else -#error "Unsupported kernel" -#endif - /* * 4.14 adds SB_* flag definitions, define them to MS_* equivalents * if not set. @@ -136,17 +60,7 @@ zpl_bdi_destroy(struct super_block *sb) #define SB_NOATIME MS_NOATIME #endif -/* - * 3.5 API change, - * The clear_inode() function replaces end_writeback() and introduces an - * ordering change regarding when the inode_sync_wait() occurs. 
See the - * configure check in config/kernel-clear-inode.m4 for full details. - */ -#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE) -#define clear_inode(ip) end_writeback(ip) -#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */ - -#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(HAVE_LSEEK_EXECUTE) +#if defined(SEEK_HOLE) && defined(SEEK_DATA) static inline loff_t lseek_execute( struct file *filp, @@ -154,7 +68,11 @@ lseek_execute( loff_t offset, loff_t maxsize) { +#ifdef FMODE_UNSIGNED_OFFSET if (offset < 0 && !(filp->f_mode & FMODE_UNSIGNED_OFFSET)) +#else + if (offset < 0 && !(filp->f_op->fop_flags & FOP_UNSIGNED_OFFSET)) +#endif return (-EINVAL); if (offset > maxsize) @@ -163,13 +81,15 @@ lseek_execute( if (offset != filp->f_pos) { spin_lock(&filp->f_lock); filp->f_pos = offset; +#ifdef HAVE_FILE_F_VERSION filp->f_version = 0; +#endif spin_unlock(&filp->f_lock); } return (offset); } -#endif /* SEEK_HOLE && SEEK_DATA && !HAVE_LSEEK_EXECUTE */ +#endif /* SEEK_HOLE && SEEK_DATA */ #if defined(CONFIG_FS_POSIX_ACL) /* @@ -184,9 +104,6 @@ lseek_execute( #include -#if defined(HAVE_POSIX_ACL_RELEASE) && !defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY) -#define zpl_posix_acl_release(arg) posix_acl_release(arg) -#else void zpl_posix_acl_release_impl(struct posix_acl *); static inline void @@ -194,106 +111,11 @@ zpl_posix_acl_release(struct posix_acl *acl) { if ((acl == NULL) || (acl == ACL_NOT_CACHED)) return; -#ifdef HAVE_ACL_REFCOUNT if (refcount_dec_and_test(&acl->a_refcount)) zpl_posix_acl_release_impl(acl); -#else - if (atomic_dec_and_test(&acl->a_refcount)) - zpl_posix_acl_release_impl(acl); -#endif -} -#endif /* HAVE_POSIX_ACL_RELEASE */ - -#ifdef HAVE_SET_CACHED_ACL_USABLE -#define zpl_set_cached_acl(ip, ty, n) set_cached_acl(ip, ty, n) -#define zpl_forget_cached_acl(ip, ty) forget_cached_acl(ip, ty) -#else -static inline void -zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer) -{ - struct posix_acl *older = NULL; - - 
spin_lock(&ip->i_lock); - - if ((newer != ACL_NOT_CACHED) && (newer != NULL)) - posix_acl_dup(newer); - - switch (type) { - case ACL_TYPE_ACCESS: - older = ip->i_acl; - rcu_assign_pointer(ip->i_acl, newer); - break; - case ACL_TYPE_DEFAULT: - older = ip->i_default_acl; - rcu_assign_pointer(ip->i_default_acl, newer); - break; - } - - spin_unlock(&ip->i_lock); - - zpl_posix_acl_release(older); -} - -static inline void -zpl_forget_cached_acl(struct inode *ip, int type) -{ - zpl_set_cached_acl(ip, type, (struct posix_acl *)ACL_NOT_CACHED); } -#endif /* HAVE_SET_CACHED_ACL_USABLE */ - -/* - * 3.1 API change, - * posix_acl_chmod() was added as the preferred interface. - * - * 3.14 API change, - * posix_acl_chmod() was changed to __posix_acl_chmod() - */ -#ifndef HAVE___POSIX_ACL_CHMOD -#ifdef HAVE_POSIX_ACL_CHMOD -#define __posix_acl_chmod(acl, gfp, mode) posix_acl_chmod(acl, gfp, mode) -#define __posix_acl_create(acl, gfp, mode) posix_acl_create(acl, gfp, mode) -#else -#error "Unsupported kernel" -#endif /* HAVE_POSIX_ACL_CHMOD */ -#endif /* HAVE___POSIX_ACL_CHMOD */ - -/* - * 4.8 API change, - * posix_acl_valid() now must be passed a namespace, the namespace from - * from super block associated with the given inode is used for this purpose. 
- */ -#ifdef HAVE_POSIX_ACL_VALID_WITH_NS -#define zpl_posix_acl_valid(ip, acl) posix_acl_valid(ip->i_sb->s_user_ns, acl) -#else -#define zpl_posix_acl_valid(ip, acl) posix_acl_valid(acl) -#endif - #endif /* CONFIG_FS_POSIX_ACL */ -/* - * 3.19 API change - * struct access f->f_dentry->d_inode was replaced by accessor function - * file_inode(f) - */ -#ifndef HAVE_FILE_INODE -static inline struct inode *file_inode(const struct file *f) -{ - return (f->f_dentry->d_inode); -} -#endif /* HAVE_FILE_INODE */ - -/* - * 4.1 API change - * struct access file->f_path.dentry was replaced by accessor function - * file_dentry(f) - */ -#ifndef HAVE_FILE_DENTRY -static inline struct dentry *file_dentry(const struct file *f) -{ - return (f->f_path.dentry); -} -#endif /* HAVE_FILE_DENTRY */ - static inline uid_t zfs_uid_read_impl(struct inode *ip) { return (from_kuid(kcred->user_ns, ip->i_uid)); @@ -371,16 +193,7 @@ setattr_prepare(struct dentry *dentry, struct iattr *ia) * 4.11 takes struct path *, < 4.11 takes vfsmount * */ -#ifdef HAVE_VFSMOUNT_IOPS_GETATTR -#define ZPL_GETATTR_WRAPPER(func) \ -static int \ -func(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) \ -{ \ - struct path path = { .mnt = mnt, .dentry = dentry }; \ - return func##_impl(&path, stat, STATX_BASIC_STATS, \ - AT_STATX_SYNC_AS_STAT); \ -} -#elif defined(HAVE_PATH_IOPS_GETATTR) +#if defined(HAVE_PATH_IOPS_GETATTR) #define ZPL_GETATTR_WRAPPER(func) \ static int \ func(const struct path *path, struct kstat *stat, u32 request_mask, \ @@ -410,32 +223,6 @@ func(struct mnt_idmap *user_ns, const struct path *path, \ #error #endif -/* - * 4.9 API change - * Preferred interface to get the current FS time. - */ -#if !defined(HAVE_CURRENT_TIME) -static inline struct timespec -current_time(struct inode *ip) -{ - return (timespec_trunc(current_kernel_time(), ip->i_sb->s_time_gran)); -} -#endif - -/* - * 4.16 API change - * Added iversion interface for managing inode version field. 
- */ -#ifdef HAVE_INODE_SET_IVERSION -#include -#else -static inline void -inode_set_iversion(struct inode *ip, u64 val) -{ - ip->i_version = val; -} -#endif - /* * Returns true when called in the context of a 32-bit system call. */ @@ -443,11 +230,7 @@ static inline int zpl_is_32bit_api(void) { #ifdef CONFIG_COMPAT -#ifdef HAVE_IN_COMPAT_SYSCALL return (in_compat_syscall()); -#else - return (is_compat_task()); -#endif #else return (BITS_PER_LONG == 32); #endif diff --git a/include/os/linux/kernel/linux/xattr_compat.h b/include/os/linux/kernel/linux/xattr_compat.h index bcc7289ad857..f7e62da62007 100644 --- a/include/os/linux/kernel/linux/xattr_compat.h +++ b/include/os/linux/kernel/linux/xattr_compat.h @@ -40,97 +40,34 @@ typedef const struct xattr_handler xattr_handler_t; /* * 4.5 API change, */ -#if defined(HAVE_XATTR_LIST_SIMPLE) #define ZPL_XATTR_LIST_WRAPPER(fn) \ static bool \ fn(struct dentry *dentry) \ { \ return (!!__ ## fn(dentry->d_inode, NULL, 0, NULL, 0)); \ } -/* - * 4.4 API change, - */ -#elif defined(HAVE_XATTR_LIST_DENTRY) -#define ZPL_XATTR_LIST_WRAPPER(fn) \ -static size_t \ -fn(struct dentry *dentry, char *list, size_t list_size, \ - const char *name, size_t name_len, int type) \ -{ \ - return (__ ## fn(dentry->d_inode, \ - list, list_size, name, name_len)); \ -} -/* - * 2.6.33 API change, - */ -#elif defined(HAVE_XATTR_LIST_HANDLER) -#define ZPL_XATTR_LIST_WRAPPER(fn) \ -static size_t \ -fn(const struct xattr_handler *handler, struct dentry *dentry, \ - char *list, size_t list_size, const char *name, size_t name_len) \ -{ \ - return (__ ## fn(dentry->d_inode, \ - list, list_size, name, name_len)); \ -} -#else -#error "Unsupported kernel" -#endif +#ifdef HAVE_XATTR_GET_DENTRY_INODE_FLAGS /* - * 4.7 API change, - * The xattr_handler->get() callback was changed to take a both dentry and - * inode, because the dentry might not be attached to an inode yet. + * Android API change, + * The xattr_handler->get() callback also takes a flags arg. 
*/ -#if defined(HAVE_XATTR_GET_DENTRY_INODE) #define ZPL_XATTR_GET_WRAPPER(fn) \ static int \ fn(const struct xattr_handler *handler, struct dentry *dentry, \ - struct inode *inode, const char *name, void *buffer, size_t size) \ + struct inode *inode, const char *name, void *buffer, \ + size_t size, int flags) \ { \ return (__ ## fn(inode, name, buffer, size)); \ } -/* - * 4.4 API change, - * The xattr_handler->get() callback was changed to take a xattr_handler, - * and handler_flags argument was removed and should be accessed by - * handler->flags. - */ -#elif defined(HAVE_XATTR_GET_HANDLER) -#define ZPL_XATTR_GET_WRAPPER(fn) \ -static int \ -fn(const struct xattr_handler *handler, struct dentry *dentry, \ - const char *name, void *buffer, size_t size) \ -{ \ - return (__ ## fn(dentry->d_inode, name, buffer, size)); \ -} -/* - * 2.6.33 API change, - * The xattr_handler->get() callback was changed to take a dentry - * instead of an inode, and a handler_flags argument was added. - */ -#elif defined(HAVE_XATTR_GET_DENTRY) -#define ZPL_XATTR_GET_WRAPPER(fn) \ -static int \ -fn(struct dentry *dentry, const char *name, void *buffer, size_t size, \ - int unused_handler_flags) \ -{ \ - return (__ ## fn(dentry->d_inode, name, buffer, size)); \ -} -/* - * Android API change, - * The xattr_handler->get() callback was changed to take a dentry and inode - * and flags, because the dentry might not be attached to an inode yet. 
- */ -#elif defined(HAVE_XATTR_GET_DENTRY_INODE_FLAGS) +#else #define ZPL_XATTR_GET_WRAPPER(fn) \ static int \ fn(const struct xattr_handler *handler, struct dentry *dentry, \ - struct inode *inode, const char *name, void *buffer, \ - size_t size, int flags) \ + struct inode *inode, const char *name, void *buffer, size_t size) \ { \ return (__ ## fn(inode, name, buffer, size)); \ } -#else -#error "Unsupported kernel" #endif /* @@ -177,35 +114,6 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \ { \ return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\ } -/* - * 4.4 API change, - * The xattr_handler->set() callback was changed to take a xattr_handler, - * and handler_flags argument was removed and should be accessed by - * handler->flags. - */ -#elif defined(HAVE_XATTR_SET_HANDLER) -#define ZPL_XATTR_SET_WRAPPER(fn) \ -static int \ -fn(const struct xattr_handler *handler, struct dentry *dentry, \ - const char *name, const void *buffer, size_t size, int flags) \ -{ \ - return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \ - buffer, size, flags)); \ -} -/* - * 2.6.33 API change, - * The xattr_handler->set() callback was changed to take a dentry - * instead of an inode, and a handler_flags argument was added. 
- */ -#elif defined(HAVE_XATTR_SET_DENTRY) -#define ZPL_XATTR_SET_WRAPPER(fn) \ -static int \ -fn(struct dentry *dentry, const char *name, const void *buffer, \ - size_t size, int flags, int unused_handler_flags) \ -{ \ - return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \ - buffer, size, flags)); \ -} #else #error "Unsupported kernel" #endif diff --git a/include/os/linux/spl/sys/condvar.h b/include/os/linux/spl/sys/condvar.h index ef405763ca56..cb94ae89c866 100644 --- a/include/os/linux/spl/sys/condvar.h +++ b/include/os/linux/spl/sys/condvar.h @@ -66,8 +66,8 @@ typedef struct { int cv_magic; - spl_wait_queue_head_t cv_event; - spl_wait_queue_head_t cv_destroy; + wait_queue_head_t cv_event; + wait_queue_head_t cv_destroy; atomic_t cv_refs; atomic_t cv_waiters; kmutex_t *cv_mutex; diff --git a/include/os/linux/spl/sys/cred.h b/include/os/linux/spl/sys/cred.h index c19c3c0719ff..1c5120c24371 100644 --- a/include/os/linux/spl/sys/cred.h +++ b/include/os/linux/spl/sys/cred.h @@ -63,11 +63,7 @@ zfs_is_init_userns(struct user_namespace *user_ns) static inline struct user_namespace *zfs_i_user_ns(struct inode *inode) { -#ifdef HAVE_SUPER_USER_NS return (inode->i_sb->s_user_ns); -#else - return (kcred->user_ns); -#endif } static inline boolean_t zfs_no_idmapping(struct user_namespace *mnt_userns, diff --git a/include/os/linux/spl/sys/file.h b/include/os/linux/spl/sys/file.h index e0bbd6d98cba..38d19d8c68b0 100644 --- a/include/os/linux/spl/sys/file.h +++ b/include/os/linux/spl/sys/file.h @@ -28,7 +28,6 @@ #define FKIOCTL 0x80000000 #define ED_CASE_CONFLICT 0x10 -#ifdef HAVE_INODE_LOCK_SHARED #define spl_inode_lock(ip) inode_lock(ip) #define spl_inode_unlock(ip) inode_unlock(ip) #define spl_inode_lock_shared(ip) inode_lock_shared(ip) @@ -37,15 +36,5 @@ #define spl_inode_trylock_shared(ip) inode_trylock_shared(ip) #define spl_inode_is_locked(ip) inode_is_locked(ip) #define spl_inode_lock_nested(ip, s) inode_lock_nested(ip, s) -#else -#define spl_inode_lock(ip) 
mutex_lock(&(ip)->i_mutex) -#define spl_inode_unlock(ip) mutex_unlock(&(ip)->i_mutex) -#define spl_inode_lock_shared(ip) mutex_lock(&(ip)->i_mutex) -#define spl_inode_unlock_shared(ip) mutex_unlock(&(ip)->i_mutex) -#define spl_inode_trylock(ip) mutex_trylock(&(ip)->i_mutex) -#define spl_inode_trylock_shared(ip) mutex_trylock(&(ip)->i_mutex) -#define spl_inode_is_locked(ip) mutex_is_locked(&(ip)->i_mutex) -#define spl_inode_lock_nested(ip, s) mutex_lock_nested(&(ip)->i_mutex, s) -#endif #endif /* SPL_FILE_H */ diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h index 2b4f120e6427..71048db4ec5a 100644 --- a/include/os/linux/spl/sys/kmem_cache.h +++ b/include/os/linux/spl/sys/kmem_cache.h @@ -160,7 +160,7 @@ typedef struct spl_kmem_cache { struct list_head skc_partial_list; /* Partially alloc'ed */ struct rb_root skc_emergency_tree; /* Min sized objects */ spinlock_t skc_lock; /* Cache lock */ - spl_wait_queue_head_t skc_waitq; /* Allocation waiters */ + wait_queue_head_t skc_waitq; /* Allocation waiters */ uint64_t skc_slab_fail; /* Slab alloc failures */ uint64_t skc_slab_create; /* Slab creates */ uint64_t skc_slab_destroy; /* Slab destroys */ @@ -200,6 +200,7 @@ extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache); /* Avoid conflicts with kernel names that might be implemented as macros. 
*/ #undef kmem_cache_alloc +#undef kmem_cache_create #define kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) \ spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) diff --git a/include/os/linux/spl/sys/signal.h b/include/os/linux/spl/sys/signal.h index cb4b33261647..473a1be14c34 100644 --- a/include/os/linux/spl/sys/signal.h +++ b/include/os/linux/spl/sys/signal.h @@ -25,10 +25,7 @@ #define _SPL_SIGNAL_H #include - -#ifdef HAVE_SCHED_SIGNAL_HEADER #include -#endif extern int issig(void); diff --git a/include/os/linux/spl/sys/string.h b/include/os/linux/spl/sys/string.h index f44bf23eb326..20bde1f0898a 100644 --- a/include/os/linux/spl/sys/string.h +++ b/include/os/linux/spl/sys/string.h @@ -25,10 +25,7 @@ #include -/* Fallbacks for kernel missing strlcpy */ #ifndef HAVE_KERNEL_STRLCPY - -#if defined(HAVE_KERNEL_STRSCPY) /* * strscpy is strlcpy, but returns an error on truncation. strlcpy is defined * to return strlen(src), so detect error and override it. 
@@ -41,10 +38,6 @@ strlcpy(char *dest, const char *src, size_t size) return ((size_t)ret); return (strlen(src)); } -#else -#error "no strlcpy fallback available" -#endif - #endif /* HAVE_KERNEL_STRLCPY */ #endif /* _SPL_STRING_H */ diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h index 8051de36ba82..f63a397f293d 100644 --- a/include/os/linux/spl/sys/taskq.h +++ b/include/os/linux/spl/sys/taskq.h @@ -131,8 +131,8 @@ typedef struct taskq { struct list_head tq_prio_list; /* priority taskq_ent_t's */ struct list_head tq_delay_list; /* delayed taskq_ent_t's */ struct list_head tq_taskqs; /* all taskq_t's */ - spl_wait_queue_head_t tq_work_waitq; /* new work waitq */ - spl_wait_queue_head_t tq_wait_waitq; /* wait waitq */ + wait_queue_head_t tq_work_waitq; /* new work waitq */ + wait_queue_head_t tq_wait_waitq; /* wait waitq */ tq_lock_role_t tq_lock_class; /* class when taking tq_lock */ /* list node for the cpu hotplug callback */ struct hlist_node tq_hp_cb_node; @@ -144,7 +144,7 @@ typedef struct taskq { typedef struct taskq_ent { spinlock_t tqent_lock; - spl_wait_queue_head_t tqent_waitq; + wait_queue_head_t tqent_waitq; struct timer_list tqent_timer; struct list_head tqent_list; taskqid_t tqent_id; diff --git a/include/os/linux/spl/sys/thread.h b/include/os/linux/spl/sys/thread.h index bc88ff4efb67..4f7f659e528d 100644 --- a/include/os/linux/spl/sys/thread.h +++ b/include/os/linux/spl/sys/thread.h @@ -80,11 +80,4 @@ typedef kernel_siginfo_t spl_kernel_siginfo_t; typedef siginfo_t spl_kernel_siginfo_t; #endif -#ifdef HAVE_SET_SPECIAL_STATE -#define spl_set_special_state(x) set_special_state((x)) -#else -#define spl_set_special_state(x) __set_current_state((x)) -#endif - - #endif /* _SPL_THREAD_H */ diff --git a/include/os/linux/spl/sys/time.h b/include/os/linux/spl/sys/time.h index fec85f8b8d13..036948d87aae 100644 --- a/include/os/linux/spl/sys/time.h +++ b/include/os/linux/spl/sys/time.h @@ -59,11 +59,7 @@ typedef struct timespec 
timespec_t; #define TIMESPEC_OVERFLOW(ts) \ ((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX) -#if defined(HAVE_INODE_TIMESPEC64_TIMES) typedef struct timespec64 inode_timespec_t; -#else -typedef struct timespec inode_timespec_t; -#endif /* Include for Lustre compatibility */ #define timestruc_t inode_timespec_t @@ -71,46 +67,22 @@ typedef struct timespec inode_timespec_t; static inline void gethrestime(inode_timespec_t *ts) { -#if defined(HAVE_INODE_TIMESPEC64_TIMES) - -#if defined(HAVE_KTIME_GET_COARSE_REAL_TS64) ktime_get_coarse_real_ts64(ts); -#else - *ts = current_kernel_time64(); -#endif /* HAVE_KTIME_GET_COARSE_REAL_TS64 */ - -#else - *ts = current_kernel_time(); -#endif } static inline uint64_t gethrestime_sec(void) { -#if defined(HAVE_INODE_TIMESPEC64_TIMES) -#if defined(HAVE_KTIME_GET_COARSE_REAL_TS64) inode_timespec_t ts; ktime_get_coarse_real_ts64(&ts); -#else - inode_timespec_t ts = current_kernel_time64(); -#endif /* HAVE_KTIME_GET_COARSE_REAL_TS64 */ - -#else - inode_timespec_t ts = current_kernel_time(); -#endif return (ts.tv_sec); } static inline hrtime_t gethrtime(void) { -#if defined(HAVE_KTIME_GET_RAW_TS64) struct timespec64 ts; ktime_get_raw_ts64(&ts); -#else - struct timespec ts; - getrawmonotonic(&ts); -#endif return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec); } diff --git a/include/os/linux/spl/sys/timer.h b/include/os/linux/spl/sys/timer.h index 02c3c7893477..61a1fa6aeffb 100644 --- a/include/os/linux/spl/sys/timer.h +++ b/include/os/linux/spl/sys/timer.h @@ -62,24 +62,4 @@ container_of(timer, typeof(*var), timer_field) #endif -#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST -typedef struct timer_list *spl_timer_list_t; -#else -typedef unsigned long spl_timer_list_t; -#endif - -#ifndef HAVE_KERNEL_TIMER_SETUP - -static inline void -timer_setup(struct timer_list *timer, void (*func)(spl_timer_list_t), u32 fl) -{ -#ifdef HAVE_KERNEL_TIMER_LIST_FLAGS - (timer)->flags = fl; -#endif - init_timer(timer); - setup_timer(timer, func, 
(spl_timer_list_t)(timer)); -} - -#endif /* HAVE_KERNEL_TIMER_SETUP */ - #endif /* _SPL_TIMER_H */ diff --git a/include/os/linux/spl/sys/types.h b/include/os/linux/spl/sys/types.h index 94ba7b6ad323..d3aefb18449d 100644 --- a/include/os/linux/spl/sys/types.h +++ b/include/os/linux/spl/sys/types.h @@ -83,4 +83,10 @@ typedef struct user_namespace zidmap_t; extern zidmap_t *zfs_init_idmap; +#ifdef HAVE_1ARG_ASSIGN_STR +#define __assign_str_impl(a, b) __assign_str(a) +#else +#define __assign_str_impl(a, b) __assign_str(a, b) +#endif + #endif /* _SPL_TYPES_H */ diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index 5e6ea8d3c221..5d483685eb20 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -33,6 +33,12 @@ #include #include #include +#include + +/* + * uio_extflg: extended flags + */ +#define UIO_DIRECT 0x0001 /* Direct I/O request */ #if defined(HAVE_VFS_IOV_ITER) && defined(HAVE_FAULT_IN_IOV_ITER_READABLE) #define iov_iter_fault_in_readable(a, b) fault_in_iov_iter_readable(a, b) @@ -54,6 +60,14 @@ typedef enum zfs_uio_seg { #endif } zfs_uio_seg_t; +/* + * This structure is used when doing Direct I/O.
+ */ +typedef struct { + struct page **pages; /* Mapped pages */ + long npages; /* Number of mapped pages */ +} zfs_uio_dio_t; + typedef struct zfs_uio { union { const struct iovec *uio_iov; @@ -62,15 +76,16 @@ typedef struct zfs_uio { struct iov_iter *uio_iter; #endif }; - int uio_iovcnt; - offset_t uio_loffset; - zfs_uio_seg_t uio_segflg; + int uio_iovcnt; /* Number of iovecs */ + offset_t uio_soffset; /* Starting logical offset */ + offset_t uio_loffset; /* Current logical offset */ + zfs_uio_seg_t uio_segflg; /* Segment type */ boolean_t uio_fault_disable; - uint16_t uio_fmode; - uint16_t uio_extflg; - ssize_t uio_resid; - - size_t uio_skip; + uint16_t uio_fmode; /* Access mode (unused) */ + uint16_t uio_extflg; /* Extra flags (UIO_DIRECT) */ + ssize_t uio_resid; /* Residual unprocessed bytes */ + size_t uio_skip; /* Skipped bytes in current iovec */ + zfs_uio_dio_t uio_dio; /* Direct I/O user pages */ struct request *rq; } zfs_uio_t; @@ -83,6 +98,7 @@ typedef struct zfs_uio { #define zfs_uio_iovlen(u, idx) (u)->uio_iov[(idx)].iov_len #define zfs_uio_iovbase(u, idx) (u)->uio_iov[(idx)].iov_base #define zfs_uio_fault_disable(u, set) (u)->uio_fault_disable = set +#define zfs_uio_soffset(u) (u)->uio_soffset #define zfs_uio_rlimit_fsize(z, u) (0) #define zfs_uio_fault_move(p, n, rw, u) zfs_uiomove((p), (n), (rw), (u)) @@ -94,6 +110,13 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) uio->uio_loffset = off; } +static inline void +zfs_uio_setsoffset(zfs_uio_t *uio, offset_t off) +{ + ASSERT3U(zfs_uio_offset(uio), ==, off); + zfs_uio_soffset(uio) = off; +} + static inline void zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { @@ -117,6 +140,8 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov, uio->uio_extflg = 0; uio->uio_resid = resid; uio->uio_skip = skip; + uio->uio_soffset = uio->uio_loffset; + memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } static inline void @@ -146,6 +171,8 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request 
*rq) } uio->rq = rq; + uio->uio_soffset = uio->uio_loffset; + memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } #if defined(HAVE_VFS_IOV_ITER) @@ -162,8 +189,10 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset, uio->uio_extflg = 0; uio->uio_resid = resid; uio->uio_skip = skip; + uio->uio_soffset = uio->uio_loffset; + memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } -#endif +#endif /* HAVE_VFS_IOV_ITER */ #if defined(HAVE_ITER_IOV) #define zfs_uio_iter_iov(iter) iter_iov((iter)) diff --git a/include/os/linux/spl/sys/wait.h b/include/os/linux/spl/sys/wait.h index 65cd83e5ef12..78ec62b08155 100644 --- a/include/os/linux/spl/sys/wait.h +++ b/include/os/linux/spl/sys/wait.h @@ -27,28 +27,4 @@ #include #include -#ifndef HAVE_WAIT_ON_BIT_ACTION -#define spl_wait_on_bit(word, bit, mode) wait_on_bit(word, bit, mode) -#else - -static inline int -spl_bit_wait(void *word) -{ - schedule(); - return (0); -} - -#define spl_wait_on_bit(word, bit, mode) \ - wait_on_bit(word, bit, spl_bit_wait, mode) - -#endif /* HAVE_WAIT_ON_BIT_ACTION */ - -#ifdef HAVE_WAIT_QUEUE_ENTRY_T -typedef wait_queue_head_t spl_wait_queue_head_t; -typedef wait_queue_entry_t spl_wait_queue_entry_t; -#else -typedef wait_queue_head_t spl_wait_queue_head_t; -typedef wait_queue_t spl_wait_queue_entry_t; -#endif - #endif /* SPL_WAIT_H */ diff --git a/include/os/linux/spl/sys/wmsum.h b/include/os/linux/spl/sys/wmsum.h index 0871bd69504c..1c87f56e13ac 100644 --- a/include/os/linux/spl/sys/wmsum.h +++ b/include/os/linux/spl/sys/wmsum.h @@ -36,12 +36,7 @@ typedef struct percpu_counter wmsum_t; static inline void wmsum_init(wmsum_t *ws, uint64_t value) { - -#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP percpu_counter_init(ws, value, GFP_KERNEL); -#else - percpu_counter_init(ws, value); -#endif } static inline void @@ -62,11 +57,7 @@ static inline void wmsum_add(wmsum_t *ws, int64_t delta) { -#ifdef HAVE_PERCPU_COUNTER_ADD_BATCH percpu_counter_add_batch(ws, delta, INT_MAX / 2); -#else - 
__percpu_counter_add(ws, delta, INT_MAX / 2); -#endif } #ifdef __cplusplus diff --git a/include/os/linux/zfs/sys/abd_os.h b/include/os/linux/zfs/sys/abd_os.h index ce4f5a2bdf9b..606e8bf682e8 100644 --- a/include/os/linux/zfs/sys/abd_os.h +++ b/include/os/linux/zfs/sys/abd_os.h @@ -55,6 +55,9 @@ int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *, unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t); unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t); +__attribute__((malloc)) +abd_t *abd_alloc_from_pages(struct page **, unsigned long, uint64_t); + #ifdef __cplusplus } #endif diff --git a/include/os/linux/zfs/sys/trace_dbgmsg.h b/include/os/linux/zfs/sys/trace_dbgmsg.h index 19f533baaaaf..58081d64cf40 100644 --- a/include/os/linux/zfs/sys/trace_dbgmsg.h +++ b/include/os/linux/zfs/sys/trace_dbgmsg.h @@ -32,6 +32,7 @@ #define _TRACE_DBGMSG_H #include +#include /* * This file defines tracepoint events for use by the dbgmsg(), @@ -59,7 +60,7 @@ DECLARE_EVENT_CLASS(zfs_dprintf_class, __string(msg, msg) ), TP_fast_assign( - __assign_str(msg, msg); + __assign_str_impl(msg, msg); ), TP_printk("%s", __get_str(msg)) ); diff --git a/include/os/linux/zfs/sys/trace_dbuf.h b/include/os/linux/zfs/sys/trace_dbuf.h index 0e9cbdd725b4..b61807744d5c 100644 --- a/include/os/linux/zfs/sys/trace_dbuf.h +++ b/include/os/linux/zfs/sys/trace_dbuf.h @@ -45,9 +45,13 @@ * dmu_buf_impl_t *, ..., * zio_t *, ...); */ +#define DBUF_TP_STRUCT_ENTRY_OS_SPA \ + (db != NULL && \ + POINTER_IS_VALID(DB_DNODE(db)->dn_objset)) \ + ? 
spa_name(DB_DNODE(db)->dn_objset->os_spa) : "NULL" #define DBUF_TP_STRUCT_ENTRY \ - __dynamic_array(char, os_spa, TRACE_DBUF_MSG_MAX) \ + __string(os_spa, DBUF_TP_STRUCT_ENTRY_OS_SPA) \ __field(uint64_t, ds_object) \ __field(uint64_t, db_object) \ __field(uint64_t, db_level) \ @@ -55,18 +59,11 @@ __field(uint64_t, db_offset) \ __field(uint64_t, db_size) \ __field(uint64_t, db_state) \ - __field(int64_t, db_holds) \ - __dynamic_array(char, msg, TRACE_DBUF_MSG_MAX) + __field(int64_t, db_holds) #define DBUF_TP_FAST_ASSIGN \ if (db != NULL) { \ - if (POINTER_IS_VALID(DB_DNODE(db)->dn_objset)) { \ - __assign_str(os_spa, \ - spa_name(DB_DNODE(db)->dn_objset->os_spa)); \ - } else { \ - __assign_str(os_spa, "NULL"); \ - } \ - \ + __assign_str_impl(os_spa, DBUF_TP_STRUCT_ENTRY_OS_SPA); \ __entry->ds_object = db->db_objset->os_dsl_dataset ? \ db->db_objset->os_dsl_dataset->ds_object : 0; \ \ @@ -77,10 +74,8 @@ __entry->db_size = db->db.db_size; \ __entry->db_state = db->db_state; \ __entry->db_holds = zfs_refcount_count(&db->db_holds); \ - snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX, \ - DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS); \ } else { \ - __assign_str(os_spa, "NULL"); \ + __assign_str_impl(os_spa, DBUF_TP_STRUCT_ENTRY_OS_SPA); \ __entry->ds_object = 0; \ __entry->db_object = 0; \ __entry->db_level = 0; \ @@ -89,8 +84,6 @@ __entry->db_size = 0; \ __entry->db_state = 0; \ __entry->db_holds = 0; \ - snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX, \ - "dbuf { NULL }"); \ } #define DBUF_TP_PRINTK_FMT \ @@ -109,7 +102,7 @@ DECLARE_EVENT_CLASS(zfs_dbuf_class, TP_ARGS(db, zio), TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY), TP_fast_assign(DBUF_TP_FAST_ASSIGN), - TP_printk("%s", __get_str(msg)) + TP_printk(DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS) ); DECLARE_EVENT_CLASS(zfs_dbuf_state_class, @@ -117,7 +110,7 @@ DECLARE_EVENT_CLASS(zfs_dbuf_state_class, TP_ARGS(db, why), TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY), TP_fast_assign(DBUF_TP_FAST_ASSIGN), - TP_printk("%s", __get_str(msg)) + 
TP_printk(DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS) ); /* END CSTYLED */ @@ -139,7 +132,7 @@ DECLARE_EVENT_CLASS(zfs_dbuf_evict_one_class, TP_ARGS(db, mls), TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY), TP_fast_assign(DBUF_TP_FAST_ASSIGN), - TP_printk("%s", __get_str(msg)) + TP_printk(DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS) ); /* END CSTYLED */ diff --git a/include/os/linux/zfs/sys/zfs_ctldir.h b/include/os/linux/zfs/sys/zfs_ctldir.h index ad16ab5e4444..8f18cda2952f 100644 --- a/include/os/linux/zfs/sys/zfs_ctldir.h +++ b/include/os/linux/zfs/sys/zfs_ctldir.h @@ -45,7 +45,7 @@ (ZTOZSB(zdp)->z_ctldir != NULL)) #define zfs_show_ctldir(zdp) \ (zfs_has_ctldir(zdp) && \ - (ZTOZSB(zdp)->z_show_ctldir)) + (ZTOZSB(zdp)->z_show_ctldir == ZFS_SNAPDIR_VISIBLE)) extern int zfs_expire_snapshot; diff --git a/include/os/linux/zfs/sys/zfs_vfsops_os.h b/include/os/linux/zfs/sys/zfs_vfsops_os.h index b4d5db21f5e5..7067eb17900d 100644 --- a/include/os/linux/zfs/sys/zfs_vfsops_os.h +++ b/include/os/linux/zfs/sys/zfs_vfsops_os.h @@ -110,7 +110,7 @@ struct zfsvfs { kmutex_t z_znodes_lock; /* lock for z_all_znodes */ arc_prune_t *z_arc_prune; /* called by ARC to prune caches */ struct inode *z_ctldir; /* .zfs directory inode */ - boolean_t z_show_ctldir; /* expose .zfs in the root dir */ + uint_t z_show_ctldir; /* how to expose .zfs in the root dir */ boolean_t z_issnap; /* true if this is a snapshot */ boolean_t z_use_fuids; /* version allows fuids */ boolean_t z_replay; /* set during ZIL replay */ @@ -118,6 +118,7 @@ struct zfsvfs { boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */ boolean_t z_draining; /* is true when drain is active */ boolean_t z_drain_cancel; /* signal the unlinked drain to stop */ + boolean_t z_longname; /* Dataset supports long names */ uint64_t z_version; /* ZPL version */ uint64_t z_shares_dir; /* hidden shares dir */ dataset_kstats_t z_kstat; /* fs kstats */ diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h 
index 830c76e5743a..db33eda57705 100644 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h @@ -44,6 +44,7 @@ extern int zfs_write_simple(znode_t *zp, const void *data, size_t len, loff_t pos, size_t *resid); extern int zfs_lookup(znode_t *dzp, char *nm, znode_t **zpp, int flags, cred_t *cr, int *direntflags, pathname_t *realpnp); +extern int zfs_get_name(znode_t *dzp, char *name, znode_t *zp); extern int zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns); @@ -55,7 +56,7 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp, cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns); extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, int flags); -extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr); +extern int zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr); #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK extern int zfs_getattr_fast(zidmap_t *, u32 request_mask, struct inode *ip, struct kstat *sp); diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h index 0be2c445ab76..cc8e5150eaf1 100644 --- a/include/os/linux/zfs/sys/zfs_znode_impl.h +++ b/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -47,16 +47,9 @@ extern "C" { #endif -#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) #define ZNODE_OS_FIELDS \ inode_timespec_t z_btime; /* creation/birth time (cached) */ \ struct inode z_inode; -#else -#define ZNODE_OS_FIELDS \ - inode_timespec_t z_btime; /* creation/birth time (cached) */ \ - struct inode z_inode; \ - boolean_t z_is_mapped; /* we are mmap'ed */ -#endif /* * Convert between znode pointers and inode pointers @@ -77,13 +70,8 @@ extern "C" { #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type)) #define Z_ISDIR(type) S_ISDIR(type) -#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) #define 
zn_has_cached_data(zp, start, end) \ filemap_range_has_page(ZTOI(zp)->i_mapping, start, end) -#else -#define zn_has_cached_data(zp, start, end) \ - ((zp)->z_is_mapped) -#endif #define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync) #define zn_rlimit_fsize(size) (0) @@ -153,27 +141,14 @@ do { \ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ } while (0) -#if defined(HAVE_INODE_TIMESPEC64_TIMES) /* * Decode ZFS stored time values to a struct timespec64 - * 4.18 and newer kernels. */ #define ZFS_TIME_DECODE(tp, stmp) \ do { \ (tp)->tv_sec = (time64_t)(stmp)[0]; \ (tp)->tv_nsec = (long)(stmp)[1]; \ } while (0) -#else -/* - * Decode ZFS stored time values to a struct timespec - * 4.17 and older kernels. - */ -#define ZFS_TIME_DECODE(tp, stmp) \ -do { \ - (tp)->tv_sec = (time_t)(stmp)[0]; \ - (tp)->tv_nsec = (long)(stmp)[1]; \ -} while (0) -#endif /* HAVE_INODE_TIMESPEC64_TIMES */ #define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) @@ -184,12 +159,6 @@ extern int zfs_inode_alloc(struct super_block *, struct inode **ip); extern void zfs_inode_destroy(struct inode *); extern void zfs_mark_inode_dirty(struct inode *); extern boolean_t zfs_relatime_need_update(const struct inode *); - -#if defined(HAVE_UIO_RW) -extern caddr_t zfs_map_page(page_t *, enum seg_rw); -extern void zfs_unmap_page(page_t *, caddr_t); -#endif /* HAVE_UIO_RW */ - extern zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE]; #ifdef __cplusplus diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index cfb2a6ed3e9f..ce7e50c13bed 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -42,21 +41,13 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr, zidmap_t *mnt_ns); extern const struct inode_operations zpl_inode_operations; -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER -extern const struct inode_operations_wrapper zpl_dir_inode_operations; 
-#else extern const struct inode_operations zpl_dir_inode_operations; -#endif extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; /* zpl_file.c */ extern const struct address_space_operations zpl_address_space_operations; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND -extern const struct file_operations_extend zpl_file_operations; -#else extern const struct file_operations zpl_file_operations; -#endif extern const struct file_operations zpl_dir_file_operations; /* zpl_super.c */ @@ -70,8 +61,9 @@ extern struct file_system_type zpl_fs_type; extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size); extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip, const struct qstr *qstr); + #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) + #if defined(HAVE_SET_ACL_IDMAP_DENTRY) extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); @@ -84,7 +76,7 @@ extern int zpl_set_acl(struct user_namespace *userns, struct dentry *dentry, #else extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type); #endif /* HAVE_SET_ACL_USERNS */ -#endif /* HAVE_SET_ACL */ + #if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL) extern struct posix_acl *zpl_get_acl(struct inode *ip, int type, bool rcu); #elif defined(HAVE_GET_ACL) @@ -127,73 +119,6 @@ extern const struct inode_operations zpl_ops_snapdir; extern const struct file_operations zpl_fops_shares; extern const struct inode_operations zpl_ops_shares; -#if defined(HAVE_VFS_ITERATE) || defined(HAVE_VFS_ITERATE_SHARED) - -#define ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) { \ - .actor = _actor, \ - .pos = _pos, \ -} - -typedef struct dir_context zpl_dir_context_t; - -#define zpl_dir_emit dir_emit -#define zpl_dir_emit_dot dir_emit_dot -#define zpl_dir_emit_dotdot dir_emit_dotdot -#define zpl_dir_emit_dots dir_emit_dots - -#else - -typedef struct 
zpl_dir_context { - void *dirent; - const filldir_t actor; - loff_t pos; -} zpl_dir_context_t; - -#define ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) { \ - .dirent = _dirent, \ - .actor = _actor, \ - .pos = _pos, \ -} - -static inline bool -zpl_dir_emit(zpl_dir_context_t *ctx, const char *name, int namelen, - uint64_t ino, unsigned type) -{ - return (!ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type)); -} - -static inline bool -zpl_dir_emit_dot(struct file *file, zpl_dir_context_t *ctx) -{ - return (ctx->actor(ctx->dirent, ".", 1, ctx->pos, - file_inode(file)->i_ino, DT_DIR) == 0); -} - -static inline bool -zpl_dir_emit_dotdot(struct file *file, zpl_dir_context_t *ctx) -{ - return (ctx->actor(ctx->dirent, "..", 2, ctx->pos, - parent_ino(file_dentry(file)), DT_DIR) == 0); -} - -static inline bool -zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx) -{ - if (ctx->pos == 0) { - if (!zpl_dir_emit_dot(file, ctx)) - return (false); - ctx->pos = 1; - } - if (ctx->pos == 1) { - if (!zpl_dir_emit_dotdot(file, ctx)) - return (false); - ctx->pos = 2; - } - return (true); -} -#endif /* HAVE_VFS_ITERATE */ - - /* zpl_file_range.c */ /* handlers for file_operations of the same name */ @@ -244,12 +169,9 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg); #if defined(HAVE_INODE_TIMESTAMP_TRUNCATE) #define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip) -#elif defined(HAVE_INODE_TIMESPEC64_TIMES) -#define zpl_inode_timestamp_truncate(ts, ip) \ - timespec64_trunc(ts, (ip)->i_sb->s_time_gran) #else #define zpl_inode_timestamp_truncate(ts, ip) \ - timespec_trunc(ts, (ip)->i_sb->s_time_gran) + timespec64_trunc(ts, (ip)->i_sb->s_time_gran) #endif #if defined(HAVE_INODE_OWNER_OR_CAPABLE) diff --git a/include/sys/abd.h b/include/sys/abd.h index 567b88c0fc01..bd3a7bd7c935 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -46,6 +46,7 @@ typedef enum abd_flags { ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */ 
ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */ ABD_FLAG_ALLOCD = 1 << 8, /* we allocated the abd_t */ + ABD_FLAG_FROM_PAGES = 1 << 9, /* does not own pages */ } abd_flags_t; typedef struct abd { @@ -200,6 +201,12 @@ abd_get_size(abd_t *abd) return (abd->abd_size); } +static inline boolean_t +abd_is_from_pages(abd_t *abd) +{ + return ((abd->abd_flags & ABD_FLAG_FROM_PAGES) ? B_TRUE : B_FALSE); +} + /* * Module lifecycle * Defined in each specific OS's abd_os.c diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h index 1eb25d94adc5..35a64f8621a5 100644 --- a/include/sys/abd_impl.h +++ b/include/sys/abd_impl.h @@ -43,6 +43,9 @@ typedef enum abd_stats_op { /* forward declarations */ struct scatterlist; struct page; +#if defined(__FreeBSD__) && defined(_KERNEL) +struct sf_buf; +#endif struct abd_iter { /* public interface */ @@ -70,7 +73,11 @@ struct abd_iter { size_t iter_pos; size_t iter_offset; /* offset in current sg/abd_buf, */ /* abd_offset included */ +#if defined(__FreeBSD__) && defined(_KERNEL) + struct sf_buf *sf; /* used to map in vm_page_t FreeBSD */ +#else struct scatterlist *iter_sg; /* current sg */ +#endif }; extern abd_t *abd_zero_scatter; @@ -78,6 +85,7 @@ extern abd_t *abd_zero_scatter; abd_t *abd_gang_get_offset(abd_t *, size_t *); abd_t *abd_alloc_struct(size_t); void abd_free_struct(abd_t *); +void abd_init_struct(abd_t *); /* * OS specific functions @@ -108,9 +116,9 @@ void abd_iter_page(struct abd_iter *); #define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) #define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) -#define ABD_SCATTER(abd) (abd->abd_u.abd_scatter) -#define ABD_LINEAR_BUF(abd) (abd->abd_u.abd_linear.abd_buf) -#define ABD_GANG(abd) (abd->abd_u.abd_gang) +#define ABD_SCATTER(abd) ((abd)->abd_u.abd_scatter) +#define ABD_LINEAR_BUF(abd) ((abd)->abd_u.abd_linear.abd_buf) +#define ABD_GANG(abd) ((abd)->abd_u.abd_gang) #ifdef __cplusplus } diff --git a/include/sys/arc.h b/include/sys/arc.h index 
c92b3eee618c..883c07b4ff3d 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -120,7 +120,7 @@ typedef enum arc_flags /* * Private ARC flags. These flags are private ARC only flags that - * will show up in b_flags in the arc_hdr_buf_t. These flags should + * will show up in b_flags in the arc_buf_hdr_t. These flags should * only be set by ARC code. */ ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */ @@ -179,7 +179,6 @@ typedef enum arc_flags ARC_FLAG_COMPRESS_4 = 1 << 28, ARC_FLAG_COMPRESS_5 = 1 << 29, ARC_FLAG_COMPRESS_6 = 1 << 30 - } arc_flags_t; typedef enum arc_buf_flags { diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 8b03b1f895f8..56741cd2a58b 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -61,17 +61,17 @@ extern "C" { /* * The simplified state transition diagram for dbufs looks like: * - * +--> READ --+ - * | | - * | V - * (alloc)-->UNCACHED CACHED-->EVICTING-->(free) - * ^ | ^ ^ - * | | | | - * | +--> FILL --+ | - * | | | - * | | | - * | +------> NOFILL -----+ - * | | + * +-------> READ ------+ + * | | + * | V + * (alloc)-->UNCACHED CACHED-->EVICTING-->(free) + * ^ | ^ ^ + * | | | | + * | +-------> FILL ------+ | + * | | | | + * | | | | + * | +------> NOFILL -----+-----> UNCACHED + * | | (Direct I/O) * +---------------+ * * DB_SEARCH is an invalid state for a dbuf. 
It is used by dbuf_free_range @@ -176,6 +176,7 @@ typedef struct dbuf_dirty_record { uint8_t dr_copies; boolean_t dr_nopwrite; boolean_t dr_brtwrite; + boolean_t dr_diowrite; boolean_t dr_has_raw_params; /* @@ -384,7 +385,7 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, uint64_t blkid, uint64_t *hash_out); int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); -void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail); boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed); @@ -393,6 +394,8 @@ dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx); boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +int dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp); +int dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa); arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db); void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, bp_embedded_type_t etype, enum zio_compress comp, @@ -473,7 +476,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg) (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) -boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db); +boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *db_bp); #ifdef ZFS_DEBUG diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 928f5f2b4fd4..22cbd7fc73b6 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -525,6 +525,7 @@ void dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, #define WP_NOFILL 0x1 #define WP_DMU_SYNC 0x2 #define WP_SPILL 0x4 +#define WP_DIRECT_WR 0x8 void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, struct zio_prop *zp); @@ -589,6 +590,7 @@ int 
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, dmu_buf_t ***dbpp, uint32_t flags); int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag, dmu_buf_t **dbp); + /* * Add a reference to a dmu buffer that has already been held via * dmu_buf_hold() in the current context. @@ -873,16 +875,20 @@ int dmu_free_long_object(objset_t *os, uint64_t object); #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ #define DMU_READ_NO_DECRYPT 2 /* don't decrypt */ +#define DMU_DIRECTIO 4 /* use Direct I/O */ + int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - void *buf, uint32_t flags); + void *buf, uint32_t flags); int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, uint32_t flags); void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - const void *buf, dmu_tx_t *tx); -void dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); +int dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx); +int dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx, uint32_t flags); void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - dmu_tx_t *tx); + dmu_tx_t *tx); #ifdef _KERNEL int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size); int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size); diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 83ae2b76ba1f..4eaa399407dd 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -35,6 +35,10 @@ #include #include #include +#include +#include +#include +#include #ifdef __cplusplus extern "C" { @@ -134,7 +138,7 @@ extern "C" { * db_data_pending * db_dirtied * db_link - * db_dirty_node (??) 
+ * db_dirty_records * db_dirtycnt * db_d.* * db.* @@ -150,8 +154,10 @@ extern "C" { * dbuf_find: none (db_holds) * dbuf_hash_insert: none (db_holds) * dmu_buf_read_array_impl: none (db_state, db_changed) - * dmu_sync: none (db_dirty_node, db_d) + * dmu_sync: none (db_dirty_records, db_d) * dnode_reallocate: none (db) + * dmu_write_direct: none (db_dirty_records, db_d) + * dmu_write_direct_done: none (db_dirty_records, db_d) * * dn_mtx (leaf) * protects: @@ -234,8 +240,9 @@ extern "C" { * dnode_new_blkid */ -struct objset; struct dmu_pool; +struct dmu_buf; +struct zgd; typedef struct dmu_sendstatus { list_node_t dss_link; @@ -245,9 +252,30 @@ typedef struct dmu_sendstatus { uint64_t dss_blocks; /* blocks visited during the sending process */ } dmu_sendstatus_t; +/* + * dmu_sync_{ready/done} args + */ +typedef struct { + dbuf_dirty_record_t *dsa_dr; + void (*dsa_done)(struct zgd *, int); + struct zgd *dsa_zgd; + dmu_tx_t *dsa_tx; +} dmu_sync_arg_t; + +void dmu_sync_done(zio_t *, arc_buf_t *buf, void *varg); +void dmu_sync_ready(zio_t *, arc_buf_t *buf, void *varg); + void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *); void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *); +int dmu_write_direct(zio_t *, dmu_buf_impl_t *, abd_t *, dmu_tx_t *); +int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t flags); +int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t, dmu_tx_t *); +#if defined(_KERNEL) +int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t); +int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_tx_t *); +#endif + #ifdef __cplusplus } #endif diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index a9123e862af7..587dac738bae 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -134,6 +134,7 @@ struct objset { zfs_cache_type_t os_secondary_cache; zfs_prefetch_type_t os_prefetch; zfs_sync_type_t os_sync; + zfs_direct_t os_direct; zfs_redundant_metadata_type_t 
os_redundant_metadata; uint64_t os_recordsize; /* diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index c746600cd2d5..55b150c044ee 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -42,6 +42,7 @@ extern "C" { #define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_DELAY "delay" #define FM_EREPORT_ZFS_DEADMAN "deadman" +#define FM_EREPORT_ZFS_DIO_VERIFY "dio_verify" #define FM_EREPORT_ZFS_POOL "zpool" #define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" #define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" @@ -84,6 +85,7 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T "vdev_io_t" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N "vdev_slow_io_n" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T "vdev_slow_io_t" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DIO_VERIFY_ERRORS "dio_verify_errors" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS "vdev_delays" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index fc4f22cd5304..1676020d04d3 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -30,6 +30,7 @@ * Portions Copyright 2010 Robert Milkowski * Copyright (c) 2021, Colm Buckley * Copyright (c) 2022 Hewlett Packard Enterprise Development LP. + * Copyright (c) 2024, Klara, Inc. */ #ifndef _SYS_FS_ZFS_H @@ -81,6 +82,7 @@ typedef enum dmu_objset_type { * All of these include the terminating NUL byte. 
*/ #define ZAP_MAXNAMELEN 256 +#define ZAP_MAXNAMELEN_NEW 1024 #define ZAP_MAXVALUELEN (1024 * 8) #define ZAP_OLDMAXVALUELEN 1024 #define ZFS_MAX_DATASET_NAME_LEN 256 @@ -193,6 +195,8 @@ typedef enum { ZFS_PROP_SNAPSHOTS_CHANGED, ZFS_PROP_PREFETCH, ZFS_PROP_VOLTHREADING, + ZFS_PROP_DIRECT, + ZFS_PROP_LONGNAME, ZFS_NUM_PROPS } zfs_prop_t; @@ -533,6 +537,12 @@ typedef enum { ZFS_VOLMODE_NONE = 3 } zfs_volmode_t; +typedef enum { + ZFS_DIRECT_DISABLED = 0, + ZFS_DIRECT_STANDARD, + ZFS_DIRECT_ALWAYS +} zfs_direct_t; + typedef enum zfs_keystatus { ZFS_KEYSTATUS_NONE = 0, ZFS_KEYSTATUS_UNAVAILABLE, @@ -790,6 +800,9 @@ typedef struct zpool_load_policy { /* Number of slow IOs */ #define ZPOOL_CONFIG_VDEV_SLOW_IOS "vdev_slow_ios" +/* Number of Direct I/O write verify errors */ +#define ZPOOL_CONFIG_VDEV_DIO_VERIFY_ERRORS "vdev_dio_verify_errors" + /* vdev enclosure sysfs path */ #define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path" @@ -1262,6 +1275,7 @@ typedef struct vdev_stat { uint64_t vs_physical_ashift; /* vdev_physical_ashift */ uint64_t vs_noalloc; /* allocations halted? */ uint64_t vs_pspace; /* physical capacity */ + uint64_t vs_dio_verify_errors; /* DIO write verify errors */ } vdev_stat_t; #define VDEV_STAT_VALID(field, uint64_t_field_count) \ @@ -1618,6 +1632,7 @@ typedef enum { ZFS_ERR_CRYPTO_NOTSUP, ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS, ZFS_ERR_ASHIFT_MISMATCH, + ZFS_ERR_STREAM_LARGE_MICROZAP, } zfs_errno_t; /* diff --git a/include/sys/sa_impl.h b/include/sys/sa_impl.h index 6eb0c96188fa..3632e5c99236 100644 --- a/include/sys/sa_impl.h +++ b/include/sys/sa_impl.h @@ -167,8 +167,8 @@ typedef struct sa_hdr_phys { * | hdrsz |layout | * +--------+-------+ * - * Bits 0-10 are the layout number - * Bits 11-16 are the size of the header. + * Bits 0-9 (10 bits) are the layout number (0-1023) + * Bits 10-15 (6 bits) are the size of the header (0-63) * The hdrsize is the number * 8 * * For example. 
diff --git a/include/sys/spa.h b/include/sys/spa.h index aa66d489ef1a..ca30b60c0af7 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -949,6 +949,14 @@ typedef struct spa_iostats { kstat_named_t simple_trim_bytes_skipped; kstat_named_t simple_trim_extents_failed; kstat_named_t simple_trim_bytes_failed; + kstat_named_t arc_read_count; + kstat_named_t arc_read_bytes; + kstat_named_t arc_write_count; + kstat_named_t arc_write_bytes; + kstat_named_t direct_read_count; + kstat_named_t direct_read_bytes; + kstat_named_t direct_write_count; + kstat_named_t direct_write_bytes; } spa_iostats_t; extern void spa_stats_init(spa_t *spa); @@ -972,6 +980,10 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type, uint64_t extents_written, uint64_t bytes_written, uint64_t extents_skipped, uint64_t bytes_skipped, uint64_t extents_failed, uint64_t bytes_failed); +extern void spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, + uint32_t flags); +extern void spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, + uint32_t flags); extern void spa_import_progress_add(spa_t *spa); extern void spa_import_progress_remove(uint64_t spa_guid); extern int spa_import_progress_set_mmp_check(uint64_t pool_guid, diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h index aa34edda5f6a..90b80127246c 100644 --- a/include/sys/uio_impl.h +++ b/include/sys/uio_impl.h @@ -40,10 +40,47 @@ #define _SYS_UIO_IMPL_H #include +#include extern int zfs_uiomove(void *, size_t, zfs_uio_rw_t, zfs_uio_t *); extern int zfs_uiocopy(void *, size_t, zfs_uio_rw_t, zfs_uio_t *, size_t *); extern void zfs_uioskip(zfs_uio_t *, size_t); +extern void zfs_uio_free_dio_pages(zfs_uio_t *, zfs_uio_rw_t); +extern int zfs_uio_get_dio_pages_alloc(zfs_uio_t *, zfs_uio_rw_t); +extern boolean_t zfs_uio_page_aligned(zfs_uio_t *); + +static inline boolean_t +zfs_dio_page_aligned(void *buf) +{ + return ((((uintptr_t)(buf) & (PAGESIZE - 1)) == 0) ? 
+ B_TRUE : B_FALSE); +} + +static inline boolean_t +zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz) +{ + return (IS_P2ALIGNED(offset, blksz)); +} + +static inline boolean_t +zfs_dio_size_aligned(uint64_t size, uint64_t blksz) +{ + return ((size % blksz) == 0); +} + +static inline boolean_t +zfs_dio_aligned(uint64_t offset, uint64_t size, uint64_t blksz) +{ + return (zfs_dio_offset_aligned(offset, blksz) && + zfs_dio_size_aligned(size, blksz)); +} + +static inline boolean_t +zfs_uio_aligned(zfs_uio_t *uio, uint64_t blksz) +{ + return (zfs_dio_aligned(zfs_uio_offset(uio), zfs_uio_resid(uio), + blksz)); +} static inline void zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 57ff31e89eb9..abd66b8abc96 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -448,9 +448,14 @@ struct vdev { /* * We rate limit ZIO delay, deadman, and checksum events, since they * can flood ZED with tons of events when a drive is acting up. + * + * We also rate limit Direct I/O write verify errors, since a user might + * be continually manipulating a buffer that can flood ZED with tons of + * events. */ zfs_ratelimit_t vdev_delay_rl; zfs_ratelimit_t vdev_deadman_rl; + zfs_ratelimit_t vdev_dio_verify_rl; zfs_ratelimit_t vdev_checksum_rl; /* @@ -649,6 +654,11 @@ extern uint_t zfs_vdev_max_auto_ashift; int param_set_min_auto_ashift(ZFS_MODULE_PARAM_ARGS); int param_set_max_auto_ashift(ZFS_MODULE_PARAM_ARGS); +/* + * VDEV checksum verification for Direct I/O writes + */ +extern uint_t zfs_vdev_direct_write_verify; + #ifdef __cplusplus } #endif diff --git a/include/sys/zap.h b/include/sys/zap.h index 0027f7c5103e..43e8bbea1991 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -315,7 +315,7 @@ int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count); * match must be exact (ie, same as mask=-1ULL). 
*/ int zap_value_search(objset_t *os, uint64_t zapobj, - uint64_t value, uint64_t mask, char *name); + uint64_t value, uint64_t mask, char *name, uint64_t namelen); /* * Transfer all the entries from fromobj into intoobj. Only works on @@ -376,9 +376,20 @@ typedef struct { boolean_t za_normalization_conflict; uint64_t za_num_integers; uint64_t za_first_integer; /* no sign extension for <8byte ints */ - char za_name[ZAP_MAXNAMELEN]; + uint32_t za_name_len; + char za_name[]; } zap_attribute_t; +void zap_init(void); +void zap_fini(void); + +/* + * Alloc and free zap_attribute_t. + */ +zap_attribute_t *zap_attribute_alloc(void); +zap_attribute_t *zap_attribute_long_alloc(void); +void zap_attribute_free(zap_attribute_t *attrp); + /* * The interface for listing all the attributes of a zapobj can be * thought of as cursor moving down a list of the attributes one by diff --git a/include/sys/zap_impl.h b/include/sys/zap_impl.h index 2959aa9b2ca4..fad2c8bfa695 100644 --- a/include/sys/zap_impl.h +++ b/include/sys/zap_impl.h @@ -24,6 +24,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2013, 2016 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. + * Copyright (c) 2024, Klara, Inc. */ #ifndef _SYS_ZAP_IMPL_H @@ -45,7 +46,6 @@ extern int fzap_default_block_shift; #define MZAP_ENT_LEN 64 #define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) -#define MZAP_MAX_BLKSZ SPA_OLD_MAXBLOCKSIZE #define ZAP_NEED_CD (-1U) @@ -191,7 +191,8 @@ typedef struct zap_name { uint64_t zn_hash; matchtype_t zn_matchtype; int zn_normflags; - char zn_normbuf[ZAP_MAXNAMELEN]; + int zn_normbuf_len; + char zn_normbuf[]; } zap_name_t; #define zap_f zap_u.zap_fat @@ -209,6 +210,8 @@ int zap_hashbits(zap_t *zap); uint32_t zap_maxcd(zap_t *zap); uint64_t zap_getflags(zap_t *zap); +uint64_t zap_get_micro_max_size(spa_t *spa); + #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 
0 : ((hash) >> (64 - (n)))) void fzap_byteswap(void *buf, size_t size); diff --git a/include/sys/zfs_file.h b/include/sys/zfs_file.h index e944165adc40..64f116c21ba7 100644 --- a/include/sys/zfs_file.h +++ b/include/sys/zfs_file.h @@ -53,7 +53,7 @@ int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off, int zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence); int zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr); int zfs_file_fsync(zfs_file_t *fp, int flags); -int zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len); +int zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len); loff_t zfs_file_off(zfs_file_t *fp); int zfs_file_unlink(const char *); diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 525d40759fdd..aa20e52a7634 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -23,6 +23,7 @@ * Copyright (c) 2012, 2024 by Delphix. All rights reserved. * Copyright 2016 RackTop Systems. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2024, Klara, Inc. */ #ifndef _SYS_ZFS_IOCTL_H @@ -57,6 +58,7 @@ extern "C" { */ #define ZFS_SNAPDIR_HIDDEN 0 #define ZFS_SNAPDIR_VISIBLE 1 +#define ZFS_SNAPDIR_DISABLED 2 /* * Property values for snapdev @@ -70,10 +72,36 @@ extern "C" { #define ZFS_ACLTYPE_POSIX 1 #define ZFS_ACLTYPE_NFSV4 2 +/* + * The drr_versioninfo field of the dmu_replay_record has the + * following layout: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |reserve| feature-flags |C|S| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * The low order two bits indicate the header type: SUBSTREAM (0x1) + * or COMPOUNDSTREAM (0x2). Using two bits for this is historical: + * this field used to be a version number, where the two version types + * were 1 and 2. 
Using two bits for this allows earlier versions of + * the code to be able to recognize send streams that don't use any + * of the features indicated by feature flags. + * + * The top 8 bits are reserved for future expansion. At time of writing there + * are no plans for these. If you want to use them, please reach out to the + * OpenZFS community, e.g., on GitHub or Slack. + */ + /* * Field manipulation macros for the drr_versioninfo field of the * send stream header. */ +#define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2) +#define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x) + +#define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 56) +#define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 56, x) /* * Header types for zfs send streams. @@ -83,16 +111,9 @@ typedef enum drr_headertype { DMU_COMPOUNDSTREAM = 0x2 } drr_headertype_t; -#define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2) -#define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x) - -#define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 30) -#define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 30, x) - /* * Feature flags for zfs send streams (flags in drr_versioninfo) */ - #define DMU_BACKUP_FEATURE_DEDUP (1 << 0) #define DMU_BACKUP_FEATURE_DEDUPPROPS (1 << 1) #define DMU_BACKUP_FEATURE_SA_SPILL (1 << 2) @@ -124,13 +145,8 @@ typedef enum drr_headertype { * default use of "zfs send" won't encounter the bug mentioned above. */ #define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27) -/* flag #28 is reserved for a Nutanix feature */ -/* - * flag #29 is the last unused bit. It is reserved to indicate a to-be-designed - * extension to the stream format which will accomodate more feature flags. - * If you need to add another feature flag, please reach out to the OpenZFS - * community, e.g., on GitHub or Slack. 
- */ +#define DMU_BACKUP_FEATURE_LONGNAME (1 << 28) +#define DMU_BACKUP_FEATURE_LARGE_MICROZAP (1 << 29) /* * Mask of all supported backup features @@ -141,7 +157,8 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \ DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \ DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \ - DMU_BACKUP_FEATURE_ZSTD) + DMU_BACKUP_FEATURE_ZSTD | DMU_BACKUP_FEATURE_LONGNAME | \ + DMU_BACKUP_FEATURE_LARGE_MICROZAP) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) @@ -150,23 +167,6 @@ typedef enum dmu_send_resume_token_version { ZFS_SEND_RESUME_TOKEN_VERSION = 1 } dmu_send_resume_token_version_t; -/* - * The drr_versioninfo field of the dmu_replay_record has the - * following layout: - * - * 64 56 48 40 32 24 16 8 0 - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | reserved | feature-flags |C|S| - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * - * The low order two bits indicate the header type: SUBSTREAM (0x1) - * or COMPOUNDSTREAM (0x2). Using two bits for this is historical: - * this field used to be a version number, where the two version types - * were 1 and 2. Using two bits for this allows earlier versions of - * the code to be able to recognize send streams that don't use any - * of the features indicated by feature flags. - */ - #define DMU_BACKUP_MAGIC 0x2F5bacbacULL /* @@ -230,10 +230,6 @@ typedef enum dmu_send_resume_token_version { ((drro)->drr_raw_bonuslen != 0 ? 
\ (drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8)) -/* - * zfs ioctl command structure - */ - /* Header is used in C++ so can't forward declare untagged struct */ struct drr_begin { uint64_t drr_magic; @@ -477,6 +473,10 @@ typedef enum zfs_case { ZFS_CASE_MIXED } zfs_case_t; +/* + * zfs ioctl command structure + */ + /* * Note: this struct must have the same layout in 32-bit and 64-bit, so * that 32-bit processes (like /sbin/zfs) can pass it to the 64-bit diff --git a/include/sys/zfs_racct.h b/include/sys/zfs_racct.h index 0e8bd04c1a13..ff84cccb09a1 100644 --- a/include/sys/zfs_racct.h +++ b/include/sys/zfs_racct.h @@ -26,12 +26,13 @@ #ifndef _SYS_ZFS_RACCT_H #define _SYS_ZFS_RACCT_H -#include +#include +#include /* * Platform-dependent resource accounting hooks */ -void zfs_racct_read(uint64_t size, uint64_t iops); -void zfs_racct_write(uint64_t size, uint64_t iops); +void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags); +void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags); #endif /* _SYS_ZFS_RACCT_H */ diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d71144807f47..040bfbdac831 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -158,6 +158,8 @@ extern "C" { #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); +extern int zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, + sa_attr_type_t *sa_table, uint64_t *pobjp, int *is_xattrdir); extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value); #ifdef _KERNEL @@ -308,7 +310,7 @@ extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, const char *dname, znode_t *szp, znode_t *wzp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t len, boolean_t commit, - zil_callback_t callback, void *callback_data); + boolean_t o_direct, zil_callback_t callback, void *callback_data); 
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, uint64_t off, uint64_t len); extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, diff --git a/include/sys/zil.h b/include/sys/zil.h index 384678b223d5..259f2d03fc05 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -248,6 +248,7 @@ typedef struct { uint32_t lr_attr_masksize; /* number of elements in array */ uint32_t lr_attr_bitmap; /* First entry of array */ /* remainder of array and additional lr_attr_end_t fields */ + uint8_t lr_attr_data[]; } lr_attr_t; /* @@ -264,9 +265,14 @@ typedef struct { uint64_t lr_gen; /* generation (txg of creation) */ uint64_t lr_crtime[2]; /* creation time */ uint64_t lr_rdev; /* rdev of object to create */ +} _lr_create_t; + +typedef struct { + _lr_create_t lr_create; /* common create portion */ /* name of object to create follows this */ /* for symlinks, link content follows name */ /* for creates with xvattr data, the name follows the xvattr info */ + uint8_t lr_data[]; } lr_create_t; /* @@ -293,18 +299,20 @@ typedef struct { * and group will be in lr_create. Name follows ACL data. 
*/ typedef struct { - lr_create_t lr_create; /* common create portion */ + _lr_create_t lr_create; /* common create portion */ uint64_t lr_aclcnt; /* number of ACEs in ACL */ uint64_t lr_domcnt; /* number of unique domains */ uint64_t lr_fuidcnt; /* number of real fuids */ uint64_t lr_acl_bytes; /* number of bytes in ACL */ uint64_t lr_acl_flags; /* ACL flags */ + uint8_t lr_data[]; } lr_acl_create_t; typedef struct { lr_t lr_common; /* common portion of log record */ uint64_t lr_doid; /* obj id of directory */ /* name of object to remove follows this */ + uint8_t lr_data[]; } lr_remove_t; typedef struct { @@ -312,18 +320,24 @@ typedef struct { uint64_t lr_doid; /* obj id of directory */ uint64_t lr_link_obj; /* obj id of link */ /* name of object to link follows this */ + uint8_t lr_data[]; } lr_link_t; typedef struct { lr_t lr_common; /* common portion of log record */ uint64_t lr_sdoid; /* obj id of source directory */ uint64_t lr_tdoid; /* obj id of target directory */ +} _lr_rename_t; + +typedef struct { + _lr_rename_t lr_rename; /* common rename portion */ /* 2 strings: names of source and destination follow this */ + uint8_t lr_data[]; } lr_rename_t; typedef struct { - lr_rename_t lr_rename; /* common rename portion */ - /* members related to the whiteout file (based on lr_create_t) */ + _lr_rename_t lr_rename; /* common rename portion */ + /* members related to the whiteout file (based on _lr_create_t) */ uint64_t lr_wfoid; /* obj id of the new whiteout file */ uint64_t lr_wmode; /* mode of object */ uint64_t lr_wuid; /* uid of whiteout */ @@ -332,6 +346,7 @@ typedef struct { uint64_t lr_wcrtime[2]; /* creation time */ uint64_t lr_wrdev; /* always makedev(0, 0) */ /* 2 strings: names of source and destination follow this */ + uint8_t lr_data[]; } lr_rename_whiteout_t; typedef struct { @@ -342,6 +357,7 @@ typedef struct { uint64_t lr_blkoff; /* no longer used */ blkptr_t lr_blkptr; /* spa block pointer for replay */ /* write data will follow for small writes 
*/ + uint8_t lr_data[]; } lr_write_t; typedef struct { @@ -362,6 +378,7 @@ typedef struct { uint64_t lr_atime[2]; /* access time */ uint64_t lr_mtime[2]; /* modification time */ /* optional attribute lr_attr_t may be here */ + uint8_t lr_data[]; } lr_setattr_t; typedef struct { @@ -369,6 +386,7 @@ typedef struct { uint64_t lr_foid; /* file object to change attributes */ uint64_t lr_size; /* xattr name and value follows */ + uint8_t lr_data[]; } lr_setsaxattr_t; typedef struct { @@ -376,6 +394,7 @@ typedef struct { uint64_t lr_foid; /* obj id of file */ uint64_t lr_aclcnt; /* number of acl entries */ /* lr_aclcnt number of ace_t entries follow this */ + uint8_t lr_data[]; } lr_acl_v0_t; typedef struct { @@ -387,6 +406,7 @@ typedef struct { uint64_t lr_acl_bytes; /* number of bytes in ACL */ uint64_t lr_acl_flags; /* ACL flags */ /* lr_acl_bytes number of variable sized ace's follows */ + uint8_t lr_data[]; } lr_acl_t; typedef struct { @@ -396,8 +416,8 @@ typedef struct { uint64_t lr_length; /* length of the blocks to clone */ uint64_t lr_blksz; /* file's block size */ uint64_t lr_nbps; /* number of block pointers */ - blkptr_t lr_bps[]; /* block pointers of the blocks to clone follows */ + blkptr_t lr_bps[]; } lr_clone_range_t; /* @@ -448,7 +468,7 @@ typedef struct itx { uint64_t itx_oid; /* object id */ uint64_t itx_gen; /* gen number for zfs_get_data */ lr_t itx_lr; /* common part of log record */ - /* followed by type-specific part of lr_xx_t and its immediate data */ + uint8_t itx_lr_data[]; /* type-specific part of lr_xx_t */ } itx_t; /* diff --git a/include/sys/zio.h b/include/sys/zio.h index 3a756949a422..f9409433e031 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -29,6 +29,7 @@ * Copyright (c) 2019, Allan Jude * Copyright (c) 2019, 2023, 2024, Klara Inc. * Copyright (c) 2019-2020, Michael Niewöhner + * Copyright (c) 2024 by George Melikov. All rights reserved. 
*/ #ifndef _ZIO_H @@ -225,6 +226,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_NOPWRITE (1ULL << 28) #define ZIO_FLAG_REEXECUTED (1ULL << 29) #define ZIO_FLAG_DELEGATED (1ULL << 30) +#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 31) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) @@ -355,6 +357,7 @@ typedef struct zio_prop { boolean_t zp_brtwrite; boolean_t zp_encrypt; boolean_t zp_byteorder; + boolean_t zp_direct_write; uint8_t zp_salt[ZIO_DATA_SALT_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; @@ -601,6 +604,8 @@ extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); +extern size_t zio_get_compression_max_size(enum zio_compress compress, + uint64_t gcd_alloc, uint64_t min_alloc, size_t s_len); extern int zio_wait(zio_t *zio); extern void zio_nowait(zio_t *zio); extern void zio_execute(void *zio); diff --git a/include/sys/zio_compress.h b/include/sys/zio_compress.h index 31602039a150..ceef757abd20 100644 --- a/include/sys/zio_compress.h +++ b/include/sys/zio_compress.h @@ -25,6 +25,7 @@ * Copyright (c) 2019, 2024, Klara, Inc. * Use is subject to license terms. * Copyright (c) 2015, 2016 by Delphix. All rights reserved. + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. */ #ifndef _SYS_ZIO_COMPRESS_H @@ -174,7 +175,7 @@ extern int zfs_lz4_decompress(abd_t *src, abd_t *dst, size_t s_len, * Compress and decompress data if necessary. 
*/ extern size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst, - size_t s_len, uint8_t level); + size_t s_len, size_t d_len, uint8_t level); extern int zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *abd, size_t s_len, size_t d_len, uint8_t *level); extern int zio_compress_to_feature(enum zio_compress comp); diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index 2c846a5d41f6..a5e3ab2384ba 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -160,8 +160,9 @@ enum zio_stage { ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--XT */ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */ + ZIO_STAGE_DIO_CHECKSUM_VERIFY = 1 << 25, /* -W---- */ - ZIO_STAGE_DONE = 1 << 25 /* RWFCXT */ + ZIO_STAGE_DONE = 1 << 26 /* RWFCXT */ }; #define ZIO_ROOT_PIPELINE \ @@ -227,6 +228,10 @@ enum zio_stage { ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_ALLOCATE) +#define ZIO_DIRECT_WRITE_PIPELINE \ + ZIO_WRITE_PIPELINE & \ + (~ZIO_STAGE_ISSUE_ASYNC) + #define ZIO_DDT_CHILD_WRITE_PIPELINE \ (ZIO_INTERLOCK_STAGES | \ ZIO_VDEV_IO_STAGES | \ diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 5733a8187a95..ac42b5c0cd6b 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -24,6 +24,7 @@ * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2024, Klara, Inc. 
*/ #ifndef _ZFEATURE_COMMON_H @@ -83,6 +84,8 @@ typedef enum spa_feature { SPA_FEATURE_REDACTION_LIST_SPILL, SPA_FEATURE_RAIDZ_EXPANSION, SPA_FEATURE_FAST_DEDUP, + SPA_FEATURE_LONGNAME, + SPA_FEATURE_LARGE_MICROZAP, SPA_FEATURES } spa_feature_t; diff --git a/lib/libnvpair/libnvpair.abi b/lib/libnvpair/libnvpair.abi index 69009375e887..e3eacb195463 100644 --- a/lib/libnvpair/libnvpair.abi +++ b/lib/libnvpair/libnvpair.abi @@ -1,5 +1,6 @@ + @@ -2139,27 +2140,197 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + - + + - + + + + + + - + + + + + + diff --git a/lib/libshare/os/freebsd/nfs.c b/lib/libshare/os/freebsd/nfs.c index b5ce221bb1cb..8a7ee6b35fe1 100644 --- a/lib/libshare/os/freebsd/nfs.c +++ b/lib/libshare/os/freebsd/nfs.c @@ -93,7 +93,9 @@ translate_opts(char *oldopts, FILE *out) return (EOF); newopts[0] = '\0'; s = oldopts; - while ((o = strsep(&s, "-, ")) != NULL) { + while ((o = strsep(&s, ", ")) != NULL) { + if (o[0] == '-') + o++; if (o[0] == '\0') continue; for (i = 0; i < ARRAY_SIZE(known_opts); ++i) { diff --git a/lib/libspl/include/sys/simd.h b/lib/libspl/include/sys/simd.h index 41f9df506468..2926dc680764 100644 --- a/lib/libspl/include/sys/simd.h +++ b/lib/libspl/include/sys/simd.h @@ -592,4 +592,7 @@ zfs_isa207_available(void) #endif +extern void simd_stat_init(void); +extern void simd_stat_fini(void); + #endif /* _LIBSPL_SYS_SIMD_H */ diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h index 665bfc42301b..2cb0107d58f5 100644 --- a/lib/libspl/include/sys/uio.h +++ b/lib/libspl/include/sys/uio.h @@ -82,6 +82,32 @@ typedef struct zfs_uio { #define zfs_uio_iovlen(uio, idx) (uio)->uio_iov[(idx)].iov_len #define 
zfs_uio_iovbase(uio, idx) (uio)->uio_iov[(idx)].iov_base +static inline boolean_t +zfs_dio_page_aligned(void *buf) +{ + return ((((unsigned long)(buf) & (PAGESIZE - 1)) == 0) ? + B_TRUE : B_FALSE); +} + +static inline boolean_t +zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz) +{ + return (IS_P2ALIGNED(offset, blksz)); +} + +static inline boolean_t +zfs_dio_size_aligned(uint64_t size, uint64_t blksz) +{ + return ((size % blksz) == 0); +} + +static inline boolean_t +zfs_dio_aligned(uint64_t offset, uint64_t size, uint64_t blksz) +{ + return (zfs_dio_offset_aligned(offset, blksz) && + zfs_dio_size_aligned(size, blksz)); +} + static inline void zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) { diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi index 1ad837b0edf8..7cb92ac9f3f8 100644 --- a/lib/libuutil/libuutil.abi +++ b/lib/libuutil/libuutil.abi @@ -1,5 +1,6 @@ + @@ -596,25 +597,199 @@ - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + - + + - + + + + + + - + + + + + + + @@ -834,7 +1009,6 @@ - diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 51b29643ee0c..1a96460c2b84 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -2,6 +2,7 @@ + @@ -153,6 +154,9 @@ + + + @@ -625,7 +629,7 @@ - + @@ -1124,17 +1128,180 @@ - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + + + - + + + + + + + + + + + + + + @@ -1880,7 +2047,9 @@ - + + + 
@@ -6024,7 +6193,9 @@ - + + + @@ -9179,6 +9350,21 @@ + + + + + + + + + + + + + + + @@ -9188,8 +9374,8 @@ - - + + @@ -9266,7 +9452,7 @@ - + diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index ee01ee9b218a..b9780720e5a3 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -30,6 +30,7 @@ * Copyright 2016 Igor Kozhukhov * Copyright (c) 2018, loli10K . All rights reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2024, Klara, Inc. */ #include @@ -2828,7 +2829,12 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd, case EROFS: zfs_error_aux(hdl, "%s", zfs_strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); - + case ZFS_ERR_STREAM_LARGE_MICROZAP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source snapshot contains large microzaps, " + "need -L (--large-block) or -w (--raw) to " + "generate stream")); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index cba071a1a900..1f7e7b0e647e 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1618,15 +1618,17 @@ static int str2shift(libzfs_handle_t *hdl, const char *buf) { const char *ends = "BKMGTPEZ"; - int i; + int i, len; if (buf[0] == '\0') return (0); - for (i = 0; i < strlen(ends); i++) { + + len = strlen(ends); + for (i = 0; i < len; i++) { if (toupper(buf[0]) == ends[i]) break; } - if (i == strlen(ends)) { + if (i == len) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 5ee6b8e09d6d..6a9c20a2bb88 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -1,6 +1,7 @@ + @@ -597,24 +598,195 @@ - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + - + + - + + + + + + - + + + + + + diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index ff30af7d2b9f..397959c679e9 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -12,9 +12,13 @@ CPPCHECKTARGETS += libzpool.la dist_libzpool_la_SOURCES = \ %D%/abd_os.c \ + %D%/arc_os.c \ %D%/kernel.c \ %D%/taskq.c \ - %D%/util.c + %D%/util.c \ + %D%/vdev_label_os.c \ + %D%/zfs_racct.c \ + %D%/zfs_debug.c nodist_libzpool_la_SOURCES = \ module/lua/lapi.c \ @@ -42,16 +46,11 @@ nodist_libzpool_la_SOURCES = \ module/lua/lvm.c \ module/lua/lzio.c \ \ - module/os/linux/zfs/arc_os.c \ - module/os/linux/zfs/trace.c \ module/os/linux/zfs/vdev_file.c \ - module/os/linux/zfs/vdev_label_os.c \ - module/os/linux/zfs/zfs_debug.c \ - module/os/linux/zfs/zfs_racct.c \ - module/os/linux/zfs/zfs_znode.c \ module/os/linux/zfs/zio_crypt.c \ \ module/zcommon/cityhash.c \ + module/zcommon/simd_stat.c \ module/zcommon/zfeature_common.c \ module/zcommon/zfs_comutil.c \ module/zcommon/zfs_deleg.c \ @@ -87,6 +86,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/ddt_zap.c \ module/zfs/dmu.c \ module/zfs/dmu_diff.c \ + module/zfs/dmu_direct.c \ module/zfs/dmu_object.c \ module/zfs/dmu_objset.c \ module/zfs/dmu_recv.c \ @@ -183,6 +183,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/zfs_ratelimit.c \ module/zfs/zfs_rlock.c \ module/zfs/zfs_sa.c \ + module/zfs/zfs_znode.c \ module/zfs/zil.c \ module/zfs/zio.c \ module/zfs/zio_checksum.c \ diff --git a/lib/libzpool/abd_os.c b/lib/libzpool/abd_os.c index 5a91605b2fe3..8531b8f40ace 100644 --- a/lib/libzpool/abd_os.c +++ b/lib/libzpool/abd_os.c @@ -363,3 +363,67 @@ void abd_cache_reap_now(void) { } + +/* + * Borrow a raw buffer from an ABD without copying the contents of the ABD + * into the buffer. 
If the ABD is scattered, this will allocate a raw buffer + * whose contents are undefined. To copy over the existing data in the ABD, use + * abd_borrow_buf_copy() instead. + */ +void * +abd_borrow_buf(abd_t *abd, size_t n) +{ + void *buf; + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, 0); + if (abd_is_linear(abd)) { + buf = abd_to_buf(abd); + } else { + buf = zio_buf_alloc(n); + } +#ifdef ZFS_DEBUG + (void) zfs_refcount_add_many(&abd->abd_children, n, buf); +#endif + return (buf); +} + +void * +abd_borrow_buf_copy(abd_t *abd, size_t n) +{ + void *buf = abd_borrow_buf(abd, n); + if (!abd_is_linear(abd)) { + abd_copy_to_buf(buf, abd, n); + } + return (buf); +} + +/* + * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will + * not change the contents of the ABD and will ASSERT that you didn't modify + * the buffer since it was borrowed. If you want any changes you made to buf to + * be copied back to abd, use abd_return_buf_copy() instead. + */ +void +abd_return_buf(abd_t *abd, void *buf, size_t n) +{ + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif + if (abd_is_linear(abd)) { + ASSERT3P(buf, ==, abd_to_buf(abd)); + } else { + ASSERT0(abd_cmp_buf(abd, buf, n)); + zio_buf_free(buf, n); + } +} + +void +abd_return_buf_copy(abd_t *abd, void *buf, size_t n) +{ + if (!abd_is_linear(abd)) { + abd_copy_from_buf(abd, buf, n); + } + abd_return_buf(abd, buf, n); +} diff --git a/lib/libzpool/arc_os.c b/lib/libzpool/arc_os.c new file mode 100644 index 000000000000..ed99322ddb05 --- /dev/null +++ b/lib/libzpool/arc_os.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + */ + +#include +#include + +/* + * Return a default max arc size based on the amount of physical memory. + * This may be overridden by tuning the zfs_arc_max module parameter. 
+ */ +uint64_t +arc_default_max(uint64_t min, uint64_t allmem) +{ + uint64_t size; + + if (allmem >= 1 << 30) + size = allmem - (1 << 30); + else + size = min; + return (MAX(allmem * 5 / 8, size)); +} + +int64_t +arc_available_memory(void) +{ + int64_t lowest = INT64_MAX; + + /* Every 100 calls, free a small amount */ + if (random_in_range(100) == 0) + lowest = -1024; + + return (lowest); +} + +int +arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) +{ + (void) spa, (void) reserve, (void) txg; + return (0); +} + +uint64_t +arc_all_memory(void) +{ + return (ptob(physmem) / 2); +} + +uint64_t +arc_free_memory(void) +{ + return (random_in_range(arc_all_memory() * 20 / 100)); +} + +void +arc_register_hotplug(void) +{ +} + +void +arc_unregister_hotplug(void) +{ +} diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index a3930ee07f73..4f4cf490f8e9 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -1367,24 +1367,32 @@ zfs_file_fsync(zfs_file_t *fp, int flags) } /* - * fallocate - allocate or free space on disk + * deallocate - zero and/or deallocate file storage * * fp - file pointer - * mode (non-standard options for hole punching etc) - * offset - offset to start allocating or freeing from - * len - length to free / allocate - * - * OPTIONAL + * offset - offset to start zeroing or deallocating + * len - length to zero or deallocate */ int -zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) +zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len) { -#ifdef __linux__ - return (fallocate(fp->f_fd, mode, offset, len)); + int rc; +#if defined(__linux__) + rc = fallocate(fp->f_fd, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len); +#elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029) + struct spacectl_range rqsr = { + .r_offset = offset, + .r_len = len, + }; + rc = fspacectl(fp->f_fd, SPACECTL_DEALLOC, &rqsr, 0, &rqsr); #else - (void) fp, (void) mode, (void) offset, (void) len; - return 
(EOPNOTSUPP); + (void) fp, (void) offset, (void) len; + rc = EOPNOTSUPP; #endif + if (rc) + return (SET_ERROR(rc)); + return (0); } /* diff --git a/include/os/linux/kernel/linux/percpu_compat.h b/lib/libzpool/vdev_label_os.c similarity index 51% rename from include/os/linux/kernel/linux/percpu_compat.h rename to lib/libzpool/vdev_label_os.c index bf3a5a01c27e..c303cba6dc09 100644 --- a/include/os/linux/kernel/linux/percpu_compat.h +++ b/lib/libzpool/vdev_label_os.c @@ -20,25 +20,32 @@ */ /* - * Copyright (c) 2020 by Delphix. All rights reserved. + * Copyright (c) 2023 by iXsystems, Inc. */ -#ifndef _ZFS_PERCPU_H -#define _ZFS_PERCPU_H - -#include +#include +#include +#include +#include +#include /* - * 3.18 API change, - * percpu_counter_init() now must be passed a gfp mask which will be - * used for the dynamic allocation of the actual counter. + * Check if the reserved boot area is in-use. This is called from + * spa_vdev_attach() when adding a device to a raidz vdev, to ensure that the + * reserved area is available as scratch space for raidz expansion. + * + * This function currently always returns 0. On Linux, there are no known + * external uses of the reserved area. On FreeBSD, the reserved boot area is + * used when booting to a ZFS root from an MBR partition. + * + * Currently nothing using libzpool can add a disk to a pool, so this does + * nothing. 
*/ -#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP -#define percpu_counter_init_common(counter, n, gfp) \ - percpu_counter_init(counter, n, gfp) -#else -#define percpu_counter_init_common(counter, n, gfp) \ - percpu_counter_init(counter, n) -#endif +int +vdev_check_boot_reserve(spa_t *spa, vdev_t *childvd) +{ + (void) spa; + (void) childvd; -#endif /* _ZFS_PERCPU_H */ + return (0); +} diff --git a/lib/libzpool/zfs_debug.c b/lib/libzpool/zfs_debug.c new file mode 100644 index 000000000000..df49a9a33fe8 --- /dev/null +++ b/lib/libzpool/zfs_debug.c @@ -0,0 +1,106 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 
+ * Copyright (c) 2024, Rob Norris + */ + +#include + +typedef struct zfs_dbgmsg { + list_node_t zdm_node; + uint64_t zdm_timestamp; + uint_t zdm_size; + char zdm_msg[]; /* variable length allocation */ +} zfs_dbgmsg_t; + +static list_t zfs_dbgmsgs; +static kmutex_t zfs_dbgmsgs_lock; + +int zfs_dbgmsg_enable = B_TRUE; + +void +zfs_dbgmsg_init(void) +{ + list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t), + offsetof(zfs_dbgmsg_t, zdm_node)); + mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +zfs_dbgmsg_fini(void) +{ + zfs_dbgmsg_t *zdm; + while ((zdm = list_remove_head(&zfs_dbgmsgs))) + umem_free(zdm, zdm->zdm_size); + mutex_destroy(&zfs_dbgmsgs_lock); +} + +void +__set_error(const char *file, const char *func, int line, int err) +{ + if (zfs_flags & ZFS_DEBUG_SET_ERROR) + __dprintf(B_FALSE, file, func, line, "error %lu", + (ulong_t)err); +} + +void +__zfs_dbgmsg(char *buf) +{ + uint_t size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1; + zfs_dbgmsg_t *zdm = umem_zalloc(size, KM_SLEEP); + zdm->zdm_size = size; + zdm->zdm_timestamp = gethrestime_sec(); + strcpy(zdm->zdm_msg, buf); + + mutex_enter(&zfs_dbgmsgs_lock); + list_insert_tail(&zfs_dbgmsgs, zdm); + mutex_exit(&zfs_dbgmsgs_lock); +} + +void +zfs_dbgmsg_print(int fd, const char *tag) +{ + ssize_t ret __attribute__((unused)); + + mutex_enter(&zfs_dbgmsgs_lock); + + /* + * We use write() in this function instead of printf() + * so it is safe to call from a signal handler. 
+ */ + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") START:\n", 9); + + for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs); zdm != NULL; + zdm = list_next(&zfs_dbgmsgs, zdm)) { + ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); + ret = write(fd, "\n", 1); + } + + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") END\n", 6); + + mutex_exit(&zfs_dbgmsgs_lock); +} diff --git a/lib/libzpool/zfs_racct.c b/lib/libzpool/zfs_racct.c new file mode 100644 index 000000000000..b1aff465c2e6 --- /dev/null +++ b/lib/libzpool/zfs_racct.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include + +void +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + (void) spa, (void) size, (void) iops, (void) flags; +} + +void +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + (void) spa, (void) size, (void) iops, (void) flags; +} diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 06705ff4d9b4..77fa0ce38b2a 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1071,6 +1071,7 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels) * Try the slow method. */ zfs_fallthrough; + case EAGAIN: case EOPNOTSUPP: case ENOSYS: do_slow = B_TRUE; @@ -1464,7 +1465,21 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg, * validating labels, a large number of threads can be used due to * minimal contention. */ - t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); + long threads = 2 * sysconf(_SC_NPROCESSORS_ONLN); +#ifdef HAVE_AIO_H + long am; +#ifdef _SC_AIO_LISTIO_MAX + am = sysconf(_SC_AIO_LISTIO_MAX); + if (am >= VDEV_LABELS) + threads = MIN(threads, am / VDEV_LABELS); +#endif +#ifdef _SC_AIO_MAX + am = sysconf(_SC_AIO_MAX); + if (am >= VDEV_LABELS) + threads = MIN(threads, am / VDEV_LABELS); +#endif +#endif + t = tpool_create(1, threads, 0, NULL); for (slice = avl_first(cache); slice; (slice = avl_walk(cache, slice, AVL_AFTER))) (void) tpool_dispatch(t, zpool_open_func, slice); diff --git a/man/man1/arcstat.1 b/man/man1/arcstat.1 index 82358fa686b9..019a8270204a 100644 --- a/man/man1/arcstat.1 +++ b/man/man1/arcstat.1 @@ -199,10 +199,176 @@ Size of the L2ARC mutex_miss per second .It Sy l2bytes Bytes read per second from the L2ARC +.It Sy l2wbytes +Bytes written per second to the L2ARC .It Sy l2miss% L2ARC access miss percentage .It Sy l2asize Actual (compressed) size of the L2ARC +.It Sy cmpsz +Compressed size +.It Sy cmpsz% +Compressed size percentage +.It Sy ovhsz +Overhead size +.It Sy ovhsz% +Overhead size percentage 
+.It Sy bonsz +Bonus size +.It Sy bonsz% +Bonus size percentage +.It Sy dnosz +Dnode size +.It Sy dnosz% +Dnode size percentage +.It Sy dbusz +Dbuf size +.It Sy dbusz% +Dbuf size percentage +.It Sy hdrsz +Header size +.It Sy hdrsz% +Header size percentage +.It Sy l2hsz +L2 header size +.It Sy l2hsz% +L2 header size percentage +.It Sy abdsz +ABD chunk waste size +.It Sy abdsz% +ABD chunk waste size percentage +.It Sy datatg +ARC data target +.It Sy datatg% +ARC data target percentage +.It Sy datasz +ARC data size +.It Sy datasz% +ARC data size percentage +.It Sy metatg +ARC metadata target +.It Sy metatg% +ARC metadata target percentage +.It Sy metasz +ARC metadata size +.It Sy metasz% +ARC metadata size percentage +.It Sy anosz +Anonymous size +.It Sy anosz% +Anonymous size percentage +.It Sy anoda +Anonymous data size +.It Sy anoda% +Anonymous data size percentage +.It Sy anome +Anonymous metadata size +.It Sy anome% +Anonymous metadata size percentage +.It Sy anoed +Anonymous evictable data size +.It Sy anoed% +Anonymous evictable data size percentage +.It Sy anoem +Anonymous evictable metadata size +.It Sy anoem% +Anonymous evictable metadata size percentage +.It Sy mfutg +MFU target +.It Sy mfutg% +MFU target percentage +.It Sy mfudt +MFU data target +.It Sy mfudt% +MFU data target percentage +.It Sy mfumt +MFU metadata target +.It Sy mfumt% +MFU metadata target percentage +.It Sy mfusz +MFU size +.It Sy mfusz% +MFU size percentage +.It Sy mfuda +MFU data size +.It Sy mfuda% +MFU data size percentage +.It Sy mfume +MFU metadata size +.It Sy mfume% +MFU metadata size percentage +.It Sy mfued +MFU evictable data size +.It Sy mfued% +MFU evictable data size percentage +.It Sy mfuem +MFU evictable metadata size +.It Sy mfuem% +MFU evictable metadata size percentage +.It Sy mfugsz +MFU ghost size +.It Sy mfugd +MFU ghost data size +.It Sy mfugm +MFU ghost metadata size +.It Sy mrutg +MRU target +.It Sy mrutg% +MRU target percentage +.It Sy mrudt +MRU data target 
+.It Sy mrudt% +MRU data target percentage +.It Sy mrumt +MRU metadata target +.It Sy mrumt% +MRU metadata target percentage +.It Sy mrusz +MRU size +.It Sy mrusz% +MRU size percentage +.It Sy mruda +MRU data size +.It Sy mruda% +MRU data size percentage +.It Sy mrume +MRU metadata size +.It Sy mrume% +MRU metadata size percentage +.It Sy mrued +MRU evictable data size +.It Sy mrued% +MRU evictable data size percentage +.It Sy mruem +MRU evictable metadata size +.It Sy mruem% +MRU evictable metadata size percentage +.It Sy mrugsz +MRU ghost size +.It Sy mrugd +MRU ghost data size +.It Sy mrugm +MRU ghost metadata size +.It Sy uncsz +Uncached size +.It Sy uncsz% +Uncached size percentage +.It Sy uncda +Uncached data size +.It Sy uncda% +Uncached data size percentage +.It Sy uncme +Uncached metadata size +.It Sy uncme% +Uncached metadata size percentage +.It Sy unced +Uncached evictable data size +.It Sy unced% +Uncached evictable data size percentage +.It Sy uncem +Uncached evictable metadata size +.It Sy uncem% +Uncached evictable metadata size percentage .It Sy grow ARC grow disabled .It Sy need diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 20bb95c1aeea..cf6720317d9f 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -16,7 +16,9 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd June 27, 2024 +.\" Copyright (c) 2024, Klara, Inc. +.\" +.Dd October 2, 2024 .Dt ZFS 4 .Os . @@ -291,6 +293,14 @@ Default dnode block size as a power of 2. .It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int Default dnode indirect block size as a power of 2. . +.It Sy zfs_dio_enabled Ns = Ns Sy 0 Ns | Ns 1 Pq int +Enable Direct I/O. +If this setting is 0, then all I/O requests will be directed through the ARC +acting as though the dataset property +.Sy direct +was set to +.Sy disabled . +. 
.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64 When attempting to log an output nvlist of an ioctl in the on-disk history, the output will not be stored if it is larger than this size (in bytes). @@ -416,6 +426,26 @@ May be increased up to .Sy ASHIFT_MAX Po 16 Pc , but this may negatively impact pool space efficiency. . +.It Sy zfs_vdev_direct_write_verify Ns = Ns Sy Linux 1 | FreeBSD 0 Pq uint +If non-zero, then a Direct I/O write's checksum will be verified every +time the write is issued and before it is commited to the block pointer. +In the event the checksum is not valid then the I/O operation will return EIO. +This module parameter can be used to detect if the +contents of the users buffer have changed in the process of doing a Direct I/O +write. +It can also help to identify if reported checksum errors are tied to Direct I/O +writes. +Each verify error causes a +.Sy dio_verify +zevent. +Direct Write I/O checkum verify errors can be seen with +.Nm zpool Cm status Fl d . +The default value for this is 1 on Linux, but is 0 for +.Fx +because user pages can be placed under write protection in +.Fx +before the Direct I/O write is issued. +. .It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq uint Minimum ashift used when creating new top-level vdevs. . @@ -586,7 +616,11 @@ However, this is limited by . .It Sy zap_micro_max_size Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq int Maximum micro ZAP size. -A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size. +A "micro" ZAP is upgraded to a "fat" ZAP once it grows beyond the specified +size. +Sizes higher than 128KiB will be clamped to 128KiB unless the +.Sy large_microzap +feature is enabled. . .It Sy zap_shrink_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int If set, adjacent empty ZAP blocks will be collapsed, reducing disk space. @@ -1093,6 +1127,9 @@ This will smoothly handle between ten times and a tenth of this number. 
.Pp .Sy zfs_delay_scale No \(mu Sy zfs_dirty_data_max Em must No be smaller than Sy 2^64 . . +.It Sy zfs_dio_write_verify_events_per_second Ns = Ns Sy 20 Ns /s Pq uint +Rate limit Direct I/O write verify events to this many per second. +. .It Sy zfs_disable_ivset_guid_check Ns = Ns Sy 0 Ns | Ns 1 Pq int Disables requirement for IVset GUIDs to be present and match when doing a raw receive of encrypted datasets. @@ -1539,6 +1576,15 @@ which have the .Em no_root_squash option set. . +.It Sy zfs_snapshot_no_setuid Ns = Ns Sy 0 Ns | Ns 1 Pq int +Whether to disable +.Em setuid/setgid +support for snapshot mounts triggered by access to the +.Sy .zfs/snapshot +directory by setting the +.Em nosuid +mount option. +. .It Sy zfs_flags Ns = Ns Sy 0 Pq int Set additional debugging flags. The following flags may be bitwise-ored together: @@ -1718,6 +1764,11 @@ Therefore, we formerly forbade creating blocks larger than 1M. Larger blocks could be created by changing it, and pools with larger blocks can always be imported and used, regardless of this setting. +.Pp +Note that it is still limited by default to +.Ar 1 MiB +on x86_32, because Linux's +3/1 memory split doesn't leave much room for 16M chunks. . .It Sy zfs_allow_redacted_dataset_mount Ns = Ns Sy 0 Ns | Ns 1 Pq int Allow datasets received with redacted send/receive to be mounted. diff --git a/man/man7/zfsconcepts.7 b/man/man7/zfsconcepts.7 index 1be3d961c3d7..1d2dff7e4865 100644 --- a/man/man7/zfsconcepts.7 +++ b/man/man7/zfsconcepts.7 @@ -71,7 +71,7 @@ File system snapshots can be accessed under the directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. 
-The visibility of the +The availability and visibility of the .Pa .zfs directory can be controlled by the .Sy snapdir diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index 506b7deff5b7..95a1a8823775 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -912,14 +912,24 @@ zeroes (the NUL byte). When a zero-filled block is detected, it is stored as a hole and not compressed using the indicated compression algorithm. .Pp -Any block being compressed must be no larger than 7/8 of its original size -after compression, otherwise the compression will not be considered worthwhile -and the block saved uncompressed. -Note that when the logical block is less than -8 times the disk sector size this effectively reduces the necessary compression -ratio; for example, 8 KiB blocks on disks with 4 KiB disk sectors must compress -to 1/2 -or less of their original size. +All blocks are allocated as a whole number of sectors +.Pq chunks of 2^ Ns Sy ashift No bytes , e.g . Sy 512B No or Sy 4KB . +Compression may result in a non-sector-aligned size, which will be rounded up +to a whole number of sectors. +If compression saves less than one whole sector, +the block will be stored uncompressed. +Therefore, blocks whose logical size is a small number of sectors will +experience less compression +(e.g. for +.Sy recordsize Ns = Ns Sy 16K +with +.Sy 4K +sectors, which have 4 sectors per block, +compression needs to save at least 25% to actually save space on disk). +.Pp +There is +.Sy 12.5% +default compression threshold in addition to sector rounding. .It Xo .Sy context Ns = Ns Sy none Ns | Ns .Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level @@ -1034,6 +1044,48 @@ See the section of .Xr zfsconcepts 7 . .It Xo +.Sy direct Ns = Ns Sy disabled Ns | Ns Sy standard Ns | Ns Sy always +.Xc +Controls the behavior of Direct I/O requests +.Pq e.g. Dv O_DIRECT . +The +.Sy standard +behavior for Direct I/O requests is to bypass the ARC when possible. 
+These requests will not be cached and performance will be limited by the +raw speed of the underlying disks +.Pq Dv this is the default . +.Sy always +causes every properly aligned read or write to be treated as a direct request. +.Sy disabled +causes the O_DIRECT flag to be silently ignored and all direct requests will +be handled by the ARC. +This is the default behavior for OpenZFS 2.2 and prior releases. +.Pp +Bypassing the ARC requires that a direct request be correctly aligned. +For write requests the starting offset and size of the request must be +.Sy recordsize Ns +-aligned, if not then the unaligned portion of the request will be silently +redirected through the ARC. +For read requests there is no +.Sy recordsize +alignment restriction on either the starting offset or size. +All direct requests must use a page-aligned memory buffer and the request +size must be a multiple of the page size or an error is returned. +.Pp +Concurrently mixing buffered and direct requests to overlapping regions of +a file can decrease performance. +However, the resulting file will always be coherent. +For example, a direct read after a buffered write will return the data +from the buffered write. +Furthermore, if an application uses +.Xr mmap 2 +based file access then in order to maintain coherency all direct requests +are converted to buffered requests while the file is mapped. +Currently Direct I/O is not supported with zvols. +If dedup is enabled on a dataset, Direct I/O writes will not check for +deduplication. +Deduplication and Direct I/O writes are currently incompatible. +.It Xo .Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns .Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k .Xc @@ -1470,11 +1522,22 @@ and less than or equal to If the .Sy large_blocks feature is enabled on the pool, the size may be up to -.Ar 1 MiB . +.Ar 16 MiB . See .Xr zpool-features 7 for details on ZFS feature flags. 
.Pp +However, blocks larger than +.Ar 1 MiB +can have an impact on i/o latency (e.g. tying up a spinning disk for +~300ms), and also potentially on the memory allocator. +.Pp +Note that maximum size is still limited by default to +.Ar 1 MiB +on x86_32, see +.Sy zfs_max_recordsize +module parameter. +.Pp Changing the file system's .Sy recordsize affects only files created afterward; existing files are unaffected. @@ -1780,11 +1843,11 @@ Controls whether the volume snapshot devices under are hidden or visible. The default value is .Sy hidden . -.It Sy snapdir Ns = Ns Sy hidden Ns | Ns Sy visible +.It Sy snapdir Ns = Ns Sy disabled Ns | Ns Sy hidden Ns | Ns Sy visible Controls whether the .Pa .zfs -directory is hidden or visible in the root of the file system as discussed in -the +directory is disabled, hidden or visible in the root of the file system as +discussed in the .Sx Snapshots section of .Xr zfsconcepts 7 . @@ -1914,14 +1977,13 @@ enabled for virus scanning to occur. The default value is .Sy off . This property is not used by OpenZFS. -.It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy sa +.It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy dir Ns | Ns Sy sa Controls whether extended attributes are enabled for this file system. Two styles of extended attributes are supported: either directory-based or system-attribute-based. .Pp -The default value of -.Sy on -enables directory-based extended attributes. +Directory-based extended attributes can be enabled by setting the value to +.Sy dir . This style of extended attribute imposes no practical limit on either the size or number of attributes which can be set on a file. Although under Linux the @@ -1934,7 +1996,10 @@ This is the most compatible style of extended attribute and is supported by all ZFS implementations. .Pp System-attribute-based xattrs can be enabled by setting the value to -.Sy sa . +.Sy sa +(default and equal to +.Sy on +) . The key advantage of this type of xattr is improved performance. 
Storing extended attributes as system attributes significantly decreases the amount of disk I/O required. diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index ff6e485a4819..7b392a896150 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -14,12 +14,11 @@ .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] -.\" Copyright (c) 2019, Klara Inc. +.\" Copyright (c) 2019, 2023, 2024, Klara, Inc. .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley -.\" Copyright (c) 2023, Klara Inc. .\" -.Dd February 14, 2024 +.Dd October 2, 2024 .Dt ZPOOL-FEATURES 7 .Os . @@ -219,7 +218,7 @@ to the end of the line is ignored. .Sy Example : .Bd -literal -compact -offset 4n .No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2 -# Features which are supported by GRUB2 +# Features which are supported by GRUB2 versions from v2.12 onwards. allocation_classes async_destroy block_cloning @@ -243,6 +242,31 @@ userobj_accounting zilsaxattr zpool_checkpoint +.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2-2.06 +# Features which are supported by GRUB2 versions prior to v2.12. +# +# GRUB is not able to detect ZFS pool if snaphsot of top level boot pool +# is created. This issue is observed with GRUB versions before v2.12 if +# extensible_dataset feature is enabled on ZFS boot pool. +# +# This file lists all read-only comaptible features except +# extensible_dataset and any other feature that depends on it. +# +allocation_classes +async_destroy +block_cloning +device_rebuild +embedded_data +empty_bpobj +enabled_txg +hole_birth +log_spacemap +lz4_compress +resilver_defer +spacemap_histogram +spacemap_v2 +zpool_checkpoint + .No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev .Ed .Pp @@ -681,7 +705,25 @@ are destroyed. 
Large dnodes allow more data to be stored in the bonus buffer, thus potentially improving performance by avoiding the use of spill blocks. . -.feature com.delphix livelist yes +.feature com.klarasystems large_microzap yes extensible_dataset large_blocks +This feature allows "micro" ZAPs to grow larger than 128 KiB without being +upgraded to "fat" ZAPs. +.Pp +This feature becomes +.Sy active +the first time a micro ZAP grows larger than 128 KiB. +It will only be returned to the +.Sy enabled +state when all datasets that ever had a large micro ZAP are destroyed. +.Pp +Note that even when this feature is enabled, micro ZAPs cannot grow larger +than 128 KiB without also changing the +.Sy zap_micro_max_size +module parameter. +See +.Xr zfs 4 . +. +.feature com.delphix livelist yes extensible_dataset This feature allows clones to be deleted faster than the traditional method when a large number of random/sparse writes have been made to the clone. All blocks allocated and freed after a clone is created are tracked by the @@ -698,6 +740,23 @@ instead of scattering multiple writes to all the metaslab spacemaps. .Pp \*[instant-never] . +.feature org.zfsonlinux longname no extensible_dataset +This feature allows creating files and directories with names up to 1023 bytes +in length. +A new dataset property +.Sy longname +is also introduced to toggle longname support for each dataset individually. +This property can be disabled even if it contains longname files. +In such a case, new files cannot be created with longname but existing longname +files can still be looked up. +.Pp +This feature becomes +.Sy active +when a file name longer than 255 bytes is created in a dataset, and returns to +being +.Sy enabled +when all such datasets are destroyed. +. 
.feature org.illumos lz4_compress no .Sy lz4 is a high-performance real-time compression algorithm that diff --git a/man/man8/zfs-send.8 b/man/man8/zfs-send.8 index ba604bf77855..877d954147b6 100644 --- a/man/man8/zfs-send.8 +++ b/man/man8/zfs-send.8 @@ -28,8 +28,9 @@ .\" Copyright 2019 Richard Laager. All rights reserved. .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. +.\" Copyright (c) 2024, Klara, Inc. .\" -.Dd July 27, 2023 +.Dd October 2, 2024 .Dt ZFS-SEND 8 .Os . @@ -111,6 +112,9 @@ property of this filesystem has never been set above 128 KiB. The receiving system must have the .Sy large_blocks pool feature enabled as well. +This flag is required if the +.Sy large_microzap +pool feature is active. See .Xr zpool-features 7 for details on ZFS feature flags and the diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8 index ef20ef4e003c..234612baea8d 100644 --- a/man/man8/zpool-events.8 +++ b/man/man8/zpool-events.8 @@ -98,6 +98,17 @@ This can be an indicator of problems with the underlying storage device. The number of delay events is ratelimited by the .Sy zfs_slow_io_events_per_second module parameter. +.It Sy dio_verify +Issued when there was a checksum verify error after a Direct I/O write has been +issued. +This event can only take place if the module parameter +.Sy zfs_vdev_direct_write_verify +is not set to zero. +See +.Xr zfs 4 +for more details on the +.Sy zfs_vdev_direct_write_verify +module parameter. .It Sy config Issued every time a vdev change have been done to the pool. .It Sy zpool @@ -408,8 +419,9 @@ ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--XT ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--XT ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R----- +ZIO_STAGE_DIO_CHECKSUM_VERIFY:0x02000000:-W---- -ZIO_STAGE_DONE:0x02000000:RWFCXT +ZIO_STAGE_DONE:0x04000000:RWFCXT .TE . 
.Sh I/O FLAGS diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index b40faeb9977f..868fc4414dbb 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -36,7 +36,7 @@ .Sh SYNOPSIS .Nm zpool .Cm status -.Op Fl DegiLpPstvx +.Op Fl dDegiLpPstvx .Op Fl T Sy u Ns | Ns Sy d .Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … .Oo Ar pool Oc Ns … @@ -81,6 +81,15 @@ to display vdevs in flat hierarchy instead of nested vdev objects. Specify .Sy --json-pool-key-guid to set pool GUID as key for pool objects instead of pool names. +.It Fl d +Display the number of Direct I/O write checksum verify errors that have occurred +on a top-level VDEV. +See +.Sx zfs_vdev_direct_write_verify +in +.Xr zfs 4 +for details about the conditions that can cause Direct I/O write checksum +verify failures to occur. .It Fl D Display a histogram of deduplication statistics, showing the allocated .Pq physically present on disk diff --git a/module/Kbuild.in b/module/Kbuild.in index c14fa4ec1bce..f8980618be77 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -233,6 +233,7 @@ zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS)) ZCOMMON_OBJS := \ cityhash.o \ + simd_stat.o \ zfeature_common.o \ zfs_comutil.o \ zfs_deleg.o \ @@ -328,6 +329,7 @@ ZFS_OBJS := \ ddt_stats.o \ ddt_zap.o \ dmu.o \ + dmu_direct.o \ dmu_diff.o \ dmu_object.o \ dmu_objset.o \ @@ -424,6 +426,7 @@ ZFS_OBJS := \ zfs_rlock.o \ zfs_sa.o \ zfs_vnops.o \ + zfs_znode.o \ zil.o \ zio.o \ zio_checksum.o \ @@ -458,7 +461,7 @@ ZFS_OBJS_OS := \ zfs_uio.o \ zfs_vfsops.o \ zfs_vnops_os.o \ - zfs_znode.o \ + zfs_znode_os.o \ zio_crypt.o \ zpl_ctldir.o \ zpl_export.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 9161204c99d3..61a664c5bf66 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -212,7 +212,7 @@ SRCS+= abd_os.c \ zfs_racct.c \ zfs_vfsops.c \ zfs_vnops_os.c \ - zfs_znode.c \ + zfs_znode_os.c \ zio_crypt.c \ zvol_os.c @@ -257,6 +257,7 @@ SRCS+= abd.c \ ddt_stats.c \ ddt_zap.c \ 
dmu.c \ + dmu_direct.c \ dmu_diff.c \ dmu_object.c \ dmu_objset.c \ @@ -357,6 +358,7 @@ SRCS+= abd.c \ zfs_rlock.c \ zfs_sa.c \ zfs_vnops.c \ + zfs_znode.c \ zil.c \ zio.c \ zio_checksum.c \ diff --git a/module/lua/lobject.h b/module/lua/lobject.h index b7c6b41ac7f4..06fdcdcbcb84 100644 --- a/module/lua/lobject.h +++ b/module/lua/lobject.h @@ -404,19 +404,22 @@ typedef TValue *StkId; /* index to stack elements */ /* ** Header for string value; string bytes follow the end of this structure */ -typedef union TString { - L_Umaxalign dummy; /* ensures maximum alignment for strings */ - struct { - CommonHeader; - lu_byte extra; /* reserved words for short strings; "has hash" for longs */ - unsigned int hash; - size_t len; /* number of characters in string */ - } tsv; +typedef struct TString { + union { + L_Umaxalign dummy; /* ensures maximum alignment for strings */ + struct { + CommonHeader; + lu_byte extra; /* reserved words for short strings; "has hash" for longs */ + unsigned int hash; + size_t len; /* number of characters in string */ + } tsv; + }; + char contents[]; } TString; /* get the actual string (array of bytes) from a TString */ -#define getstr(ts) cast(const char *, (ts) + 1) +#define getstr(ts) ((ts)->contents) /* get the actual string (array of bytes) from a Lua value */ #define svalue(o) getstr(rawtsvalue(o)) diff --git a/module/lua/lstate.h b/module/lua/lstate.h index 75c6ceda6d33..c5fa59335a9b 100644 --- a/module/lua/lstate.h +++ b/module/lua/lstate.h @@ -185,7 +185,7 @@ struct lua_State { */ union GCObject { GCheader gch; /* common header */ - union TString ts; + struct TString ts; union Udata u; union Closure cl; struct Table h; diff --git a/module/lua/lstring.c b/module/lua/lstring.c index 15a73116bb77..149a4ffc5023 100644 --- a/module/lua/lstring.c +++ b/module/lua/lstring.c @@ -103,7 +103,7 @@ static TString *createstrobj (lua_State *L, const char *str, size_t l, ts->tsv.len = l; ts->tsv.hash = h; ts->tsv.extra = 0; - sbuf = (char *)(TString 
*)(ts + 1); + sbuf = ts->contents; memcpy(sbuf, str, l*sizeof(char)); sbuf[l] = '\0'; /* ending 0 */ return ts; diff --git a/module/lua/lstring.h b/module/lua/lstring.h index 260e7f169bd0..257b384172ec 100644 --- a/module/lua/lstring.h +++ b/module/lua/lstring.h @@ -12,7 +12,7 @@ #include "lstate.h" -#define sizestring(s) (sizeof(union TString)+((s)->len+1)*sizeof(char)) +#define sizestring(s) (sizeof(struct TString)+((s)->len+1)*sizeof(char)) #define sizeudata(u) (sizeof(union Udata)+(u)->len) diff --git a/module/os/freebsd/spl/spl_taskq.c b/module/os/freebsd/spl/spl_taskq.c index e4b75f91b8fc..af32c0ee2e5c 100644 --- a/module/os/freebsd/spl/spl_taskq.c +++ b/module/os/freebsd/spl/spl_taskq.c @@ -357,7 +357,7 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid) taskq_ent_t *ent; if ((ent = taskq_lookup(tid)) == NULL) - return (0); + return (ENOENT); if (ent->tqent_type == NORMAL_TASK) { rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend); @@ -380,7 +380,7 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid) } /* Free the extra reference we added with taskq_lookup. */ taskq_free(ent); - return (rc); + return (pend ? 0 : ENOENT); } static void diff --git a/module/os/freebsd/spl/spl_uio.c b/module/os/freebsd/spl/spl_uio.c index 17886cbeb501..74cbe36bbd9b 100644 --- a/module/os/freebsd/spl/spl_uio.c +++ b/module/os/freebsd/spl/spl_uio.c @@ -44,6 +44,10 @@ #include #include #include +#include +#include +#include +#include static void zfs_freeuio(struct uio *uio) @@ -115,3 +119,200 @@ zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) ASSERT3U(zfs_uio_rw(uio), ==, dir); return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio))); } + +/* + * Check if the uio is page-aligned in memory. 
+ */ +boolean_t +zfs_uio_page_aligned(zfs_uio_t *uio) +{ + const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov; + + for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) { + uintptr_t addr = (uintptr_t)iov->iov_base; + size_t size = iov->iov_len; + if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + return (B_FALSE); + } + } + + return (B_TRUE); +} + +static void +zfs_uio_set_pages_to_stable(zfs_uio_t *uio) +{ + ASSERT3P(uio->uio_dio.pages, !=, NULL); + ASSERT3S(uio->uio_dio.npages, >, 0); + + for (int i = 0; i < uio->uio_dio.npages; i++) { + vm_page_t page = uio->uio_dio.pages[i]; + ASSERT3P(page, !=, NULL); + + MPASS(page == PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(page))); + vm_page_busy_acquire(page, VM_ALLOC_SBUSY); + pmap_remove_write(page); + } +} + +static void +zfs_uio_release_stable_pages(zfs_uio_t *uio) +{ + ASSERT3P(uio->uio_dio.pages, !=, NULL); + for (int i = 0; i < uio->uio_dio.npages; i++) { + vm_page_t page = uio->uio_dio.pages[i]; + + ASSERT3P(page, !=, NULL); + vm_page_sunbusy(page); + } +} + +/* + * If the operation is marked as read, then we are stating the pages will be + * written to and must be given write access. + */ +static int +zfs_uio_hold_pages(unsigned long start, size_t len, int nr_pages, + zfs_uio_rw_t rw, vm_page_t *pages) +{ + vm_map_t map; + vm_prot_t prot; + int count; + + map = &curthread->td_proc->p_vmspace->vm_map; + ASSERT3S(len, >, 0); + + prot = rw == UIO_READ ? 
(VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; + count = vm_fault_quick_hold_pages(map, start, len, prot, pages, + nr_pages); + + return (count); +} + +void +zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3P(uio->uio_dio.pages, !=, NULL); + ASSERT(zfs_uio_rw(uio) == rw); + + if (rw == UIO_WRITE) + zfs_uio_release_stable_pages(uio); + + vm_page_unhold_pages(&uio->uio_dio.pages[0], + uio->uio_dio.npages); + + kmem_free(uio->uio_dio.pages, + uio->uio_dio.npages * sizeof (vm_page_t)); +} + +static int +zfs_uio_get_user_pages(unsigned long start, int nr_pages, + size_t len, zfs_uio_rw_t rw, vm_page_t *pages) +{ + int count; + + count = zfs_uio_hold_pages(start, len, nr_pages, rw, pages); + + if (count != nr_pages) { + if (count > 0) + vm_page_unhold_pages(pages, count); + return (0); + } + + ASSERT3S(count, ==, nr_pages); + + return (count); +} + +static int +zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages) +{ + unsigned long addr = (unsigned long)(v.iov_base); + size_t len = v.iov_len; + int n = DIV_ROUND_UP(len, PAGE_SIZE); + + int res = zfs_uio_get_user_pages( + P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len, + zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]); + + if (res != n) + return (SET_ERROR(EFAULT)); + + ASSERT3U(len, ==, res * PAGE_SIZE); + *numpages = res; + return (0); +} + +static int +zfs_uio_get_dio_pages_impl(zfs_uio_t *uio) +{ + const struct iovec *iovp = GET_UIO_STRUCT(uio)->uio_iov; + size_t len = zfs_uio_resid(uio); + + for (int i = 0; i < zfs_uio_iovcnt(uio); i++) { + struct iovec iov; + int numpages = 0; + + if (iovp->iov_len == 0) { + iovp++; + continue; + } + iov.iov_len = MIN(len, iovp->iov_len); + iov.iov_base = iovp->iov_base; + int error = zfs_uio_iov_step(iov, uio, &numpages); + + if (error) + return (error); + + uio->uio_dio.npages += numpages; + len -= iov.iov_len; + iovp++; + } + + ASSERT0(len); + + return (0); +} + +/* + * This function holds 
user pages into the kernel. In the event that the user + * pages are not successfully held an error value is returned. + * + * On success, 0 is returned. + */ +int +zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + int error = 0; + int npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE); + size_t size = npages * sizeof (vm_page_t); + + ASSERT(zfs_uio_rw(uio) == rw); + + uio->uio_dio.pages = kmem_alloc(size, KM_SLEEP); + + error = zfs_uio_get_dio_pages_impl(uio); + + if (error) { + vm_page_unhold_pages(&uio->uio_dio.pages[0], + uio->uio_dio.npages); + kmem_free(uio->uio_dio.pages, size); + return (error); + } + + ASSERT3S(uio->uio_dio.npages, >, 0); + + /* + * Since we will be writing the user pages we must make sure that + * they are stable. That way the contents of the pages can not change + * while we are doing: compression, checksumming, encryption, parity + * calculations or deduplication. + */ + if (zfs_uio_rw(uio) == UIO_WRITE) + zfs_uio_set_pages_to_stable(uio); + + uio->uio_extflg |= UIO_DIRECT; + + return (0); +} diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c index f24ea3dc7685..f20dc5d8c325 100644 --- a/module/os/freebsd/zfs/abd_os.c +++ b/module/os/freebsd/zfs/abd_os.c @@ -32,6 +32,7 @@ #include #include #include +#include typedef struct abd_stats { kstat_named_t abdstat_struct_size; @@ -135,7 +136,9 @@ abd_size_alloc_linear(size_t size) void abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) { - uint_t n = abd_scatter_chunkcnt(abd); + uint_t n; + + n = abd_scatter_chunkcnt(abd); ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); int waste = (n << PAGE_SHIFT) - abd->abd_size; if (op == ABDSTAT_INCR) { @@ -198,10 +201,16 @@ abd_free_chunks(abd_t *abd) { uint_t i, n; - n = abd_scatter_chunkcnt(abd); - for (i = 0; i < n; i++) { - kmem_cache_free(abd_chunk_cache, - ABD_SCATTER(abd).abd_chunks[i]); + /* + * Scatter ABDs may be constructed by abd_alloc_from_pages() from + * an array of pages. 
In which case they should not be freed. + */ + if (!abd_is_from_pages(abd)) { + n = abd_scatter_chunkcnt(abd); + for (i = 0; i < n; i++) { + kmem_cache_free(abd_chunk_cache, + ABD_SCATTER(abd).abd_chunks[i]); + } } } @@ -342,11 +351,8 @@ abd_fini(void) void abd_free_linear_page(abd_t *abd) { - /* - * FreeBSD does not have scatter linear pages - * so there is an error. - */ - VERIFY(0); + ASSERT3P(abd->abd_u.abd_linear.sf, !=, NULL); + zfs_unmap_page(abd->abd_u.abd_linear.sf); } /* @@ -365,6 +371,26 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata) return (abd_alloc_linear(size, is_metadata)); } +static abd_t * +abd_get_offset_from_pages(abd_t *abd, abd_t *sabd, size_t chunkcnt, + size_t new_offset) +{ + ASSERT(abd_is_from_pages(sabd)); + + /* + * Set the child child chunks to point at the parent chunks as + * the chunks are just pages and we don't want to copy them. + */ + size_t parent_offset = new_offset / PAGE_SIZE; + ASSERT3U(parent_offset, <, abd_scatter_chunkcnt(sabd)); + for (int i = 0; i < chunkcnt; i++) + ABD_SCATTER(abd).abd_chunks[i] = + ABD_SCATTER(sabd).abd_chunks[parent_offset + i]; + + abd->abd_flags |= ABD_FLAG_FROM_PAGES; + return (abd); +} + abd_t * abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, size_t size) @@ -399,6 +425,11 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK; + if (abd_is_from_pages(sabd)) { + return (abd_get_offset_from_pages(abd, sabd, chunkcnt, + new_offset)); + } + /* Copy the scatterlist starting at the correct offset */ (void) memcpy(&ABD_SCATTER(abd).abd_chunks, &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT], @@ -407,6 +438,44 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, return (abd); } +/* + * Allocate a scatter ABD structure from user pages. 
+ */ +abd_t * +abd_alloc_from_pages(vm_page_t *pages, unsigned long offset, uint64_t size) +{ + VERIFY3U(size, <=, DMU_MAX_ACCESS); + ASSERT3U(offset, <, PAGE_SIZE); + ASSERT3P(pages, !=, NULL); + + abd_t *abd = abd_alloc_struct(size); + abd->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_FROM_PAGES; + abd->abd_size = size; + + if ((offset + size) <= PAGE_SIZE) { + /* + * There is only a single page worth of data, so we will just + * use a linear ABD. We have to make sure to take into account + * the offset though. In all other cases our offset will be 0 + * as we are always PAGE_SIZE aligned. + */ + abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE; + ABD_LINEAR_BUF(abd) = (char *)zfs_map_page(pages[0], + &abd->abd_u.abd_linear.sf) + offset; + } else { + ABD_SCATTER(abd).abd_offset = offset; + ASSERT0(ABD_SCATTER(abd).abd_offset); + + /* + * Setting the ABD's abd_chunks to point to the user pages. + */ + for (int i = 0; i < abd_chunkcnt_for_bytes(size); i++) + ABD_SCATTER(abd).abd_chunks[i] = pages[i]; + } + + return (abd); +} + /* * Initialize the abd_iter. 
*/ @@ -468,6 +537,16 @@ abd_iter_map(struct abd_iter *aiter) if (abd_is_linear(abd)) { aiter->iter_mapsize = abd->abd_size - offset; paddr = ABD_LINEAR_BUF(abd); + } else if (abd_is_from_pages(abd)) { + aiter->sf = NULL; + offset += ABD_SCATTER(abd).abd_offset; + size_t index = offset / PAGE_SIZE; + offset &= PAGE_MASK; + aiter->iter_mapsize = MIN(PAGE_SIZE - offset, + abd->abd_size - aiter->iter_pos); + paddr = zfs_map_page( + ABD_SCATTER(aiter->iter_abd).abd_chunks[index], + &aiter->sf); } else { offset += ABD_SCATTER(abd).abd_offset; paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT]; @@ -490,6 +569,12 @@ abd_iter_unmap(struct abd_iter *aiter) ASSERT3U(aiter->iter_mapsize, >, 0); } + if (abd_is_from_pages(aiter->iter_abd) && + !abd_is_linear_page(aiter->iter_abd)) { + ASSERT3P(aiter->sf, !=, NULL); + zfs_unmap_page(aiter->sf); + } + aiter->iter_mapaddr = NULL; aiter->iter_mapsize = 0; } @@ -499,3 +584,67 @@ abd_cache_reap_now(void) { kmem_cache_reap_soon(abd_chunk_cache); } + +/* + * Borrow a raw buffer from an ABD without copying the contents of the ABD + * into the buffer. If the ABD is scattered, this will alloate a raw buffer + * whose contents are undefined. To copy over the existing data in the ABD, use + * abd_borrow_buf_copy() instead. + */ +void * +abd_borrow_buf(abd_t *abd, size_t n) +{ + void *buf; + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, 0); + if (abd_is_linear(abd)) { + buf = abd_to_buf(abd); + } else { + buf = zio_buf_alloc(n); + } +#ifdef ZFS_DEBUG + (void) zfs_refcount_add_many(&abd->abd_children, n, buf); +#endif + return (buf); +} + +void * +abd_borrow_buf_copy(abd_t *abd, size_t n) +{ + void *buf = abd_borrow_buf(abd, n); + if (!abd_is_linear(abd)) { + abd_copy_to_buf(buf, abd, n); + } + return (buf); +} + +/* + * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will + * no change the contents of the ABD and will ASSERT that you didn't modify + * the buffer since it was borrowed. 
If you want any changes you made to buf to + * be copied back to abd, use abd_return_buf_copy() instead. + */ +void +abd_return_buf(abd_t *abd, void *buf, size_t n) +{ + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif + if (abd_is_linear(abd)) { + ASSERT3P(buf, ==, abd_to_buf(abd)); + } else { + ASSERT0(abd_cmp_buf(abd, buf, n)); + zio_buf_free(buf, n); + } +} + +void +abd_return_buf_copy(abd_t *abd, void *buf, size_t n) +{ + if (!abd_is_linear(abd)) { + abd_copy_from_buf(abd, buf, n); + } + abd_return_buf(abd, buf, n); +} diff --git a/module/os/freebsd/zfs/vdev_file.c b/module/os/freebsd/zfs/vdev_file.c index 869093afa3ed..6719c87f82e5 100644 --- a/module/os/freebsd/zfs/vdev_file.c +++ b/module/os/freebsd/zfs/vdev_file.c @@ -260,16 +260,9 @@ vdev_file_io_start(zio_t *zio) zio_execute(zio); return; } else if (zio->io_type == ZIO_TYPE_TRIM) { -#ifdef notyet - int mode = 0; - ASSERT3U(zio->io_size, !=, 0); - - /* XXX FreeBSD has no fallocate routine in file ops */ - zio->io_error = zfs_file_fallocate(vf->vf_file, - mode, zio->io_offset, zio->io_size); -#endif - zio->io_error = SET_ERROR(ENOTSUP); + zio->io_error = zfs_file_deallocate(vf->vf_file, + zio->io_offset, zio->io_size); zio_execute(zio); return; } diff --git a/module/os/freebsd/zfs/zfs_acl.c b/module/os/freebsd/zfs/zfs_acl.c index fb47013d06ef..3ef0f797f362 100644 --- a/module/os/freebsd/zfs/zfs_acl.c +++ b/module/os/freebsd/zfs/zfs_acl.c @@ -473,7 +473,7 @@ zfs_acl_node_alloc(size_t bytes) aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); if (bytes) { - aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); + aclnode->z_acldata = kmem_zalloc(bytes, KM_SLEEP); aclnode->z_allocdata = aclnode->z_acldata; aclnode->z_allocsize = bytes; aclnode->z_size = bytes; diff --git a/module/os/freebsd/zfs/zfs_debug.c b/module/os/freebsd/zfs/zfs_debug.c index c4cebe102075..6d4ae4355867 100644 --- 
a/module/os/freebsd/zfs/zfs_debug.c +++ b/module/os/freebsd/zfs/zfs_debug.c @@ -140,15 +140,11 @@ zfs_dbgmsg_fini(void) { if (zfs_dbgmsg_kstat) kstat_delete(zfs_dbgmsg_kstat); - /* - * TODO - decide how to make this permanent - */ -#ifdef _KERNEL + mutex_enter(&zfs_dbgmsgs_lock); zfs_dbgmsg_purge(0); mutex_exit(&zfs_dbgmsgs_lock); mutex_destroy(&zfs_dbgmsgs_lock); -#endif } void @@ -184,7 +180,6 @@ __set_error(const char *file, const char *func, int line, int err) __dprintf(B_FALSE, file, func, line, "error %lu", (ulong_t)err); } -#ifdef _KERNEL void __dprintf(boolean_t dprint, const char *file, const char *func, int line, const char *fmt, ...) @@ -218,47 +213,23 @@ __dprintf(boolean_t dprint, const char *file, const char *func, } /* - * Get rid of trailing newline. + * Get rid of trailing newline for dprintf logs. */ - nl = strrchr(buf, '\n'); - if (nl != NULL) - *nl = '\0'; - - __zfs_dbgmsg(buf); - - kmem_free(buf, size); -} - -#else - -void -zfs_dbgmsg_print(int fd, const char *tag) -{ - ssize_t ret __attribute__((unused)); + if (dprint && buf[0] != '\0') { + nl = &buf[strlen(buf) - 1]; + if (*nl == '\n') + *nl = '\0'; + } /* - * We use write() in this function instead of printf() - * so it is safe to call from a signal handler. 
+ * To get this data: + * + * $ sysctl -n kstat.zfs.misc.dbgmsg */ - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") START:\n", 9); - - mutex_enter(&zfs_dbgmsgs_lock); - - for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs); zdm != NULL; - zdm = list_next(&zfs_dbgmsgs, zdm)) - ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); - ret = write(fd, "\n", 1); - } - - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") END\n", 6); + __zfs_dbgmsg(buf); - mutex_exit(&zfs_dbgmsgs_lock); + kmem_free(buf, size); } -#endif /* _KERNEL */ ZFS_MODULE_PARAM(zfs, zfs_, dbgmsg_enable, INT, ZMOD_RW, "Enable ZFS debug message log"); diff --git a/module/os/freebsd/zfs/zfs_dir.c b/module/os/freebsd/zfs/zfs_dir.c index 00d499c8c63e..1358c048619c 100644 --- a/module/os/freebsd/zfs/zfs_dir.c +++ b/module/os/freebsd/zfs/zfs_dir.c @@ -287,7 +287,7 @@ void zfs_unlinked_drain(zfsvfs_t *zfsvfs) { zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap; dmu_object_info_t doi; znode_t *zp; dmu_tx_t *tx; @@ -296,8 +296,9 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) /* * Iterate over the contents of the unlinked set. */ + zap = zap_attribute_alloc(); for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); - zap_cursor_retrieve(&zc, &zap) == 0; + zap_cursor_retrieve(&zc, zap) == 0; zap_cursor_advance(&zc)) { /* @@ -305,7 +306,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) */ error = dmu_object_info(zfsvfs->z_os, - zap.za_first_integer, &doi); + zap->za_first_integer, &doi); if (error != 0) continue; @@ -315,7 +316,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) * We need to re-mark these list entries for deletion, * so we pull them back into core and set zp->z_unlinked. */ - error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); + error = zfs_zget(zfsvfs, zap->za_first_integer, &zp); /* * We may pick up znodes that are already marked for deletion. 
@@ -351,6 +352,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) vput(ZTOV(zp)); } zap_cursor_fini(&zc); + zap_attribute_free(zap); } /* @@ -368,18 +370,19 @@ static int zfs_purgedir(znode_t *dzp) { zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap; znode_t *xzp; dmu_tx_t *tx; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; int skipped = 0; int error; + zap = zap_attribute_alloc(); for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); - (error = zap_cursor_retrieve(&zc, &zap)) == 0; + (error = zap_cursor_retrieve(&zc, zap)) == 0; zap_cursor_advance(&zc)) { error = zfs_zget(zfsvfs, - ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); + ZFS_DIRENT_OBJ(zap->za_first_integer), &xzp); if (error) { skipped += 1; continue; @@ -391,7 +394,7 @@ zfs_purgedir(znode_t *dzp) tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); + dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap->za_name); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); /* Is this really needed ? */ @@ -405,7 +408,7 @@ zfs_purgedir(znode_t *dzp) continue; } - error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL); + error = zfs_link_destroy(dzp, zap->za_name, xzp, tx, 0, NULL); if (error) skipped += 1; dmu_tx_commit(tx); @@ -413,6 +416,7 @@ zfs_purgedir(znode_t *dzp) vput(ZTOV(xzp)); } zap_cursor_fini(&zc); + zap_attribute_free(zap); if (error != ENOENT) skipped += 1; return (skipped); @@ -623,6 +627,15 @@ zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, return (error); } + /* + * If we added a longname activate the SPA_FEATURE_LONGNAME. 
+ */ + if (strlen(name) >= ZAP_MAXNAMELEN) { + dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os); + ds->ds_feature_activation[SPA_FEATURE_LONGNAME] = + (void *)B_TRUE; + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &dzp->z_id, sizeof (dzp->z_id)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, diff --git a/module/os/freebsd/zfs/zfs_file_os.c b/module/os/freebsd/zfs/zfs_file_os.c index bdb8c4fac774..f6ee391deddd 100644 --- a/module/os/freebsd/zfs/zfs_file_os.c +++ b/module/os/freebsd/zfs/zfs_file_os.c @@ -285,6 +285,32 @@ zfs_file_fsync(zfs_file_t *fp, int flags) return (zfs_vop_fsync(fp->f_vnode)); } +/* + * deallocate - zero and/or deallocate file storage + * + * fp - file pointer + * offset - offset to start zeroing or deallocating + * len - length to zero or deallocate + */ +int +zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len) +{ + int rc; +#if __FreeBSD_version >= 1400029 + struct thread *td; + + td = curthread; + rc = fo_fspacectl(fp, SPACECTL_DEALLOC, &offset, &len, 0, + td->td_ucred, td); +#else + (void) fp, (void) offset, (void) len; + rc = EOPNOTSUPP; +#endif + if (rc) + return (SET_ERROR(rc)); + return (0); +} + zfs_file_t * zfs_file_get(int fd) { diff --git a/module/os/freebsd/zfs/zfs_racct.c b/module/os/freebsd/zfs/zfs_racct.c index 883255bc1901..2989a9af9235 100644 --- a/module/os/freebsd/zfs/zfs_racct.c +++ b/module/os/freebsd/zfs/zfs_racct.c @@ -27,7 +27,7 @@ #include void -zfs_racct_read(uint64_t size, uint64_t iops) +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { curthread->td_ru.ru_inblock += iops; #ifdef RACCT @@ -40,10 +40,12 @@ zfs_racct_read(uint64_t size, uint64_t iops) #else (void) size; #endif /* RACCT */ + + spa_iostats_read_add(spa, size, iops, flags); } void -zfs_racct_write(uint64_t size, uint64_t iops) +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { curthread->td_ru.ru_oublock += iops; #ifdef RACCT @@ -56,4 +58,6 @@ 
zfs_racct_write(uint64_t size, uint64_t iops) #else (void) size; #endif /* RACCT */ + + spa_iostats_write_add(spa, size, iops, flags); } diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c index b96393df4626..a3fac1636981 100644 --- a/module/os/freebsd/zfs/zfs_vfsops.c +++ b/module/os/freebsd/zfs/zfs_vfsops.c @@ -614,6 +614,14 @@ acl_type_changed_cb(void *arg, uint64_t newval) zfsvfs->z_acl_type = newval; } +static void +longname_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + zfsvfs->z_longname = newval; +} + static int zfs_register_callbacks(vfs_t *vfsp) { @@ -751,6 +759,8 @@ zfs_register_callbacks(vfs_t *vfsp) error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_LONGNAME), longname_changed_cb, zfsvfs); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; @@ -1489,7 +1499,8 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp) strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, sizeof (statp->f_mntonname)); - statp->f_namemax = MAXNAMELEN - 1; + statp->f_namemax = + zfsvfs->z_longname ? 
(ZAP_MAXNAMELEN_NEW - 1) : (MAXNAMELEN - 1); zfs_exit(zfsvfs, FTAG); return (0); diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 01b964f98f3a..a2222a899380 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -774,6 +774,8 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, } if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { zfs_exit(zfsvfs, FTAG); + if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) + return (SET_ERROR(ENOENT)); if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) return (SET_ERROR(ENOTSUP)); error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); @@ -892,6 +894,14 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, return (error); } +static inline bool +is_nametoolong(zfsvfs_t *zfsvfs, const char *name) +{ + size_t dlen = strlen(name); + return ((!zfsvfs->z_longname && dlen >= ZAP_MAXNAMELEN) || + dlen >= ZAP_MAXNAMELEN_NEW); +} + /* * Attempt to create a new entry in a directory. 
If the entry * already exists, truncate the file if permissible, else return @@ -937,6 +947,9 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, vnode_t *dvp = ZTOV(dzp); #endif + if (is_nametoolong(zfsvfs, name)) + return (SET_ERROR(ENAMETOOLONG)); + /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version @@ -1301,6 +1314,9 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, ASSERT3U(vap->va_type, ==, VDIR); + if (is_nametoolong(zfsvfs, dirname)) + return (SET_ERROR(ENAMETOOLONG)); + /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version @@ -1568,7 +1584,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, caddr_t outbuf; size_t bufsize; zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap; uint_t bytes_wanted; uint64_t offset; /* must be unsigned; checks for < 1 */ uint64_t parent; @@ -1616,6 +1632,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, os = zfsvfs->z_os; offset = zfs_uio_offset(uio); prefetch = zp->z_zn_prefetch; + zap = zap_attribute_long_alloc(); /* * Initialize the iterator cursor. @@ -1671,33 +1688,33 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, * Special case `.', `..', and `.zfs'. 
*/ if (offset == 0) { - (void) strcpy(zap.za_name, "."); - zap.za_normalization_conflict = 0; + (void) strcpy(zap->za_name, "."); + zap->za_normalization_conflict = 0; objnum = zp->z_id; type = DT_DIR; } else if (offset == 1) { - (void) strcpy(zap.za_name, ".."); - zap.za_normalization_conflict = 0; + (void) strcpy(zap->za_name, ".."); + zap->za_normalization_conflict = 0; objnum = parent; type = DT_DIR; } else if (offset == 2 && zfs_show_ctldir(zp)) { - (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); - zap.za_normalization_conflict = 0; + (void) strcpy(zap->za_name, ZFS_CTLDIR_NAME); + zap->za_normalization_conflict = 0; objnum = ZFSCTL_INO_ROOT; type = DT_DIR; } else { /* * Grab next entry. */ - if ((error = zap_cursor_retrieve(&zc, &zap))) { + if ((error = zap_cursor_retrieve(&zc, zap))) { if ((*eofp = (error == ENOENT)) != 0) break; else goto update; } - if (zap.za_integer_length != 8 || - zap.za_num_integers != 1) { + if (zap->za_integer_length != 8 || + zap->za_num_integers != 1) { cmn_err(CE_WARN, "zap_readdir: bad directory " "entry, obj = %lld, offset = %lld\n", (u_longlong_t)zp->z_id, @@ -1706,15 +1723,15 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, goto update; } - objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); + objnum = ZFS_DIRENT_OBJ(zap->za_first_integer); /* * MacOS X can extract the object type here such as: * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); */ - type = ZFS_DIRENT_TYPE(zap.za_first_integer); + type = ZFS_DIRENT_TYPE(zap->za_first_integer); } - reclen = DIRENT64_RECLEN(strlen(zap.za_name)); + reclen = DIRENT64_RECLEN(strlen(zap->za_name)); /* * Will this entry fit in the buffer? @@ -1734,10 +1751,10 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, */ odp->d_ino = objnum; odp->d_reclen = reclen; - odp->d_namlen = strlen(zap.za_name); + odp->d_namlen = strlen(zap->za_name); /* NOTE: d_off is the offset for the *next* entry. 
*/ next = &odp->d_off; - strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); + strlcpy(odp->d_name, zap->za_name, odp->d_namlen + 1); odp->d_type = type; dirent_terminate(odp); odp = (dirent64_t *)((intptr_t)odp + reclen); @@ -1788,6 +1805,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, update: zap_cursor_fini(&zc); + zap_attribute_free(zap); if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) kmem_free(outbuf, bufsize); @@ -3292,6 +3310,9 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, int error; svp = tvp = NULL; + if (is_nametoolong(tdzp->z_zfsvfs, tname)) + return (SET_ERROR(ENAMETOOLONG)); + if (rflags != 0 || wo_vap != NULL) return (SET_ERROR(EINVAL)); @@ -3356,6 +3377,9 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, ASSERT3S(vap->va_type, ==, VLNK); + if (is_nametoolong(zfsvfs, name)) + return (SET_ERROR(ENAMETOOLONG)); + if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0) return (error); zilog = zfsvfs->z_log; @@ -3538,6 +3562,9 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr, ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR); + if (is_nametoolong(zfsvfs, name)) + return (SET_ERROR(ENAMETOOLONG)); + if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0) return (error); zilog = zfsvfs->z_log; @@ -4131,7 +4158,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, * but that would make the locking messier */ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, - len, commit, NULL, NULL); + len, commit, B_FALSE, NULL, NULL); zfs_vmobject_wlock(object); for (i = 0; i < ncount; i++) { @@ -4266,6 +4293,8 @@ ioflags(int ioflags) flags |= O_APPEND; if (ioflags & IO_NDELAY) flags |= O_NONBLOCK; + if (ioflags & IO_DIRECT) + flags |= O_DIRECT; if (ioflags & IO_SYNC) flags |= O_SYNC; @@ -4285,9 +4314,36 @@ static int zfs_freebsd_read(struct vop_read_args *ap) { zfs_uio_t uio; + int error = 0; zfs_uio_init(&uio, ap->a_uio); - return 
(zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), - ap->a_cred)); + error = zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), + ap->a_cred); + /* + * XXX We occasionally get an EFAULT for Direct I/O reads on + * FreeBSD 13. This still needs to be resolved. The EFAULT comes + * from: + * zfs_uio_get__dio_pages_alloc() -> + * zfs_uio_get_dio_pages_impl() -> + * zfs_uio_iov_step() -> + * zfs_uio_get_user_pages(). + * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O + * read fails to map in the user pages (returning EFAULT) the + * Direct I/O request is broken up into two separate IO requests + * and issued separately using Direct I/O. + */ +#ifdef ZFS_DEBUG + if (error == EFAULT && uio.uio_extflg & UIO_DIRECT) { +#if 0 + printf("%s(%d): Direct I/O read returning EFAULT " + "uio = %p, zfs_uio_offset(uio) = %lu " + "zfs_uio_resid(uio) = %lu\n", + __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio), + zfs_uio_resid(&uio)); +#endif + } + +#endif + return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -5965,7 +6021,8 @@ zfs_vptocnp(struct vop_vptocnp_args *ap) znode_t *dzp; size_t len; - error = zfs_znode_parent_and_name(zp, &dzp, name); + error = zfs_znode_parent_and_name(zp, &dzp, name, + sizeof (name)); if (error == 0) { len = strlen(name); if (*ap->a_buflen < len) diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode_os.c similarity index 85% rename from module/os/freebsd/zfs/zfs_znode.c rename to module/os/freebsd/zfs/zfs_znode_os.c index e5c50874e1dd..a31ecc367414 100644 --- a/module/os/freebsd/zfs/zfs_znode.c +++ b/module/os/freebsd/zfs/zfs_znode_os.c @@ -27,7 +27,6 @@ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2011 Martin Matuska */ -#ifdef _KERNEL #include #include #include @@ -52,8 +51,6 @@ #include #include #include -#endif /* _KERNEL */ - #include #include #include @@ -86,12 +83,6 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, #define ZNODE_STAT_ADD(stat) /* nothing */ #endif /* ZNODE_STATS */ 
-/* - * Functions needed for userland (ie: libzpool) are not put under - * #ifdef_KERNEL; the rest of the functions have dependencies - * (such as VFS logic) that will not compile easily in userland. - */ -#ifdef _KERNEL #if !defined(KMEM_DEBUG) #define _ZFS_USE_SMR static uma_zone_t znode_uma_zone; @@ -1787,361 +1778,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) mutex_destroy(&zfsvfs->z_hold_mtx[i]); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } -#endif /* _KERNEL */ - -static int -zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) -{ - uint64_t sa_obj = 0; - int error; - - error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - if (error != 0 && error != ENOENT) - return (error); - - error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); - return (error); -} - -static int -zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db, const void *tag) -{ - dmu_object_info_t doi; - int error; - - if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) - return (error); - - dmu_object_info_from_db(*db, &doi); - if ((doi.doi_bonus_type != DMU_OT_SA && - doi.doi_bonus_type != DMU_OT_ZNODE) || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, tag); - return (SET_ERROR(ENOTSUP)); - } - - error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); - if (error != 0) { - sa_buf_rele(*db, tag); - return (error); - } - - return (0); -} - -static void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag) -{ - sa_handle_destroy(hdl); - sa_buf_rele(db, tag); -} - -/* - * Given an object number, return its parent object number and whether - * or not the object is an extended attribute directory. 
- */ -static int -zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, - uint64_t *pobjp, int *is_xattrdir) -{ - uint64_t parent; - uint64_t pflags; - uint64_t mode; - uint64_t parent_mode; - sa_bulk_attr_t bulk[3]; - sa_handle_t *sa_hdl; - dmu_buf_t *sa_db; - int count = 0; - int error; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, - &parent, sizeof (parent)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, - &pflags, sizeof (pflags)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &mode, sizeof (mode)); - - if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) - return (error); - - /* - * When a link is removed its parent pointer is not changed and will - * be invalid. There are two cases where a link is removed but the - * file stays around, when it goes to the delete queue and when there - * are additional links. - */ - error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); - if (error != 0) - return (error); - - error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - if (error != 0) - return (error); - - *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); - - /* - * Extended attributes can be applied to files, directories, etc. - * Otherwise the parent must be a directory. 
- */ - if (!*is_xattrdir && !S_ISDIR(parent_mode)) - return (SET_ERROR(EINVAL)); - - *pobjp = parent; - - return (0); -} - -/* - * Given an object number, return some zpl level statistics - */ -static int -zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, - zfs_stat_t *sb) -{ - sa_bulk_attr_t bulk[4]; - int count = 0; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &sb->zs_mode, sizeof (sb->zs_mode)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, - &sb->zs_gen, sizeof (sb->zs_gen)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, - &sb->zs_links, sizeof (sb->zs_links)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, - &sb->zs_ctime, sizeof (sb->zs_ctime)); - - return (sa_bulk_lookup(hdl, bulk, count)); -} - -static int -zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, - sa_attr_type_t *sa_table, char *buf, int len) -{ - sa_handle_t *sa_hdl; - sa_handle_t *prevhdl = NULL; - dmu_buf_t *prevdb = NULL; - dmu_buf_t *sa_db = NULL; - char *path = buf + len - 1; - int error; - - *path = '\0'; - sa_hdl = hdl; - - uint64_t deleteq_obj; - VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, - ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); - error = zap_lookup_int(osp, deleteq_obj, obj); - if (error == 0) { - return (ESTALE); - } else if (error != ENOENT) { - return (error); - } - - for (;;) { - uint64_t pobj; - char component[MAXNAMELEN + 2]; - size_t complen; - int is_xattrdir; - - if (prevdb) { - ASSERT3P(prevhdl, !=, NULL); - zfs_release_sa_handle(prevhdl, prevdb, FTAG); - } - - if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, - &is_xattrdir)) != 0) - break; - - if (pobj == obj) { - if (path[0] != '/') - *--path = '/'; - break; - } - - component[0] = '/'; - if (is_xattrdir) { - (void) sprintf(component + 1, ""); - } else { - error = zap_value_search(osp, pobj, obj, - ZFS_DIRENT_OBJ(-1ULL), component + 1); - if (error != 0) - break; - } - - complen = strlen(component); - path -= 
complen; - ASSERT3P(path, >=, buf); - memcpy(path, component, complen); - obj = pobj; - - if (sa_hdl != hdl) { - prevhdl = sa_hdl; - prevdb = sa_db; - } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); - if (error != 0) { - sa_hdl = prevhdl; - sa_db = prevdb; - break; - } - } - - if (sa_hdl != NULL && sa_hdl != hdl) { - ASSERT3P(sa_db, !=, NULL); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - } - - if (error == 0) - (void) memmove(buf, path, buf + len - path); - - return (error); -} - -int -zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) -{ - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -int -zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, - char *buf, int len) -{ - char *path = buf + len - 1; - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - *path = '\0'; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_stats_impl(hdl, sa_table, sb); - if (error != 0) { - zfs_release_sa_handle(hdl, db, FTAG); - return (error); - } - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -/* - * Read a property stored within the master node. - */ -int -zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) -{ - uint64_t *cached_copy = NULL; - - /* - * Figure out where in the objset_t the cached copy would live, if it - * is available for the requested property. 
- */ - if (os != NULL) { - switch (prop) { - case ZFS_PROP_VERSION: - cached_copy = &os->os_version; - break; - case ZFS_PROP_NORMALIZE: - cached_copy = &os->os_normalization; - break; - case ZFS_PROP_UTF8ONLY: - cached_copy = &os->os_utf8only; - break; - case ZFS_PROP_CASE: - cached_copy = &os->os_casesensitivity; - break; - default: - break; - } - } - if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { - *value = *cached_copy; - return (0); - } - - /* - * If the property wasn't cached, look up the file system's value for - * the property. For the version property, we look up a slightly - * different string. - */ - const char *pname; - int error = ENOENT; - if (prop == ZFS_PROP_VERSION) { - pname = ZPL_VERSION_STR; - } else { - pname = zfs_prop_to_name(prop); - } - - if (os != NULL) { - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); - } - - if (error == ENOENT) { - /* No value set, use the default value */ - switch (prop) { - case ZFS_PROP_VERSION: - *value = ZPL_VERSION; - break; - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - *value = 0; - break; - case ZFS_PROP_CASE: - *value = ZFS_CASE_SENSITIVE; - break; - case ZFS_PROP_ACLTYPE: - *value = ZFS_ACLTYPE_NFSV4; - break; - default: - return (error); - } - error = 0; - } - - /* - * If one of the methods for getting the property value above worked, - * copy it into the objset_t's cache. 
- */ - if (error == 0 && cached_copy != NULL) { - *cached_copy = *value; - } - - return (error); -} - - void zfs_znode_update_vfs(znode_t *zp) @@ -2155,10 +1791,9 @@ zfs_znode_update_vfs(znode_t *zp) vnode_pager_setsize(ZTOV(zp), zp->z_size); } - -#ifdef _KERNEL int -zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) +zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf, + uint64_t buflen) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t parent; @@ -2180,15 +1815,13 @@ zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) return (SET_ERROR(EINVAL)); err = zap_value_search(zfsvfs->z_os, parent, zp->z_id, - ZFS_DIRENT_OBJ(-1ULL), buf); + ZFS_DIRENT_OBJ(-1ULL), buf, buflen); if (err != 0) return (err); err = zfs_zget(zfsvfs, parent, dzpp); return (err); } -#endif /* _KERNEL */ -#ifdef _KERNEL int zfs_rlimit_fsize(off_t fsize) { @@ -2211,4 +1844,3 @@ zfs_rlimit_fsize(off_t fsize) return (EFBIG); } -#endif /* _KERNEL */ diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index ddb20b031448..c3be4730d4b6 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -922,6 +922,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) if (commit) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); + return (error); } diff --git a/module/os/linux/spl/spl-condvar.c b/module/os/linux/spl/spl-condvar.c index 5898789ad53d..1cb0aeebcb67 100644 --- a/module/os/linux/spl/spl-condvar.c +++ b/module/os/linux/spl/spl-condvar.c @@ -31,10 +31,7 @@ #include #include - -#ifdef HAVE_SCHED_SIGNAL_HEADER #include -#endif #define MAX_HRTIMEOUT_SLACK_US 1000 static unsigned int spl_schedule_hrtimeout_slack_us = 0; @@ -209,48 +206,6 @@ __cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_idle); -#if defined(HAVE_IO_SCHEDULE_TIMEOUT) -#define spl_io_schedule_timeout(t) io_schedule_timeout(t) -#else - -struct spl_task_timer { - struct timer_list timer; - struct 
task_struct *task; -}; - -static void -__cv_wakeup(spl_timer_list_t t) -{ - struct timer_list *tmr = (struct timer_list *)t; - struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer); - - wake_up_process(task_timer->task); -} - -static long -spl_io_schedule_timeout(long time_left) -{ - long expire_time = jiffies + time_left; - struct spl_task_timer task_timer; - struct timer_list *timer = &task_timer.timer; - - task_timer.task = current; - - timer_setup(timer, __cv_wakeup, 0); - - timer->expires = expire_time; - add_timer(timer); - - io_schedule(); - - del_timer_sync(timer); - - time_left = expire_time - jiffies; - - return (time_left < 0 ? 0 : time_left); -} -#endif - /* * 'expire_time' argument is an absolute wall clock time in jiffies. * Return value is time left (expire_time - now) or -1 if timeout occurred. @@ -290,7 +245,7 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, */ mutex_exit(mp); if (io) - time_left = spl_io_schedule_timeout(time_left); + time_left = io_schedule_timeout(time_left); else time_left = schedule_timeout(time_left); diff --git a/module/os/linux/spl/spl-cred.c b/module/os/linux/spl/spl-cred.c index d407fc66b2de..7254df6bf294 100644 --- a/module/os/linux/spl/spl-cred.c +++ b/module/os/linux/spl/spl-cred.c @@ -74,26 +74,13 @@ crgetngroups(const cred_t *cr) gi = cr->group_info; rc = gi->ngroups; -#ifndef HAVE_GROUP_INFO_GID - /* - * For Linux <= 4.8, - * crgetgroups will only returns gi->blocks[0], which contains only - * the first NGROUPS_PER_BLOCK groups. - */ - if (rc > NGROUPS_PER_BLOCK) { - WARN_ON_ONCE(1); - rc = NGROUPS_PER_BLOCK; - } -#endif + return (rc); } /* * Return an array of supplemental gids. The returned address is safe * to use as long as the caller has taken a reference with crhold(). - * - * Linux 4.9 API change, group_info changed from 2d array via ->blocks to 1d - * array via ->gid. 
*/ gid_t * crgetgroups(const cred_t *cr) @@ -102,12 +89,8 @@ crgetgroups(const cred_t *cr) gid_t *gids = NULL; gi = cr->group_info; -#ifdef HAVE_GROUP_INFO_GID gids = KGIDP_TO_SGIDP(gi->gid); -#else - if (gi->nblocks > 0) - gids = KGIDP_TO_SGIDP(gi->blocks[0]); -#endif + return (gids); } diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 6ee0236d289a..6a95d77ac278 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -623,26 +623,6 @@ ddi_copyout(const void *from, void *to, size_t len, int flags) } EXPORT_SYMBOL(ddi_copyout); -static ssize_t -spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) -{ -#if defined(HAVE_KERNEL_READ_PPOS) - return (kernel_read(file, buf, count, pos)); -#else - mm_segment_t saved_fs; - ssize_t ret; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - ret = vfs_read(file, (void __user *)buf, count, pos); - - set_fs(saved_fs); - - return (ret); -#endif -} - static int spl_getattr(struct file *filp, struct kstat *stat) { @@ -651,16 +631,8 @@ spl_getattr(struct file *filp, struct kstat *stat) ASSERT(filp); ASSERT(stat); -#if defined(HAVE_4ARGS_VFS_GETATTR) rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&filp->f_path, stat); -#elif defined(HAVE_3ARGS_VFS_GETATTR) - rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat); -#else -#error "No available vfs_getattr()" -#endif if (rc) return (-rc); @@ -738,7 +710,7 @@ hostid_read(uint32_t *hostid) * Read directly into the variable like eglibc does. * Short reads are okay; native behavior is preserved. 
*/ - error = spl_kernel_read(filp, &value, sizeof (value), &off); + error = kernel_read(filp, &value, sizeof (value), &off); if (error < 0) { filp_close(filp, 0); return (EIO); diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index 16412bc9e6cf..7e806bd5699c 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -23,7 +23,6 @@ #define SPL_KMEM_CACHE_IMPLEMENTING -#include #include #include #include @@ -728,8 +727,7 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, skc->skc_obj_emergency = 0; skc->skc_obj_emergency_max = 0; - rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0, - GFP_KERNEL); + rc = percpu_counter_init(&skc->skc_linux_alloc, 0, GFP_KERNEL); if (rc != 0) { kfree(skc); return (NULL); @@ -788,25 +786,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, if (skc->skc_flags & KMC_RECLAIMABLE) slabflags |= SLAB_RECLAIM_ACCOUNT; -#if defined(SLAB_USERCOPY) - /* - * Required for PAX-enabled kernels if the slab is to be - * used for copying between user and kernel space. - */ - slabflags |= SLAB_USERCOPY; -#endif - -#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY) - /* - * Newer grsec patchset uses kmem_cache_create_usercopy() - * instead of SLAB_USERCOPY flag - */ skc->skc_linux_cache = kmem_cache_create_usercopy( skc->skc_name, size, align, slabflags, 0, size, NULL); -#else - skc->skc_linux_cache = kmem_cache_create( - skc->skc_name, size, align, slabflags, NULL); -#endif if (skc->skc_linux_cache == NULL) goto out; } @@ -1024,7 +1005,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) * then return so the local magazine can be rechecked for new objects. */ if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) { - rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, + rc = wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, TASK_UNINTERRUPTIBLE); return (rc ? 
rc : -EAGAIN); } diff --git a/module/os/linux/spl/spl-kmem.c b/module/os/linux/spl/spl-kmem.c index d85e2a7daa8d..cae304d33bc3 100644 --- a/module/os/linux/spl/spl-kmem.c +++ b/module/os/linux/spl/spl-kmem.c @@ -134,7 +134,6 @@ EXPORT_SYMBOL(kmem_strfree); void * spl_kvmalloc(size_t size, gfp_t lflags) { -#ifdef HAVE_KVMALLOC /* * GFP_KERNEL allocations can safely use kvmalloc which may * improve performance by avoiding a) high latency caused by @@ -146,7 +145,6 @@ spl_kvmalloc(size_t size, gfp_t lflags) */ if ((lflags & GFP_KERNEL) == GFP_KERNEL) return (kvmalloc(size, lflags)); -#endif gfp_t kmalloc_lflags = lflags; diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c index 9fefcd03c410..9af192274733 100644 --- a/module/os/linux/spl/spl-proc.c +++ b/module/os/linux/spl/spl-proc.c @@ -39,7 +39,7 @@ #include #include "zfs_gitrev.h" -#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) +#if defined(CONSTIFY_PLUGIN) typedef struct ctl_table __no_const spl_ctl_table; #else typedef struct ctl_table spl_ctl_table; diff --git a/module/os/linux/spl/spl-shrinker.c b/module/os/linux/spl/spl-shrinker.c index d5c8da471cbb..ff1c196d09f6 100644 --- a/module/os/linux/spl/spl-shrinker.c +++ b/module/os/linux/spl/spl-shrinker.c @@ -26,25 +26,6 @@ #include #include -#ifdef HAVE_SINGLE_SHRINKER_CALLBACK -/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */ -struct spl_shrinker_wrap { - struct shrinker shrinker; - spl_shrinker_cb countfunc; - spl_shrinker_cb scanfunc; -}; - -static int -spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc) -{ - struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker; - - if (sc->nr_to_scan != 0) - (void) sw->scanfunc(&sw->shrinker, sc); - return (sw->countfunc(&sw->shrinker, sc)); -} -#endif - struct shrinker * spl_register_shrinker(const char *name, spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost) @@ -52,34 +33,20 @@ 
spl_register_shrinker(const char *name, spl_shrinker_cb countfunc, struct shrinker *shrinker; /* allocate shrinker */ -#if defined(HAVE_SHRINKER_REGISTER) +#ifdef HAVE_SHRINKER_REGISTER /* 6.7: kernel will allocate the shrinker for us */ shrinker = shrinker_alloc(0, name); -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) - /* 3.12-6.6: we allocate the shrinker */ - shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP); -#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) - /* 3.0-3.11: allocate a wrapper */ - struct spl_shrinker_wrap *sw = - kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP); - shrinker = &sw->shrinker; #else - /* 2.x-2.6.22, or a newer shrinker API has been introduced. */ -#error "Unknown shrinker API" + /* 4.4-6.6: we allocate the shrinker */ + shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP); #endif if (shrinker == NULL) return (NULL); /* set callbacks */ -#ifdef HAVE_SINGLE_SHRINKER_CALLBACK - sw->countfunc = countfunc; - sw->scanfunc = scanfunc; - shrinker->shrink = spl_shrinker_single_cb; -#else shrinker->count_objects = countfunc; shrinker->scan_objects = scanfunc; -#endif /* set params */ shrinker->seeks = seek_cost; @@ -102,14 +69,9 @@ spl_unregister_shrinker(struct shrinker *shrinker) { #if defined(HAVE_SHRINKER_REGISTER) shrinker_free(shrinker); -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) +#else unregister_shrinker(shrinker); kmem_free(shrinker, sizeof (struct shrinker)); -#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) - unregister_shrinker(shrinker); - kmem_free(shrinker, sizeof (struct spl_shrinker_wrap)); -#else -#error "Unknown shrinker API" #endif } EXPORT_SYMBOL(spl_unregister_shrinker); diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index c16bc9bc6409..7f4cab5da114 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -35,9 +35,7 @@ #include #include #include -#ifdef HAVE_CPU_HOTPLUG #include -#endif typedef struct taskq_kstats { /* static values, for 
completeness */ @@ -156,10 +154,8 @@ EXPORT_SYMBOL(system_delay_taskq); static taskq_t *dynamic_taskq; static taskq_thread_t *taskq_thread_create(taskq_t *); -#ifdef HAVE_CPU_HOTPLUG /* Multi-callback id for cpu hotplugging. */ static int spl_taskq_cpuhp_state; -#endif /* List of all taskqs */ LIST_HEAD(tq_list); @@ -351,7 +347,7 @@ task_expire_impl(taskq_ent_t *t) } static void -task_expire(spl_timer_list_t tl) +task_expire(struct timer_list *tl) { struct timer_list *tmr = (struct timer_list *)tl; taskq_ent_t *t = from_timer(t, tmr, tqent_timer); @@ -1349,7 +1345,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri, return (NULL); tq->tq_hp_support = B_FALSE; -#ifdef HAVE_CPU_HOTPLUG + if (flags & TASKQ_THREADS_CPU_PCT) { tq->tq_hp_support = B_TRUE; if (cpuhp_state_add_instance_nocalls(spl_taskq_cpuhp_state, @@ -1358,7 +1354,6 @@ taskq_create(const char *name, int threads_arg, pri_t pri, return (NULL); } } -#endif spin_lock_init(&tq->tq_lock); INIT_LIST_HEAD(&tq->tq_thread_list); @@ -1447,12 +1442,11 @@ taskq_destroy(taskq_t *tq) tq->tq_flags &= ~TASKQ_ACTIVE; spin_unlock_irqrestore(&tq->tq_lock, flags); -#ifdef HAVE_CPU_HOTPLUG if (tq->tq_hp_support) { VERIFY0(cpuhp_state_remove_instance_nocalls( spl_taskq_cpuhp_state, &tq->tq_hp_cb_node)); } -#endif + /* * When TASKQ_ACTIVE is clear new tasks may not be added nor may * new worker threads be spawned for dynamic taskq. @@ -1709,7 +1703,6 @@ module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint, MODULE_PARM_DESC(spl_taskq_kick, "Write nonzero to kick stuck taskqs to spawn more threads"); -#ifdef HAVE_CPU_HOTPLUG /* * This callback will be called exactly once for each core that comes online, * for each dynamic taskq. 
We attempt to expand taskqs that have @@ -1787,7 +1780,6 @@ spl_taskq_prepare_down(unsigned int cpu, struct hlist_node *node) spin_unlock_irqrestore(&tq->tq_lock, flags); return (0); } -#endif int spl_taskq_init(void) @@ -1795,10 +1787,8 @@ spl_taskq_init(void) init_rwsem(&tq_list_sem); tsd_create(&taskq_tsd, NULL); -#ifdef HAVE_CPU_HOTPLUG spl_taskq_cpuhp_state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "fs/spl_taskq:online", spl_taskq_expand, spl_taskq_prepare_down); -#endif system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64), maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); @@ -1808,9 +1798,7 @@ spl_taskq_init(void) system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4), maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); if (system_delay_taskq == NULL) { -#ifdef HAVE_CPU_HOTPLUG cpuhp_remove_multi_state(spl_taskq_cpuhp_state); -#endif taskq_destroy(system_taskq); return (-ENOMEM); } @@ -1818,9 +1806,7 @@ spl_taskq_init(void) dynamic_taskq = taskq_create("spl_dynamic_taskq", 1, maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE); if (dynamic_taskq == NULL) { -#ifdef HAVE_CPU_HOTPLUG cpuhp_remove_multi_state(spl_taskq_cpuhp_state); -#endif taskq_destroy(system_taskq); taskq_destroy(system_delay_taskq); return (-ENOMEM); @@ -1854,8 +1840,6 @@ spl_taskq_fini(void) tsd_destroy(&taskq_tsd); -#ifdef HAVE_CPU_HOTPLUG cpuhp_remove_multi_state(spl_taskq_cpuhp_state); spl_taskq_cpuhp_state = 0; -#endif } diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index 2af766ac2049..7f74d44f91ff 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -162,30 +162,24 @@ issig(void) if (!signal_pending(current)) return (0); - struct task_struct *task = current; spl_kernel_siginfo_t __info; sigset_t set; siginitsetinv(&set, 1ULL << (SIGSTOP - 1) | 1ULL << (SIGTSTP - 1)); - sigorsets(&set, &task->blocked, &set); + sigorsets(&set, &current->blocked, &set); - 
spin_lock_irq(&task->sighand->siglock); -#ifdef HAVE_DEQUEUE_SIGNAL_4ARG + spin_lock_irq(&current->sighand->siglock); +#if defined(HAVE_DEQUEUE_SIGNAL_4ARG) enum pid_type __type; - if (dequeue_signal(task, &set, &__info, &__type) != 0) { + if (dequeue_signal(current, &set, &__info, &__type) != 0) { +#elif defined(HAVE_DEQUEUE_SIGNAL_3ARG_TASK) + if (dequeue_signal(current, &set, &__info) != 0) { #else - if (dequeue_signal(task, &set, &__info) != 0) { + enum pid_type __type; + if (dequeue_signal(&set, &__info, &__type) != 0) { #endif -#ifdef HAVE_SIGNAL_STOP - spin_unlock_irq(&task->sighand->siglock); - kernel_signal_stop(); -#else - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - spl_set_special_state(TASK_STOPPED); - spin_unlock_irq(&current->sighand->siglock); + kernel_signal_stop(); - schedule(); -#endif /* * Dequeued SIGSTOP/SIGTSTP. * Check if process has other singal pending. @@ -196,7 +190,7 @@ issig(void) return (0); } - spin_unlock_irq(&task->sighand->siglock); + spin_unlock_irq(&current->sighand->siglock); return (1); } diff --git a/module/os/linux/spl/spl-vmem.c b/module/os/linux/spl/spl-vmem.c index cab3e9549cfe..7e2402477705 100644 --- a/module/os/linux/spl/spl-vmem.c +++ b/module/os/linux/spl/spl-vmem.c @@ -21,7 +21,6 @@ * with the SPL. If not, see . 
*/ -#include #include #include #include diff --git a/module/os/linux/spl/spl-zlib.c b/module/os/linux/spl/spl-zlib.c index a7b6c14ee150..68078ac32bc5 100644 --- a/module/os/linux/spl/spl-zlib.c +++ b/module/os/linux/spl/spl-zlib.c @@ -53,7 +53,6 @@ */ -#include #include #include #include diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c index d0d0cca154a7..58b5e0dc44b7 100644 --- a/module/os/linux/spl/spl-zone.c +++ b/module/os/linux/spl/spl-zone.c @@ -54,7 +54,7 @@ typedef struct zone_dataset { char zd_dsname[]; /* name of the member dataset */ } zone_dataset_t; -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS /* * Returns: * - 0 on success @@ -95,18 +95,14 @@ user_ns_get(int fd, struct user_namespace **userns) return (error); } -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ static unsigned int user_ns_zoneid(struct user_namespace *user_ns) { unsigned int r; -#if defined(HAVE_USER_NS_COMMON_INUM) r = user_ns->ns.inum; -#else - r = user_ns->proc_inum; -#endif return (r); } @@ -123,7 +119,7 @@ zone_datasets_lookup(unsigned int nsinum) return (NULL); } -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS static struct zone_dataset * zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) { @@ -148,7 +144,7 @@ zone_dataset_cred_check(cred_t *cred) return (0); } -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ static int zone_dataset_name_check(const char *dataset, size_t *dsnamelen) @@ -168,7 +164,7 @@ zone_dataset_name_check(const char *dataset, size_t *dsnamelen) int zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) { -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS struct user_namespace *userns; zone_datasets_t *zds; zone_dataset_t *zd; @@ -213,14 +209,14 @@ 
zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) return (0); #else return (ENXIO); -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ } EXPORT_SYMBOL(zone_dataset_attach); int zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) { -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS struct user_namespace *userns; zone_datasets_t *zds; zone_dataset_t *zd; @@ -262,7 +258,7 @@ zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) return (0); #else return (ENXIO); -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ } EXPORT_SYMBOL(zone_dataset_detach); diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 60287ccdda98..03362b1ee860 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -186,6 +186,7 @@ static int zfs_abd_scatter_min_size = 512 * 3; abd_t *abd_zero_scatter = NULL; struct page; + /* * abd_zero_page is assigned to each of the pages of abd_zero_scatter. It will * point to ZERO_PAGE if it is available or it will be an allocated zero'd @@ -453,14 +454,21 @@ abd_free_chunks(abd_t *abd) if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK) ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); - abd_for_each_sg(abd, sg, nr_pages, i) { - page = sg_page(sg); - abd_unmark_zfs_page(page); - order = compound_order(page); - __free_pages(page, order); - ASSERT3U(sg->length, <=, PAGE_SIZE << order); - ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]); + /* + * Scatter ABDs may be constructed by abd_alloc_from_pages() from + * an array of pages. In which case they should not be freed. 
+ */ + if (!abd_is_from_pages(abd)) { + abd_for_each_sg(abd, sg, nr_pages, i) { + page = sg_page(sg); + abd_unmark_zfs_page(page); + order = compound_order(page); + __free_pages(page, order); + ASSERT3U(sg->length, <=, PAGE_SIZE << order); + ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]); + } } + abd_free_sg_table(abd); } @@ -551,17 +559,19 @@ abd_update_linear_stats(abd_t *abd, abd_stats_op_t op) void abd_verify_scatter(abd_t *abd) { - size_t n; - int i = 0; - struct scatterlist *sg = NULL; - ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0); ASSERT3U(ABD_SCATTER(abd).abd_offset, <, ABD_SCATTER(abd).abd_sgl->length); - n = ABD_SCATTER(abd).abd_nents; + +#ifdef ZFS_DEBUG + struct scatterlist *sg = NULL; + size_t n = ABD_SCATTER(abd).abd_nents; + int i = 0; + abd_for_each_sg(abd, sg, n, i) { ASSERT3P(sg_page(sg), !=, NULL); } +#endif } static void @@ -687,14 +697,77 @@ abd_free_linear_page(abd_t *abd) { /* Transform it back into a scatter ABD for freeing */ struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; + + /* When backed by user page unmap it */ + if (abd_is_from_pages(abd)) + zfs_kunmap(sg_page(sg)); + abd->abd_flags &= ~ABD_FLAG_LINEAR; abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; ABD_SCATTER(abd).abd_nents = 1; ABD_SCATTER(abd).abd_offset = 0; ABD_SCATTER(abd).abd_sgl = sg; abd_free_chunks(abd); +} + +/* + * Allocate a scatter ABD structure from user pages. The pages must be + * pinned with get_user_pages, or similar, but need not be mapped via + * the kmap interfaces. + */ +abd_t * +abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size) +{ + uint_t npages = DIV_ROUND_UP(size, PAGE_SIZE); + struct sg_table table; - abd_update_scatter_stats(abd, ABDSTAT_DECR); + VERIFY3U(size, <=, DMU_MAX_ACCESS); + ASSERT3U(offset, <, PAGE_SIZE); + ASSERT3P(pages, !=, NULL); + + /* + * Even if this buf is filesystem metadata, we only track that we + * own the underlying data buffer, which is not true in this case.
+ * Therefore, we don't ever use ABD_FLAG_META here. + */ + abd_t *abd = abd_alloc_struct(0); + abd->abd_flags |= ABD_FLAG_FROM_PAGES | ABD_FLAG_OWNER; + abd->abd_size = size; + + while (sg_alloc_table_from_pages(&table, pages, npages, offset, + size, __GFP_NOWARN | GFP_NOIO) != 0) { + ABDSTAT_BUMP(abdstat_scatter_sg_table_retry); + schedule_timeout_interruptible(1); + } + + if ((offset + size) <= PAGE_SIZE) { + /* + * Since there is only one entry, this ABD can be represented + * as a linear buffer. All single-page (4K) ABD's constructed + * from a user page can be represented this way as long as the + * page is mapped to a virtual address. This allows us to + * apply an offset into the mapped page. + * + * Note that kmap() must be used, not kmap_atomic(), because + * the mapping needs to be set up on all CPUs. Using kmap() + * also enables the use of highmem pages when required. + */ + abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE; + abd->abd_u.abd_linear.abd_sgl = table.sgl; + zfs_kmap(sg_page(table.sgl)); + ABD_LINEAR_BUF(abd) = sg_virt(table.sgl); + } else { + ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); + abd->abd_flags |= ABD_FLAG_MULTI_CHUNK; + + ABD_SCATTER(abd).abd_offset = offset; + ABD_SCATTER(abd).abd_sgl = table.sgl; + ABD_SCATTER(abd).abd_nents = table.nents; + + ASSERT0(ABD_SCATTER(abd).abd_offset); + } + + return (abd); } /* @@ -746,6 +819,9 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, ABD_SCATTER(abd).abd_offset = new_offset; ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i; + if (abd_is_from_pages(sabd)) + abd->abd_flags |= ABD_FLAG_FROM_PAGES; + return (abd); } @@ -873,6 +949,115 @@ abd_cache_reap_now(void) { } +/* + * Borrow a raw buffer from an ABD without copying the contents of the ABD + * into the buffer. If the ABD is scattered, this will allocate a raw buffer + * whose contents are undefined. To copy over the existing data in the ABD, use + * abd_borrow_buf_copy() instead.
+ */ +void * +abd_borrow_buf(abd_t *abd, size_t n) +{ + void *buf; + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, 0); + /* + * In the event the ABD is composed of a single user page from Direct + * I/O we can not directly return the raw buffer. This is a consequence + * of not being able to write protect the page and the contents of the + * page can be changed at any time by the user. + */ + if (abd_is_from_pages(abd)) { + buf = zio_buf_alloc(n); + } else if (abd_is_linear(abd)) { + buf = abd_to_buf(abd); + } else { + buf = zio_buf_alloc(n); + } + +#ifdef ZFS_DEBUG + (void) zfs_refcount_add_many(&abd->abd_children, n, buf); +#endif + return (buf); +} + +void * +abd_borrow_buf_copy(abd_t *abd, size_t n) +{ + void *buf = abd_borrow_buf(abd, n); + + /* + * In the event the ABD is composed of a single user page from Direct + * I/O we must make sure to copy the data over into the newly allocated + * buffer. This is a consequence of the fact that we can not write + * protect the user page and there is a risk the contents of the page + * could be changed by the user at any moment. + */ + if (!abd_is_linear(abd) || abd_is_from_pages(abd)) { + abd_copy_to_buf(buf, abd, n); + } + return (buf); +} + +/* + * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will + * not change the contents of the ABD. If you want any changes you made to + * buf to be copied back to abd, use abd_return_buf_copy() instead. If the + * ABD is not constructed from user pages for Direct I/O then an ASSERT + * checks to make sure the contents of buffer have not changed since it was + * borrowed. We can not ASSERT that the contents of the buffer have not changed + * if it is composed of user pages because the pages can not be placed under + * write protection and the user could have possibly changed the contents in + * the pages at any time.
+ */ +void +abd_return_buf(abd_t *abd, void *buf, size_t n) +{ + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif + if (abd_is_from_pages(abd)) { + zio_buf_free(buf, n); + } else if (abd_is_linear(abd)) { + ASSERT3P(buf, ==, abd_to_buf(abd)); + } else if (abd_is_gang(abd)) { +#ifdef ZFS_DEBUG + /* + * We have to be careful with gang ABD's that we do not ASSERT0 + * for any ABD's that contain user pages from Direct I/O. In + * order to handle this, we just iterate through the gang ABD + * and only verify ABDs that are not from user pages. + */ + void *cmp_buf = buf; + + for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain); + cabd != NULL; + cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { + if (!abd_is_from_pages(cabd)) { + ASSERT0(abd_cmp_buf(cabd, cmp_buf, + cabd->abd_size)); + } + cmp_buf = (char *)cmp_buf + cabd->abd_size; + } +#endif + zio_buf_free(buf, n); + } else { + ASSERT0(abd_cmp_buf(abd, buf, n)); + zio_buf_free(buf, n); + } +} + +void +abd_return_buf_copy(abd_t *abd, void *buf, size_t n) +{ + if (!abd_is_linear(abd) || abd_is_from_pages(abd)) { + abd_copy_from_buf(abd, buf, n); + } + abd_return_buf(abd, buf, n); +} + /* * This is abd_iter_page(), the function underneath abd_iterate_page_func(). * It yields the next page struct and data offset and size within it, without @@ -896,23 +1081,30 @@ abd_cache_reap_now(void) * Before kernel 4.5 however, compound page heads were refcounted separately * from tail pages, such that moving back to the head page would require us to * take a reference to it and releasing it once we're completely finished with - * it. In practice, that means when our caller is done with the ABD, which we + * it. In practice, that meant when our caller is done with the ABD, which we * have no insight into from here. 
Rather than contort this API to track head - * page references on such ancient kernels, we disable this special compound - * page handling on 4.5, instead just using treating each page within it as a - * regular PAGESIZE page (which it is). This is slightly less efficient, but - * makes everything far simpler. + * page references on such ancient kernels, we disabled this special compound + * page handling on kernels before 4.5, instead just treating each page + * within it as a regular PAGESIZE page (which it is). This is slightly less + * efficient, but makes everything far simpler. + * + * We no longer support kernels before 4.5, so in theory none of this is + * necessary. However, this code is still relatively new in the grand scheme of + * things, so I'm leaving the ability to compile this out for the moment. * - * The below test sets/clears ABD_ITER_COMPOUND_PAGES to enable/disable the - * special handling, and also defines the ABD_ITER_PAGE_SIZE(page) macro to - * understand compound pages, or not, as required. + * Setting/clearing ABD_ITER_COMPOUND_PAGES below enables/disables the special + * handling, by defining the ABD_ITER_PAGE_SIZE(page) macro to understand + * compound pages, or not, and compiling in/out the support to detect compound + * tail pages and move back to the start. */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) -#define ABD_ITER_COMPOUND_PAGES 1 + +/* On by default */ +#define ABD_ITER_COMPOUND_PAGES + +#ifdef ABD_ITER_COMPOUND_PAGES #define ABD_ITER_PAGE_SIZE(page) \ (PageCompound(page) ?
page_size(page) : PAGESIZE) #else -#undef ABD_ITER_COMPOUND_PAGES #define ABD_ITER_PAGE_SIZE(page) (PAGESIZE) #endif diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index c6b9cb2ddb3f..b1e45b28743e 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -42,7 +42,6 @@ #include #include #include -#ifdef _KERNEL #include #include #include @@ -50,7 +49,6 @@ #include #include #include -#endif #include #include #include @@ -59,7 +57,6 @@ #include #include -#ifdef _KERNEL /* * This is a limit on how many pages the ARC shrinker makes available for * eviction in response to one page allocation attempt. Note that in @@ -87,7 +84,6 @@ static int zfs_arc_shrinker_seeks = DEFAULT_SEEKS; #ifdef CONFIG_MEMORY_HOTPLUG static struct notifier_block arc_hotplug_callback_mem_nb; #endif -#endif /* * Return a default max arc size based on the amount of physical memory. @@ -105,7 +101,6 @@ arc_default_max(uint64_t min, uint64_t allmem) return (MAX(allmem * 5 / 8, size)); } -#ifdef _KERNEL /* * Return maximum amount of memory that we could possibly use. Reduced * to half of all memory in user space which is primarily used for testing. 
@@ -459,48 +454,6 @@ arc_unregister_hotplug(void) unregister_memory_notifier(&arc_hotplug_callback_mem_nb); #endif } -#else /* _KERNEL */ -int64_t -arc_available_memory(void) -{ - int64_t lowest = INT64_MAX; - - /* Every 100 calls, free a small amount */ - if (random_in_range(100) == 0) - lowest = -1024; - - return (lowest); -} - -int -arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) -{ - (void) spa, (void) reserve, (void) txg; - return (0); -} - -uint64_t -arc_all_memory(void) -{ - return (ptob(physmem) / 2); -} - -uint64_t -arc_free_memory(void) -{ - return (random_in_range(arc_all_memory() * 20 / 100)); -} - -void -arc_register_hotplug(void) -{ -} - -void -arc_unregister_hotplug(void) -{ -} -#endif /* _KERNEL */ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, "Limit on number of pages that ARC shrinker can reclaim at once"); diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index a3801c605dd6..a7b5a7b3e66d 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -296,22 +296,13 @@ secpolicy_zfs(const cred_t *cr) * Equivalent to secpolicy_zfs(), but works even if the cred_t is not that of * the current process. Takes both cred_t and proc_t so that this can work * easily on all platforms. - * - * The has_capability() function was first exported in the 4.10 Linux kernel - * then backported to some LTS kernels. Prior to this change there was no - * mechanism to perform this check therefore EACCES is returned when the - * functionality is not present in the kernel. 
*/ int secpolicy_zfs_proc(const cred_t *cr, proc_t *proc) { -#if defined(HAVE_HAS_CAPABILITY) if (!has_capability(proc, CAP_SYS_ADMIN)) return (EACCES); return (0); -#else - return (EACCES); -#endif } void diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index e69c5f3841ec..a6271d3a7df1 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -38,9 +38,7 @@ #include #include #include -#ifdef HAVE_LINUX_BLK_CGROUP_HEADER #include -#endif /* * Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying @@ -481,16 +479,6 @@ vdev_disk_close(vdev_t *v) v->vdev_tsd = NULL; } -static inline void -vdev_submit_bio_impl(struct bio *bio) -{ -#ifdef HAVE_1ARG_SUBMIT_BIO - (void) submit_bio(bio); -#else - (void) submit_bio(bio_data_dir(bio), bio); -#endif -} - /* * preempt_schedule_notrace is GPL-only which breaks the ZFS build, so * replace it with preempt_schedule under the following condition: @@ -508,7 +496,6 @@ vdev_submit_bio_impl(struct bio *bio) */ #if !defined(HAVE_BIO_ALLOC_4ARG) -#ifdef HAVE_BIO_SET_DEV #if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY) /* * The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by @@ -594,16 +581,6 @@ vdev_bio_set_dev(struct bio *bio, struct block_device *bdev) #define bio_set_dev vdev_bio_set_dev #endif #endif -#else -/* - * Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels. 
- */ -static inline void -bio_set_dev(struct bio *bio, struct block_device *bdev) -{ - bio->bi_bdev = bdev; -} -#endif /* HAVE_BIO_SET_DEV */ #endif /* !HAVE_BIO_ALLOC_4ARG */ static inline void @@ -611,7 +588,7 @@ vdev_submit_bio(struct bio *bio) { struct bio_list *bio_list = current->bio_list; current->bio_list = NULL; - vdev_submit_bio_impl(bio); + (void) submit_bio(bio); current->bio_list = bio_list; } @@ -709,7 +686,7 @@ vbio_alloc(zio_t *zio, struct block_device *bdev, int flags) return (vbio); } -BIO_END_IO_PROTO(vbio_completion, bio, error); +static void vbio_completion(struct bio *bio); static int vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset) @@ -805,7 +782,8 @@ vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size) } /* IO completion callback */ -BIO_END_IO_PROTO(vbio_completion, bio, error) +static void +vbio_completion(struct bio *bio) { vbio_t *vbio = bio->bi_private; zio_t *zio = vbio->vbio_zio; @@ -813,15 +791,7 @@ BIO_END_IO_PROTO(vbio_completion, bio, error) ASSERT(zio); /* Capture and log any errors */ -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = 0; - if (error) - zio->io_error = -(error); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - zio->io_error = EIO; -#endif + zio->io_error = bi_status_to_errno(bio->bi_status); ASSERT3U(zio->io_error, >=, 0); if (zio->io_error) @@ -1072,19 +1042,13 @@ vdev_classic_dio_put(dio_request_t *dr) } } -BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error) +static void +vdev_classic_physio_completion(struct bio *bio) { dio_request_t *dr = bio->bi_private; if (dr->dr_error == 0) { -#ifdef HAVE_1ARG_BIO_END_IO_T - dr->dr_error = BIO_END_IO_ERROR(bio); -#else - if (error) - dr->dr_error = -(error); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - dr->dr_error = EIO; -#endif + dr->dr_error = bi_status_to_errno(bio->bi_status); } /* Drop reference acquired by vdev_classic_physio */ @@ -1223,14 +1187,11 @@ 
vdev_classic_physio(zio_t *zio) /* ========== */ -BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error) +static void +vdev_disk_io_flush_completion(struct bio *bio) { zio_t *zio = bio->bi_private; -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = -error; -#endif + zio->io_error = bi_status_to_errno(bio->bi_status); if (zio->io_error && (zio->io_error == EOPNOTSUPP)) zio->io_vd->vdev_nowritecache = B_TRUE; @@ -1265,14 +1226,12 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) return (0); } -BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error) +static void +vdev_disk_discard_end_io(struct bio *bio) { zio_t *zio = bio->bi_private; -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = -error; -#endif + zio->io_error = bi_status_to_errno(bio->bi_status); + bio_put(bio); if (zio->io_error) vdev_disk_error(zio); diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c index ac41a2615f16..4bffb6412ffd 100644 --- a/module/os/linux/zfs/vdev_file.c +++ b/module/os/linux/zfs/vdev_file.c @@ -274,14 +274,9 @@ vdev_file_io_start(zio_t *zio) zio_execute(zio); return; } else if (zio->io_type == ZIO_TYPE_TRIM) { - int mode = 0; - ASSERT3U(zio->io_size, !=, 0); -#ifdef __linux__ - mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; -#endif - zio->io_error = zfs_file_fallocate(vf->vf_file, - mode, zio->io_offset, zio->io_size); + zio->io_error = zfs_file_deallocate(vf->vf_file, + zio->io_offset, zio->io_size); zio_execute(zio); return; } diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c index e83672c468fa..eccd280c494a 100644 --- a/module/os/linux/zfs/zfs_acl.c +++ b/module/os/linux/zfs/zfs_acl.c @@ -472,7 +472,7 @@ zfs_acl_node_alloc(size_t bytes) aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); if (bytes) { - aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); + aclnode->z_acldata = kmem_zalloc(bytes, KM_SLEEP); 
aclnode->z_allocdata = aclnode->z_acldata; aclnode->z_allocsize = bytes; aclnode->z_size = bytes; diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c index e042116333fb..8a42a075cd25 100644 --- a/module/os/linux/zfs/zfs_ctldir.c +++ b/module/os/linux/zfs/zfs_ctldir.c @@ -111,6 +111,7 @@ static krwlock_t zfs_snapshot_lock; */ int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; static int zfs_admin_snapshot = 0; +static int zfs_snapshot_no_setuid = 0; typedef struct { char *se_name; /* full snapshot name */ @@ -500,9 +501,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_atime_dirty = B_FALSE; zp->z_zn_prefetch = B_FALSE; zp->z_is_sa = B_FALSE; -#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) - zp->z_is_mapped = B_FALSE; -#endif zp->z_is_ctldir = B_TRUE; zp->z_sa_hdl = NULL; zp->z_blksz = 0; @@ -810,7 +808,9 @@ zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp, if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); - if (strcmp(name, "..") == 0) { + if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { + *ipp = NULL; + } else if (strcmp(name, "..") == 0) { *ipp = dip->i_sb->s_root->d_inode; } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, @@ -1100,9 +1100,9 @@ zfsctl_snapshot_mount(struct path *path, int flags) zfsvfs_t *zfsvfs; zfsvfs_t *snap_zfsvfs; zfs_snapentry_t *se; - char *full_name, *full_path; + char *full_name, *full_path, *options; char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n", - NULL, NULL, NULL }; + "-o", NULL, NULL, NULL, NULL }; char *envp[] = { NULL }; int error; struct path spath; @@ -1116,6 +1116,7 @@ zfsctl_snapshot_mount(struct path *path, int flags) full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + options = kmem_zalloc(7, KM_SLEEP); error = zfsctl_snapshot_name(zfsvfs, dname(dentry), ZFS_MAX_DATASET_NAME_LEN, full_name); @@ -1131,6 +1132,9 @@ 
zfsctl_snapshot_mount(struct path *path, int flags) zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", dname(dentry)); + snprintf(options, 7, "%s", + zfs_snapshot_no_setuid ? "nosuid" : "suid"); + /* * Multiple concurrent automounts of a snapshot are never allowed. * The snapshot may be manually mounted as many times as desired. @@ -1153,8 +1157,9 @@ zfsctl_snapshot_mount(struct path *path, int flags) * value from call_usermodehelper() will be (exitcode << 8 + signal). */ dprintf("mount; name=%s path=%s\n", full_name, full_path); - argv[6] = full_name; - argv[7] = full_path; + argv[7] = options; + argv[8] = full_name; + argv[9] = full_path; error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (error) { if (!(error & MOUNT_BUSY << 8)) { @@ -1315,3 +1320,7 @@ MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); module_param(zfs_expire_snapshot, int, 0644); MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); + +module_param(zfs_snapshot_no_setuid, int, 0644); +MODULE_PARM_DESC(zfs_snapshot_no_setuid, + "Disable setuid/setgid for automounts in .zfs/snapshot"); diff --git a/module/os/linux/zfs/zfs_debug.c b/module/os/linux/zfs/zfs_debug.c index 9ee40771fc19..a017900d5538 100644 --- a/module/os/linux/zfs/zfs_debug.c +++ b/module/os/linux/zfs/zfs_debug.c @@ -111,12 +111,7 @@ zfs_dbgmsg_fini(void) procfs_list_uninstall(&zfs_dbgmsgs); zfs_dbgmsg_purge(0); - /* - * TODO - decide how to make this permanent - */ -#ifdef _KERNEL procfs_list_destroy(&zfs_dbgmsgs); -#endif } void @@ -148,8 +143,6 @@ __zfs_dbgmsg(char *buf) mutex_exit(&zfs_dbgmsgs.pl_lock); } -#ifdef _KERNEL - void __dprintf(boolean_t dprint, const char *file, const char *func, int line, const char *fmt, ...) 
@@ -218,38 +211,6 @@ __dprintf(boolean_t dprint, const char *file, const char *func, kmem_free(buf, size); } -#else - -void -zfs_dbgmsg_print(int fd, const char *tag) -{ - ssize_t ret __attribute__((unused)); - - mutex_enter(&zfs_dbgmsgs.pl_lock); - - /* - * We use write() in this function instead of printf() - * so it is safe to call from a signal handler. - */ - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") START:\n", 9); - - for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL; - zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) { - ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); - ret = write(fd, "\n", 1); - } - - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") END\n", 6); - - mutex_exit(&zfs_dbgmsgs.pl_lock); -} -#endif /* _KERNEL */ - -#ifdef _KERNEL module_param(zfs_dbgmsg_enable, int, 0644); MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log"); @@ -257,4 +218,3 @@ MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log"); module_param(zfs_dbgmsg_maxsize, uint, 0644); /* END CSTYLED */ MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size"); -#endif diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 1eeabe53d23c..564e89b37d11 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -415,6 +415,9 @@ zfs_dirlook(znode_t *dzp, char *name, znode_t **zpp, int flags, *zpp = zp; rw_exit(&dzp->z_parent_lock); } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { + if (ZTOZSB(dzp)->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { + return (SET_ERROR(ENOENT)); + } ip = zfsctl_root(dzp); *zpp = ITOZ(ip); } else { @@ -476,7 +479,7 @@ zfs_unlinked_drain_task(void *arg) { zfsvfs_t *zfsvfs = arg; zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap = zap_attribute_alloc(); dmu_object_info_t doi; znode_t *zp; int error; @@ -487,7 +490,7 @@ 
zfs_unlinked_drain_task(void *arg) * Iterate over the contents of the unlinked set. */ for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); - zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel; + zap_cursor_retrieve(&zc, zap) == 0 && !zfsvfs->z_drain_cancel; zap_cursor_advance(&zc)) { /* @@ -495,7 +498,7 @@ zfs_unlinked_drain_task(void *arg) */ error = dmu_object_info(zfsvfs->z_os, - zap.za_first_integer, &doi); + zap->za_first_integer, &doi); if (error != 0) continue; @@ -505,7 +508,7 @@ zfs_unlinked_drain_task(void *arg) * We need to re-mark these list entries for deletion, * so we pull them back into core and set zp->z_unlinked. */ - error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); + error = zfs_zget(zfsvfs, zap->za_first_integer, &zp); /* * We may pick up znodes that are already marked for deletion. @@ -532,6 +535,7 @@ zfs_unlinked_drain_task(void *arg) zfsvfs->z_draining = B_FALSE; zfsvfs->z_drain_task = TASKQID_INVALID; + zap_attribute_free(zap); } /* @@ -589,7 +593,7 @@ static int zfs_purgedir(znode_t *dzp) { zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap = zap_attribute_alloc(); znode_t *xzp; dmu_tx_t *tx; zfsvfs_t *zfsvfs = ZTOZSB(dzp); @@ -598,10 +602,10 @@ zfs_purgedir(znode_t *dzp) int error; for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); - (error = zap_cursor_retrieve(&zc, &zap)) == 0; + (error = zap_cursor_retrieve(&zc, zap)) == 0; zap_cursor_advance(&zc)) { error = zfs_zget(zfsvfs, - ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); + ZFS_DIRENT_OBJ(zap->za_first_integer), &xzp); if (error) { skipped += 1; continue; @@ -612,7 +616,7 @@ zfs_purgedir(znode_t *dzp) tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); + dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap->za_name); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); /* Is this really needed ? 
*/ @@ -627,7 +631,7 @@ zfs_purgedir(znode_t *dzp) } memset(&dl, 0, sizeof (dl)); dl.dl_dzp = dzp; - dl.dl_name = zap.za_name; + dl.dl_name = zap->za_name; error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); if (error) @@ -637,6 +641,7 @@ zfs_purgedir(znode_t *dzp) zfs_zrele_async(xzp); } zap_cursor_fini(&zc); + zap_attribute_free(zap); if (error != ENOENT) skipped += 1; return (skipped); @@ -845,6 +850,15 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) return (error); } + /* + * If we added a longname activate the SPA_FEATURE_LONGNAME. + */ + if (strlen(dl->dl_name) >= ZAP_MAXNAMELEN) { + dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os); + ds->ds_feature_activation[SPA_FEATURE_LONGNAME] = + (void *)B_TRUE; + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &dzp->z_id, sizeof (dzp->z_id)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c index bc753614be27..4acdc6a4156a 100644 --- a/module/os/linux/zfs/zfs_file_os.c +++ b/module/os/linux/zfs/zfs_file_os.c @@ -69,26 +69,6 @@ zfs_file_close(zfs_file_t *fp) filp_close(fp, 0); } -static ssize_t -zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *off) -{ -#if defined(HAVE_KERNEL_WRITE_PPOS) - return (kernel_write(fp, buf, count, off)); -#else - mm_segment_t saved_fs; - ssize_t rc; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = vfs_write(fp, (__force const char __user __user *)buf, count, off); - - set_fs(saved_fs); - - return (rc); -#endif -} - /* * Stateful write - use os internal file pointer to determine where to * write and update on successful completion. 
@@ -106,7 +86,7 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) loff_t off = fp->f_pos; ssize_t rc; - rc = zfs_file_write_impl(fp, buf, count, &off); + rc = kernel_write(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -138,7 +118,7 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, { ssize_t rc; - rc = zfs_file_write_impl(fp, buf, count, &off); + rc = kernel_write(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -151,25 +131,6 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, return (0); } -static ssize_t -zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off) -{ -#if defined(HAVE_KERNEL_READ_PPOS) - return (kernel_read(fp, buf, count, off)); -#else - mm_segment_t saved_fs; - ssize_t rc; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = vfs_read(fp, (void __user *)buf, count, off); - set_fs(saved_fs); - - return (rc); -#endif -} - /* * Stateful read - use os internal file pointer to determine where to * read and update on successful completion. 
@@ -187,7 +148,7 @@ zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid) loff_t off = fp->f_pos; ssize_t rc; - rc = zfs_file_read_impl(fp, buf, count, &off); + rc = kernel_read(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -219,7 +180,7 @@ zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off, { ssize_t rc; - rc = zfs_file_read_impl(fp, buf, count, &off); + rc = kernel_read(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -274,16 +235,8 @@ zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr) struct kstat stat; int rc; -#if defined(HAVE_4ARGS_VFS_GETATTR) rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&filp->f_path, &stat); -#elif defined(HAVE_3ARGS_VFS_GETATTR) - rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); -#else -#error "No available vfs_getattr()" -#endif if (rc) return (-rc); @@ -328,17 +281,14 @@ zfs_file_fsync(zfs_file_t *filp, int flags) } /* - * fallocate - allocate or free space on disk + * deallocate - zero and/or deallocate file storage * * fp - file pointer - * mode (non-standard options for hole punching etc) - * offset - offset to start allocating or freeing from - * len - length to free / allocate - * - * OPTIONAL + * offset - offset to start zeroing or deallocating + * len - length to zero or deallocate */ int -zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) +zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len) { /* * May enter XFS which generates a warning when PF_FSTRANS is set. 
@@ -354,12 +304,16 @@ zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) */ int error = EOPNOTSUPP; if (fp->f_op->fallocate) - error = fp->f_op->fallocate(fp, mode, offset, len); + error = -fp->f_op->fallocate(fp, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len); if (fstrans) current->flags |= __SPL_PF_FSTRANS; - return (error); + if (error) + return (SET_ERROR(error)); + + return (0); } /* diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c index 663474ea49ab..502d173b70f9 100644 --- a/module/os/linux/zfs/zfs_ioctl_os.c +++ b/module/os/linux/zfs/zfs_ioctl_os.c @@ -303,6 +303,12 @@ openzfs_init_os(void) "ZFS pool version %s, ZFS filesystem version %s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, SPA_VERSION_STRING, ZPL_VERSION_STRING); +#ifdef HAVE_LINUX_EXPERIMENTAL + printk(KERN_NOTICE "ZFS: Using ZFS with kernel %s is EXPERIMENTAL and " + "SERIOUS DATA LOSS may occur!\n", utsname()->release); + printk(KERN_NOTICE "ZFS: Please report your results at: " + "https://github.com/openzfs/zfs/issues/new\n"); +#endif #ifndef CONFIG_FS_POSIX_ACL printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n"); #endif /* CONFIG_FS_POSIX_ACL */ diff --git a/module/os/linux/zfs/zfs_racct.c b/module/os/linux/zfs/zfs_racct.c index ce623ef9d185..ce197caa45f0 100644 --- a/module/os/linux/zfs/zfs_racct.c +++ b/module/os/linux/zfs/zfs_racct.c @@ -25,14 +25,35 @@ #include +#ifdef _KERNEL +#include + +void +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + task_io_account_read(size); + spa_iostats_read_add(spa, size, iops, flags); +} + void -zfs_racct_read(uint64_t size, uint64_t iops) +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { - (void) size, (void) iops; + task_io_account_write(size); + spa_iostats_write_add(spa, size, iops, flags); } +#else + void -zfs_racct_write(uint64_t size, uint64_t iops) +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, 
uint32_t flags) { - (void) size, (void) iops; + (void) spa, (void) size, (void) iops, (void) flags; } + +void +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + (void) spa, (void) size, (void) iops, (void) flags; +} + +#endif /* _KERNEL */ diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index a99a1ba88256..0146d842339a 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -41,12 +41,19 @@ #ifdef _KERNEL +#include +#include +#include #include #include #include #include +#include +#include #include #include +#include +#include /* * Move "n" bytes at byte address "p"; "rw" indicates the direction @@ -161,7 +168,6 @@ zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) return (0); } -#ifdef HAVE_BLK_MQ static void zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw, struct bio_vec *bv) @@ -253,17 +259,12 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) } return (0); } -#endif static int zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { -#ifdef HAVE_BLK_MQ if (uio->rq != NULL) return (zfs_uiomove_bvec_rq(p, n, rw, uio)); -#else - ASSERT3P(uio->rq, ==, NULL); -#endif return (zfs_uiomove_bvec_impl(p, n, rw, uio)); } @@ -327,8 +328,13 @@ EXPORT_SYMBOL(zfs_uiomove); int zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio) { - if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) { - /* There's never a need to fault in kernel pages */ + if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC || + (uio->uio_extflg & UIO_DIRECT)) { + /* + * There's never a need to fault in kernel pages or Direct I/O + * write pages. Direct I/O write pages have already been pinned, + * so a fault can never occur on them.
+ */ return (0); #if defined(HAVE_VFS_IOV_ITER) } else if (uio->uio_segflg == UIO_ITER) { @@ -437,9 +443,295 @@ zfs_uioskip(zfs_uio_t *uio, size_t n) uio->uio_iovcnt--; } } + uio->uio_loffset += n; uio->uio_resid -= n; } EXPORT_SYMBOL(zfs_uioskip); +/* + * Check if the uio is page-aligned in memory. + */ +boolean_t +zfs_uio_page_aligned(zfs_uio_t *uio) +{ + boolean_t aligned = B_TRUE; + + if (uio->uio_segflg == UIO_USERSPACE || + uio->uio_segflg == UIO_SYSSPACE) { + const struct iovec *iov = uio->uio_iov; + size_t skip = uio->uio_skip; + + for (int i = uio->uio_iovcnt; i > 0; iov++, i--) { + uintptr_t addr = (uintptr_t)(iov->iov_base + skip); + size_t size = iov->iov_len - skip; + if ((addr & (PAGE_SIZE - 1)) || + (size & (PAGE_SIZE - 1))) { + aligned = B_FALSE; + break; + } + skip = 0; + } +#if defined(HAVE_VFS_IOV_ITER) + } else if (uio->uio_segflg == UIO_ITER) { + unsigned long alignment = + iov_iter_alignment(uio->uio_iter); + aligned = IS_P2ALIGNED(alignment, PAGE_SIZE); +#endif + } else { + /* Currently not supported */ + aligned = B_FALSE; + } + + return (aligned); +} + + +#if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64) +#define ZFS_MARKEED_PAGE 0x0 +#define IS_ZFS_MARKED_PAGE(_p) 0 +#define zfs_mark_page(_p) +#define zfs_unmark_page(_p) +#define IS_ZERO_PAGE(_p) 0 + +#else +/* + * Mark pages to know if they were allocated to replace ZERO_PAGE() for + * Direct I/O writes. 
+ */ +#define ZFS_MARKED_PAGE 0x5a465350414745 /* ASCII: ZFSPAGE */ +#define IS_ZFS_MARKED_PAGE(_p) \ + (page_private(_p) == (unsigned long)ZFS_MARKED_PAGE) +#define IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0)) + +static inline void +zfs_mark_page(struct page *page) +{ + ASSERT3P(page, !=, NULL); + get_page(page); + SetPagePrivate(page); + set_page_private(page, ZFS_MARKED_PAGE); +} + +static inline void +zfs_unmark_page(struct page *page) +{ + ASSERT3P(page, !=, NULL); + set_page_private(page, 0UL); + ClearPagePrivate(page); + put_page(page); +} +#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */ + +static void +zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio) +{ + ASSERT3P(uio->uio_dio.pages, !=, NULL); + + for (long i = 0; i < uio->uio_dio.npages; i++) { + struct page *p = uio->uio_dio.pages[i]; + lock_page(p); + + if (IS_ZERO_PAGE(p)) { + /* + * If the user page points to the kernel's ZERO_PAGE(), + * a new zero-filled page is allocated instead so the + * contents of the page cannot be changed by the user + * while a Direct I/O write is taking place. + */ + gfp_t gfp_zero_page = __GFP_NOWARN | GFP_NOIO | + __GFP_ZERO | GFP_KERNEL; + + ASSERT0(IS_ZFS_MARKED_PAGE(p)); + unlock_page(p); + put_page(p); + + p = __page_cache_alloc(gfp_zero_page); + zfs_mark_page(p); + } else { + unlock_page(p); + } + } +} + +void +zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3P(uio->uio_dio.pages, !=, NULL); + + for (long i = 0; i < uio->uio_dio.npages; i++) { + struct page *p = uio->uio_dio.pages[i]; + + if (IS_ZFS_MARKED_PAGE(p)) { + zfs_unmark_page(p); + __free_page(p); + continue; + } + + put_page(p); + } + + vmem_free(uio->uio_dio.pages, + uio->uio_dio.npages * sizeof (struct page *)); +} + +/* + * zfs_uio_iov_step() is just a modified version of the STEP function of Linux's + * iov_iter_get_pages().
+ */ +static int +zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio, + long *numpages) +{ + unsigned long addr = (unsigned long)(v.iov_base); + size_t len = v.iov_len; + unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE); + + /* + * read returning FOLL_WRITE is due to the fact that we are stating + * that the kernel will have write access to the user pages. So, when a + * Direct I/O read request is issued, the kernel must write to the user + * pages. + */ + long res = get_user_pages_unlocked( + P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, + &uio->uio_dio.pages[uio->uio_dio.npages], + rw == UIO_READ ? FOLL_WRITE : 0); + if (res < 0) { + return (SET_ERROR(-res)); + } else if (len != (res * PAGE_SIZE)) { + return (SET_ERROR(EFAULT)); + } + + ASSERT3S(len, ==, res * PAGE_SIZE); + *numpages = res; + return (0); +} + +static int +zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + const struct iovec *iovp = uio->uio_iov; + size_t skip = uio->uio_skip; + size_t len = uio->uio_resid - skip; + + ASSERT(uio->uio_segflg != UIO_SYSSPACE); + + for (int i = 0; i < uio->uio_iovcnt; i++) { + struct iovec iov; + long numpages = 0; + + if (iovp->iov_len == 0) { + iovp++; + skip = 0; + continue; + } + iov.iov_len = MIN(len, iovp->iov_len - skip); + iov.iov_base = iovp->iov_base + skip; + int error = zfs_uio_iov_step(iov, rw, uio, &numpages); + + if (error) + return (error); + + uio->uio_dio.npages += numpages; + len -= iov.iov_len; + skip = 0; + iovp++; + } + + ASSERT0(len); + + return (0); +} + +#if defined(HAVE_VFS_IOV_ITER) +static int +zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + size_t skip = uio->uio_skip; + size_t wanted = uio->uio_resid - uio->uio_skip; + ssize_t rollback = 0; + ssize_t cnt; + unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE); + + while (wanted) { +#if defined(HAVE_IOV_ITER_GET_PAGES2) + cnt = iov_iter_get_pages2(uio->uio_iter, + &uio->uio_dio.pages[uio->uio_dio.npages], + wanted, maxpages, &skip); +#else 
+ cnt = iov_iter_get_pages(uio->uio_iter, + &uio->uio_dio.pages[uio->uio_dio.npages], + wanted, maxpages, &skip); +#endif + if (cnt < 0) { + iov_iter_revert(uio->uio_iter, rollback); + return (SET_ERROR(-cnt)); + } + uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE); + rollback += cnt; + wanted -= cnt; + skip = 0; +#if !defined(HAVE_IOV_ITER_GET_PAGES2) + /* + * iov_iter_get_pages2() advances the iov_iter on success. + */ + iov_iter_advance(uio->uio_iter, cnt); +#endif + + } + ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip); + iov_iter_revert(uio->uio_iter, rollback); + + return (0); +} +#endif /* HAVE_VFS_IOV_ITER */ + +/* + * This function pins user pages. In the event that the user pages were not + * successfully pinned an error value is returned. + * + * On success, 0 is returned. + */ +int +zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + int error = 0; + long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE); + size_t size = npages * sizeof (struct page *); + + if (uio->uio_segflg == UIO_USERSPACE) { + uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); + error = zfs_uio_get_dio_pages_iov(uio, rw); +#if defined(HAVE_VFS_IOV_ITER) + } else if (uio->uio_segflg == UIO_ITER) { + uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); + error = zfs_uio_get_dio_pages_iov_iter(uio, rw); +#endif + } else { + return (SET_ERROR(EOPNOTSUPP)); + } + + ASSERT3S(uio->uio_dio.npages, >=, 0); + + if (error) { + for (long i = 0; i < uio->uio_dio.npages; i++) + put_page(uio->uio_dio.pages[i]); + vmem_free(uio->uio_dio.pages, size); + return (error); + } else { + ASSERT3S(uio->uio_dio.npages, ==, npages); + } + + if (rw == UIO_WRITE) { + zfs_uio_dio_check_for_zero_page(uio); + } + + uio->uio_extflg |= UIO_DIRECT; + + return (0); +} + #endif /* _KERNEL */ diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index d315355be93b..49346338b3f8 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -57,8 
+57,10 @@ #include #include #include +#include #include #include +#include #include "zfs_comutil.h" enum { @@ -163,7 +165,7 @@ zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp) vfsp->vfs_do_xattr = B_TRUE; break; case TOKEN_XATTR: - vfsp->vfs_xattr = ZFS_XATTR_DIR; + vfsp->vfs_xattr = ZFS_XATTR_SA; vfsp->vfs_do_xattr = B_TRUE; break; case TOKEN_NOXATTR: @@ -466,6 +468,12 @@ acl_inherit_changed_cb(void *arg, uint64_t newval) ((zfsvfs_t *)arg)->z_acl_inherit = newval; } +static void +longname_changed_cb(void *arg, uint64_t newval) +{ + ((zfsvfs_t *)arg)->z_longname = newval; +} + static int zfs_register_callbacks(vfs_t *vfsp) { @@ -526,6 +534,8 @@ zfs_register_callbacks(vfs_t *vfsp) zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_LONGNAME), longname_changed_cb, zfsvfs); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; @@ -1157,7 +1167,8 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp) statp->f_fsid.val[0] = (uint32_t)fsid; statp->f_fsid.val[1] = (uint32_t)(fsid >> 32); statp->f_type = ZFS_SUPER_MAGIC; - statp->f_namelen = MAXNAMELEN - 1; + statp->f_namelen = + zfsvfs->z_longname ? (ZAP_MAXNAMELEN_NEW - 1) : (MAXNAMELEN - 1); /* * We have all of 40 characters to stuff a string here. @@ -1195,64 +1206,6 @@ zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp) return (error); } -/* - * Linux kernels older than 3.1 do not support a per-filesystem shrinker. - * To accommodate this we must improvise and manually walk the list of znodes - * attempting to prune dentries in order to be able to drop the inodes. - * - * To avoid scanning the same znodes multiple times they are always rotated - * to the end of the z_all_znodes list. New znodes are inserted at the - * end of the list so we're always scanning the oldest znodes first. 
- */ -static int -zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan) -{ - znode_t **zp_array, *zp; - int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *)); - int objects = 0; - int i = 0, j = 0; - - zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP); - - mutex_enter(&zfsvfs->z_znodes_lock); - while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) { - - if ((i++ > nr_to_scan) || (j >= max_array)) - break; - - ASSERT(list_link_active(&zp->z_link_node)); - list_remove(&zfsvfs->z_all_znodes, zp); - list_insert_tail(&zfsvfs->z_all_znodes, zp); - - /* Skip active znodes and .zfs entries */ - if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir) - continue; - - if (igrab(ZTOI(zp)) == NULL) - continue; - - zp_array[j] = zp; - j++; - } - mutex_exit(&zfsvfs->z_znodes_lock); - - for (i = 0; i < j; i++) { - zp = zp_array[i]; - - ASSERT3P(zp, !=, NULL); - d_prune_aliases(ZTOI(zp)); - - if (atomic_read(&ZTOI(zp)->i_count) == 1) - objects++; - - zrele(zp); - } - - vmem_free(zp_array, max_array * sizeof (znode_t *)); - - return (objects); -} - /* * The ARC has requested that the filesystem drop entries from the dentry * and inode caches. 
This can occur when the ARC needs to free meta data @@ -1278,9 +1231,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); -#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \ - defined(SHRINK_CONTROL_HAS_NID) && \ - defined(SHRINKER_NUMA_AWARE) +#ifdef SHRINKER_NUMA_AWARE if (shrinker->flags & SHRINKER_NUMA_AWARE) { long tc = 1; for_each_online_node(sc.nid) { @@ -1302,27 +1253,8 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) } else { *objects = (*shrinker->scan_objects)(shrinker, &sc); } - -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) - *objects = (*shrinker->scan_objects)(shrinker, &sc); -#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) - *objects = (*shrinker->shrink)(shrinker, &sc); -#elif defined(HAVE_D_PRUNE_ALIASES) -#define D_PRUNE_ALIASES_IS_DEFAULT - *objects = zfs_prune_aliases(zfsvfs, nr_to_scan); #else -#error "No available dentry and inode cache pruning mechanism." -#endif - -#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT) -#undef D_PRUNE_ALIASES_IS_DEFAULT - /* - * Fall back to zfs_prune_aliases if the kernel's per-superblock - * shrinker couldn't free anything, possibly due to the inodes being - * allocated in a different memcg. 
- */ - if (*objects == 0) - *objects = zfs_prune_aliases(zfsvfs, nr_to_scan); + *objects = (*shrinker->scan_objects)(shrinker, &sc); #endif zfs_exit(zfsvfs, FTAG); @@ -1481,9 +1413,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) return (0); } -#if defined(HAVE_SUPER_SETUP_BDI_NAME) -atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); -#endif +static atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) @@ -1544,7 +1474,8 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); - error = -zpl_bdi_setup(sb, "zfs"); + error = -super_setup_bdi_name(sb, "%.28s-%ld", "zfs", + atomic_long_inc_return(&zfs_bdi_seq)); if (error) goto out; @@ -1672,7 +1603,6 @@ zfs_umount(struct super_block *sb) arc_remove_prune_callback(zfsvfs->z_arc_prune); VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); os = zfsvfs->z_os; - zpl_bdi_destroy(sb); /* * z_os will be NULL if there was an error in @@ -1790,6 +1720,11 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { *ipp = zfsvfs->z_ctldir; ASSERT(*ipp != NULL); + + if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { + return (SET_ERROR(ENOENT)); + } + if (object == ZFSCTL_INO_SNAPDIR) { VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, 0, kcred, NULL, NULL) == 0); @@ -2123,9 +2058,6 @@ zfs_init(void) zfs_znode_init(); dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); register_filesystem(&zpl_fs_type); -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - register_fo_extend(&zpl_file_operations); -#endif } void @@ -2136,9 +2068,6 @@ zfs_fini(void) */ taskq_wait(system_delay_taskq); taskq_wait(system_taskq); -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - unregister_fo_extend(&zpl_file_operations); -#endif unregister_filesystem(&zpl_fs_type); zfs_znode_fini(); zfsctl_fini(); diff --git a/module/os/linux/zfs/zfs_vnops_os.c 
b/module/os/linux/zfs/zfs_vnops_os.c index 9a12b361c16c..4c3f14536b73 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -260,6 +260,15 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) } else { ClearPageError(pp); SetPageUptodate(pp); + if (!PagePrivate(pp)) { + /* + * Set private bit so page migration + * will wait for us to finish writeback + * before calling migrate_folio(). + */ + SetPagePrivate(pp); + get_page(pp); + } if (mapping_writably_mapped(mp)) flush_dcache_page(pp); @@ -296,6 +305,7 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT); if (pp) { + /* * If filemap_fault() retries there exists a window * where the page will be unlocked and not up to date. @@ -532,6 +542,46 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr, return (error); } +/* + * Perform a linear search in directory for the name of specific inode. + * Note we don't pass in the buffer size of name because it's hardcoded to + * NAME_MAX+1(256) in Linux. + * + * IN: dzp - znode of directory to search. + * zp - znode of the target + * + * OUT: name - dentry name of the target + * + * RETURN: 0 on success, error code on failure. + */ +int +zfs_get_name(znode_t *dzp, char *name, znode_t *zp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + int error = 0; + + if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0) + return (error); + + if ((error = zfs_verify_zp(zp)) != 0) { + zfs_exit(zfsvfs, FTAG); + return (error); + } + + /* ctldir should have got their name in zfs_vget */ + if (dzp->z_is_ctldir || zp->z_is_ctldir) { + zfs_exit(zfsvfs, FTAG); + return (ENOENT); + } + + /* buffer len is hardcoded to 256 in Linux kernel */ + error = zap_value_search(zfsvfs->z_os, dzp->z_id, zp->z_id, + ZFS_DIRENT_OBJ(-1ULL), name, ZAP_MAXNAMELEN); + + zfs_exit(zfsvfs, FTAG); + return (error); +} + /* * Attempt to create a new entry in a directory. 
If the entry * already exists, truncate the file if permissible, else return @@ -1538,14 +1588,14 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, * we use the offset 2 for the '.zfs' directory. */ int -zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) +zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr) { (void) cr; znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); objset_t *os; zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap; int error; uint8_t prefetch; uint8_t type; @@ -1570,6 +1620,7 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) os = zfsvfs->z_os; offset = ctx->pos; prefetch = zp->z_zn_prefetch; + zap = zap_attribute_long_alloc(); /* * Initialize the iterator cursor. @@ -1595,25 +1646,25 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) * Special case `.', `..', and `.zfs'. */ if (offset == 0) { - (void) strcpy(zap.za_name, "."); - zap.za_normalization_conflict = 0; + (void) strcpy(zap->za_name, "."); + zap->za_normalization_conflict = 0; objnum = zp->z_id; type = DT_DIR; } else if (offset == 1) { - (void) strcpy(zap.za_name, ".."); - zap.za_normalization_conflict = 0; + (void) strcpy(zap->za_name, ".."); + zap->za_normalization_conflict = 0; objnum = parent; type = DT_DIR; } else if (offset == 2 && zfs_show_ctldir(zp)) { - (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); - zap.za_normalization_conflict = 0; + (void) strcpy(zap->za_name, ZFS_CTLDIR_NAME); + zap->za_normalization_conflict = 0; objnum = ZFSCTL_INO_ROOT; type = DT_DIR; } else { /* * Grab next entry. 
*/ - if ((error = zap_cursor_retrieve(&zc, &zap))) { + if ((error = zap_cursor_retrieve(&zc, zap))) { if (error == ENOENT) break; else @@ -1627,24 +1678,24 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) * * XXX: This should be a feature flag for compatibility */ - if (zap.za_integer_length != 8 || - zap.za_num_integers == 0) { + if (zap->za_integer_length != 8 || + zap->za_num_integers == 0) { cmn_err(CE_WARN, "zap_readdir: bad directory " "entry, obj = %lld, offset = %lld, " "length = %d, num = %lld\n", (u_longlong_t)zp->z_id, (u_longlong_t)offset, - zap.za_integer_length, - (u_longlong_t)zap.za_num_integers); + zap->za_integer_length, + (u_longlong_t)zap->za_num_integers); error = SET_ERROR(ENXIO); goto update; } - objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); - type = ZFS_DIRENT_TYPE(zap.za_first_integer); + objnum = ZFS_DIRENT_OBJ(zap->za_first_integer); + type = ZFS_DIRENT_TYPE(zap->za_first_integer); } - done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name), + done = !dir_emit(ctx, zap->za_name, strlen(zap->za_name), objnum, type); if (done) break; @@ -1667,6 +1718,7 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) update: zap_cursor_fini(&zc); + zap_attribute_free(zap); if (error == ENOENT) error = 0; out: @@ -1765,7 +1817,7 @@ zfs_setattr_dir(znode_t *dzp) zfsvfs_t *zfsvfs = ZTOZSB(dzp); objset_t *os = zfsvfs->z_os; zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap; zfs_dirlock_t *dl; znode_t *zp = NULL; dmu_tx_t *tx = NULL; @@ -1774,15 +1826,16 @@ zfs_setattr_dir(znode_t *dzp) int count; int err; + zap = zap_attribute_alloc(); zap_cursor_init(&zc, os, dzp->z_id); - while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) { + while ((err = zap_cursor_retrieve(&zc, zap)) == 0) { count = 0; - if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { + if (zap->za_integer_length != 8 || zap->za_num_integers != 1) { err = ENXIO; break; } - err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp, + err 
= zfs_dirent_lock(&dl, dzp, (char *)zap->za_name, &zp, ZEXISTS, NULL, NULL); if (err == ENOENT) goto next; @@ -1874,6 +1927,7 @@ zfs_setattr_dir(znode_t *dzp) zfs_dirent_unlock(dl); } zap_cursor_fini(&zc); + zap_attribute_free(zap); return (err == ENOENT ? 0 : err); } @@ -3489,9 +3543,9 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, boolean_t waited = B_FALSE; boolean_t is_tmpfile = 0; uint64_t txg; -#ifdef HAVE_TMPFILE + is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE)); -#endif + ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode)); if (name == NULL) @@ -3795,8 +3849,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, /* * Speed up any non-sync page writebacks since * they may take several seconds to complete. - * Refer to the comment in zpl_fsync() (when - * HAVE_FSYNC_RANGE is defined) for details. + * Refer to the comment in zpl_fsync() for details. */ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { zil_commit(zfsvfs->z_log, zp->z_id); @@ -3895,7 +3948,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, } zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit, - for_sync ? zfs_putpage_sync_commit_cb : + B_FALSE, for_sync ? 
zfs_putpage_sync_commit_cb : zfs_putpage_async_commit_cb, pp); dmu_tx_commit(tx); @@ -4038,6 +4091,7 @@ zfs_inactive(struct inode *ip) static int zfs_fillpage(struct inode *ip, struct page *pp) { + znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); loff_t i_size = i_size_read(ip); u_offset_t io_off = page_offset(pp); @@ -4049,7 +4103,7 @@ zfs_fillpage(struct inode *ip, struct page *pp) io_len = i_size - io_off; void *va = kmap(pp); - int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, + int error = dmu_read(zfsvfs->z_os, zp->z_id, io_off, io_len, va, DMU_READ_PREFETCH); if (io_len != PAGE_SIZE) memset((char *)va + io_len, 0, PAGE_SIZE - io_len); @@ -4065,6 +4119,14 @@ zfs_fillpage(struct inode *ip, struct page *pp) } else { ClearPageError(pp); SetPageUptodate(pp); + if (!PagePrivate(pp)) { + /* + * Set private bit so page migration will wait for us to + * finish writeback before calling migrate_folio(). + */ + SetPagePrivate(pp); + get_page(pp); + } } return (error); @@ -4087,11 +4149,49 @@ zfs_getpage(struct inode *ip, struct page *pp) zfsvfs_t *zfsvfs = ITOZSB(ip); znode_t *zp = ITOZ(ip); int error; + loff_t i_size = i_size_read(ip); + u_offset_t io_off = page_offset(pp); + size_t io_len = PAGE_SIZE; if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); + ASSERT3U(io_off, <, i_size); + + if (io_off + io_len > i_size) + io_len = i_size - io_off; + + /* + * It is important to hold the rangelock here because it is possible + * a Direct I/O write or block clone might be taking place at the same + * time that a page is being faulted in through filemap_fault(). With + * Direct I/O writes and block cloning db->db_data will be set to NULL + * with dbuf_clear_data() in dmu_buf_will_clone_or_dio(). If the + * rangelock is not held, then there is a race between faulting in a + * page and writing out a Direct I/O write or block cloning.
Without + * the rangelock a NULL pointer dereference can occur in + * dmu_read_impl() for db->db_data during the memcpy operation when + * zfs_fillpage() calls dmu_read(). + */ + zfs_locked_range_t *lr = zfs_rangelock_tryenter(&zp->z_rangelock, + io_off, io_len, RL_READER); + if (lr == NULL) { + /* + * It is important to drop the page lock before grabbing the + * rangelock to avoid another deadlock between here and + * zfs_write() -> update_pages(). update_pages() holds both the + * rangelock and the page lock. + */ + get_page(pp); + unlock_page(pp); + lr = zfs_rangelock_enter(&zp->z_rangelock, io_off, + io_len, RL_READER); + lock_page(pp); + put_page(pp); + } error = zfs_fillpage(ip, pp); + zfs_rangelock_exit(lr); + if (error == 0) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE); diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode_os.c similarity index 85% rename from module/os/linux/zfs/zfs_znode.c rename to module/os/linux/zfs/zfs_znode_os.c index 265153e011e7..bc1e17f086d9 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -25,7 +25,6 @@ /* Portions Copyright 2007 Jeremy Teo */ -#ifdef _KERNEL #include #include #include @@ -49,8 +48,6 @@ #include #include #include -#endif /* _KERNEL */ - #include #include #include @@ -61,17 +58,11 @@ #include #include #include +#include #include "zfs_prop.h" #include "zfs_comutil.h" -/* - * Functions needed for userland (ie: libzpool) are not put under - * #ifdef_KERNEL; the rest of the functions have dependencies - * (such as VFS logic) that will not compile easily in userland.
- */ -#ifdef _KERNEL - static kmem_cache_t *znode_cache = NULL; static kmem_cache_t *znode_hold_cache = NULL; unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ; @@ -415,21 +406,12 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) switch (ip->i_mode & S_IFMT) { case S_IFREG: ip->i_op = &zpl_inode_operations; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - ip->i_fop = &zpl_file_operations.kabi_fops; -#else ip->i_fop = &zpl_file_operations; -#endif ip->i_mapping->a_ops = &zpl_address_space_operations; break; case S_IFDIR: -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER - ip->i_flags |= S_IOPS_WRAPPER; - ip->i_op = &zpl_dir_inode_operations.ops; -#else ip->i_op = &zpl_dir_inode_operations; -#endif ip->i_fop = &zpl_dir_file_operations; ITOZ(ip)->z_zn_prefetch = B_TRUE; break; @@ -459,11 +441,7 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) /* Assume the inode is a file and attempt to continue */ ip->i_mode = S_IFREG | 0644; ip->i_op = &zpl_inode_operations; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - ip->i_fop = &zpl_file_operations.kabi_fops; -#else ip->i_fop = &zpl_file_operations; -#endif ip->i_mapping->a_ops = &zpl_address_space_operations; break; } @@ -476,7 +454,6 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip) * Linux and Solaris have different sets of file attributes, so we * restrict this conversion to the intersection of the two. 
*/ -#ifdef HAVE_INODE_SET_FLAGS unsigned int flags = 0; if (zp->z_pflags & ZFS_IMMUTABLE) flags |= S_IMMUTABLE; @@ -484,17 +461,6 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip) flags |= S_APPEND; inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND); -#else - if (zp->z_pflags & ZFS_IMMUTABLE) - ip->i_flags |= S_IMMUTABLE; - else - ip->i_flags &= ~S_IMMUTABLE; - - if (zp->z_pflags & ZFS_APPENDONLY) - ip->i_flags |= S_APPEND; - else - ip->i_flags &= ~S_APPEND; -#endif } /* @@ -560,9 +526,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_unlinked = B_FALSE; zp->z_atime_dirty = B_FALSE; -#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) - zp->z_is_mapped = B_FALSE; -#endif zp->z_is_ctldir = B_FALSE; zp->z_suspended = B_FALSE; zp->z_sa_hdl = NULL; @@ -1391,12 +1354,6 @@ zfs_zinactive(znode_t *zp) zfs_znode_hold_exit(zfsvfs, zh); } -#if defined(HAVE_INODE_TIMESPEC64_TIMES) -#define zfs_compare_timespec timespec64_compare -#else -#define zfs_compare_timespec timespec_compare -#endif - /* * Determine whether the znode's atime must be updated. The logic mostly * duplicates the Linux kernel's relatime_need_update() functionality. @@ -1416,11 +1373,11 @@ zfs_relatime_need_update(const struct inode *ip) * has passed since the last update of atime. */ tmp_ts = zpl_inode_get_mtime(ip); - if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) + if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0) return (B_TRUE); tmp_ts = zpl_inode_get_ctime(ip); - if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) + if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0) return (B_TRUE); if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60) @@ -1620,6 +1577,14 @@ zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) mark_page_accessed(pp); SetPageUptodate(pp); ClearPageError(pp); + if (!PagePrivate(pp)) { + /* + * Set private bit so page migration will wait for us to + * finish writeback before calling migrate_folio(). 
+ */ + SetPagePrivate(pp); + get_page(pp); + } unlock_page(pp); put_page(pp); } @@ -2006,360 +1971,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) kmem_free(sb, sizeof (struct super_block)); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } -#endif /* _KERNEL */ - -static int -zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) -{ - uint64_t sa_obj = 0; - int error; - - error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - if (error != 0 && error != ENOENT) - return (error); - - error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); - return (error); -} - -static int -zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db, const void *tag) -{ - dmu_object_info_t doi; - int error; - - if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) - return (error); - - dmu_object_info_from_db(*db, &doi); - if ((doi.doi_bonus_type != DMU_OT_SA && - doi.doi_bonus_type != DMU_OT_ZNODE) || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, tag); - return (SET_ERROR(ENOTSUP)); - } - - error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); - if (error != 0) { - sa_buf_rele(*db, tag); - return (error); - } - - return (0); -} - -static void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag) -{ - sa_handle_destroy(hdl); - sa_buf_rele(db, tag); -} - -/* - * Given an object number, return its parent object number and whether - * or not the object is an extended attribute directory. 
- */ -static int -zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, - uint64_t *pobjp, int *is_xattrdir) -{ - uint64_t parent; - uint64_t pflags; - uint64_t mode; - uint64_t parent_mode; - sa_bulk_attr_t bulk[3]; - sa_handle_t *sa_hdl; - dmu_buf_t *sa_db; - int count = 0; - int error; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, - &parent, sizeof (parent)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, - &pflags, sizeof (pflags)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &mode, sizeof (mode)); - - if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) - return (error); - - /* - * When a link is removed its parent pointer is not changed and will - * be invalid. There are two cases where a link is removed but the - * file stays around, when it goes to the delete queue and when there - * are additional links. - */ - error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); - if (error != 0) - return (error); - - error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - if (error != 0) - return (error); - - *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); - - /* - * Extended attributes can be applied to files, directories, etc. - * Otherwise the parent must be a directory. 
- */ - if (!*is_xattrdir && !S_ISDIR(parent_mode)) - return (SET_ERROR(EINVAL)); - - *pobjp = parent; - - return (0); -} - -/* - * Given an object number, return some zpl level statistics - */ -static int -zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, - zfs_stat_t *sb) -{ - sa_bulk_attr_t bulk[4]; - int count = 0; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &sb->zs_mode, sizeof (sb->zs_mode)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, - &sb->zs_gen, sizeof (sb->zs_gen)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, - &sb->zs_links, sizeof (sb->zs_links)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, - &sb->zs_ctime, sizeof (sb->zs_ctime)); - - return (sa_bulk_lookup(hdl, bulk, count)); -} - -static int -zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, - sa_attr_type_t *sa_table, char *buf, int len) -{ - sa_handle_t *sa_hdl; - sa_handle_t *prevhdl = NULL; - dmu_buf_t *prevdb = NULL; - dmu_buf_t *sa_db = NULL; - char *path = buf + len - 1; - int error; - - *path = '\0'; - sa_hdl = hdl; - - uint64_t deleteq_obj; - VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, - ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); - error = zap_lookup_int(osp, deleteq_obj, obj); - if (error == 0) { - return (ESTALE); - } else if (error != ENOENT) { - return (error); - } - - for (;;) { - uint64_t pobj = 0; - char component[MAXNAMELEN + 2]; - size_t complen; - int is_xattrdir = 0; - - if (prevdb) { - ASSERT(prevhdl != NULL); - zfs_release_sa_handle(prevhdl, prevdb, FTAG); - } - - if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, - &is_xattrdir)) != 0) - break; - - if (pobj == obj) { - if (path[0] != '/') - *--path = '/'; - break; - } - - component[0] = '/'; - if (is_xattrdir) { - strcpy(component + 1, ""); - } else { - error = zap_value_search(osp, pobj, obj, - ZFS_DIRENT_OBJ(-1ULL), component + 1); - if (error != 0) - break; - } - - complen = strlen(component); - path -= complen; - 
ASSERT(path >= buf); - memcpy(path, component, complen); - obj = pobj; - - if (sa_hdl != hdl) { - prevhdl = sa_hdl; - prevdb = sa_db; - } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); - if (error != 0) { - sa_hdl = prevhdl; - sa_db = prevdb; - break; - } - } - - if (sa_hdl != NULL && sa_hdl != hdl) { - ASSERT(sa_db != NULL); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - } - - if (error == 0) - (void) memmove(buf, path, buf + len - path); - - return (error); -} - -int -zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) -{ - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -int -zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, - char *buf, int len) -{ - char *path = buf + len - 1; - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - *path = '\0'; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_stats_impl(hdl, sa_table, sb); - if (error != 0) { - zfs_release_sa_handle(hdl, db, FTAG); - return (error); - } - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -/* - * Read a property stored within the master node. - */ -int -zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) -{ - uint64_t *cached_copy = NULL; - - /* - * Figure out where in the objset_t the cached copy would live, if it - * is available for the requested property. 
- */ - if (os != NULL) { - switch (prop) { - case ZFS_PROP_VERSION: - cached_copy = &os->os_version; - break; - case ZFS_PROP_NORMALIZE: - cached_copy = &os->os_normalization; - break; - case ZFS_PROP_UTF8ONLY: - cached_copy = &os->os_utf8only; - break; - case ZFS_PROP_CASE: - cached_copy = &os->os_casesensitivity; - break; - default: - break; - } - } - if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { - *value = *cached_copy; - return (0); - } - - /* - * If the property wasn't cached, look up the file system's value for - * the property. For the version property, we look up a slightly - * different string. - */ - const char *pname; - int error = ENOENT; - if (prop == ZFS_PROP_VERSION) - pname = ZPL_VERSION_STR; - else - pname = zfs_prop_to_name(prop); - - if (os != NULL) { - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); - } - - if (error == ENOENT) { - /* No value set, use the default value */ - switch (prop) { - case ZFS_PROP_VERSION: - *value = ZPL_VERSION; - break; - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - *value = 0; - break; - case ZFS_PROP_CASE: - *value = ZFS_CASE_SENSITIVE; - break; - case ZFS_PROP_ACLTYPE: - *value = ZFS_ACLTYPE_OFF; - break; - default: - return (error); - } - error = 0; - } - - /* - * If one of the methods for getting the property value above worked, - * copy it into the objset_t's cache. 
- */ - if (error == 0 && cached_copy != NULL) { - *cached_copy = *value; - } - - return (error); -} -#if defined(_KERNEL) EXPORT_SYMBOL(zfs_create_fs); EXPORT_SYMBOL(zfs_obj_to_path); @@ -2369,4 +1981,3 @@ MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array"); module_param(zfs_unlink_suspend_progress, int, 0644); MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks " "(debug - leaks space into the unlinked set)"); -#endif diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c index 8ee7fcecc7b7..fe64bc710387 100644 --- a/module/os/linux/zfs/zpl_ctldir.c +++ b/module/os/linux/zfs/zpl_ctldir.c @@ -52,19 +52,23 @@ zpl_common_open(struct inode *ip, struct file *filp) * Get root directory contents. */ static int -zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_root_iterate(struct file *filp, struct dir_context *ctx) { zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); int error = 0; + if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { + return (SET_ERROR(ENOENT)); + } + if ((error = zpl_enter(zfsvfs, FTAG)) != 0) return (error); - if (!zpl_dir_emit_dots(filp, ctx)) + if (!dir_emit_dots(filp, ctx)) goto out; if (ctx->pos == 2) { - if (!zpl_dir_emit(ctx, ZFS_SNAPDIR_NAME, + if (!dir_emit(ctx, ZFS_SNAPDIR_NAME, strlen(ZFS_SNAPDIR_NAME), ZFSCTL_INO_SNAPDIR, DT_DIR)) goto out; @@ -72,7 +76,7 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) } if (ctx->pos == 3) { - if (!zpl_dir_emit(ctx, ZFS_SHAREDIR_NAME, + if (!dir_emit(ctx, ZFS_SHAREDIR_NAME, strlen(ZFS_SHAREDIR_NAME), ZFSCTL_INO_SHARES, DT_DIR)) goto out; @@ -84,21 +88,6 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_root_iterate(filp, &ctx); 
- filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - /* * Get root directory attributes. */ @@ -167,13 +156,7 @@ const struct file_operations zpl_fops_root = { .open = zpl_common_open, .llseek = generic_file_llseek, .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED .iterate_shared = zpl_root_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_root_iterate, -#else - .readdir = zpl_root_readdir, -#endif }; const struct inode_operations zpl_ops_root = { @@ -207,11 +190,7 @@ zpl_snapdir_automount(struct path *path) * the snapshot being immediately unmounted. */ static int -#ifdef HAVE_D_REVALIDATE_NAMEIDATA -zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i) -#else zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) -#endif { return (!!dentry->d_inode); } @@ -258,7 +237,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, } static int -zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx) { zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); fstrans_cookie_t cookie; @@ -271,7 +250,7 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); cookie = spl_fstrans_mark(); - if (!zpl_dir_emit_dots(filp, ctx)) + if (!dir_emit_dots(filp, ctx)) goto out; /* Start the position at 0 if it already emitted . and .. 
*/ @@ -284,7 +263,7 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) if (error) goto out; - if (!zpl_dir_emit(ctx, snapname, strlen(snapname), + if (!dir_emit(ctx, snapname, strlen(snapname), ZFSCTL_INO_SHARES - id, DT_DIR)) goto out; @@ -300,21 +279,6 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_snapdir_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - static int #ifdef HAVE_IOPS_RENAME_USERNS zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip, @@ -478,13 +442,7 @@ const struct file_operations zpl_fops_snapdir = { .open = zpl_common_open, .llseek = generic_file_llseek, .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED .iterate_shared = zpl_snapdir_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_snapdir_iterate, -#else - .readdir = zpl_snapdir_readdir, -#endif }; @@ -535,7 +493,7 @@ zpl_shares_lookup(struct inode *dip, struct dentry *dentry, } static int -zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_shares_iterate(struct file *filp, struct dir_context *ctx) { fstrans_cookie_t cookie; cred_t *cr = CRED(); @@ -548,7 +506,7 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) cookie = spl_fstrans_mark(); if (zfsvfs->z_shares_dir == 0) { - zpl_dir_emit_dots(filp, ctx); + dir_emit_dots(filp, ctx); goto out; } @@ -569,21 +527,6 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - 
ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_shares_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - static int #ifdef HAVE_USERNS_IOPS_GETATTR zpl_shares_getattr_impl(struct user_namespace *user_ns, @@ -654,14 +597,7 @@ const struct file_operations zpl_fops_shares = { .open = zpl_common_open, .llseek = generic_file_llseek, .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED .iterate_shared = zpl_shares_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_shares_iterate, -#else - .readdir = zpl_shares_readdir, -#endif - }; /* diff --git a/module/os/linux/zfs/zpl_export.c b/module/os/linux/zfs/zpl_export.c index aa80b72e2d7a..ff865d129954 100644 --- a/module/os/linux/zfs/zpl_export.c +++ b/module/os/linux/zfs/zpl_export.c @@ -24,6 +24,7 @@ */ +#include #include #include #include @@ -31,15 +32,8 @@ static int -#ifdef HAVE_ENCODE_FH_WITH_INODE zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent) { -#else -zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) -{ - /* CSTYLED */ - struct inode *ip = dentry->d_inode; -#endif /* HAVE_ENCODE_FH_WITH_INODE */ fstrans_cookie_t cookie; ushort_t empty_fid = 0; fid_t *fid; @@ -109,6 +103,35 @@ zpl_fh_to_dentry(struct super_block *sb, struct fid *fh, return (d_obtain_alias(ip)); } +/* + * In case the filesystem contains a name longer than 255, we need to override + * the default get_name so we don't get a buffer overflow. Unfortunately, since + * the buffer size is hardcoded in Linux, we will get an ESTALE error in this + * case.
+ */ +static int +zpl_get_name(struct dentry *parent, char *name, struct dentry *child) +{ + cred_t *cr = CRED(); + fstrans_cookie_t cookie; + struct inode *dir = parent->d_inode; + struct inode *ip = child->d_inode; + int error; + + if (!dir || !S_ISDIR(dir->i_mode)) + return (-ENOTDIR); + + crhold(cr); + cookie = spl_fstrans_mark(); + spl_inode_lock_shared(dir); + error = -zfs_get_name(ITOZ(dir), name, ITOZ(ip)); + spl_inode_unlock_shared(dir); + spl_fstrans_unmark(cookie); + crfree(cr); + + return (error); +} + static struct dentry * zpl_get_parent(struct dentry *child) { @@ -153,6 +176,7 @@ zpl_commit_metadata(struct inode *inode) const struct export_operations zpl_export_operations = { .encode_fh = zpl_encode_fh, .fh_to_dentry = zpl_fh_to_dentry, + .get_name = zpl_get_name, .get_parent = zpl_get_parent, .commit_metadata = zpl_commit_metadata, }; diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 9dec52215c7c..50c63695dcc8 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -38,9 +38,7 @@ defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO) #include #endif -#ifdef HAVE_FILE_FADVISE #include -#endif #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO #include #endif @@ -93,7 +91,7 @@ zpl_release(struct inode *ip, struct file *filp) } static int -zpl_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_iterate(struct file *filp, struct dir_context *ctx) { cred_t *cr = CRED(); int error; @@ -109,62 +107,6 @@ zpl_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - -#if defined(HAVE_FSYNC_WITHOUT_DENTRY) -/* - * Linux 2.6.35 - 3.0 
API, - * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed - * redundant. The dentry is still accessible via filp->f_path.dentry, - * and we are guaranteed that filp will never be NULL. - */ -static int -zpl_fsync(struct file *filp, int datasync) -{ - struct inode *inode = filp->f_mapping->host; - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_fsync(ITOZ(inode), datasync, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - return (zpl_fsync(kiocb->ki_filp, datasync)); -} -#endif - -#elif defined(HAVE_FSYNC_RANGE) -/* - * Linux 3.1 API, - * As of 3.1 the responsibility to call filemap_write_and_wait_range() has - * been pushed down in to the .fsync() vfs hook. Additionally, the i_mutex - * lock is no longer held by the caller, for zfs we don't require the lock - * to be held so we don't acquire it. 
- */ static int zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { @@ -229,18 +171,6 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) return (error); } -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync)); -} -#endif - -#else -#error "Unsupported fops->fsync() implementation" -#endif - static inline int zfs_io_flags(struct kiocb *kiocb) { @@ -285,8 +215,6 @@ zpl_file_accessed(struct file *filp) } } -#if defined(HAVE_VFS_RW_ITERATE) - /* * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports * iovecs, kvevs, bvecs and pipes, plus all the required interfaces to @@ -322,14 +250,14 @@ zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) crhold(cr); cookie = spl_fstrans_mark(); - int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, + ssize_t ret = -zfs_read(ITOZ(filp->f_mapping->host), &uio, filp->f_flags | zfs_io_flags(kiocb), cr); spl_fstrans_unmark(cookie); crfree(cr); - if (error < 0) - return (error); + if (ret < 0) + return (ret); ssize_t read = count - uio.uio_resid; kiocb->ki_pos += read; @@ -343,23 +271,11 @@ static inline ssize_t zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from, size_t *countp) { -#ifdef HAVE_GENERIC_WRITE_CHECKS_KIOCB ssize_t ret = generic_write_checks(kiocb, from); if (ret <= 0) return (ret); *countp = ret; -#else - struct file *file = kiocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *ip = mapping->host; - int isblk = S_ISBLK(ip->i_mode); - - *countp = iov_iter_count(from); - ssize_t ret = generic_write_checks(file, &kiocb->ki_pos, countp, isblk); - if (ret) - return (ret); -#endif return (0); } @@ -384,169 +300,50 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) crhold(cr); cookie = spl_fstrans_mark(); - int error = -zfs_write(ITOZ(ip), &uio, + ret = -zfs_write(ITOZ(ip), &uio, filp->f_flags | 
zfs_io_flags(kiocb), cr); spl_fstrans_unmark(cookie); crfree(cr); - if (error < 0) - return (error); - - ssize_t wrote = count - uio.uio_resid; - kiocb->ki_pos += wrote; - - return (wrote); -} - -#else /* !HAVE_VFS_RW_ITERATE */ - -static ssize_t -zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (ret) - return (ret); - - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); - - crhold(cr); - cookie = spl_fstrans_mark(); - - int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, - filp->f_flags | zfs_io_flags(kiocb), cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error < 0) - return (error); - - ssize_t read = count - uio.uio_resid; - kiocb->ki_pos += read; - - zpl_file_accessed(filp); - - return (read); -} - -static ssize_t -zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (ret) - return (ret); - - ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode)); - if (ret) + if (ret < 0) return (ret); - kiocb->ki_pos = pos; - - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); - - crhold(cr); - cookie = spl_fstrans_mark(); - - int error = -zfs_write(ITOZ(ip), &uio, - filp->f_flags | zfs_io_flags(kiocb), cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error < 0) - return (error); - ssize_t wrote = count - uio.uio_resid; kiocb->ki_pos += wrote; return (wrote); } -#endif /* HAVE_VFS_RW_ITERATE */ -#if 
defined(HAVE_VFS_RW_ITERATE) static ssize_t -zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter) +zpl_direct_IO_impl(void) { - if (rw == WRITE) - return (zpl_iter_write(kiocb, iter)); - else - return (zpl_iter_read(kiocb, iter)); + /* + * All O_DIRECT requests should be handled by + * zpl_iter_{write/read}(). Generic kernel code should never + * call the direct_IO address_space_operations function. We set + * this code path to be fatal if it is executed. + */ + PANIC(0); + return (0); } + #if defined(HAVE_VFS_DIRECT_IO_ITER) static ssize_t zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) { - return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); + return (zpl_direct_IO_impl()); } #elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET) static ssize_t zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) { - ASSERT3S(pos, ==, kiocb->ki_pos); - return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - ASSERT3S(pos, ==, kiocb->ki_pos); - return (zpl_direct_IO_impl(rw, kiocb, iter)); -} -#else -#error "Unknown direct IO interface" -#endif - -#else /* HAVE_VFS_RW_ITERATE */ - -#if defined(HAVE_VFS_DIRECT_IO_IOVEC) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) -{ - if (rw == WRITE) - return (zpl_aio_write(kiocb, iov, nr_segs, pos)); - else - return (zpl_aio_read(kiocb, iov, nr_segs, pos)); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - const struct iovec *iovp = iov_iter_iovec(iter); - unsigned long nr_segs = iter->nr_segs; - - ASSERT3S(pos, ==, kiocb->ki_pos); - if (rw == WRITE) - return (zpl_aio_write(kiocb, iovp, nr_segs, pos)); - else - return (zpl_aio_read(kiocb, iovp, nr_segs,
pos)); + return (zpl_direct_IO_impl()); } #else -#error "Unknown direct IO interface" +#error "Unknown Direct I/O interface" #endif -#endif /* HAVE_VFS_RW_ITERATE */ - static loff_t zpl_llseek(struct file *filp, loff_t offset, int whence) { @@ -627,6 +424,7 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start, (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags); spl_fstrans_unmark(cookie); + if (error) return (error); @@ -634,13 +432,6 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) if (error) return (error); -#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) - znode_t *zp = ITOZ(ip); - mutex_enter(&zp->z_lock); - zp->z_is_mapped = B_TRUE; - mutex_exit(&zp->z_lock); -#endif - return (error); } @@ -816,6 +607,42 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) return (zpl_putpage(pp, wbc, &for_sync)); } +static int +zpl_releasepage(struct page *pp, gfp_t gfp) +{ + if (PagePrivate(pp)) { + ClearPagePrivate(pp); + put_page(pp); + } + return (1); +} + +#ifdef HAVE_VFS_RELEASE_FOLIO +static bool +zpl_release_folio(struct folio *folio, gfp_t gfp) +{ + return (zpl_releasepage(&folio->page, gfp)); +} +#endif + +#ifdef HAVE_VFS_INVALIDATE_FOLIO +static void +zpl_invalidate_folio(struct folio *folio, size_t offset, size_t len) +{ + if ((offset == 0) && (len == PAGE_SIZE)) { + zpl_releasepage(&folio->page, 0); + } +} +#else +static void +zpl_invalidatepage(struct page *pp, unsigned int offset, unsigned int len) +{ + if ((offset == 0) && (len == PAGE_SIZE)) { + zpl_releasepage(pp, 0); + } +} +#endif + /* * The flag combination which matches the behavior of zfs_space() is * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. 
The FALLOC_FL_PUNCH_HOLE @@ -833,10 +660,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) fstrans_cookie_t cookie; int error = 0; - int test_mode = FALLOC_FL_PUNCH_HOLE; -#ifdef HAVE_FALLOC_FL_ZERO_RANGE - test_mode |= FALLOC_FL_ZERO_RANGE; -#endif + int test_mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE; if ((mode & ~(FALLOC_FL_KEEP_SIZE | test_mode)) != 0) return (-EOPNOTSUPP); @@ -920,7 +744,6 @@ zpl_ioctl_getversion(struct file *filp, void __user *arg) return (copy_to_user(arg, &generation, sizeof (generation))); } -#ifdef HAVE_FILE_FADVISE static int zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) { @@ -973,7 +796,6 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) return (error); } -#endif /* HAVE_FILE_FADVISE */ #define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL) #define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL) @@ -1304,22 +1126,22 @@ const struct address_space_operations zpl_address_space_operations = { #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO .dirty_folio = filemap_dirty_folio, #endif +#ifdef HAVE_VFS_RELEASE_FOLIO + .release_folio = zpl_release_folio, +#else + .releasepage = zpl_releasepage, +#endif +#ifdef HAVE_VFS_INVALIDATE_FOLIO + .invalidate_folio = zpl_invalidate_folio, +#else + .invalidatepage = zpl_invalidatepage, +#endif }; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND -const struct file_operations_extend zpl_file_operations = { - .kabi_fops = { -#else const struct file_operations zpl_file_operations = { -#endif .open = zpl_open, .release = zpl_release, .llseek = zpl_llseek, -#ifdef HAVE_VFS_RW_ITERATE -#ifdef HAVE_NEW_SYNC_READ - .read = new_sync_read, - .write = new_sync_write, -#endif .read_iter = zpl_iter_read, .write_iter = zpl_iter_write, #ifdef HAVE_VFS_IOV_ITER @@ -1329,22 +1151,11 @@ const struct file_operations zpl_file_operations = { .splice_read = generic_file_splice_read, #endif .splice_write = iter_file_splice_write, 
-#endif -#else - .read = do_sync_read, - .write = do_sync_write, - .aio_read = zpl_aio_read, - .aio_write = zpl_aio_write, #endif .mmap = zpl_mmap, .fsync = zpl_fsync, -#ifdef HAVE_FILE_AIO_FSYNC - .aio_fsync = zpl_aio_fsync, -#endif .fallocate = zpl_fallocate, -#ifdef HAVE_VFS_COPY_FILE_RANGE .copy_file_range = zpl_copy_file_range, -#endif #ifdef HAVE_VFS_CLONE_FILE_RANGE .clone_file_range = zpl_clone_file_range, #endif @@ -1354,30 +1165,17 @@ const struct file_operations zpl_file_operations = { #ifdef HAVE_VFS_DEDUPE_FILE_RANGE .dedupe_file_range = zpl_dedupe_file_range, #endif -#ifdef HAVE_FILE_FADVISE .fadvise = zpl_fadvise, -#endif .unlocked_ioctl = zpl_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = zpl_compat_ioctl, #endif -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - }, /* kabi_fops */ - .copy_file_range = zpl_copy_file_range, - .clone_file_range = zpl_clone_file_range, -#endif }; const struct file_operations zpl_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, -#if defined(HAVE_VFS_ITERATE_SHARED) .iterate_shared = zpl_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_iterate, -#else - .readdir = zpl_readdir, -#endif .fsync = zpl_fsync, .unlocked_ioctl = zpl_ioctl, #ifdef CONFIG_COMPAT diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c index 64728fdb1187..d63797568ed2 100644 --- a/module/os/linux/zfs/zpl_file_range.c +++ b/module/os/linux/zfs/zpl_file_range.c @@ -83,8 +83,6 @@ zpl_clone_file_range_impl(struct file *src_file, loff_t src_off, return ((ssize_t)len_o); } -#if defined(HAVE_VFS_COPY_FILE_RANGE) || \ - defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) /* * Entry point for copy_file_range(). Copy len bytes from src_off in src_file * to dst_off in dst_file. 
We are permitted to do this however we like, so we @@ -134,7 +132,6 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off, return (ret); } -#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ #ifdef HAVE_VFS_REMAP_FILE_RANGE /* @@ -179,8 +176,7 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off, } #endif /* HAVE_VFS_REMAP_FILE_RANGE */ -#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \ - defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) +#if defined(HAVE_VFS_CLONE_FILE_RANGE) /* * Entry point for FICLONE and FICLONERANGE, before Linux 4.20. */ @@ -201,7 +197,7 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off, return (ret); } -#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ +#endif /* HAVE_VFS_CLONE_FILE_RANGE */ #ifdef HAVE_VFS_DEDUPE_FILE_RANGE /* diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index b25f3e9224f3..2c90953f8a96 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -46,9 +46,29 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) pathname_t pn; int zfs_flags = 0; zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; + dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os); + size_t dlen = dlen(dentry); - if (dlen(dentry) >= ZAP_MAXNAMELEN) + /* + * If z_longname is disabled, disallow create or rename of names + * longer than ZAP_MAXNAMELEN. + * + * This is needed in cases where longname was enabled first and some + * files/dirs with names > ZAP_MAXNAMELEN were created, and later + * longname was disabled. In such a case allow access to existing + * longnames, but disallow creation of new long-named entities. + */ + if (!zfsvfs->z_longname && (dlen >= ZAP_MAXNAMELEN)) { + /* + * If this is for create or rename, fail it.
+ */ + if (!dsl_dataset_feature_is_active(ds, SPA_FEATURE_LONGNAME) || + (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))) + return (ERR_PTR(-ENAMETOOLONG)); + } + if (dlen >= ZAP_MAXNAMELEN_NEW) { return (ERR_PTR(-ENAMETOOLONG)); + } crhold(cr); cookie = spl_fstrans_mark(); @@ -131,6 +151,16 @@ zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr, } } +static inline bool +is_nametoolong(struct dentry *dentry) +{ + zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; + size_t dlen = dlen(dentry); + + return ((!zfsvfs->z_longname && dlen >= ZAP_MAXNAMELEN) || + dlen >= ZAP_MAXNAMELEN_NEW); +} + static int #ifdef HAVE_IOPS_CREATE_USERNS zpl_create(struct user_namespace *user_ns, struct inode *dir, @@ -151,6 +181,10 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag) zidmap_t *user_ns = kcred->user_ns; #endif + if (is_nametoolong(dentry)) { + return (-ENAMETOOLONG); + } + crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode, cr, user_ns); @@ -201,6 +235,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, zidmap_t *user_ns = kcred->user_ns; #endif + if (is_nametoolong(dentry)) { + return (-ENAMETOOLONG); + } + /* * We currently expect Linux to supply rdev=0 for all sockets * and fifos, but we want to know if this behavior ever changes. 
@@ -238,7 +276,6 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, return (error); } -#ifdef HAVE_TMPFILE static int #ifdef HAVE_TMPFILE_IDMAP zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir, @@ -307,7 +344,6 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return (error); } -#endif static int zpl_unlink(struct inode *dir, struct dentry *dentry) @@ -355,6 +391,10 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) zidmap_t *user_ns = kcred->user_ns; #endif + if (is_nametoolong(dentry)) { + return (-ENAMETOOLONG); + } + crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode | S_IFDIR, cr, user_ns); @@ -570,6 +610,10 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, zidmap_t *user_ns = kcred->user_ns; #endif + if (is_nametoolong(tdentry)) { + return (-ENAMETOOLONG); + } + crhold(cr); if (rflags & RENAME_WHITEOUT) { wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); @@ -591,7 +635,6 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, #if !defined(HAVE_IOPS_RENAME_USERNS) && \ !defined(HAVE_RENAME_WANTS_FLAGS) && \ - !defined(HAVE_RENAME2) && \ !defined(HAVE_IOPS_RENAME_IDMAP) static int zpl_rename(struct inode *sdip, struct dentry *sdentry, @@ -621,6 +664,10 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) zidmap_t *user_ns = kcred->user_ns; #endif + if (is_nametoolong(dentry)) { + return (-ENAMETOOLONG); + } + crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr, user_ns); @@ -647,28 +694,11 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) return (error); } -#if defined(HAVE_PUT_LINK_COOKIE) -static void -zpl_put_link(struct inode *unused, void *cookie) -{ - kmem_free(cookie, MAXPATHLEN); -} -#elif defined(HAVE_PUT_LINK_NAMEIDATA) -static void -zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) -{ - const char *link = nd_get_link(nd); 
- - if (!IS_ERR(link)) - kmem_free(link, MAXPATHLEN); -} -#elif defined(HAVE_PUT_LINK_DELAYED) static void zpl_put_link(void *ptr) { kmem_free(ptr, MAXPATHLEN); } -#endif static int zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) @@ -700,7 +730,6 @@ zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) return (error); } -#if defined(HAVE_GET_LINK_DELAYED) static const char * zpl_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) @@ -719,51 +748,6 @@ zpl_get_link(struct dentry *dentry, struct inode *inode, return (link); } -#elif defined(HAVE_GET_LINK_COOKIE) -static const char * -zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie) -{ - char *link = NULL; - int error; - - if (!dentry) - return (ERR_PTR(-ECHILD)); - - error = zpl_get_link_common(dentry, inode, &link); - if (error) - return (ERR_PTR(error)); - - return (*cookie = link); -} -#elif defined(HAVE_FOLLOW_LINK_COOKIE) -static const char * -zpl_follow_link(struct dentry *dentry, void **cookie) -{ - char *link = NULL; - int error; - - error = zpl_get_link_common(dentry, dentry->d_inode, &link); - if (error) - return (ERR_PTR(error)); - - return (*cookie = link); -} -#elif defined(HAVE_FOLLOW_LINK_NAMEIDATA) -static void * -zpl_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *link = NULL; - int error; - - error = zpl_get_link_common(dentry, dentry->d_inode, &link); - if (error) - nd_set_link(nd, ERR_PTR(error)); - else - nd_set_link(nd, link); - - return (NULL); -} -#endif static int zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -773,6 +757,10 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) int error; fstrans_cookie_t cookie; + if (is_nametoolong(dentry)) { + return (-ENAMETOOLONG); + } + if (ip->i_nlink >= ZFS_LINK_MAX) return (-EMLINK); @@ -800,16 +788,9 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry 
*dentry) const struct inode_operations zpl_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, -#endif /* HAVE_SET_ACL */ #if defined(HAVE_GET_INODE_ACL) .get_inode_acl = zpl_get_acl, #else @@ -819,12 +800,7 @@ const struct inode_operations zpl_inode_operations = { .permission = zpl_permission, }; -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER -const struct inode_operations_wrapper zpl_dir_inode_operations = { - .ops = { -#else const struct inode_operations zpl_dir_inode_operations = { -#endif .create = zpl_create, .lookup = zpl_lookup, .link = zpl_link, @@ -833,30 +809,19 @@ const struct inode_operations zpl_dir_inode_operations = { .mkdir = zpl_mkdir, .rmdir = zpl_rmdir, .mknod = zpl_mknod, -#ifdef HAVE_RENAME2 - .rename2 = zpl_rename2, -#elif defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) +#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) .rename = zpl_rename2, #elif defined(HAVE_IOPS_RENAME_IDMAP) .rename = zpl_rename2, #else .rename = zpl_rename, #endif -#ifdef HAVE_TMPFILE .tmpfile = zpl_tmpfile, -#endif .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, -#endif /* HAVE_SET_ACL */ #if defined(HAVE_GET_INODE_ACL) .get_inode_acl = zpl_get_acl, #else @@ -864,47 +829,21 @@ const struct inode_operations zpl_dir_inode_operations = { #endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ .permission = zpl_permission, -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER - }, - .rename2 = zpl_rename2, -#endif }; const 
struct inode_operations zpl_symlink_inode_operations = { -#ifdef HAVE_GENERIC_READLINK - .readlink = generic_readlink, -#endif -#if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE) .get_link = zpl_get_link, -#elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA) - .follow_link = zpl_follow_link, -#endif -#if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA) - .put_link = zpl_put_link, -#endif .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, }; const struct inode_operations zpl_special_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, -#endif /* HAVE_SET_ACL */ #if defined(HAVE_GET_INODE_ACL) .get_inode_acl = zpl_get_acl, #else diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index e7d2a8fdaf1c..a9618eb0dc15 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -29,6 +29,7 @@ #include #include #include +#include static struct inode * @@ -54,7 +55,6 @@ zpl_inode_destroy(struct inode *ip) * inode has changed. We use it to ensure the znode system attributes * are always strictly update to date with respect to the inode. 
*/ -#ifdef HAVE_DIRTY_INODE_WITH_FLAGS static void zpl_dirty_inode(struct inode *ip, int flags) { @@ -64,17 +64,6 @@ zpl_dirty_inode(struct inode *ip, int flags) zfs_dirty_inode(ip, flags); spl_fstrans_unmark(cookie); } -#else -static void -zpl_dirty_inode(struct inode *ip) -{ - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - zfs_dirty_inode(ip, 0); - spl_fstrans_unmark(cookie); -} -#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */ /* * When ->drop_inode() is called its return value indicates if the diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c index 1b9c354fcec5..fe19ac80c219 100644 --- a/module/os/linux/zfs/zpl_xattr.c +++ b/module/os/linux/zfs/zpl_xattr.c @@ -176,19 +176,20 @@ static int zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf) { zap_cursor_t zc; - zap_attribute_t zap; + zap_attribute_t *zap = zap_attribute_alloc(); int error; zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id); - while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) { + while ((error = -zap_cursor_retrieve(&zc, zap)) == 0) { - if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { + if (zap->za_integer_length != 8 || zap->za_num_integers != 1) { error = -ENXIO; break; } - error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name)); + error = zpl_xattr_filldir(xf, zap->za_name, + strlen(zap->za_name)); if (error) break; @@ -196,6 +197,7 @@ zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf) } zap_cursor_fini(&zc); + zap_attribute_free(zap); if (error == -ENOENT) error = 0; @@ -743,10 +745,6 @@ __zpl_xattr_user_get(struct inode *ip, const char *name, { int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif if (ZFS_XA_NS_PREFIX_FORBIDDEN(name)) return (-EINVAL); if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) @@ -776,10 +774,6 @@ __zpl_xattr_user_set(zidmap_t *user_ns, (void) user_ns; int error = 0; /* 
xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif if (ZFS_XA_NS_PREFIX_FORBIDDEN(name)) return (-EINVAL); if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) @@ -866,10 +860,6 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name, if (!capable(CAP_SYS_ADMIN)) return (-EACCES); /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); error = zpl_xattr_get(ip, xattr_name, value, size); kmem_strfree(xattr_name); @@ -890,10 +880,6 @@ __zpl_xattr_trusted_set(zidmap_t *user_ns, if (!capable(CAP_SYS_ADMIN)) return (-EACCES); /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); error = zpl_xattr_set(ip, xattr_name, value, size, flags); kmem_strfree(xattr_name); @@ -936,10 +922,6 @@ __zpl_xattr_security_get(struct inode *ip, const char *name, char *xattr_name; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); error = zpl_xattr_get(ip, xattr_name, value, size); kmem_strfree(xattr_name); @@ -957,10 +939,6 @@ __zpl_xattr_security_set(zidmap_t *user_ns, char *xattr_name; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); error = zpl_xattr_set(ip, xattr_name, value, size, flags); kmem_strfree(xattr_name); @@ -1080,15 +1058,14 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type) if (!error) { if 
(acl) - zpl_set_cached_acl(ip, type, acl); + set_cached_acl(ip, type, acl); else - zpl_forget_cached_acl(ip, type); + forget_cached_acl(ip, type); } return (error); } -#ifdef HAVE_SET_ACL int #ifdef HAVE_SET_ACL_USERNS zpl_set_acl(struct user_namespace *userns, struct inode *ip, @@ -1111,7 +1088,6 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) return (zpl_set_acl_impl(ip, acl, type)); #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */ } -#endif /* HAVE_SET_ACL */ static struct posix_acl * zpl_get_acl_impl(struct inode *ip, int type) @@ -1120,17 +1096,6 @@ zpl_get_acl_impl(struct inode *ip, int type) void *value = NULL; char *name; - /* - * As of Linux 3.14, the kernel get_acl will check this for us. - * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong - * as the kernel get_acl will set it to temporary sentinel value. - */ -#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE - acl = get_cached_acl(ip, type); - if (acl != ACL_NOT_CACHED) - return (acl); -#endif - switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; @@ -1159,12 +1124,6 @@ zpl_get_acl_impl(struct inode *ip, int type) if (size > 0) kmem_free(value, size); - /* As of Linux 4.7, the kernel get_acl will set this for us */ -#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE - if (!IS_ERR(acl)) - zpl_set_cached_acl(ip, type, acl); -#endif - return (acl); } @@ -1301,10 +1260,6 @@ __zpl_xattr_acl_get_access(struct inode *ip, const char *name, int type = ACL_TYPE_ACCESS; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1329,10 +1284,6 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name, int type = ACL_TYPE_DEFAULT; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if 
(ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1358,10 +1309,6 @@ __zpl_xattr_acl_set_access(zidmap_t *mnt_ns, int type = ACL_TYPE_ACCESS; int error = 0; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1379,7 +1326,7 @@ __zpl_xattr_acl_set_access(zidmap_t *mnt_ns, if (IS_ERR(acl)) return (PTR_ERR(acl)); else if (acl) { - error = zpl_posix_acl_valid(ip, acl); + error = posix_acl_valid(ip->i_sb->s_user_ns, acl); if (error) { zpl_posix_acl_release(acl); return (error); @@ -1404,10 +1351,6 @@ __zpl_xattr_acl_set_default(zidmap_t *mnt_ns, int type = ACL_TYPE_DEFAULT; int error = 0; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1425,7 +1368,7 @@ __zpl_xattr_acl_set_default(zidmap_t *mnt_ns, if (IS_ERR(acl)) return (PTR_ERR(acl)); else if (acl) { - error = zpl_posix_acl_valid(ip, acl); + error = posix_acl_valid(ip->i_sb->s_user_ns, acl); if (error) { zpl_posix_acl_release(acl); return (error); @@ -1449,41 +1392,25 @@ ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default); * whole name and reject anything that has .name only as prefix. */ static xattr_handler_t zpl_xattr_acl_access_handler = { -#ifdef HAVE_XATTR_HANDLER_NAME .name = XATTR_NAME_POSIX_ACL_ACCESS, -#else - .prefix = XATTR_NAME_POSIX_ACL_ACCESS, -#endif .list = zpl_xattr_acl_list_access, .get = zpl_xattr_acl_get_access, .set = zpl_xattr_acl_set_access, -#if defined(HAVE_XATTR_LIST_SIMPLE) || \ - defined(HAVE_XATTR_LIST_DENTRY) || \ - defined(HAVE_XATTR_LIST_HANDLER) .flags = ACL_TYPE_ACCESS, -#endif }; /* * ACL default xattr namespace handlers. * - * Use .name instead of .prefix when available. 
xattr_resolve_name will match - * whole name and reject anything that has .name only as prefix. + * Use .name instead of .prefix. xattr_resolve_name will match whole name and + * reject anything that has .name only as prefix. */ static xattr_handler_t zpl_xattr_acl_default_handler = { -#ifdef HAVE_XATTR_HANDLER_NAME .name = XATTR_NAME_POSIX_ACL_DEFAULT, -#else - .prefix = XATTR_NAME_POSIX_ACL_DEFAULT, -#endif .list = zpl_xattr_acl_list_default, .get = zpl_xattr_acl_get_default, .set = zpl_xattr_acl_set_default, -#if defined(HAVE_XATTR_LIST_SIMPLE) || \ - defined(HAVE_XATTR_LIST_DENTRY) || \ - defined(HAVE_XATTR_LIST_HANDLER) .flags = ACL_TYPE_DEFAULT, -#endif }; #endif /* CONFIG_FS_POSIX_ACL */ @@ -2016,24 +1943,15 @@ zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len) } if (handler->list) { -#if defined(HAVE_XATTR_LIST_SIMPLE) if (!handler->list(d)) return (XAPERM_DENY); -#elif defined(HAVE_XATTR_LIST_DENTRY) - if (!handler->list(d, NULL, 0, name, name_len, 0)) - return (XAPERM_DENY); -#elif defined(HAVE_XATTR_LIST_HANDLER) - if (!handler->list(handler, d, NULL, 0, name, name_len)) - return (XAPERM_DENY); -#endif } return (perm); } -#if defined(CONFIG_FS_POSIX_ACL) && \ - (!defined(HAVE_POSIX_ACL_RELEASE) || \ - defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)) +#ifdef CONFIG_FS_POSIX_ACL + struct acl_rel_struct { struct acl_rel_struct *next; struct posix_acl *acl; diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index d1e3061b50e6..2396690b40fd 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -44,10 +44,7 @@ #include #include #include - -#ifdef HAVE_BLK_MQ #include -#endif static void zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, boolean_t force_sync); @@ -68,7 +65,6 @@ static unsigned int zvol_open_timeout_ms = 1000; #endif static unsigned int zvol_threads = 0; -#ifdef HAVE_BLK_MQ static unsigned int zvol_blk_mq_threads = 0; static unsigned int 
zvol_blk_mq_actual_threads; static boolean_t zvol_use_blk_mq = B_FALSE; @@ -84,7 +80,6 @@ static boolean_t zvol_use_blk_mq = B_FALSE; * read and write tests to a zvol in an NVMe pool (with 16 CPUs). */ static unsigned int zvol_blk_mq_blocks_per_thread = 8; -#endif static unsigned int zvol_num_taskqs = 0; @@ -96,31 +91,26 @@ static unsigned int zvol_num_taskqs = 0; /* * Finalize our BIO or request. */ -#ifdef HAVE_BLK_MQ -#define END_IO(zv, bio, rq, error) do { \ - if (bio) { \ - BIO_END_IO(bio, error); \ - } else { \ - blk_mq_end_request(rq, errno_to_bi_status(error)); \ - } \ -} while (0) -#else -#define END_IO(zv, bio, rq, error) BIO_END_IO(bio, error) -#endif +static inline void +zvol_end_io(struct bio *bio, struct request *rq, int error) +{ + if (bio) { + bio->bi_status = errno_to_bi_status(-error); + bio_endio(bio); + } else { + blk_mq_end_request(rq, errno_to_bi_status(error)); + } +} -#ifdef HAVE_BLK_MQ static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; static unsigned int zvol_actual_blk_mq_queue_depth; -#endif struct zvol_state_os { struct gendisk *zvo_disk; /* generic disk */ struct request_queue *zvo_queue; /* request queue */ dev_t zvo_dev; /* device id */ -#ifdef HAVE_BLK_MQ struct blk_mq_tag_set tag_set; -#endif /* Set from the global 'zvol_use_blk_mq' at zvol load */ boolean_t use_blk_mq; @@ -165,8 +155,6 @@ zv_request_task_free(zv_request_task_t *task) kmem_free(task, sizeof (*task)); } -#ifdef HAVE_BLK_MQ - /* * This is called when a new block multiqueue request comes in. A request * contains one or more BIOs. @@ -219,7 +207,6 @@ static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv) return (blk_mq_alloc_tag_set(&zso->tag_set)); } -#endif /* HAVE_BLK_MQ */ /* * Given a path, return TRUE if path is a ZVOL. @@ -265,7 +252,7 @@ zvol_write(zv_request_t *zvr) /* Some requests are just for flush and nothing else. 
*/ if (io_size(bio, rq) == 0) { rw_exit(&zv->zv_suspend_lock); - END_IO(zv, bio, rq, 0); + zvol_end_io(bio, rq, 0); return; } @@ -332,7 +319,7 @@ zvol_write(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); } - END_IO(zv, bio, rq, -error); + zvol_end_io(bio, rq, -error); } static void @@ -421,7 +408,7 @@ zvol_discard(zv_request_t *zvr) start_time); } - END_IO(zv, bio, rq, -error); + zvol_end_io(bio, rq, -error); } static void @@ -498,7 +485,7 @@ zvol_read(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, READ, bio, start_time); } - END_IO(zv, bio, rq, -error); + zvol_end_io(bio, rq, -error); } static void @@ -529,7 +516,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, int rw = io_data_dir(bio, rq); if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { - END_IO(zv, bio, rq, -SET_ERROR(ENXIO)); + zvol_end_io(bio, rq, -SET_ERROR(ENXIO)); goto out; } @@ -548,7 +535,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, (long long unsigned)offset, (long unsigned)size); - END_IO(zv, bio, rq, -SET_ERROR(EIO)); + zvol_end_io(bio, rq, -SET_ERROR(EIO)); goto out; } @@ -557,7 +544,6 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint_t blk_mq_hw_queue = 0; uint_t tq_idx; uint_t taskq_hash; -#ifdef HAVE_BLK_MQ if (rq) #ifdef HAVE_BLK_MQ_RQ_HCTX blk_mq_hw_queue = rq->mq_hctx->queue_num; @@ -565,14 +551,13 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, blk_mq_hw_queue = rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num; #endif -#endif - taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT, - blk_mq_hw_queue, 0); + taskq_hash = cityhash3((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT, + blk_mq_hw_queue); tq_idx = taskq_hash % ztqs->tqs_cnt; if (rw == WRITE) { if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - END_IO(zv, bio, rq, -SET_ERROR(EROFS)); + zvol_end_io(bio, rq, -SET_ERROR(EROFS)); goto out; } @@ -657,7 +642,7 @@ 
zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, * data and require no additional handling. */ if (size == 0) { - END_IO(zv, bio, rq, 0); + zvol_end_io(bio, rq, 0); goto out; } @@ -1171,7 +1156,6 @@ zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv, * the correct number of segments for the volblocksize and * number of chunks you want. */ -#ifdef HAVE_BLK_MQ if (zvol_blk_mq_blocks_per_thread != 0) { unsigned int chunks; chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX); @@ -1188,7 +1172,6 @@ zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv, limits->zql_max_segment_size = UINT_MAX; } } else { -#endif limits->zql_max_segments = UINT16_MAX; limits->zql_max_segment_size = UINT_MAX; } @@ -1301,7 +1284,6 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits) static int zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) { -#ifdef HAVE_BLK_MQ struct zvol_state_os *zso = zv->zv_zso; /* Allocate our blk-mq tag_set */ @@ -1348,7 +1330,6 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) #endif zvol_queue_limits_apply(limits, zso->zvo_queue); -#endif return (0); } @@ -1384,9 +1365,7 @@ zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); -#ifdef HAVE_BLK_MQ zv->zv_zso->use_blk_mq = zvol_use_blk_mq; -#endif zvol_queue_limits_t limits; zvol_queue_limits_init(&limits, zv, zv->zv_zso->use_blk_mq); @@ -1442,8 +1421,8 @@ zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) */ if (volmode == ZFS_VOLMODE_DEV) { zso->zvo_disk->minors = 1; - zso->zvo_disk->flags &= ~ZFS_GENHD_FL_EXT_DEVT; - zso->zvo_disk->flags |= ZFS_GENHD_FL_NO_PART; + zso->zvo_disk->flags &= ~GENHD_FL_EXT_DEVT; + zso->zvo_disk->flags |= GENHD_FL_NO_PART; } zso->zvo_disk->first_minor = (dev & MINORMASK); @@ -1495,10 +1474,8 @@ zvol_os_free(zvol_state_t *zv) 
put_disk(zv->zv_zso->zvo_disk); #endif -#ifdef HAVE_BLK_MQ if (zv->zv_zso->use_blk_mq) blk_mq_free_tag_set(&zv->zv_zso->tag_set); -#endif ida_simple_remove(&zvol_ida, MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); @@ -1863,7 +1840,6 @@ zvol_init(void) return (error); } -#ifdef HAVE_BLK_MQ if (zvol_blk_mq_queue_depth == 0) { zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; } else { @@ -1877,7 +1853,7 @@ zvol_init(void) zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1), 1024); } -#endif + for (uint_t i = 0; i < num_tqs; i++) { char name[32]; (void) snprintf(name, sizeof (name), "%s_tq-%u", @@ -1949,7 +1925,6 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end"); module_param(zvol_volmode, uint, 0644); MODULE_PARM_DESC(zvol_volmode, "Default volmode property value"); -#ifdef HAVE_BLK_MQ module_param(zvol_blk_mq_queue_depth, uint, 0644); MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth"); @@ -1959,7 +1934,6 @@ MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols"); module_param(zvol_blk_mq_blocks_per_thread, uint, 0644); MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread, "Process volblocksize blocks per thread"); -#endif #ifndef HAVE_BLKDEV_GET_ERESTARTSYS module_param(zvol_open_timeout_ms, uint, 0644); diff --git a/module/zcommon/cityhash.c b/module/zcommon/cityhash.c index 413a96df2cda..c758ec47d1f6 100644 --- a/module/zcommon/cityhash.c +++ b/module/zcommon/cityhash.c @@ -49,8 +49,8 @@ cityhash_helper(uint64_t u, uint64_t v, uint64_t mul) return (b); } -uint64_t -cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) +static inline uint64_t +cityhash_impl(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) { uint64_t mul = HASH_K2 + 64; uint64_t a = w1 * HASH_K1; @@ -59,9 +59,38 @@ cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) uint64_t d = w3 * HASH_K2; return (cityhash_helper(rotate(a + b, 43) + rotate(c, 30) + d, a + rotate(b + HASH_K2, 18) + c, mul)); +} +/* + * Passing w 
as the 2nd argument could save one 64-bit multiplication. + */ +uint64_t +cityhash1(uint64_t w) +{ + return (cityhash_impl(0, w, 0, 0)); +} + +uint64_t +cityhash2(uint64_t w1, uint64_t w2) +{ + return (cityhash_impl(w1, w2, 0, 0)); +} + +uint64_t +cityhash3(uint64_t w1, uint64_t w2, uint64_t w3) +{ + return (cityhash_impl(w1, w2, w3, 0)); +} + +uint64_t +cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) +{ + return (cityhash_impl(w1, w2, w3, w4)); } #if defined(_KERNEL) +EXPORT_SYMBOL(cityhash1); +EXPORT_SYMBOL(cityhash2); +EXPORT_SYMBOL(cityhash3); EXPORT_SYMBOL(cityhash4); #endif diff --git a/module/zcommon/simd_stat.c b/module/zcommon/simd_stat.c new file mode 100644 index 000000000000..33c15140cdb9 --- /dev/null +++ b/module/zcommon/simd_stat.c @@ -0,0 +1,203 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2024 Google, Inc. All rights reserved. + */ +#include +#include +#include + + +#ifdef _KERNEL +#ifdef __linux__ +#include +#endif /* __linux__ */ +kstat_t *simd_stat_kstat; +#endif /* _KERNEL */ + +#ifdef _KERNEL +/* Sometimes, we don't define these at all. 
*/ +#ifndef HAVE_KERNEL_FPU +#define HAVE_KERNEL_FPU (0) +#endif +#ifndef HAVE_KERNEL_NEON +#define HAVE_KERNEL_NEON (0) +#endif +#ifndef HAVE_KERNEL_FPU_INTERNAL +#define HAVE_KERNEL_FPU_INTERNAL (0) +#endif +#ifndef HAVE_UNDERSCORE_KERNEL_FPU +#define HAVE_UNDERSCORE_KERNEL_FPU (0) +#endif + +#define SIMD_STAT_PRINT(s, feat, val) \ + kmem_scnprintf(s + off, MAX(4095-off, 0), "%-16s\t%1d\n", feat, (val)) + +static int +simd_stat_kstat_data(char *buf, size_t size, void *data) +{ + (void) data; + + static char simd_stat_kstat_payload[4096] = {0}; + static int off = 0; +#ifdef __linux__ + if (off == 0) { + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "kfpu_allowed", kfpu_allowed()); +#if defined(__x86_64__) || defined(__i386__) + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "kfpu", HAVE_KERNEL_FPU); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "kfpu_internal", HAVE_KERNEL_FPU_INTERNAL); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "__kernel_fpu", HAVE_UNDERSCORE_KERNEL_FPU); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sse", zfs_sse_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sse2", zfs_sse2_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sse3", zfs_sse3_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "ssse3", zfs_ssse3_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sse41", zfs_sse4_1_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sse42", zfs_sse4_2_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx", zfs_avx_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx2", zfs_avx2_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512f", zfs_avx512f_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512cd", zfs_avx512cd_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512er", zfs_avx512er_available()); + off += 
SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512pf", zfs_avx512pf_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512bw", zfs_avx512bw_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512dq", zfs_avx512dq_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512vl", zfs_avx512vl_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512ifma", zfs_avx512ifma_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "avx512vbmi", zfs_avx512vbmi_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "ymm", __ymm_enabled()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "zmm", __zmm_enabled()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "bmi1", zfs_bmi1_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "bmi2", zfs_bmi2_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "aes", zfs_aes_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "pclmulqdq", zfs_pclmulqdq_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "movbe", zfs_movbe_available()); + + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "osxsave", boot_cpu_has(X86_FEATURE_OSXSAVE)); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "xsaves", static_cpu_has(X86_FEATURE_XSAVES)); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "xsaveopt", static_cpu_has(X86_FEATURE_XSAVEOPT)); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "xsave", static_cpu_has(X86_FEATURE_XSAVE)); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "fxsr", static_cpu_has(X86_FEATURE_FXSR)); +#endif /* __x86__ */ +#if defined(__arm__) || defined(__aarch64__) + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "kernel_neon", HAVE_KERNEL_NEON); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "kernel_mode_neon", CONFIG_KERNEL_MODE_NEON); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "neon", zfs_neon_available()); + off += 
SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sha256", zfs_sha256_available()); +#if defined(__aarch64__) + /* + * This technically can exist on 32b ARM but we don't + * define hooks to check for it and I didn't want to + * learn enough ARM ASM to add one. + */ + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "sha512", zfs_sha512_available()); +#endif /* __aarch64__ */ +#endif /* __arm__ */ + /* We want to short-circuit this on unsupported platforms. */ + off += 1; + } + + kmem_scnprintf(buf, MIN(off, size), "%s", simd_stat_kstat_payload); +#endif /* __linux__ */ + return (0); +} +#endif /* _KERNEL */ + +void +simd_stat_init(void) +{ + static boolean_t simd_stat_initialized = B_FALSE; + + if (!simd_stat_initialized) { +#if defined(_KERNEL) + /* Install kstats for all implementations */ + simd_stat_kstat = kstat_create("zfs", 0, "simd", "misc", + KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); + + + if (simd_stat_kstat != NULL) { + simd_stat_kstat->ks_data = (void*)(uintptr_t)1; + simd_stat_kstat->ks_ndata = 1; + simd_stat_kstat->ks_flags |= KSTAT_FLAG_NO_HEADERS; + kstat_set_raw_ops(simd_stat_kstat, + NULL, + simd_stat_kstat_data, + NULL); + kstat_install(simd_stat_kstat); + } +#endif /* _KERNEL */ + } + /* Finish initialization */ + simd_stat_initialized = B_TRUE; +} + +void +simd_stat_fini(void) +{ +#if defined(_KERNEL) + if (simd_stat_kstat != NULL) { + kstat_delete(simd_stat_kstat); + simd_stat_kstat = NULL; + } +#endif +} + +#ifdef _KERNEL +EXPORT_SYMBOL(simd_stat_init); +EXPORT_SYMBOL(simd_stat_fini); +#endif diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 8dec5f27b0af..96f0086d7858 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -25,7 +25,7 @@ * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. - * Copyright (c) 2019, Klara Inc. + * Copyright (c) 2019, 2024, Klara, Inc. 
* Copyright (c) 2019, Allan Jude */ @@ -760,6 +760,31 @@ zpool_feature_init(void) ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + { + static const spa_feature_t longname_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_LONGNAME, + "org.zfsonlinux:longname", "longname", + "support filename up to 1024 bytes", + ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN, + longname_deps, sfeatures); + } + + { + static const spa_feature_t large_microzap_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_LARGE_BLOCKS, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_LARGE_MICROZAP, + "com.klarasystems:large_microzap", "large_microzap", + "Support for microzaps larger than 128KB.", + ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT, + ZFEATURE_TYPE_BOOLEAN, large_microzap_deps, sfeatures); + } + zfs_mod_list_supported_free(sfeatures); } diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 31425802f97f..3c85462a1d02 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "zfs_prop.h" #include "zfs_deleg.h" @@ -237,6 +238,7 @@ zfs_prop_init(void) static const zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, + { "disabled", ZFS_SNAPDIR_DISABLED }, { NULL } }; @@ -361,7 +363,7 @@ zfs_prop_init(void) static const zprop_index_t xattr_table[] = { { "off", ZFS_XATTR_OFF }, - { "on", ZFS_XATTR_DIR }, + { "on", ZFS_XATTR_SA }, { "sa", ZFS_XATTR_SA }, { "dir", ZFS_XATTR_DIR }, { NULL } @@ -395,6 +397,13 @@ zfs_prop_init(void) { NULL } }; + static const zprop_index_t direct_table[] = { + { "disabled", ZFS_DIRECT_DISABLED }, + { "standard", ZFS_DIRECT_STANDARD }, + { "always", ZFS_DIRECT_ALWAYS }, + { NULL } + }; + struct zfs_mod_supported_features *sfeatures = zfs_mod_list_supported(ZFS_SYSFS_DATASET_PROPERTIES); @@ -428,7 +437,7 @@ 
zfs_prop_init(void) "COMPRESS", compress_table, sfeatures); zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "hidden | visible", "SNAPDIR", snapdir_table, sfeatures); + "disabled | hidden | visible", "SNAPDIR", snapdir_table, sfeatures); zprop_register_index(ZFS_PROP_SNAPDEV, "snapdev", ZFS_SNAPDEV_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "hidden | visible", "SNAPDEV", snapdev_table, sfeatures); @@ -461,7 +470,7 @@ zfs_prop_init(void) zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "latency | throughput", "LOGBIAS", logbias_table, sfeatures); - zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR, + zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_SA, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off | dir | sa", "XATTR", xattr_table, sfeatures); zprop_register_index(ZFS_PROP_DNODESIZE, "dnodesize", @@ -473,6 +482,10 @@ zfs_prop_init(void) ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "default | full | geom | dev | none", "VOLMODE", volmode_table, sfeatures); + zprop_register_index(ZFS_PROP_DIRECT, "direct", + ZFS_DIRECT_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "disabled | standard | always", "DIRECT", direct_table, + sfeatures); /* inherit index (boolean) properties */ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, @@ -754,6 +767,10 @@ zfs_prop_init(void) ZFS_TYPE_VOLUME, "", "SNAPSHOTS_CHANGED", B_FALSE, B_TRUE, B_TRUE, NULL, sfeatures); + zprop_register_index(ZFS_PROP_LONGNAME, "longname", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "LONGNAME", boolean_table, + sfeatures); + zfs_mod_list_supported_free(sfeatures); } @@ -786,11 +803,12 @@ zfs_name_to_prop(const char *propname) boolean_t zfs_prop_user(const char *name) { - int i; + int i, len; char c; boolean_t foundsep = B_FALSE; - for (i = 0; i < strlen(name); i++) { + len = strlen(name); + for (i = 0; i < 
len; i++) { c = name[i]; if (!zprop_valid_char(c)) return (B_FALSE); @@ -1065,6 +1083,7 @@ zcommon_init(void) return (error); fletcher_4_init(); + simd_stat_init(); return (0); } @@ -1072,6 +1091,7 @@ zcommon_init(void) void zcommon_fini(void) { + simd_stat_fini(); fletcher_4_fini(); kfpu_fini(); } diff --git a/module/zcommon/zfs_valstr.c b/module/zcommon/zfs_valstr.c index e2d4d1aefefb..622323bbbd5f 100644 --- a/module/zcommon/zfs_valstr.c +++ b/module/zcommon/zfs_valstr.c @@ -218,6 +218,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag, { '.', "NP", "NOPWRITE" }, { '.', "EX", "REEXECUTED" }, { '.', "DG", "DELEGATED" }, + { '.', "DC", "DIO_CHKSUM_ERR" }, ) /* END CSTYLED */ @@ -252,6 +253,7 @@ _VALSTR_BITFIELD_IMPL(zio_stage, { 'V', "VD", "VDEV_IO_DONE" }, { 'V', "VA", "VDEV_IO_ASSESS" }, { 'C', "CV", "CHECKSUM_VERIFY" }, + { 'C', "DC", "DIO_CHECKSUM_VERIFY" }, { 'X', "X ", "DONE" }, ) /* END CSTYLED */ diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index afdbb6f15e97..d3355730ba3d 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -494,11 +494,12 @@ vdev_name_to_prop(const char *propname) boolean_t vdev_prop_user(const char *name) { - int i; + int i, len; char c; boolean_t foundsep = B_FALSE; - for (i = 0; i < strlen(name); i++) { + len = strlen(name); + for (i = 0; i < len; i++) { c = name[i]; if (!zprop_valid_char(c)) return (B_FALSE); diff --git a/module/zfs/abd.c b/module/zfs/abd.c index c8c4d2270fae..529deeecfd4b 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -89,8 +89,8 @@ * functions. * * As an additional feature, linear and scatter ABD's can be stitched together - * by using the gang ABD type (abd_alloc_gang_abd()). This allows for - * multiple ABDs to be viewed as a singular ABD. + * by using the gang ABD type (abd_alloc_gang()). This allows for multiple ABDs + * to be viewed as a singular ABD. * * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to * B_FALSE. 
@@ -109,11 +109,15 @@ void abd_verify(abd_t *abd) { #ifdef ZFS_DEBUG - ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); + if (abd_is_from_pages(abd)) { + ASSERT3U(abd->abd_size, <=, DMU_MAX_ACCESS); + } else { + ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); + } ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG | - ABD_FLAG_GANG_FREE | ABD_FLAG_ALLOCD)); + ABD_FLAG_GANG_FREE | ABD_FLAG_ALLOCD | ABD_FLAG_FROM_PAGES)); IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); if (abd_is_linear(abd)) { @@ -136,7 +140,7 @@ abd_verify(abd_t *abd) #endif } -static void +void abd_init_struct(abd_t *abd) { list_link_init(&abd->abd_gang_link); @@ -238,6 +242,7 @@ abd_free_linear(abd_t *abd) abd_free_linear_page(abd); return; } + if (abd->abd_flags & ABD_FLAG_META) { zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); } else { @@ -520,6 +525,21 @@ abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size) */ abd->abd_flags |= ABD_FLAG_LINEAR; + /* + * User pages from Direct I/O requests may be in a single page + * (ABD_FLAG_LINEAR_PAGE), and we must make sure to still flag + * that here for abd. This is required because we have to be + * careful when borrowing the buffer from the ABD because we + * can not place user pages under write protection on Linux. + * See the comments in abd_os.c for abd_borrow_buf(), + * abd_borrow_buf_copy(), abd_return_buf() and + * abd_return_buf_copy(). 
+ */ + if (abd_is_from_pages(sabd)) { + abd->abd_flags |= ABD_FLAG_FROM_PAGES | + ABD_FLAG_LINEAR_PAGE; + } + ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off; } else if (abd_is_gang(sabd)) { size_t left = size; @@ -648,70 +668,6 @@ abd_to_buf(abd_t *abd) return (ABD_LINEAR_BUF(abd)); } -/* - * Borrow a raw buffer from an ABD without copying the contents of the ABD - * into the buffer. If the ABD is scattered, this will allocate a raw buffer - * whose contents are undefined. To copy over the existing data in the ABD, use - * abd_borrow_buf_copy() instead. - */ -void * -abd_borrow_buf(abd_t *abd, size_t n) -{ - void *buf; - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - buf = abd_to_buf(abd); - } else { - buf = zio_buf_alloc(n); - } -#ifdef ZFS_DEBUG - (void) zfs_refcount_add_many(&abd->abd_children, n, buf); -#endif - return (buf); -} - -void * -abd_borrow_buf_copy(abd_t *abd, size_t n) -{ - void *buf = abd_borrow_buf(abd, n); - if (!abd_is_linear(abd)) { - abd_copy_to_buf(buf, abd, n); - } - return (buf); -} - -/* - * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will - * not change the contents of the ABD and will ASSERT that you didn't modify - * the buffer since it was borrowed. If you want any changes you made to buf to - * be copied back to abd, use abd_return_buf_copy() instead. 
- */ -void -abd_return_buf(abd_t *abd, void *buf, size_t n) -{ - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); -#ifdef ZFS_DEBUG - (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); -#endif - if (abd_is_linear(abd)) { - ASSERT3P(buf, ==, abd_to_buf(abd)); - } else { - ASSERT0(abd_cmp_buf(abd, buf, n)); - zio_buf_free(buf, n); - } -} - -void -abd_return_buf_copy(abd_t *abd, void *buf, size_t n) -{ - if (!abd_is_linear(abd)) { - abd_copy_from_buf(abd, buf, n); - } - abd_return_buf(abd, buf, n); -} - void abd_release_ownership_of_buf(abd_t *abd) { diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 714a30e863a7..a4b81be0f0a1 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -29,6 +29,7 @@ * Copyright (c) 2019, 2023, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2020, The FreeBSD Foundation [1] + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. * * [1] Portions of this software were developed by Allan Jude * under sponsorship from the FreeBSD Foundation. @@ -1786,9 +1787,13 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) !HDR_COMPRESSION_ENABLED(hdr)) { abd = NULL; csize = zio_compress_data(HDR_GET_COMPRESS(hdr), - hdr->b_l1hdr.b_pabd, &abd, lsize, hdr->b_complevel); + hdr->b_l1hdr.b_pabd, &abd, lsize, MIN(lsize, psize), + hdr->b_complevel); + if (csize >= lsize || csize > psize) { + ret = SET_ERROR(EIO); + return (ret); + } ASSERT3P(abd, !=, NULL); - ASSERT3U(csize, <=, psize); abd_zero_off(abd, csize, psize - csize); free_abd = B_TRUE; } @@ -4320,7 +4325,7 @@ arc_evict(void) /* Evict MFU metadata. 
*/ w = wt * (int64_t)(arc_meta >> 16) >> 16; - e = MIN((int64_t)(asize - arc_c), (int64_t)(m - w)); + e = MIN((int64_t)(asize - arc_c), (int64_t)(m - bytes - w)); bytes = arc_evict_impl(arc_mfu, ARC_BUFC_METADATA, e); total_evicted += bytes; mfum -= bytes; @@ -5961,7 +5966,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, misses); - zfs_racct_read(size, 1); + zfs_racct_read(spa, size, 1, 0); } /* Check if the spa even has l2 configured */ @@ -9029,8 +9034,8 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) { cabd = abd_alloc_for_io(MAX(size, asize), ismd); uint64_t csize = zio_compress_data(compress, to_write, &cabd, - size, hdr->b_complevel); - if (csize > psize) { + size, MIN(size, psize), hdr->b_complevel); + if (csize >= size || csize > psize) { /* * We can't re-compress the block into the original * psize. 
Even if it fits into asize, it does not @@ -10521,9 +10526,12 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb) */ list_insert_tail(&cb->l2wcb_abd_list, abd_buf); - /* try to compress the buffer */ + /* try to compress the buffer, at least one sector to save */ psize = zio_compress_data(ZIO_COMPRESS_LZ4, - abd_buf->abd, &abd, sizeof (*lb), 0); + abd_buf->abd, &abd, sizeof (*lb), + zio_get_compression_max_size(ZIO_COMPRESS_LZ4, + dev->l2ad_vdev->vdev_ashift, + dev->l2ad_vdev->vdev_ashift, sizeof (*lb)), 0); /* a log block is never entirely zero */ ASSERT(psize != 0); diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c index 914260e742f9..27a04c2af06c 100644 --- a/module/zfs/dataset_kstats.c +++ b/module/zfs/dataset_kstats.c @@ -217,8 +217,7 @@ dataset_kstats_rename(dataset_kstats_t *dk, const char *name) } void -dataset_kstats_update_write_kstats(dataset_kstats_t *dk, - int64_t nwritten) +dataset_kstats_update_write_kstats(dataset_kstats_t *dk, int64_t nwritten) { ASSERT3S(nwritten, >=, 0); @@ -230,8 +229,7 @@ dataset_kstats_update_write_kstats(dataset_kstats_t *dk, } void -dataset_kstats_update_read_kstats(dataset_kstats_t *dk, - int64_t nread) +dataset_kstats_update_read_kstats(dataset_kstats_t *dk, int64_t nread) { ASSERT3S(nread, >=, 0); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 099883ba2652..df9368fc8bdb 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -628,7 +628,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db) * L2ARC. 
*/ boolean_t -dbuf_is_l2cacheable(dmu_buf_impl_t *db) +dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *bp) { if (db->db_objset->os_secondary_cache == ZFS_CACHE_ALL || (db->db_objset->os_secondary_cache == @@ -636,10 +636,17 @@ dbuf_is_l2cacheable(dmu_buf_impl_t *db) if (l2arc_exclude_special == 0) return (B_TRUE); - blkptr_t *bp = db->db_blkptr; - if (bp == NULL || BP_IS_HOLE(bp)) + /* + * bp must be checked in the event it was passed from + * dbuf_read_impl() as the result of the BP being set from + * a Direct I/O write in dbuf_read(). See comments in + * dbuf_read(). + */ + blkptr_t *db_bp = bp == NULL ? db->db_blkptr : bp; + + if (db_bp == NULL || BP_IS_HOLE(db_bp)) return (B_FALSE); - uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); + uint64_t vdev = DVA_GET_VDEV(db_bp->blk_dva); vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev; vdev_t *vd = NULL; @@ -1380,6 +1387,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, mutex_enter(&db->db_mtx); ASSERT3U(db->db_state, ==, DB_READ); + /* * All reads are synchronous, so we must have a hold on the dbuf */ @@ -1570,12 +1578,11 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags) */ static int dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, - db_lock_type_t dblt, const void *tag) + db_lock_type_t dblt, blkptr_t *bp, const void *tag) { zbookmark_phys_t zb; uint32_t aflags = ARC_FLAG_NOWAIT; int err, zio_flags; - blkptr_t bp, *bpp = NULL; ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(MUTEX_HELD(&db->db_mtx)); @@ -1589,43 +1596,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, goto early_unlock; } - /* - * If we have a pending block clone, we don't want to read the - * underlying block, but the content of the block being cloned, - * pointed by the dirty record, so we have the most recent data. 
- * If there is no dirty record, then we hit a race in a sync - * process when the dirty record is already removed, while the - * dbuf is not yet destroyed. Such case is equivalent to uncached. - */ - if (db->db_state == DB_NOFILL) { - dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records); - if (dr != NULL) { - if (!dr->dt.dl.dr_brtwrite) { - err = EIO; - goto early_unlock; - } - bp = dr->dt.dl.dr_overridden_by; - bpp = &bp; - } - } - - if (bpp == NULL && db->db_blkptr != NULL) { - bp = *db->db_blkptr; - bpp = &bp; - } - - err = dbuf_read_hole(db, dn, bpp); + err = dbuf_read_hole(db, dn, bp); if (err == 0) goto early_unlock; - ASSERT(bpp != NULL); + ASSERT(bp != NULL); /* * Any attempt to read a redacted block should result in an error. This * will never happen under normal conditions, but can be useful for * debugging purposes. */ - if (BP_IS_REDACTED(bpp)) { + if (BP_IS_REDACTED(bp)) { ASSERT(dsl_dataset_feature_is_active( db->db_objset->os_dsl_dataset, SPA_FEATURE_REDACTED_DATASETS)); @@ -1640,9 +1622,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, * All bps of an encrypted os should have the encryption bit set. * If this is not true it indicates tampering and we report an error. */ - if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { + if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) { spa_log_error(db->db_objset->os_spa, &zb, - BP_GET_LOGICAL_BIRTH(bpp)); + BP_GET_LOGICAL_BIRTH(bp)); err = SET_ERROR(EIO); goto early_unlock; } @@ -1653,7 +1635,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, if (!DBUF_IS_CACHEABLE(db)) aflags |= ARC_FLAG_UNCACHED; - else if (dbuf_is_l2cacheable(db)) + else if (dbuf_is_l2cacheable(db, bp)) aflags |= ARC_FLAG_L2CACHE; dbuf_add_ref(db, NULL); @@ -1661,17 +1643,19 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, zio_flags = (flags & DB_RF_CANFAIL) ? 
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED; - if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) + if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(bp)) zio_flags |= ZIO_FLAG_RAW; + /* - * The zio layer will copy the provided blkptr later, but we have our - * own copy so that we can release the parent's rwlock. We have to - * do that so that if dbuf_read_done is called synchronously (on + * The zio layer will copy the provided blkptr later, but we need to + * do this now so that we can release the parent's rwlock. We have to + * do that now so that if dbuf_read_done is called synchronously (on * an l1 cache hit) we don't acquire the db_mtx while holding the * parent's rwlock, which would be a lock ordering violation. */ + blkptr_t copy = *bp; dmu_buf_unlock_parent(db, dblt, tag); - return (arc_read(zio, db->db_objset->os_spa, bpp, + return (arc_read(zio, db->db_objset->os_spa, &copy, dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb)); @@ -1844,13 +1828,30 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags) ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); - if (pio == NULL && (db->db_state == DB_NOFILL || - (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) { - spa_t *spa = dn->dn_objset->os_spa; - pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); - need_wait = B_TRUE; + blkptr_t *bp; + + /* + * If a block clone or Direct I/O write has occurred we will + * get the dirty records overridden BP so we get the most + * recent data. 
+ */ + err = dmu_buf_get_bp_from_dbuf(db, &bp); + + if (!err) { + if (pio == NULL && (db->db_state == DB_NOFILL || + (bp != NULL && !BP_IS_HOLE(bp)))) { + spa_t *spa = dn->dn_objset->os_spa; + pio = + zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + need_wait = B_TRUE; + } + + err = + dbuf_read_impl(db, dn, pio, flags, dblt, bp, FTAG); + } else { + mutex_exit(&db->db_mtx); + dmu_buf_unlock_parent(db, dblt, FTAG); } - err = dbuf_read_impl(db, dn, pio, flags, dblt, FTAG); /* dbuf_read_impl drops db_mtx and parent's rwlock. */ miss = (db->db_state != DB_CACHED); } @@ -1918,6 +1919,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) uint64_t txg = dr->dr_txg; ASSERT(MUTEX_HELD(&db->db_mtx)); + /* * This assert is valid because dmu_sync() expects to be called by * a zilog's get_data while holding a range lock. This call only @@ -1936,16 +1938,20 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); - if (dr->dt.dl.dr_brtwrite) { + if (dr->dt.dl.dr_brtwrite || dr->dt.dl.dr_diowrite) { ASSERT0P(dr->dt.dl.dr_data); dr->dt.dl.dr_data = db->db_buf; } dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_brtwrite = B_FALSE; + dr->dt.dl.dr_diowrite = B_FALSE; dr->dt.dl.dr_has_raw_params = B_FALSE; /* + * In the event that Direct I/O was used, we do not + * need to release the buffer from the ARC. + * * Release the already-written buffer, so we leave it in * a consistent dirty state. 
Note that all callers are * modifying the buffer, so they will immediately do @@ -2084,6 +2090,8 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) */ dmu_buf_will_dirty(&db->db, tx); + VERIFY3P(db->db_buf, !=, NULL); + /* create the data buffer for the new block */ buf = arc_alloc_buf(dn->dn_objset->os_spa, db, type, size); @@ -2532,6 +2540,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { uint64_t txg = tx->tx_txg; boolean_t brtwrite; + boolean_t diowrite; ASSERT(txg != 0); @@ -2557,7 +2566,9 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(dr->dr_dbuf == db); brtwrite = dr->dt.dl.dr_brtwrite; + diowrite = dr->dt.dl.dr_diowrite; if (brtwrite) { + ASSERT3B(diowrite, ==, B_FALSE); /* * We are freeing a block that we cloned in the same * transaction group. @@ -2598,10 +2609,11 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) if (db->db_state != DB_NOFILL && !brtwrite) { dbuf_unoverride(dr); - ASSERT(db->db_buf != NULL); - ASSERT(dr->dt.dl.dr_data != NULL); - if (dr->dt.dl.dr_data != db->db_buf) + if (dr->dt.dl.dr_data != db->db_buf) { + ASSERT(db->db_buf != NULL); + ASSERT(dr->dt.dl.dr_data != NULL); arc_buf_destroy(dr->dt.dl.dr_data, db); + } } kmem_free(dr, sizeof (dbuf_dirty_record_t)); @@ -2610,7 +2622,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_dirtycnt -= 1; if (zfs_refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { - ASSERT(db->db_state == DB_NOFILL || brtwrite || + ASSERT(db->db_state == DB_NOFILL || brtwrite || diowrite || arc_released(db->db_buf)); dbuf_destroy(db); return (B_TRUE); @@ -2670,8 +2682,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) * Block cloning: Do the dbuf_read() before undirtying the dbuf, as we * want to make sure dbuf_read() will read the pending cloned block and * not the uderlying block that is being replaced. dbuf_undirty() will - * do dbuf_unoverride(), so we will end up with cloned block content, - * without overridden BP. 
+ * do brt_pending_remove() before removing the dirty record. */ (void) dbuf_read(db, NULL, flags); if (undirty) { @@ -2701,23 +2712,126 @@ dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) return (dr != NULL); } +/* + * Normally the db_blkptr points to the most recent on-disk content for the + * dbuf (and anything newer will be cached in the dbuf). However, a pending + * block clone or not yet synced Direct I/O write will have a dirty record BP + * pointing to the most recent data. + */ +int +dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + int error = 0; + + if (db->db_level != 0) { + *bp = db->db_blkptr; + return (0); + } + + *bp = db->db_blkptr; + dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records); + if (dr && db->db_state == DB_NOFILL) { + /* Block clone */ + if (!dr->dt.dl.dr_brtwrite) + error = EIO; + else + *bp = &dr->dt.dl.dr_overridden_by; + } else if (dr && db->db_state == DB_UNCACHED) { + /* Direct I/O write */ + if (dr->dt.dl.dr_diowrite) + *bp = &dr->dt.dl.dr_overridden_by; + } + + return (error); +} + +/* + * Direct I/O reads can read directly from the ARC, but the data has + * to be untransformed in order to copy it over into user pages. + */ +int +dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa) +{ + int err = 0; + DB_DNODE_ENTER(db); + dnode_t *dn = DB_DNODE(db); + + ASSERT3S(db->db_state, ==, DB_CACHED); + ASSERT(MUTEX_HELD(&db->db_mtx)); + + /* + * Ensure that this block's dnode has been decrypted if + * the caller has requested decrypted data. + */ + err = dbuf_read_verify_dnode_crypt(db, dn, 0); + + /* + * If the arc buf is compressed or encrypted and the caller + * requested uncompressed data, we need to untransform it + * before returning. We also call arc_untransform() on any + * unauthenticated blocks, which will verify their MAC if + * the key is now available. 
+ */ + if (err == 0 && db->db_buf != NULL && + (arc_is_encrypted(db->db_buf) || + arc_is_unauthenticated(db->db_buf) || + arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { + zbookmark_phys_t zb; + + SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), + db->db.db_object, db->db_level, db->db_blkid); + dbuf_fix_old_data(db, spa_syncing_txg(spa)); + err = arc_untransform(db->db_buf, spa, &zb, B_FALSE); + dbuf_set_data(db, db->db_buf); + } + DB_DNODE_EXIT(db); + DBUF_STAT_BUMP(hash_hits); + + return (err); +} + void -dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) +dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx) { + /* + * Block clones and Direct I/O writes always happen in open-context. + */ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ASSERT0(db->db_level); + ASSERT(!dmu_tx_is_syncing(tx)); + ASSERT0(db->db_level); ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); - /* - * Block cloning: We are going to clone into this block, so undirty - * modifications done to this block so far in this txg. This includes - * writes and clones into this block. - */ mutex_enter(&db->db_mtx); DBUF_VERIFY(db); - VERIFY(!dbuf_undirty(db, tx)); + + /* + * We are going to clone or issue a Direct I/O write on this block, so + * undirty modifications done to this block so far in this txg. This + * includes writes and clones into this block. + * + * If there dirty record associated with this txg from a previous Direct + * I/O write then space accounting cleanup takes place. It is important + * to go ahead free up the space accounting through dbuf_undirty() -> + * dbuf_unoverride() -> zio_free(). Space accounting for determining + * if a write can occur in zfs_write() happens through dmu_tx_assign(). + * This can cause an issue with Direct I/O writes in the case of + * overwriting the same block, because all DVA allocations are being + * done in open-context. 
Constantly allowing Direct I/O overwrites to + * the same block can exhaust the pools available space leading to + * ENOSPC errors at the DVA allocation part of the ZIO pipeline, which + * will eventually suspend the pool. By cleaning up space accounting + * now, the ENOSPC error can be avoided. + * + * Since we are undirtying the record in open-context, we must have a + * hold on the db, so it should never be evicted after calling + * dbuf_undirty(). + */ + VERIFY3B(dbuf_undirty(db, tx), ==, B_FALSE); ASSERT0P(dbuf_find_dirty_eq(db, tx->tx_txg)); + if (db->db_buf != NULL) { /* * If there is an associated ARC buffer with this dbuf we can @@ -2728,6 +2842,11 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) arc_buf_destroy(db->db_buf, db); + /* + * Setting the dbuf's data pointers to NULL will force all + * future reads down to the devices to get the most up to date + * version of the data after a Direct I/O write has completed. + */ db->db_buf = NULL; dbuf_clear_data(db); } @@ -2736,7 +2855,8 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) ASSERT3P(db->db.db_data, ==, NULL); db->db_state = DB_NOFILL; - DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone"); + DTRACE_SET_STATE(db, + "allocating NOFILL buffer for clone or direct I/O write"); DBUF_VERIFY(db); mutex_exit(&db->db_mtx); @@ -2773,21 +2893,28 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail) dmu_tx_private_ok(tx)); mutex_enter(&db->db_mtx); - if (db->db_state == DB_NOFILL) { + dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg); + if (db->db_state == DB_NOFILL || + (db->db_state == DB_UNCACHED && dr && dr->dt.dl.dr_diowrite)) { /* - * Block cloning: We will be completely overwriting a block - * cloned in this transaction group, so let's undirty the - * pending clone and mark the block as uncached. This will be - * as if the clone was never done. 
But if the fill can fail - * we should have a way to return back to the cloned data. + * If the fill can fail we should have a way to return back to + * the cloned or Direct I/O write data. */ - if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) { + if (canfail && dr) { mutex_exit(&db->db_mtx); dmu_buf_will_dirty(db_fake, tx); return; } - VERIFY(!dbuf_undirty(db, tx)); - db->db_state = DB_UNCACHED; + /* + * Block cloning: We will be completely overwriting a block + * cloned in this transaction group, so let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + if (dr && dr->dt.dl.dr_brtwrite) { + VERIFY(!dbuf_undirty(db, tx)); + db->db_state = DB_UNCACHED; + } } mutex_exit(&db->db_mtx); @@ -4080,7 +4207,6 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting) } else { mutex_exit(&db->db_mtx); } - } #pragma weak dmu_buf_refcount = dbuf_refcount @@ -4540,24 +4666,32 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) mutex_enter(&db->db_mtx); /* - * To be synced, we must be dirtied. But we - * might have been freed after the dirty. + * To be synced, we must be dirtied. But we might have been freed + * after the dirty. */ if (db->db_state == DB_UNCACHED) { /* This buffer has been freed since it was dirtied */ - ASSERT(db->db.db_data == NULL); + ASSERT3P(db->db.db_data, ==, NULL); } else if (db->db_state == DB_FILL) { /* This buffer was freed and is now being re-filled */ ASSERT(db->db.db_data != dr->dt.dl.dr_data); } else if (db->db_state == DB_READ) { /* - * This buffer has a clone we need to write, and an in-flight - * read on the BP we're about to clone. Its safe to issue the - * write here because the read has already been issued and the - * contents won't change. + * This buffer was either cloned or had a Direct I/O write + * occur and has an in-flight read on the BP. 
It is safe to + * issue the write here, because the read has already been + * issued and the contents won't change. + * + * We can verify the case of both the clone and Direct I/O + * write by making sure the first dirty record for the dbuf + * has no ARC buffer associated with it. */ - ASSERT(dr->dt.dl.dr_brtwrite && - dr->dt.dl.dr_override_state == DR_OVERRIDDEN); + dbuf_dirty_record_t *dr_head = + list_head(&db->db_dirty_records); + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL); + ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN); } else { ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); } @@ -4608,8 +4742,12 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) dbuf_check_blkptr(dn, db); /* - * If this buffer is in the middle of an immediate write, - * wait for the synchronous IO to complete. + * If this buffer is in the middle of an immediate write, wait for the + * synchronous IO to complete. + * + * This is also valid even with Direct I/O writes setting a dirty + * records override state into DR_IN_DMU_SYNC, because all + * Direct I/O writes happen in open-context. 
*/ while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); @@ -4913,8 +5051,12 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) if (db->db_level == 0) { ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); + + /* no dr_data if this is a NO_FILL or Direct I/O */ if (dr->dt.dl.dr_data != NULL && dr->dt.dl.dr_data != db->db_buf) { + ASSERT3B(dr->dt.dl.dr_brtwrite, ==, B_FALSE); + ASSERT3B(dr->dt.dl.dr_diowrite, ==, B_FALSE); arc_buf_destroy(dr->dt.dl.dr_data, db); } } else { @@ -5180,7 +5322,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { /* * The BP for this block has been provided by open context - * (by dmu_sync() or dmu_buf_write_embedded()). + * (by dmu_sync(), dmu_write_direct(), + * or dmu_buf_write_embedded()). */ abd_t *contents = (data != NULL) ? abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL; @@ -5219,7 +5362,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dr_zio = arc_write(pio, os->os_spa, txg, &dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db), - dbuf_is_l2cacheable(db), &zp, dbuf_write_ready, + dbuf_is_l2cacheable(db, NULL), &zp, dbuf_write_ready, children_ready_cb, dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); } @@ -5239,7 +5382,7 @@ EXPORT_SYMBOL(dbuf_dirty); EXPORT_SYMBOL(dmu_buf_set_crypt_params); EXPORT_SYMBOL(dmu_buf_will_dirty); EXPORT_SYMBOL(dmu_buf_is_dirty); -EXPORT_SYMBOL(dmu_buf_will_clone); +EXPORT_SYMBOL(dmu_buf_will_clone_or_dio); EXPORT_SYMBOL(dmu_buf_will_not_fill); EXPORT_SYMBOL(dmu_buf_will_fill); EXPORT_SYMBOL(dmu_buf_fill_done); diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 0e12e7e49828..e8b9fb498d4b 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -2609,7 +2609,7 @@ ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram) }; ddt_lightweight_entry_t ddlwe = {0}; int 
error; - int total = 0, valid = 0; + int valid = 0; int candidates = 0; uint64_t now = gethrestime_sec(); ddt_prune_info_t dpi; @@ -2633,7 +2633,6 @@ ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram) if (spa_shutting_down(spa) || issig()) break; - total++; ASSERT(ddt->ddt_flags & DDT_FLAG_FLAT); ASSERT3U(ddlwe.ddlwe_phys.ddp_flat.ddp_refcnt, <=, 1); diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index d96dc505cdea..137fe487a997 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -196,9 +196,10 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, void *phys, size_t psize) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; int error; + za = zap_attribute_alloc(); if (*walk == 0) { /* * We don't want to prefetch the entire ZAP object, because @@ -211,20 +212,20 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, } else { zap_cursor_init_serialized(&zc, os, object, *walk); } - if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { - uint64_t csize = za.za_num_integers; + if ((error = zap_cursor_retrieve(&zc, za)) == 0) { + uint64_t csize = za->za_num_integers; - ASSERT3U(za.za_integer_length, ==, 1); + ASSERT3U(za->za_integer_length, ==, 1); ASSERT3U(csize, <=, psize + 1); uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); - error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name, + error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name, DDT_KEY_WORDS, 1, csize, cbuf); ASSERT0(error); if (error == 0) { ddt_zap_decompress(cbuf, phys, csize, psize); - *ddk = *(ddt_key_t *)za.za_name; + *ddk = *(ddt_key_t *)za->za_name; } kmem_free(cbuf, csize); @@ -233,6 +234,7 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, *walk = zap_cursor_serialize(&zc); } zap_cursor_fini(&zc); + zap_attribute_free(za); return (error); } diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b3eda8ea5097..3f87cfe6bee9 100644 --- a/module/zfs/dmu.c +++ 
b/module/zfs/dmu.c @@ -609,8 +609,16 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, dbp[i] = &db->db; } - if (!read) - zfs_racct_write(length, nblks); + /* + * If we are doing O_DIRECT we still hold the dbufs, even for reads, + * but we do not issue any reads here. We do not want to account for + * writes in this case. + * + * O_DIRECT write/read accounting takes place in + * dmu_{write/read}_abd(). + */ + if (!read && ((flags & DMU_DIRECTIO) == 0)) + zfs_racct_write(dn->dn_objset->os_spa, length, nblks, flags); if (zs) dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE); @@ -897,7 +905,7 @@ dmu_prefetch_dnode(objset_t *os, uint64_t object, zio_priority_t pri) /* * Get the next "chunk" of file data to free. We traverse the file from - * the end so that the file gets shorter over time (if we crashes in the + * the end so that the file gets shorter over time (if we crash in the * middle, this will leave us in a better state). We find allocated file * data by simply searching the allocated level 1 indirects. * @@ -1168,7 +1176,7 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size, /* * Deal with odd block sizes, where there can't be data past the first - * block. If we ever do the tail block optimization, we will need to + * block. If we ever do the tail block optimization, we will need to * handle that here as well. 
*/ if (dn->dn_maxblkid == 0) { @@ -1178,6 +1186,18 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size, size = newsz; } + if (size == 0) + return (0); + + /* Allow Direct I/O when requested and properly aligned */ + if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned(buf) && + zfs_dio_aligned(offset, size, PAGESIZE)) { + abd_t *data = abd_get_from_buf(buf, size); + err = dmu_read_abd(dn, offset, size, data, flags); + abd_free(data); + return (err); + } + while (size > 0) { uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); int i; @@ -1286,22 +1306,41 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } /* - * Note: Lustre is an external consumer of this interface. + * This interface is not used internally by ZFS but is provided for + * use by Lustre which is built on the DMU interfaces. */ -void -dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, - const void *buf, dmu_tx_t *tx) +int +dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx, uint32_t flags) { dmu_buf_t **dbp; int numbufs; + int error; if (size == 0) - return; + return (0); + + /* Allow Direct I/O when requested and properly aligned */ + if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned((void *)buf) && + zfs_dio_aligned(offset, size, dn->dn_datablksz)) { + abd_t *data = abd_get_from_buf((void *)buf, size); + error = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx); + abd_free(data); + return (error); + } VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size, FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH)); dmu_write_impl(dbp, numbufs, offset, size, buf, tx); dmu_buf_rele_array(dbp, numbufs, FTAG); + return (0); +} + +int +dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx) +{ + return (dmu_write_by_dnode_flags(dn, offset, size, buf, tx, 0)); } void @@ -1365,6 +1404,9 @@ dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size) dmu_buf_t **dbp; int numbufs, 
i, err; + if (uio->uio_extflg & UIO_DIRECT) + return (dmu_read_uio_direct(dn, uio, size)); + /* * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. @@ -1453,23 +1495,53 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) dmu_buf_t **dbp; int numbufs; int err = 0; - int i; + uint64_t write_size; - err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size, +top: + write_size = size; + + /* + * We only allow Direct I/O writes to happen if we are block + * sized aligned. Otherwise, we pass the write off to the ARC. + */ + if ((uio->uio_extflg & UIO_DIRECT) && + (write_size >= dn->dn_datablksz)) { + if (zfs_dio_aligned(zfs_uio_offset(uio), write_size, + dn->dn_datablksz)) { + return (dmu_write_uio_direct(dn, uio, size, tx)); + } else if (write_size > dn->dn_datablksz && + zfs_dio_offset_aligned(zfs_uio_offset(uio), + dn->dn_datablksz)) { + write_size = + dn->dn_datablksz * (write_size / dn->dn_datablksz); + err = dmu_write_uio_direct(dn, uio, write_size, tx); + if (err == 0) { + size -= write_size; + goto top; + } else { + return (err); + } + } else { + write_size = + P2PHASE(zfs_uio_offset(uio), dn->dn_datablksz); + } + } + + err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), write_size, FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); if (err) return (err); - for (i = 0; i < numbufs; i++) { + for (int i = 0; i < numbufs; i++) { uint64_t tocpy; int64_t bufoff; dmu_buf_t *db = dbp[i]; - ASSERT(size > 0); + ASSERT(write_size > 0); offset_t off = zfs_uio_offset(uio); bufoff = off - db->db_offset; - tocpy = MIN(db->db_size - bufoff, size); + tocpy = MIN(db->db_size - bufoff, write_size); ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); @@ -1489,10 +1561,18 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) if (err) break; + write_size -= tocpy; size -= tocpy; } + IMPLY(err == 0, write_size == 0); + dmu_buf_rele_array(dbp, numbufs, FTAG); + + if ((uio->uio_extflg 
& UIO_DIRECT) && size > 0) { + goto top; + } + return (err); } @@ -1731,7 +1811,7 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, * same size as the dbuf. */ if (offset == db->db.db_offset && blksz == db->db.db_size) { - zfs_racct_write(blksz, 1); + zfs_racct_write(os->os_spa, blksz, 1, 0); dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { @@ -1761,23 +1841,22 @@ dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, return (err); } -typedef struct { - dbuf_dirty_record_t *dsa_dr; - dmu_sync_cb_t *dsa_done; - zgd_t *dsa_zgd; - dmu_tx_t *dsa_tx; -} dmu_sync_arg_t; - -static void +void dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) { (void) buf; dmu_sync_arg_t *dsa = varg; - dmu_buf_t *db = dsa->dsa_zgd->zgd_db; - blkptr_t *bp = zio->io_bp; if (zio->io_error == 0) { + dbuf_dirty_record_t *dr = dsa->dsa_dr; + blkptr_t *bp = zio->io_bp; + if (BP_IS_HOLE(bp)) { + dmu_buf_t *db = NULL; + if (dr) + db = &(dr->dr_dbuf->db); + else + db = dsa->dsa_zgd->zgd_db; /* * A block of zeros may compress to a hole, but the * block size still needs to be known for replay. @@ -1796,7 +1875,7 @@ dmu_sync_late_arrival_ready(zio_t *zio) dmu_sync_ready(zio, NULL, zio->io_private); } -static void +void dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) { (void) buf; @@ -1809,7 +1888,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) * Record the vdev(s) backing this blkptr so they can be flushed after * the writes for the lwb have completed. 
*/ - if (zio->io_error == 0) { + if (zgd && zio->io_error == 0) { zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); } @@ -1848,10 +1927,12 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) } else { dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; } + cv_broadcast(&db->db_changed); mutex_exit(&db->db_mtx); - dsa->dsa_done(dsa->dsa_zgd, zio->io_error); + if (dsa->dsa_done) + dsa->dsa_done(dsa->dsa_zgd, zio->io_error); kmem_free(dsa, sizeof (*dsa)); } @@ -2120,9 +2201,10 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) dsa->dsa_tx = NULL; zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp, - dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), - &zp, dmu_sync_ready, NULL, dmu_sync_done, dsa, - ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); + dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), + dbuf_is_l2cacheable(db, NULL), &zp, dmu_sync_ready, NULL, + dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, + &zb)); return (0); } @@ -2385,6 +2467,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) zp->zp_nopwrite = nopwrite; zp->zp_encrypt = encrypt; zp->zp_byteorder = ZFS_HOST_BYTEORDER; + zp->zp_direct_write = (wp & WP_DIRECT_WR) ? 
B_TRUE : B_FALSE; memset(zp->zp_salt, 0, ZIO_DATA_SALT_LEN); memset(zp->zp_iv, 0, ZIO_DATA_IV_LEN); memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN); @@ -2594,7 +2677,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, ASSERT(db->db_blkid != DMU_SPILL_BLKID); ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - dmu_buf_will_clone(dbuf, tx); + dmu_buf_will_clone_or_dio(dbuf, tx); mutex_enter(&db->db_mtx); @@ -2817,8 +2900,15 @@ EXPORT_SYMBOL(dmu_free_long_range); EXPORT_SYMBOL(dmu_free_long_object); EXPORT_SYMBOL(dmu_read); EXPORT_SYMBOL(dmu_read_by_dnode); +EXPORT_SYMBOL(dmu_read_uio); +EXPORT_SYMBOL(dmu_read_uio_dbuf); +EXPORT_SYMBOL(dmu_read_uio_dnode); EXPORT_SYMBOL(dmu_write); EXPORT_SYMBOL(dmu_write_by_dnode); +EXPORT_SYMBOL(dmu_write_by_dnode_flags); +EXPORT_SYMBOL(dmu_write_uio); +EXPORT_SYMBOL(dmu_write_uio_dbuf); +EXPORT_SYMBOL(dmu_write_uio_dnode); EXPORT_SYMBOL(dmu_prealloc); EXPORT_SYMBOL(dmu_object_info); EXPORT_SYMBOL(dmu_object_info_from_dnode); diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c new file mode 100644 index 000000000000..91a7fd8df464 --- /dev/null +++ b/module/zfs/dmu_direct.c @@ -0,0 +1,395 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +static abd_t * +make_abd_for_dbuf(dmu_buf_impl_t *db, abd_t *data, uint64_t offset, + uint64_t size) +{ + size_t buf_size = db->db.db_size; + abd_t *pre_buf = NULL, *post_buf = NULL, *mbuf = NULL; + size_t buf_off = 0; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (offset > db->db.db_offset) { + size_t pre_size = offset - db->db.db_offset; + pre_buf = abd_alloc_for_io(pre_size, B_TRUE); + buf_size -= pre_size; + buf_off = 0; + } else { + buf_off = db->db.db_offset - offset; + size -= buf_off; + } + + if (size < buf_size) { + size_t post_size = buf_size - size; + post_buf = abd_alloc_for_io(post_size, B_TRUE); + buf_size -= post_size; + } + + ASSERT3U(buf_size, >, 0); + abd_t *buf = abd_get_offset_size(data, buf_off, buf_size); + + if (pre_buf || post_buf) { + mbuf = abd_alloc_gang(); + if (pre_buf) + abd_gang_add(mbuf, pre_buf, B_TRUE); + abd_gang_add(mbuf, buf, B_TRUE); + if (post_buf) + abd_gang_add(mbuf, post_buf, B_TRUE); + } else { + mbuf = buf; + } + + return (mbuf); +} + +static void +dmu_read_abd_done(zio_t *zio) +{ + abd_free(zio->io_abd); +} + +static void +dmu_write_direct_ready(zio_t *zio) +{ + dmu_sync_ready(zio, NULL, zio->io_private); +} + +static void +dmu_write_direct_done(zio_t *zio) +{ + dmu_sync_arg_t *dsa = zio->io_private; + dbuf_dirty_record_t *dr = dsa->dsa_dr; + dmu_buf_impl_t *db = dr->dr_dbuf; + + abd_free(zio->io_abd); + + mutex_enter(&db->db_mtx); + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(dr->dt.dl.dr_data, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + db->db_state = DB_UNCACHED; + mutex_exit(&db->db_mtx); + + dmu_sync_done(zio, NULL, zio->io_private); + + if (zio->io_error != 0) { + if (zio->io_flags & 
ZIO_FLAG_DIO_CHKSUM_ERR) + ASSERT3U(zio->io_error, ==, EIO); + + /* + * In the event of an I/O error this block has been freed in + * zio_done() through zio_dva_unallocate(). Calling + * dmu_sync_done() above set dr_override_state to + * DR_NOT_OVERRIDDEN. In this case when dbuf_undirty() calls + * dbuf_unoverride(), it will skip doing zio_free() to free + * this block as that was already taken care of. + * + * Since we are undirtying the record in open-context, we must + * have a hold on the db, so it should never be evicted after + * calling dbuf_undirty(). + */ + mutex_enter(&db->db_mtx); + VERIFY3B(dbuf_undirty(db, dsa->dsa_tx), ==, B_FALSE); + mutex_exit(&db->db_mtx); + } + + kmem_free(zio->io_bp, sizeof (blkptr_t)); + zio->io_bp = NULL; +} + +int +dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx) +{ + objset_t *os = db->db_objset; + dsl_dataset_t *ds = dmu_objset_ds(os); + zbookmark_phys_t zb; + dbuf_dirty_record_t *dr_head; + + SET_BOOKMARK(&zb, ds->ds_object, + db->db.db_object, db->db_level, db->db_blkid); + + DB_DNODE_ENTER(db); + zio_prop_t zp; + dmu_write_policy(os, DB_DNODE(db), db->db_level, + WP_DMU_SYNC | WP_DIRECT_WR, &zp); + DB_DNODE_EXIT(db); + + /* + * Dirty this dbuf with DB_NOFILL since we will not have any data + * associated with the dbuf. + */ + dmu_buf_will_clone_or_dio(&db->db, tx); + + mutex_enter(&db->db_mtx); + + uint64_t txg = dmu_tx_get_txg(tx); + ASSERT3U(txg, >, spa_last_synced_txg(os->os_spa)); + ASSERT3U(txg, >, spa_syncing_txg(os->os_spa)); + + dr_head = list_head(&db->db_dirty_records); + ASSERT3U(dr_head->dr_txg, ==, txg); + dr_head->dt.dl.dr_diowrite = B_TRUE; + dr_head->dr_accounted = db->db.db_size; + + blkptr_t *bp = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); + if (db->db_blkptr != NULL) { + /* + * Fill in bp with the current block pointer so that + * the nopwrite code can check if we're writing the same + * data that's already on disk. 
+ */ + *bp = *db->db_blkptr; + } else { + memset(bp, 0, sizeof (blkptr_t)); + } + + /* + * Disable nopwrite if the current block pointer could change + * before this TXG syncs. + */ + if (list_next(&db->db_dirty_records, dr_head) != NULL) + zp.zp_nopwrite = B_FALSE; + + ASSERT3S(dr_head->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN); + dr_head->dt.dl.dr_override_state = DR_IN_DMU_SYNC; + + mutex_exit(&db->db_mtx); + + dmu_objset_willuse_space(os, dr_head->dr_accounted, tx); + + dmu_sync_arg_t *dsa = kmem_zalloc(sizeof (dmu_sync_arg_t), KM_SLEEP); + dsa->dsa_dr = dr_head; + dsa->dsa_tx = tx; + + zio_t *zio = zio_write(pio, os->os_spa, txg, bp, data, + db->db.db_size, db->db.db_size, &zp, + dmu_write_direct_ready, NULL, dmu_write_direct_done, dsa, + ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb); + + if (pio == NULL) + return (zio_wait(zio)); + + zio_nowait(zio); + + return (0); +} + +int +dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size, + abd_t *data, uint32_t flags, dmu_tx_t *tx) +{ + dmu_buf_t **dbp; + spa_t *spa = dn->dn_objset->os_spa; + int numbufs, err; + + ASSERT(flags & DMU_DIRECTIO); + + err = dmu_buf_hold_array_by_dnode(dn, offset, + size, B_FALSE, FTAG, &numbufs, &dbp, flags); + if (err) + return (err); + + zio_t *pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + + for (int i = 0; i < numbufs && err == 0; i++) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; + + abd_t *abd = abd_get_offset_size(data, + db->db.db_offset - offset, dn->dn_datablksz); + + zfs_racct_write(spa, db->db.db_size, 1, flags); + err = dmu_write_direct(pio, db, abd, tx); + ASSERT0(err); + } + + err = zio_wait(pio); + + /* + * The dbuf must be held until the Direct I/O write has completed in + * the event there were any errors and dbuf_undirty() was called.
+ */ + dmu_buf_rele_array(dbp, numbufs, FTAG); + + return (err); +} + +int +dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size, + abd_t *data, uint32_t flags) +{ + objset_t *os = dn->dn_objset; + spa_t *spa = os->os_spa; + dmu_buf_t **dbp; + int numbufs, err; + + ASSERT(flags & DMU_DIRECTIO); + + err = dmu_buf_hold_array_by_dnode(dn, offset, + size, B_FALSE, FTAG, &numbufs, &dbp, flags); + if (err) + return (err); + + zio_t *rio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + + for (int i = 0; i < numbufs; i++) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; + abd_t *mbuf; + zbookmark_phys_t zb; + blkptr_t *bp; + + mutex_enter(&db->db_mtx); + + SET_BOOKMARK(&zb, dmu_objset_ds(os)->ds_object, + db->db.db_object, db->db_level, db->db_blkid); + + /* + * If there is another read for this dbuf, we will wait for + * that to complete first before checking the db_state below. + */ + while (db->db_state == DB_READ) + cv_wait(&db->db_changed, &db->db_mtx); + + err = dmu_buf_get_bp_from_dbuf(db, &bp); + if (err) { + mutex_exit(&db->db_mtx); + goto error; + } + + /* + * There is no need to read if this is a hole or the data is + * cached. This will not be considered a direct read for IO + * accounting in the same way that an ARC hit is not counted. + */ + if (bp == NULL || BP_IS_HOLE(bp) || db->db_state == DB_CACHED) { + size_t aoff = offset < db->db.db_offset ? + db->db.db_offset - offset : 0; + size_t boff = offset > db->db.db_offset ? + offset - db->db.db_offset : 0; + size_t len = MIN(size - aoff, db->db.db_size - boff); + + if (db->db_state == DB_CACHED) { + /* + * We need to untransform the ARC buf data + * before we copy it over.
+ */ + err = dmu_buf_untransform_direct(db, spa); + ASSERT0(err); + abd_copy_from_buf_off(data, + (char *)db->db.db_data + boff, aoff, len); + } else { + abd_zero_off(data, aoff, len); + } + + mutex_exit(&db->db_mtx); + continue; + } + + mbuf = make_abd_for_dbuf(db, data, offset, size); + ASSERT3P(mbuf, !=, NULL); + + /* + * The dbuf mutex (db_mtx) must be held when creating the ZIO + * for the read. The BP returned from + * dmu_buf_get_bp_from_dbuf() could be from a pending block + * clone or a yet to be synced Direct I/O write that is in the + * dbuf's dirty record. When zio_read() is called, zio_create() + * will make a copy of the BP. However, if zio_read() is called + * without the mutex being held then the dirty record from the + * dbuf could be freed in dbuf_write_done() resulting in garbage + * being set for the zio BP. + */ + zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size, + dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL, &zb); + mutex_exit(&db->db_mtx); + + zfs_racct_read(spa, db->db.db_size, 1, flags); + zio_nowait(cio); + } + + dmu_buf_rele_array(dbp, numbufs, FTAG); + + return (zio_wait(rio)); + +error: + dmu_buf_rele_array(dbp, numbufs, FTAG); + (void) zio_wait(rio); + return (err); +} + +#ifdef _KERNEL +int +dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size) +{ + offset_t offset = zfs_uio_offset(uio); + offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT; + int err; + + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3U(page_index, <, uio->uio_dio.npages); + + abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index], + offset & (PAGESIZE - 1), size); + err = dmu_read_abd(dn, offset, size, data, DMU_DIRECTIO); + abd_free(data); + + if (err == 0) + zfs_uioskip(uio, size); + + return (err); +} + +int +dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) +{ + offset_t offset = zfs_uio_offset(uio); + offset_t page_index = (offset - zfs_uio_soffset(uio)) >> 
PAGESHIFT; + int err; + + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3U(page_index, <, uio->uio_dio.npages); + + abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index], + offset & (PAGESIZE - 1), size); + err = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx); + abd_free(data); + + if (err == 0) + zfs_uioskip(uio, size); + + return (err); +} +#endif /* _KERNEL */ + +EXPORT_SYMBOL(dmu_read_uio_direct); +EXPORT_SYMBOL(dmu_write_uio_direct); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 8f4fefa4f4dd..8f00e6577bc5 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -66,6 +66,7 @@ #include "zfs_namecheck.h" #include #include +#include /* * Needed to close a window in dnode_move() that allows the objset to be freed @@ -350,6 +351,20 @@ smallblk_changed_cb(void *arg, uint64_t newval) os->os_zpl_special_smallblock = newval; } +static void +direct_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval == ZFS_DIRECT_DISABLED || newval == ZFS_DIRECT_STANDARD || + newval == ZFS_DIRECT_ALWAYS); + + os->os_direct = newval; +} + static void logbias_changed_cb(void *arg, uint64_t newval) { @@ -390,27 +405,13 @@ dmu_objset_byteswap(void *buf, size_t size) } /* - * The hash is a CRC-based hash of the objset_t pointer and the object number. + * Runs cityhash on the objset_t pointer and the object number. */ static uint64_t dnode_hash(const objset_t *os, uint64_t obj) { uintptr_t osv = (uintptr_t)os; - uint64_t crc = -1ULL; - - ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); - /* - * The lower 11 bits of the pointer don't have much entropy, because - * the objset_t is more than 1KB long and so likely aligned to 2KB. 
- */ - crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 11)) & 0xFF]; - crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF]; - crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF]; - crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF]; - - crc ^= (osv>>14) ^ (obj>>24); - - return (crc); + return (cityhash2((uint64_t)osv, obj)); } static unsigned int @@ -633,6 +634,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ZFS_PROP_SPECIAL_SMALL_BLOCKS), smallblk_changed_cb, os); } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_DIRECT), + direct_changed_cb, os); + } } if (err != 0) { arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf); @@ -2615,35 +2621,39 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name, { dsl_dataset_t *ds = os->os_dsl_dataset; zap_cursor_t cursor; - zap_attribute_t attr; + zap_attribute_t *attr; ASSERT(dsl_pool_config_held(dmu_objset_pool(os))); if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0) return (SET_ERROR(ENOENT)); + attr = zap_attribute_alloc(); zap_cursor_init_serialized(&cursor, ds->ds_dir->dd_pool->dp_meta_objset, dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp); - if (zap_cursor_retrieve(&cursor, &attr) != 0) { + if (zap_cursor_retrieve(&cursor, attr) != 0) { zap_cursor_fini(&cursor); + zap_attribute_free(attr); return (SET_ERROR(ENOENT)); } - if (strlen(attr.za_name) + 1 > namelen) { + if (strlen(attr->za_name) + 1 > namelen) { zap_cursor_fini(&cursor); + zap_attribute_free(attr); return (SET_ERROR(ENAMETOOLONG)); } - (void) strlcpy(name, attr.za_name, namelen); + (void) strlcpy(name, attr->za_name, namelen); if (idp) - *idp = attr.za_first_integer; + *idp = attr->za_first_integer; if (case_conflict) - *case_conflict = attr.za_normalization_conflict; + *case_conflict = attr->za_normalization_conflict; zap_cursor_advance(&cursor); *offp = zap_cursor_serialize(&cursor); zap_cursor_fini(&cursor); + zap_attribute_free(attr); return (0); } @@ 
-2660,33 +2670,37 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, { dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; zap_cursor_t cursor; - zap_attribute_t attr; + zap_attribute_t *attr; /* there is no next dir on a snapshot! */ if (os->os_dsl_dataset->ds_object != dsl_dir_phys(dd)->dd_head_dataset_obj) return (SET_ERROR(ENOENT)); + attr = zap_attribute_alloc(); zap_cursor_init_serialized(&cursor, dd->dd_pool->dp_meta_objset, dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp); - if (zap_cursor_retrieve(&cursor, &attr) != 0) { + if (zap_cursor_retrieve(&cursor, attr) != 0) { zap_cursor_fini(&cursor); + zap_attribute_free(attr); return (SET_ERROR(ENOENT)); } - if (strlen(attr.za_name) + 1 > namelen) { + if (strlen(attr->za_name) + 1 > namelen) { zap_cursor_fini(&cursor); + zap_attribute_free(attr); return (SET_ERROR(ENAMETOOLONG)); } - (void) strlcpy(name, attr.za_name, namelen); + (void) strlcpy(name, attr->za_name, namelen); if (idp) - *idp = attr.za_first_integer; + *idp = attr->za_first_integer; zap_cursor_advance(&cursor); *offp = zap_cursor_serialize(&cursor); zap_cursor_fini(&cursor); + zap_attribute_free(attr); return (0); } @@ -2734,7 +2748,7 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp) } thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj; - attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + attr = zap_attribute_alloc(); /* * Iterate over all children. @@ -2795,7 +2809,7 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp) } } - kmem_free(attr, sizeof (zap_attribute_t)); + zap_attribute_free(attr); if (err != 0) { dsl_dir_rele(dd, FTAG); @@ -2969,7 +2983,7 @@ dmu_objset_find_impl(spa_t *spa, const char *name, } thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj; - attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + attr = zap_attribute_alloc(); /* * Iterate over all children. 
@@ -2997,7 +3011,7 @@ dmu_objset_find_impl(spa_t *spa, const char *name, if (err != 0) { dsl_dir_rele(dd, FTAG); dsl_pool_config_exit(dp, FTAG); - kmem_free(attr, sizeof (zap_attribute_t)); + zap_attribute_free(attr); return (err); } } @@ -3035,7 +3049,7 @@ dmu_objset_find_impl(spa_t *spa, const char *name, } dsl_dir_rele(dd, FTAG); - kmem_free(attr, sizeof (zap_attribute_t)); + zap_attribute_free(attr); dsl_pool_config_exit(dp, FTAG); if (err != 0) diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index a1752650f3ba..b1cd981cec1d 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -25,7 +25,7 @@ * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. * Copyright (c) 2018, loli10K . All rights reserved. - * Copyright (c) 2019, Klara Inc. + * Copyright (c) 2019, 2024, Klara, Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2019 Datto Inc. * Copyright (c) 2022 Axcient. @@ -593,6 +593,9 @@ recv_begin_check_feature_flags_impl(uint64_t featureflags, spa_t *spa) if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) && !spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) return (SET_ERROR(ENOTSUP)); + if ((featureflags & DMU_BACKUP_FEATURE_LARGE_MICROZAP) && + !spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_MICROZAP)) + return (SET_ERROR(ENOTSUP)); /* * Receiving redacted streams requires that redacted datasets are @@ -602,6 +605,13 @@ recv_begin_check_feature_flags_impl(uint64_t featureflags, spa_t *spa) !spa_feature_is_enabled(spa, SPA_FEATURE_REDACTED_DATASETS)) return (SET_ERROR(ENOTSUP)); + /* + * If the LONGNAME is not enabled on the target, fail that request. 
+ */ + if ((featureflags & DMU_BACKUP_FEATURE_LONGNAME) && + !spa_feature_is_enabled(spa, SPA_FEATURE_LONGNAME)) + return (SET_ERROR(ENOTSUP)); + return (0); } @@ -987,9 +997,37 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) numredactsnaps, tx); } + if (featureflags & DMU_BACKUP_FEATURE_LARGE_MICROZAP) { + /* + * The source has seen a large microzap at least once in its + * life, so we activate the feature here to match. It's not + * strictly necessary since a large microzap is usable without + * the feature active, but if that object is sent on from here, + * we need this info to know to add the stream feature. + * + * There may be no large microzap in the incoming stream, or + * ever again, but this is a very niche feature and it's very + * difficult to spot a large microzap in the stream, so it's + * not worth the effort of trying harder to activate the + * feature at first use. + */ + dsl_dataset_activate_feature(dsobj, SPA_FEATURE_LARGE_MICROZAP, + (void *)B_TRUE, tx); + } + dmu_buf_will_dirty(newds->ds_dbuf, tx); dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; + /* + * Activate longname feature if received + */ + if (featureflags & DMU_BACKUP_FEATURE_LONGNAME && + !dsl_dataset_feature_is_active(newds, SPA_FEATURE_LONGNAME)) { + dsl_dataset_activate_feature(newds->ds_object, + SPA_FEATURE_LONGNAME, (void *)B_TRUE, tx); + newds->ds_feature[SPA_FEATURE_LONGNAME] = (void *)B_TRUE; + } + /* * If we actually created a non-clone, we need to create the objset * in our new dataset.
If this is a raw send we postpone this until @@ -1408,7 +1446,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw, abd_t *cabd = abd_alloc_linear(BP_GET_PSIZE(bp), B_FALSE); uint64_t csize = zio_compress_data(BP_GET_COMPRESS(bp), - abd, &cabd, abd_get_size(abd), + abd, &cabd, abd_get_size(abd), BP_GET_PSIZE(bp), rwa->os->os_complevel); abd_zero_off(cabd, csize, BP_GET_PSIZE(bp) - csize); /* Swap in newly compressed data into the abd */ diff --git a/module/zfs/dmu_redact.c b/module/zfs/dmu_redact.c index 1feba0ba83de..651461caffb5 100644 --- a/module/zfs/dmu_redact.c +++ b/module/zfs/dmu_redact.c @@ -189,7 +189,7 @@ zfs_get_deleteq(objset_t *os) objlist_t *deleteq_objlist = objlist_create(); uint64_t deleteq_obj; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; dmu_object_info_t doi; ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); @@ -208,13 +208,15 @@ zfs_get_deleteq(objset_t *os) avl_create(&at, objnode_compare, sizeof (struct objnode), offsetof(struct objnode, node)); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, os, deleteq_obj); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { struct objnode *obj = kmem_zalloc(sizeof (*obj), KM_SLEEP); - obj->obj = za.za_first_integer; + obj->obj = za->za_first_integer; avl_add(&at, obj); } zap_cursor_fini(&zc); + zap_attribute_free(za); struct objnode *next, *found = avl_first(&at); while (found != NULL) { diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index cb2b62fed313..a174972e9b57 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -26,7 +26,7 @@ * Copyright 2014 HybridCluster. All rights reserved. * Copyright 2016 RackTop Systems. * Copyright (c) 2016 Actifio, Inc. All rights reserved. - * Copyright (c) 2019, Klara Inc. + * Copyright (c) 2019, 2024, Klara, Inc. 
* Copyright (c) 2019, Allan Jude */ @@ -2011,6 +2011,21 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os, if (dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LARGE_DNODE)) { *featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE; } + + if (dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LONGNAME)) { + *featureflags |= DMU_BACKUP_FEATURE_LONGNAME; + } + + if (dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LARGE_MICROZAP)) { + /* + * We must never split a large microzap block, so we can only + * send large microzaps if LARGE_BLOCKS is already enabled. + */ + if (!(*featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS)) + return (SET_ERROR(ZFS_ERR_STREAM_LARGE_MICROZAP)); + *featureflags |= DMU_BACKUP_FEATURE_LARGE_MICROZAP; + } + return (0); } diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 2c2a6c7642a5..3fdcebdff918 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2024, Klara, Inc. 
*/ #include @@ -575,7 +576,6 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) dmu_tx_t *tx = txh->txh_tx; dnode_t *dn = txh->txh_dnode; int err; - extern int zap_micro_max_size; ASSERT(tx->tx_txg == 0); @@ -591,7 +591,7 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) * - 2 grown ptrtbl blocks */ (void) zfs_refcount_add_many(&txh->txh_space_towrite, - zap_micro_max_size, FTAG); + zap_get_micro_max_size(tx->tx_pool->dp_spa), FTAG); if (dn == NULL) return; diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index 5fd8bc2a2682..a65aded4efc4 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -870,13 +870,14 @@ dsl_bookmark_init_ds(dsl_dataset_t *ds) int err = 0; zap_cursor_t zc; - zap_attribute_t attr; + zap_attribute_t *attr; + attr = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj); - (err = zap_cursor_retrieve(&zc, &attr)) == 0; + (err = zap_cursor_retrieve(&zc, attr)) == 0; zap_cursor_advance(&zc)) { dsl_bookmark_node_t *dbn = - dsl_bookmark_node_alloc(attr.za_name); + dsl_bookmark_node_alloc(attr->za_name); err = dsl_bookmark_lookup_impl(ds, dbn->dbn_name, &dbn->dbn_phys); @@ -888,6 +889,7 @@ dsl_bookmark_init_ds(dsl_dataset_t *ds) avl_add(&ds->ds_bookmarks, dbn); } zap_cursor_fini(&zc); + zap_attribute_free(attr); if (err == ENOENT) err = 0; return (err); diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 8e1055d9bcb1..9e67b5ed4275 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -717,7 +717,7 @@ spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, const void *tag, avl_insert(&spa->spa_keystore.sk_dsl_keys, dck_io, where); *dck_out = dck_io; } else { - dsl_crypto_key_free(dck_io); + dsl_crypto_key_rele(dck_io, tag); *dck_out = dck_ks; } @@ -1497,7 +1497,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, } zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + za = 
zap_attribute_alloc(); /* Recurse into all child dsl dirs. */ for (zap_cursor_init(zc, dp->dp_meta_objset, @@ -1529,7 +1529,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, } zap_cursor_fini(zc); - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); kmem_free(zc, sizeof (zap_cursor_t)); dsl_dir_rele(dd, FTAG); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 042725b235d0..6a9ed891093b 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -494,7 +494,8 @@ dsl_dataset_get_snapname(dsl_dataset_t *ds) return (err); headphys = headdbuf->db_data; err = zap_value_search(dp->dp_meta_objset, - headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); + headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname, + sizeof (ds->ds_snapname)); if (err != 0 && zfs_recover == B_TRUE) { err = 0; (void) snprintf(ds->ds_snapname, sizeof (ds->ds_snapname), @@ -2290,7 +2291,7 @@ get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val) uint64_t count = 0; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); @@ -2306,19 +2307,22 @@ get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val) if (count != dsl_dataset_phys(ds)->ds_num_children - 1) { return (SET_ERROR(ENOENT)); } + + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, dsl_dataset_phys(ds)->ds_next_clones_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; char buf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - za.za_first_integer, FTAG, &clone)); + za->za_first_integer, FTAG, &clone)); dsl_dir_name(clone->ds_dir, buf); fnvlist_add_boolean(val, buf); dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); + zap_attribute_free(za); return (0); } @@ -3646,16 +3650,16 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t 
*tx) if (dsl_dataset_phys(ds)->ds_next_clones_obj && spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *cnds; uint64_t o; - if (za.za_first_integer == oldnext_obj) { + if (za->za_first_integer == oldnext_obj) { /* * We've already moved the * origin's reference. @@ -3664,7 +3668,7 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) } VERIFY0(dsl_dataset_hold_obj(dp, - za.za_first_integer, FTAG, &cnds)); + za->za_first_integer, FTAG, &cnds)); o = dsl_dir_phys(cnds->ds_dir)-> dd_head_dataset_obj; @@ -3675,6 +3679,7 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) dsl_dataset_rele(cnds, FTAG); } zap_cursor_fini(&zc); + zap_attribute_free(za); } ASSERT(!dsl_prop_hascb(ds)); diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c index eff1f7de7731..e457e2fd86ef 100644 --- a/module/zfs/dsl_deadlist.c +++ b/module/zfs/dsl_deadlist.c @@ -133,7 +133,7 @@ static void dsl_deadlist_load_tree(dsl_deadlist_t *dl) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; int error; ASSERT(MUTEX_HELD(&dl->dl_lock)); @@ -159,20 +159,21 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl) if (dl->dl_havetree) return; + za = zap_attribute_alloc(); avl_create(&dl->dl_tree, dsl_deadlist_compare, sizeof (dsl_deadlist_entry_t), offsetof(dsl_deadlist_entry_t, dle_node)); for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP); - dle->dle_mintxg = zfs_strtonum(za.za_name, NULL); + dle->dle_mintxg = zfs_strtonum(za->za_name, NULL); /* * Prefetch all the bpobj's so that we do that i/o * in parallel. 
Then open them all in a second pass. */ - dle->dle_bpobj.bpo_object = za.za_first_integer; + dle->dle_bpobj.bpo_object = za->za_first_integer; dmu_prefetch_dnode(dl->dl_os, dle->dle_bpobj.bpo_object, ZIO_PRIORITY_SYNC_READ); @@ -180,6 +181,7 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl) } VERIFY3U(error, ==, ENOENT); zap_cursor_fini(&zc); + zap_attribute_free(za); for (dsl_deadlist_entry_t *dle = avl_first(&dl->dl_tree); dle != NULL; dle = AVL_NEXT(&dl->dl_tree, dle)) { @@ -207,7 +209,7 @@ static void dsl_deadlist_load_cache(dsl_deadlist_t *dl) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; int error; ASSERT(MUTEX_HELD(&dl->dl_lock)); @@ -221,26 +223,28 @@ dsl_deadlist_load_cache(dsl_deadlist_t *dl) avl_create(&dl->dl_cache, dsl_deadlist_cache_compare, sizeof (dsl_deadlist_cache_entry_t), offsetof(dsl_deadlist_cache_entry_t, dlce_node)); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { - if (za.za_first_integer == empty_bpobj) + if (za->za_first_integer == empty_bpobj) continue; dsl_deadlist_cache_entry_t *dlce = kmem_zalloc(sizeof (*dlce), KM_SLEEP); - dlce->dlce_mintxg = zfs_strtonum(za.za_name, NULL); + dlce->dlce_mintxg = zfs_strtonum(za->za_name, NULL); /* * Prefetch all the bpobj's so that we do that i/o * in parallel. Then open them all in a second pass. 
*/ - dlce->dlce_bpobj = za.za_first_integer; + dlce->dlce_bpobj = za->za_first_integer; dmu_prefetch_dnode(dl->dl_os, dlce->dlce_bpobj, ZIO_PRIORITY_SYNC_READ); avl_add(&dl->dl_cache, dlce); } VERIFY3U(error, ==, ENOENT); zap_cursor_fini(&zc); + zap_attribute_free(za); for (dsl_deadlist_cache_entry_t *dlce = avl_first(&dl->dl_cache); dlce != NULL; dlce = AVL_NEXT(&dl->dl_cache, dlce)) { @@ -381,7 +385,7 @@ dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) { dmu_object_info_t doi; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; int error; VERIFY0(dmu_object_info(os, dlobj, &doi)); @@ -390,10 +394,11 @@ dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) return; } + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, os, dlobj); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { - uint64_t obj = za.za_first_integer; + uint64_t obj = za->za_first_integer; if (obj == dmu_objset_pool(os)->dp_empty_bpobj) bpobj_decr_empty(os, tx); else @@ -401,6 +406,7 @@ dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) } VERIFY3U(error, ==, ENOENT); zap_cursor_fini(&zc); + zap_attribute_free(za); VERIFY0(dmu_object_free(os, dlobj, tx)); } @@ -875,8 +881,8 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) return; } - za = kmem_alloc(sizeof (*za), KM_SLEEP); - pza = kmem_alloc(sizeof (*pza), KM_SLEEP); + za = zap_attribute_alloc(); + pza = zap_attribute_alloc(); mutex_enter(&dl->dl_lock); /* @@ -913,8 +919,8 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) dmu_buf_rele(bonus, FTAG); mutex_exit(&dl->dl_lock); - kmem_free(za, sizeof (*za)); - kmem_free(pza, sizeof (*pza)); + zap_attribute_free(za); + zap_attribute_free(pza); } /* diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 645ad8e5b8dc..5e7a2b3fa595 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -327,9 +327,9 @@ dsl_deleg_get(const char 
*ddname, nvlist_t **nvp) mos = dp->dp_meta_objset; zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - baseza = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + baseza = zap_attribute_alloc(); source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); @@ -371,9 +371,9 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) } kmem_free(source, ZFS_MAX_DATASET_NAME_LEN); - kmem_free(baseza, sizeof (zap_attribute_t)); + zap_attribute_free(baseza); kmem_free(basezc, sizeof (zap_cursor_t)); - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); kmem_free(zc, sizeof (zap_cursor_t)); dsl_dir_rele(startdd, FTAG); @@ -482,7 +482,7 @@ dsl_load_sets(objset_t *mos, uint64_t zapobj, char type, char checkflag, void *valp, avl_tree_t *avl) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; perm_set_t *permnode; avl_index_t idx; uint64_t jumpobj; @@ -495,11 +495,12 @@ dsl_load_sets(objset_t *mos, uint64_t zapobj, if (error != 0) return (error); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, jumpobj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { permnode = kmem_alloc(sizeof (perm_set_t), KM_SLEEP); - (void) strlcpy(permnode->p_setname, za.za_name, + (void) strlcpy(permnode->p_setname, za->za_name, sizeof (permnode->p_setname)); permnode->p_matched = B_FALSE; @@ -510,6 +511,7 @@ dsl_load_sets(objset_t *mos, uint64_t zapobj, } } zap_cursor_fini(&zc); + zap_attribute_free(za); return (0); } @@ -683,7 +685,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, uint64_t jumpobj, pjumpobj; uint64_t zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; char whokey[ZFS_MAX_DELEG_NAME]; zfs_deleg_whokey(whokey, @@ -706,16 +708,18 @@ 
copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); } + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, pjumpobj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { uint64_t zero = 0; - ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); + ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1); - VERIFY(zap_update(mos, jumpobj, za.za_name, + VERIFY(zap_update(mos, jumpobj, za->za_name, 8, 1, &zero, tx) == 0); } zap_cursor_fini(&zc); + zap_attribute_free(za); } /* @@ -746,19 +750,21 @@ int dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; if (zapobj == 0) return (0); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, zapobj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); - VERIFY(0 == zap_destroy(mos, za.za_first_integer, tx)); + ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1); + VERIFY(0 == zap_destroy(mos, za->za_first_integer, tx)); } zap_cursor_fini(&zc); VERIFY(0 == zap_destroy(mos, zapobj, tx)); + zap_attribute_free(za); return (0); } diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index d4a6e5b6e9fd..b2b925b135f7 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -216,7 +216,7 @@ dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx, return; zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones); zap_cursor_retrieve(zc, za) == 0; @@ -242,7 +242,7 @@ dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx, } zap_cursor_fini(zc); - 
kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); kmem_free(zc, sizeof (zap_cursor_t)); } diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index baf970121a61..1b60fa620b8d 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -239,7 +239,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, err = zap_value_search(dp->dp_meta_objset, dsl_dir_phys(dd->dd_parent)-> dd_child_dir_zapobj, - ddobj, 0, dd->dd_myname); + ddobj, 0, dd->dd_myname, + sizeof (dd->dd_myname)); } if (err != 0) goto errout; @@ -586,7 +587,7 @@ dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) return; zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); /* Iterate my child dirs */ for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj); @@ -635,7 +636,7 @@ dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); kmem_free(zc, sizeof (zap_cursor_t)); - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); /* we're in a sync task, update counts */ dmu_buf_will_dirty(dd->dd_dbuf, tx); diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 342ec5c15c79..5ae96882935c 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -1196,7 +1196,7 @@ dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp) void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) { - zap_attribute_t za; + zap_attribute_t *za; zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; @@ -1208,19 +1208,20 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) holds = fnvlist_alloc(); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, zapobj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { char *htag; nvlist_t *tags; - htag = strchr(za.za_name, '-'); + htag = strchr(za->za_name, '-'); *htag = '\0'; ++htag; - if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { + if 
(nvlist_lookup_nvlist(holds, za->za_name, &tags) != 0) { tags = fnvlist_alloc(); fnvlist_add_boolean(tags, htag); - fnvlist_add_nvlist(holds, za.za_name, tags); + fnvlist_add_nvlist(holds, za->za_name, tags); fnvlist_free(tags); } else { fnvlist_add_boolean(tags, htag); @@ -1229,6 +1230,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) dsl_dataset_user_release_tmp(dp, holds); fnvlist_free(holds); zap_cursor_fini(&zc); + zap_attribute_free(za); } /* diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 99f931cd8632..1a0e83419e73 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -662,7 +662,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, } mutex_exit(&dd->dd_lock); - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, za) == 0; @@ -670,7 +670,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, dsl_prop_changed_notify(dp, za->za_first_integer, propname, value, FALSE); } - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); zap_cursor_fini(&zc); dsl_dir_rele(dd, FTAG); } @@ -698,6 +698,10 @@ dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname, *(uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE) iuv = B_TRUE; break; + case ZFS_PROP_SNAPDIR: + if (*(uint64_t *)value == ZFS_SNAPDIR_DISABLED) + iuv = B_TRUE; + break; default: break; } @@ -1061,11 +1065,11 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, const char *setpoint, dsl_prop_getflags_t flags, nvlist_t *nv) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); int err = 0; for (zap_cursor_init(&zc, mos, propobj); - (err = zap_cursor_retrieve(&zc, &za)) == 0; + (err = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { nvlist_t *propval; zfs_prop_t prop; @@ -1075,7 +1079,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, const char *propname; const 
char *source; - suffix = strchr(za.za_name, '$'); + suffix = strchr(za->za_name, '$'); if (suffix == NULL) { /* @@ -1085,7 +1089,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, if (flags & DSL_PROP_GET_RECEIVED) continue; - propname = za.za_name; + propname = za->za_name; source = setpoint; /* Skip if iuv entries are preset. */ @@ -1102,8 +1106,8 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, if (flags & DSL_PROP_GET_LOCAL) continue; - (void) strlcpy(buf, za.za_name, - MIN(sizeof (buf), suffix - za.za_name + 1)); + (void) strlcpy(buf, za->za_name, + MIN(sizeof (buf), suffix - za->za_name + 1)); propname = buf; if (!(flags & DSL_PROP_GET_RECEIVED)) { @@ -1128,14 +1132,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, source = ((flags & DSL_PROP_GET_INHERITING) ? setpoint : ZPROP_SOURCE_VAL_RECVD); } else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) { - (void) strlcpy(buf, za.za_name, - MIN(sizeof (buf), suffix - za.za_name + 1)); + (void) strlcpy(buf, za->za_name, + MIN(sizeof (buf), suffix - za->za_name + 1)); propname = buf; source = setpoint; prop = zfs_name_to_prop(propname); if (dsl_prop_known_index(prop, - za.za_first_integer) != 1) + za->za_first_integer) != 1) continue; } else { /* @@ -1162,28 +1166,28 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, continue; VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - if (za.za_integer_length == 1) { + if (za->za_integer_length == 1) { /* * String property */ - char *tmp = kmem_alloc(za.za_num_integers, + char *tmp = kmem_alloc(za->za_num_integers, KM_SLEEP); err = zap_lookup(mos, propobj, - za.za_name, 1, za.za_num_integers, tmp); + za->za_name, 1, za->za_num_integers, tmp); if (err != 0) { - kmem_free(tmp, za.za_num_integers); + kmem_free(tmp, za->za_num_integers); break; } VERIFY(nvlist_add_string(propval, ZPROP_VALUE, tmp) == 0); - kmem_free(tmp, za.za_num_integers); + kmem_free(tmp, za->za_num_integers); } else { /* * Integer property */ - 
ASSERT(za.za_integer_length == 8); + ASSERT(za->za_integer_length == 8); (void) nvlist_add_uint64(propval, ZPROP_VALUE, - za.za_first_integer); + za->za_first_integer); } VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0); @@ -1191,6 +1195,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, nvlist_free(propval); } zap_cursor_fini(&zc); + zap_attribute_free(za); if (err == ENOENT) err = 0; return (err); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 9d040e146308..87588455e3ef 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -617,17 +617,18 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) /* reload the queue into the in-core state */ if (scn->scn_phys.scn_queue_obj != 0) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, dp->dp_meta_objset, scn->scn_phys.scn_queue_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; (void) zap_cursor_advance(&zc)) { scan_ds_queue_insert(scn, - zfs_strtonum(za.za_name, NULL), - za.za_first_integer); + zfs_strtonum(za->za_name, NULL), + za->za_first_integer); } zap_cursor_fini(&zc); + zap_attribute_free(za); } ddt_walk_init(spa, scn->scn_phys.scn_max_txg); @@ -2870,16 +2871,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) if (usenext) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_next_clones_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; (void) zap_cursor_advance(&zc)) { scan_ds_queue_insert(scn, - zfs_strtonum(za.za_name, NULL), + zfs_strtonum(za->za_name, NULL), dsl_dataset_phys(ds)->ds_creation_txg); } zap_cursor_fini(&zc); + zap_attribute_free(za); } else { VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, enqueue_clones_cb, &ds->ds_object, @@ -4174,7 +4176,7 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) 
zbookmark_phys_t *zb; boolean_t limit_exceeded = B_FALSE; - za = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); zb = kmem_zalloc(sizeof (zbookmark_phys_t), KM_SLEEP); if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { @@ -4205,7 +4207,7 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_errorscrub_done(scn, B_TRUE, tx); dsl_errorscrub_sync_state(scn, tx); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); kmem_free(zb, sizeof (*zb)); return; } @@ -4219,7 +4221,7 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) zbookmark_err_phys_t head_ds_block; head_ds_cursor = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP); - head_ds_attr = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP); + head_ds_attr = zap_attribute_alloc(); uint64_t head_ds_err_obj = za->za_first_integer; uint64_t head_ds; @@ -4258,13 +4260,13 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) zap_cursor_fini(head_ds_cursor); kmem_free(head_ds_cursor, sizeof (*head_ds_cursor)); - kmem_free(head_ds_attr, sizeof (*head_ds_attr)); + zap_attribute_free(head_ds_attr); if (config_held) dsl_pool_config_exit(dp, FTAG); } - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); kmem_free(zb, sizeof (*zb)); if (!limit_exceeded) dsl_errorscrub_done(scn, B_TRUE, tx); diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index 75953f70f926..ffe208736ba3 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -674,7 +674,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl) zap_attribute_t *za; zap_cursor_t zc; - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, dsl_dataset_phys(ds)->ds_userrefs_obj); zap_cursor_retrieve(&zc, za) == 0; @@ -683,7 +683,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl) za->za_first_integer); } zap_cursor_fini(&zc); - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); } 
dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 7170b5eefcea..3bd6e93e93a4 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -1679,7 +1679,7 @@ spa_set_allocator(spa_t *spa, const char *allocator) int a = spa_find_allocator_byname(allocator); if (a < 0) a = 0; spa->spa_active_allocator = a; - zfs_dbgmsg("spa allocator: %s\n", metaslab_allocators[a].msop_name); + zfs_dbgmsg("spa allocator: %s", metaslab_allocators[a].msop_name); } int diff --git a/module/zfs/sa.c b/module/zfs/sa.c index bc4c9dff31e7..31d30c1b406d 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -840,7 +840,7 @@ sa_attr_table_setup(objset_t *os, const sa_attr_reg_t *reg_attrs, int count) uint64_t attr_value; sa_attr_table_t *tb; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; int registered_count = 0; int i; dmu_objset_type_t ostype = dmu_objset_type(os); @@ -914,11 +914,12 @@ sa_attr_table_setup(objset_t *os, const sa_attr_reg_t *reg_attrs, int count) */ if (sa->sa_reg_attr_obj) { + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { uint64_t value; - value = za.za_first_integer; + value = za->za_first_integer; registered_count++; tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); @@ -930,11 +931,12 @@ sa_attr_table_setup(objset_t *os, const sa_attr_reg_t *reg_attrs, int count) continue; } tb[ATTR_NUM(value)].sa_name = - kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); - (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, - strlen(za.za_name) +1); + kmem_zalloc(strlen(za->za_name) +1, KM_SLEEP); + (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za->za_name, + strlen(za->za_name) +1); } zap_cursor_fini(&zc); + zap_attribute_free(za); /* * Make sure we processed the correct number of registered * attributes @@ -996,7 +998,7 @@ sa_setup(objset_t *os, uint64_t 
sa_obj, const sa_attr_reg_t *reg_attrs, int count, sa_attr_type_t **user_table) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; sa_os_t *sa; dmu_objset_type_t ostype = dmu_objset_type(os); sa_attr_type_t *tb; @@ -1053,33 +1055,35 @@ sa_setup(objset_t *os, uint64_t sa_obj, const sa_attr_reg_t *reg_attrs, goto fail; } + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { sa_attr_type_t *lot_attrs; uint64_t lot_num; lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * - za.za_num_integers, KM_SLEEP); + za->za_num_integers, KM_SLEEP); if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, - za.za_name, 2, za.za_num_integers, + za->za_name, 2, za->za_num_integers, lot_attrs))) != 0) { kmem_free(lot_attrs, sizeof (sa_attr_type_t) * - za.za_num_integers); + za->za_num_integers); break; } - VERIFY0(ddi_strtoull(za.za_name, NULL, 10, + VERIFY0(ddi_strtoull(za->za_name, NULL, 10, (unsigned long long *)&lot_num)); (void) sa_add_layout_entry(os, lot_attrs, - za.za_num_integers, lot_num, + za->za_num_integers, lot_num, sa_layout_info_hash(lot_attrs, - za.za_num_integers), B_FALSE, NULL); + za->za_num_integers), B_FALSE, NULL); kmem_free(lot_attrs, sizeof (sa_attr_type_t) * - za.za_num_integers); + za->za_num_integers); } zap_cursor_fini(&zc); + zap_attribute_free(za); /* * Make sure layout count matches number of entries added diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 1a68a0953565..6b8c7ee93daa 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -542,12 +542,13 @@ spa_prop_get(spa_t *spa, nvlist_t *nv) { objset_t *mos = spa->spa_meta_objset; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; dsl_pool_t *dp; int err = 0; dp = spa_get_dsl(spa); dsl_pool_config_enter(dp, FTAG); + za = zap_attribute_alloc(); mutex_enter(&spa->spa_props_lock); /* @@ -563,21 +564,21 @@ spa_prop_get(spa_t *spa, 
nvlist_t *nv) * Get properties from the MOS pool property object. */ for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); - (err = zap_cursor_retrieve(&zc, &za)) == 0; + (err = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { uint64_t intval = 0; char *strval = NULL; zprop_source_t src = ZPROP_SRC_DEFAULT; zpool_prop_t prop; - if ((prop = zpool_name_to_prop(za.za_name)) == - ZPOOL_PROP_INVAL && !zfs_prop_user(za.za_name)) + if ((prop = zpool_name_to_prop(za->za_name)) == + ZPOOL_PROP_INVAL && !zfs_prop_user(za->za_name)) continue; - switch (za.za_integer_length) { + switch (za->za_integer_length) { case 8: /* integer property */ - if (za.za_first_integer != + if (za->za_first_integer != zpool_prop_default_numeric(prop)) src = ZPROP_SRC_LOCAL; @@ -585,7 +586,7 @@ spa_prop_get(spa_t *spa, nvlist_t *nv) dsl_dataset_t *ds = NULL; err = dsl_dataset_hold_obj(dp, - za.za_first_integer, FTAG, &ds); + za->za_first_integer, FTAG, &ds); if (err != 0) break; @@ -595,7 +596,7 @@ spa_prop_get(spa_t *spa, nvlist_t *nv) dsl_dataset_rele(ds, FTAG); } else { strval = NULL; - intval = za.za_first_integer; + intval = za->za_first_integer; } spa_prop_add_list(nv, prop, strval, intval, src); @@ -607,21 +608,21 @@ spa_prop_get(spa_t *spa, nvlist_t *nv) case 1: /* string property */ - strval = kmem_alloc(za.za_num_integers, KM_SLEEP); + strval = kmem_alloc(za->za_num_integers, KM_SLEEP); err = zap_lookup(mos, spa->spa_pool_props_object, - za.za_name, 1, za.za_num_integers, strval); + za->za_name, 1, za->za_num_integers, strval); if (err) { - kmem_free(strval, za.za_num_integers); + kmem_free(strval, za->za_num_integers); break; } if (prop != ZPOOL_PROP_INVAL) { spa_prop_add_list(nv, prop, strval, 0, src); } else { src = ZPROP_SRC_LOCAL; - spa_prop_add_user(nv, za.za_name, strval, + spa_prop_add_user(nv, za->za_name, strval, src); } - kmem_free(strval, za.za_num_integers); + kmem_free(strval, za->za_num_integers); break; default: @@ -632,6 +633,7 @@ spa_prop_get(spa_t 
*spa, nvlist_t *nv) out: mutex_exit(&spa->spa_props_lock); dsl_pool_config_exit(dp, FTAG); + zap_attribute_free(za); if (err && err != ENOENT) return (err); @@ -2972,12 +2974,13 @@ dsl_get_next_livelist_obj(objset_t *os, uint64_t zap_obj, uint64_t *llp) { int err; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); zap_cursor_init(&zc, os, zap_obj); - err = zap_cursor_retrieve(&zc, &za); + err = zap_cursor_retrieve(&zc, za); zap_cursor_fini(&zc); if (err == 0) - *llp = za.za_first_integer; + *llp = za->za_first_integer; + zap_attribute_free(za); return (err); } @@ -6048,17 +6051,17 @@ static void spa_feature_stats_from_disk(spa_t *spa, nvlist_t *features) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); if (spa->spa_feat_for_read_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_read_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY0(nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); + ASSERT(za->za_integer_length == sizeof (uint64_t) && + za->za_num_integers == 1); + VERIFY0(nvlist_add_uint64(features, za->za_name, + za->za_first_integer)); } zap_cursor_fini(&zc); } @@ -6066,15 +6069,16 @@ spa_feature_stats_from_disk(spa_t *spa, nvlist_t *features) if (spa->spa_feat_for_write_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_write_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY0(nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); + ASSERT(za->za_integer_length == sizeof (uint64_t) && + za->za_num_integers == 1); + VERIFY0(nvlist_add_uint64(features, za->za_name, + za->za_first_integer)); } zap_cursor_fini(&zc); } + 
zap_attribute_free(za); } static void @@ -9434,13 +9438,13 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) /* Diff old AVZ with new one */ zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - uint64_t vdzap = za.za_first_integer; + uint64_t vdzap = za->za_first_integer; if (zap_lookup_int(spa->spa_meta_objset, new_avz, vdzap) == ENOENT) { /* @@ -9453,6 +9457,7 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) } zap_cursor_fini(&zc); + zap_attribute_free(za); /* Destroy the old AVZ */ VERIFY0(zap_destroy(spa->spa_meta_objset, @@ -9466,18 +9471,19 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) spa->spa_all_vdev_zaps = new_avz; } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); /* Walk through the AVZ and destroy all listed ZAPs */ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { - uint64_t zap = za.za_first_integer; + uint64_t zap = za->za_first_integer; VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx)); } zap_cursor_fini(&zc); + zap_attribute_free(za); /* Destroy and unlink the AVZ itself */ VERIFY0(zap_destroy(spa->spa_meta_objset, diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index 62d7b4fa2df2..a49e28ee7a43 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -429,7 +429,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, zap_attribute_t *za; zc = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP); - za = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); for (zap_cursor_init(zc, spa->spa_meta_objset, zap_clone); zap_cursor_retrieve(zc, za) == 0; @@ 
-463,7 +463,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, } zap_cursor_fini(zc); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); kmem_free(zc, sizeof (*zc)); out: @@ -617,10 +617,10 @@ spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb, errphys_to_name(&healed_zep, name, sizeof (name)); zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_errlog_last); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { - if (zap_contains(spa->spa_meta_objset, za.za_first_integer, + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { + if (zap_contains(spa->spa_meta_objset, za->za_first_integer, name) == 0) { if (!MUTEX_HELD(&spa->spa_errlog_lock)) { mutex_enter(&spa->spa_errlog_lock); @@ -657,6 +657,7 @@ spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb, } } zap_cursor_fini(&zc); + zap_attribute_free(za); } /* @@ -704,24 +705,25 @@ spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx) } else { errphys_to_name(&se->se_zep, name, sizeof (name)); zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_errlog_last); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { zap_remove(spa->spa_meta_objset, - za.za_first_integer, name, tx); + za->za_first_integer, name, tx); } zap_cursor_fini(&zc); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_errlog_scrub); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { zap_remove(spa->spa_meta_objset, - za.za_first_integer, name, tx); + za->za_first_integer, name, tx); } zap_cursor_fini(&zc); + zap_attribute_free(za); } kmem_free(se, sizeof (spa_error_entry_t)); } @@ -746,15 +748,16 @@ approx_errlog_size_impl(spa_t *spa, uint64_t 
spa_err_obj) uint64_t total = 0; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { uint64_t count; - if (zap_count(spa->spa_meta_objset, za.za_first_integer, + if (zap_count(spa->spa_meta_objset, za->za_first_integer, &count) == 0) total += count; } zap_cursor_fini(&zc); + zap_attribute_free(za); return (total); } @@ -806,7 +809,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, dmu_tx_t *tx) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; zbookmark_phys_t zb; uint64_t count; @@ -822,14 +825,15 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, return; } + za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { if (spa_upgrade_errlog_limit != 0 && zc.zc_cd == spa_upgrade_errlog_limit) break; - name_to_bookmark(za.za_name, &zb); + name_to_bookmark(za->za_name, &zb); zbookmark_err_phys_t zep; zep.zb_object = zb.zb_object; @@ -909,6 +913,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, buf, 1, strlen(name) + 1, name, tx); } zap_cursor_fini(&zc); + zap_attribute_free(za); VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx)); } @@ -954,7 +959,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) zap_attribute_t *za; zc = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP); - za = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { for (zap_cursor_init(zc, spa->spa_meta_objset, obj); @@ -963,7 +968,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) if (*count == 0) { 
zap_cursor_fini(zc); kmem_free(zc, sizeof (*zc)); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); return (SET_ERROR(ENOMEM)); } @@ -974,13 +979,13 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) if (error != 0) { zap_cursor_fini(zc); kmem_free(zc, sizeof (*zc)); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); return (error); } } zap_cursor_fini(zc); kmem_free(zc, sizeof (*zc)); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); return (0); } @@ -992,7 +997,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) zap_attribute_t *head_ds_attr; head_ds_cursor = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP); - head_ds_attr = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP); + head_ds_attr = zap_attribute_alloc(); uint64_t head_ds_err_obj = za->za_first_integer; uint64_t head_ds; @@ -1010,20 +1015,20 @@ process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count) zap_cursor_fini(head_ds_cursor); kmem_free(head_ds_cursor, sizeof (*head_ds_cursor)); - kmem_free(head_ds_attr, sizeof (*head_ds_attr)); + zap_attribute_free(head_ds_attr); zap_cursor_fini(zc); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); kmem_free(zc, sizeof (*zc)); return (error); } } zap_cursor_fini(head_ds_cursor); kmem_free(head_ds_cursor, sizeof (*head_ds_cursor)); - kmem_free(head_ds_attr, sizeof (*head_ds_attr)); + zap_attribute_free(head_ds_attr); } zap_cursor_fini(zc); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); kmem_free(zc, sizeof (*zc)); return (0); } @@ -1230,14 +1235,15 @@ delete_errlog(spa_t *spa, uint64_t spa_err_obj, dmu_tx_t *tx) { if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); - zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { VERIFY0(dmu_object_free(spa->spa_meta_objset, - 
za.za_first_integer, tx)); + za->za_first_integer, tx)); } zap_cursor_fini(&zc); + zap_attribute_free(za); } VERIFY0(dmu_object_free(spa->spa_meta_objset, spa_err_obj, tx)); } @@ -1339,20 +1345,21 @@ delete_dataset_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t ds, return; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa->spa_meta_objset, spa_err_obj); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { uint64_t head_ds; - name_to_object(za.za_name, &head_ds); + name_to_object(za->za_name, &head_ds); if (head_ds == ds) { (void) zap_remove(spa->spa_meta_objset, spa_err_obj, - za.za_name, tx); + za->za_name, tx); VERIFY0(dmu_object_free(spa->spa_meta_objset, - za.za_first_integer, tx)); + za->za_first_integer, tx)); break; } } zap_cursor_fini(&zc); + zap_attribute_free(za); } void @@ -1435,22 +1442,23 @@ swap_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t new_head, uint64_t } zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); zbookmark_err_phys_t err_block; for (zap_cursor_init(&zc, spa->spa_meta_objset, old_head_errlog); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { const char *name = ""; - name_to_errphys(za.za_name, &err_block); + name_to_errphys(za->za_name, &err_block); if (err_block.zb_birth < txg) { (void) zap_update(spa->spa_meta_objset, new_head_errlog, - za.za_name, 1, strlen(name) + 1, name, tx); + za->za_name, 1, strlen(name) + 1, name, tx); (void) zap_remove(spa->spa_meta_objset, old_head_errlog, - za.za_name, tx); + za->za_name, tx); } } zap_cursor_fini(&zc); + zap_attribute_free(za); } void diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index 32158e8c592c..f55218e3579b 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1020,16 
+1020,17 @@ spa_ld_log_sm_metadata(spa_t *spa) } zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, spa_meta_objset(spa), spacemap_zap); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { - uint64_t log_txg = zfs_strtonum(za.za_name, NULL); + uint64_t log_txg = zfs_strtonum(za->za_name, NULL); spa_log_sm_t *sls = - spa_log_sm_alloc(za.za_first_integer, log_txg); + spa_log_sm_alloc(za->za_first_integer, log_txg); avl_add(&spa->spa_sm_logs_by_txg, sls); } zap_cursor_fini(&zc); + zap_attribute_free(za); if (error != ENOENT) { spa_load_failed(spa, "spa_ld_log_sm_metadata(): failed at " "zap_cursor_retrieve(spacemap_zap) [error %d]", diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 97191e768549..f486513fcaf9 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2590,6 +2590,7 @@ spa_init(spa_mode_t mode) scan_init(); qat_init(); spa_import_progress_init(); + zap_init(); } void @@ -2616,6 +2617,7 @@ spa_fini(void) scan_fini(); qat_fini(); spa_import_progress_destroy(); + zap_fini(); avl_destroy(&spa_namespace_avl); avl_destroy(&spa_spare_avl); diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index 17ed2a620b1e..45a2f06263a0 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -895,6 +895,14 @@ static const spa_iostats_t spa_iostats_template = { { "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 }, { "simple_trim_extents_failed", KSTAT_DATA_UINT64 }, { "simple_trim_bytes_failed", KSTAT_DATA_UINT64 }, + { "arc_read_count", KSTAT_DATA_UINT64 }, + { "arc_read_bytes", KSTAT_DATA_UINT64 }, + { "arc_write_count", KSTAT_DATA_UINT64 }, + { "arc_write_bytes", KSTAT_DATA_UINT64 }, + { "direct_read_count", KSTAT_DATA_UINT64 }, + { "direct_read_bytes", KSTAT_DATA_UINT64 }, + { "direct_write_count", KSTAT_DATA_UINT64 }, + { "direct_write_bytes", KSTAT_DATA_UINT64 }, }; #define SPA_IOSTATS_ADD(stat, 
val) \ @@ -938,6 +946,44 @@ spa_iostats_trim_add(spa_t *spa, trim_type_t type, } } +void +spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + spa_history_kstat_t *shk = &spa->spa_stats.iostats; + kstat_t *ksp = shk->kstat; + + if (ksp == NULL) + return; + + spa_iostats_t *iostats = ksp->ks_data; + if (flags & DMU_DIRECTIO) { + SPA_IOSTATS_ADD(direct_read_count, iops); + SPA_IOSTATS_ADD(direct_read_bytes, size); + } else { + SPA_IOSTATS_ADD(arc_read_count, iops); + SPA_IOSTATS_ADD(arc_read_bytes, size); + } +} + +void +spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + spa_history_kstat_t *shk = &spa->spa_stats.iostats; + kstat_t *ksp = shk->kstat; + + if (ksp == NULL) + return; + + spa_iostats_t *iostats = ksp->ks_data; + if (flags & DMU_DIRECTIO) { + SPA_IOSTATS_ADD(direct_write_count, iops); + SPA_IOSTATS_ADD(direct_write_bytes, size); + } else { + SPA_IOSTATS_ADD(arc_write_count, iops); + SPA_IOSTATS_ADD(arc_write_bytes, size); + } +} + static int spa_iostats_update(kstat_t *ksp, int rw) { diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 6ae0a14127bf..bcab46c63bfa 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -117,6 +117,11 @@ static unsigned int zfs_slow_io_events_per_second = 20; */ static unsigned int zfs_deadman_events_per_second = 1; +/* + * Rate limit direct write IO verify failures to this many per second. + */ +static unsigned int zfs_dio_write_verify_events_per_second = 20; + /* * Rate limit checksum events after this many checksum errors per second. */ @@ -153,6 +158,17 @@ int zfs_nocacheflush = 0; uint_t zfs_vdev_max_auto_ashift = 14; uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN; +/* + * VDEV checksum verification for Direct I/O writes. This is necessary for + * Linux, because anonymous pages can not be placed under write protection + * during Direct I/O writes. 
+ */ +#if !defined(__FreeBSD__) +uint_t zfs_vdev_direct_write_verify = 1; +#else +uint_t zfs_vdev_direct_write_verify = 0; +#endif + void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...) { @@ -673,6 +689,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) 1); zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_deadman_events_per_second, 1); + zfs_ratelimit_init(&vd->vdev_dio_verify_rl, + &zfs_dio_write_verify_events_per_second, 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); @@ -1182,6 +1200,7 @@ vdev_free(vdev_t *vd) zfs_ratelimit_fini(&vd->vdev_delay_rl); zfs_ratelimit_fini(&vd->vdev_deadman_rl); + zfs_ratelimit_fini(&vd->vdev_dio_verify_rl); zfs_ratelimit_fini(&vd->vdev_checksum_rl); if (vd == spa->spa_root_vdev) @@ -3130,9 +3149,9 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done) * Reassess DTLs after a config change or scrub completion. If txg == 0 no * write operations will be issued to the pool. */ -void -vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, - boolean_t scrub_done, boolean_t rebuild_done) +static void +vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, + boolean_t scrub_done, boolean_t rebuild_done, boolean_t faulting) { spa_t *spa = vd->vdev_spa; avl_tree_t reftree; @@ -3141,8 +3160,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); for (int c = 0; c < vd->vdev_children; c++) - vdev_dtl_reassess(vd->vdev_child[c], txg, - scrub_txg, scrub_done, rebuild_done); + vdev_dtl_reassess_impl(vd->vdev_child[c], txg, + scrub_txg, scrub_done, rebuild_done, faulting); if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux) return; @@ -3236,11 +3255,21 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, if (scrub_done) range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL); range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL); - if (!vdev_readable(vd)) + + /* 
+ * For the faulting case, treat members of a replacing vdev + * as if they are not available. It's more likely than not that + * a vdev in a replacing vdev could encounter read errors so + * treat it as not being able to contribute. + */ + if (!vdev_readable(vd) || + (faulting && vd->vdev_parent != NULL && + vd->vdev_parent->vdev_ops == &vdev_replacing_ops)) { range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL); - else + } else { range_tree_walk(vd->vdev_dtl[DTL_MISSING], range_tree_add, vd->vdev_dtl[DTL_OUTAGE]); + } /* * If the vdev was resilvering or rebuilding and no longer @@ -3302,6 +3331,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, } } +void +vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, + boolean_t scrub_done, boolean_t rebuild_done) +{ + return (vdev_dtl_reassess_impl(vd, txg, scrub_txg, scrub_done, + rebuild_done, B_FALSE)); +} + /* * Iterate over all the vdevs except spare, and post kobj events */ @@ -3529,7 +3566,11 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) } /* - * Determine whether the specified vdev can be offlined/detached/removed + * Determine whether the specified vdev can be + * - offlined + * - detached + * - removed + * - faulted * without losing data. */ boolean_t @@ -3539,6 +3580,7 @@ vdev_dtl_required(vdev_t *vd) vdev_t *tvd = vd->vdev_top; uint8_t cant_read = vd->vdev_cant_read; boolean_t required; + boolean_t faulting = vd->vdev_state == VDEV_STATE_FAULTED; ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); @@ -3551,10 +3593,10 @@ vdev_dtl_required(vdev_t *vd) * If not, we can safely offline/detach/remove the device. 
*/ vd->vdev_cant_read = B_TRUE; - vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE); + vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting); required = !vdev_dtl_empty(tvd, DTL_OUTAGE); vd->vdev_cant_read = cant_read; - vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE); + vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting); if (!required && zio_injection_enabled) { required = !!zio_handle_device_injection(vd, NULL, @@ -4475,6 +4517,7 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_stat.vs_read_errors = 0; vd->vdev_stat.vs_write_errors = 0; vd->vdev_stat.vs_checksum_errors = 0; + vd->vdev_stat.vs_dio_verify_errors = 0; vd->vdev_stat.vs_slow_ios = 0; for (int c = 0; c < vd->vdev_children; c++) @@ -6432,33 +6475,33 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) * Get all properties from the MOS vdev property object. */ zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, mos, objid); - (err = zap_cursor_retrieve(&zc, &za)) == 0; + (err = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { intval = 0; strval = NULL; zprop_source_t src = ZPROP_SRC_DEFAULT; - propname = za.za_name; + propname = za->za_name; - switch (za.za_integer_length) { + switch (za->za_integer_length) { case 8: /* We do not allow integer user properties */ /* This is likely an internal value */ break; case 1: /* string property */ - strval = kmem_alloc(za.za_num_integers, + strval = kmem_alloc(za->za_num_integers, KM_SLEEP); - err = zap_lookup(mos, objid, za.za_name, 1, - za.za_num_integers, strval); + err = zap_lookup(mos, objid, za->za_name, 1, + za->za_num_integers, strval); if (err) { - kmem_free(strval, za.za_num_integers); + kmem_free(strval, za->za_num_integers); break; } vdev_prop_add_list(outnvl, propname, strval, 0, src); - kmem_free(strval, za.za_num_integers); + kmem_free(strval, za->za_num_integers); break; default: @@ -6466,6 +6509,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t 
*outnvl) } } zap_cursor_fini(&zc); + zap_attribute_free(za); } mutex_exit(&spa->spa_props_lock); @@ -6503,7 +6547,14 @@ ZFS_MODULE_PARAM(zfs, zfs_, slow_io_events_per_second, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, deadman_events_per_second, UINT, ZMOD_RW, "Rate limit hung IO (deadman) events to this many per second"); +ZFS_MODULE_PARAM(zfs, zfs_, dio_write_verify_events_per_second, UINT, ZMOD_RW, + "Rate Direct I/O write verify events to this many per second"); + /* BEGIN CSTYLED */ +ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify, UINT, ZMOD_RW, + "Direct I/O writes will perform for checksum verification before " + "commiting write"); + ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW, "Rate limit checksum events to this many checksum errors per second " "(do not set below ZED threshold)."); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 47346dd5acff..9d12bc2eb0a2 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -387,6 +387,10 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv) /* IO delays */ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SLOW_IOS, vs->vs_slow_ios); + /* Direct I/O write verify errors */ + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_DIO_VERIFY_ERRORS, + vs->vs_dio_verify_errors); + /* Add extended stats nvlist to main nvlist */ fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx); diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 03b76ea1b7bf..40e7bcf3ed1f 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -832,7 +832,12 @@ zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, static int fzap_checkname(zap_name_t *zn) { - if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN) + uint32_t maxnamelen = zn->zn_normbuf_len; + uint64_t len = (uint64_t)zn->zn_key_orig_numints * zn->zn_key_intlen; + /* Only allow directory zap to have longname */ + if (len > maxnamelen || + (len > ZAP_MAXNAMELEN && + zn->zn_zap->zap_dnode->dn_type != 
DMU_OT_DIRECTORY_CONTENTS)) return (SET_ERROR(ENAMETOOLONG)); return (0); } @@ -1102,7 +1107,7 @@ zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj, int zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask, - char *name) + char *name, uint64_t namelen) { zap_cursor_t zc; int err; @@ -1110,17 +1115,18 @@ zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask, if (mask == 0) mask = -1ULL; - zap_attribute_t *za = kmem_alloc(sizeof (*za), KM_SLEEP); + zap_attribute_t *za = zap_attribute_long_alloc(); for (zap_cursor_init(&zc, os, zapobj); (err = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { if ((za->za_first_integer & mask) == (value & mask)) { - (void) strlcpy(name, za->za_name, MAXNAMELEN); + if (strlcpy(name, za->za_name, namelen) >= namelen) + err = SET_ERROR(ENAMETOOLONG); break; } } zap_cursor_fini(&zc); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); return (err); } @@ -1130,7 +1136,7 @@ zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx) zap_cursor_t zc; int err = 0; - zap_attribute_t *za = kmem_alloc(sizeof (*za), KM_SLEEP); + zap_attribute_t *za = zap_attribute_long_alloc(); for (zap_cursor_init(&zc, os, fromobj); zap_cursor_retrieve(&zc, za) == 0; (void) zap_cursor_advance(&zc)) { @@ -1144,7 +1150,7 @@ zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx) break; } zap_cursor_fini(&zc); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); return (err); } @@ -1155,7 +1161,7 @@ zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, zap_cursor_t zc; int err = 0; - zap_attribute_t *za = kmem_alloc(sizeof (*za), KM_SLEEP); + zap_attribute_t *za = zap_attribute_long_alloc(); for (zap_cursor_init(&zc, os, fromobj); zap_cursor_retrieve(&zc, za) == 0; (void) zap_cursor_advance(&zc)) { @@ -1169,7 +1175,7 @@ zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, break; } zap_cursor_fini(&zc); - kmem_free(za, 
sizeof (*za)); + zap_attribute_free(za); return (err); } @@ -1180,7 +1186,7 @@ zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, zap_cursor_t zc; int err = 0; - zap_attribute_t *za = kmem_alloc(sizeof (*za), KM_SLEEP); + zap_attribute_t *za = zap_attribute_long_alloc(); for (zap_cursor_init(&zc, os, fromobj); zap_cursor_retrieve(&zc, za) == 0; (void) zap_cursor_advance(&zc)) { @@ -1200,7 +1206,7 @@ zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, break; } zap_cursor_fini(&zc); - kmem_free(za, sizeof (*za)); + zap_attribute_free(za); return (err); } @@ -1378,7 +1384,7 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) ASSERT(err == 0 || err == EOVERFLOW); } err = zap_entry_read_name(zap, &zeh, - sizeof (za->za_name), za->za_name); + za->za_name_len, za->za_name); ASSERT(err == 0); za->za_normalization_conflict = diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 0d6533b0e131..12938022e976 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -24,6 +24,7 @@ * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2017 Nexenta Systems, Inc. + * Copyright (c) 2024, Klara, Inc. */ #include @@ -36,12 +37,37 @@ #include #include #include +#include #ifdef _KERNEL #include #endif -int zap_micro_max_size = MZAP_MAX_BLKSZ; +/* + * The maximum size (in bytes) of a microzap before it is converted to a + * fatzap. It will be rounded up to next multiple of 512 (SPA_MINBLOCKSIZE). + * + * By definition, a microzap must fit into a single block, so this has + * traditionally been SPA_OLD_MAXBLOCKSIZE, and is set to that by default. + * Setting this higher requires both the large_blocks feature (to even create + * blocks that large) and the large_microzap feature (to enable the stream + * machinery to understand not to try to split a microzap block). 
+ * If large_microzap is enabled, this value will be clamped to + * spa_maxblocksize(). If not, it will be clamped to SPA_OLD_MAXBLOCKSIZE. + */ +static int zap_micro_max_size = SPA_OLD_MAXBLOCKSIZE; + +uint64_t +zap_get_micro_max_size(spa_t *spa) +{ + uint64_t maxsz = P2ROUNDUP(zap_micro_max_size, SPA_MINBLOCKSIZE); + if (maxsz <= SPA_OLD_MAXBLOCKSIZE) + return (maxsz); + if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_MICROZAP)) + return (MIN(maxsz, spa_maxblocksize(spa))); + return (SPA_OLD_MAXBLOCKSIZE); +} static int mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags); @@ -131,12 +157,12 @@ zap_hash(zap_name_t *zn) } static int -zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags) +zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags, + size_t outlen) { ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); size_t inlen = strlen(name) + 1; - size_t outlen = ZAP_MAXNAMELEN; int err = 0; (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, @@ -149,43 +175,104 @@ zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags) boolean_t zap_match(zap_name_t *zn, const char *matchname) { + boolean_t res = B_FALSE; ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); if (zn->zn_matchtype & MT_NORMALIZE) { - char norm[ZAP_MAXNAMELEN]; + size_t namelen = zn->zn_normbuf_len; + char normbuf[ZAP_MAXNAMELEN]; + char *norm = normbuf; - if (zap_normalize(zn->zn_zap, matchname, norm, - zn->zn_normflags) != 0) - return (B_FALSE); + /* + * Cannot allocate this on-stack as it exceeds the stack-limit + * of 1024. 
+ */ + if (namelen > ZAP_MAXNAMELEN) + norm = kmem_alloc(namelen, KM_SLEEP); - return (strcmp(zn->zn_key_norm, norm) == 0); + if (zap_normalize(zn->zn_zap, matchname, norm, + zn->zn_normflags, namelen) != 0) { + res = B_FALSE; + } else { + res = (strcmp(zn->zn_key_norm, norm) == 0); + } + if (norm != normbuf) + kmem_free(norm, namelen); } else { - return (strcmp(zn->zn_key_orig, matchname) == 0); + res = (strcmp(zn->zn_key_orig, matchname) == 0); } + return (res); +} + +static kmem_cache_t *zap_name_cache; +static kmem_cache_t *zap_attr_cache; +static kmem_cache_t *zap_name_long_cache; +static kmem_cache_t *zap_attr_long_cache; + +void +zap_init(void) +{ + zap_name_cache = kmem_cache_create("zap_name", + sizeof (zap_name_t) + ZAP_MAXNAMELEN, 0, NULL, NULL, + NULL, NULL, NULL, 0); + + zap_attr_cache = kmem_cache_create("zap_attr_cache", + sizeof (zap_attribute_t) + ZAP_MAXNAMELEN, 0, NULL, + NULL, NULL, NULL, NULL, 0); + + zap_name_long_cache = kmem_cache_create("zap_name_long", + sizeof (zap_name_t) + ZAP_MAXNAMELEN_NEW, 0, NULL, NULL, + NULL, NULL, NULL, 0); + + zap_attr_long_cache = kmem_cache_create("zap_attr_long_cache", + sizeof (zap_attribute_t) + ZAP_MAXNAMELEN_NEW, 0, NULL, + NULL, NULL, NULL, NULL, 0); +} + +void +zap_fini(void) +{ + kmem_cache_destroy(zap_name_cache); + kmem_cache_destroy(zap_attr_cache); + kmem_cache_destroy(zap_name_long_cache); + kmem_cache_destroy(zap_attr_long_cache); } static zap_name_t * -zap_name_alloc(zap_t *zap) +zap_name_alloc(zap_t *zap, boolean_t longname) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + kmem_cache_t *cache = longname ? zap_name_long_cache : zap_name_cache; + zap_name_t *zn = kmem_cache_alloc(cache, KM_SLEEP); + zn->zn_zap = zap; + zn->zn_normbuf_len = longname ? 
ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN; return (zn); } void zap_name_free(zap_name_t *zn) { - kmem_free(zn, sizeof (zap_name_t)); + if (zn->zn_normbuf_len == ZAP_MAXNAMELEN) { + kmem_cache_free(zap_name_cache, zn); + } else { + ASSERT3U(zn->zn_normbuf_len, ==, ZAP_MAXNAMELEN_NEW); + kmem_cache_free(zap_name_long_cache, zn); + } } static int zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) { zap_t *zap = zn->zn_zap; + size_t key_len = strlen(key) + 1; + + /* Make sure zn is allocated for longname if key is long */ + IMPLY(key_len > ZAP_MAXNAMELEN, + zn->zn_normbuf_len == ZAP_MAXNAMELEN_NEW); zn->zn_key_intlen = sizeof (*key); zn->zn_key_orig = key; - zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; + zn->zn_key_orig_numints = key_len; zn->zn_matchtype = mt; zn->zn_normflags = zap->zap_normflags; @@ -203,7 +290,7 @@ zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) * what the hash is computed from. */ if (zap_normalize(zap, key, zn->zn_normbuf, - zap->zap_normflags) != 0) + zap->zap_normflags, zn->zn_normbuf_len) != 0) return (SET_ERROR(ENOTSUP)); zn->zn_key_norm = zn->zn_normbuf; zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; @@ -222,7 +309,7 @@ zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) * what the matching is based on. (Not the hash!) 
*/ if (zap_normalize(zap, key, zn->zn_normbuf, - zn->zn_normflags) != 0) + zn->zn_normflags, zn->zn_normbuf_len) != 0) return (SET_ERROR(ENOTSUP)); zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } @@ -233,7 +320,8 @@ zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) zap_name_t * zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt) { - zap_name_t *zn = zap_name_alloc(zap); + size_t key_len = strlen(key) + 1; + zap_name_t *zn = zap_name_alloc(zap, (key_len > ZAP_MAXNAMELEN)); if (zap_name_init_str(zn, key, mt) != 0) { zap_name_free(zn); return (NULL); @@ -244,7 +332,7 @@ zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt) static zap_name_t * zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP); ASSERT(zap->zap_normflags == 0); zn->zn_zap = zap; @@ -252,6 +340,7 @@ zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) zn->zn_key_orig = zn->zn_key_norm = key; zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; zn->zn_matchtype = 0; + zn->zn_normbuf_len = ZAP_MAXNAMELEN; zn->zn_hash = zap_hash(zn); return (zn); @@ -467,7 +556,7 @@ mzap_open(dmu_buf_t *db) zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare, mze_find_in_buf, sizeof (mzap_ent_t), 512); - zap_name_t *zn = zap_name_alloc(zap); + zap_name_t *zn = zap_name_alloc(zap, B_FALSE); for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; @@ -575,7 +664,7 @@ zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx, if (zap->zap_ismicro && tx && adding && zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; - if (newsz > zap_micro_max_size) { + if (newsz > zap_get_micro_max_size(dmu_objset_spa(os))) { dprintf("upgrading obj %llu: num_entries=%u\n", (u_longlong_t)obj, 
zap->zap_m.zap_num_entries); *zapp = zap; @@ -587,6 +676,31 @@ zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx, VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx)); zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; + + if (newsz > SPA_OLD_MAXBLOCKSIZE) { + dsl_dataset_t *ds = dmu_objset_ds(os); + if (!dsl_dataset_feature_is_active(ds, + SPA_FEATURE_LARGE_MICROZAP)) { + /* + * A microzap just grew beyond the old limit + * for the first time, so we have to ensure the + * feature flag is activated. + * zap_get_micro_max_size() won't let us get + * here if the feature is not enabled, so we + * don't need any other checks beforehand. + * + * Since we're in open context, we can't + * activate the feature directly, so we instead + * flag it on the dataset for next sync. + */ + dsl_dataset_dirty(ds, tx); + mutex_enter(&ds->ds_lock); + ds->ds_feature_activation + [SPA_FEATURE_LARGE_MICROZAP] = + (void *)B_TRUE; + mutex_exit(&ds->ds_lock); + } + } } *zapp = zap; @@ -674,7 +788,7 @@ mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags) fzap_upgrade(zap, tx, flags); - zap_name_t *zn = zap_name_alloc(zap); + zap_name_t *zn = zap_name_alloc(zap, B_FALSE); for (int i = 0; i < nchunks; i++) { mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; if (mze->mze_name[0] == 0) @@ -1600,6 +1714,41 @@ zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints, return (err); } + +static zap_attribute_t * +zap_attribute_alloc_impl(boolean_t longname) +{ + zap_attribute_t *za; + + za = kmem_cache_alloc((longname)? zap_attr_long_cache : zap_attr_cache, + KM_SLEEP); + za->za_name_len = (longname)? 
ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN; + return (za); +} + +zap_attribute_t * +zap_attribute_alloc(void) +{ + return (zap_attribute_alloc_impl(B_FALSE)); +} + +zap_attribute_t * +zap_attribute_long_alloc(void) +{ + return (zap_attribute_alloc_impl(B_TRUE)); +} + +void +zap_attribute_free(zap_attribute_t *za) +{ + if (za->za_name_len == ZAP_MAXNAMELEN) { + kmem_cache_free(zap_attr_cache, za); + } else { + ASSERT3U(za->za_name_len, ==, ZAP_MAXNAMELEN_NEW); + kmem_cache_free(zap_attr_long_cache, za); + } +} + /* * Routines for iterating over the attributes. */ @@ -1736,7 +1885,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) za->za_num_integers = 1; za->za_first_integer = mzep->mze_value; (void) strlcpy(za->za_name, mzep->mze_name, - sizeof (za->za_name)); + za->za_name_len); zc->zc_hash = (uint64_t)mze->mze_hash << 32; zc->zc_cd = mze->mze_cd; err = 0; diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c index 2da0bf9740e5..b4aa7a1317a6 100644 --- a/module/zfs/zcp_iter.c +++ b/module/zfs/zcp_iter.c @@ -54,7 +54,7 @@ zcp_clones_iter(lua_State *state) uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2)); dsl_pool_t *dp = zcp_run_info(state)->zri_pool; dsl_dataset_t *ds, *clone; - zap_attribute_t za; + zap_attribute_t *za; zap_cursor_t zc; err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); @@ -75,9 +75,11 @@ zcp_clones_iter(lua_State *state) dsl_dataset_phys(ds)->ds_next_clones_obj, cursor); dsl_dataset_rele(ds, FTAG); - err = zap_cursor_retrieve(&zc, &za); + za = zap_attribute_alloc(); + err = zap_cursor_retrieve(&zc, za); if (err != 0) { zap_cursor_fini(&zc); + zap_attribute_free(za); if (err != ENOENT) { return (luaL_error(state, "unexpected error %d from zap_cursor_retrieve()", @@ -89,7 +91,8 @@ zcp_clones_iter(lua_State *state) cursor = zap_cursor_serialize(&zc); zap_cursor_fini(&zc); - err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &clone); + err = dsl_dataset_hold_obj(dp, za->za_first_integer, FTAG, &clone); + 
zap_attribute_free(za); if (err != 0) { return (luaL_error(state, "unexpected error %d from " @@ -499,7 +502,7 @@ zcp_bookmarks_iter(lua_State *state) uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2)); dsl_pool_t *dp = zcp_run_info(state)->zri_pool; dsl_dataset_t *ds; - zap_attribute_t za; + zap_attribute_t *za; zap_cursor_t zc; int err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); @@ -536,9 +539,11 @@ zcp_bookmarks_iter(lua_State *state) ds->ds_bookmarks_obj, cursor); dsl_dataset_rele(ds, FTAG); - err = zap_cursor_retrieve(&zc, &za); + za = zap_attribute_alloc(); + err = zap_cursor_retrieve(&zc, za); if (err != 0) { zap_cursor_fini(&zc); + zap_attribute_free(za); if (err != ENOENT) { return (luaL_error(state, "unexpected error %d from zap_cursor_retrieve()", @@ -552,7 +557,8 @@ zcp_bookmarks_iter(lua_State *state) /* Create the full "pool/fs#bookmark" string to return */ int n = snprintf(bookmark_name, ZFS_MAX_DATASET_NAME_LEN, "%s#%s", - ds_name, za.za_name); + ds_name, za->za_name); + zap_attribute_free(za); if (n >= ZFS_MAX_DATASET_NAME_LEN) { return (luaL_error(state, "unexpected error %d from snprintf()", ENAMETOOLONG)); @@ -610,7 +616,7 @@ zcp_holds_iter(lua_State *state) uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2)); dsl_pool_t *dp = zcp_run_info(state)->zri_pool; dsl_dataset_t *ds; - zap_attribute_t za; + zap_attribute_t *za; zap_cursor_t zc; int err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); @@ -631,9 +637,11 @@ zcp_holds_iter(lua_State *state) dsl_dataset_phys(ds)->ds_userrefs_obj, cursor); dsl_dataset_rele(ds, FTAG); - err = zap_cursor_retrieve(&zc, &za); + za = zap_attribute_alloc(); + err = zap_cursor_retrieve(&zc, za); if (err != 0) { zap_cursor_fini(&zc); + zap_attribute_free(za); if (err != ENOENT) { return (luaL_error(state, "unexpected error %d from zap_cursor_retrieve()", @@ -648,8 +656,9 @@ zcp_holds_iter(lua_State *state) lua_pushnumber(state, cursor); lua_replace(state, lua_upvalueindex(2)); - (void) 
lua_pushstring(state, za.za_name); - (void) lua_pushnumber(state, za.za_first_integer); + (void) lua_pushstring(state, za->za_name); + (void) lua_pushnumber(state, za->za_first_integer); + zap_attribute_free(za); return (2); } diff --git a/module/zfs/zfeature.c b/module/zfs/zfeature.c index 1d25bc406866..38479c7225ea 100644 --- a/module/zfs/zfeature.c +++ b/module/zfs/zfeature.c @@ -183,7 +183,7 @@ spa_features_check(spa_t *spa, boolean_t for_write, char *buf; zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + za = zap_attribute_alloc(); buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); supported = B_TRUE; @@ -217,7 +217,7 @@ spa_features_check(spa_t *spa, boolean_t for_write, zap_cursor_fini(zc); kmem_free(buf, MAXPATHLEN); - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); kmem_free(zc, sizeof (zap_cursor_t)); return (supported); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index f7cecc9af8a4..25b05abd3650 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -595,6 +595,8 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, vs->vs_checksum_errors, FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS, DATA_TYPE_UINT64, vs->vs_slow_ios, + FM_EREPORT_PAYLOAD_ZFS_VDEV_DIO_VERIFY_ERRORS, + DATA_TYPE_UINT64, vs->vs_dio_verify_errors, NULL); } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 53366ad49781..8188a9e46865 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -160,7 +160,6 @@ #include #include #include -#include #include #include #include @@ -2595,6 +2594,41 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, } break; } + case ZFS_PROP_LONGNAME: + { + zfsvfs_t *zfsvfs; + + /* + * Ignore the checks if the property is being applied as part of + * 'zfs receive'. Because, we already check if the local pool + * has SPA_FEATURE_LONGNAME enabled in dmu_recv_begin_check(). 
+ */ + if (source == ZPROP_SRC_RECEIVED) { + cmn_err(CE_NOTE, "Skipping ZFS_PROP_LONGNAME checks " + "for dsname=%s\n", dsname); + err = -1; + break; + } + + if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE)) != 0) { + cmn_err(CE_WARN, "%s:%d Failed to hold for dsname=%s " + "err=%d\n", __FILE__, __LINE__, dsname, err); + break; + } + + if (!spa_feature_is_enabled(zfsvfs->z_os->os_spa, + SPA_FEATURE_LONGNAME)) { + err = ENOTSUP; + } else { + /* + * Set err to -1 to force the zfs_set_prop_nvlist code + * down the default path to set the value in the nvlist. + */ + err = -1; + } + zfsvfs_rele(zfsvfs, FTAG); + break; + } default: err = -1; } diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 399f5a0117bb..16b07cc0a7d0 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -300,14 +300,13 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zfs_fuid_info_t *fuidp, vattr_t *vap) { itx_t *itx; - lr_create_t *lr; - lr_acl_create_t *lracl; + _lr_create_t *lr; + lr_acl_create_t *lracl = NULL; + uint8_t *lrdata; size_t aclsize = 0; size_t xvatsize = 0; size_t txsize; xvattr_t *xvap = (xvattr_t *)vap; - void *end; - size_t lrsize; size_t namesize = strlen(name) + 1; size_t fuidsz = 0; @@ -329,18 +328,21 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR || (int)txtype == TX_MKXATTR) { - txsize = sizeof (*lr) + namesize + fuidsz + xvatsize; - lrsize = sizeof (*lr); + txsize = sizeof (lr_create_t) + namesize + fuidsz + xvatsize; + itx = zil_itx_create(txtype, txsize); + lr_create_t *lrc = (lr_create_t *)&itx->itx_lr; + lrdata = &lrc->lr_data[0]; } else { txsize = sizeof (lr_acl_create_t) + namesize + fuidsz + ZIL_ACE_LENGTH(aclsize) + xvatsize; - lrsize = sizeof (lr_acl_create_t); + itx = zil_itx_create(txtype, txsize); + lracl = (lr_acl_create_t *)&itx->itx_lr; + lrdata = &lracl->lr_data[0]; } - itx = 
zil_itx_create(txtype, txsize); - lr = (lr_create_t *)&itx->itx_lr; + lr = (_lr_create_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; lr->lr_foid = zp->z_id; /* Store dnode slot count in 8 bits above object id. */ @@ -369,16 +371,14 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, * Fill in xvattr info if any */ if (vap->va_mask & ATTR_XVATTR) { - zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); - end = (caddr_t)lr + lrsize + xvatsize; - } else { - end = (caddr_t)lr + lrsize; + zfs_log_xvattr((lr_attr_t *)lrdata, xvap); + lrdata = &lrdata[xvatsize]; } /* Now fill in any ACL info */ if (vsecp) { - lracl = (lr_acl_create_t *)&itx->itx_lr; + ASSERT3P(lracl, !=, NULL); lracl->lr_aclcnt = vsecp->vsa_aclcnt; lracl->lr_acl_bytes = aclsize; lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; @@ -388,19 +388,19 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, else lracl->lr_acl_flags = 0; - memcpy(end, vsecp->vsa_aclentp, aclsize); - end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize); + memcpy(lrdata, vsecp->vsa_aclentp, aclsize); + lrdata = &lrdata[ZIL_ACE_LENGTH(aclsize)]; } /* drop in FUID info */ if (fuidp) { - end = zfs_log_fuid_ids(fuidp, end); - end = zfs_log_fuid_domains(fuidp, end); + lrdata = zfs_log_fuid_ids(fuidp, lrdata); + lrdata = zfs_log_fuid_domains(fuidp, lrdata); } /* * Now place file name in log record */ - memcpy(end, name, namesize); + memcpy(lrdata, name, namesize); zil_itx_assign(zilog, itx, tx); } @@ -422,7 +422,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, itx = zil_itx_create(txtype, sizeof (*lr) + namesize); lr = (lr_remove_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; - memcpy(lr + 1, name, namesize); + memcpy(&lr->lr_data[0], name, namesize); itx->itx_oid = foid; @@ -458,7 +458,7 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr = (lr_link_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; lr->lr_link_obj = zp->z_id; - memcpy(lr + 1, name, namesize); + memcpy(&lr->lr_data[0], name, 
namesize); zil_itx_assign(zilog, itx, tx); } @@ -471,15 +471,17 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, const char *name, const char *link) { itx_t *itx; - lr_create_t *lr; + _lr_create_t *lr; + lr_create_t *lrc; size_t namesize = strlen(name) + 1; size_t linksize = strlen(link) + 1; if (zil_replaying(zilog, tx)) return; - itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); - lr = (lr_create_t *)&itx->itx_lr; + itx = zil_itx_create(txtype, sizeof (*lrc) + namesize + linksize); + lrc = (lr_create_t *)&itx->itx_lr; + lr = &lrc->lr_create; lr->lr_doid = dzp->z_id; lr->lr_foid = zp->z_id; lr->lr_uid = KUID_TO_SUID(ZTOUID(zp)); @@ -489,8 +491,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, sizeof (uint64_t)); (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), lr->lr_crtime, sizeof (uint64_t) * 2); - memcpy((char *)(lr + 1), name, namesize); - memcpy((char *)(lr + 1) + namesize, link, linksize); + memcpy(&lrc->lr_data[0], name, namesize); + memcpy(&lrc->lr_data[namesize], link, linksize); zil_itx_assign(zilog, itx, tx); } @@ -500,7 +502,8 @@ do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) { itx_t *itx; - lr_rename_t *lr; + _lr_rename_t *lr; + lr_rename_t *lrr; size_t snamesize = strlen(sname) + 1; size_t dnamesize = strlen(dname) + 1; @@ -508,11 +511,12 @@ do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, return; itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); - lr = (lr_rename_t *)&itx->itx_lr; + lrr = (lr_rename_t *)&itx->itx_lr; + lr = &lrr->lr_rename; lr->lr_sdoid = sdzp->z_id; lr->lr_tdoid = tdzp->z_id; - memcpy((char *)(lr + 1), sname, snamesize); - memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); + memcpy(&lrr->lr_data[0], sname, snamesize); + memcpy(&lrr->lr_data[snamesize], dname, dnamesize); itx->itx_oid = 
szp->z_id; zil_itx_assign(zilog, itx, tx); @@ -590,8 +594,8 @@ zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev, sizeof (lr->lr_wrdev)); - memcpy((char *)(lr + 1), sname, snamesize); - memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); + memcpy(&lr->lr_data[0], sname, snamesize); + memcpy(&lr->lr_data[snamesize], dname, dnamesize); itx->itx_oid = szp->z_id; zil_itx_assign(zilog, itx, tx); @@ -607,7 +611,7 @@ static int64_t zfs_immediate_write_sz = 32768; void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t resid, boolean_t commit, - zil_callback_t callback, void *callback_data) + boolean_t o_direct, zil_callback_t callback, void *callback_data) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); uint32_t blocksize = zp->z_blksz; @@ -622,7 +626,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, return; } - if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT || o_direct) write_state = WR_INDIRECT; else if (!spa_has_slogs(zilog->zl_spa) && resid >= zfs_immediate_write_sz) @@ -663,8 +667,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, if (wr_state == WR_COPIED) { int err; DB_DNODE_ENTER(db); - err = dmu_read_by_dnode(DB_DNODE(db), off, len, lr + 1, - DMU_READ_NO_PREFETCH); + err = dmu_read_by_dnode(DB_DNODE(db), off, len, + &lr->lr_data[0], DMU_READ_NO_PREFETCH); DB_DNODE_EXIT(db); if (err != 0) { zil_itx_destroy(itx); @@ -733,7 +737,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, lr_setattr_t *lr; xvattr_t *xvap = (xvattr_t *)vap; size_t recsize = sizeof (lr_setattr_t); - void *start; + uint8_t *start; if (zil_replaying(zilog, tx) || zp->z_unlinked) return; @@ -767,10 +771,10 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, lr->lr_size = (uint64_t)vap->va_size; ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); 
ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); - start = (lr_setattr_t *)(lr + 1); + start = &lr->lr_data[0]; if (vap->va_mask & ATTR_XVATTR) { zfs_log_xvattr((lr_attr_t *)start, xvap); - start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); + start = &lr->lr_data[ZIL_XVAT_SIZE(xvap->xva_mapsize)]; } /* @@ -794,7 +798,6 @@ zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx_t *itx; lr_setsaxattr_t *lr; size_t recsize = sizeof (lr_setsaxattr_t); - void *xattrstart; int namelen; if (zil_replaying(zilog, tx) || zp->z_unlinked) @@ -805,10 +808,9 @@ zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx = zil_itx_create(txtype, recsize); lr = (lr_setsaxattr_t *)&itx->itx_lr; lr->lr_foid = zp->z_id; - xattrstart = (char *)(lr + 1); - memcpy(xattrstart, name, namelen); + memcpy(&lr->lr_data[0], name, namelen); if (value != NULL) { - memcpy((char *)xattrstart + namelen, value, size); + memcpy(&lr->lr_data[namelen], value, size); lr->lr_size = size; } else { lr->lr_size = 0; @@ -866,13 +868,13 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, if (txtype == TX_ACL_V0) { lrv0 = (lr_acl_v0_t *)lr; - memcpy(lrv0 + 1, vsecp->vsa_aclentp, aclbytes); + memcpy(&lrv0->lr_data[0], vsecp->vsa_aclentp, aclbytes); } else { - void *start = (ace_t *)(lr + 1); + uint8_t *start = &lr->lr_data[0]; memcpy(start, vsecp->vsa_aclentp, aclbytes); - start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes); + start = &lr->lr_data[ZIL_ACE_LENGTH(aclbytes)]; if (fuidp) { start = zfs_log_fuid_ids(fuidp, start); diff --git a/module/zfs/zfs_quota.c b/module/zfs/zfs_quota.c index 9b351eefc04e..0fe152bc4dd8 100644 --- a/module/zfs/zfs_quota.c +++ b/module/zfs/zfs_quota.c @@ -165,7 +165,7 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, { int error; zap_cursor_t zc; - zap_attribute_t za; + zap_attribute_t *za; zfs_useracct_t *buf = vbuf; uint64_t obj; int offset = 0; @@ -196,8 +196,9 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, type == 
ZFS_PROP_PROJECTOBJUSED) offset = DMU_OBJACCT_PREFIX_LEN; + za = zap_attribute_alloc(); for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); - (error = zap_cursor_retrieve(&zc, &za)) == 0; + (error = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > *bufsizep) @@ -207,14 +208,14 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, * skip object quota (with zap name prefix DMU_OBJACCT_PREFIX) * when dealing with block quota and vice versa. */ - if ((offset > 0) != (strncmp(za.za_name, DMU_OBJACCT_PREFIX, + if ((offset > 0) != (strncmp(za->za_name, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN) == 0)) continue; - fuidstr_to_sid(zfsvfs, za.za_name + offset, + fuidstr_to_sid(zfsvfs, za->za_name + offset, buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); - buf->zu_space = za.za_first_integer; + buf->zu_space = za->za_first_integer; buf++; } if (error == ENOENT) @@ -224,6 +225,7 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; *cookiep = zap_cursor_serialize(&zc); zap_cursor_fini(&zc); + zap_attribute_free(za); return (error); } diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 810550161f8b..80c11d80f6c4 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -293,16 +293,16 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_acl_create_t *lracl = arg2; + _lr_create_t *lr = &lracl->lr_create; char *name = NULL; /* location determined later */ - lr_create_t *lr = (lr_create_t *)lracl; znode_t *dzp; znode_t *zp; xvattr_t xva; int vflg = 0; vsecattr_t vsec = { 0 }; lr_attr_t *lrattr; - void *aclstart; - void *fuidstart; + uint8_t *aclstart; + uint8_t *fuidstart; size_t xvatlen = 0; uint64_t txtype; uint64_t objid; @@ -316,17 +316,18 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) 
byteswap_uint64_array(lracl, sizeof (*lracl)); if (txtype == TX_CREATE_ACL_ATTR || txtype == TX_MKDIR_ACL_ATTR) { - lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); + lrattr = (lr_attr_t *)&lracl->lr_data[0]; zfs_replay_swap_attrs(lrattr); xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); } - aclstart = (caddr_t)(lracl + 1) + xvatlen; + aclstart = &lracl->lr_data[xvatlen]; zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE); + /* swap fuids */ if (lracl->lr_fuidcnt) { - byteswap_uint64_array((caddr_t)aclstart + - ZIL_ACE_LENGTH(lracl->lr_acl_bytes), + byteswap_uint64_array( + &aclstart[ZIL_ACE_LENGTH(lracl->lr_acl_bytes)], lracl->lr_fuidcnt * sizeof (uint64_t)); } } @@ -361,28 +362,27 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) vflg |= FIGNORECASE; switch (txtype) { case TX_CREATE_ACL: - aclstart = (caddr_t)(lracl + 1); - fuidstart = (caddr_t)aclstart + - ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + aclstart = &lracl->lr_data[0]; + fuidstart = &aclstart[ZIL_ACE_LENGTH(lracl->lr_acl_bytes)]; zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); zfs_fallthrough; case TX_CREATE_ACL_ATTR: if (name == NULL) { - lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); + lrattr = (lr_attr_t *)&lracl->lr_data[0]; xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); xva.xva_vattr.va_mask |= ATTR_XVATTR; zfs_replay_xvattr(lrattr, &xva); } vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; - vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; + vsec.vsa_aclentp = &lracl->lr_data[xvatlen]; vsec.vsa_aclcnt = lracl->lr_aclcnt; vsec.vsa_aclentsz = lracl->lr_acl_bytes; vsec.vsa_aclflags = lracl->lr_acl_flags; if (zfsvfs->z_fuid_replay == NULL) { - fuidstart = (caddr_t)(lracl + 1) + xvatlen + - ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + fuidstart = &lracl->lr_data[xvatlen + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes)]; zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, 
@@ -398,9 +398,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) #endif break; case TX_MKDIR_ACL: - aclstart = (caddr_t)(lracl + 1); - fuidstart = (caddr_t)aclstart + - ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + aclstart = &lracl->lr_data[0]; + fuidstart = &aclstart[ZIL_ACE_LENGTH(lracl->lr_acl_bytes)]; zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); @@ -412,13 +411,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) zfs_replay_xvattr(lrattr, &xva); } vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; - vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; + vsec.vsa_aclentp = &lracl->lr_data[xvatlen]; vsec.vsa_aclcnt = lracl->lr_aclcnt; vsec.vsa_aclentsz = lracl->lr_acl_bytes; vsec.vsa_aclflags = lracl->lr_acl_flags; if (zfsvfs->z_fuid_replay == NULL) { - fuidstart = (caddr_t)(lracl + 1) + xvatlen + - ZIL_ACE_LENGTH(lracl->lr_acl_bytes); + fuidstart = &lracl->lr_data[xvatlen + + ZIL_ACE_LENGTH(lracl->lr_acl_bytes)]; zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, @@ -456,14 +455,14 @@ static int zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; - lr_create_t *lr = arg2; + lr_create_t *lrc = arg2; + _lr_create_t *lr = &lrc->lr_create; char *name = NULL; /* location determined later */ char *link; /* symlink content follows name */ znode_t *dzp; znode_t *zp = NULL; xvattr_t xva; int vflg = 0; - size_t lrsize = sizeof (lr_create_t); lr_attr_t *lrattr; void *start; size_t xvatlen; @@ -476,9 +475,9 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) txtype = (lr->lr_common.lrc_txtype & ~TX_CI); if (byteswap) { - byteswap_uint64_array(lr, sizeof (*lr)); + byteswap_uint64_array(lrc, sizeof (*lrc)); if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) - zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); + zfs_replay_swap_attrs((lr_attr_t *)&lrc->lr_data[0]); } 
@@ -520,7 +519,7 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) if (txtype != TX_SYMLINK && txtype != TX_MKDIR_ATTR && txtype != TX_CREATE_ATTR) { - start = (lr + 1); + start = (void *)&lrc->lr_data[0]; zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); @@ -528,10 +527,10 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) switch (txtype) { case TX_CREATE_ATTR: - lrattr = (lr_attr_t *)(caddr_t)(lr + 1); + lrattr = (lr_attr_t *)&lrc->lr_data[0]; xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); - zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); - start = (caddr_t)(lr + 1) + xvatlen; + zfs_replay_xvattr(lrattr, &xva); + start = (void *)&lrc->lr_data[xvatlen]; zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); @@ -551,10 +550,10 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) #endif break; case TX_MKDIR_ATTR: - lrattr = (lr_attr_t *)(caddr_t)(lr + 1); + lrattr = (lr_attr_t *)&lrc->lr_data[0]; xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); - zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); - start = (caddr_t)(lr + 1) + xvatlen; + zfs_replay_xvattr(lrattr, &xva); + start = &lrc->lr_data[xvatlen]; zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); @@ -563,7 +562,7 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) case TX_MKDIR: if (name == NULL) - name = (char *)(lr + 1); + name = (char *)&lrc->lr_data[0]; #if defined(__linux__) error = zfs_mkdir(dzp, name, &xva.xva_vattr, @@ -578,8 +577,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred); break; case TX_SYMLINK: - name = (char *)(lr + 1); - link = name + strlen(name) + 1; + name = &lrc->lr_data[0]; + link = &lrc->lr_data[strlen(name) + 1]; #if defined(__linux__) error = zfs_symlink(dzp, name, &xva.xva_vattr, link, &zp, kcred, vflg, zfs_init_idmap); 
@@ -612,7 +611,7 @@ zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_remove_t *lr = arg2; - char *name = (char *)(lr + 1); /* name follows lr_remove_t */ + char *name = (char *)&lr->lr_data[0]; /* name follows lr_remove_t */ znode_t *dzp; int error; int vflg = 0; @@ -649,7 +648,7 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_link_t *lr = arg2; - char *name = (char *)(lr + 1); /* name follows lr_link_t */ + char *name = &lr->lr_data[0]; /* name follows lr_link_t */ znode_t *dzp, *zp; int error; int vflg = 0; @@ -678,7 +677,7 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) } static int -do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname, +do_zfs_replay_rename(zfsvfs_t *zfsvfs, _lr_rename_t *lr, char *sname, char *tname, uint64_t rflags, vattr_t *wo_vap) { znode_t *sdzp, *tdzp; @@ -722,15 +721,17 @@ static int zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; - lr_rename_t *lr = arg2; + lr_rename_t *lrr = arg2; + _lr_rename_t *lr = &lrr->lr_rename; ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); + byteswap_uint64_array(lrr, sizeof (*lrr)); - char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; + /* sname and tname follow lr_rename_t */ + char *sname = (char *)&lrr->lr_data[0]; + char *tname = (char *)&lrr->lr_data[strlen(sname)+1]; return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL)); } @@ -739,15 +740,17 @@ zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) { #ifdef __linux__ zfsvfs_t *zfsvfs = arg1; - lr_rename_t *lr = arg2; + lr_rename_t *lrr = arg2; + _lr_rename_t *lr = &lrr->lr_rename; ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); + byteswap_uint64_array(lrr, sizeof (*lrr)); - char *sname 
= (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; + /* sname and tname follow lr_rename_t */ + char *sname = (char *)&lrr->lr_data[0]; + char *tname = (char *)&lrr->lr_data[strlen(sname)+1]; return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, NULL)); #else @@ -760,24 +763,26 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) { #ifdef __linux__ zfsvfs_t *zfsvfs = arg1; - lr_rename_whiteout_t *lr = arg2; + lr_rename_whiteout_t *lrrw = arg2; + _lr_rename_t *lr = &lrrw->lr_rename; int error; /* For the whiteout file. */ xvattr_t xva; uint64_t objid; uint64_t dnodesize; - ASSERT3U(lr->lr_rename.lr_common.lrc_reclen, >, sizeof (*lr)); + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); + byteswap_uint64_array(lrrw, sizeof (*lrrw)); - objid = LR_FOID_GET_OBJ(lr->lr_wfoid); - dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT; + objid = LR_FOID_GET_OBJ(lrrw->lr_wfoid); + dnodesize = LR_FOID_GET_SLOTS(lrrw->lr_wfoid) << DNODE_SHIFT; xva_init(&xva); zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, - lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid); + lrrw->lr_wmode, lrrw->lr_wuid, lrrw->lr_wgid, lrrw->lr_wrdev, + objid); /* * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which @@ -786,8 +791,8 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) * attributes, so we smuggle the values inside the vattr's otherwise * unused va_ctime, va_nblocks, and va_fsid fields. 
*/ - ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime); - xva.xva_vattr.va_nblocks = lr->lr_wgen; + ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lrrw->lr_wcrtime); + xva.xva_vattr.va_nblocks = lrrw->lr_wgen; xva.xva_vattr.va_fsid = dnodesize; error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); @@ -795,9 +800,9 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) return (error); /* sname and tname follow lr_rename_whiteout_t */ - char *sname = (char *)(lr + 1); - char *tname = sname + strlen(sname) + 1; - return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, + char *sname = (char *)&lrrw->lr_data[0]; + char *tname = (char *)&lrrw->lr_data[strlen(sname)+1]; + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_WHITEOUT, &xva.xva_vattr)); #else return (SET_ERROR(ENOTSUP)); @@ -809,7 +814,7 @@ zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_write_t *lr = arg2; - char *data = (char *)(lr + 1); /* data follows lr_write_t */ + char *data = &lr->lr_data[0]; /* data follows lr_write_t */ znode_t *zp; int error; uint64_t eod, offset, length; @@ -968,7 +973,7 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) if ((lr->lr_mask & ATTR_XVATTR) && zfsvfs->z_version >= ZPL_VERSION_INITIAL) - zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); + zfs_replay_swap_attrs((lr_attr_t *)&lr->lr_data[0]); } if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) @@ -987,11 +992,11 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) * Fill in xvattr_t portions if necessary. 
*/ - start = (lr_setattr_t *)(lr + 1); + start = (void *)&lr->lr_data[0]; if (vap->va_mask & ATTR_XVATTR) { zfs_replay_xvattr((lr_attr_t *)start, &xva); - start = (caddr_t)start + - ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); + start = &lr->lr_data[ + ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize)]; } else xva.xva_vattr.va_mask &= ~ATTR_XVATTR; @@ -1049,12 +1054,12 @@ zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap) /* Get xattr name, value and size from log record */ size = lr->lr_size; - name = (char *)(lr + 1); + name = (char *)&lr->lr_data[0]; if (size == 0) { value = NULL; error = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); } else { - value = name + strlen(name) + 1; + value = &lr->lr_data[strlen(name) + 1]; /* Limited to 32k to keep nvpair memory allocations small */ if (size > DXATTR_MAX_ENTRY_SIZE) { error = SET_ERROR(EFBIG); @@ -1099,7 +1104,7 @@ zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_acl_v0_t *lr = arg2; - ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ + ace_t *ace = (ace_t *)&lr->lr_data[0]; vsecattr_t vsa = {0}; znode_t *zp; int error; @@ -1148,7 +1153,7 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_acl_t *lr = arg2; - ace_t *ace = (ace_t *)(lr + 1); + ace_t *ace = (ace_t *)&lr->lr_data[0]; vsecattr_t vsa = {0}; znode_t *zp; int error; @@ -1160,8 +1165,8 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) byteswap_uint64_array(lr, sizeof (*lr)); zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); if (lr->lr_fuidcnt) { - byteswap_uint64_array((caddr_t)ace + - ZIL_ACE_LENGTH(lr->lr_acl_bytes), + byteswap_uint64_array(&lr->lr_data[ + ZIL_ACE_LENGTH(lr->lr_acl_bytes)], lr->lr_fuidcnt * sizeof (uint64_t)); } } @@ -1176,8 +1181,8 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) vsa.vsa_aclflags = lr->lr_acl_flags; if (lr->lr_fuidcnt) { - void *fuidstart = (caddr_t)ace + - 
ZIL_ACE_LENGTH(lr->lr_acl_bytes); + void *fuidstart = &lr->lr_data[ + ZIL_ACE_LENGTH(lr->lr_acl_bytes)]; zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, &fuidstart, diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index f3db953eab46..a96f508ffac0 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -74,6 +73,14 @@ int zfs_bclone_enabled = 1; */ static int zfs_bclone_wait_dirty = 0; +/* + * Enable Direct I/O. If this setting is 0, then all I/O requests will be + * directed through the ARC acting as though the dataset property direct was + * set to disabled. + */ +static int zfs_dio_enabled = 0; + + /* * Maximum bytes to read per chunk in zfs_read(). */ @@ -202,6 +209,77 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr) return (error); } +/* + * Determine if Direct I/O has been requested (either via the O_DIRECT flag or + * the "direct" dataset property). When inherited by the property only apply + * the O_DIRECT flag to correctly aligned IO requests. The rationale for this + * is it allows the property to be safely set on a dataset without forcing + * all of the applications to be aware of the alignment restrictions. When + * O_DIRECT is explicitly requested by an application return EINVAL if the + * request is unaligned. In all cases, if the range for this request has + * been mmap'ed then we will perform buffered I/O to keep the mapped region + * synchronized with the ARC. + * + * It is possible that a file's pages could be mmap'ed after it is checked + * here. If so, that is handled accordingly in zfs_write().
See comments in the + * following area for how this is handled: + * zfs_write() -> update_pages() + */ +static int +zfs_setup_direct(struct znode *zp, zfs_uio_t *uio, zfs_uio_rw_t rw, + int *ioflagp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + objset_t *os = zfsvfs->z_os; + int ioflag = *ioflagp; + int error = 0; + + if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED || + zn_has_cached_data(zp, zfs_uio_offset(uio), + zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) { + /* + * Direct I/O is disabled or the region is mmap'ed. In either + * case the I/O request will just be directed through the ARC. + */ + ioflag &= ~O_DIRECT; + goto out; + } else if (os->os_direct == ZFS_DIRECT_ALWAYS && + zfs_uio_page_aligned(uio) && + zfs_uio_aligned(uio, PAGE_SIZE)) { + if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) || + (rw == UIO_READ)) { + ioflag |= O_DIRECT; + } + } else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) { + /* + * Direct I/O was requested through the direct=always, but it + * is not properly PAGE_SIZE aligned. The request will be + * directed through the ARC. + */ + ioflag &= ~O_DIRECT; + } + + if (ioflag & O_DIRECT) { + if (!zfs_uio_page_aligned(uio) || + !zfs_uio_aligned(uio, PAGE_SIZE)) { + error = SET_ERROR(EINVAL); + goto out; + } + + error = zfs_uio_get_dio_pages_alloc(uio, rw); + if (error) { + goto out; + } + } + + IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT); + ASSERT0(error); + +out: + *ioflagp = ioflag; + return (error); +} + /* * Read bytes from specified file into supplied buffer. * @@ -286,24 +364,58 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = 0; goto out; } - ASSERT(zfs_uio_offset(uio) < zp->z_size); + + /* + * Setting up Direct I/O if requested.
+ */ + error = zfs_setup_direct(zp, uio, UIO_READ, &ioflag); + if (error) { + goto out; + } + #if defined(__linux__) ssize_t start_offset = zfs_uio_offset(uio); #endif + ssize_t chunk_size = zfs_vnops_read_chunk_size; ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio)); ssize_t start_resid = n; + ssize_t dio_remaining_resid = 0; + + if (uio->uio_extflg & UIO_DIRECT) { + /* + * All pages for an O_DIRECT request have already been mapped + * so there's no compelling reason to handle this uio in + * smaller chunks. + */ + chunk_size = DMU_MAX_ACCESS; + + /* + * In the event that the O_DIRECT request is reading the entire + * file, it is possible the file's length is not page-size + * aligned. However, lower layers expect that the Direct I/O + * request is page-aligned. In this case, as much of the file + * that can be read using Direct I/O happens and the remaining + * amount will be read through the ARC. + * + * This is still consistent with the semantics of Direct I/O in + * ZFS as at a minimum the I/O request must be page-aligned.
+ */ + dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t); + if (dio_remaining_resid != 0) + n -= dio_remaining_resid; + } while (n > 0) { - ssize_t nbytes = MIN(n, zfs_vnops_read_chunk_size - - P2PHASE(zfs_uio_offset(uio), zfs_vnops_read_chunk_size)); + ssize_t nbytes = MIN(n, chunk_size - + P2PHASE(zfs_uio_offset(uio), chunk_size)); #ifdef UIO_NOCOPY if (zfs_uio_segflg(uio) == UIO_NOCOPY) error = mappedread_sf(zp, nbytes, uio); else #endif if (zn_has_cached_data(zp, zfs_uio_offset(uio), - zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) { + zfs_uio_offset(uio) + nbytes - 1)) { error = mappedread(zp, nbytes, uio); } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), @@ -332,12 +444,40 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) n -= nbytes; } + if (error == 0 && (uio->uio_extflg & UIO_DIRECT) && + dio_remaining_resid != 0) { + /* + * Temporarily remove the UIO_DIRECT flag from the UIO so the + * remainder of the file can be read using the ARC. + */ + uio->uio_extflg &= ~UIO_DIRECT; + + if (zn_has_cached_data(zp, zfs_uio_offset(uio), + zfs_uio_offset(uio) + dio_remaining_resid - 1)) { + error = mappedread(zp, dio_remaining_resid, uio); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, + dio_remaining_resid); + } + uio->uio_extflg |= UIO_DIRECT; + + if (error != 0) + n += dio_remaining_resid; + } else if (error && (uio->uio_extflg & UIO_DIRECT)) { + n += dio_remaining_resid; + } int64_t nread = start_resid - n; + dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); - task_io_account_read(nread); out: zfs_rangelock_exit(lr); + /* + * Cleanup for Direct I/O if requested. 
+ */ + if (uio->uio_extflg & UIO_DIRECT) + zfs_uio_free_dio_pages(uio, UIO_READ); + ZFS_ACCESSTIME_STAMP(zfsvfs, zp); zfs_exit(zfsvfs, FTAG); return (error); @@ -422,6 +562,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) int error = 0, error1; ssize_t start_resid = zfs_uio_resid(uio); uint64_t clear_setid_bits_txg = 0; + boolean_t o_direct_defer = B_FALSE; /* * Fasttrack empty write @@ -474,6 +615,15 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (SET_ERROR(EINVAL)); } + /* + * Setting up Direct I/O if requested. + */ + error = zfs_setup_direct(zp, uio, UIO_WRITE, &ioflag); + if (error) { + zfs_exit(zfsvfs, FTAG); + return (SET_ERROR(error)); + } + /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. @@ -504,6 +654,12 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) woff = zp->z_size; } zfs_uio_setoffset(uio, woff); + /* + * We need to update the starting offset as well because it is + * set previously in the ZPL (Linux) and VNOPS (FreeBSD) + * layers. + */ + zfs_uio_setsoffset(uio, woff); } else { /* * Note that if the file block size will change as a result of @@ -539,6 +695,33 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) const uint64_t gid = KGID_TO_SGID(ZTOGID(zp)); const uint64_t projid = zp->z_projid; + /* + * In the event we are increasing the file block size + * (lr_length == UINT64_MAX), we will direct the write to the ARC. + * Because zfs_grow_blocksize() will read from the ARC in order to + * grow the dbuf, we avoid doing Direct I/O here as that would cause + * data written to disk to be overwritten by data in the ARC during + * the sync phase. Besides writing data twice to disk, we also + * want to avoid consistency concerns between data in the the ARC and + * on disk while growing the file's blocksize. + * + * We will only temporarily remove Direct I/O and put it back after + * we have grown the blocksize. 
We do this in the event a request + * is larger than max_blksz, so further requests to + * dmu_write_uio_dbuf() will still issue the requests using Direct + * IO. + * + * As an example: + * The first block of the file is being written as a 4k request with + * a recordsize of 1K. The first 1K issued in the loop below will go + * through the ARC; however, the following 3 1K requests will + * use Direct I/O. + */ + if (uio->uio_extflg & UIO_DIRECT && lr->lr_length == UINT64_MAX) { + uio->uio_extflg &= ~UIO_DIRECT; + o_direct_defer = B_TRUE; + } + /* * Write the file in reasonable size chunks. Each chunk is written * in a separate transaction; this keeps the intent log records small @@ -580,6 +763,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) ssize_t nbytes = n; if (n >= blksz && woff >= zp->z_size && P2PHASE(woff, blksz) == 0 && + !(uio->uio_extflg & UIO_DIRECT) && (blksz >= SPA_OLD_MAXBLOCKSIZE || n < 4 * blksz)) { /* * This write covers a full block. "Borrow" a buffer @@ -705,9 +889,30 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_uioskip(uio, nbytes); tx_bytes = nbytes; } + /* + * There is a window where a file's pages can be mmap'ed after + * zfs_setup_direct() is called. This is due to the fact that + * the rangelock in this function is acquired after calling + * zfs_setup_direct(). This is done so that + * zfs_uio_prefaultpages() does not attempt to fault in pages + * on Linux for Direct I/O requests. This is not necessary as + * the pages are pinned in memory and can not be faulted out. + * Ideally, the rangelock would be held before calling + * zfs_setup_direct() and zfs_uio_prefaultpages(); however, + * this can lead to a deadlock as zfs_getpage() also acquires + * the rangelock as a RL_WRITER and prefaulting the pages can + * lead to zfs_getpage() being called.
+ * + * In the case of the pages being mapped after + * zfs_setup_direct() is called, the call to update_pages() + * will still be made to make sure there is consistency between + * the ARC and the Linux page cache. This is an unfortunate + * situation as the data will be read back into the ARC after + * the Direct I/O write has completed, but this is the penalty + * for writing to a mmap'ed region of a file using Direct I/O. + */ if (tx_bytes && - zn_has_cached_data(zp, woff, woff + tx_bytes - 1) && - !(ioflag & O_DIRECT)) { + zn_has_cached_data(zp, woff, woff + tx_bytes - 1)) { update_pages(zp, woff, tx_bytes, zfsvfs->z_os); } @@ -756,10 +961,21 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * the TX_WRITE records logged here. */ zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit, - NULL, NULL); + uio->uio_extflg & UIO_DIRECT ? B_TRUE : B_FALSE, NULL, + NULL); dmu_tx_commit(tx); + /* + * Direct I/O was deferred in order to grow the first block. + * At this point it can be re-enabled for subsequent writes. + */ + if (o_direct_defer) { + ASSERT(ioflag & O_DIRECT); + uio->uio_extflg |= UIO_DIRECT; + o_direct_defer = B_FALSE; + } + if (error != 0) break; ASSERT3S(tx_bytes, ==, nbytes); @@ -767,9 +983,21 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) pfbytes -= nbytes; } + if (o_direct_defer) { + ASSERT(ioflag & O_DIRECT); + uio->uio_extflg |= UIO_DIRECT; + o_direct_defer = B_FALSE; + } + zfs_znode_update_vfs(zp); zfs_rangelock_exit(lr); + /* + * Cleanup for Direct I/O if requested. + */ + if (uio->uio_extflg & UIO_DIRECT) + zfs_uio_free_dio_pages(uio, UIO_WRITE); + /* * If we're in replay mode, or we made no progress, or the * uio data is inaccessible return an error.
Otherwise, it's @@ -784,9 +1012,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) if (commit) zil_commit(zilog, zp->z_id); - const int64_t nwritten = start_resid - zfs_uio_resid(uio); + int64_t nwritten = start_resid - zfs_uio_resid(uio); dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten); - task_io_account_write(nwritten); zfs_exit(zfsvfs, FTAG); return (0); @@ -846,7 +1073,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; - dmu_buf_t *db; zgd_t *zgd; int error = 0; uint64_t zp_gen; @@ -890,8 +1116,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, - offset, size, RL_READER); + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, + size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); @@ -929,18 +1155,44 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, zil_fault_io = 0; } #endif + + dmu_buf_t *dbp; if (error == 0) error = dmu_buf_hold_noread(os, object, offset, zgd, - &db); + &dbp); if (error == 0) { - blkptr_t *bp = &lr->lr_blkptr; + zgd->zgd_db = dbp; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp; + boolean_t direct_write = B_FALSE; + mutex_enter(&db->db_mtx); + dbuf_dirty_record_t *dr = + dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg); + if (dr != NULL && dr->dt.dl.dr_diowrite) + direct_write = B_TRUE; + mutex_exit(&db->db_mtx); + + /* + * All Direct I/O writes will have already completed and + * the block pointer can be immediately stored in the + * log record. + */ + if (direct_write) { + /* + * A Direct I/O write always covers an entire + * block. 
+ */ + ASSERT3U(dbp->db_size, ==, zp->z_blksz); + lr->lr_blkptr = dr->dt.dl.dr_overridden_by; + zfs_get_done(zgd, 0); + return (0); + } - zgd->zgd_db = db; + blkptr_t *bp = &lr->lr_blkptr; zgd->zgd_bp = bp; - ASSERT(db->db_offset == offset); - ASSERT(db->db_size == size); + ASSERT3U(dbp->db_offset, ==, offset); + ASSERT3U(dbp->db_size, ==, size); error = dmu_sync(zio, lr->lr_common.lrc_txg, zfs_get_done, zgd); @@ -975,7 +1227,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, return (error); } - static void zfs_get_done(zgd_t *zgd, int error) { @@ -1559,3 +1810,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW, "Wait for dirty blocks when cloning"); + +ZFS_MODULE_PARAM(zfs, zfs_, dio_enabled, INT, ZMOD_RW, + "Enable Direct I/O"); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c new file mode 100644 index 000000000000..824db8c689a7 --- /dev/null +++ b/module/zfs/zfs_znode.c @@ -0,0 +1,401 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
+ * Copyright (c) 2014 Integros [integros.com] + */ + +/* Portions Copyright 2007 Jeremy Teo */ +/* Portions Copyright 2011 Martin Matuska */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_prop.h" +#include "zfs_comutil.h" + +static int +zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) +{ + uint64_t sa_obj = 0; + int error; + + error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); + if (error != 0 && error != ENOENT) + return (error); + + error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); + return (error); +} + +static int +zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, + dmu_buf_t **db, const void *tag) +{ + dmu_object_info_t doi; + int error; + + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) + return (error); + + dmu_object_info_from_db(*db, &doi); + if ((doi.doi_bonus_type != DMU_OT_SA && + doi.doi_bonus_type != DMU_OT_ZNODE) || + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t))) { + sa_buf_rele(*db, tag); + return (SET_ERROR(ENOTSUP)); + } + + error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); + if (error != 0) { + sa_buf_rele(*db, tag); + return (error); + } + + return (0); +} + +static void +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag) +{ + sa_handle_destroy(hdl); + sa_buf_rele(db, tag); +} + +/* + * Given an object number, return its parent object number and whether + * or not the object is an extended attribute directory. 
+ */ +int +zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, + uint64_t *pobjp, int *is_xattrdir) +{ + uint64_t parent; + uint64_t pflags; + uint64_t mode; + uint64_t parent_mode; + sa_bulk_attr_t bulk[3]; + sa_handle_t *sa_hdl; + dmu_buf_t *sa_db; + int count = 0; + int error; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, + &parent, sizeof (parent)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, + &pflags, sizeof (pflags)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &mode, sizeof (mode)); + + if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) + return (error); + + /* + * When a link is removed its parent pointer is not changed and will + * be invalid. There are two cases where a link is removed but the + * file stays around, when it goes to the delete queue and when there + * are additional links. + */ + error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); + if (error != 0) + return (error); + + error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + if (error != 0) + return (error); + + *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); + + /* + * Extended attributes can be applied to files, directories, etc. + * Otherwise the parent must be a directory. 
+ */ + if (!*is_xattrdir && !S_ISDIR(parent_mode)) + return (SET_ERROR(EINVAL)); + + *pobjp = parent; + + return (0); +} + +/* + * Given an object number, return some zpl level statistics + */ +static int +zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, + zfs_stat_t *sb) +{ + sa_bulk_attr_t bulk[4]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &sb->zs_mode, sizeof (sb->zs_mode)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, + &sb->zs_gen, sizeof (sb->zs_gen)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, + &sb->zs_links, sizeof (sb->zs_links)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, + &sb->zs_ctime, sizeof (sb->zs_ctime)); + + return (sa_bulk_lookup(hdl, bulk, count)); +} + +static int +zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, + sa_attr_type_t *sa_table, char *buf, int len) +{ + sa_handle_t *sa_hdl; + sa_handle_t *prevhdl = NULL; + dmu_buf_t *prevdb = NULL; + dmu_buf_t *sa_db = NULL; + char *path = buf + len - 1; + char *comp_buf; + int error; + + *path = '\0'; + sa_hdl = hdl; + + uint64_t deleteq_obj; + VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, + ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); + error = zap_lookup_int(osp, deleteq_obj, obj); + if (error == 0) { + return (ESTALE); + } else if (error != ENOENT) { + return (error); + } + + comp_buf = kmem_alloc(ZAP_MAXNAMELEN_NEW + 2, KM_SLEEP); + for (;;) { + uint64_t pobj = 0; + char *component = comp_buf; + size_t complen; + int is_xattrdir = 0; + + if (prevdb) { + ASSERT3P(prevhdl, !=, NULL); + zfs_release_sa_handle(prevhdl, prevdb, FTAG); + } + + if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, + &is_xattrdir)) != 0) + break; + + if (pobj == obj) { + if (path[0] != '/') + *--path = '/'; + break; + } + + component[0] = '/'; + if (is_xattrdir) { + strcpy(component + 1, ""); + } else { + error = zap_value_search(osp, pobj, obj, + ZFS_DIRENT_OBJ(-1ULL), component + 1, + 
ZAP_MAXNAMELEN_NEW); + if (error != 0) + break; + } + + complen = strlen(component); + path -= complen; + ASSERT3P(path, >=, buf); + memcpy(path, component, complen); + obj = pobj; + + if (sa_hdl != hdl) { + prevhdl = sa_hdl; + prevdb = sa_db; + } + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); + if (error != 0) { + sa_hdl = prevhdl; + sa_db = prevdb; + break; + } + } + + if (sa_hdl != NULL && sa_hdl != hdl) { + ASSERT3P(sa_db, !=, NULL); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + } + + if (error == 0) + (void) memmove(buf, path, buf + len - path); + + kmem_free(comp_buf, ZAP_MAXNAMELEN_NEW +2); + return (error); +} + +int +zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) +{ + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} + +int +zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, + char *buf, int len) +{ + char *path = buf + len - 1; + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + *path = '\0'; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_stats_impl(hdl, sa_table, sb); + if (error != 0) { + zfs_release_sa_handle(hdl, db, FTAG); + return (error); + } + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} + +/* + * Read a property stored within the master node. 
+ */ +int +zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) +{ + uint64_t *cached_copy = NULL; + + /* + * Figure out where in the objset_t the cached copy would live, if it + * is available for the requested property. + */ + if (os != NULL) { + switch (prop) { + case ZFS_PROP_VERSION: + cached_copy = &os->os_version; + break; + case ZFS_PROP_NORMALIZE: + cached_copy = &os->os_normalization; + break; + case ZFS_PROP_UTF8ONLY: + cached_copy = &os->os_utf8only; + break; + case ZFS_PROP_CASE: + cached_copy = &os->os_casesensitivity; + break; + default: + break; + } + } + if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { + *value = *cached_copy; + return (0); + } + + /* + * If the property wasn't cached, look up the file system's value for + * the property. For the version property, we look up a slightly + * different string. + */ + const char *pname; + int error = ENOENT; + if (prop == ZFS_PROP_VERSION) + pname = ZPL_VERSION_STR; + else + pname = zfs_prop_to_name(prop); + + if (os != NULL) { + ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); + error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); + } + + if (error == ENOENT) { + /* No value set, use the default value */ + switch (prop) { + case ZFS_PROP_VERSION: + *value = ZPL_VERSION; + break; + case ZFS_PROP_NORMALIZE: + case ZFS_PROP_UTF8ONLY: + *value = 0; + break; + case ZFS_PROP_CASE: + *value = ZFS_CASE_SENSITIVE; + break; + case ZFS_PROP_ACLTYPE: +#ifdef __FreeBSD__ + *value = ZFS_ACLTYPE_NFSV4; +#else + *value = ZFS_ACLTYPE_OFF; +#endif + break; + default: + return (error); + } + error = 0; + } + + /* + * If one of the methods for getting the property value above worked, + * copy it into the objset_t's cache. 
+ */ + if (error == 0 && cached_copy != NULL) { + *cached_copy = *value; + } + + return (error); +} diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 53992931e049..b26f5e80abfb 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -26,6 +26,7 @@ * Copyright (c) 2019, 2023, 2024, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2021, Datto, Inc. + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. */ #include @@ -803,6 +804,12 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait, pio->io_reexecute |= zio->io_reexecute; ASSERT3U(*countp, >, 0); + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { + ASSERT3U(*errorp, ==, EIO); + ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + } + (*countp)--; if (*countp == 0 && pio->io_stall == countp) { @@ -1282,20 +1289,14 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio_flag_t flags, const zbookmark_phys_t *zb) { zio_t *zio; + enum zio_stage pipeline = zp->zp_direct_write == B_TRUE ? + ZIO_DIRECT_WRITE_PIPELINE : (flags & ZIO_FLAG_DDT_CHILD) ? + ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE; - ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF && - zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && - zp->zp_compress >= ZIO_COMPRESS_OFF && - zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && - DMU_OT_IS_VALID(zp->zp_type) && - zp->zp_level < 32 && - zp->zp_copies > 0 && - zp->zp_copies <= spa_max_replication(spa)); zio = zio_create(pio, spa, txg, bp, data, lsize, psize, done, private, ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, - ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? 
- ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); + ZIO_STAGE_OPEN, pipeline); zio->io_ready = ready; zio->io_children_ready = children_ready; @@ -1572,6 +1573,19 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, */ pipeline |= ZIO_STAGE_CHECKSUM_VERIFY; pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; + } else if (type == ZIO_TYPE_WRITE && + pio->io_prop.zp_direct_write == B_TRUE) { + /* + * By default we only will verify checksums for Direct I/O + * writes for Linux. FreeBSD is able to place user pages under + * write protection before issuing them to the ZIO pipeline. + * + * Checksum validation errors will only be reported through + * the top-level VDEV, which is set by this child ZIO. + */ + ASSERT3P(bp, !=, NULL); + ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + pipeline |= ZIO_STAGE_DIO_CHECKSUM_VERIFY; } if (vd->vdev_ops->vdev_op_leaf) { @@ -1691,6 +1705,26 @@ zio_roundup_alloc_size(spa_t *spa, uint64_t size) return (spa->spa_min_alloc); } +size_t +zio_get_compression_max_size(enum zio_compress compress, uint64_t gcd_alloc, + uint64_t min_alloc, size_t s_len) +{ + size_t d_len; + + /* minimum 12.5% must be saved (legacy value, may be changed later) */ + d_len = s_len - (s_len >> 3); + + /* ZLE can't use exactly d_len bytes, it needs more, so ignore it */ + if (compress == ZIO_COMPRESS_ZLE) + return (d_len); + + d_len = d_len - d_len % gcd_alloc; + + if (d_len < min_alloc) + return (BPE_PAYLOAD_SIZE); + return (d_len); +} + /* * ========================================================================== * Prepare to read and write logical blocks @@ -1872,7 +1906,10 @@ zio_write_compress(zio_t *zio) psize = lsize; else psize = zio_compress_data(compress, zio->io_abd, &cabd, - lsize, zp->zp_complevel); + lsize, + zio_get_compression_max_size(compress, + spa->spa_gcd_alloc, spa->spa_min_alloc, lsize), + zp->zp_complevel); if (psize == 0) { compress = ZIO_COMPRESS_OFF; } else if (psize >= lsize) { @@ -3104,6 +3141,7 @@ 
zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) zp.zp_nopwrite = B_FALSE; zp.zp_encrypt = gio->io_prop.zp_encrypt; zp.zp_byteorder = gio->io_prop.zp_byteorder; + zp.zp_direct_write = B_FALSE; memset(zp.zp_salt, 0, ZIO_DATA_SALT_LEN); memset(zp.zp_iv, 0, ZIO_DATA_IV_LEN); memset(zp.zp_mac, 0, ZIO_DATA_MAC_LEN); @@ -3577,6 +3615,13 @@ zio_ddt_write(zio_t *zio) ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum); ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override); ASSERT(!(zio->io_bp_override && (zio->io_flags & ZIO_FLAG_RAW))); + /* + * Deduplication will not take place for Direct I/O writes. The + * ddt_tree will be emptied in syncing context. Direct I/O writes take + * place in the open-context. Direct I/O write can not attempt to + * modify the ddt_tree while issuing out a write. + */ + ASSERT3B(zio->io_prop.zp_direct_write, ==, B_FALSE); ddt_enter(ddt); dde = ddt_lookup(ddt, bp); @@ -4161,8 +4206,8 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, * some parallelism. */ int flags = METASLAB_ZIL; - int allocator = (uint_t)cityhash4(0, 0, 0, - os->os_dsl_dataset->ds_object) % spa->spa_alloc_count; + int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object) + % spa->spa_alloc_count; error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); *slog = (error == 0); @@ -4509,6 +4554,19 @@ zio_vdev_io_assess(zio_t *zio) zio->io_vsd = NULL; } + /* + * If a Direct I/O write checksum verify error has occurred then this + * I/O should not attempt to be issued again. Instead the EIO will + * be returned. 
+ */ + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { + ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL); + ASSERT3U(zio->io_error, ==, EIO); + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + return (zio); + } + + if (zio_injection_enabled && zio->io_error == 0) zio->io_error = zio_handle_fault_injection(zio, EIO); @@ -4822,6 +4880,49 @@ zio_checksum_verify(zio_t *zio) return (zio); } +static zio_t * +zio_dio_checksum_verify(zio_t *zio) +{ + zio_t *pio = zio_unique_parent(zio); + int error; + + ASSERT3P(zio->io_vd, !=, NULL); + ASSERT3P(zio->io_bp, !=, NULL); + ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); + ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); + ASSERT3B(pio->io_prop.zp_direct_write, ==, B_TRUE); + ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + + if (zfs_vdev_direct_write_verify == 0 || zio->io_error != 0) + goto out; + + if ((error = zio_checksum_error(zio, NULL)) != 0) { + zio->io_error = error; + if (error == ECKSUM) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_dio_verify_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + zio->io_error = SET_ERROR(EIO); + zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + + /* + * The EIO error must be propagated up to the logical + * parent ZIO in zio_notify_parent() so it can be + * returned to dmu_write_abd(). + */ + zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE; + + (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY, + zio->io_spa, zio->io_vd, &zio->io_bookmark, + zio, 0); + } + } + +out: + return (zio); +} + + /* * Called by RAID-Z to ensure we don't compute the checksum twice. */ @@ -5152,7 +5253,8 @@ zio_done(zio_t *zio) * device is currently unavailable. 
*/ if (zio->io_error != ECKSUM && zio->io_vd != NULL && - !vdev_is_dead(zio->io_vd)) { + !vdev_is_dead(zio->io_vd) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0); if (ret != EALREADY) { @@ -5167,6 +5269,7 @@ zio_done(zio_t *zio) if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) && zio == zio->io_logical) { /* * For logical I/O requests, tell the SPA to log the @@ -5188,7 +5291,8 @@ zio_done(zio_t *zio) ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); if (IO_IS_ALLOCATING(zio) && - !(zio->io_flags & ZIO_FLAG_CANFAIL)) { + !(zio->io_flags & ZIO_FLAG_CANFAIL) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { if (zio->io_error != ENOSPC) zio->io_reexecute |= ZIO_REEXECUTE_NOW; else @@ -5238,6 +5342,14 @@ zio_done(zio_t *zio) zio->io_reexecute &= ~ZIO_REEXECUTE_SUSPEND; if (zio->io_reexecute) { + /* + * A Direct I/O write that has a checksum verify error should + * not attempt to reexecute. Instead, EAGAIN should just be + * propagated back up so the write can be attempt to be issued + * through the ARC. + */ + ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)); + /* * This is a logical I/O that wants to reexecute. * @@ -5398,6 +5510,7 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_vdev_io_done, zio_vdev_io_assess, zio_checksum_verify, + zio_dio_checksum_verify, zio_done }; diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index 9182917f75eb..10c482573862 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -22,15 +22,11 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - */ -/* * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - */ - -/* * Copyright (c) 2013, 2018 by Delphix. All rights reserved. * Copyright (c) 2019, 2024, Klara, Inc. 
* Copyright (c) 2019, Allan Jude + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. */ #include @@ -129,9 +125,9 @@ zio_compress_select(spa_t *spa, enum zio_compress child, size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst, size_t s_len, - uint8_t level) + size_t d_len, uint8_t level) { - size_t c_len, d_len; + size_t c_len; uint8_t complevel; zio_compress_info_t *ci = &zio_compress_table[c]; @@ -156,15 +152,11 @@ zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst, size_t s_len, if (*dst == NULL) *dst = abd_alloc_sametype(src, s_len); - /* Compress at least 12.5%, but limit to the size of the dest abd. */ - d_len = MIN(s_len - (s_len >> 3), abd_get_size(*dst)); - c_len = ci->ci_compress(src, *dst, s_len, d_len, complevel); if (c_len > d_len) return (s_len); - ASSERT3U(c_len, <=, d_len); return (c_len); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 7b489e03d306..78bf714170d2 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1034,7 +1034,7 @@ zvol_add_clones(const char *dsname, list_t *minors_list) goto out; zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); - zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + zap_attribute_t *za = zap_attribute_alloc(); objset_t *mos = dd->dd_pool->dp_meta_objset; for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones); @@ -1060,7 +1060,7 @@ zvol_add_clones(const char *dsname, list_t *minors_list) } } zap_cursor_fini(zc); - kmem_free(za, sizeof (zap_attribute_t)); + zap_attribute_free(za); kmem_free(zc, sizeof (zap_cursor_t)); out: diff --git a/scripts/Makefile.am b/scripts/Makefile.am index b43bf97dbdf4..7d9cef83d2c6 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -28,7 +28,6 @@ endif dist_noinst_DATA += \ %D%/cstyle.pl \ - %D%/enum-extract.pl \ %D%/update_authors.pl \ %D%/zfs2zol-patch.sed \ %D%/zol2zfs-patch.sed diff --git a/scripts/enum-extract.pl b/scripts/enum-extract.pl deleted file mode 100755 index 
5dc2e3455145..000000000000 --- a/scripts/enum-extract.pl +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env perl - -my $usage = <) { - # comments - s/\/\*.*\*\///; - if (m/\/\*/) { - while ($_ .= <>) { - last if s/\/\*.*\*\///s; - } - } - - # preprocessor stuff - next if /^#/; - - # find our enum - $in_enum = 1 if s/^\s*enum\s+${enum}(?:\s|$)//; - next unless $in_enum; - - # remove explicit values - s/\s*=[^,]+,/,/g; - - # extract each identifier - while (m/\b([a-z_][a-z0-9_]*)\b/ig) { - print $1, "\n"; - } - - # - # don't exit: there may be multiple versions of the same enum, e.g. - # inside different #ifdef blocks. Let's explicitly return all of - # them and let external tooling deal with it. - # - $in_enum = 0 if m/}\s*;/; -} - -exit 0; diff --git a/scripts/zfs-tests.sh b/scripts/zfs-tests.sh index c25903ea1bee..2906d73442c2 100755 --- a/scripts/zfs-tests.sh +++ b/scripts/zfs-tests.sh @@ -1,5 +1,6 @@ -#!/bin/sh +#!/usr/bin/env bash # shellcheck disable=SC2154 +# shellcheck disable=SC2292 # # CDDL HEADER START # @@ -208,6 +209,49 @@ find_runfile() { fi } +# Given a TAGS with a format like "1/3" or "2/3" then divide up the test list +# into portions and print that portion. So "1/3" for "the first third of the +# test tags". +# +# +split_tags() { + # Get numerator and denominator + NUM=$(echo "$TAGS" | cut -d/ -f1) + DEN=$(echo "$TAGS" | cut -d/ -f2) + # At the point this is called, RUNFILES will contain a comma separated + # list of full paths to the runfiles, like: + # + # "/home/hutter/qemu/tests/runfiles/common.run,/home/hutter/qemu/tests/runfiles/linux.run" + # + # So to get tags for our selected tests we do: + # + # 1. Remove unneeded chars: [],\ + # 2. Print out the last field of each tag line. This will be the tag + # for the test (like 'zpool_add'). + # 3. Remove duplicates between the runfiles. If the same tag is defined + # in multiple runfiles, then when you do '-T ' ZTS is smart + # enough to know to run the tag in each runfile. 
So '-T zpool_add' + # will run the zpool_add from common.run and linux.run. + # 4. Ignore the 'functional' tag since we only want individual tests + # 5. Print out the tests in our fraction of all tests. This uses modulus + # so "1/3" will run tests 3,6,9 etc. That way the tests are + # interleaved so, say, "3/4" isn't running all the zpool_* tests that + # appear alphabetically at the end. + # 6. Remove trailing comma from list + # + # TAGS will then look like: + # + # "append,atime,bootfs,cachefile,checksum,cp_files,deadman,dos_attributes, ..." + + # Change the comma to a space for easy processing + _RUNFILES=${RUNFILES//","/" "} + # shellcheck disable=SC2002,SC2086 + cat $_RUNFILES | tr -d "[],\'" | awk '/tags = /{print $NF}' | sort | \ + uniq | grep -v functional | \ + awk -v num="$NUM" -v den="$DEN" '{ if(NR % den == (num - 1)) {printf "%s,",$0}}' | \ + sed -E 's/,$//' +} + # # Symlink file if it appears under any of the given paths. # @@ -331,10 +375,14 @@ OPTIONS: -t PATH|NAME Run single test at PATH relative to test suite, or search for test by NAME -T TAGS Comma separated list of tags (default: 'functional') + Alternately, specify a fraction like "1/3" or "2/3" to + run the first third of tests or 2nd third of the tests. This + is useful for splitting up the test amongst different + runners. -u USER Run single test as USER (default: root) EXAMPLES: -# Run the default ($(echo "${DEFAULT_RUNFILES}" | sed 's/\.run//')) suite of tests and output the configuration used. +# Run the default ${DEFAULT_RUNFILES//\.run/} suite of tests and output the configuration used. $0 -v # Run a smaller suite of tests designed to run more quickly. @@ -347,7 +395,7 @@ $0 -t tests/functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh $0 -t zfs_bookmark_cliargs # Cleanup a previous run of the test suite prior to testing, run the -# default ($(echo "${DEFAULT_RUNFILES}" | sed 's/\.run//')) suite of tests and perform no cleanup on exit.
+# default ${DEFAULT_RUNFILES//\.run//} suite of tests and perform no cleanup on exit. $0 -x EOF @@ -489,6 +537,8 @@ fi # TAGS=${TAGS:='functional'} + + # # Attempt to locate the runfiles describing the test workload. # @@ -509,6 +559,23 @@ done unset IFS RUNFILES=${R#,} +# The tag can be a fraction to indicate which portion of ZTS to run, like +# +# "1/3": Run first one third of all tests in runfiles +# "2/3": Run second one third of all tests in runfiles +# "6/10": Run 6th tenth of all tests in runfiles +# +# This is useful for splitting up the tests across multiple runners. +# +# After this code block, TAGS will be transformed from something like +# "1/3" to a comma separated tag list, like: +# +# "append,atime,bootfs,cachefile,checksum,cp_files,deadman,dos_attributes, ..." +# +if echo "$TAGS" | grep -Eq '^[0-9]+/[0-9]+$' ; then + TAGS=$(split_tags) +fi + # # This script should not be run as root. Instead the test user, which may # be a normal user account, needs to be configured such that it can diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 088e46ce578c..f89a4b3e0aae 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -693,6 +693,14 @@ tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] tags = ['functional', 'delegate'] +[tests/functional/direct] +tests = ['dio_aligned_block', 'dio_async_always', 'dio_async_fio_ioengines', + 'dio_compression', 'dio_dedup', 'dio_encryption', 'dio_grow_block', + 'dio_max_recordsize', 'dio_mixed', 'dio_mmap', 'dio_overwrites', + 'dio_property', 'dio_random', 'dio_recordsize', 'dio_unaligned_block', + 'dio_unaligned_filesize'] +tags = ['functional', 'direct'] + [tests/functional/exec] tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] @@ -735,7 +743,7 @@ pre = tags = ['functional', 'inheritance'] [tests/functional/io] -tests = ['sync', 'psync', 'posixaio', 'mmap'] +tests = ['mmap', 'posixaio', 'psync',
'sync'] tags = ['functional', 'io'] [tests/functional/inuse] diff --git a/tests/runfiles/freebsd.run b/tests/runfiles/freebsd.run index 13696d645850..e1ae0c6b7721 100644 --- a/tests/runfiles/freebsd.run +++ b/tests/runfiles/freebsd.run @@ -30,3 +30,7 @@ tags = ['functional', 'cli_root', 'zfs_jail'] tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive', 'pam_short_password'] tags = ['functional', 'pam'] + +[tests/functional/direct:FreeBSD] +tests = ['dio_write_stable_pages'] +tags = ['functional', 'direct'] diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 5817e649003c..5534cd27f637 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -102,10 +102,14 @@ tags = ['functional', 'compression'] tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos'] tags = ['functional', 'devices'] +[tests/functional/direct:Linux] +tests = ['dio_write_verify'] +tags = ['functional', 'direct'] + [tests/functional/events:Linux] tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill', 'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config', - 'zed_slow_io', 'zed_slow_io_many_vdevs'] + 'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple'] tags = ['functional', 'events'] [tests/functional/fadvise:Linux] @@ -121,7 +125,8 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos', 'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault', 'decompress_fault', - 'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s'] + 'fault_limits', 'scrub_after_resilver', 'suspend_resume_single', + 'zpool_status_-s'] tags = ['functional', 'fault'] [tests/functional/features/large_dnode:Linux] @@ -138,6 +143,10 @@ pre = post = tags = ['functional', 'largest_pool'] +[tests/functional/longname:Linux] +tests = ['longname_001_pos', 'longname_002_pos', 
'longname_003_pos'] +tags = ['functional', 'longname'] + [tests/functional/mmap:Linux] tests = ['mmap_libaio_001_pos', 'mmap_sync_001_pos'] tags = ['functional', 'mmap'] @@ -182,7 +191,7 @@ tests = ['renameat2_noreplace', 'renameat2_exchange', 'renameat2_whiteout'] tags = ['functional', 'renameat2'] [tests/functional/rsend:Linux] -tests = ['send_realloc_dnode_size', 'send_encrypted_files'] +tests = ['send_realloc_dnode_size', 'send_encrypted_files', 'send-c_longname'] tags = ['functional', 'rsend'] [tests/functional/simd:Linux] diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in index 6488fa8318ff..92fb64feeeef 100755 --- a/tests/test-runner/bin/test-runner.py.in +++ b/tests/test-runner/bin/test-runner.py.in @@ -295,6 +295,18 @@ User: %s except Exception: pass + """ + Log each test we run to /dev/ttyu0 (on FreeBSD), so if there's a kernel + warning we'll be able to match it up to a particular test. + """ + if options.kmsg is True and exists("/dev/ttyu0"): + try: + kp = Popen([SUDO, "sh", "-c", + f"echo ZTS run {self.pathname} > /dev/ttyu0"]) + kp.wait() + except Exception: + pass + self.result.starttime = monotonic_time() if kmemleak: diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 6db10b91de05..2562836213af 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -66,12 +66,6 @@ exec_reason = 'Test user execute permissions required for utilities' # renameat2_reason = 'Kernel renameat2 support required' -# -# Some tests require the O_TMPFILE flag which was first introduced in the -# 3.11 kernel. -# -tmpfile_reason = 'Kernel O_TMPFILE support required' - # # Some tests require the statx(2) system call on Linux which was first # introduced in the 4.11 kernel. 
@@ -124,11 +118,6 @@ fspacectl_reason = 'fspacectl(2) and truncate -d support required' # na_reason = "Not applicable" -# -# Some test cases doesn't have all requirements to run on Github actions CI. -# -ci_reason = 'CI runner doesn\'t have all requirements' - # # Idmapped mount is only supported in kernel version >= 5.12 # @@ -212,6 +201,7 @@ maybe = { 'chattr/setup': ['SKIP', exec_reason], 'crtime/crtime_001_pos': ['SKIP', statx_reason], 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason], + 'cli_root/zfs_copies/zfs_copies_006_pos': ['FAIL', known_reason], 'cli_root/zfs_destroy/zfs_destroy_dev_removal_condense': ['FAIL', known_reason], 'cli_root/zfs_get/zfs_get_004_pos': ['FAIL', known_reason], @@ -226,11 +216,18 @@ maybe = { 'cli_root/zpool_import/zpool_import_missing_003_pos': ['SKIP', 6839], 'cli_root/zpool_initialize/zpool_initialize_import_export': ['FAIL', 11948], + 'cli_root/zpool_initialize/zpool_initialize_suspend_resume': + ['FAIL', known_reason], 'cli_root/zpool_labelclear/zpool_labelclear_removed': ['FAIL', known_reason], 'cli_root/zpool_trim/setup': ['SKIP', trim_reason], + 'cli_root/zpool_trim/zpool_trim_fault_export_import_online': + ['FAIL', known_reason], 'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', 6141], + 'cli_user/misc/arc_summary_001_pos': ['FAIL', known_reason], 'delegate/setup': ['SKIP', exec_reason], + 'events/zed_cksum_config': ['FAIL', known_reason], + 'fault/auto_replace_002_pos': ['FAIL', known_reason], 'fallocate/fallocate_punch-hole': ['SKIP', fspacectl_reason], 'history/history_004_pos': ['FAIL', 7026], 'history/history_005_neg': ['FAIL', 6680], @@ -238,20 +235,27 @@ maybe = { 'history/history_008_pos': ['FAIL', known_reason], 'history/history_010_pos': ['SKIP', exec_reason], 'io/mmap': ['SKIP', fio_reason], + 'l2arc/l2arc_l2miss_pos': ['FAIL', known_reason], + 'l2arc/persist_l2arc_005_pos': ['FAIL', known_reason], 'largest_pool/largest_pool_001_pos': ['FAIL', known_reason], + 'mmap/mmap_sync_001_pos': ['FAIL', 
known_reason], 'mmp/mmp_on_uberblocks': ['FAIL', known_reason], 'pam/setup': ['SKIP', "pamtester might be not available"], 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', 11946], 'projectquota/setup': ['SKIP', exec_reason], + 'raidz/raidz_002_pos': ['FAIL', known_reason], + 'raidz/raidz_expand_001_pos': ['FAIL', 16421], + 'redundancy/redundancy_draid_spare1': ['FAIL', known_reason], + 'redundancy/redundancy_draid_spare3': ['FAIL', known_reason], 'removal/removal_condense_export': ['FAIL', known_reason], 'renameat2/setup': ['SKIP', renameat2_reason], 'reservation/reservation_008_pos': ['FAIL', 7741], 'reservation/reservation_018_pos': ['FAIL', 5642], 'snapshot/clone_001_pos': ['FAIL', known_reason], + 'snapshot/snapshot_006_pos': ['FAIL', known_reason], 'snapshot/snapshot_009_pos': ['FAIL', 7961], 'snapshot/snapshot_010_pos': ['FAIL', 7961], 'snapused/snapused_004_pos': ['FAIL', 5513], - 'tmpfile/setup': ['SKIP', tmpfile_reason], 'trim/setup': ['SKIP', trim_reason], 'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason], 'upgrade/upgrade_projectquota_002_pos': ['SKIP', project_id_reason], @@ -355,37 +359,6 @@ elif sys.platform.startswith('linux'): 'mmp/mmp_inactive_import': ['FAIL', known_reason], }) -# Not all Github actions runners have scsi_debug module, so we may skip -# some tests which use it. 
-if os.environ.get('CI') == 'true': - known.update({ - 'cli_root/zpool_expand/zpool_expand_001_pos': ['SKIP', ci_reason], - 'cli_root/zpool_expand/zpool_expand_003_neg': ['SKIP', ci_reason], - 'cli_root/zpool_expand/zpool_expand_005_pos': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/setup': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_001_pos': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_002_pos': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_004_pos': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_005_pos': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_006_neg': ['SKIP', ci_reason], - 'cli_root/zpool_reopen/zpool_reopen_007_pos': ['SKIP', ci_reason], - 'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason], - 'fault/auto_offline_001_pos': ['SKIP', ci_reason], - 'fault/auto_online_001_pos': ['SKIP', ci_reason], - 'fault/auto_online_002_pos': ['SKIP', ci_reason], - 'fault/auto_replace_001_pos': ['SKIP', ci_reason], - 'fault/auto_replace_002_pos': ['SKIP', ci_reason], - 'fault/auto_spare_ashift': ['SKIP', ci_reason], - 'fault/auto_spare_shared': ['SKIP', ci_reason], - 'fault/suspend_resume_single': ['SKIP', ci_reason], - 'procfs/pool_state': ['SKIP', ci_reason], - }) - - maybe.update({ - 'events/events_002_pos': ['FAIL', 11546], - }) - def process_results(pathname): try: diff --git a/tests/zfs-tests/cmd/.gitignore b/tests/zfs-tests/cmd/.gitignore index 0ed0a69eb013..e9e3b8f73e42 100644 --- a/tests/zfs-tests/cmd/.gitignore +++ b/tests/zfs-tests/cmd/.gitignore @@ -16,6 +16,7 @@ /getversion /largest_file /libzfs_input_check +/manipulate_user_buffer /mkbusy /mkfile /mkfiles diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index a8df06c2e990..5250e72f9fa8 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -60,6 +60,8 @@ scripts_zfs_tests_bin_PROGRAMS += 
%D%/libzfs_input_check libzfs_core.la \ libnvpair.la +scripts_zfs_tests_bin_PROGRAMS += %D%/manipulate_user_buffer +%C%_manipulate_user_buffer_LDADD = -lpthread scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree %C%_mkfile_LDADD = $(LTLIBINTL) diff --git a/tests/zfs-tests/cmd/manipulate_user_buffer.c b/tests/zfs-tests/cmd/manipulate_user_buffer.c new file mode 100644 index 000000000000..714f42200557 --- /dev/null +++ b/tests/zfs-tests/cmd/manipulate_user_buffer.c @@ -0,0 +1,272 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022 by Triad National Security, LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MIN +#define MIN(a, b) ((a) < (b)) ? 
(a) : (b) +#endif + +static char *outputfile = NULL; +static int blocksize = 131072; /* 128K */ +static int wr_err_expected = 0; +static int numblocks = 100; +static char *execname = NULL; +static int print_usage = 0; +static int randompattern = 0; +static int ofd; +char *buf = NULL; + +typedef struct { + int entire_file_written; +} pthread_args_t; + +static void +usage(void) +{ + (void) fprintf(stderr, + "usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n" + " [-n numblocks] [-p randpattern] [-h help]\n" + "\n" + "Testing whether checksum verify works correctly for O_DIRECT.\n" + "when manipulating the contents of a userspace buffer.\n" + "\n" + " outputfile: File to write to.\n" + " blocksize: Size of each block to write (must be at \n" + " least >= 512).\n" + " wr_err_expected: Whether pwrite() is expected to return EIO\n" + " while manipulating the contents of the\n" + " buffer.\n" + " numblocks: Total number of blocksized blocks to\n" + " write.\n" + " randpattern: Fill data buffer with random data. 
Default\n" + " behavior is to fill the buffer with the \n" + " known data pattern (0xdeadbeef).\n" + " help: Print usage information and exit.\n" + "\n" + " Required parameters:\n" + " outputfile\n" + "\n" + " Default Values:\n" + " blocksize -> 131072\n" + " wr_err_expexted -> false\n" + " numblocks -> 100\n" + " randpattern -> false\n", + execname); + (void) exit(1); +} + +static void +parse_options(int argc, char *argv[]) +{ + int c; + int errflag = 0; + extern char *optarg; + extern int optind, optopt; + execname = argv[0]; + + while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) { + switch (c) { + case 'b': + blocksize = atoi(optarg); + break; + + case 'e': + wr_err_expected = 1; + break; + + case 'h': + print_usage = 1; + break; + + case 'n': + numblocks = atoi(optarg); + break; + + case 'o': + outputfile = optarg; + break; + + case 'p': + randompattern = 1; + break; + + case ':': + (void) fprintf(stderr, + "Option -%c requires an opertand\n", + optopt); + errflag++; + break; + case '?': + default: + (void) fprintf(stderr, + "Unrecognized option: -%c\n", optopt); + errflag++; + break; + } + } + + if (errflag || print_usage == 1) + (void) usage(); + + if (blocksize < 512 || outputfile == NULL || numblocks <= 0) { + (void) fprintf(stderr, + "Required paramater(s) missing or invalid.\n"); + (void) usage(); + } +} + +/* + * Write blocksize * numblocks to the file using O_DIRECT. 
+ */ +static void * +write_thread(void *arg) +{ + size_t offset = 0; + int total_data = blocksize * numblocks; + int left = total_data; + ssize_t wrote = 0; + pthread_args_t *args = (pthread_args_t *)arg; + + while (!args->entire_file_written) { + wrote = pwrite(ofd, buf, blocksize, offset); + if (wrote != blocksize) { + if (wr_err_expected) + assert(errno == EIO); + else + exit(2); + } + + offset = ((offset + blocksize) % total_data); + left -= blocksize; + + if (left == 0) + args->entire_file_written = 1; + } + + pthread_exit(NULL); +} + +/* + * Update the buffers contents with random data. + */ +static void * +manipulate_buf_thread(void *arg) +{ + size_t rand_offset; + char rand_char; + pthread_args_t *args = (pthread_args_t *)arg; + + while (!args->entire_file_written) { + rand_offset = (rand() % blocksize); + rand_char = (rand() % (126 - 33) + 33); + buf[rand_offset] = rand_char; + } + + pthread_exit(NULL); +} + +int +main(int argc, char *argv[]) +{ + const char *datapattern = "0xdeadbeef"; + int ofd_flags = O_WRONLY | O_CREAT | O_DIRECT; + mode_t mode = S_IRUSR | S_IWUSR; + pthread_t write_thr; + pthread_t manipul_thr; + int left = blocksize; + int offset = 0; + int rc; + pthread_args_t args = { 0 }; + + parse_options(argc, argv); + + ofd = open(outputfile, ofd_flags, mode); + if (ofd == -1) { + (void) fprintf(stderr, "%s, %s\n", execname, outputfile); + perror("open"); + exit(2); + } + + int err = posix_memalign((void **)&buf, sysconf(_SC_PAGE_SIZE), + blocksize); + if (err != 0) { + (void) fprintf(stderr, + "%s: %s\n", execname, strerror(err)); + exit(2); + } + + if (!randompattern) { + /* Putting known data pattern in buffer */ + while (left) { + size_t amt = MIN(strlen(datapattern), left); + memcpy(&buf[offset], datapattern, amt); + offset += amt; + left -= amt; + } + } else { + /* Putting random data in buffer */ + for (int i = 0; i < blocksize; i++) + buf[i] = rand(); + } + + /* + * Writing using O_DIRECT while manipulating the buffer contents until + * 
the entire file is written. + */ + if ((rc = pthread_create(&manipul_thr, NULL, manipulate_buf_thread, + &args))) { + fprintf(stderr, "error: pthreads_create, manipul_thr, " + "rc: %d\n", rc); + exit(2); + } + + if ((rc = pthread_create(&write_thr, NULL, write_thread, &args))) { + fprintf(stderr, "error: pthreads_create, write_thr, " + "rc: %d\n", rc); + exit(2); + } + + pthread_join(write_thr, NULL); + pthread_join(manipul_thr, NULL); + + assert(args.entire_file_written == 1); + + (void) close(ofd); + + free(buf); + + return (0); +} diff --git a/tests/zfs-tests/cmd/mmap_sync.c b/tests/zfs-tests/cmd/mmap_sync.c index 226e71be2f57..f9008eeaf31a 100644 --- a/tests/zfs-tests/cmd/mmap_sync.c +++ b/tests/zfs-tests/cmd/mmap_sync.c @@ -64,7 +64,7 @@ main(int argc, char *argv[]) run_time_mins = atoi(argv[1]); } - int max_msync_time_ms = 1000; + int max_msync_time_ms = 2000; if (argc >= 3) { max_msync_time_ms = atoi(argv[2]); } diff --git a/tests/zfs-tests/cmd/stride_dd.c b/tests/zfs-tests/cmd/stride_dd.c index a20b26131650..e1e45794cf16 100644 --- a/tests/zfs-tests/cmd/stride_dd.c +++ b/tests/zfs-tests/cmd/stride_dd.c @@ -21,12 +21,19 @@ #include #include +static int alignment = 0; static int bsize = 0; static int count = 0; static char *ifile = NULL; static char *ofile = NULL; -static off_t stride = 0; +static off_t stride = 1; static off_t seek = 0; +static int seekbytes = 0; +static int if_o_direct = 0; +static int of_o_direct = 0; +static int skip = 0; +static int skipbytes = 0; +static int entire_file = 0; static const char *execname = "stride_dd"; static void usage(void); @@ -36,8 +43,10 @@ static void usage(void) { (void) fprintf(stderr, - "usage: %s -i inputfile -o outputfile -b blocksize -c count \n" - " -s stride [ -k seekblocks]\n" + "usage: %s -i inputfile -o outputfile -b blocksize [-c count]\n" + " [-s stride] [-k seekblocks] [-K seekbytes]\n" + " [-a alignment] [-d if_o_direct] [-D of_o_direct]\n" + " [-p skipblocks] [-P skipbytes] [-e entire_file]\n" "\n" 
"Simplified version of dd that supports the stride option.\n" "A stride of n means that for each block written, n - 1 blocks\n" @@ -45,16 +54,47 @@ usage(void) "means that blocks are read and written consecutively.\n" "All numeric parameters must be integers.\n" "\n" - " inputfile: File to read from\n" - " outputfile: File to write to\n" - " blocksize: Size of each block to read/write\n" - " count: Number of blocks to read/write\n" - " stride: Read/write a block then skip (stride - 1) blocks\n" - " seekblocks: Number of blocks to skip at start of output\n", + " inputfile: File to read from\n" + " outputfile: File to write to\n" + " blocksize: Size of each block to read/write\n" + " count: Number of blocks to read/write (Required" + " unless -e is used)\n" + " stride: Read/write a block then skip (stride - 1) blocks" + "\n" + " seekblocks: Number of blocks to skip at start of output\n" + " seekbytes: Treat seekblocks as byte count\n" + " alignment: Alignment passed to posix_memalign() (default" + " PAGE_SIZE)\n" + " if_o_direct: Use O_DIRECT with inputfile (default no O_DIRECT)" + "\n" + " of_o_direct: Use O_DIRECT with outputfile (default no " + " O_DIRECT)\n" + " skipblocks: Number of blocks to skip at start of input " + " (default 0)\n" + " skipbytes: Treat skipblocks as byte count\n" + " entire_file: When used the entire inputfile will be read and" + " count will be ignored\n", execname); (void) exit(1); } +/* + * posix_memalign() only allows for alignments which are positive, powers of two + * and a multiple of sizeof (void *).
+ */ +static int +invalid_alignment(int alignment) +{ + if ((alignment < 0) || (alignment & (alignment - 1)) || + ((alignment % sizeof (void *)))) { + (void) fprintf(stderr, + "Alignment must be a postive, power of two, and multiple " + "of sizeof (void *).\n"); + return (1); + } + return (0); +} + static void parse_options(int argc, char *argv[]) { @@ -62,12 +102,17 @@ parse_options(int argc, char *argv[]) int errflag = 0; execname = argv[0]; + alignment = sysconf(_SC_PAGE_SIZE); extern char *optarg; extern int optind, optopt; - while ((c = getopt(argc, argv, ":b:c:i:o:s:k:")) != -1) { + while ((c = getopt(argc, argv, "a:b:c:deDi:o:s:k:Kp:P")) != -1) { switch (c) { + case 'a': + alignment = atoi(optarg); + break; + case 'b': bsize = atoi(optarg); break; @@ -76,6 +121,18 @@ parse_options(int argc, char *argv[]) count = atoi(optarg); break; + case 'd': + if_o_direct = 1; + break; + + case 'e': + entire_file = 1; + break; + + case 'D': + of_o_direct = 1; + break; + case 'i': ifile = optarg; break; @@ -92,6 +149,18 @@ parse_options(int argc, char *argv[]) seek = atoi(optarg); break; + case 'K': + seekbytes = 1; + break; + + case 'p': + skip = atoi(optarg); + break; + + case 'P': + skipbytes = 1; + break; + case ':': (void) fprintf(stderr, "Option -%c requires an operand\n", optopt); @@ -111,64 +180,59 @@ parse_options(int argc, char *argv[]) } } - if (bsize <= 0 || count <= 0 || stride <= 0 || ifile == NULL || - ofile == NULL || seek < 0) { + if (bsize <= 0 || stride <= 0 || ifile == NULL || ofile == NULL || + seek < 0 || invalid_alignment(alignment) || skip < 0) { + (void) fprintf(stderr, + "Required parameter(s) missing or invalid.\n"); + (void) usage(); + } + + if (count <= 0 && entire_file == 0) { (void) fprintf(stderr, "Required parameter(s) missing or invalid.\n"); (void) usage(); } } -int -main(int argc, char *argv[]) +static void +read_entire_file(int ifd, int ofd, void *buf) { - int i; - int ifd; - int ofd; - void *buf; int c; - parse_options(argc, argv); - - 
ifd = open(ifile, O_RDONLY); - if (ifd == -1) { - (void) fprintf(stderr, "%s: %s: ", execname, ifile); - perror("open"); - exit(2); - } - - ofd = open(ofile, O_WRONLY | O_CREAT, 0666); - if (ofd == -1) { - (void) fprintf(stderr, "%s: %s: ", execname, ofile); - perror("open"); - exit(2); - } - - /* - * We use valloc because some character block devices expect a - * page-aligned buffer. - */ - int err = posix_memalign(&buf, 4096, bsize); - if (err != 0) { - (void) fprintf(stderr, - "%s: %s\n", execname, strerror(err)); - exit(2); - } - - if (seek > 0) { - if (lseek(ofd, seek * bsize, SEEK_CUR) == -1) { - perror("output lseek"); + do { + c = read(ifd, buf, bsize); + if (c < 0) { + perror("read"); exit(2); + } else if (c != 0) { + c = write(ofd, buf, bsize); + if (c < 0) { + perror("write"); + exit(2); + } + } - } + if (stride > 1) { + if (lseek(ifd, (stride - 1) * bsize, SEEK_CUR) == -1) { + perror("input lseek"); + exit(2); + } + if (lseek(ofd, (stride - 1) * bsize, SEEK_CUR) == -1) { + perror("output lseek"); + exit(2); + } + } + } while (c != 0); +} + +static void +read_on_count(int ifd, int ofd, void *buf) +{ + int i; + int c; for (i = 0; i < count; i++) { c = read(ifd, buf, bsize); - if (c != bsize) { - - perror("read"); - exit(2); - } if (c != bsize) { if (c < 0) { perror("read"); @@ -205,6 +269,71 @@ main(int argc, char *argv[]) } } } +} + +int +main(int argc, char *argv[]) +{ + int ifd; + int ofd; + int ifd_flags = O_RDONLY; + int ofd_flags = O_WRONLY | O_CREAT; + void *buf; + + parse_options(argc, argv); + + if (if_o_direct) + ifd_flags |= O_DIRECT; + + if (of_o_direct) + ofd_flags |= O_DIRECT; + + ifd = open(ifile, ifd_flags); + if (ifd == -1) { + (void) fprintf(stderr, "%s: %s: ", execname, ifile); + perror("open"); + exit(2); + } + + ofd = open(ofile, ofd_flags, 0666); + if (ofd == -1) { + (void) fprintf(stderr, "%s: %s: ", execname, ofile); + perror("open"); + exit(2); + } + + /* + * We use valloc because some character block devices expect a + * 
page-aligned buffer. + */ + int err = posix_memalign(&buf, alignment, bsize); + if (err != 0) { + (void) fprintf(stderr, + "%s: %s\n", execname, strerror(err)); + exit(2); + } + + if (skip > 0) { + int skipamt = skipbytes == 1 ? skip : skip * bsize; + if (lseek(ifd, skipamt, SEEK_CUR) == -1) { + perror("input lseek"); + exit(2); + } + } + + if (seek > 0) { + int seekamt = seekbytes == 1 ? seek : seek * bsize; + if (lseek(ofd, seekamt, SEEK_CUR) == -1) { + perror("output lseek"); + exit(2); + } + } + + if (entire_file == 1) + read_entire_file(ifd, ofd, buf); + else + read_on_count(ifd, ofd, buf); + free(buf); (void) close(ofd); diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 19770138bf14..be41ce5210e8 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -100,7 +100,8 @@ export SYSTEM_FILES_COMMON='awk uniq vmstat wc - xargs' + xargs + xxh128sum' export SYSTEM_FILES_FREEBSD='chflags compress @@ -112,13 +113,11 @@ export SYSTEM_FILES_FREEBSD='chflags jexec jls lsextattr - md5 mdconfig newfs pw rmextattr setextattr - sha256 showmount swapctl sysctl @@ -146,7 +145,6 @@ export SYSTEM_FILES_LINUX='attr lscpu lsmod lsscsi - md5sum mkswap modprobe mountpoint @@ -156,7 +154,6 @@ export SYSTEM_FILES_LINUX='attr perf setfattr setpriv - sha256sum udevadm unshare useradd @@ -200,6 +197,7 @@ export ZFSTEST_FILES='badsend getversion largest_file libzfs_input_check + manipulate_user_buffer mkbusy mkfile mkfiles diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index a2f42999a31e..9cf919c3dd0f 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3455,43 +3455,24 @@ function tunable_exists } # -# Compute MD5 digest for given file or stdin if no file given. +# Compute xxh128sum for given file or stdin if no file given. 
# Note: file path must not contain spaces # -function md5digest +function xxh128digest { - typeset file=$1 - - case "$UNAME" in - FreeBSD) - md5 -q $file - ;; - *) - typeset sum _ - read -r sum _ < <(md5sum -b $file) - echo $sum - ;; - esac + xxh128sum $1 | awk '{print $1}' } # -# Compute SHA256 digest for given file or stdin if no file given. -# Note: file path must not contain spaces +# Compare the xxhash128 digest of two files. # -function sha256digest -{ - typeset file=$1 +function cmp_xxh128 { + typeset file1=$1 + typeset file2=$2 - case "$UNAME" in - FreeBSD) - sha256 -q $file - ;; - *) - typeset sum _ - read -r sum _ < <(sha256sum -b $file) - echo $sum - ;; - esac + typeset sum1=$(xxh128digest $file1) + typeset sum2=$(xxh128digest $file2) + test "$sum1" = "$sum2" } function new_fs # diff --git a/tests/zfs-tests/include/properties.shlib b/tests/zfs-tests/include/properties.shlib index 3dfb295a40df..5a39eb3f36f5 100644 --- a/tests/zfs-tests/include/properties.shlib +++ b/tests/zfs-tests/include/properties.shlib @@ -30,7 +30,7 @@ typeset -a logbias_prop_vals=('latency' 'throughput') typeset -a primarycache_prop_vals=('all' 'none' 'metadata') typeset -a redundant_metadata_prop_vals=('all' 'most' 'some' 'none') typeset -a secondarycache_prop_vals=('all' 'none' 'metadata') -typeset -a snapdir_prop_vals=('hidden' 'visible') +typeset -a snapdir_prop_vals=('disabled' 'hidden' 'visible') typeset -a sync_prop_vals=('standard' 'always' 'disabled') typeset -a fs_props=('compress' 'checksum' 'recsize' diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index 96943421f84c..9f436eb4026e 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -93,6 +93,7 @@ VDEV_FILE_LOGICAL_ASHIFT vdev.file.logical_ashift vdev_file_logical_ashift VDEV_FILE_PHYSICAL_ASHIFT vdev.file.physical_ashift vdev_file_physical_ashift VDEV_MAX_AUTO_ASHIFT vdev.max_auto_ashift zfs_vdev_max_auto_ashift VDEV_MIN_MS_COUNT 
vdev.min_ms_count zfs_vdev_min_ms_count +VDEV_DIRECT_WR_VERIFY vdev.direct_write_verify zfs_vdev_direct_write_verify VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode @@ -100,6 +101,7 @@ VOL_RECURSIVE vol.recursive UNSUPPORTED VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq BCLONE_ENABLED bclone_enabled zfs_bclone_enabled BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty +DIO_ENABLED dio_enabled zfs_dio_enabled XATTR_COMPAT xattr_compat zfs_xattr_compat ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index bbeabc6dfb42..206ee8ac1542 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -265,6 +265,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/delegate/delegate_common.kshlib \ functional/devices/devices.cfg \ functional/devices/devices_common.kshlib \ + functional/direct/dio.cfg \ + functional/direct/dio.kshlib \ functional/events/events.cfg \ functional/events/events_common.kshlib \ functional/fault/fault.cfg \ @@ -1183,6 +1185,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_prefetch/cleanup.ksh \ functional/cli_root/zpool_prefetch/setup.ksh \ functional/cli_root/zpool_prefetch/zpool_prefetch_001_pos.ksh \ + functional/cli_root/zpool_reguid/cleanup.ksh \ + functional/cli_root/zpool_reguid/setup.ksh \ + functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh \ + functional/cli_root/zpool_reguid/zpool_reguid_002_neg.ksh \ functional/cli_root/zpool_remove/cleanup.ksh \ functional/cli_root/zpool_remove/setup.ksh \ functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh \ @@ -1458,6 +1464,26 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/devices/devices_002_neg.ksh \ functional/devices/devices_003_pos.ksh \ functional/devices/setup.ksh \ + 
functional/direct/dio_aligned_block.ksh \ + functional/direct/dio_async_always.ksh \ + functional/direct/dio_async_fio_ioengines.ksh \ + functional/direct/dio_compression.ksh \ + functional/direct/dio_dedup.ksh \ + functional/direct/dio_encryption.ksh \ + functional/direct/dio_grow_block.ksh \ + functional/direct/dio_max_recordsize.ksh \ + functional/direct/dio_mixed.ksh \ + functional/direct/dio_mmap.ksh \ + functional/direct/dio_overwrites.ksh \ + functional/direct/dio_property.ksh \ + functional/direct/dio_random.ksh \ + functional/direct/dio_recordsize.ksh \ + functional/direct/dio_unaligned_block.ksh \ + functional/direct/dio_unaligned_filesize.ksh \ + functional/direct/dio_write_verify.ksh \ + functional/direct/dio_write_stable_pages.ksh \ + functional/direct/setup.ksh \ + functional/direct/cleanup.ksh \ functional/dos_attributes/cleanup.ksh \ functional/dos_attributes/read_dos_attrs_001.ksh \ functional/dos_attributes/setup.ksh \ @@ -1468,6 +1494,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/events/setup.ksh \ functional/events/zed_cksum_config.ksh \ functional/events/zed_cksum_reported.ksh \ + functional/events/zed_diagnose_multiple.ksh \ functional/events/zed_fd_spill.ksh \ functional/events/zed_io_config.ksh \ functional/events/zed_rc_filter.ksh \ @@ -1498,6 +1525,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/fault/cleanup.ksh \ functional/fault/decompress_fault.ksh \ functional/fault/decrypt_fault.ksh \ + functional/fault/fault_limits.ksh \ functional/fault/scrub_after_resilver.ksh \ functional/fault/suspend_resume_single.ksh \ functional/fault/setup.ksh \ @@ -1577,6 +1605,11 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/link_count/link_count_001.ksh \ functional/link_count/link_count_root_inode.ksh \ functional/link_count/setup.ksh \ + functional/longname/cleanup.ksh \ + functional/longname/longname_001_pos.ksh \ + functional/longname/longname_002_pos.ksh \ + functional/longname/longname_003_pos.ksh 
\ + functional/longname/setup.ksh \ functional/log_spacemap/log_spacemap_import_logs.ksh \ functional/migration/cleanup.ksh \ functional/migration/migration_001_pos.ksh \ @@ -1910,6 +1943,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/rsend/rsend_031_pos.ksh \ functional/rsend/send-c_embedded_blocks.ksh \ functional/rsend/send-c_incremental.ksh \ + functional/rsend/send-c_longname.ksh \ functional/rsend/send-c_lz4_disabled.ksh \ functional/rsend/send-c_mixed_compression.ksh \ functional/rsend/send-c_props.ksh \ diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib b/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib index 84b92b4dcdc9..4c52bebad081 100644 --- a/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib +++ b/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib @@ -77,7 +77,7 @@ function test_file_integrity typeset -r clone=$2 typeset -r filesize=$3 - typeset -r clone_checksum=$(sha256digest $clone) + typeset -r clone_checksum=$(xxh128digest $clone) if [[ $original_checksum != $clone_checksum ]]; then log_fail "Clone $clone is corrupted with file size $filesize" @@ -171,7 +171,7 @@ function bclone_test dsize=0 fi - typeset -r original_checksum=$(sha256digest $original) + typeset -r original_checksum=$(xxh128digest $original) sync_pool $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib b/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib index aeb8efe91715..dbb47563bebf 100644 --- a/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib +++ b/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib @@ -32,14 +32,14 @@ function first_half_checksum { typeset -r file=$1 - dd if=$file bs=$HALFRECORDSIZE count=1 2>/dev/null | sha256digest + dd if=$file bs=$HALFRECORDSIZE count=1 2>/dev/null | xxh128digest } function second_half_checksum { typeset -r file=$1 - dd if=$file bs=$HALFRECORDSIZE count=1 skip=1 2>/dev/null | 
sha256digest + dd if=$file bs=$HALFRECORDSIZE count=1 skip=1 2>/dev/null | xxh128digest } function bclone_corner_cases_init @@ -66,7 +66,7 @@ function bclone_corner_cases_init export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0) export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1) export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2) - export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 2>/dev/null | sha256digest) + export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 2>/dev/null | xxh128digest) export FIRST_HALF_CHECKSUM="" export SECOND_HALF_CHECKSUM="" } diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib index 50f3a3d262c0..d59329d8748e 100644 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib @@ -28,8 +28,8 @@ function have_same_content { - typeset hash1=$(md5digest $1) - typeset hash2=$(md5digest $2) + typeset hash1=$(xxh128digest $1) + typeset hash2=$(xxh128digest $2) log_must [ "$hash1" = "$hash2" ] } diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh index 702e23267f7e..f85e4f038a3e 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh @@ -110,9 +110,9 @@ log_note "Cloning entire file with copy_file_range across different enc" \ clone_and_check "file" "clone" $DS1 $DS2 "" true log_note "check if the file is still readable and the same after" \ "unmount and key unload, shouldn't fail" -typeset hash1=$(md5digest "/$DS1/file") +typeset hash1=$(xxh128digest "/$DS1/file") log_must zfs umount $DS1 && zfs unload-key $DS1 -typeset hash2=$(md5digest 
"/$DS2/clone") +typeset hash2=$(xxh128digest "/$DS2/clone") log_must [ "$hash1" = "$hash2" ] cleanup_enc @@ -144,12 +144,12 @@ log_must sync_pool $TESTPOOL log_must rm -f "/$DS1/file" "/$DS2/file" log_must sync_pool $TESTPOOL clone_and_check "file" "clone" "$DS2" "$DS1" "" true "s1" -typeset hash1=$(md5digest "/$DS1/.zfs/snapshot/s1/file") +typeset hash1=$(xxh128digest "/$DS1/.zfs/snapshot/s1/file") log_note "destroy the snapshot and check if the file is still readable and" \ "has the same content" log_must zfs destroy -r $DS2@s1 log_must sync_pool $TESTPOOL -typeset hash2=$(md5digest "/$DS1/file") +typeset hash2=$(xxh128digest "/$DS1/file") log_must [ "$hash1" = "$hash2" ] cleanup_enc diff --git a/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh b/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh index 945db71bf113..20498440bea7 100755 --- a/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh +++ b/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh @@ -75,7 +75,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) )) log_must set_tunable32 L2ARC_WRITE_MAX $(( $VCACHE_SZ * 2 )) diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh index d98ab86ab667..bd025c925496 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh @@ -47,8 +47,8 @@ sync_pool $TESTPOOL log_must eval "zfs send -ecL $snap > $tmpfile.1" log_must eval "zdb -B $TESTPOOL/$objsetid ecL > $tmpfile.2" -typeset sum1=$(cat $tmpfile.1 | md5sum) -typeset sum2=$(cat $tmpfile.2 | md5sum) +typeset sum1=$(xxh128digest $tmpfile.1) +typeset sum2=$(xxh128digest $tmpfile.2) log_must test "$sum1" = "$sum2" diff --git 
a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh index 7f8eb0b138ee..fec6fc041b47 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_compressed_corrective.ksh @@ -73,7 +73,7 @@ function test_corrective_recv log_must zpool status -v $TESTPOOL log_mustnot eval "zpool status -v $TESTPOOL | \ grep \"Permanent errors have been detected\"" - typeset cksum=$(md5digest $file) + typeset cksum=$(xxh128digest $file) [[ "$cksum" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum != $checksum)" } @@ -95,7 +95,7 @@ log_must zfs create -o primarycache=none \ log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync log_must eval "echo 'aaaaaaaa' >> "$file -typeset checksum=$(md5digest $file) +typeset checksum=$(xxh128digest $file) log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh index 261fc5eed8cb..44d4e2fa6ded 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh @@ -72,7 +72,7 @@ function test_corrective_recv log_must zpool status -v $TESTPOOL log_mustnot eval "zpool status -v $TESTPOOL | \ grep \"Permanent errors have been detected\"" - typeset cksum=$(md5digest $file) + typeset cksum=$(xxh128digest $file) [[ "$cksum" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum != $checksum)" } @@ -94,7 +94,7 @@ log_must zfs create -o primarycache=none \ log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync log_must eval "echo 'aaaaaaaa' >> "$file -typeset checksum=$(md5digest $file) +typeset 
checksum=$(xxh128digest $file) log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1 @@ -177,7 +177,7 @@ log_must zpool scrub -w $TESTPOOL log_must zpool status -v $TESTPOOL log_mustnot eval "zpool status -v $TESTPOOL | \ grep \"Permanent errors have been detected\"" -typeset cksum=$(md5digest $file) +typeset cksum=$(xxh128digest $file) [[ "$cksum" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh index 891432685287..fe9c0f1803a7 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh @@ -59,7 +59,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ "-o keyformat=passphrase $TESTPOOL/$TESTFS2" log_must mkfile 1M /$TESTPOOL/$TESTFS2/$TESTFILE0 -typeset checksum=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) +typeset checksum=$(xxh128digest /$TESTPOOL/$TESTFS2/$TESTFILE0) log_must zfs snapshot $snap @@ -69,14 +69,14 @@ log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" crypt=$(get_prop encryption $TESTPOOL/$TESTFS1/c1) [[ "$crypt" == "off" ]] || log_fail "Received unencrypted stream as encrypted" -typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) +typeset cksum1=$(xxh128digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" log_note "Verify ZFS can receive into an encrypted child" log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS2/c1" -typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS2/c1/$TESTFILE0) +typeset cksum2=$(xxh128digest /$TESTPOOL/$TESTFS2/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" diff --git 
a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh index 05c2ece4654f..e078103a1be2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh @@ -60,7 +60,7 @@ log_must zfs create -o compress=zstd-$random_level $TESTPOOL/$TESTFS1 # Make a 5kb compressible file log_must eval cat $src_data $src_data $src_data $src_data $src_data \ "> /$TESTPOOL/$TESTFS1/$TESTFILE0" -typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) +typeset checksum=$(xxh128digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap @@ -79,7 +79,7 @@ log_note "ZSTD src: size=$zstd_size1 version=$zstd_version1 level=$zstd_level1" log_note "Verify ZFS can receive the ZSTD compressed stream" log_must eval "zfs send -ec $snap | zfs receive $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) +typeset cksum1=$(xxh128digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_large_block_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_large_block_corrective.ksh index fbcd33f60704..0958b7c1bbf3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_large_block_corrective.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_large_block_corrective.ksh @@ -73,7 +73,7 @@ function test_corrective_recv log_must zpool status -v $TESTPOOL log_mustnot eval "zpool status -v $TESTPOOL | \ grep \"Permanent errors have been detected\"" - typeset cksum=$(md5digest $file) + typeset cksum=$(xxh128digest $file) [[ "$cksum" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum != $checksum)" } @@ -96,7 +96,7 @@ log_must zfs 
create -o recordsize=1m -o primarycache=none \ log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync log_must eval "echo 'aaaaaaaa' >> "$file -typeset checksum=$(md5digest $file) +typeset checksum=$(xxh128digest $file) log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh index 32b05e527ad3..6b4425fd1db2 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -61,7 +61,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ "-o keyformat=passphrase $TESTPOOL/$TESTFS1" log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 -typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) +typeset checksum=$(xxh128digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap @@ -74,7 +74,7 @@ keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS2) log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) +typeset cksum1=$(xxh128digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" @@ -85,7 +85,7 @@ keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS1/c1) log_fail "Expected keystatus unavailable, got $keystatus" log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/c1" -typeset cksum2=$(md5digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) +typeset cksum2=$(xxh128digest /$TESTPOOL/$TESTFS1/c1/$TESTFILE0) [[ "$cksum2" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum2 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh index 7826ec9a4890..c33bba53291a 100755 --- 
a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh @@ -69,7 +69,7 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ log_must zfs snapshot $snap1 log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 -typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0) +typeset checksum=$(xxh128digest /$TESTPOOL/$TESTFS1/$TESTFILE0) log_must zfs snapshot $snap2 @@ -89,7 +89,7 @@ log_must zfs unload-key $TESTPOOL/$TESTFS2 log_must eval "zfs receive $TESTPOOL/$TESTFS2 < $ibackup" log_must eval "echo $passphrase2 | zfs mount -l $TESTPOOL/$TESTFS2" -typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0) +typeset cksum1=$(xxh128digest /$TESTPOOL/$TESTFS2/$TESTFILE0) [[ "$cksum1" == "$checksum" ]] || \ log_fail "Checksums differ ($cksum1 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib index 4987bc3a9dac..765be8ac0dd1 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib @@ -119,9 +119,8 @@ function check_feature_set while read line; do typeset flag=1 - if [[ "$line" == "#*" ]]; then - continue - fi + # Skip comments + [[ $line = \#* ]] && continue for set in "$@"; do if ! 
grep -q "$line" $ZPOOL_COMPAT_DIR/$set; then diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_005_pos.ksh index 4b0618017e38..e10d2936cd3d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_005_pos.ksh @@ -75,8 +75,8 @@ log_onexit cleanup # excluded because other features depend on them. set -A features \ "hole_birth" \ - "large_blocks" \ "large_dnode" \ + "longname" \ "userobj_accounting" typeset -i i=0 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 50c1b7a9d09e..e1fe865b1d3b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -110,5 +110,7 @@ if is_linux || is_freebsd; then "feature@vdev_zaps_v2" "feature@raidz_expansion" "feature@fast_dedup" + "feature@longname" + "feature@large_microzap" ) fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh index ce9885904b01..5d0bcfc222df 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh @@ -50,7 +50,7 @@ function dev_checksum log_note "Compute checksum of '$dev'" - md5digest $dev || + xxh128digest $dev || log_fail "Failed to compute checksum of '$dev'" } diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh index af6ac8d78e4e..e63b82be84bd 100755 
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh @@ -79,11 +79,11 @@ function test_devices_missing log_must set_spa_load_verify_data 0 log_must zpool import -o readonly=on -d $DEVICE_DIR $TESTPOOL1 - log_must verify_data_md5sums $MD5FILE + log_must verify_data_hashsums $MD5FILE log_note "Try reading second batch of data, make sure pool doesn't" \ "get suspended." - verify_data_md5sums $MD5FILE >/dev/null 2>&1 + verify_data_hashsums $MD5FILE >/dev/null 2>&1 log_must_busy zpool export $TESTPOOL1 @@ -95,8 +95,8 @@ function test_devices_missing log_must set_zfs_max_missing_tvds 0 log_must zpool import -d $DEVICE_DIR $TESTPOOL1 - log_must verify_data_md5sums $MD5FILE - log_must verify_data_md5sums $MD5FILE2 + log_must verify_data_hashsums $MD5FILE + log_must verify_data_hashsums $MD5FILE2 # Cleanup log_must zpool destroy $TESTPOOL1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh index 4b6fcbd80af1..ee0403135dfc 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh @@ -23,7 +23,7 @@ # # STRATEGY: # 1. Create a pool. -# 2. Generate files and remember their md5sum. +# 2. Generate files and remember their hashsum. # 3. Note last synced txg. # 4. Take a snapshot to make sure old blocks are not overwritten. # 5. Perform zpool add/attach/detach/remove operation. 
@@ -134,7 +134,7 @@ function test_common log_must zpool export $TESTPOOL1 if zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1; then - verify_data_md5sums $MD5FILE && retval=0 + verify_data_hashsums $MD5FILE && retval=0 log_must check_pool_config $TESTPOOL1 "$poolcheck" log_must zpool destroy $TESTPOOL1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh index b03b39d178ca..6fdb9b26f26c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh @@ -23,7 +23,7 @@ # # STRATEGY: # 1. Create a pool. -# 2. Generate files and remember their md5sum. +# 2. Generate files and remember their hashsum. # 3. Sync a few times and note last synced txg. # 4. Take a snapshot to make sure old blocks are not overwritten. # 5. Initiate device replacement and export the pool. 
Special care must @@ -117,7 +117,7 @@ function test_replace_vdev log_must zpool import -d $DEVICE_DIR -o readonly=on -T $txg $TESTPOOL1 log_must check_pool_config $TESTPOOL1 "$poolcreate" - log_must verify_data_md5sums $MD5FILE + log_must verify_data_hashsums $MD5FILE log_must zpool export $TESTPOOL1 @@ -137,7 +137,7 @@ function test_replace_vdev log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1 log_must check_pool_config $TESTPOOL1 "$poolcreate" - log_must verify_data_md5sums $MD5FILE + log_must verify_data_hashsums $MD5FILE # Cleanup log_must zpool destroy $TESTPOOL1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg index cf9c6a8499af..df951be0864e 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg @@ -45,8 +45,8 @@ export MYTESTFILE=$STF_SUITE/include/libtest.shlib export CPATH=$TEST_BASE_DIR/cachefile.$$ export CPATHBKP=$TEST_BASE_DIR/cachefile.$$.bkp export CPATHBKP2=$TEST_BASE_DIR/cachefile.$$.bkp2 -export MD5FILE=$TEST_BASE_DIR/md5sums.$$ -export MD5FILE2=$TEST_BASE_DIR/md5sums.$$.2 +export MD5FILE=$TEST_BASE_DIR/hashsums.$$ +export MD5FILE2=$TEST_BASE_DIR/hashsums.$$.2 export GROUP_NUM=3 typeset -i num=0 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib index 50157fa80578..7f9eb43e79b1 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib @@ -79,29 +79,29 @@ function write_some_data # Checksum all the files and store digests in a file. # # newdata: overwrite existing files if false. 
-# md5file: file where to store md5 digests +# hashfile: file where to store xxh128 digests # datasetname: base name for datasets # function _generate_data_common { typeset pool=$1 typeset newdata=$2 - typeset md5file=$3 + typeset hashfile=$3 typeset datasetname=$4 typeset -i datasets=3 typeset -i files=5 typeset -i blocks=10 - [[ -n $md5file ]] && rm -f $md5file + [[ -n $hashfile ]] && rm -f $hashfile for i in {1..$datasets}; do ( $newdata ) && log_must zfs create "$pool/$datasetname$i" for j in {1..$files}; do typeset file="/$pool/$datasetname$i/file$j" dd if=/dev/urandom of=$file bs=128k count=$blocks > /dev/null - if [[ -n $md5file ]]; then - typeset cksum=$(md5digest $file) - echo $cksum $file >> $md5file + if [[ -n $hashfile ]]; then + typeset cksum=$(xxh128digest $file) + echo $cksum $file >> $hashfile fi done ( $newdata ) && sync_pool "$pool" @@ -113,39 +113,39 @@ function _generate_data_common function generate_data { typeset pool=$1 - typeset md5file="$2" + typeset hashfile="$2" typeset datasetname=${3:-ds} - _generate_data_common $pool true "$md5file" $datasetname + _generate_data_common $pool true "$hashfile" $datasetname } function overwrite_data { typeset pool=$1 - typeset md5file="$2" + typeset hashfile="$2" typeset datasetname=${3:-ds} - _generate_data_common $1 false "$md5file" $datasetname + _generate_data_common $1 false "$hashfile" $datasetname } # -# Verify md5sums of every file in md5sum file $1. +# Verify hashsums of every file in hashsum file $1. # -function verify_data_md5sums +function verify_data_hashsums { - typeset md5file=$1 + typeset hashfile=$1 - if [[ ! -f $md5file ]]; then - log_note "md5 sums file '$md5file' doesn't exist" + if [[ ! 
-f $hashfile ]]; then + log_note "md5 sums file '$hashfile' doesn't exist" return 1 fi while read -r digest file; do - typeset digest1=$(md5digest $file) + typeset digest1=$(xxh128digest $file) if [[ "$digest1" != "$digest" ]]; then return 1 fi - done < $md5file + done < $hashfile return 0 } diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh index dcb1ac1ab69f..c4273d050422 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh @@ -28,9 +28,8 @@ # 2. Create a pool. # 3. Backup the cachefile. # 4. Simulate the pool being torn down without export: -# 4.1. Copy the underlying device state. -# 4.2. Export the pool. -# 4.3. Restore the device state from the copy. +# 4.1. Sync then freeze the pool. +# 4.2. Export the pool (uncleanly). # 5. Change the hostid. # 6. Verify that importing the pool from the cachefile fails. # 7. Verify that importing the pool from the cachefile with force @@ -57,10 +56,9 @@ log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0 log_must cp $CPATH $CPATHBKP # 4. Simulate the pool being torn down without export. -log_must cp $VDEV0 $VDEV0.bak +sync_pool $TESTPOOL1 +log_must zpool freeze $TESTPOOL1 log_must zpool export $TESTPOOL1 -log_must cp -f $VDEV0.bak $VDEV0 -log_must rm -f $VDEV0.bak # 5. Change the hostid. 
log_must zgenhostid -f $HOSTID2 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh index ad8cca642dbc..abbe9acff629 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh @@ -27,8 +27,8 @@ # 1. Set a hostid. # 2. Create a pool. # 3. Simulate the pool being torn down without export: -# 3.1. Copy the underlying device state. -# 3.2. Export the pool. +# 3.1. Sync then freeze the pool. +# 3.2. Export the pool (uncleanly). # 3.3. Restore the device state from the copy. # 4. Change the hostid. # 5. Verify that importing the pool fails. @@ -52,10 +52,9 @@ log_must zgenhostid -f $HOSTID1 log_must zpool create $TESTPOOL1 $VDEV0 # 3. Simulate the pool being torn down without export. -log_must cp $VDEV0 $VDEV0.bak +sync_pool $TESTPOOL1 +log_must zpool freeze $TESTPOOL1 log_must zpool export $TESTPOOL1 -log_must cp -f $VDEV0.bak $VDEV0 -log_must rm -f $VDEV0.bak # 4. Change the hostid. 
log_must zgenhostid -f $HOSTID2 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/Makefile.am deleted file mode 100644 index 87d46b394015..000000000000 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_reguid -dist_pkgdata_SCRIPTS = \ - setup.ksh \ - cleanup.ksh \ - zpool_reguid_001_pos.ksh \ - zpool_reguid_002_neg.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh index 4e18abd988cd..3fea6c3bb0f0 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_001_pos.ksh @@ -62,12 +62,12 @@ if [[ "$initial_guid" == "$new_guid" ]]; then log_fail "GUID change failed; GUID has not changed: $initial_guid" fi -for g in "$(bc -e '2^64 - 1')" 0; do +for g in "$(echo '2^64 - 1' | bc)" "314"; do set_guid "$g" done # zpool-reguid(8) will strip the leading 0. set_guid 0123 "123" # GUID "-1" is effectively 2^64 - 1 in value. -set_guid -1 "$(bc -e '2^64 - 1')" +set_guid -1 "$(echo '2^64 - 1' | bc)" log_pass "'zpool reguid' changes GUID as expected." 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_002_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_002_neg.ksh index 599041e284e2..4de0a05915d9 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reguid/zpool_reguid_002_neg.ksh @@ -46,7 +46,7 @@ check_guid() { log_assert "Verify 'zpool reguid' does not accept invalid GUIDs" -for ig in "$(bc -e '2^64')" 0xA 0xa; do +for ig in "$(echo '2^64' | bc)" 0xA 0xa 0; do check_guid "$ig" done diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh index 6ce054cdb4a1..3b9851d0d079 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_003_pos.ksh @@ -65,7 +65,7 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "unavail" TESTFILE=/$TESTPOOL/data log_must generate_random_file /$TESTPOOL/data $LARGE_FILE_SIZE sync_pool $TESTPOOL -TESTFILE_MD5=$(md5digest $TESTFILE) +TESTFILE_MD5=$(xxh128digest $TESTFILE) # 4. Execute scrub. # add delay to I/O requests for remaining disk in pool @@ -89,7 +89,7 @@ log_must is_scan_restarted $TESTPOOL # 8. Put another device offline and check if the test file checksum is correct. 
log_must zpool offline $TESTPOOL $DISK2 -CHECK_MD5=$(md5digest $TESTFILE) +CHECK_MD5=$(xxh128digest $TESTFILE) [[ $CHECK_MD5 == $TESTFILE_MD5 ]] || \ log_fail "Checksums differ ($CHECK_MD5 != $TESTFILE_MD5)" log_must zpool online $TESTPOOL $DISK2 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh index 6be2ad5a7410..4fb900c73cf6 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh @@ -69,12 +69,12 @@ for raid_type in "draid2:3d:6c:1s" "raidz2"; do log_mustnot eval "zpool status -e $TESTPOOL2 | grep ONLINE" # Check no ONLINE slow vdevs are show. Then mark IOs greater than - # 10ms slow, delay IOs 20ms to vdev6, check slow IOs. + # 160ms slow, delay IOs 320ms to vdev6, check slow IOs. log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev6 "ONLINE" log_mustnot eval "zpool status -es $TESTPOOL2 | grep ONLINE" - log_must set_tunable64 ZIO_SLOW_IO_MS 10 - log_must zinject -d $TESTDIR/vdev6 -D20:100 $TESTPOOL2 + log_must set_tunable64 ZIO_SLOW_IO_MS 160 + log_must zinject -d $TESTDIR/vdev6 -D320:100 $TESTPOOL2 log_must mkfile 1048576 /$TESTPOOL2/testfile sync_pool $TESTPOOL2 log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO diff --git a/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh b/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh index 57f6b6a0242b..1d3cbfc79ee6 100755 --- a/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh +++ b/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh @@ -36,7 +36,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # # DESCRIPTION: diff --git a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh 
b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh index f7b8a4b950d5..460c95bb6051 100755 --- a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh @@ -37,7 +37,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # # DESCRIPTION: diff --git a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh index 0838b2c93e68..2f352e2af5d4 100755 --- a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh +++ b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh @@ -37,7 +37,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # # DESCRIPTION: diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh index 4db968ffae05..418c41fe659a 100755 --- a/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh @@ -61,8 +61,8 @@ function cleanup function verify_copy { - src_cksum=$(sha256digest $1) - dst_cksum=$(sha256digest $2) + src_cksum=$(xxh128digest $1) + dst_cksum=$(xxh128digest $2) if [[ "$src_cksum" != "$dst_cksum" ]]; then log_must ls -l $CP_TESTDIR diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh index 34a8821725a6..f1561b7282e5 100755 --- a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh +++ b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh @@ -62,7 +62,7 @@ log_must set_tunable64 DEADMAN_FAILMODE "wait" default_setup_noexit $DISK1 log_must zpool events -c -# Force each IO to take 10s by allow 
them to run concurrently. +# Force each IO to take 10s but allow them to run concurrently. log_must zinject -d $DISK1 -D10000:10 $TESTPOOL mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) @@ -80,11 +80,11 @@ else fi log_must zpool events -# Verify at least 4 deadman events were logged. The first after 5 seconds, +# Verify at least 3 deadman events were logged. The first after 5 seconds, # and another each second thereafter until the delay is clearer. events=$(zpool events | grep -c ereport.fs.zfs.deadman) -if [ "$events" -lt 4 ]; then - log_fail "Expect >=5 deadman events, $events found" +if [ "$events" -lt 3 ]; then + log_fail "Expect >=3 deadman events, $events found" fi log_pass "Verify spa deadman detected a hung txg and $events deadman events" diff --git a/tests/zfs-tests/tests/functional/direct/cleanup.ksh b/tests/zfs-tests/tests/functional/direct/cleanup.ksh new file mode 100755 index 000000000000..75fe97f923d2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/cleanup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. 
$STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_cleanup_noexit + +if tunable_exists DIO_ENABLED ; then + log_must restore_tunable DIO_ENABLED +fi + +log_pass diff --git a/tests/zfs-tests/tests/functional/direct/dio.cfg b/tests/zfs-tests/tests/functional/direct/dio.cfg new file mode 100644 index 000000000000..6472610d7b41 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio.cfg @@ -0,0 +1,26 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +DIO_VDEV1=$TEST_BASE_DIR/file1 +DIO_VDEV2=$TEST_BASE_DIR/file2 +DIO_VDEV3=$TEST_BASE_DIR/file3 +DIO_VDEVS="$DIO_VDEV1 $DIO_VDEV2 $DIO_VDEV3" + +DIO_FILESIZE=4M +DIO_BS=128K diff --git a/tests/zfs-tests/tests/functional/direct/dio.kshlib b/tests/zfs-tests/tests/functional/direct/dio.kshlib new file mode 100644 index 000000000000..3a70cf293967 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio.kshlib @@ -0,0 +1,331 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/direct/dio.cfg + +function dio_cleanup +{ + if poolexists $TESTPOOL1; then + destroy_pool $TESTPOOL1 + fi + + rm -f $DIO_VDEVS +} + +# +# Generate an IO workload using fio and then verify the resulting data. +# +function dio_and_verify # mode file-size block-size directory ioengine extra-args +{ + typeset mode=$1 + typeset size=$2 + typeset bs=$3 + typeset mntpnt=$4 + typeset ioengine=$5 + typeset extra_args=$6 + + # Invoke an fio workload via Direct I/O and verify with Direct I/O. + log_must fio --directory=$mntpnt --name=direct-$mode \ + --rw=$mode --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --verify=sha1 --ioengine=$ioengine --fallocate=none \ + --group_reporting --minimal --do_verify=1 $extra_args + + # Now just read back the file without Direct I/O into the ARC as an + # additional verification step. + log_must fio --directory=$mntpnt --name=direct-$mode \ + --rw=read --size=$size --bs=$bs --direct=0 --numjobs=1 \ + --ioengine=$ioengine --group_reporting --minimal + + log_must rm -f "$mntpnt"/direct-* # glob unquoted so it expands +} + +# +# Get zpool status -d checksum verify failures +# +function get_zpool_status_chksum_verify_failures # pool_name vdev_type +{ + typeset pool=$1 + typeset vdev_type=$2 + + if [[ "$vdev_type" == "stripe" ]]; then + val=$(zpool status -dp $pool | \ + awk '{s+=$6} END {print s}' ) + elif [[ "$vdev_type" == "mirror" || "$vdev_type" == "raidz" || + "$vdev_type" == "draid" ]]; then + val=$(zpool status -dp $pool | \ + awk -v d="$vdev_type" '$0 ~ d {print $6}' ) + else + log_fail "Unsupported VDEV type in \ + get_zpool_status_chksum_verify_failures(): $vdev_type" + fi + echo "$val" +} + +# +# Get ZED dio_verify events +# +function get_zed_dio_verify_events # pool +{ + typeset pool=$1 + + val=$(zpool events $pool | grep -c dio_verify) + + echo "$val" +} + +# +# Checking for checksum verify write failures with: +# zpool status -d +# zpool events +# After getting the counts, clears out the ZPool errors and events +# +function
check_dio_write_chksum_verify_failures # pool vdev_type expect_errors +{ + typeset pool=$1 + typeset vdev_type=$2 + typeset expect_errors=$3 + typeset note_str="expecting none" + + if [[ $expect_errors -ne 0 ]]; then + note_str="expecting some" + fi + + log_note "Checking for Direct I/O write checksum verify errors \ + $note_str on ZPool: $pool" + + status_failures=$(get_zpool_status_chksum_verify_failures $pool $vdev_type) + zed_dio_verify_events=$(get_zed_dio_verify_events $pool) + + if [[ $expect_errors -ne 0 ]]; then + if [[ $status_failures -eq 0 || + $zed_dio_verify_events -eq 0 ]]; then + zpool status -dp $pool + zpool events $pool + log_fail "Checksum verifies in zpool status -d \ + $status_failures. ZED dio_verify events \ + $zed_dio_verify_events. Neither should be 0." + fi + else + if [[ $status_failures -ne 0 || + $zed_dio_verify_events -ne 0 ]]; then + zpool status -dp $pool + zpool events $pool + log_fail "Checksum verifies in zpool status -d \ + $status_failures. ZED dio_verify events \ + $zed_dio_verify_events. Both should be zero." + fi + fi + + log_must zpool clear $pool + log_must zpool events -c + +} + +# +# Get the value of a counter from +# Linux: /proc/spl/kstat/zfs/$pool/iostats file. +# FreeBSD: kstat.zfs.$pool.misc.iostats.$stat +# +function get_iostats_stat # pool stat +{ + typeset pool=$1 + typeset stat=$2 + + if is_linux; then + iostats_file=/proc/spl/kstat/zfs/$pool/iostats + val=$(grep -m1 "$stat" $iostats_file | awk '{ print $3 }') + else + val=$(sysctl -n kstat.zfs.$pool.misc.iostats.$stat) + fi + if [[ -z "$val" ]]; then + log_fail "Unable to read $stat counter" + fi + + echo "$val" +} + +# +# Evict any buffered blocks by overwriting them using an O_DIRECT request. +# +function evict_blocks +{ + typeset pool=$1 + typeset file=$2 + typeset size=$3 + + log_must stride_dd -i /dev/urandom -o $file -b $size -c 1 -D +} + +# +# Perform FIO Direct I/O writes to a file with the given arguments.
+# Then verify the minimum expected number of blocks were written as +# Direct I/O. +# +function verify_dio_write_count # pool bs size mntpnt +{ + typeset pool=$1 + typeset bs=$2 + typeset size=$3 + typeset mntpnt=$4 + typeset dio_wr_expected=$(((size / bs) -1)) + + log_note "Checking for $dio_wr_expected Direct I/O writes" + + prev_dio_wr=$(get_iostats_stat $pool direct_write_count) + dio_and_verify write $size $bs $mntpnt "sync" + curr_dio_wr=$(get_iostats_stat $pool direct_write_count) + dio_wr_actual=$((curr_dio_wr - prev_dio_wr)) + + if [[ $dio_wr_actual -lt $dio_wr_expected ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Direct writes $dio_wr_actual of $dio_wr_expected" + fi +} + +# +# Perform a stride_dd write command to the file with the given arguments. +# Then verify the minimum expected number of blocks were written as either +# buffered IO (by the ARC), or Direct I/O to the application (dd). +# +function check_write # pool file bs count seek flags buf_wr dio_wr +{ + typeset pool=$1 + typeset file=$2 + typeset bs=$3 + typeset count=$4 + typeset seek=$5 + typeset flags=$6 + typeset buf_wr_expect=$7 + typeset dio_wr_expect=$8 + + log_note "Checking $count * $bs write(s) at offset $seek, $flags" + + prev_buf_wr=$(get_iostats_stat $pool arc_write_count) + prev_dio_wr=$(get_iostats_stat $pool direct_write_count) + + log_must stride_dd -i /dev/urandom -o $file -b $bs -c $count \ + -k $seek $flags + + curr_buf_wr=$(get_iostats_stat $pool arc_write_count) + buf_wr_actual=$((curr_buf_wr - prev_buf_wr)) + + curr_dio_wr=$(get_iostats_stat $pool direct_write_count) + dio_wr_actual=$((curr_dio_wr - prev_dio_wr)) + + if [[ $buf_wr_actual -lt $buf_wr_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Buffered writes $buf_wr_actual of $buf_wr_expect" + fi + + if [[ $dio_wr_actual -lt $dio_wr_expect ]];
then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Direct writes $dio_wr_actual of $dio_wr_expect" + fi +} + +# +# Perform a stride_dd read command to the file with the given arguments. +# Then verify the minimum expected number of blocks were read as either +# buffered IO (by the ARC), or Direct I/O to the application (dd). +# +function check_read # pool file bs count skip flags buf_rd dio_rd +{ + typeset pool=$1 + typeset file=$2 + typeset bs=$3 + typeset count=$4 + typeset skip=$5 + typeset flags=$6 + typeset buf_rd_expect=$7 + typeset dio_rd_expect=$8 + + log_note "Checking $count * $bs read(s) at offset $skip, $flags" + + prev_buf_rd=$(get_iostats_stat $pool arc_read_count) + prev_dio_rd=$(get_iostats_stat $pool direct_read_count) + + log_must stride_dd -i $file -o /dev/null -b $bs -c $count \ + -p $skip $flags + + curr_buf_rd=$(get_iostats_stat $pool arc_read_count) + buf_rd_actual=$((curr_buf_rd - prev_buf_rd)) + + curr_dio_rd=$(get_iostats_stat $pool direct_read_count) + dio_rd_actual=$((curr_dio_rd - prev_dio_rd)) + + if [[ $buf_rd_actual -lt $buf_rd_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Buffered reads $buf_rd_actual of $buf_rd_expect" + fi + + if [[ $dio_rd_actual -lt $dio_rd_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Direct reads $dio_rd_actual of $dio_rd_expect" + fi +} + +function get_file_size +{ + typeset filename="$1" + + if is_linux; then + filesize=$(stat -c %s $filename) + else + filesize=$(stat -s $filename | awk '{print $8}' | grep -o '[0-9]\+') + fi + + echo $filesize +} + +function do_truncate_reduce +{ + typeset filename=$1 + typeset size=$2 + + filesize=$(get_file_size $filename) + eval "echo original filesize: $filesize" + if is_linux; then + truncate $filename -s 
$((filesize - size)) + else + truncate -s -$size $filename + fi + filesize=$(get_file_size $filename) + eval "echo new filesize after truncate: $filesize" +} diff --git a/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh new file mode 100755 index 000000000000..e26fbdfc2569 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh @@ -0,0 +1,115 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify the number direct/buffered requests for (un)aligned access +# +# STRATEGY: +# 1. Create a multi-block file +# 2. Perform various (un)aligned accesses and verify the result. 
+# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_onexit cleanup + +log_assert "Verify the number direct/buffered requests for unaligned access" + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +tmp_file=$mntpnt/tmp_file +file_size=$((rs * 8)) + +log_must stride_dd -i /dev/urandom -o $tmp_file -b $file_size -c 1 + +# N recordsize aligned writes which do not span blocks +check_write $TESTPOOL $tmp_file $rs 1 0 "-D" 0 1 +check_write $TESTPOOL $tmp_file $rs 2 0 "-D" 0 2 +check_write $TESTPOOL $tmp_file $rs 4 0 "-D" 0 4 +check_write $TESTPOOL $tmp_file $rs 8 0 "-D" 0 8 + +# 1 recordsize aligned write which spans multiple blocks at various offsets +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 0 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 1 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 2 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 3 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 4)) 1 0 "-D" 0 4 +check_write $TESTPOOL $tmp_file $((rs * 4)) 1 1 "-D" 0 4 +check_write $TESTPOOL $tmp_file $((rs * 8)) 1 0 "-D" 0 8 + +# sub-blocksize unaligned writes which do not span blocks. 
+check_write $TESTPOOL $tmp_file $((rs / 2)) 1 0 "-D" 1 0 +check_write $TESTPOOL $tmp_file $((rs / 2)) 1 1 "-D" 1 0 +check_write $TESTPOOL $tmp_file $((rs / 2)) 1 2 "-D" 1 0 +check_write $TESTPOOL $tmp_file $((rs / 2)) 1 3 "-D" 1 0 + +# large unaligned writes which span multiple blocks +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 $((rs / 2)) "-D -K" 2 1 +check_write $TESTPOOL $tmp_file $((rs * 4)) 2 $((rs / 4)) "-D -K" 4 6 + +# evict any cached blocks by overwriting with O_DIRECT +evict_blocks $TESTPOOL $tmp_file $file_size + +# recordsize aligned reads which do not span blocks +check_read $TESTPOOL $tmp_file $rs 1 0 "-d" 0 1 +check_read $TESTPOOL $tmp_file $rs 2 0 "-d" 0 2 +check_read $TESTPOOL $tmp_file $rs 4 0 "-d" 0 4 +check_read $TESTPOOL $tmp_file $rs 8 0 "-d" 0 8 + +# 1 recordsize aligned read which spans multiple blocks at various offsets +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 0 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 1 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 2 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 3 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 4)) 1 0 "-d" 0 4 +check_read $TESTPOOL $tmp_file $((rs * 4)) 1 1 "-d" 0 4 +check_read $TESTPOOL $tmp_file $((rs * 8)) 1 0 "-d" 0 8 + +# sub-blocksize unaligned reads which do not span blocks. 
+check_read $TESTPOOL $tmp_file $((rs / 2)) 1 0 "-d" 0 1 +check_read $TESTPOOL $tmp_file $((rs / 2)) 1 1 "-d" 0 1 +check_read $TESTPOOL $tmp_file $((rs / 2)) 1 2 "-d" 0 1 +check_read $TESTPOOL $tmp_file $((rs / 2)) 1 3 "-d" 0 1 + +# large unaligned reads which span multiple blocks +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 $((rs / 2)) "-d -P" 0 3 +check_read $TESTPOOL $tmp_file $((rs * 4)) 1 $((rs / 4)) "-d -P" 0 5 + +log_pass "Verify the number direct/buffered requests for (un)aligned access" diff --git a/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh b/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh new file mode 100755 index 000000000000..27fd66ccd216 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh @@ -0,0 +1,68 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify small async Direct I/O requests +# +# STRATEGY: +# 1. Use fio to issue small read/write requests.
Writes are +# smaller than the block size and thus will be buffered, +# reads satisfy the minimum alignment and will be direct. +# + +verify_runnable "global" + +function cleanup +{ + zfs set direct=standard $TESTPOOL/$TESTFS + rm $tmp_file +} + +log_assert "Verify direct=always mixed small async requests" + +log_onexit cleanup + +log_must zfs set direct=always $TESTPOOL/$TESTFS + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/tmp_file +page_size=$(getconf PAGESIZE) +file_size=1G +runtime=10 + +log_must truncate -s $file_size $tmp_file + +log_must fio --filename=$tmp_file --name=always-randrw \ + --rw=randwrite --bs=$page_size --size=$file_size --numjobs=1 \ + --ioengine=posixaio --fallocate=none --iodepth=4 --verify=sha1 \ + --group_reporting --minimal --runtime=$runtime --time_based + +log_pass "Verify direct=always mixed small async requests" diff --git a/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh b/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh new file mode 100755 index 000000000000..5492a5a90584 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh @@ -0,0 +1,106 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify FIO async engines work using Direct I/O. +# +# STRATEGY: +# 1. Select a FIO async ioengine +# 2. Start sequential Direct I/O and verify with buffered I/O +# 3. Start mixed Direct I/O and verify with buffered I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt"/direct-* # glob unquoted so it expands +} + +function check_fio_ioengine +{ + fio --ioengine=io_uring --parse-only > /dev/null 2>&1 + return $? +} + +log_assert "Verify FIO async ioengines work using Direct I/O." + +log_onexit cleanup + +typeset -a async_ioengine_args=("--iodepth=4" "--iodepth=4 --thread") + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +fio_async_ioengines="posixaio" + +if is_linux; then + fio_async_ioengines+=" libaio" + if grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r); then + if [ -e /etc/os-release ] ; then + source /etc/os-release + if [ "$PLATFORM_ID" = "platform:el9" ] ; then + log_note "io_uring disabled on RHEL 9 " \ + "variants: fails with " \ + "'Operation not permitted'" + elif check_fio_ioengine; then + fio_async_ioengines+=" io_uring" + else + log_note "io_uring not supported by fio and " \ + "will not be tested" + fi + else + if check_fio_ioengine; then + fio_async_ioengines+=" io_uring" + + else + log_note "io_uring not supported by fio and " \ + "will not be tested" + fi + fi + else + log_note "io_uring not supported by kernel will not " \ + "be tested" + + fi +fi + +for ioengine in $fio_async_ioengines; do + for ioengine_args in "${async_ioengine_args[@]}"; do +
for op in "rw" "randrw" "write"; do + log_note "Checking Direct I/O with FIO async ioengine" \ + " $ioengine with args $ioengine_args --rw=$op" + dio_and_verify $op $DIO_FILESIZE $DIO_BS $mntpnt "$ioengine" \ + "$ioengine_args" + done + done +done + +log_pass "Verified FIO async ioengines work using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_compression.ksh b/tests/zfs-tests/tests/functional/direct/dio_compression.ksh new file mode 100755 index 000000000000..5463715d7bab --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_compression.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify compression works using Direct I/O. +# +# STRATEGY: +# 1. Select a random compression algorithm +# 2. Start sequential Direct I/O and verify with buffered I/O +# 3. Start mixed Direct I/O and verify with buffered I/O +# 4.
Repeat from 2 for all compression algorithms +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt"/direct-* # glob unquoted so it expands + log_must zfs set compression=off $TESTPOOL/$TESTFS +} + +log_assert "Verify compression works using Direct I/O." + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +compress_args="--buffer_compress_percentage=50" + +for comp in "${compress_prop_vals[@]:1}"; do + log_must zfs set compression=$comp $TESTPOOL/$TESTFS + for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $DIO_BS $mntpnt "sync" $compress_args + done +done + +log_pass "Verified compression works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh b/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh new file mode 100755 index 000000000000..ba2b29eeca4e --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh @@ -0,0 +1,63 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +.
$STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify deduplication works. Deduplication is disabled when issuing +# Direct I/O writes. +# +# STRATEGY: +# 1. Enable dedup +# 2. Start sequential Direct I/O and verify with buffered I/O +# 3. Start mixed Direct I/O and verify with buffered I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt"/direct-* # glob unquoted so it expands + log_must zfs set dedup=off $TESTPOOL/$TESTFS +} + +log_assert "Verify deduplication works using Direct I/O." + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +dedup_args="--dedupe_percentage=50" + +log_must zfs set dedup=on $TESTPOOL/$TESTFS +for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $DIO_BS $mntpnt "sync" $dedup_args +done + +log_pass "Verified deduplication works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh b/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh new file mode 100755 index 000000000000..b6faa11970b3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify encryption works using Direct I/O. +# +# STRATEGY: +# 1. Create multidisk pool. +# 2. Start some mixed readwrite Direct I/O. +# 3. Verify the results are as expected using buffered I/O. +# + +verify_runnable "global" + +log_assert "Verify encryption works using Direct I/O." + +log_onexit dio_cleanup + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +create_pool $TESTPOOL1 $DIO_VDEVS +log_must eval "echo 'password' | zfs create -o encryption=on \ + -o keyformat=passphrase -o keylocation=prompt -o compression=off \ + $TESTPOOL1/$TESTFS1" + +mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + +for bs in "4k" "128k" "1m"; do + for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $bs $mntpnt "sync" + done +done + +log_pass "Verified encryption works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh new file mode 100755 index 000000000000..12b2f2127535 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify the number direct/buffered requests when growing a file +# +# STRATEGY: +# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_assert "Verify the number direct/buffered requests when growing a file" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +tmp_file=$mntpnt/tmp_file + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +# +# Verify the expected number of buffered and Direct I/O's when growing +# the first block of a file up to the maximum recordsize. +# +for bs in "8192" "16384" "32768" "65536" "131072"; do + + # When O_DIRECT is set the first write to a new file, or when the + # block size needs to be grown, it will be done as a buffered write. + check_write $TESTPOOL $tmp_file $bs 1 0 "-D" 1 0 + + # Overwriting the first block of an existing file with O_DIRECT will + # be a buffered write if less than the block size. + check_write $TESTPOOL $tmp_file 4096 1 0 "-D" 1 0 + check_write $TESTPOOL $tmp_file 4096 1 1 "-D" 1 0 + + # Overwriting the first block of an existing file with O_DIRECT will + # be a direct write as long as the block size matches. 
+ check_write $TESTPOOL $tmp_file $bs 1 0 "-D" 0 1 + + # Evict any blocks which may be buffered before the read tests. + evict_blocks $TESTPOOL $tmp_file $bs + + # Reading the first block of an existing file with O_DIRECT will + # be a direct read for part or all of the block size. + check_read $TESTPOOL $tmp_file $bs 1 0 "-d" 0 1 + check_read $TESTPOOL $tmp_file 4096 1 0 "-d" 0 1 + check_read $TESTPOOL $tmp_file 4096 1 1 "-d" 0 1 +done + +log_pass "Verify the number direct/buffered requests when growing a file" diff --git a/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh b/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh new file mode 100755 index 000000000000..2c0ce832b1fe --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify max recordsizes are supported for Direct I/O. +# +# STRATEGY: +# 1. 
Create a pool from each vdev type with varying recordsizes. +# 2. Start sequential Direct I/O and verify with buffered I/O. +# + +verify_runnable "global" + +log_assert "Verify max recordsizes are supported for Direct I/O." + +log_onexit dio_cleanup + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +for type in "" "mirror" "raidz" "draid"; do + for recsize in "2097152" "8388608" "16777216"; do + create_pool $TESTPOOL1 $type $DIO_VDEVS + log_must eval "zfs create \ + -o recordsize=$recsize -o compression=off \ + $TESTPOOL1/$TESTFS1" + + mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + + verify_dio_write_count $TESTPOOL1 $recsize $((4 * recsize)) \ + $mntpnt + + destroy_pool $TESTPOOL1 + done +done + +log_pass "Verified max recordsizes are supported for Direct I/O." diff --git a/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh b/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh new file mode 100755 index 000000000000..dc10b8a33dbe --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh @@ -0,0 +1,107 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +.
$STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify mixed buffered and Direct I/O are coherent. +# +# STRATEGY: +# 1. Verify interleaved buffered and Direct I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f $src_file $new_file $tmp_file +} + +log_assert "Verify mixed buffered and Direct I/O are coherent." + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +src_file=$mntpnt/src_file +new_file=$mntpnt/new_file +tmp_file=$mntpnt/tmp_file +page_size=$(getconf PAGESIZE) +file_size=1048576 + +log_must stride_dd -i /dev/urandom -o $src_file -b $file_size -c 1 + +# +# Using mixed input and output block sizes verify that buffered and +# Direct I/O can be interleaved and the result will always be coherent. +# +for ibs in "512" "$page_size" "131072"; do + for obs in "512" "$page_size" "131072"; do + iblocks=$(($file_size / $ibs)) + oblocks=$(($file_size / $obs)) + iflags="" + oflags="" + + # Only allow Direct I/O when it is at least page sized. + if [[ $ibs -ge $page_size ]]; then + iflags="-d" + fi + + if [[ $obs -ge $page_size ]]; then + oflags="-D" + fi + + # Verify buffered write followed by a direct read. + log_must stride_dd -i $src_file -o $new_file -b $obs \ + -c $oblocks + log_must stride_dd -i $new_file -o $tmp_file -b $ibs \ + -c $iblocks $iflags + log_must cmp_xxh128 $new_file $tmp_file + log_must rm -f $new_file $tmp_file + + # Verify direct write followed by a buffered read. + log_must stride_dd -i $src_file -o $new_file -b $obs \ + -c $oblocks $oflags + log_must stride_dd -i $new_file -o $tmp_file -b $ibs \ + -c $iblocks + log_must cmp_xxh128 $new_file $tmp_file + log_must rm -f $new_file $tmp_file + + # Verify direct write followed by a direct read.
+ log_must stride_dd -i $src_file -o $new_file -b $obs \ + -c $oblocks $oflags + log_must stride_dd -i $new_file -o $tmp_file -b $ibs \ + -c $iblocks $iflags + log_must cmp_xxh128 $new_file $tmp_file + log_must rm -f $new_file $tmp_file + done +done + +log_pass "Verify mixed buffered and Direct I/O are coherent." diff --git a/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh b/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh new file mode 100755 index 000000000000..fbd6afd7b391 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh @@ -0,0 +1,92 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify mixed Direct I/O and mmap I/O. +# +# STRATEGY: +# 1. Create an empty file. +# 2. Start a background Direct I/O random read/write fio to the +# file. +# 3. Start a background mmap random read/write fio to the file. 
+# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f "$tmp_file" +} + +log_assert "Verify mixed Direct I/O and mmap I/O" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/file +bs=$((128 * 1024)) +blocks=64 +size=$((bs * blocks)) +runtime=60 + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +log_must stride_dd -i /dev/zero -o $tmp_file -b $bs -c $blocks + +# Direct I/O writes +log_must eval "fio --filename=$tmp_file --name=direct-write \ + --rw=randwrite --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --ioengine=sync --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +# Direct I/O reads +log_must eval "fio --filename=$tmp_file --name=direct-read \ + --rw=randread --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --ioengine=sync --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +# mmap I/O writes +log_must eval "fio --filename=$tmp_file --name=mmap-write \ + --rw=randwrite --size=$size --bs=$bs --numjobs=1 \ + --ioengine=mmap --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +# mmap I/O reads +log_must eval "fio --filename=$tmp_file --name=mmap-read \ + --rw=randread --size=$size --bs=$bs --numjobs=1 \ + --ioengine=mmap --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +wait + +log_pass "Verfied mixed Direct I/O and mmap I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh b/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh new file mode 100755 index 000000000000..04973fc88632 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common 
Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify Direct I/O overwrite. +# +# STRATEGY: +# 1. Create an empty file. +# 2. Start a Direct I/O random write fio to the file. 
+# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f "$tmp_file" +} + +log_assert "Verify Direct I/O overwrites" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/file +bs=$((128 * 1024)) +blocks=64 +size=$((bs * blocks)) +runtime=60 + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +log_must stride_dd -i /dev/zero -o $tmp_file -b $bs -c $blocks + +# Direct I/O overwrites +log_must eval "fio --filename=$tmp_file --name=direct-write \ + --rw=randwrite --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --ioengine=sync --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap" + +log_pass "Verfied Direct I/O overwrites" diff --git a/tests/zfs-tests/tests/functional/direct/dio_property.ksh b/tests/zfs-tests/tests/functional/direct/dio_property.ksh new file mode 100755 index 000000000000..9e18f0bf787e --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_property.ksh @@ -0,0 +1,127 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. 
+# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify the direct=always|disabled|standard property +# +# STRATEGY: +# 1. Verify direct=always behavior +# 2. Verify direct=disabled behavior +# 3. Verify direct=standard behavior +# + +verify_runnable "global" + +function cleanup +{ + zfs set direct=standard $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_assert "Verify the direct=always|disabled|standard property" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) + +tmp_file=$mntpnt/tmp_file +page_size=$(getconf PAGESIZE) +file_size=1048576 +count=8 + +# +# Check when "direct=always" any aligned IO is done as direct. +# Note that the "-D" and "-d" flags are not set in the following calls to +# stride_dd. +# +log_must zfs set direct=always $TESTPOOL/$TESTFS + +log_note "Aligned writes (buffered, then all direct)" +check_write $TESTPOOL $tmp_file $rs $count 0 "" 1 $((count - 1)) + +log_note "Aligned overwrites" +check_write $TESTPOOL $tmp_file $rs $count 0 "" 0 $count + +log_note "Sub-recordsize unaligned overwrites" +check_write $TESTPOOL $tmp_file $((rs / 2)) $((2 * count)) 0 "" $((2 * count)) 0 + +log_note "Sub-page size aligned overwrites" +check_write $TESTPOOL $tmp_file 512 $count 0 "" $count 0 +evict_blocks $TESTPOOL $tmp_file $file_size + +log_note "Aligned reads" +check_read $TESTPOOL $tmp_file $rs $count 0 "" 0 $count + +log_note "Sub-recordsize unaligned reads" +check_read $TESTPOOL $tmp_file $((rs / 2)) $((count * 2)) 0 "" 0 $((2 * count)) + +log_note "Sub-page size aligned reads (one read then ARC hits)" +check_read $TESTPOOL $tmp_file 512 $count 0 "" 1 0 + +log_must rm -f $tmp_file + + +# +# Check when "direct=disabled" there are never any direct requests. +# Note that the "-D" and "-d" flags are always set in the following calls to +# stride_dd. 
+# +log_must zfs set direct=disabled $TESTPOOL/$TESTFS + +log_note "Aligned writes (all buffered with an extra for create)" +check_write $TESTPOOL $tmp_file $rs $count 0 "-D" $count 0 + +log_note "Aligned overwrites" +check_write $TESTPOOL $tmp_file $rs $count 0 "-D" $count 0 + +log_note "Aligned reads (all ARC hits)" +check_read $TESTPOOL $tmp_file $rs $count 0 "-d" 0 0 + +log_must rm -f $tmp_file + + +# +# Check when "direct=standard" only requested Direct I/O occur. +# +log_must zfs set direct=standard $TESTPOOL/$TESTFS + +log_note "Aligned writes/overwrites (buffered / direct)" +check_write $TESTPOOL $tmp_file $rs $count 0 "" $count 0 +check_write $TESTPOOL $tmp_file $rs $count 0 "-D" 0 $count + +log_note "Aligned reads (buffered / direct)" +evict_blocks $TESTPOOL $tmp_file $file_size +check_read $TESTPOOL $tmp_file $rs $count 0 "" $count 0 +evict_blocks $TESTPOOL $tmp_file $file_size +check_read $TESTPOOL $tmp_file $rs $count 0 "-d" 0 $count + +log_pass "Verify the direct=always|disabled|standard property" diff --git a/tests/zfs-tests/tests/functional/direct/dio_random.ksh b/tests/zfs-tests/tests/functional/direct/dio_random.ksh new file mode 100755 index 000000000000..abe8d5c0dca1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_random.ksh @@ -0,0 +1,82 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify mixed Direct I/O and buffered I/O. A workload of random +# but correctly aligned direct read/writes is mixed with a +# concurrent workload of entirely unaligned buffered read/writes. +# +# STRATEGY: +# 1. Create an empty file. +# 2. Start a background fio randomly issuing direct read/writes. +# 3. Start a background fio randomly issuing buffered read/writes. +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$tmp_file" +} + +log_assert "Verify randomly sized mixed Direct I/O and buffered I/O" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/file +bs=$((1024 * 1024)) +blocks=32 +size=$((bs * blocks)) +runtime=10 +page_size=$(getconf PAGESIZE) + +log_must stride_dd -i /dev/zero -o $tmp_file -b $bs -c $blocks + +# Direct random read/write page-aligned IO of varying sizes with +# occasional calls to fsync(2), mixed with... +log_must eval "fio --filename=$tmp_file --name=direct-rwrand \ + --rw=randrw --size=$size --offset_align=$(getconf PAGESIZE) \ + --bsrange=$page_size-1m --direct=1 --fsync=32 --numjobs=2 \ + --ioengine=sync --fallocate=none --verify=sha1 \ + --group_reporting --minimal --runtime=$runtime --time_based &" + +# Buffered random read/write entirely unaligned IO of varying sizes +# occasional calls to fsync(2). 
+log_must eval "fio --filename=$tmp_file --name=buffered-write \ + --rw=randrw --size=$size --offset_align=512 --bs_unaligned=1 \ + --bsrange=$page_size-1m --direct=0 --fsync=32 --numjobs=2 \ + --ioengine=sync --fallocate=none --verify=sha1 \ + --group_reporting --minimal --runtime=$runtime --time_based &" + +wait + +log_pass "Verfied randomly sized mixed Direct I/O and buffered I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh b/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh new file mode 100755 index 000000000000..def46822130d --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh @@ -0,0 +1,68 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify different recordsizes are supported for Direct I/O. +# +# STRATEGY: +# 1. Create a pool from each vdev type with varying recordsizes. +# 2. Start sequential Direct I/O and verify with buffered I/O. +# 3. 
Start mixed Direct I/O and verify with buffered I/O. +# + +verify_runnable "global" + +log_assert "Verify different recordsizes are supported for Direct I/O." + +log_onexit dio_cleanup + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +for type in "" "mirror" "raidz" "draid"; do + for recsize in "1024" "4096" "128k"; do + create_pool $TESTPOOL1 $type $DIO_VDEVS + log_must eval "zfs create \ + -o recordsize=$recsize -o compression=off \ + $TESTPOOL1/$TESTFS1" + + mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + + for bs in "4k" "128k"; do + for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $bs $mntpnt "sync" + done + done + + destroy_pool $TESTPOOL1 + done +done + +log_pass "Verified different recordsizes are supported for Direct I/O." diff --git a/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh new file mode 100755 index 000000000000..309d35ea0e6d --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. 
$STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify failure for (un)aligned O_DIRECT +# +# STRATEGY: +# 1. Create a multi-block file +# 2. Perform (un)aligned write/read verify the result. +# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + zfs set direct=standard $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_onexit cleanup + +log_assert "Verify direct requests for (un)aligned access" + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +tmp_file=$mntpnt/tmp_file +file_size=$((rs * 8)) + +log_must stride_dd -i /dev/urandom -o $tmp_file -b $file_size -c 1 + +log_must zfs set direct=standard $TESTPOOL/$TESTFS +# sub-pagesize direct writes/read will always fail if direct=standard. +log_mustnot stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D +log_mustnot stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 -d + +log_must zfs set direct=always $TESTPOOL/$TESTFS +# sub-pagesize direct writes/read will always pass if direct=always. +log_must stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 +log_must stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 + +log_must zfs set direct=disabled $TESTPOOL/$TESTFS +# sub-pagesize direct writes/read will always pass if direct=disabled. 
+log_must stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D +log_must stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 -d + +log_pass "Verify direct requests for (un)aligned access" diff --git a/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh b/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh new file mode 100755 index 000000000000..8bb363f1a983 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh @@ -0,0 +1,91 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify Direct I/O reads can read an entire file that is not +# page-aligned in length. When a file is not page-aligned in total +# length, as much that can be read using using O_DIRECT is done so and +# the rest is read using the ARC. O_DIRECT requires page-size alignment. +# +# STRATEGY: +# 1. Write a file that is page-aligned (buffered) +# 2. Truncate the file to be 512 bytes less +# 3. Export then import the Zpool flushing out the ARC +# 4. 
Read back the file using O_DIRECT +# 5. Verify the file is read back with both Direct I/O and buffered I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$filename" + log_must zfs set recordsize=$rs $TESTPOOL/$TESTFS +} + +log_assert "Verify Direct I/O reads can read an entire file that is not \ + page-aligned" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +bs=$((128 * 1024)) # bs=recordsize (128k) +filename="$mntpnt/testfile.iso" + +log_must stride_dd -i /dev/urandom -o $filename -b $bs -c 2 +# Truncating file so the total length is no longer page-size aligned +log_must do_truncate_reduce $filename 512 + +# Exporting the Zpool to make sure all future reads happen from the ARC +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL + +# Reading the file back using Direct I/O +prev_dio_read=$(get_iostats_stat $TESTPOOL direct_read_count) +prev_arc_read=$(get_iostats_stat $TESTPOOL arc_read_count) +log_must stride_dd -i $filename -o /dev/null -b $bs -e -d +curr_dio_read=$(get_iostats_stat $TESTPOOL direct_read_count) +curr_arc_read=$(get_iostats_stat $TESTPOOL arc_read_count) +total_dio_read=$((curr_dio_read - prev_dio_read)) +total_arc_read=$((curr_arc_read - prev_arc_read)) + +# We should see both Direct I/O reads and an ARC read to read the entire file that +# is not page-size aligned +if [[ $total_dio_read -lt 2 ]] || [[ $total_arc_read -lt 1 ]]; then + log_fail "Expect 2 reads from Direct I/O and 1 from the ARC but \ + Direct I/O: $total_dio_read ARC: $total_arc_read" +fi + +log_pass "Verified Direct I/O read can read a none page-aligned length file" diff --git a/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh b/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh new file mode 100755 index 000000000000..efc9ee639184 --- /dev/null +++
b/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh @@ -0,0 +1,103 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify stable pages work for O_DIRECT writes. +# +# STRATEGY: +# 1. Start a Direct I/O write workload while manipulating the user +# buffer. +# 2. Verify we can Read the contents of the file using buffered reads. +# 3. Verify there is no checksum errors reported from zpool status. +# 4. Repeat steps 1 and 2 for 3 iterations. +# 5. Repeat 1-3 but with compression disabled. +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt/direct-write.iso" + check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 +} + +log_assert "Verify stable pages work for Direct I/O writes." 
+ +if is_linux; then + log_unsupported "Linux does not support stable pages for O_DIRECT \ + writes" +fi + +log_onexit cleanup + +ITERATIONS=3 +NUMBLOCKS=300 +BS=$((128 * 1024)) #128k +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +for compress in "on" "off"; +do + log_must zfs set compression=$compress $TESTPOOL/$TESTFS + + for i in $(seq 1 $ITERATIONS); do + log_note "Verifying stable pages for Direct I/O writes \ + iteration $i of $ITERATIONS" + + prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + + # Manipulate the user's buffer while running O_DIRECT write + # workload with the buffer. + log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS + + # Reading back the contents of the file + log_must stride_dd -i $mntpnt/direct-write.iso -o /dev/null \ + -b $BS -c $NUMBLOCKS + + curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + + log_note "Making sure we have Direct I/O writes logged" + if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" + fi + + # Making sure there are no data errors for the zpool + log_note "Making sure there are no checksum errors with the ZPool" + log_must check_pool_status $TESTPOOL "errors" \ + "No known data errors" + + log_must rm -f "$mntpnt/direct-write.iso" + done +done + +log_pass "Verified stable pages work for Direct I/O writes." diff --git a/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh b/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh new file mode 100755 index 000000000000..536459a35e6c --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh @@ -0,0 +1,196 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify checksum verify works for Direct I/O writes. +# +# STRATEGY: +# 1. Set the module parameter zfs_vdev_direct_write_verify to 0. +# 2. Check that manipulating the user buffer while Direct I/O writes are +# taking place does not cause any panics with compression turned on. +# 3. Start a Direct I/O write workload while manipulating the user buffer +# without compression. +# 4. Verify there are Direct I/O write verify failures using +# zpool status -d and checking for zevents. We also make sure there +# are reported data errors when reading the file back. +# 5. Repeat steps 3 and 4 for 3 iterations. +# 6. Set zfs_vdev_direct_write_verify to 1 and repeat 3. +# 7. Verify there are Direct I/O write verify failures using +# zpool status -d and checking for zevents. We also make sure +# there are no reported data errors when reading the file back because +# with us checking every Direct I/O write and on checksum validation +# failure those writes will not be committed to a VDEV.
+# + +verify_runnable "global" + +function cleanup +{ + # Clearing out DIO counts for Zpool + log_must zpool clear $TESTPOOL + # Clearing out dio_verify from event logs + log_must zpool events -c + log_must set_tunable32 VDEV_DIRECT_WR_VERIFY $DIO_WR_VERIFY_TUNABLE +} + +log_assert "Verify checksum verify works for Direct I/O writes." + +if is_freebsd; then + log_unsupported "FreeBSD is capable of stable pages for O_DIRECT writes" +fi + +log_onexit cleanup + +ITERATIONS=3 +NUMBLOCKS=300 +BS=$((128 * 1024)) # 128k +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +typeset DIO_WR_VERIFY_TUNABLE=$(get_tunable VDEV_DIRECT_WR_VERIFY) + +# Get a list of vdevs in our pool +set -A array $(get_disklist_fullpath $TESTPOOL) + +# Get the first vdev +firstvdev=${array[0]} + +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS +log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 0 + +# First we will verify there are no panics while manipulating the contents of +# the user buffer during Direct I/O writes with compression. The contents +# will always be copied out of the ABD and there should never be any ABD ASSERT +# failures +log_note "Verifying no panics for Direct I/O writes with compression" +log_must zfs set compression=on $TESTPOOL/$TESTFS +prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) +log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" -n $NUMBLOCKS \ + -b $BS +curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) +total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + +log_note "Making sure we have Direct I/O writes logged" +if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" +fi + +# Clearing out DIO counts for Zpool +log_must zpool clear $TESTPOOL +# Clearing out dio_verify from event logs +log_must zpool events -c +log_must rm -f "$mntpnt/direct-write.iso" + +# Next we will verify there are checksum errors for Direct I/O writes while +# manipulating the contents of the user pages. 
+log_must zfs set compression=off $TESTPOOL/$TESTFS + +for i in $(seq 1 $ITERATIONS); do + log_note "Verifying Direct I/O write checksums iteration \ + $i of $ITERATIONS with zfs_vdev_direct_write_verify=0" + + prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS + + # Reading file back to verify checksum errors + filesize=$(get_file_size "$mntpnt/direct-write.iso") + num_blocks=$((filesize / BS)) + log_mustnot stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \ + -c $num_blocks + + # Getting the new Direct I/O write count. + curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + + # Verifying there are checksum errors + log_note "Making sure there are checksum errors for the ZPool" + cksum=$(zpool status -P -v $TESTPOOL | awk -v v="$firstvdev" '$0 ~ v \ + {print $5}') + if [[ $cksum -eq 0 ]]; then + zpool status -P -v $TESTPOOL + log_fail "No checksum failures for ZPool $TESTPOOL" + fi + + log_note "Making sure we have Direct I/O writes logged" + if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" + fi + log_note "Making sure we have no Direct I/O write checksum verifies \ + with ZPool" + check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 + + log_must rm -f "$mntpnt/direct-write.iso" +done + +log_must zpool status -v $TESTPOOL +log_must zpool sync $TESTPOOL + + + +# Finally we will verify that with checking every Direct I/O write we have no +# errors at all.
+# Create the file before trying to manipulate the contents +log_must file_write -o create -f "$mntpnt/direct-write.iso" -b $BS \ + -c $NUMBLOCKS -w +log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 1 + +for i in $(seq 1 $ITERATIONS); do + log_note "Verifying every Direct I/O write checksums iteration $i of \ + $ITERATIONS with zfs_vdev_direct_write_verify=1" + + prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS -e + + # Reading file back to verify there are no checksum errors + filesize=$(get_file_size "$mntpnt/direct-write.iso") + num_blocks=$((filesize / BS)) + log_must stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \ + -c $num_blocks + + # Getting the new Direct I/O write count. + curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + + log_note "Making sure there are no checksum errors with the ZPool" + log_must check_pool_status $TESTPOOL "errors" "No known data errors" + + log_note "Making sure we have Direct I/O writes logged" + if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" + fi + + log_note "Making sure we have Direct I/O write checksum verifies with ZPool" + check_dio_write_chksum_verify_failures "$TESTPOOL" "raidz" 1 +done + +log_must rm -f "$mntpnt/direct-write.iso" + +log_pass "Verified checksum verify works for Direct I/O writes." diff --git a/tests/zfs-tests/tests/functional/direct/setup.ksh b/tests/zfs-tests/tests/functional/direct/setup.ksh new file mode 100755 index 000000000000..f66d6531c1db --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/setup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License.
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +verify_runnable "global" + +if tunable_exists DIO_ENABLED ; then + log_must save_tunable DIO_ENABLED + log_must set_tunable32 DIO_ENABLED 1 +fi + +default_raidz_setup_noexit "$DISKS" +log_must zfs set compression=off $TESTPOOL/$TESTFS +log_pass diff --git a/tests/zfs-tests/tests/functional/events/zed_diagnose_multiple.ksh b/tests/zfs-tests/tests/functional/events/zed_diagnose_multiple.ksh new file mode 100755 index 000000000000..aea9f1dd34de --- /dev/null +++ b/tests/zfs-tests/tests/functional/events/zed_diagnose_multiple.ksh @@ -0,0 +1,168 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024, Klara Inc. +# + +# DESCRIPTION: +# Verify that simultaneous io error events from multiple vdevs +# doesn't generate a fault +# +# STRATEGY: +# 1. Create a pool with a 4 disk raidz vdev +# 2. Inject io errors +# 3. Verify that ZED detects the errors but doesn't fault any vdevs +# + +. $STF_SUITE/include/libtest.shlib + +TESTDIR="$TEST_BASE_DIR/zed_error_multiple" +VDEV1="$TEST_BASE_DIR/vdevfile1.$$" +VDEV2="$TEST_BASE_DIR/vdevfile2.$$" +VDEV3="$TEST_BASE_DIR/vdevfile3.$$" +VDEV4="$TEST_BASE_DIR/vdevfile4.$$" +VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4" +TESTPOOL="zed_test_pool" +FILEPATH="$TESTDIR/zed.testfile" + +verify_runnable "both" + +function cleanup +{ + log_must zinject -c all + + # if pool still exists then something failed so log additional info + if poolexists $TESTPOOL ; then + log_note "$(zpool status -s $TESTPOOL)" + echo "=================== zed log search ===================" + grep "Diagnosis Engine" $ZEDLET_DIR/zed.log + destroy_pool $TESTPOOL + fi + log_must zed_stop + + log_must rm -f $VDEVS +} + +function start_io_errors +{ + for vdev in $VDEVS + do + log_must zpool set io_n=4 $TESTPOOL $vdev + log_must zpool set io_t=60 $TESTPOOL $vdev + done + zpool sync + + for vdev in $VDEVS + do + log_must zinject -d $vdev -e io $TESTPOOL + done + zpool sync +} + +function multiple_slow_vdevs_test +{ + log_must truncate -s 1G $VDEVS + default_raidz_setup_noexit $VDEVS + + log_must zpool events -c + log_must zfs set compression=off $TESTPOOL + log_must zfs set primarycache=none $TESTPOOL + log_must zfs set recordsize=4K $TESTPOOL + + log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=4 + zpool sync + + # + # Read the file with io errors injected on the disks + # This will cause multiple errors on each disk 
to trip ZED SERD + # + # pool: zed_test_pool + # state: ONLINE + # status: One or more devices has experienced an unrecoverable error. An + # attempt was made to correct the error. Applications are unaffected. + # action: Determine if the device needs to be replaced, and clear the errors + # using 'zpool clear' or replace the device with 'zpool replace'. + # see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-9P + # config: + # + # NAME STATE READ WRITE CKSUM + # zed_test_pool ONLINE 0 0 0 + # raidz1-0 ONLINE 0 0 0 + # /var/tmp/vdevfile1.1547063 ONLINE 532 561 0 + # /var/tmp/vdevfile2.1547063 ONLINE 547 594 0 + # /var/tmp/vdevfile3.1547063 ONLINE 1.05K 1.10K 0 + # /var/tmp/vdevfile4.1547063 ONLINE 1.05K 1.00K 0 + # + + start_io_errors + dd if=$FILEPATH of=/dev/null bs=1M count=4 2>/dev/null + log_must zinject -c all + + # count io error events available for processing + typeset -i i=0 + typeset -i events=0 + while [[ $i -lt 60 ]]; do + events=$(zpool events | grep "ereport\.fs\.zfs.io" | wc -l) + [[ $events -ge "50" ]] && break + i=$((i+1)) + sleep 1 + done + log_note "$events io error events found" + if [[ $events -lt "50" ]]; then + log_note "bailing: not enough events to complete the test" + destroy_pool $TESTPOOL + return + fi + + # + # give slow ZED a chance to process the io error events + # + typeset -i i=0 + typeset -i skips=0 + while [[ $i -lt 75 ]]; do + skips=$(grep "retiring case" \ + $ZEDLET_DIR/zed.log | wc -l) + [[ $skips -gt "0" ]] && break + i=$((i+1)) + sleep 1 + done + + log_note $skips fault skips in ZED log after $i seconds + [ $skips -gt "0" ] || log_fail "expecting to see skips" + + fault=$(grep "zpool_vdev_fault" $ZEDLET_DIR/zed.log | wc -l) + log_note $fault vdev fault in ZED log + [ $fault -eq "0" ] || \ + log_fail "expecting no fault events, found $fault" + + destroy_pool $TESTPOOL +} + +log_assert "Test ZED io errors across multiple vdevs" +log_onexit cleanup + +log_must zed_events_drain +log_must zed_start +multiple_slow_vdevs_test
+ +log_pass "Test ZED io errors across multiple vdevs" diff --git a/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh b/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh index 3357ae2e3510..e27ef91cbdc7 100755 --- a/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh +++ b/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh @@ -25,10 +25,10 @@ # # DESCRIPTION: -# Verify that delay events from multiple vdevs doesnt degrade +# Verify that delay events from multiple vdevs doesn't degrade # # STRATEGY: -# 1. Create a pool with a 3 disk raidz vdev +# 1. Create a pool with a 4 disk raidz vdev # 2. Inject slow io errors # 3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs # diff --git a/tests/zfs-tests/tests/functional/fault/fault_limits.ksh b/tests/zfs-tests/tests/functional/fault/fault_limits.ksh new file mode 100755 index 000000000000..2f62a15c392b --- /dev/null +++ b/tests/zfs-tests/tests/functional/fault/fault_limits.ksh @@ -0,0 +1,96 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2024 by Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/fault/fault.cfg + +# +# DESCRIPTION: Verify that raidz children vdev fault count is restricted +# +# STRATEGY: +# 1. Create a raidz2 or raidz3 pool and add some data to it +# 2. Replace one of the child vdevs to create a replacing vdev +# 3. While it is resilvering, attempt to fault disks +# 4. Verify that less than parity count was faulted while replacing +# + +TESTPOOL="fault-test-pool" +PARITY=$((RANDOM%(2) + 2)) +VDEV_CNT=$((4 + (2 * PARITY))) +VDEV_SIZ=512M + +function cleanup +{ + poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + + for i in {0..$((VDEV_CNT - 1))}; do + log_must rm -f "$TEST_BASE_DIR/dev-$i" + done +} + +log_onexit cleanup +log_assert "restricts raidz children vdev fault count" + +log_note "creating $VDEV_CNT vdevs for parity $PARITY test" +typeset -a disks +for i in {0..$((VDEV_CNT - 1))}; do + device=$TEST_BASE_DIR/dev-$i + log_must truncate -s $VDEV_SIZ $device + disks[${#disks[*]}+1]=$device +done + +log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))]} + +# Add some data to the pool +log_must zfs create $TESTPOOL/fs +MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)" +log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z +sync_pool $TESTPOOL + +# Replace the last child vdev to form a replacing vdev +log_must zpool replace ${TESTPOOL} ${disks[$((VDEV_CNT - 1))]} ${disks[$VDEV_CNT]} +# immediately offline replacement disk to keep replacing vdev around +log_must zpool offline ${TESTPOOL} ${disks[$VDEV_CNT]} + +# Fault disks while a replacing vdev is still active +for disk in ${disks[0..$PARITY]}; do + log_must zpool offline -tf ${TESTPOOL} $disk +done + +zpool status $TESTPOOL + +# Count the faults that succeeded +faults=0 +for disk in ${disks[0..$PARITY]}; do + state=$(zpool get -H -o value state ${TESTPOOL} ${disk}) + if [ "$state" = "FAULTED" ] ; then + ((faults=faults+1)) + fi +done + +log_must test "$faults" -lt "$PARITY" +log_must test "$faults" -gt 0 + +log_pass
"restricts raidz children vdev fault count" diff --git a/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh b/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh index 05f3ac708477..b67059158a57 100755 --- a/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh +++ b/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh @@ -43,7 +43,7 @@ log_assert "ensure single-disk pool resumes properly after suspend and clear" # create a file, and take a checksum, so we can compare later log_must dd if=/dev/urandom of=$DATAFILE bs=128K count=1 -typeset sum1=$(cat $DATAFILE | md5sum) +typeset sum1=$(xxh128digest $DATAFILE) # make a debug device that we can "unplug" load_scsi_debug 100 1 1 1 '512b' @@ -94,7 +94,7 @@ log_must zpool export $TESTPOOL log_must zpool import $TESTPOOL # sum the file we wrote earlier -typeset sum2=$(cat /$TESTPOOL/file | md5sum) +typeset sum2=$(xxh128digest /$TESTPOOL/file) # make sure the checksums match log_must test "$sum1" = "$sum2" diff --git a/tests/zfs-tests/tests/functional/history/history_003_pos.ksh b/tests/zfs-tests/tests/functional/history/history_003_pos.ksh index 9f27785d64f9..efaf022153a8 100755 --- a/tests/zfs-tests/tests/functional/history/history_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/history/history_003_pos.ksh @@ -64,7 +64,7 @@ log_must zpool create $spool $VDEV0 log_must zfs create $spool/$sfs typeset -i orig_count=$(zpool history $spool | wc -l) -typeset orig_md5=$(zpool history $spool | head -2 | md5digest) +typeset orig_hash=$(zpool history $spool | head -2 | xxh128digest) typeset -i i=0 while ((i < 300)); do zfs set compression=off $spool/$sfs @@ -79,7 +79,7 @@ done TMPFILE=$TEST_BASE_DIR/spool.$$ zpool history $spool >$TMPFILE typeset -i entry_count=$(wc -l < $TMPFILE) -typeset final_md5=$(head -2 $TMPFILE | md5digest) +typeset final_hash=$(head -2 $TMPFILE | xxh128digest) grep -q 'zpool create' $TMPFILE || log_fail "'zpool create' was not found in pool history" @@ 
-91,7 +91,7 @@ grep -q 'zfs set compress' $TMPFILE || log_fail "'zfs set compress' was found in pool history" # Verify that the creation of the pool was preserved in the history. -if [[ $orig_md5 != $final_md5 ]]; then +if [[ $orig_hash != $final_hash ]]; then log_fail "zpool creation history was not preserved." fi diff --git a/tests/zfs-tests/tests/functional/io/setup.ksh b/tests/zfs-tests/tests/functional/io/setup.ksh index 82aaf5bc91b5..29d267115891 100755 --- a/tests/zfs-tests/tests/functional/io/setup.ksh +++ b/tests/zfs-tests/tests/functional/io/setup.ksh @@ -27,5 +27,5 @@ . $STF_SUITE/include/libtest.shlib verify_runnable "global" -default_setup "$DISKS" +default_raidz_setup "$DISKS" log_must zfs set compression=on $TESTPOOL/$TESTFS diff --git a/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg b/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg index 0302392f4c7f..f79123e5b2e1 100644 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg @@ -35,4 +35,4 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 diff --git a/tests/zfs-tests/tests/functional/longname/cleanup.ksh b/tests/zfs-tests/tests/functional/longname/cleanup.ksh new file mode 100755 index 000000000000..aac8062ec408 --- /dev/null +++ b/tests/zfs-tests/tests/functional/longname/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2021 by Nutanix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/longname/longname_001_pos.ksh b/tests/zfs-tests/tests/functional/longname/longname_001_pos.ksh new file mode 100755 index 000000000000..b7010fe0fc81 --- /dev/null +++ b/tests/zfs-tests/tests/functional/longname/longname_001_pos.ksh @@ -0,0 +1,132 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Nutanix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify the support for long filenames. +# +# STRATEGY: +# 0. On a fresh dataset ensure property "longname" is enabled by default. +# 1. 
Disable the longname. +# 2. Try to create a filename whose length is > 256 bytes. This should fail. +# 3. Enable "longname" property on the dataset. +# 4. Try to create files and dirs whose names are > 256 bytes. +# 5. Ensure that "ls" is able to list the file. +# 6. Ensure stat(1) is able to stat the file/directory. +# 7. Try to rename the files and directories +# 8. Try to delete the files and directories + +verify_runnable "global" + +WORKDIR=$TESTDIR/workdir +MOVEDIR=$TESTDIR/movedir/level2/level3/level4/level5/level6 + +function cleanup +{ + log_must rm -rf $WORKDIR + log_must rm -rf $TESTDIR/movedir +} + +LONGNAME=$(printf 'a%.0s' {1..512}) +LONGFNAME="file-$LONGNAME" +LONGDNAME="dir-$LONGNAME" +LONGPNAME="mypipe-$LONGNAME" +LONGCNAME="char_dev-$LONGNAME" +LONGLNAME="link-$LONGNAME" +LONGNAME_255=$(printf 'a%.0s' {1..255}) +LONGNAME_1023=$(printf 'a%.0s' {1..1023}) + + +log_assert "Check longname support for directories/files" + +log_onexit cleanup + +log_must mkdir $WORKDIR +log_must mkdir -p $MOVEDIR + +# Disable longname support +log_must zfs set longname=off $TESTPOOL/$TESTFS + +#Ensure a file with a 255-byte name can be created +log_must touch $WORKDIR/$LONGNAME_255 + +#Whereas a file with a 256-byte name should fail +log_mustnot touch $WORKDIR/${LONGNAME_255}b + +# Try to create a file with long name with property "longname=off" +log_mustnot touch $WORKDIR/$LONGFNAME +log_mustnot mkdir $WORKDIR/$LONGDNAME + +# Enable longname support +log_must zfs set longname=on $TESTPOOL/$TESTFS + +# Retry the longname creates and that should succeed +log_must mkdir $WORKDIR/$LONGDNAME +log_must touch $WORKDIR/$LONGFNAME + +# Should be able to create a file with name of 1023 chars +log_must touch $WORKDIR/$LONGNAME_1023 + +# And names longer than 1023 should fail +log_mustnot touch $WORKDIR/${LONGNAME_1023}b + +# Ensure the longnamed dir/file can be listed.
+name=$(ls $WORKDIR/$LONGFNAME) +if [[ "${name}" != "$WORKDIR/$LONGFNAME" ]]; then + log_fail "Unable to list: $WORKDIR/$LONGFNAME ret:$name" +fi + +name=$(ls -d $WORKDIR/$LONGDNAME) +if [[ "${name}" != "$WORKDIR/$LONGDNAME" ]]; then + log_fail "Unable to list: $WORKDIR/$LONGDNAME ret:$name" +fi + +# Ensure stat works +log_must stat $WORKDIR/$LONGFNAME +log_must stat $WORKDIR/$LONGDNAME + +# Ensure softlinks can be created from a longname to +# another longnamed file. +log_must ln -s $WORKDIR/$LONGFNAME $WORKDIR/$LONGLNAME + +# Ensure a longnamed pipe and character device file +# can be created +log_must mknod $WORKDIR/$LONGPNAME p +log_must mknod $WORKDIR/$LONGCNAME c 92 1 + +# Ensure we can rename the longname file +log_must mv $WORKDIR/$LONGFNAME $WORKDIR/file2 + +# Delete the long named dir/file +log_must rmdir $WORKDIR/$LONGDNAME +log_must rm $WORKDIR/file2 +log_must rm $WORKDIR/$LONGPNAME +log_must rm $WORKDIR/$LONGCNAME +log_must rm $WORKDIR/$LONGLNAME + +log_pass diff --git a/tests/zfs-tests/tests/functional/longname/longname_002_pos.ksh b/tests/zfs-tests/tests/functional/longname/longname_002_pos.ksh new file mode 100755 index 000000000000..dd2acabb3e02 --- /dev/null +++ b/tests/zfs-tests/tests/functional/longname/longname_002_pos.ksh @@ -0,0 +1,115 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Nutanix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Check if longname feature is disabled by default and can be enabled. +# +# STRATEGY: +# 1. Create a zpool with longname feature disabled +# 2. Attempt to enable 'longname' property should fail. +# 3. Attempt to create a longnamed (>255) file should fail. +# 4. Enable the feature@longname +# 5. Enable 'longname' property on the dataset. +# 6. Should be able to create long named files and directories. +# 7. Should be able to disable longname property. +# 8. This should disallow creating new longnamed file/dirs. But, should be +# able to access existing longnamed files/dirs. +verify_runnable "global" + +function cleanup +{ + log_must rm -rf $WORKDIR + poolexists $TESTPOOL && zpool destroy $TESTPOOL +} + +log_assert "Check feature@longname and 'longname' dataset propery work correctly" + +log_onexit cleanup + +log_must zpool destroy $TESTPOOL + +log_must zpool create -o feature@longname=disabled $TESTPOOL $DISKS + +log_must zfs create $TESTPOOL/$TESTFS2 + +log_must zfs set mountpoint=$TESTDIR2 $TESTPOOL/$TESTFS2 + +log_mustnot zfs set longname=on $TESTPOOL/$TESTFS2 + +LONGNAME=$(printf 'a%.0s' {1..512}) +LONGFNAME="file-$LONGNAME" +LONGDNAME="dir-$LONGNAME" +SHORTDNAME="dir-short" +SHORTFNAME="file-short" +WORKDIR=$TESTDIR2/workdir + +log_must mkdir $WORKDIR +log_mustnot touch $WORKDIR/$LONGFNAME +log_mustnot mkdir $WORKDIR/$LONGDNAME + +log_must zpool set feature@longname=enabled $TESTPOOL +log_must zfs set longname=on $TESTPOOL/$TESTFS2 + +log_must mkdir $WORKDIR/$LONGDNAME +log_must touch $WORKDIR/$LONGFNAME + +# Ensure the above changes are synced out. 
+log_must zpool sync $TESTPOOL + +# Ensure that the feature is activated once longnamed files are created. +state=$(zpool get feature@longname -H -o value $TESTPOOL) +log_note "feature@longname on pool: $TESTPOOL : $state" + +if [[ "$state" != "active" ]]; then + log_fail "feature@longname has state $state (expected active)" +fi + +# Set longname=off. +log_must zfs set longname=off $TESTPOOL/$TESTFS2 + +# Ensure no new file/directory with longnames can be created or can be renamed +# to. +log_mustnot mkdir $WORKDIR/${LONGDNAME}.1 +log_mustnot touch $WORKDIR/${LONGFNAME}.1 +log_must mkdir $WORKDIR/$SHORTDNAME +log_mustnot mv $WORKDIR/$SHORTDNAME $WORKDIR/${LONGDNAME}.1 +log_must touch $WORKDIR/$SHORTFNAME +log_mustnot mv $WORKDIR/$SHORTFNAME $WORKDIR/${LONGFNAME}.1 + +#Cleanup shortnames +log_must rmdir $WORKDIR/$SHORTDNAME +log_must rm $WORKDIR/$SHORTFNAME + +# But, should be able to stat and rename existing files +log_must stat $WORKDIR/$LONGDNAME +log_must stat $WORKDIR/$LONGFNAME +log_must mv $WORKDIR/$LONGDNAME $WORKDIR/$SHORTDNAME +log_must mv $WORKDIR/$LONGFNAME $WORKDIR/$SHORTFNAME + +log_pass diff --git a/tests/zfs-tests/tests/functional/longname/longname_003_pos.ksh b/tests/zfs-tests/tests/functional/longname/longname_003_pos.ksh new file mode 100755 index 000000000000..f684b514399d --- /dev/null +++ b/tests/zfs-tests/tests/functional/longname/longname_003_pos.ksh @@ -0,0 +1,113 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Nutanix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Check if longnames are handled correctly by ZIL replay and feature is activated. +# +# STRATEGY: +# 1. Create a zpool with longname feature disabled +# 2. Enable the feature@longname +# 3. Enable 'longname' property on the dataset. +# 4. Freeze the zpool +# 5. Create a longname +# 6. Export and import the zpool. +# 7. Replaying of longname create should activate the feature@longname +verify_runnable "global" + +function cleanup +{ + log_must rm -rf $WORKDIR + poolexists $TESTPOOL && zpool destroy $TESTPOOL +} + +log_assert "Check feature@longname and 'longname' dataset propery work correctly" + +log_onexit cleanup + +poolexists $TESTPOOL && zpool destroy $TESTPOOL + +log_must zpool create -o feature@longname=disabled $TESTPOOL $DISKS + +log_must zfs create $TESTPOOL/$TESTFS + +log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS + +LONGNAME=$(printf 'a%.0s' {1..512}) +LONGFNAME="file-$LONGNAME" +LONGDNAME="dir-$LONGNAME" +SHORTDNAME="dir-short" +SHORTFNAME="file-short" +WORKDIR=$TESTDIR/workdir + +log_must mkdir $WORKDIR + +log_must zpool set feature@longname=enabled $TESTPOOL +log_must zfs set longname=on $TESTPOOL/$TESTFS + +# Ensure that the feature is NOT activated yet as no longnamed file is created. 
+state=$(zpool get feature@longname -H -o value $TESTPOOL) +log_note "feature@longname on pool: $TESTPOOL : $state" + +if [[ "$state" != "enabled" ]]; then + log_fail "feature@longname has state $state (expected enabled)" +fi + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$WORKDIR/sync conv=fdatasync,fsync bs=1 count=1 + +log_must zpool freeze $TESTPOOL + +log_must mkdir $WORKDIR/$LONGDNAME +log_must touch $WORKDIR/$LONGFNAME + +# Export and re-import the zpool +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL + +# Ensure that the feature is activated once longnamed files are created. +state=$(zpool get feature@longname -H -o value $TESTPOOL) +log_note "feature@longname on pool: $TESTPOOL : $state" +if [[ "$state" != "active" ]]; then + log_fail "feature@longname has state $state (expected active)" +fi + +# Destroying the dataset where the feature is activated should put the feature +# back to 'enabled' state +log_must zfs destroy -r $TESTPOOL/$TESTFS +state=$(zpool get feature@longname -H -o value $TESTPOOL) +log_note "feature@longname on pool: $TESTPOOL : $state" +if [[ "$state" != "enabled" ]]; then + log_fail "feature@longname has state $state (expected enabled)" +fi + +log_pass diff --git a/tests/zfs-tests/tests/functional/longname/setup.ksh b/tests/zfs-tests/tests/functional/longname/setup.ksh new file mode 100755 index 000000000000..3f6759469b7b --- /dev/null +++ b/tests/zfs-tests/tests/functional/longname/setup.ksh @@ -0,0 +1,35 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License.
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2021 by Nutanix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh b/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh index 1d35fba5dbfa..9b285ac5f31f 100755 --- a/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh +++ b/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh @@ -41,7 +41,7 @@ bs=1024k count=512 log_must dd if=/dev/urandom of=$TESTDIR/data bs=$bs count=$count -data_checksum=$(sha256digest $TESTDIR/data) +data_checksum=$(xxh128digest $TESTDIR/data) # Test common large block configuration. 
log_must zfs create -o recordsize=1m -o primarycache=metadata $TESTPOOL/gang @@ -50,7 +50,7 @@ mntpnt=$(get_prop mountpoint $TESTPOOL/gang) log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count sync_pool $TESTPOOL log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count -out_checksum=$(sha256digest $TESTDIR/out) +out_checksum=$(xxh128digest $TESTDIR/out) if [[ "$data_checksum" != "$out_checksum" ]]; then log_fail "checksum mismatch ($data_checksum != $out_checksum)" @@ -74,7 +74,7 @@ mntpnt=$(get_prop mountpoint $TESTPOOL/gang) log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count sync_pool $TESTPOOL log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count -out_checksum=$(sha256digest $TESTDIR/out) +out_checksum=$(xxh128digest $TESTDIR/out) if [[ "$data_checksum" != "$out_checksum" ]]; then log_fail "checksum mismatch ($data_checksum != $out_checksum)" diff --git a/tests/zfs-tests/tests/functional/projectquota/projectid_003_pos.ksh b/tests/zfs-tests/tests/functional/projectquota/projectid_003_pos.ksh index 93474b53709a..a66d61b686b3 100755 --- a/tests/zfs-tests/tests/functional/projectquota/projectid_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/projectquota/projectid_003_pos.ksh @@ -59,7 +59,7 @@ log_onexit cleanup log_assert "Check changing project ID with directory-based extended attributes" -log_must zfs set xattr=on $QFS +log_must zfs set xattr=dir $QFS log_must touch $PRJGUARD log_must chattr -p $PRJID1 $PRJGUARD @@ -76,6 +76,6 @@ sync_pool typeset prj_aft=$(project_obj_count $QFS $PRJID1) [[ $prj_aft -ge $((prj_bef + 5)) ]] || - log_fail "new value ($prj_aft) is NOT 5 largr than old one ($prj_bef)" + log_fail "new value ($prj_aft) is NOT 5 larger than old one ($prj_bef)" log_pass "Changing project ID with directory-based extended attributes pass" diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_expand_001_pos.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_expand_001_pos.ksh index 
063d7fa735df..d4923fdb67d9 100755 --- a/tests/zfs-tests/tests/functional/raidz/raidz_expand_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/raidz/raidz_expand_001_pos.ksh @@ -154,7 +154,9 @@ function test_scrub # log_must zpool import -o cachefile=none -d $dir $pool + is_pool_scrubbing $pool && wait_scrubbed $pool log_must zpool scrub -w $pool + log_must zpool clear $pool log_must zpool export $pool @@ -166,6 +168,7 @@ function test_scrub # log_must zpool import -o cachefile=none -d $dir $pool + is_pool_scrubbing $pool && wait_scrubbed $pool log_must zpool scrub -w $pool log_must check_pool_status $pool "errors" "No known data errors" diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_expand_002_pos.ksh index 004f3d1f9255..56810aca099f 100755 --- a/tests/zfs-tests/tests/functional/raidz/raidz_expand_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/raidz/raidz_expand_002_pos.ksh @@ -105,6 +105,7 @@ for disk in ${disks[$(($nparity+2))..$devs]}; do log_fail "pool $pool not expanded" fi + is_pool_scrubbing $pool && wait_scrubbed $pool verify_pool $pool pool_size=$expand_size diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 26e7c2cc25bc..b4fcdd7bbd24 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -155,13 +155,6 @@ function cleanup_pool fi } -function cmp_md5s { - typeset file1=$1 - typeset file2=$2 - - [ "$(md5digest $file1)" = "$(md5digest $file2)" ] -} - # # Detect if the given two filesystems have same sub-datasets # @@ -720,6 +713,7 @@ function stream_has_features feature[resuming]="100000" feature[redacted]="200000" feature[compressed]="400000" + feature[longname]="10000000" typeset flag known derived=0 for flag in "$@"; do @@ -808,15 +802,15 @@ function recursive_cksum { case "$(uname)" in FreeBSD) - find $1 -type f -exec sh -c 'sha256 
-q {}; lsextattr -q \ - system {} | sha256 -q; lsextattr -q user {} | sha256 -q' \ - \; | sort | sha256 -q + find $1 -type f -exec sh -c 'xxh128sum {}; \ + lsextattr -q system {} | xxh128sum; \ + lsextattr -q user {} | xxh128sum' \; \ + | sort -k 2 | awk '{ print $1 }' | xxh128digest ;; *) - find $1 -type f -exec sh -c 'sha256sum {}; getfattr \ - --absolute-names --only-values -d {} | sha256sum' \; | \ - sort -k 2 | awk '{ print $1 }' | sha256sum | \ - awk '{ print $1 }' + find $1 -type f -exec sh -c 'xxh128sum {}; getfattr \ + --absolute-names --only-values -d {} | xxh128sum' \ + \; | sort -k 2 | awk '{ print $1 }' | xxh128digest ;; esac } diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_longname.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_longname.ksh new file mode 100755 index 000000000000..3f7edc6810dd --- /dev/null +++ b/tests/zfs-tests/tests/functional/rsend/send-c_longname.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2021 by Nutanix. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib +. $STF_SUITE/include/properties.shlib + +# +# Description: +# Verify that longname featureflag is present in the stream. +# +# Strategy: +# 1. Create a filesystem with longnamed files/directories. +# 2. Verify that the sendstream has the longname featureflag is present in the +# send stream. +# 3. Verify the created streams can be received correctly. +# 4. Verify that the longnamed files/directories are present in the received +# filesystem. 
+# + +verify_runnable "both" + +log_assert "Verify that longnames are handled correctly in send stream." +log_onexit cleanup_pool $POOL $POOL2 $POOL3 + +typeset sendfs=$POOL/sendfs +typeset recvfs=$POOL2/recvfs +typeset recvfs3=$POOL3/recvfs +typeset stream=$BACKDIR/stream +typeset dump=$BACKDIR/dump + +log_must zfs create -o longname=on $sendfs +typeset dir=$(get_prop mountpoint $sendfs) + +# Create a longnamed dir and a file in the send dataset +LONGNAME=$(printf 'a%.0s' {1..512}) +LONGFNAME="file-$LONGNAME" +LONGDNAME="dir-$LONGNAME" +log_must mkdir $dir/$LONGDNAME +log_must touch $dir/$LONGFNAME + +# When POOL3 is created by rsend.kshlib feature@longname is 'enabled'. +# Recreate the POOL3 with feature@longname disabled. +datasetexists $POOL3 && log_must zpool destroy $POOL3 +log_must zpool create -o feature@longname=disabled $POOL3 $DISK3 + +# Generate the streams and zstreamdump output. +log_must zfs snapshot $sendfs@now +log_must eval "zfs send -p $sendfs@now >$stream" +log_must eval "zstream dump -v <$stream >$dump" +log_must eval "zfs recv $recvfs <$stream" +cmp_ds_cont $sendfs $recvfs +log_must stream_has_features $stream longname + +# Ensure the receiving pool has feature@longname activated after receiving. +feat_val=$(zpool get -H -o value feature@longname $POOL2) +log_note "Zpool $POOL2 feature@longname=$feat_val" +if [[ "$feat_val" != "active" ]]; then + log_fail "pool $POOL2 feature@longname=$feat_val (expected 'active')" +fi + +# Receiving of the stream on $POOL3 should fail as longname is not enabled +log_mustnot eval "zfs recv $recvfs3 <$stream" + +# Enable feature@longname and retry receiving the stream. +# It should succeed this time. 
+log_must eval "zpool set feature@longname=enabled $POOL3" +log_must eval "zfs recv $recvfs3 <$stream" + +log_must zfs get longname $recvfs3 +prop_val=$(zfs get -H -o value longname $recvfs3) +log_note "dataset $recvfs3 has longname=$prop_val" +if [[ "$prop_val" != "on" ]]; then + log_fail "$recvfs3 has longname=$prop_val (expected 'on')" +fi + +# +# TODO: +# - Add a testcase to cover the case where send-stream does not contain +# properties (generated without "-p"). +# In this case the target dataset would have longname files/directories which +# cannot be accessed if the dataset property 'longname=off'. +# + +log_pass "Longnames are handled correctly in send/recv" diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh index 1bf234823459..6a29d964500d 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh @@ -50,8 +50,8 @@ typeset megs=8 log_must zfs create -V 256m -o compress=lz4 $vol write_compressible $BACKDIR ${megs}m 2 -md5_1=$(md5digest $data1) -md5_2=$(md5digest $data2) +hash1=$(xxh128digest $data1) +hash2=$(xxh128digest $data2) log_must dd if=$data1 of=$voldev bs=1024k log_must zfs snapshot $vol@snap @@ -63,8 +63,8 @@ verify_stream_size $BACKDIR/full $vol verify_stream_size $BACKDIR/full $vol2 block_device_wait $voldev2 log_must dd if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs -md5=$(md5digest $BACKDIR/copy) -[[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1" +hash=$(xxh128digest $BACKDIR/copy) +[[ $hash = $hash1 ]] || log_fail "hash mismatch: $hash != $hash1" # Repeat, for an incremental send log_must dd seek=$megs if=$data2 of=$voldev bs=1024k @@ -77,7 +77,7 @@ verify_stream_size $BACKDIR/inc $vol 90 $vol@snap verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap block_device_wait $voldev2 log_must dd skip=$megs if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs -md5=$(md5digest 
$BACKDIR/copy) -[[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2" +hash=$(xxh128digest $BACKDIR/copy) +[[ $hash = $hash2 ]] || log_fail "hash mismatch: $hash != $hash2" log_pass "Verify compressed send works with volumes" diff --git a/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh index 64ee6293c7a4..7db9892da583 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh @@ -101,8 +101,8 @@ block_device_wait log_must mount $recvdev $recvmnt -md5_1=$(cat $mntpnt/* | md5digest) -md5_2=$(cat $recvmnt/* | md5digest) -[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2" +hash1=$(cat $mntpnt/* | xxh128digest) +hash2=$(cat $recvmnt/* | xxh128digest) +[[ "$hash1" == "$hash2" ]] || log_fail "hash mismatch: $hash1 != $hash2" log_pass "zfs can receive raw, recursive send streams" diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh index c0c7b682def9..3b20f0a99a4a 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh @@ -76,7 +76,7 @@ log_must zfs create -o keyformat=passphrase -o keylocation=file://$keyfile \ log_must mkfile 1M /$TESTPOOL/ds/$TESTFILE0 log_must cp /$TESTPOOL/ds/$TESTFILE0 /$TESTPOOL/crypt/$TESTFILE0 -typeset cksum=$(md5digest /$TESTPOOL/ds/$TESTFILE0) +typeset cksum=$(xxh128digest /$TESTPOOL/ds/$TESTFILE0) log_must zfs snap -r $snap log_must zfs snap -r $snap2 @@ -129,7 +129,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5digest /$ds/$TESTFILE0) 
+recv_cksum=$(xxh128digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -153,7 +153,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5digest /$ds/$TESTFILE0) +recv_cksum=$(xxh128digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -171,7 +171,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5digest /$ds/$TESTFILE0) +recv_cksum=$(xxh128digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -185,7 +185,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5digest /$ds/$TESTFILE0) +recv_cksum=$(xxh128digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -199,7 +199,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 'mounted' $ds)" == "yes" -recv_cksum=$(md5digest /$ds/$TESTFILE0) +recv_cksum=$(xxh128digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds @@ -213,7 +213,7 @@ log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" log_must test "$(get_prop 
'mounted' $ds)" == "yes" -recv_cksum=$(md5digest /$ds/$TESTFILE0) +recv_cksum=$(xxh128digest /$ds/$TESTFILE0) log_must test "$recv_cksum" == "$cksum" log_must zfs destroy -r $ds diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh index aff54e3a7d69..a6223f05a65a 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh @@ -52,16 +52,8 @@ log_onexit cleanup function recursive_cksum { - case "$(uname)" in - FreeBSD) - find $1 -type f -exec sha256 -q {} + | \ - sort | sha256digest - ;; - *) - find $1 -type f -exec sha256sum {} + | \ - sort -k 2 | awk '{ print $1 }' | sha256digest - ;; - esac + find $1 -type f -exec xxh128sum {} + | \ + sort -k 2 | awk '{ print $1 }' | xxh128digest } log_assert "Verify 'zfs send -w' works with many different file layouts" diff --git a/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh b/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh index 6754fb0f44bb..72b83505d5e8 100755 --- a/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh @@ -65,7 +65,7 @@ function send_and_verify ">$BACKDIR/pool-snap1-snap2" log_must eval "zfs receive $recvfs < $BACKDIR/pool-snap1-snap2" - log_must cmp_md5s /$sendfs/file1 /$recvfs/file1 + log_must cmp_xxh128 /$sendfs/file1 /$recvfs/file1 } # By default sending hole_birth times is disabled. 
This functionality needs diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 8f3585a5997f..6f4871975b61 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -77,6 +77,14 @@ log_must zfs create $TESTPOOL/$TESTFS log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ conv=fdatasync,fsync bs=1 count=1 +# +# Create a small file for the O_DIRECT test before freezing the pool. This +# allows us to overwrite it after the pool is frozen and avoid the case +# where O_DIRECT is disabled because the first block must be grown. +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/direct \ + oflag=sync,direct bs=4k count=1 + # # 2. Freeze TESTFS # @@ -110,7 +118,7 @@ log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete log_must mkdir -p $TESTDIR log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload \ oflag=sync bs=1k count=8 -typeset checksum=$(sha256digest /$TESTPOOL/$TESTFS/payload) +typeset checksum=$(xxh128digest /$TESTPOOL/$TESTFS/payload) # TX_WRITE (small file with ordering) log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/small_file \ @@ -140,6 +148,10 @@ log_must truncate -s 0 /$TESTPOOL/$TESTFS/truncated_file log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \ oflag=sync bs=128k count=64 +# TX_WRITE (O_DIRECT) +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/direct \ + oflag=sync,direct bs=4k count=1 + # Write zeros, which compress to holes, in the middle of a file log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 \ oflag=sync bs=128k count=8 @@ -239,7 +251,7 @@ log_note "Verify working set diff:" log_must replay_directory_diff $TESTDIR/copy /$TESTPOOL/$TESTFS log_note "Verify file checksum:" -typeset checksum1=$(sha256digest /$TESTPOOL/$TESTFS/payload) +typeset checksum1=$(xxh128digest /$TESTPOOL/$TESTFS/payload) [[ "$checksum1" == "$checksum" ]] || \ log_fail "checksum 
mismatch ($checksum1 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh index e99d40570c39..4728f8389ad0 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh @@ -140,7 +140,7 @@ fi # # 4. Generate checksums for all ext4 files. # -typeset checksum=$(cat $MNTPNT/* | sha256digest) +typeset checksum=$(cat $MNTPNT/* | xxh128digest) # # 5. Unmount filesystem and export the pool @@ -172,7 +172,7 @@ log_note "Verify current block usage:" log_must zdb -bcv $TESTPOOL log_note "Verify checksums" -typeset checksum1=$(cat $MNTPNT/* | sha256digest) +typeset checksum1=$(cat $MNTPNT/* | xxh128digest) [[ "$checksum1" == "$checksum" ]] || \ log_fail "checksum mismatch ($checksum1 != $checksum)" diff --git a/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh b/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh index a93d0b3cc803..62563e0dd4cb 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh @@ -77,7 +77,7 @@ export PERF_COMPCHUNK=0 export RUNTIME=30 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # Write to the pool. 
log_must fio $FIO_SCRIPTS/mkfiles.fio diff --git a/tests/zfs-tests/tests/functional/xattr/xattr_001_pos.ksh b/tests/zfs-tests/tests/functional/xattr/xattr_001_pos.ksh index ab61211d9fd2..447f230ba18a 100755 --- a/tests/zfs-tests/tests/functional/xattr/xattr_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/xattr/xattr_001_pos.ksh @@ -52,7 +52,7 @@ function cleanup { fi } -set -A args "on" "sa" +set -A args "dir" "sa" log_assert "Create/read/write/append of xattrs works" log_onexit cleanup diff --git a/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh b/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh index c60be2217dae..50062ed35e92 100755 --- a/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh +++ b/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh @@ -46,7 +46,7 @@ function cleanup { } -set -A args "on" "sa" +set -A args "dir" "sa" log_assert "A read of a non-existent xattr fails" log_onexit cleanup diff --git a/udev/zvol_id.c b/udev/zvol_id.c index 609349594767..0d1da9956749 100644 --- a/udev/zvol_id.c +++ b/udev/zvol_id.c @@ -56,6 +56,7 @@ main(int argc, const char *const *argv) return (1); } const char *dev_name = argv[1]; + size_t i, len; int fd; struct stat sb; @@ -73,11 +74,13 @@ main(int argc, const char *const *argv) } const char *dev_part = strrchr(dev_name, 'p'); + len = strlen(zvol_name); if (dev_part != NULL) { - sprintf(zvol_name + strlen(zvol_name), "-part%s", dev_part + 1); + sprintf(zvol_name + len, "-part%s", dev_part + 1); + len = strlen(zvol_name); } - for (size_t i = 0; i < strlen(zvol_name); ++i) + for (i = 0; i < len; ++i) if (isblank(zvol_name[i])) zvol_name[i] = '+';