diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index af674de20c94ef..aafe40804c9b59 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -140,7 +140,7 @@ jobs: if: ${{ failure() && steps.check.conclusion == 'failure' }} run: | make regen-abidump - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 name: Publish updated ABI files if: ${{ failure() && steps.check.conclusion == 'failure' }} with: @@ -520,7 +520,7 @@ jobs: -x test_subprocess \ -x test_signal \ -x test_sysconfig - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: hypothesis-example-db diff --git a/.github/workflows/build_min.yml b/.github/workflows/build_min.yml new file mode 100644 index 00000000000000..e50d4ed8ea3ef2 --- /dev/null +++ b/.github/workflows/build_min.yml @@ -0,0 +1,615 @@ +name: TestsMin + +# gh-84728: "paths-ignore" is not used to skip documentation-only PRs, because +# it prevents to mark a job as mandatory. A PR cannot be merged if a job is +# mandatory but not scheduled because of "paths-ignore". +on: + workflow_dispatch: + pull_request: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}-reusable + cancel-in-progress: true + +jobs: + check_source: + name: 'Check for source changes' + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + run-docs: ${{ steps.docs-changes.outputs.run-docs || false }} + run_tests: ${{ steps.check.outputs.run_tests }} + run_hypothesis: ${{ steps.check.outputs.run_hypothesis }} + config_hash: ${{ steps.config_hash.outputs.hash }} + steps: + - uses: actions/checkout@v4 + - name: Check for source changes + id: check + run: | + if [ -z "$GITHUB_BASE_REF" ]; then + echo "run_tests=true" >> $GITHUB_OUTPUT + else + git fetch origin $GITHUB_BASE_REF --depth=1 + # git diff "origin/$GITHUB_BASE_REF..." (3 dots) may be more + # reliable than git diff "origin/$GITHUB_BASE_REF.." 
(2 dots),
+            # but it requires to download more commits (this job uses
+            # "git fetch --depth=1").
+            #
+            # git diff "origin/$GITHUB_BASE_REF..." (3 dots) works with Git
+            # 2.26, but Git 2.28 is stricter and fails with "no merge base".
+            #
+            # git diff "origin/$GITHUB_BASE_REF.." (2 dots) should be enough on
+            # GitHub, since GitHub starts by merging origin/$GITHUB_BASE_REF
+            # into the PR branch anyway.
+            #
+            # https://github.com/python/core-workflow/issues/373
+            git diff --name-only origin/$GITHUB_BASE_REF.. | grep -qvE '(\.rst$|^Doc|^Misc|^\.pre-commit-config\.yaml$|\.ruff\.toml$)' && echo "run_tests=true" >> $GITHUB_OUTPUT || true
+          fi
+
+          # Check if we should run hypothesis tests (skipped on the 3.8-3.11 branches)
+          GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
+          echo "$GIT_BRANCH"
+          if echo "$GIT_BRANCH" | grep -q -w '3\.\(8\|9\|10\|11\)'; then
+            echo "Branch too old for hypothesis tests"
+            echo "run_hypothesis=false" >> $GITHUB_OUTPUT
+          else
+            echo "Run hypothesis tests"
+            echo "run_hypothesis=true" >> $GITHUB_OUTPUT
+          fi
+      - name: Compute hash for config cache key
+        id: config_hash
+        run: |
+          echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build_min.yml') }}" >> $GITHUB_OUTPUT
+      - name: Get a list of the changed documentation-related files
+        if: github.event_name == 'pull_request'
+        id: changed-docs-files
+        uses: Ana06/get-changed-files@v2.2.0
+        with:
+          filter: |
+            Doc/**
+            Misc/**
+            .github/workflows/reusable-docs.yml
+          format: csv  # works for paths with spaces
+      - name: Check for docs changes
+        if: >-
+          github.event_name == 'pull_request'
+          && steps.changed-docs-files.outputs.added_modified_renamed != ''
+        id: docs-changes
+        run: |
+          echo "run-docs=true" >> "${GITHUB_OUTPUT}"
+
+  # check-docs:
+  #   name: Docs
+  #   needs: check_source
+  #   if: fromJSON(needs.check_source.outputs.run-docs)
+  #   uses: ./.github/workflows/reusable-docs.yml
+
+  # Pyrona is changing the ABI, so this check is disabled for now.
+ # + # check_abi: + # name: 'Check if the ABI has changed' + # runs-on: ubuntu-22.04 + # needs: check_source + # if: needs.check_source.outputs.run_tests == 'true' + # steps: + # - uses: actions/checkout@v3 + # - uses: actions/setup-python@v4 + # - name: Install dependencies + # run: | + # sudo ./.github/workflows/posix-deps-apt.sh + # sudo apt-get install -yq abigail-tools + # - name: Build CPython + # env: + # CFLAGS: -g3 -O0 + # run: | + # # Build Python with the libpython dynamic library + # ./configure --enable-shared + # make -j4 + # - name: Check for changes in the ABI + # id: check + # run: | + # if ! make check-abidump; then + # echo "Generated ABI file is not up to date." + # echo "Please add the release manager of this branch as a reviewer of this PR." + # echo "" + # echo "The up to date ABI file should be attached to this build as an artifact." + # echo "" + # echo "To learn more about this check: https://devguide.python.org/setup/#regenerate-the-abi-dump" + # echo "" + # exit 1 + # fi + # - name: Generate updated ABI files + # if: ${{ failure() && steps.check.conclusion == 'failure' }} + # run: | + # make regen-abidump + # - uses: actions/upload-artifact@v4 + # name: Publish updated ABI files + # if: ${{ failure() && steps.check.conclusion == 'failure' }} + # with: + # name: abi-data + # path: ./Doc/data/*.abi + + check_generated_files: + name: 'Check if generated files are up to date' + runs-on: ubuntu-22.04 + timeout-minutes: 60 + needs: check_source + if: needs.check_source.outputs.run_tests == 'true' + steps: + - uses: actions/checkout@v4 + - name: Restore config.cache + uses: actions/cache@v3 + with: + path: config.cache + key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + - uses: actions/setup-python@v4 + with: + python-version: '3.x' + - name: Install Dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Add ccache to PATH + run: echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + - name: 
Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + - name: Check Autoconf and aclocal versions + run: | + grep "Generated by GNU Autoconf 2.71" configure + grep "aclocal 1.16.4" aclocal.m4 + grep -q "runstatedir" configure + grep -q "PKG_PROG_PKG_CONFIG" aclocal.m4 + - name: Configure CPython + run: | + # Build Python with the libpython dynamic library + ./configure --config-cache --with-pydebug --enable-shared + - name: Regenerate autoconf files with container image + run: make regen-configure + - name: Build CPython + run: | + # Deepfreeze will usually cause global objects to be added or removed, + # so we run it before regen-global-objects gets run (in regen-all). + make regen-deepfreeze + make -j4 regen-all + make regen-stdlib-module-names + - name: Check for changes + run: | + git add -u + changes=$(git status --porcelain) + # Check for changes in regenerated files + if test -n "$changes"; then + echo "Generated files not up to date." + echo "Perhaps you forgot to run make regen-all or build.bat --regen. ;)" + echo "configure files must be regenerated with a specific version of autoconf." + echo "$changes" + echo "" + git diff --staged || true + exit 1 + fi + - name: Check exported libpython symbols + run: make smelly + - name: Check limited ABI symbols + run: make check-limited-abi + - name: Check for unsupported C global variables + if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME + run: make check-c-globals + + # These were all broken before we started.
+ + # build_win32: + # name: 'Windows (x86)' + # runs-on: windows-latest + # timeout-minutes: 60 + # needs: check_source + # if: needs.check_source.outputs.run_tests == 'true' + # env: + # IncludeUwp: 'true' + # steps: + # - uses: actions/checkout@v4 + # - name: Build CPython + # run: .\PCbuild\build.bat -e -d -p Win32 + # - name: Display build info + # run: .\python.bat -m test.pythoninfo + # - name: Tests + # run: .\PCbuild\rt.bat -p Win32 -d -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 + + # build_win_amd64: + # name: 'Windows (x64)' + # runs-on: windows-latest + # timeout-minutes: 60 + # needs: check_source + # if: needs.check_source.outputs.run_tests == 'true' + # env: + # IncludeUwp: 'true' + # steps: + # - uses: actions/checkout@v4 + # - name: Register MSVC problem matcher + # run: echo "::add-matcher::.github/problem-matchers/msvc.json" + # - name: Build CPython + # run: .\PCbuild\build.bat -e -d -p x64 + # - name: Display build info + # run: .\python.bat -m test.pythoninfo + # - name: Tests + # run: .\PCbuild\rt.bat -p x64 -d -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 + + # build_win_arm64: + # name: 'Windows (arm64)' + # runs-on: windows-latest + # timeout-minutes: 60 + # needs: check_source + # if: needs.check_source.outputs.run_tests == 'true' + # env: + # IncludeUwp: 'true' + # steps: + # - uses: actions/checkout@v4 + # - name: Register MSVC problem matcher + # run: echo "::add-matcher::.github/problem-matchers/msvc.json" + # - name: Build CPython + # run: .\PCbuild\build.bat -e -d -p arm64 + + # build_macos: + # name: 'macOS' + # runs-on: macos-latest + # timeout-minutes: 60 + # needs: check_source + # if: needs.check_source.outputs.run_tests == 'true' + # env: + # HOMEBREW_NO_ANALYTICS: 1 + # HOMEBREW_NO_AUTO_UPDATE: 1 + # HOMEBREW_NO_INSTALL_CLEANUP: 1 + # PYTHONSTRICTEXTENSIONBUILD: 1 + # steps: + # - uses: actions/checkout@v4 + # - name: Restore config.cache + # uses: actions/cache@v3 + # with: + # path: config.cache + # key: ${{ 
github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + # - name: Install Homebrew dependencies + # run: brew install pkg-config openssl@3.0 xz gdbm tcl-tk + # - name: Configure CPython + # run: | + # GDBM_CFLAGS="-I$(brew --prefix gdbm)/include" \ + # GDBM_LIBS="-L$(brew --prefix gdbm)/lib -lgdbm" \ + # ./configure \ + # --config-cache \ + # --with-pydebug \ + # --prefix=/opt/python-dev \ + # --with-openssl="$(brew --prefix openssl@3.0)" + # - name: Build CPython + # run: make -j4 + # - name: Display build info + # run: make pythoninfo + # - name: Tests + # run: make buildbottest TESTOPTS="-j4 -uall,-cpu" + + build_ubuntu: + name: 'Ubuntu' + runs-on: ubuntu-20.04 + timeout-minutes: 60 + needs: check_source + if: needs.check_source.outputs.run_tests == 'true' + env: + OPENSSL_VER: 3.0.11 + PYTHONSTRICTEXTENSIONBUILD: 1 + steps: + - uses: actions/checkout@v4 + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install Dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> $GITHUB_ENV + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v3 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory $MULTISSL_DIR --openssl $OPENSSL_VER --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + - name: Setup directory envs for out-of-tree builds + 
run: | + echo "CPYTHON_RO_SRCDIR=$(realpath -m ${GITHUB_WORKSPACE}/../cpython-ro-srcdir)" >> $GITHUB_ENV + echo "CPYTHON_BUILDDIR=$(realpath -m ${GITHUB_WORKSPACE}/../cpython-builddir)" >> $GITHUB_ENV + - name: Create directories for read-only out-of-tree builds + run: mkdir -p $CPYTHON_RO_SRCDIR $CPYTHON_BUILDDIR + - name: Bind mount sources read-only + run: sudo mount --bind -o ro $GITHUB_WORKSPACE $CPYTHON_RO_SRCDIR + - name: Restore config.cache + uses: actions/cache@v3 + with: + path: ${{ env.CPYTHON_BUILDDIR }}/config.cache + key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + - name: Configure CPython out-of-tree + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + ../cpython-ro-srcdir/configure \ + --config-cache \ + --with-pydebug \ + --with-region-invariant \ + --with-openssl=$OPENSSL_DIR + - name: Build CPython out-of-tree + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: make -j4 + - name: Display build info + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: make pythoninfo + - name: Remount sources writable for tests + # some tests write to srcdir, lack of pyc files slows down testing + run: sudo mount $CPYTHON_RO_SRCDIR -oremount,rw + - name: Tests + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: xvfb-run make buildbottest TESTOPTS="-j4 -uall,-cpu" + + # Removing as not changing this for now. 
+ # + # build_ubuntu_ssltests: + # name: 'Ubuntu SSL tests with OpenSSL' + # runs-on: ubuntu-20.04 + # timeout-minutes: 60 + # needs: check_source + # if: needs.check_source.outputs.run_tests == 'true' + # strategy: + # fail-fast: false + # matrix: + # openssl_ver: [1.1.1w, 3.0.11, 3.1.3] + # env: + # OPENSSL_VER: ${{ matrix.openssl_ver }} + # MULTISSL_DIR: ${{ github.workspace }}/multissl + # OPENSSL_DIR: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }} + # LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib + # steps: + # - uses: actions/checkout@v4 + # - name: Restore config.cache + # uses: actions/cache@v3 + # with: + # path: config.cache + # key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + # - name: Register gcc problem matcher + # run: echo "::add-matcher::.github/problem-matchers/gcc.json" + # - name: Install Dependencies + # run: sudo ./.github/workflows/posix-deps-apt.sh + # - name: Configure OpenSSL env vars + # run: | + # echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> $GITHUB_ENV + # echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> $GITHUB_ENV + # echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV + # - name: 'Restore OpenSSL build' + # id: cache-openssl + # uses: actions/cache@v3 + # with: + # path: ./multissl/openssl/${{ env.OPENSSL_VER }} + # key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + # - name: Install OpenSSL + # if: steps.cache-openssl.outputs.cache-hit != 'true' + # run: python3 Tools/ssl/multissltests.py --steps=library --base-directory $MULTISSL_DIR --openssl $OPENSSL_VER --system Linux + # - name: Add ccache to PATH + # run: | + # echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + # - name: Configure ccache action + # uses: hendrikmuhs/ccache-action@v1.2 + # - name: Configure CPython + # run: ./configure --config-cache --with-pydebug --with-openssl=$OPENSSL_DIR 
+ # - name: Build CPython + # run: make -j4 + # - name: Display build info + # run: make pythoninfo + # - name: SSL tests + # run: ./python Lib/test/ssltests.py + + test_hypothesis: + name: "Hypothesis tests on Ubuntu" + runs-on: ubuntu-20.04 + timeout-minutes: 60 + needs: check_source + if: needs.check_source.outputs.run_tests == 'true' && needs.check_source.outputs.run_hypothesis == 'true' + env: + OPENSSL_VER: 3.0.11 + PYTHONSTRICTEXTENSIONBUILD: 1 + steps: + - uses: actions/checkout@v4 + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install Dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> $GITHUB_ENV + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v3 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory $MULTISSL_DIR --openssl $OPENSSL_VER --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + - name: Setup directory envs for out-of-tree builds + run: | + echo "CPYTHON_RO_SRCDIR=$(realpath -m ${GITHUB_WORKSPACE}/../cpython-ro-srcdir)" >> $GITHUB_ENV + echo "CPYTHON_BUILDDIR=$(realpath -m ${GITHUB_WORKSPACE}/../cpython-builddir)" >> $GITHUB_ENV + - name: Create directories for read-only out-of-tree builds + run: mkdir -p $CPYTHON_RO_SRCDIR $CPYTHON_BUILDDIR + - name: Bind mount sources read-only + run: sudo mount --bind -o ro 
$GITHUB_WORKSPACE $CPYTHON_RO_SRCDIR
+    - name: Restore config.cache
+      uses: actions/cache@v3
+      with:
+        path: ${{ env.CPYTHON_BUILDDIR }}/config.cache
+        key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }}
+    - name: Configure CPython out-of-tree
+      working-directory: ${{ env.CPYTHON_BUILDDIR }}
+      run: |
+        ../cpython-ro-srcdir/configure \
+          --config-cache \
+          --with-pydebug \
+          --with-region-invariant \
+          --with-openssl=$OPENSSL_DIR
+    - name: Build CPython out-of-tree
+      working-directory: ${{ env.CPYTHON_BUILDDIR }}
+      run: make -j4
+    - name: Display build info
+      working-directory: ${{ env.CPYTHON_BUILDDIR }}
+      run: make pythoninfo
+    - name: Remount sources writable for tests
+      # some tests write to srcdir, lack of pyc files slows down testing
+      run: sudo mount $CPYTHON_RO_SRCDIR -oremount,rw
+    - name: Setup directory envs for out-of-tree builds
+      run: |
+        echo "CPYTHON_BUILDDIR=$(realpath -m ${GITHUB_WORKSPACE}/../cpython-builddir)" >> $GITHUB_ENV
+    - name: "Create hypothesis venv"
+      working-directory: ${{ env.CPYTHON_BUILDDIR }}
+      run: |
+        VENV_LOC=$(realpath -m .)/hypovenv
+        VENV_PYTHON=$VENV_LOC/bin/python
+        echo "HYPOVENV=${VENV_LOC}" >> $GITHUB_ENV
+        echo "VENV_PYTHON=${VENV_PYTHON}" >> $GITHUB_ENV
+        ./python -m venv $VENV_LOC && $VENV_PYTHON -m pip install -r ${GITHUB_WORKSPACE}/Tools/requirements-hypothesis.txt
+    - name: 'Restore Hypothesis database'
+      id: cache-hypothesis-database
+      uses: actions/cache@v3
+      with:
+        path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/
+        key: hypothesis-database-${{ github.head_ref || github.run_id }}
+        restore-keys: |
+          hypothesis-database-
+    - name: "Run tests"
+      working-directory: ${{ env.CPYTHON_BUILDDIR }}
+      run: |
+        # Most of the excluded tests are slow test suites with no property tests
+        #
+        # (GH-104097) test_sysconfig is skipped because it has tests that are
+        # failing when executed from inside a virtual environment.
+        ${{ env.VENV_PYTHON }} -m test \
+          -W \
+          -o \
+          -j4 \
+          -x test_asyncio \
+          -x test_multiprocessing_fork \
+          -x test_multiprocessing_forkserver \
+          -x test_multiprocessing_spawn \
+          -x test_concurrent_futures \
+          -x test_socket \
+          -x test_subprocess \
+          -x test_signal \
+          -x test_sysconfig
+    - uses: actions/upload-artifact@v4
+      if: always()
+      with:
+        name: hypothesis-example-db
+        path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/examples/
+
+
+  build_asan:
+    name: 'Address sanitizer'
+    runs-on: ubuntu-20.04
+    timeout-minutes: 60
+    needs: check_source
+    if: needs.check_source.outputs.run_tests == 'true'
+    env:
+      OPENSSL_VER: 3.0.11
+      PYTHONSTRICTEXTENSIONBUILD: 1
+      ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0
+    steps:
+    - uses: actions/checkout@v4
+    - name: Restore config.cache
+      uses: actions/cache@v3
+      with:
+        path: config.cache
+        key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }}
+    - name: Register gcc problem matcher
+      run: echo "::add-matcher::.github/problem-matchers/gcc.json"
+    - name: Install Dependencies
+      run: sudo ./.github/workflows/posix-deps-apt.sh
+    - name: Set up GCC-10 for ASAN
+      uses: egor-tensin/setup-gcc@v1
+      with:
+        version: 10
+    - name: Configure OpenSSL env vars
+      run: |
+        echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> $GITHUB_ENV
+        echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> $GITHUB_ENV
+        echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV
+    - name: 'Restore OpenSSL build'
+      id: cache-openssl
+      uses: actions/cache@v3
+      with:
+        path: ./multissl/openssl/${{ env.OPENSSL_VER }}
+        key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }}
+    - name: Install OpenSSL
+      if: steps.cache-openssl.outputs.cache-hit != 'true'
+      run: python3 Tools/ssl/multissltests.py --steps=library --base-directory $MULTISSL_DIR --openssl $OPENSSL_VER --system Linux
+    - name: Add ccache to PATH
+      run: |
+        echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+    -
name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + - name: Configure CPython + run: ./configure --config-cache --with-address-sanitizer --without-pymalloc --with-region-invariant + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: Tests + run: xvfb-run make buildbottest TESTOPTS="-j4 -uall,-cpu" + + all-required-green: # This job does nothing and is only used for the branch protection + name: All required checks pass + if: always() + + needs: + - check_source # Transitive dependency, needed to access `run_tests` value + - check_generated_files + # - build_win32 + # - build_win_amd64 + # - build_win_arm64 + # - build_macos + - build_ubuntu + # - build_ubuntu_ssltests + - test_hypothesis + - build_asan + + runs-on: ubuntu-latest + + steps: + - name: Check whether the needed jobs succeeded or failed + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe + with: + allowed-failures: >- + test_hypothesis, + allowed-skips: >- + ${{ + needs.check_source.outputs.run_tests != 'true' + && ' + check_generated_files, + build_ubuntu, + build_asan, + ' + || '' + }} + ${{ + !fromJSON(needs.check_source.outputs.run_hypothesis) + && ' + test_hypothesis, + ' + || '' + }} + jobs: ${{ toJSON(needs) }} diff --git a/Doc/conf.py b/Doc/conf.py index dbd75012988442..f6ff1c7f511c3d 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -200,6 +200,7 @@ ('c:data', 'PyExc_ProcessLookupError'), ('c:data', 'PyExc_RecursionError'), ('c:data', 'PyExc_ReferenceError'), + ('c:data', 'PyExc_RegionError'), ('c:data', 'PyExc_RuntimeError'), ('c:data', 'PyExc_StopAsyncIteration'), ('c:data', 'PyExc_StopIteration'), diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index f112d268129fd1..8cb401b6204983 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -238,6 +238,7 @@ var,PyExc_ModuleNotFoundError,3.6,, var,PyExc_NameError,3.2,, var,PyExc_NotADirectoryError,3.7,, var,PyExc_NotImplementedError,3.2,, 
+var,PyExc_NotWriteableError,4.0,, var,PyExc_OSError,3.2,, var,PyExc_OverflowError,3.2,, var,PyExc_PendingDeprecationWarning,3.2,, @@ -245,6 +246,7 @@ var,PyExc_PermissionError,3.7,, var,PyExc_ProcessLookupError,3.7,, var,PyExc_RecursionError,3.7,, var,PyExc_ReferenceError,3.2,, +var,PyExc_RegionError,4.0,, var,PyExc_ResourceWarning,3.7,, var,PyExc_RuntimeError,3.2,, var,PyExc_RuntimeWarning,3.2,, diff --git a/Include/cpython/listobject.h b/Include/cpython/listobject.h index a6a453fc1cb2a5..0d305a5b01ca85 100644 --- a/Include/cpython/listobject.h +++ b/Include/cpython/listobject.h @@ -2,6 +2,8 @@ # error "this header file must not be included directly" #endif +#include "regions.h" // Py_IsImmutable + typedef struct { PyObject_VAR_HEAD /* Vector of pointers to list elements. list[0] is ob_item[0], etc. */ @@ -40,7 +42,7 @@ static inline Py_ssize_t PyList_GET_SIZE(PyObject *op) { static inline void PyList_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value) { - if(_Py_IsImmutable(op)){ // _Py_CHECKWRITE(op) is not available + if(Py_IsImmutable(op)){ // _Py_CHECKWRITE(op) is not available // TODO this should be replaced with a _PyObject_ASSERT_MSG // when veronpy implementation is complete _PyObject_ASSERT_FAILED_MSG(op, "cannot modify immutable object"); diff --git a/Include/cpython/object.h b/Include/cpython/object.h index ae7f780a93182a..413e44f28ec955 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -176,7 +176,7 @@ struct _typeobject { PyBufferProcs *tp_as_buffer; /* Flags to define presence of optional/expanded features */ - unsigned long tp_flags; + uint64_t tp_flags; // Made flags 64 bit to support region flags. 
const char *tp_doc; /* Documentation string */ diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index 9890d1149ba7fc..c3a255fa3db824 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -82,6 +82,12 @@ typedef struct { PyObject *name; } PyAttributeErrorObject; +typedef struct { + PyException_HEAD + PyObject *source; + PyObject *target; +} PyRegionErrorObject; + /* Compatibility typedefs */ typedef PyOSErrorObject PyEnvironmentErrorObject; #ifdef MS_WINDOWS diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 8f044874121741..02847bd63e9860 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -63,7 +63,7 @@ extern PyObject *_PyDict_SetKeyImmutable(PyDictObject *mp, PyObject *key); /* Consumes references to key and value */ extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); -extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); +extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value, PyObject* owner); extern PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObject *); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index e1a0e2af059231..f34c2161e0922a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -13,7 +13,7 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_runtime.h" // _PyRuntime -#include "pycore_regions.h" // _Py_DEFAULT_REGION +#include "pycore_regions.h" // _Py_LOCAL_REGION /* We need to maintain an internal copy of Py{Var}Object_HEAD_INIT to avoid designated initializer conflicts in C++20. If we use the deinition in @@ -24,11 +24,11 @@ extern "C" { are not supported pre-C++20. 
Thus, keeping an internal copy here is the most backwards compatible solution */ #define _PyObject_HEAD_INIT(type) \ - { \ - _PyObject_EXTRA_INIT \ - .ob_refcnt = _Py_IMMORTAL_REFCNT, \ - .ob_type = (type), \ - .ob_region = _Py_DEFAULT_REGION \ + { \ + _PyObject_EXTRA_INIT \ + .ob_refcnt = _Py_IMMORTAL_REFCNT, \ + .ob_type = (type), \ + .ob_region = (Py_region_ptr_with_tags_t){_Py_IMMUTABLE} \ }, #define _PyVarObject_HEAD_INIT(type, size) \ { \ @@ -96,7 +96,7 @@ static inline void _Py_ClearImmortal(PyObject *op) static inline void _Py_SetImmutable(PyObject *op) { if(op) { - op->ob_region = _Py_IMMUTABLE; + Py_SET_REGION(op, _Py_IMMUTABLE); // TODO once reference counting across regions is fully working // we no longer need to make all immutable objects immortal op->ob_refcnt = _Py_IMMORTAL_REFCNT; @@ -177,7 +177,6 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj) { assert(op != NULL); Py_SET_TYPE(op, typeobj); - Py_SET_REGION(op, _Py_DEFAULT_REGION); if (_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE)) { Py_INCREF(typeobj); } diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index ea075f96a3de9f..2996b95d81b578 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -10,13 +10,63 @@ extern "C" { #endif #include "object.h" +#include "regions.h" -#define Py_CHECKWRITE(op) ((op) && _PyObject_CAST(op)->ob_region != _Py_IMMUTABLE) +#define Py_CHECKWRITE(op) ((op) && !Py_IsImmutable(op)) #define Py_REQUIREWRITE(op, msg) {if (Py_CHECKWRITE(op)) { _PyObject_ASSERT_FAILED_MSG(op, msg); }} +Py_region_ptr_t _Py_REGION(PyObject *ob); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_REGION(ob) _Py_REGION(_PyObject_CAST(ob)) +#endif + +void _Py_SET_TAGGED_REGION(PyObject *ob, Py_region_ptr_with_tags_t region); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SET_TAGGED_REGION(ob, region) _Py_SET_TAGGED_REGION(_PyObject_CAST(ob), (region)) +#endif + 
+static inline void _Py_SET_REGION(PyObject *ob, Py_region_ptr_t region) { + _Py_SET_TAGGED_REGION(ob, Py_region_ptr_with_tags(region & Py_REGION_MASK)); +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SET_REGION(ob, region) (_Py_SET_REGION(_PyObject_CAST(ob), _Py_CAST(Py_region_ptr_t, (region)))) +#endif + +/* This makes the given objects and all object reachable from the given + * object immutable. This will also move the objects into the immutable + * region. + * + * The argument is borrowed, meaning that it expects the calling context + * to handle the reference count. + * + * The function will return `Py_None` by default. + */ PyObject* _Py_MakeImmutable(PyObject* obj); #define Py_MakeImmutable(op) _Py_MakeImmutable(_PyObject_CAST(op)) +PyObject* _Py_EnableInvariant(void); +#define Py_EnableInvariant() _Py_EnableInvariant() + +PyObject* _Py_ResetInvariant(void); +#define Py_ResetInvariant() _Py_ResetInvariant() + +// Invariant placeholder +bool _Py_RegionAddReference(PyObject* src, PyObject* new_tgt); +#define Py_REGIONADDREFERENCE(a, b) _Py_RegionAddReference(_PyObject_CAST(a), b) + +void _Py_RegionAddLocalReference(PyObject* new_tgt); +#define Py_REGIONADDLOCALREFERENCE(b) _Py_RegionAddLocalReference(b) + +// Helper macros to count the number of arguments +#define _COUNT_ARGS(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, N, ...) N +#define COUNT_ARGS(...) _COUNT_ARGS(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) + +bool _Py_RegionAddReferences(PyObject* src, int new_tgtc, ...); +#define Py_REGIONADDREFERENCES(a, ...) _Py_RegionAddReferences(_PyObject_CAST(a), COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) + +void _Py_RegionRemoveReference(PyObject* src, PyObject* old_tgt); +#define Py_REGIONREMOVEREFERENCE(a, b) _Py_RegionRemoveReference(_PyObject_CAST(a), b) + #ifdef NDEBUG #define _Py_VPYDBG(fmt, ...) #define _Py_VPYDBGPRINT(fmt, ...) 
@@ -25,7 +75,45 @@ PyObject* _Py_MakeImmutable(PyObject* obj); #define _Py_VPYDBGPRINT(op) PyObject_Print(_PyObject_CAST(op), stdout, 0) #endif +int _Py_CheckRegionInvariant(PyThreadState *tstate); +// Set a cown as parent of a region +void _PyRegion_set_cown_parent(PyObject* region, PyObject* cown); +// Check whether a region is closed +int _PyCown_release(PyObject *self); +int _PyCown_is_released(PyObject *self); +int _PyCown_is_pending_release(PyObject *self); +PyObject *_PyCown_close_region(PyObject* ob); +#define PyCown_close_region(op) _PyCown_close_region(_PyObject_CAST(op)) +int _PyRegion_is_closed(PyObject* op); +#define PyRegion_is_closed(op) _PyRegion_is_closed(_PyObject_CAST(op)) + + +#ifdef _Py_TYPEOF +#define Py_CLEAR_OBJECT_FIELD(op, field) \ + do { \ + _Py_TYPEOF(op)* _tmp_field_ptr = &(field); \ + _Py_TYPEOF(op) _tmp_old_field = (*_tmp_field_ptr); \ + if (_tmp_old_field != NULL) { \ + *_tmp_field_ptr = _Py_NULL; \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_field); \ + Py_DECREF(_tmp_old_field); \ + } \ + } while (0) +#else +#define Py_CLEAR_OBJECT_FIELD(op, field) \ + do { \ + PyObject **_tmp_field_ptr = _Py_CAST(PyObject**, &(op)); \ + PyObject *_tmp_old_field = (*_tmp_field_ptr); \ + if (_tmp_old_field != NULL) { \ + PyObject *_null_ptr = _Py_NULL; \ + memcpy(_tmp_field_ptr, &_null_ptr, sizeof(PyObject*)); \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_field); \ + Py_DECREF(_tmp_old_field); \ + } \ + } while (0) +#endif + #ifdef __cplusplus } #endif -#endif /* !Py_INTERNAL_VERONAPY_H */ \ No newline at end of file +#endif /* !Py_INTERNAL_VERONAPY_H */ diff --git a/Include/object.h b/Include/object.h index 1617956902c3c2..51c981ab7dd0b9 100644 --- a/Include/object.h +++ b/Include/object.h @@ -125,26 +125,47 @@ check by comparing the reference count field to the immortality reference count. 
#define _Py_IMMORTAL_REFCNT (UINT_MAX >> 2) #endif -#define _Py_DEFAULT_REGION ((Py_uintptr_t)0) -#define _Py_IMMUTABLE ((Py_uintptr_t)1) +// This is only a typedef of `Py_uintptr_t` opposed to a custom typedef +// to allow comparisons and make casts easier. +typedef Py_uintptr_t Py_region_ptr_t; +typedef struct { Py_uintptr_t value; } Py_region_ptr_with_tags_t; + +// This is the mask of all used bits to indicate the region. +// this should be used when the region pointer was requested. +// Macros for the individual flags are defined in regions.c. +#define Py_REGION_MASK (~((Py_region_ptr_t)0x2)) +static inline Py_region_ptr_t Py_region_ptr(Py_region_ptr_with_tags_t tagged_region) { + return (Py_region_ptr_t)(tagged_region.value & Py_REGION_MASK); +} + +static inline Py_region_ptr_with_tags_t Py_region_ptr_with_tags(Py_region_ptr_t region) { + return (Py_region_ptr_with_tags_t) { region }; +} + +int _Py_is_bridge_object(PyObject *op); +#define Py_is_bridge_object(op) (_Py_is_bridge_object(_PyObject_CAST(op))) + +#define _Py_LOCAL_REGION ((Py_region_ptr_t)0) +#define _Py_IMMUTABLE ((Py_region_ptr_t)1) +#define _Py_COWN ((Py_region_ptr_t)4) // Make all internal uses of PyObject_HEAD_INIT immortal while preserving the // C-API expectation that the refcnt will be set to 1. 
#ifdef Py_BUILD_CORE -#define PyObject_HEAD_INIT(type) \ - { \ - _PyObject_EXTRA_INIT \ - { _Py_IMMORTAL_REFCNT }, \ - (type), \ - _Py_DEFAULT_REGION \ +#define PyObject_HEAD_INIT(type) \ + { \ + _PyObject_EXTRA_INIT \ + { _Py_IMMORTAL_REFCNT }, \ + (type), \ + (Py_region_ptr_with_tags_t){_Py_IMMUTABLE} \ }, #else -#define PyObject_HEAD_INIT(type) \ - { \ - _PyObject_EXTRA_INIT \ - { 1 }, \ - (type), \ - _Py_DEFAULT_REGION \ +#define PyObject_HEAD_INIT(type) \ + { \ + _PyObject_EXTRA_INIT \ + { 1 }, \ + (type), \ + (Py_region_ptr_with_tags_t){_Py_LOCAL_REGION} \ }, #endif /* Py_BUILD_CORE */ @@ -194,8 +215,7 @@ struct _object { PyTypeObject *ob_type; // VeronaPy: Field used for tracking which region this objects is stored in. - // Bottom bits stolen for distinguishing types of region ptr. - Py_uintptr_t ob_region; + Py_region_ptr_with_tags_t ob_region; }; /* Cast argument to PyObject* type. */ @@ -231,12 +251,6 @@ static inline PyTypeObject* Py_TYPE(PyObject *ob) { # define Py_TYPE(ob) Py_TYPE(_PyObject_CAST(ob)) #endif -static inline Py_uintptr_t Py_REGION(PyObject *ob) { - return ob->ob_region; -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_REGION(ob) Py_REGION(_PyObject_CAST(ob)) -#endif PyAPI_DATA(PyTypeObject) PyLong_Type; PyAPI_DATA(PyTypeObject) PyBool_Type; @@ -269,12 +283,7 @@ static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { # define Py_IS_TYPE(ob, type) Py_IS_TYPE(_PyObject_CAST(ob), (type)) #endif -static inline Py_ALWAYS_INLINE int _Py_IsImmutable(PyObject *op) -{ - return op->ob_region == _Py_IMMUTABLE; -} -#define _Py_IsImmutable(op) _Py_IsImmutable(_PyObject_CAST(op)) - +void _Py_notify_regions_in_use(void); static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { // This immortal check is for code that is unaware of immortal objects. 
@@ -307,14 +316,6 @@ static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { # define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size)) #endif -static inline void Py_SET_REGION(PyObject *ob, Py_uintptr_t region) { - ob->ob_region = region; -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SET_REGION(ob, region) Py_SET_REGION(_PyObject_CAST(ob), (region)) -#endif - - /* Type objects contain a string containing the type name (to help somewhat in debugging), the allocation parameters (see PyObject_New() and @@ -580,6 +581,10 @@ given type object has a specified feature. #define Py_TPFLAGS_BASE_EXC_SUBCLASS (1UL << 30) #define Py_TPFLAGS_TYPE_SUBCLASS (1UL << 31) +/* Used to indicate that a type is aware of the region model, and + can be trusted to correctly modify the region topology.*/ +#define Py_TPFLAGS_REGION_AWARE (1UL << 32) + #define Py_TPFLAGS_DEFAULT ( \ Py_TPFLAGS_HAVE_STACKLESS_EXTENSION | \ 0) diff --git a/Include/pyerrors.h b/Include/pyerrors.h index 9dd230d3adcd60..659de5d49904b0 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -121,7 +121,6 @@ PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError; PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError; PyAPI_DATA(PyObject *) PyExc_ValueError; PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError; -PyAPI_DATA(PyObject *) PyExc_NotWriteableError; #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_DATA(PyObject *) PyExc_BlockingIOError; @@ -141,6 +140,12 @@ PyAPI_DATA(PyObject *) PyExc_ProcessLookupError; PyAPI_DATA(PyObject *) PyExc_TimeoutError; #endif +/* Pyrona Exceptions */ +PyAPI_DATA(PyObject *) PyExc_NotWriteableError; +// FIXME(xFrednet): We probably want finer error granualrity +// to destinqush the kind of error and if the system is in a +// valid state after the execption. 
+PyAPI_DATA(PyObject *) PyExc_RegionError; /* Compatibility aliases */ PyAPI_DATA(PyObject *) PyExc_EnvironmentError; diff --git a/Include/regions.h b/Include/regions.h new file mode 100644 index 00000000000000..1cfc49ce0984b6 --- /dev/null +++ b/Include/regions.h @@ -0,0 +1,23 @@ +#ifndef Py_REGIONS_H +#define Py_REGIONS_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +PyAPI_FUNC(int) _Py_IsImmutable(PyObject *op); +#define Py_IsImmutable(op) _Py_IsImmutable(_PyObject_CAST(op)) + +PyAPI_FUNC(int) _Py_IsLocal(PyObject *op); +#define Py_IsLocal(op) _Py_IsLocal(_PyObject_CAST(op)) + +PyAPI_FUNC(int) _Py_IsCown(PyObject *op); +#define Py_IsCown(op) _Py_IsCown(_PyObject_CAST(op)) + +int Py_is_invariant_enabled(void); + +#ifdef __cplusplus +} +#endif +#endif // !Py_REGIONS_H diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py index e034427ecea908..6bac5bff3256d3 100644 --- a/Lib/_compat_pickle.py +++ b/Lib/_compat_pickle.py @@ -138,6 +138,7 @@ "UserWarning", "ValueError", "NotWriteableError", + "RegionError", "Warning", "ZeroDivisionError", ) diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt index c987419663409e..6b1284c838afbf 100644 --- a/Lib/test/exception_hierarchy.txt +++ b/Lib/test/exception_hierarchy.txt @@ -38,6 +38,7 @@ BaseException │ ├── ProcessLookupError │ └── TimeoutError ├── ReferenceError + ├── RegionError ├── RuntimeError │ ├── NotImplementedError │ └── RecursionError diff --git a/Lib/test/test_capi/test_abstract.py b/Lib/test/test_capi/test_abstract.py index e1ec3a17294465..9d68b53614df6b 100644 --- a/Lib/test/test_capi/test_abstract.py +++ b/Lib/test/test_capi/test_abstract.py @@ -297,7 +297,7 @@ def test_object_setitem(self): self.assertRaises(SystemError, setitem, {}, 'a', NULL) self.assertRaises(IndexError, setitem, [], 1, 5) self.assertRaises(TypeError, setitem, [], 'a', 5) - self.assertRaises(TypeError, setitem, (), 1, 5) + self.assertRaises(NotWriteableError, setitem, (), 1, 5) 
self.assertRaises(SystemError, setitem, NULL, 'a', 5) def test_mapping_setitemstring(self): diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 07299a650a1112..14f3699a5e6f78 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1096,12 +1096,12 @@ class MyStr(str): class MyBytes(bytes): __slots__ = () - with self.assertRaises(TypeError): + with self.assertRaises((TypeError, NotWriteableError)): b"a".__class__ = MyBytes class MyTuple(tuple): __slots__ = () - with self.assertRaises(TypeError): + with self.assertRaises(NotWriteableError): ().__class__ = MyTuple class MyFrozenSet(frozenset): @@ -4082,7 +4082,7 @@ class D(C): try: list.__bases__ = (dict,) - except TypeError: + except NotWriteableError: pass else: self.fail("shouldn't be able to assign to list.__bases__") diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index bca4915e0fa673..330fed2d5862db 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -707,7 +707,7 @@ def non_Python_modules(): r""" >>> import builtins >>> tests = doctest.DocTestFinder().find(builtins) - >>> 830 < len(tests) < 860 # approximate number of objects with docstrings + >>> 830 < len(tests) < 900 # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests diff --git a/Lib/test/test_regions_dictobject.py b/Lib/test/test_regions_dictobject.py new file mode 100644 index 00000000000000..d2d4164b34a670 --- /dev/null +++ b/Lib/test/test_regions_dictobject.py @@ -0,0 +1,80 @@ +import unittest + +class TestRegionsDictObject(unittest.TestCase): + def setUp(self): + enableinvariant() + + def test_dict_insert_empty_dict(self): + # Create Region with Empty dictionary + r = Region() + d = {} + r.body = d + n = {} + # Add local object to region + d["foo"] = n + self.assertTrue(r.owns_object(n)) + + def test_dict_insert_nonempty_dict(self): + # Create Region with Nonempty 
dictionary + r = Region() + d = {} + d["bar"] = 1 + r.body = d + # Add local object to region + n = {} + d["foo"] = n + self.assertTrue(r.owns_object(n)) + + def test_dict_update_dict(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + n1 = {} + d["foo"] = n1 + r.body = d + # Update dictionary to contain a local object + n2 = {} + d["foo"] = n2 + self.assertTrue(r.owns_object(n2)) + + def test_dict_clear(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + n = {} + d["foo"] = n + r.body = d + # Clear dictionary + d.clear() + # As LRC is not checked by the invariant, this test cannot + # check anything useful yet. + + def test_dict_copy(self): + r = Region() + d = {} + r.body = d + r2 = Region() + d["foo"] = r2 + d.copy() + + def test_dict_setdefault(self): + r = Region("outer") + d = {} + r.body = d + r2 = Region("inner") + d["foo"] = r2 + d.setdefault("foo", r2) + self.assertRaises(RegionError, d.setdefault, "bar", r2) + + def test_dict_update(self): + # Create a region containing two dictionaries + r = Region() + d = {} + r.body = d + d2 = {} + r.body2 = d2 + # Add a contained region to the first dictionary + d["reg"] = Region() + # Update the second dictionary to contain the elements of the first + self.assertRaises(RegionError, d2.update, d) + self.assertRaises(RegionError, d2.update, d) diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 8cad71c7c34545..cf0c367388e31a 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -264,6 +264,7 @@ def test_windows_feature_macros(self): "PyExc_NameError", "PyExc_NotADirectoryError", "PyExc_NotImplementedError", + "PyExc_NotWriteableError", "PyExc_OSError", "PyExc_OverflowError", "PyExc_PendingDeprecationWarning", @@ -271,6 +272,7 @@ def test_windows_feature_macros(self): "PyExc_ProcessLookupError", "PyExc_RecursionError", "PyExc_ReferenceError", + "PyExc_RegionError", "PyExc_ResourceWarning", 
"PyExc_RuntimeError", "PyExc_RuntimeWarning", diff --git a/Lib/test/test_type_aliases.py b/Lib/test/test_type_aliases.py index 8f0a998e1f3dc1..f98a8e48942c1c 100644 --- a/Lib/test/test_type_aliases.py +++ b/Lib/test/test_type_aliases.py @@ -232,7 +232,7 @@ def test_errors(self): class TypeAliasTypeTest(unittest.TestCase): def test_immutable(self): - with self.assertRaises(TypeError): + with self.assertRaises(NotWriteableError): TypeAliasType.whatever = "not allowed" def test_no_subclassing(self): diff --git a/Lib/test/test_type_annotations.py b/Lib/test/test_type_annotations.py index 3dbb35afcb620f..9efa21f04ee37f 100644 --- a/Lib/test/test_type_annotations.py +++ b/Lib/test/test_type_annotations.py @@ -32,9 +32,9 @@ def test_annotations_getset_raises(self): # builtin types don't have __annotations__ (yet!) with self.assertRaises(AttributeError): print(float.__annotations__) - with self.assertRaises(TypeError): + with self.assertRaises(NotWriteableError): float.__annotations__ = {} - with self.assertRaises(TypeError): + with self.assertRaises(NotWriteableError): del float.__annotations__ # double delete diff --git a/Lib/test/test_using.py b/Lib/test/test_using.py new file mode 100644 index 00000000000000..f123c2d23521fc --- /dev/null +++ b/Lib/test/test_using.py @@ -0,0 +1,190 @@ +import unittest +from using import * + +# Initial test cases for using and cowns +# Note: no concurrency test yet +class UsingTest(unittest.TestCase): + obj = None + + def setUp(self): + makeimmutable(self.obj) + + def test_cown(self): + def invalid_assignment1(c): + c.value = 42 + def invalid_assignment2(c): + c.f = 42 + def invalid_assignment3(c): + c["g"] = 42 + + c = Cown() + self.assertRaises(AttributeError, invalid_assignment1, c) + self.assertRaises(AttributeError, invalid_assignment2, c) + self.assertRaises(TypeError, invalid_assignment3, c) + # Cannot access unacquired cown + self.assertRaises(RegionError, lambda _ : c.get(), c) + self.assertRaises(RegionError, lambda _ : 
c.set(Region()), c) + + def test_cown_aquired_access(self): + c = Cown() + @using(c) + def _(): + c.set(self.obj) + @using(c) + def _(): + self.assertEqual(c.get(), self.obj) + + # Returns the state of a cown as a string + # Hacky but want to avoid adding methods to cowns just for testing + def hacky_state_check(self, cown, expected_state): + s = repr(cown) + return expected_state in s + + def test_release(self): + r = Region() + c = Cown(r) + self.assertFalse(r.is_open()) + self.assertTrue(self.hacky_state_check(c, "released")) + + def test_early_release_cown(self): + c = Cown() + @using(c) + def _(): + self.assertTrue(self.hacky_state_check(c, "acquired")) + c.set(c) + self.assertTrue(self.hacky_state_check(c, "released")) + self.assertTrue(self.hacky_state_check(c, "released")) + + def test_early_release_closed_region(self): + c = Cown() + self.assertTrue(self.hacky_state_check(c, "released")) + @using(c) + def _(): + self.assertTrue(self.hacky_state_check(c, "acquired")) + r = Region() + self.assertFalse(r.is_open()) + c.set(r) + self.assertTrue(self.hacky_state_check(c, "released")) + self.assertTrue(self.hacky_state_check(c, "released")) + + def test_early_release_immutable(self): + c = Cown() + @using(c) + def _(): + self.assertTrue(self.hacky_state_check(c, "acquired")) + c.set(self.obj) + self.assertTrue(self.hacky_state_check(c, "released")) + self.assertTrue(self.hacky_state_check(c, "released")) + + def test_pending_release(self): + r = Region() + r.open() + self.assertTrue(r.is_open()) + c = Cown(r) + r = None + self.assertTrue(self.hacky_state_check(c, "pending-release")) + c.get().close() + self.assertTrue(self.hacky_state_check(c, "released")) + + def test_acquire(self): + c = Cown(Region()) + self.assertTrue(self.hacky_state_check(c, "released")) + @using(c) + def _(): + r = c.get() + r.open() + self.assertTrue(self.hacky_state_check(c, "acquired")) + r = None + c.get().close() + self.assertTrue(self.hacky_state_check(c, "acquired")) + 
self.assertTrue(self.hacky_state_check(c, "released")) + + def test_region_cown_ptr(self): + r = Region() + r.f = Cown() + self.assertTrue(True) + + def test_invalid_cown_init(self): + # Create cown with invalid init value + self.assertRaises(RegionError, Cown, [42]) + + def test_threads(self): + from threading import Thread + from using import using + + + class Counter(object): + def __init__(self, value): + self.value = value + + def inc(self): + self.value += 1 + + def dec(self): + self.value -= 1 + + def __repr__(self): + return "Counter(" + str(self.value) + ")" + + + # Freezes the **class** -- not needed explicitly later + makeimmutable(Counter) + + def ThreadSafeValue(value): + r = Region("counter region") + r.value = value + c = Cown(r) + # Dropping value, r and closing not needed explicitly later + del value + del r + c.get().close() + return c + + def work(c): + for _ in range(0, 100): + c.inc() + + def work_in_parallel(c): + @using(c) + def _(): + work(c.get().value) + + + c = ThreadSafeValue(Counter(0)) + + t1 = Thread(target=work_in_parallel, args=(c,)) + t2 = Thread(target=work_in_parallel, args=(c,)) + t1.start() + t2.start() + t1.join() + t2.join() + + result = 0 + @using(c) + def _(): + nonlocal result + result = c.get().value.value + if result != 200: + self.fail() + + def test_thread_creation(self): + from using import PyronaThread as T + + class Mutable: pass + self.assertRaises(RuntimeError, lambda x: T(target=print, args=(Mutable(),)), None) + self.assertRaises(RuntimeError, lambda x: T(target=print, kwargs={'a' : Mutable()}), None) + self.assertRaises(RuntimeError, lambda x: T(target=print, args=(Mutable(),), kwargs={'a' : Mutable()}), None) + self.assertRaises(RuntimeError, lambda x: T(target=print, args=(Mutable(), 42)), None) + self.assertRaises(RuntimeError, lambda x: T(target=print, args=(Mutable(), Cown())), None) + self.assertRaises(RuntimeError, lambda x: T(target=print, args=(Mutable(), Region())), None) + + T(target=print, 
kwargs={'imm' : 42, 'cown' : Cown(), 'region' : Region()}) + T(target=print, kwargs={'a': 42}) + T(target=print, kwargs={'a': Cown()}) + T(target=print, kwargs={'a': Region()}) + + T(target=print, args=(42, Cown(), Region())) + T(target=print, args=(42,)) + T(target=print, args=(Cown(),)) + T(target=print, args=(Region(),)) + self.assertTrue(True) # To make sure we got here correctly diff --git a/Lib/test/test_veronapy.py b/Lib/test/test_veronapy.py index 1b02a43959ca74..f64e4abdcd92c7 100644 --- a/Lib/test/test_veronapy.py +++ b/Lib/test/test_veronapy.py @@ -389,6 +389,398 @@ def test_weakref(self): # self.assertTrue(c.val() is obj) self.assertIsNone(c.val()) +class TestRegionOwnership(unittest.TestCase): + class A: + pass + + def setUp(self): + # This freezes A and super and meta types of A namely `type` and `object` + makeimmutable(self.A) + enableinvariant() + + def test_default_ownership(self): + a = self.A() + r = Region() + self.assertFalse(r.owns_object(a)) + + def test_add_ownership(self): + a = self.A() + r = Region() + r.add_object(a) + self.assertTrue(r.owns_object(a)) + + def test_remove_ownership(self): + a = self.A() + r = Region() + r.add_object(a) + r.remove_object(a) + self.assertFalse(r.owns_object(a)) + + def test_add_ownership2(self): + a = self.A() + r1 = Region() + r2 = Region() + r1.add_object(a) + self.assertFalse(r2.owns_object(a)) + + def test_add_object_is_deep(self): + # Create linked objects (a) -> (b) -> (c) + a = self.A() + b = self.A() + c = self.A() + a.b = b + b.c = c + + # Create a region and take ownership of a + r = Region() + r.add_object(a) + + # Check that b was also moved into the region + self.assertTrue(r.owns_object(a)) + self.assertTrue(r.owns_object(b)) + self.assertTrue(r.owns_object(c)) + + def test_should_fail_add_ownership_twice_2(self): + a = self.A() + r1 = Region("r1") + r1.add_object(a) + r2 = Region("r2") + try: + r2.add_object(a) + except RegionError as e: + self.assertEqual(e.source, r2) + 
self.assertEqual(e.target, a) + else: + self.fail("Should not reach here -- a can't be owned by two objects") + + def test_init_with_name(self): + r1 = Region() + r2 = Region("Super-name") + self.assertTrue("Super-name" in repr(r2)) + + r3_name = "Trevligt-Name" + r3a = Region(r3_name) + r3b = Region(r3_name) + self.assertTrue(r3_name in repr(r3a)) + self.assertTrue(r3_name in repr(r3b)) + self.assertTrue(isimmutable(r3_name)) + + def test_init_invalid_name(self): + self.assertRaises(TypeError, Region, 42) + + def test_init_same_name(self): + r1 = Region("Andy") + r2 = Region("Andy") + # Check that we reach the end of the test + self.assertTrue(True) + + def test_region__dict__(self): + r = Region() + r.f = self.A() + # The above line will fail unless the region has gotten a dict + self.assertTrue(True) + + def test_object__dict__(self): + r = Region() + a = self.A() + b = self.A() + r.add_object(b) + r.f = a + a.f = b + d = a.__dict__ + self.assertTrue(r.owns_object(d)) + self.assertTrue(r.owns_object(a)) + self.assertTrue(r.owns_object(b)) + + def test_allow_bridge_object_ref(self): + # Create linked objects (a) -> (b) + a = self.A() + b = Region("Child") + a.b = b + + # Create a region and take ownership of a + r = Region("Parent") + r.add_object(a) + self.assertFalse(r.owns_object(b)) + self.assertTrue(r.owns_object(a)) + + def test_should_fail_external_uniqueness(self): + a = self.A() + r1 = Region("r1") + # Two refs from the local region are allowed + a.f = r1 + a.g = r1 + r2 = Region("r2") + try: + r2.add_object(a) + except RegionError as e: + # Check that the error is on the appropriate objects + self.assertEqual(e.source, a) + self.assertEqual(e.target, r1) + else: + self.fail("Should not reach here -- a can't be owned by two objects") + +class TestTryCloseRegion(unittest.TestCase): + class A: + pass + + def setUp(self): + # This freezes A and super and meta types of A namely `type` and `object` + makeimmutable(self.A) + + def 
test_new_region_is_closed(self): + r1 = Region("r1") + self.assertFalse(r1.is_open()) + self.assertTrue(r1.try_close()) + # Check it remained closed after the `try_close` call + self.assertFalse(r1.is_open()) + + def test_try_close_with_bridge_ref(self): + r1 = Region("r1") + + # Create a local reference + r1_ref = r1 + + # The region is still marked as closed since the write barrier + # doesn't catch the new `r1_ref` reference + self.assertFalse(r1.is_open(), "Should fail once WB on the Frame is in place") + + # Closing the region fails due to `r1_ref` + self.assertFalse(r1.try_close()) + # The open status was now updated + self.assertTrue(r1.is_open()) + + # Remove the local reference + r1_ref = None + + # Closing the region should now succeed + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + + def test_try_close_with_bridge_ref_owned_by_region(self): + r1 = Region("r1") + r1.r2 = Region("r2") + + # Create a local reference + r2_ref = r1.r2 + + # The region is still marked as closed since the write barrier + # doesn't catch the new `r1_ref` reference + self.assertFalse(r1.r2.is_open(), "Should fail once WB on the Frame is in place") + + # Closing the region fails due to `r1_ref` + self.assertFalse(r1.r2.try_close()) + # The open status was now updated + self.assertTrue(r1.r2.is_open()) + + # Remove the local reference + r2_ref = None + + # Closing the region should now succeed + self.assertTrue(r1.r2.try_close()) + self.assertFalse(r1.r2.is_open()) + + def test_try_close_with_bridge_ref_owned_by_cown(self): + r1 = Region("r1") + r1.a = self.A() + + # Create a local reference + r1_ref = r1 + + # r1 should be open here + self.assertTrue(r1.is_open()) + + # Create a new cown which isn't released yet + c = Cown(r1) + + # Closing the region fails due to `r1_ref` + self.assertFalse(c.get().try_close()) + + # Remove local references + r1 = None + r1_ref = None + + print("Checkout 3") + # Closing the region should now succeed + 
self.assertTrue(c.get().try_close()) + + print("Checkout 5") + # Check that the cown has been released + self.assertRaises(RegionError, lambda _ : c.get(), c) + + def test_try_close_with_contained_ref(self): + r1 = Region("r1") + r1.a = self.A() + + # The region is now open, since we added something to it + # (This is temporary, while the write barrier is not sufficient) + self.assertTrue(r1.is_open()) + + # Create a local reference + a = r1.a + + # Closing the region fails due to `a` + self.assertFalse(r1.try_close()) + self.assertTrue(r1.is_open()) + + # Remove the local reference + a = None + + # Closing the region should now succeed + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + + def test_try_close_sub_region_contained_ref(self): + r1 = Region("r1") + r1.a = self.A() + r1.a.r2 = Region("r2") + r1.a.r2.b = self.A() + + # The regions are now open, since we added something to them + self.assertTrue(r1.is_open()) + self.assertTrue(r1.a.r2.is_open()) + + # Create a local reference + b = r1.a.r2.b + + # Closing the regions fails due to `b` + self.assertFalse(r1.try_close()) + self.assertTrue(r1.is_open()) + + # Remove the local reference + b = None + + # Closing the regions succeed now + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + + def test_try_close_sub_sub_region_contained_ref(self): + r1 = Region("region") + r1.r2 = Region("sub-region") + r1.r2.r3 = Region("sub-sub-region") + r1.r2.r3.a = self.A() + + # Create a local reference to a contained object + a = r1.r2.r3.a + + # The region is now open + self.assertTrue(r1.is_open()) + + # Closing the regions fails due to `a` + self.assertFalse(r1.try_close()) + self.assertTrue(r1.is_open()) + + # Kill the local reference + a = None + + # Closing the regions succeed now + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + + def test_try_close_sub_sub_region_bridge_ref(self): + r1 = Region("region") + r1.r2 = Region("sub-region") + r1.r2.r3 = 
Region("sub-sub-region") + r1.r2.r3.a = self.A() + + # The region is now open + self.assertTrue(r1.is_open()) + + # Closing r3 should succeeed and propagate to r1 + self.assertTrue(r1.r2.r3.try_close()) + self.assertFalse(r1.is_open()) + + # Create a local reference to a bridge + r3 = r1.r2.r3 + + # Manually open r2, while the WB is missing on attributes + r1.r2.a = self.A() + self.assertTrue(r1.r2.is_open()) + + # r3 is still marked as closed due to the missing WB on the frame + # This test should become irrelevant once the WB is in place and + # the open status is correctly tracked. + self.assertFalse(r1.r2.r3.is_open(), "Should fail once WB on the Frame is in place") + + # Closing the regions fails due to `r3` + self.assertFalse(r1.try_close()) + self.assertTrue(r1.is_open()) + + # Kill the local reference + r3 = None + + # Closing the regions succeed now + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + + def test_try_close_with_contained_cycle(self): + r1 = Region("r1") + r1.a = self.A() + r1.a.self = r1.a + r1.a.region = r1 + + # Create a local reference + a = r1.a + + # The region is now open + self.assertTrue(r1.is_open()) + + # Closing the regions fails due to `a` + self.assertFalse(r1.try_close()) + self.assertTrue(r1.is_open()) + + # Remove the local reference + a = None + + # Closing the regions succeed now + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + + def test_try_close_banish_unreachable_contained(self): + r1 = Region("r1") + r1.a = self.A() + + # Create a small tree we can later detach + b = self.A() + b.c = self.A() + r1.add_object(b) # TODO: Remove once the write barrier on objects works + r1.a.b = b + + # Check that r1 owns the objects now + self.assertTrue(r1.owns_object(b)) + self.assertTrue(r1.owns_object(b.c)) + + # Make `b` and `c` unreachable from the bridge + r1.a.b = None + + # `b` and `c` should remain members of r1 + self.assertTrue(r1.owns_object(b)) + self.assertTrue(r1.owns_object(b.c)) + + 
# Closing the regions should succeed but kickout `b` and `c` + self.assertTrue(r1.try_close()) + self.assertFalse(r1.is_open()) + self.assertFalse(r1.owns_object(b)) + self.assertFalse(r1.owns_object(b.c)) + + # A new region could now take ownership of `b` and `c` + r2 = Region("r2") + r2.c = b.c + self.assertTrue(r2.owns_object(b.c)) + + # Closing the regions fails since the local `b` points to `c` + self.assertFalse(r2.try_close()) + self.assertTrue(r2.is_open()) + + # Remove the local reference + b = None + + # Closing the regions succeed now + self.assertTrue(r2.try_close()) + self.assertFalse(r2.is_open()) + + + # This test will make the Python environment unusable. # Should perhaps forbid making the frame immutable. # class TestStackCapture(unittest.TestCase): @@ -414,5 +806,28 @@ def test_global_dict_mutation(self): self.assertTrue(isimmutable(f1)) self.assertRaises(NotWriteableError, f1) +class TestPoolAllocation(unittest.TestCase): + # If pooling does not reset region between allocations, + # then the second call to f will result in `a` being owned by + # the first region that no has been deallocated. This + # will result in a UAF that ASAN can detect. + def test_pool_allocation(self): + def f(): + r = Region() + a = {} + r.add_object(a) + f() + f() + +class TestGenericAliasBug(unittest.TestCase): + # The code inside generic alias attempts to set + # __orig_class__ on the empty tuple, which is not + # allowed. The make immutable means this can fail + # NotWriteableError rather than the TypeError or + # AttributeError that would be raised otherwise. 
+ def test_generic_alias_bug(self): + c = makeimmutable(()) + tuple[int]() + if __name__ == '__main__': unittest.main() diff --git a/Lib/typing.py b/Lib/typing.py index 9e2adbe2214a8a..2d6acf216e7ebe 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -2388,7 +2388,7 @@ def no_type_check(arg): no_type_check(obj) try: arg.__no_type_check__ = True - except TypeError: # built-in classes + except NotWriteableError: # built-in classes pass return arg @@ -2507,7 +2507,7 @@ class Other(Leaf): # Error reported by type checker """ try: f.__final__ = True - except (AttributeError, TypeError): + except (AttributeError, TypeError, NotWriteableError): # Skip the attribute silently if it is not writable. # AttributeError happens if the object has __slots__ or a # read-only property, TypeError if it's a builtin class. diff --git a/Lib/unittest/suite.py b/Lib/unittest/suite.py index 6f45b6fe5f6039..0c8623e1091bf3 100644 --- a/Lib/unittest/suite.py +++ b/Lib/unittest/suite.py @@ -152,7 +152,7 @@ def _handleClassSetUp(self, test, result): failed = False try: currentClass._classSetupFailed = False - except TypeError: + except NotWriteableError: # test may actually be a function # so its class will be a builtin-type pass diff --git a/Lib/using.py b/Lib/using.py new file mode 100644 index 00000000000000..aafc4e30b1e343 --- /dev/null +++ b/Lib/using.py @@ -0,0 +1,102 @@ +from contextlib import contextmanager + +# This library defines a decorator "@using" that uses blocking semantics. +# A function decorated by a @using will be called as a result of its +# definition. +# +# Example: +# +# @using(c1, c2) +# def _(): +# print(f"c1 and c2 are now acquired") +# +# Assuming c1 and c2 are cowns, the system will block on acquiring them, +# then call the function _ and release c1 and c2 when the function +# terminates. If c1 or c2 are updated with a closed region, a cown or an +# immutable object, c1 or c2 will be released immediately. 
def using(*args):
    """Return a decorator that runs a function while holding a set of cowns.

    Every positional argument must be a ``Cown``.  The returned decorator
    calls the decorated function immediately (with no arguments) and
    evaluates to that call's return value, so the decorated name ends up
    bound to the result rather than to the function.

    Raises:
        Exception: if any argument is not a ``Cown``.  This is checked when
            the decorator is applied, before anything is acquired.
    """
    @contextmanager
    def CS(cowns, *args):
        # Track what was actually acquired so that a failure part-way
        # through does not leave the earlier cowns held forever.
        acquired = []
        try:
            for c in cowns:
                c.acquire()
                acquired.append(c)

            # Yield control to the code inside the 'with' block
            yield args
        finally:
            # Release in reverse acquisition order.
            for c in reversed(acquired):
                c.release()

    def argument_check(cowns, args):
        for a in args:
            # Append cowns to the list of things that must be acquired
            if isinstance(a, Cown):
                cowns.append(a)
            else:
                raise Exception("Using only works on cowns, "
                                "but was passed " + repr(a))

    def decorator(func):
        cowns = []
        argument_check(cowns, args)

        with CS(cowns, *args):
            return func()
    return decorator

# TODO: this creates a normal Python thread and ensures that all its
# arguments are moved to the new thread. Eventually we should revisit
# this behaviour as we go multiple interpreters / multicore.
# TODO: require RC to be one less when move is upstreamed
def PyronaThread(group=None, target=None, name=None,
                 args=(), kwargs=None, *, daemon=None):
    """Create a ``threading.Thread`` after validating what is passed to it.

    The parameters mirror ``threading.Thread``.  Every element of ``args``
    and every value of ``kwargs`` must be immutable, a ``Cown``, or a closed
    ``Region`` whose reference count shows it was moved into the call.

    Returns:
        An unstarted ``threading.Thread``.

    Raises:
        RuntimeError: if an argument is not shareable or movable.
    """
    # Only check when a program uses pyrona
    from sys import getrefcount as rc
    from threading import Thread
    # TODO: improve this check for final version of phase 3
    # - Revisit the rc checks
    # - Consider throwing a different kind of error (e.g. RegionError)
    # - Improve error messages
    def ok_share(o):
        if isimmutable(o):
            return True
        if isinstance(o, Cown):
            return True
        return False

    def ok_move(o):
        if isinstance(o, Region):
            if rc(o) != 5:
                # rc = 5 because:
                # 1. ref to o in rc
                # 2. ref to o on this frame (ok_move)
                # 3. ref to o on the calling frame (check)
                # 4. ref to o from iteration over kwargs dictionary or args tuple/list
                # 5. ref to o from kwargs dictionary or args tuple/list
                raise RuntimeError("Region passed to thread was not moved into thread")
            if o.is_open():
                raise RuntimeError("Region passed to thread was open")
            return True
        return False

    def check(a, args):
        # rc(args) == 4 because we need to know that the args list is moved into the thread too
        # rc = 4 because:
        # 1. ref to args in rc
        # 2. ref to args on this frame
        # 3. ref to args on the calling frame
        # 4. ref from frame calling PyronaThread -- FIXME: not valid; revisit after #45
        if not (ok_share(a) or (ok_move(a) and rc(args) == 4)):
            raise RuntimeError("Thread was passed an object which was neither immutable, a cown, or a unique region")

    # The loops stay inline in this frame (no helper, no comprehension):
    # the reference counts tested in ok_move()/check() depend on exactly
    # which frames hold a reference at the time of the call.
    for a in args:
        check(a, args)

    if kwargs is not None:
        for k in kwargs:
            # Important to get matching RCs in both paths
            v = kwargs[k]
            check(v, kwargs)

    # Thread's signature is (group, target, name, args, kwargs, *, daemon).
    # Both containers are forwarded and daemon is passed by keyword, so it
    # can no longer fall into the kwargs slot.
    return Thread(group, target, name, args, kwargs, daemon=daemon)
+ Cown_ACQUIRED = 1, + Cown_PENDING_RELEASE = 2, +} CownState; + +typedef struct PyCownObject { + PyObject_HEAD + _Py_atomic_int state; + size_t owning_thread; + sem_t semaphore; + PyObject* value; +} PyCownObject; + +static PyObject *PyCown_set_unchecked(PyCownObject *self, PyObject *arg); +static PyObject *PyCown_set(PyCownObject *self, PyObject *arg); +static PyObject *PyCown_get(PyCownObject *self, PyObject *ignored); +static PyObject *PyCown_acquire(PyCownObject *self, PyObject *ignored); + +#define POSIX_FAIL_GUARD(exp) \ + if ((exp)) { \ + fprintf(stderr, "Unsuccessful return from %s", #exp); \ + abort(); \ + } + +static void PyCown_dealloc(PyCownObject *self) { + POSIX_FAIL_GUARD(sem_destroy(&self->semaphore)); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_UnTrack((PyObject *)self); + Py_TRASHCAN_BEGIN(self, PyCown_dealloc) + Py_CLEAR(self->value); + PyObject_GC_Del(self); + Py_DECREF(tp); + Py_TRASHCAN_END +} + +static int PyCown_init(PyCownObject *self, PyObject *args, PyObject *kwds) { + // TODO: Pyrona: should not be needed in the future + _Py_MakeImmutable(_PyObject_CAST(Py_TYPE(self))); + _Py_notify_regions_in_use(); + + POSIX_FAIL_GUARD(sem_init(&self->semaphore, 0, 0)); + Py_SET_REGION(self, _Py_COWN); + + static char *kwlist[] = {"value", NULL}; + PyObject *value = NULL; + + // See if we got a value as a keyword argument + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, &value)) { + return -1; // Return -1 on failure + } + + if (value) { + PyObject* result = PyCown_set_unchecked(self, value); + // Propagate errors from set_unchecked + if (result == NULL) return -1; + + } else { + _Py_atomic_store(&self->state, Cown_RELEASED); + self->value = Py_None; + } + return 0; +} + +static int PyCown_traverse(PyCownObject *self, visitproc visit, void *arg) { + Py_VISIT(self->value); + return 0; +} + +#define STATE(op) op->state._value + +#define BAIL_IF_OWNED(o, msg) \ + do { \ + /* Note: we must hold the GIL at this point -- note for future 
threading implementation. */ \ + size_t tid = o->owning_thread; \ + if (tid != 0) { \ + PyErr_Format(PyExc_RegionError, "%s: %S -- %zd", msg, o, tid); \ + return NULL; \ + } \ + } while(0); + +#define BAIL_UNLESS_OWNED(o, msg) \ + do { \ + /* Note: we must hold the GIL at this point -- note for future threading implementation. */ \ + PyThreadState *tstate = PyThreadState_Get(); \ + if (o->owning_thread != tstate->thread_id) { \ + PyErr_Format(PyExc_RegionError, "%s: %S", msg, o); \ + return NULL; \ + } \ + } while(0); + +#define BAIL_UNLESS_IN_STATE(o, expected_state, msg) \ + do { \ + /* Note: we must hold the GIL at this point -- note for future threading implementation. */ \ + if (STATE(o) != expected_state) { \ + PyErr_Format(PyExc_RegionError, "%s: %S", msg, o); \ + return NULL; \ + } \ + } while(0); + +#define BAIL_UNLESS_ACQUIRED(o, msg) \ + BAIL_UNLESS_OWNED(o, msg) \ + BAIL_UNLESS_IN_STATE(o, Cown_ACQUIRED, msg) + +// The ignored argument is required for this function's type to be +// compatible with PyCFunction +static PyObject *PyCown_acquire(PyCownObject *self, PyObject *ignored) { + PyThreadState *tstate = PyThreadState_Get(); + + // TODO: Pyrona: releasing the GIL will eventually not be necessary here + Py_BEGIN_ALLOW_THREADS + int expected = Cown_RELEASED; + + // TODO: Pyrona: eventually replace this with something from pycore_atomic (nothing there now) + while (!atomic_compare_exchange_strong(&self->state._value, &expected, Cown_ACQUIRED)) { + expected = Cown_RELEASED; + sem_wait(&self->semaphore); + } + + // Note: we must hold the GIL at this point -- note for future + // threading implementation. 
+ self->owning_thread = tstate->thread_id; + Py_END_ALLOW_THREADS + + Py_RETURN_NONE; +} + +// The ignored argument is required for this function's type to be +// compatible with PyCFunction +static PyObject *PyCown_release(PyCownObject *self, PyObject *ignored) { + if (STATE(self) == Cown_RELEASED) { + BAIL_IF_OWNED(self, "BUG: Released cown had owning thread: %p"); + Py_RETURN_NONE; + } + + BAIL_UNLESS_OWNED(self, "Thread attempted to release a cown it did not own"); + + if (self->value && Py_TYPE(self->value) == &PyRegion_Type) { + if (PyCown_close_region(self->value) == NULL) { + // Close region failed -- propagate its error + return NULL; + } + } + + self->owning_thread = 0; + _Py_atomic_store(&self->state, Cown_RELEASED); + sem_post(&self->semaphore); + + Py_RETURN_NONE; +} + +int _PyCown_release(PyObject *self) { + PyObject* res = PyCown_release((PyCownObject *)self, NULL); + return res == Py_None ? 0 : -1; +} + +int _PyCown_is_released(PyObject *self) { + PyCownObject *cown = (PyCownObject *)self; + return STATE(cown) == Cown_RELEASED; +} + +int _PyCown_is_pending_release(PyObject *self) { + assert(Py_TYPE(self) == &PyCown_Type && "Is pending release called on non-cown!"); + + PyCownObject *cown = _Py_CAST(PyCownObject *, self); + return STATE(cown) == Cown_PENDING_RELEASE; +} + +// The ignored argument is required for this function's type to be +// compatible with PyCFunction +static PyObject *PyCown_get(PyCownObject *self, PyObject *ignored) { + BAIL_UNLESS_OWNED(self, "Attempt to get value of unacquired cown"); + + if (self->value) { + return Py_NewRef(self->value); + } else { + Py_RETURN_NONE; + } +} + +static PyObject *PyCown_set_unchecked(PyCownObject *self, PyObject *arg) { + // Cowns are cells that hold a reference to a bridge object, + // (or another cown or immutable object) + const bool arg_is_region_object = + Py_IS_TYPE(arg, &PyRegion_Type) && _Py_is_bridge_object(arg); + if (arg_is_region_object || + arg->ob_type == &PyCown_Type || + 
_Py_IsImmutable(arg)) { + + PyObject* old = self->value; + Py_XINCREF(arg); + self->value = arg; + + // Tell the region that it is owned by a cown, + // to enable it to release the cown on close + if (arg_is_region_object) { + _PyRegion_set_cown_parent(arg, _PyObject_CAST(self)); + // TODO: Pyrona: should not run try close here unless dirty at the end of phase 3 + // if (_PyCown_close_region(arg) == Py_None) { + if (_PyRegion_is_closed(arg)) { + if (PyCown_release(self, NULL) == NULL) { + PyErr_Clear(); + } + } else { + _Py_atomic_store(&self->state, Cown_PENDING_RELEASE); + PyThreadState *tstate = PyThreadState_Get(); + self->owning_thread = tstate->thread_id; + } + } else { + // We can release this cown immediately + PyCown_release(self, NULL); + } + + return old ? old : Py_None; + } else { + // Invalid cown content + PyErr_SetString(PyExc_RegionError, + "Cowns can only store bridge objects, immutable objects or other cowns!"); + return NULL; + } +} + +static PyObject *PyCown_set(PyCownObject *self, PyObject *arg) { + BAIL_UNLESS_ACQUIRED(self, "Attempt to set value of unacquired cown"); + return PyCown_set_unchecked(self, arg); +} + +static int PyCown_clear(PyCownObject *self) { + Py_CLEAR(self->value); + return 0; +} + +static PyObject *PyCown_repr(PyCownObject *self) { +#ifdef PYDEBUG + if (STATE(self) == Cown_ACQUIRED) { + return PyUnicode_FromFormat( + "Cown(status=acquired by thread %zd,value=%S)", + PyThreadState_Get()->thread_id, + PyObject_Repr(self->value) + ); + } else { + return PyUnicode_FromFormat( + "Cown(status=%s,value=%S)", + STATE(self) == Cown_RELEASED + ? "released" + : "pending-release", + PyObject_Repr(self->value) + ); + } +#else + if (STATE(self) == Cown_ACQUIRED) { + return PyUnicode_FromFormat( + "Cown(status=acquired by thread %zd)", + PyThreadState_Get()->thread_id + ); + } else { + return PyUnicode_FromFormat( + "Cown(status=%s)", + STATE(self) == Cown_RELEASED + ? 
"released" + : "pending-release" + ); + } +#endif +} + +// Define the CownType with methods +static PyMethodDef PyCown_methods[] = { + {"acquire", (PyCFunction)PyCown_acquire, METH_NOARGS, "Acquire the cown."}, + {"release", (PyCFunction)PyCown_release, METH_NOARGS, "Release the cown."}, + {"get", (PyCFunction)PyCown_get, METH_NOARGS, "Get contents of acquired cown."}, + {"set", (PyCFunction)PyCown_set, METH_O, "Set contents of acquired cown."}, + {NULL} // Sentinel +}; + + +PyTypeObject PyCown_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "Cown", /* tp_name */ + sizeof(PyCownObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PyCown_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)PyCown_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)PyCown_traverse, /* tp_traverse */ + (inquiry)PyCown_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + PyCown_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)PyCown_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a7867ad70431b6..b0e7db70e3b6b2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -119,9 +119,12 @@ As a consequence of this, split keys have a maximum size of 16. 
#include "pycore_dict.h" // PyDictKeysObject #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() #include "pycore_object.h" // _PyObject_GC_TRACK() +#include "pycore_regions.h" // _PyObject_GC_TRACK() #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_regions.h" // Py_ADDREGIONREFERENCE(), ... (region) #include "stringlib/eq.h" // unicode_eq() +#include "regions.h" // Py_IsImmutable() #include @@ -664,6 +667,10 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) { + // TODO: This feels like it should remove the references in the regions + // but keys is not a Python object, so it's not clear how to do that. + // mjp: Leaving as a TODO for now. + assert(keys != Py_EMPTY_KEYS); if (DK_IS_UNICODE(keys)) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); @@ -786,8 +793,12 @@ new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) } +/* The target represents the dictionary that this object will become part of. + If target is NULL, the object is not part of a freshly allocated dictionary, so should + be considered as part of te local region. 
+*/ static PyDictKeysObject * -clone_combined_dict_keys(PyDictObject *orig) +clone_combined_dict_keys(PyDictObject *orig, PyObject* target) { assert(PyDict_Check(orig)); assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter); @@ -828,6 +839,14 @@ clone_combined_dict_keys(PyDictObject *orig) if (value != NULL) { Py_INCREF(value); Py_INCREF(*pkey); + if (target != NULL) { + if (!Py_REGIONADDREFERENCES(target, *pkey, value)) + return NULL; + } + else { + Py_REGIONADDLOCALREFERENCE(*pkey); + Py_REGIONADDLOCALREFERENCE(value); + } } pvalue += offs; pkey += offs; @@ -1256,6 +1275,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, MAINTAIN_TRACKING(mp, key, value); if (ix == DKIX_EMPTY) { + if (!Py_REGIONADDREFERENCES((PyObject*)mp, key, value)) { + goto Fail; + } uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); /* Insert into new slot. */ @@ -1301,6 +1323,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, } if (old_value != value) { + if (!Py_REGIONADDREFERENCE((PyObject*)mp, value)) { + goto Fail; + } if(DK_IS_UNICODE(mp->ma_keys)){ PyDictUnicodeEntry *ep; ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -1334,6 +1359,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, else { _PyDictEntry_SetValue(DK_ENTRIES(mp->ma_keys) + ix, value); } + Py_REGIONREMOVEREFERENCE((PyObject*)mp, old_value); } mp->ma_version_tag = new_version; } @@ -1363,6 +1389,9 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, return -1; } + if (!Py_REGIONADDREFERENCES((PyObject*)mp, key, value)) + return -1; + uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); @@ -2013,6 +2042,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, assert(ix < SHARED_KEYS_MAX_SIZE); /* Update order */ delete_index_from_values(mp->ma_values, ix); + Py_REGIONREMOVEREFERENCE(mp, old_value); ASSERT_CONSISTENT(mp); } else { @@ -2179,7 +2209,7 @@ PyDict_Clear(PyObject *op) if 
(oldvalues != NULL) { n = oldkeys->dk_nentries; for (i = 0; i < n; i++) - Py_CLEAR(oldvalues->values[i]); + Py_CLEAR_OBJECT_FIELD(op, oldvalues->values[i]); free_values(oldvalues); dictkeys_decref(interp, oldkeys); } @@ -2457,6 +2487,8 @@ dict_dealloc(PyDictObject *mp) Py_TRASHCAN_BEGIN(mp, dict_dealloc) if (values != NULL) { for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) { + PyObject *value = values->values[i]; + Py_REGIONREMOVEREFERENCE(mp, value); Py_XDECREF(values->values[i]); } free_values(values); @@ -2937,7 +2969,7 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_CLONED, mp, b, NULL); - PyDictKeysObject *keys = clone_combined_dict_keys(other); + PyDictKeysObject *keys = clone_combined_dict_keys(other, a); // Need to say what owns the keys? if (keys == NULL) { return -1; } @@ -3140,6 +3172,13 @@ PyDict_Copy(PyObject *o) dictkeys_incref(mp->ma_keys); for (size_t i = 0; i < size; i++) { PyObject *value = mp->ma_values->values[i]; + if (!Py_REGIONADDREFERENCE(split_copy, value)) + { + // TODO: is this safe to dealloc the split_copy? + // is it in a valid enough state to be deallocated? + Py_DECREF(split_copy); + return NULL; + } split_copy->ma_values->values[i] = Py_XNewRef(value); } if (_PyObject_GC_IS_TRACKED(mp)) @@ -3165,7 +3204,7 @@ PyDict_Copy(PyObject *o) operations and copied after that. In cases like this, we defer to PyDict_Merge, which produces a compacted copy. 
*/ - PyDictKeysObject *keys = clone_combined_dict_keys(mp); + PyDictKeysObject *keys = clone_combined_dict_keys(mp, NULL); if (keys == NULL) { return NULL; } @@ -3425,6 +3464,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) return NULL; if (ix == DKIX_EMPTY) { + if (!Py_REGIONADDREFERENCE(mp, defaultobj)) + return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); mp->ma_keys->dk_version = 0; @@ -3465,6 +3506,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) assert(mp->ma_keys->dk_usable >= 0); } else if (value == NULL) { + if (!Py_REGIONADDREFERENCE(mp, defaultobj)) + return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); value = defaultobj; @@ -5505,6 +5548,7 @@ _PyObject_InitializeDict(PyObject *obj) return -1; } PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + Py_REGIONADDREFERENCE(obj, dict); *dictptr = dict; return 0; } @@ -5550,11 +5594,20 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, assert(values != NULL); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - if(!Py_CHECKWRITE(obj)){ + if (!Py_CHECKWRITE(obj)){ PyErr_WriteToImmutable(obj); return -1; } + //TODO: PYRONA: The addition of the key is complex here. + // The keys PyDictKeysObject, might already have the key. Note that + // the keys PyDictKeysObject is not a PyObject. So it is unclear where + // this edge is created. + // The keys is coming from ht_cached_keys on the type object. + // This is also interesting from a race condition perspective. + // Can this be shared, should it be treated immutably when the type is? + // mjp: Leaving for a future PR. 
+ Py_ssize_t ix = DKIX_EMPTY; if (PyUnicode_CheckExact(name)) { ix = insert_into_dictkeys(keys, name); @@ -5586,6 +5639,9 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, return PyDict_SetItem(dict, name, value); } } + if (!Py_REGIONADDREFERENCE(obj, value)) { + return -1; + } PyObject *old_value = values->values[ix]; values->values[ix] = Py_XNewRef(value); if (old_value == NULL) { @@ -5601,6 +5657,7 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, if (value == NULL) { delete_index_from_values(values, ix); } + Py_REGIONREMOVEREFERENCE(obj, old_value); Py_DECREF(old_value); } return 0; @@ -5768,10 +5825,11 @@ PyObject_GenericGetDict(PyObject *obj, void *context) dict = make_dict_from_instance_attributes( interp, CACHED_KEYS(tp), values); if (dict != NULL) { - if (_Py_IsImmutable(obj)) { + if (Py_IsImmutable(obj)) { _Py_SetImmutable(dict); } else { + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -5781,10 +5839,11 @@ PyObject_GenericGetDict(PyObject *obj, void *context) if (dict == NULL) { dictkeys_incref(CACHED_KEYS(tp)); dict = new_dict_with_shared_keys(interp, CACHED_KEYS(tp)); - if (_Py_IsImmutable(obj)) { + if (Py_IsImmutable(obj)) { _Py_SetImmutable(dict); } else { + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -5808,10 +5867,11 @@ PyObject_GenericGetDict(PyObject *obj, void *context) else { dict = PyDict_New(); } - if (_Py_IsImmutable(obj)) { + if (Py_IsImmutable(obj)) { _Py_SetImmutable(dict); } else { + Py_REGIONADDREFERENCE(obj, dict); *dictptr = dict; } } @@ -5821,7 +5881,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, - PyObject *key, PyObject *value) + PyObject *key, PyObject *value, PyObject* owner) { PyObject *dict; int res; @@ -5837,12 +5897,15 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, dict = new_dict_with_shared_keys(interp, cached); if (dict == NULL) return -1; + 
Py_REGIONADDREFERENCE(owner, dict); *dictptr = dict; } if (value == NULL) { + // Pyrona: Remove reference is called by `DelItem` res = PyDict_DelItem(dict, key); } else { + // Pyrona: Add and remove reference is called by `SetItem` res = PyDict_SetItem(dict, key, value); } } else { @@ -5851,11 +5914,14 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, dict = PyDict_New(); if (dict == NULL) return -1; + Py_REGIONADDREFERENCE(owner, dict); *dictptr = dict; } if (value == NULL) { + // Pyrona: Remove reference is called by `DelItem` res = PyDict_DelItem(dict, key); } else { + // Pyrona: Add and remove reference is called by `SetItem` res = PyDict_SetItem(dict, key, value); } } @@ -6067,4 +6133,4 @@ _PyDict_IsKeyImmutable(PyObject* op, PyObject* key) PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + ix; return _PyDictEntry_IsImmutable(ep); } -} \ No newline at end of file +} diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 5008811212703a..7f8663146ef2f4 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3430,9 +3430,63 @@ PyObject *PyExc_MemoryError = (PyObject *) &_PyExc_MemoryError; */ SimpleExtendsException(PyExc_Exception, BufferError, "Buffer error."); +/* Pyrona Exceptions */ +/* + * NotWriteableError extends Exception + */ SimpleExtendsException(PyExc_Exception, NotWriteableError, "Object is not writeable."); +static int RegionError_init(PyRegionErrorObject *self, PyObject *args, PyObject *kwds) { + PyObject *source = NULL; + PyObject *target = NULL; + if (!PyArg_ParseTuple(args, "|OO", &source, &target)) { + return -1; + } + Py_XSETREF(self->source, Py_XNewRef(source)); + Py_XSETREF(self->target, Py_XNewRef(target)); + return 0; +} + +static int +RegionError_clear(PyRegionErrorObject *self) +{ + Py_CLEAR(self->source); + Py_CLEAR(self->target); + return BaseException_clear((PyBaseExceptionObject *)self); +} + +static void +RegionError_dealloc(PyRegionErrorObject *self) +{ + _PyObject_GC_UNTRACK(self); + 
RegionError_clear(self); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +RegionError_traverse(PyRegionErrorObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->source); + Py_VISIT(self->target); + return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); +} + +static PyMemberDef RegionError_members[] = { + {"source", T_OBJECT, offsetof(PyRegionErrorObject, source), 0, PyDoc_STR("source")}, + {"target", T_OBJECT, offsetof(PyRegionErrorObject, target), 0, PyDoc_STR("target")}, + {NULL} /* Sentinel */ +}; + +static PyMethodDef RegionError_methods[] = { + {NULL} /* Sentinel */ +}; + +ComplexExtendsException(PyExc_Exception, RegionError, + RegionError, 0, + RegionError_methods, RegionError_members, + 0, BaseException_str, + "A reference violates the rules of ownership"); /* Warning category docstrings */ @@ -3620,6 +3674,7 @@ static struct static_exception static_exceptions[] = { ITEM(ValueError), ITEM(NotWriteableError), ITEM(Warning), + ITEM(RegionError), // Level 4: ArithmeticError(Exception) subclasses ITEM(FloatingPointError), diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index 117b4e8dfb960a..a0be5fadced065 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -596,7 +596,8 @@ set_orig_class(PyObject *obj, PyObject *self) if (obj != NULL) { if (PyObject_SetAttr(obj, &_Py_ID(__orig_class__), self) < 0) { if (!PyErr_ExceptionMatches(PyExc_AttributeError) && - !PyErr_ExceptionMatches(PyExc_TypeError)) + !PyErr_ExceptionMatches(PyExc_TypeError) && + !PyErr_ExceptionMatches(PyExc_NotWriteableError)) { Py_DECREF(obj); return NULL; diff --git a/Objects/object.c b/Objects/object.c index 5e76e3c8274c82..a9813a999fdb62 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1267,6 +1267,7 @@ _PyObject_GetDictPtr(PyObject *obj) PyErr_Clear(); return NULL; } + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } return &dorv_ptr->dict; @@ -1466,6 +1467,7 @@ 
_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, res = NULL; goto done; } + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -1596,7 +1598,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, goto done; } else { - res = _PyObjectDict_SetItem(tp, dictptr, name, value); + res = _PyObjectDict_SetItem(tp, dictptr, name, value, obj); } } else { @@ -1918,7 +1920,7 @@ PyObject _Py_NoneStruct = { _PyObject_EXTRA_INIT { _Py_IMMORTAL_REFCNT }, &_PyNone_Type, - _Py_IMMUTABLE + (Py_region_ptr_with_tags_t){_Py_IMMUTABLE} }; /* NotImplemented is an object that can be used to signal that an @@ -2021,7 +2023,8 @@ PyTypeObject _PyNotImplemented_Type = { PyObject _Py_NotImplementedStruct = { _PyObject_EXTRA_INIT { _Py_IMMORTAL_REFCNT }, - &_PyNotImplemented_Type + &_PyNotImplemented_Type, + (Py_region_ptr_with_tags_t) {_Py_IMMUTABLE} }; @@ -2041,6 +2044,8 @@ extern PyTypeObject _PyMemoryIter_Type; extern PyTypeObject _PyLineIterator; extern PyTypeObject _PyPositionsIterator; extern PyTypeObject _PyLegacyEventHandler_Type; +extern PyTypeObject PyRegion_Type; +extern PyTypeObject PyCown_Type; static PyTypeObject* static_types[] = { // The two most important base types: must be initialized first and @@ -2161,6 +2166,11 @@ static PyTypeObject* static_types[] = { &PyODictKeys_Type, // base=&PyDictKeys_Type &PyODictValues_Type, // base=&PyDictValues_Type &PyODict_Type, // base=&PyDict_Type + + // Pyrona Region: + &PyRegion_Type, + // Pyrona Cown: + &PyCown_Type, }; @@ -2227,6 +2237,10 @@ _Py_NewReference(PyObject *op) reftotal_increment(_PyInterpreterState_GET()); #endif new_reference(op); + // This uses an assignment opposed to `Py_SET_REGION` since that + // function expects the previous value to be a valid object but newly + // created objects never had this value initilized. 
+ op->ob_region = Py_region_ptr_with_tags(_Py_LOCAL_REGION); } void @@ -2629,6 +2643,7 @@ _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); destructor dealloc = type->tp_dealloc; + Py_SET_REGION(op, _Py_LOCAL_REGION); #ifdef Py_DEBUG PyThreadState *tstate = _PyThreadState_GET(); PyObject *old_exc = tstate != NULL ? tstate->current_exception : NULL; diff --git a/Objects/regions.c b/Objects/regions.c index bbde367b9007b1..8204540bb962a7 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -2,10 +2,501 @@ #include "Python.h" #include #include +#include #include +#include "object.h" +#include "regions.h" #include "pycore_dict.h" +#include "pycore_interp.h" #include "pycore_object.h" #include "pycore_regions.h" +#include "pycore_pyerrors.h" +#include "pyerrors.h" + +// This tag indicates that the `regiondata` object has been merged +// with another region. The `parent` pointer points to the region it was +// merged with. +// +// This tag is only used for the parent pointer in `regiondata`. 
+#define Py_METADATA_MERGE_TAG ((Py_region_ptr_t)0x2) +static inline Py_region_ptr_with_tags_t Py_TAGGED_REGION(PyObject *ob) { + return ob->ob_region; +} +#define Py_TAGGED_REGION(ob) Py_TAGGED_REGION(_PyObject_CAST(ob)) +#define REGION_PRT_HAS_TAG(ptr, tag) ((ptr).value & tag) +#define REGION_PTR_SET_TAG(ptr, tag) (ptr = Py_region_ptr_with_tags((ptr).value | tag)) +#define REGION_PTR_CLEAR_TAG(ptr, tag) (ptr = Py_region_ptr_with_tags((ptr).value & (~tag))) + +#define REGION_DATA_CAST(r) (_Py_CAST(regiondata*, (r))) +#define REGION_PTR_CAST(r) (_Py_CAST(Py_region_ptr_t, (r))) +#define Py_REGION_DATA(ob) (REGION_DATA_CAST(Py_REGION(ob))) +#define Py_REGION_FIELD(ob) (ob->ob_region) + +#define IS_IMMUTABLE_REGION(r) (REGION_PTR_CAST(r) == _Py_IMMUTABLE) +#define IS_LOCAL_REGION(r) (REGION_PTR_CAST(r) == _Py_LOCAL_REGION) +#define IS_COWN_REGION(r) (REGION_PTR_CAST(r) == _Py_COWN) +#define HAS_METADATA(r) (!IS_LOCAL_REGION(r) && !IS_IMMUTABLE_REGION(r) && !IS_COWN_REGION(r)) + +typedef struct regiondata regiondata; +typedef struct PyRegionObject PyRegionObject; + +static regiondata* regiondata_get_parent(regiondata* self); +static PyObject *PyRegion_add_object(PyRegionObject *self, PyObject *args); +static PyObject *PyRegion_remove_object(PyRegionObject *self, PyObject *args); +static const char *get_region_name(PyObject* obj); + +/** + * Global status for performing the region check. 
+ */ +bool invariant_do_region_check = false; +/** + * TODO: revisit the definition of this builting function + */ +int Py_is_invariant_enabled(void) { + return invariant_do_region_check; +} + +// Once an error has occurred this is used to surpress further checking +bool invariant_error_occurred = false; + +// This uses the given arguments to create and throw a `RegionError` +static void throw_region_error( + PyObject* src, PyObject* tgt, + const char *format_str, PyObject *obj) +{ + // Don't stomp existing exception + PyThreadState *tstate = _PyThreadState_GET(); + assert(tstate && "_PyThreadState_GET documentation says it's not safe, when?"); + if (_PyErr_Occurred(tstate)) { + return; + } + + // This disables the invariance check, as it could otherwise emit a runtime + // error before the emitted `RegionError` could be handled. + invariant_do_region_check = false; + invariant_error_occurred = true; + + // Create the error, this sets the error value in `tstate` + PyErr_Format(PyExc_RegionError, format_str, obj); + + // Set source and target fields + PyRegionErrorObject* exc = _Py_CAST(PyRegionErrorObject*, + PyErr_GetRaisedException()); + Py_XINCREF(src); + exc->source = src; + Py_XINCREF(tgt); + exc->target = tgt; + PyErr_SetRaisedException(_PyObject_CAST(exc)); +} +#define throw_region_error(src, tgt, format_str, format_arg) \ + throw_region_error(_PyObject_CAST(src), _PyObject_CAST(tgt), \ + format_str, format_arg) + +struct PyRegionObject { + PyObject_HEAD + regiondata* metadata; + PyObject *dict; +}; + +struct regiondata { + // The number of references coming in from the local region. + Py_ssize_t lrc; + // The number of open subregions. + Py_ssize_t osc; + // The number of references to this object + Py_ssize_t rc; + bool is_open; + // Indicates if the LRC value can be trusted or not. 
+ // + // FIXME: Only a single bit is needed, this can be integrated into another field + bool is_dirty; + // This field might either point to the parent region or another region + // that this one was merged into. The `Py_METADATA_MERGE_TAG` tag is used + // to indicate this points to a merged region. + Py_region_ptr_with_tags_t parent; + // A weak reference to the bridge object. The bridge object has increased the + // rc of this metadata object. If this was a strong reference it could create + // a cycle. + PyRegionObject* bridge; + PyObject *name; // Optional string field for "name" + // TODO: Currently only used for invariant checking. If it's not used for other things + // it might make sense to make this conditional in debug builds (or something) + // + // Intrinsic list for invariant checking + regiondata* next; + PyObject* cown; // To be able to release a cown; to be integrated with parent +}; + +static Py_region_ptr_t regiondata_get_merge_tree_root(Py_region_ptr_t self) +{ + // Test for local and immutable region + if (!HAS_METADATA(self)) { + return self; + } + + // Return self if it wasn't merged with another region + regiondata* self_data = REGION_DATA_CAST(self); + if (!REGION_PRT_HAS_TAG(self_data->parent, Py_METADATA_MERGE_TAG)) { + return self; + } + + // FIXME: It can happen that there are several layers in this union-find + // structure. It would be efficient to directly update the parent pointers + // for deeper nodes. 
+ return regiondata_get_merge_tree_root(Py_region_ptr(self_data->parent)); +} +#define regiondata_get_merge_tree_root(self) \ + regiondata_get_merge_tree_root(REGION_PTR_CAST(self)) + +static void regiondata_mark_as_dirty(Py_region_ptr_t self_ptr) { + if (!HAS_METADATA(self_ptr)) { + return; + } + + REGION_DATA_CAST(self_ptr)->is_dirty = true; +} +# define regiondata_mark_as_dirty(data) \ + (regiondata_mark_as_dirty(REGION_PTR_CAST(data))) + +static void regiondata_mark_as_not_dirty(Py_region_ptr_t self_ptr) { + if (!HAS_METADATA(self_ptr)) { + return; + } + + REGION_DATA_CAST(self_ptr)->is_dirty = false; +} +# define regiondata_mark_as_not_dirty(data) \ + (regiondata_mark_as_not_dirty(REGION_PTR_CAST(data))) + +static bool regiondata_is_dirty(Py_region_ptr_t self_ptr) { + if (!HAS_METADATA(self_ptr)) { + return false; + } + + return REGION_DATA_CAST(self_ptr)->is_dirty; +} +# define regiondata_is_dirty(data) \ + (regiondata_is_dirty(REGION_PTR_CAST(data))) + +static void regiondata_inc_osc(Py_region_ptr_t self_ptr); +static int regiondata_dec_osc(Py_region_ptr_t self_ptr); +static void regiondata_open(regiondata* self) { + assert(HAS_METADATA(self)); + if (self->is_open) { + return; + } + self->is_open = true; + regiondata_inc_osc(REGION_PTR_CAST(regiondata_get_parent(self))); +} + +/// This function marks the region as closed and propagartes the status to +/// the parent region or owning cown. +/// +/// It returns `0` if the close was successful. It should only fails, if the +/// system is in an inconsistent state and this close attempted to release a +/// cown which is currently not owned by the current thread. +static int regiondata_close(regiondata* self) { + // The LRC might be 1 or 2, if the owning references is a local and the + // bridge object was used as an argument. 
+ assert(self->lrc <= 2 && "Attempting to close a region with an LRC > 2"); + assert(self->osc == 0 && "Attempting to close a region with an OSC != 0"); + if (!self->is_open) { + return 0; + } + + self->is_open = false; + + Py_region_ptr_t parent = REGION_PTR_CAST(regiondata_get_parent(self)); + if (HAS_METADATA(parent)) { + // Cowns and parents are mutually exclusive this can therefore return directly + return regiondata_dec_osc(parent); + } + + // Check if in a cown which is waiting for the region to close -- if so, release cown + if (self->cown && _PyCown_is_pending_release(self->cown)) { + // Propagate error from release + return _PyCown_release(self->cown); + } + + // Everything is a-okay + return 0; +} + +static bool regiondata_is_open(Py_region_ptr_t self) { + if (!HAS_METADATA(self)) { + // The immutable and local region are open by default and can't be closed. + return true; + } + + return REGION_DATA_CAST(self)->is_open; +} +#define regiondata_is_open(self) \ + regiondata_is_open(REGION_PTR_CAST(self)) + +static void regiondata_inc_osc(Py_region_ptr_t self_ptr) +{ + if (!HAS_METADATA(self_ptr)) { + return; + } + + regiondata* self = REGION_DATA_CAST(self_ptr); + self->osc += 1; + regiondata_open(self); +} +#define regiondata_inc_osc(self) \ + (regiondata_inc_osc(REGION_PTR_CAST(self))) + +/// Decrements the OSC of the region. This might close the region if the LRC +/// and ORC both hit zero and the region is not marked as dirty. +/// +/// Returns `0` on success. An error might come from closing the region +/// see `regiondata_close` for potential errors. +static int regiondata_dec_osc(Py_region_ptr_t self_ptr) +{ + if (!HAS_METADATA(self_ptr)) { + return 0; + } + + regiondata* self = REGION_DATA_CAST(self_ptr); + self->osc -= 1; + + // Check if the OSC decrease has closed this region as well. 
+ if (self->osc == 0 && self->lrc == 0 && !regiondata_is_dirty(self)) { + return regiondata_close(self); + } + + return 0; +} +#define regiondata_dec_osc(self) \ + (regiondata_dec_osc(REGION_PTR_CAST(self))) + +static void regiondata_inc_rc(Py_region_ptr_t self) +{ + if (HAS_METADATA(self)) { + REGION_DATA_CAST(self)->rc += 1; + } +} +#define regiondata_inc_rc(self) \ + (regiondata_inc_rc(REGION_PTR_CAST(self))) + +static int regiondata_dec_rc(Py_region_ptr_t self_ptr) +{ + if (!HAS_METADATA(self_ptr)) { + return 0; + } + + // Update RC + regiondata* self = REGION_DATA_CAST(self_ptr); + self->rc -= 1; + if (self->rc != 0) { + return 0; + } + + // Sort out the funeral by informing everyone about the future freeing + Py_CLEAR(self->name); + + // Buffer the results since we don't want to leak any memory if this fails. + // OSC decreases in this function should also be safe. + int result = 0; + if (regiondata_is_open(self)) { + result |= regiondata_dec_osc(regiondata_get_parent(self)); + } + + // This access the parent directly to update the rc. + // It also doesn't matter if the parent pointer is a + // merge or subregion relation, since both cases have + // increased the rc. 
+ result |= regiondata_dec_rc(Py_region_ptr(self->parent)); + + free(self); + + return result; +} +#define regiondata_dec_rc(self) \ + (regiondata_dec_rc(REGION_PTR_CAST(self))) + +static void regiondata_set_parent(regiondata* self, regiondata* parent) { + // Just a sanity check, since these cases should never happen + assert(HAS_METADATA(self) && "Can't set the parent on the immutable and local region"); + assert(REGION_PTR_CAST(self) == regiondata_get_merge_tree_root(self) && "Sanity Check"); + assert(REGION_PTR_CAST(parent) == regiondata_get_merge_tree_root(parent) && "Sanity Check"); + + Py_region_ptr_t old_parent = Py_region_ptr(self->parent); + Py_region_ptr_t new_parent = REGION_PTR_CAST(parent); + self->parent = Py_region_ptr_with_tags(new_parent); + + // Update RCs + regiondata_inc_rc(new_parent); + if (regiondata_is_open(self)) { + regiondata_inc_osc(new_parent); + regiondata_dec_osc(old_parent); + } + regiondata_dec_rc(old_parent); +} + +static regiondata* regiondata_get_parent(regiondata* self) { + assert(REGION_PTR_CAST(self) == regiondata_get_merge_tree_root(self) && "Sanity check"); + if (!HAS_METADATA(self)) { + // The local and immutable regions never have a parent + return NULL; + } + + Py_region_ptr_t parent_field = Py_region_ptr(self->parent); + Py_region_ptr_t parent_root = regiondata_get_merge_tree_root(parent_field); + + // If the parent was merged with another region we want to update the + // pointer to point at the root. 
+ if (parent_field != parent_root) { + // set_parent ensures that the RC's are correctly updated + regiondata_set_parent(self, REGION_DATA_CAST(parent_root)); + } + + return REGION_DATA_CAST(parent_root); +} +#define regiondata_get_parent(self) \ + regiondata_get_parent(REGION_DATA_CAST(self)) + +static bool regiondata_has_parent(regiondata* self) { + return regiondata_get_parent(self) != NULL; +} + +static bool regiondata_has_ancestor(regiondata* self, regiondata* other) { + // The immutable or local region can never be a parent + if (!HAS_METADATA(other)) { + return false; + } + + while (self) { + if (self == other) { + return true; + } + self = regiondata_get_parent(self); + } + return false; +} + + +// This implementation merges `self` into `other`. Merging is not allowed +// to break external uniqueness. It's therefore not allowed if both regions +// to have a parent. Except cases, where one region has the other region as +// it's parent. +// +// This function expects `self` to be a valid object. +static PyObject* regiondata_merge(regiondata* self, Py_region_ptr_t other) { + assert(HAS_METADATA(self) && "The immutable and local region can't be merged into another region"); + assert(REGION_PTR_CAST(self) == regiondata_get_merge_tree_root(self) && "Sanity Check"); + + // If `other` is the parent of `self` we can merge it. We unset the the + // parent which will also update the rc and other counts. + regiondata* self_parent = regiondata_get_parent(self); + if (self_parent && REGION_PTR_CAST(self_parent) == other) { + assert(HAS_METADATA(self_parent) && "The immutable and local region can never have children"); + + regiondata_set_parent(self, NULL); + self_parent = NULL; + } + + // If only `self` has a parent we can make `other` the child and + // remove the parent from `self`. The merged region will then again + // have the correct parent. 
+ regiondata* other_parent = regiondata_get_parent(self); + if (self_parent && HAS_METADATA(other) && other_parent == NULL) { + // Make sure we don't create any cycles + if (regiondata_has_ancestor(self_parent, REGION_DATA_CAST(other))) { + throw_region_error(self->bridge, REGION_DATA_CAST(other)->bridge, + "Merging these regions would create a cycle", NULL); + return NULL; + } + + regiondata_set_parent(REGION_DATA_CAST(other), self_parent); + regiondata_set_parent(self, NULL); + self_parent = NULL; + } + + // If `self` still has a parent we can't merge it into `other` + if (self_parent != NULL) { + PyObject* other_node = NULL; + if (HAS_METADATA(other)) { + other_node = _PyObject_CAST(REGION_DATA_CAST(other)->bridge); + } + throw_region_error(self->bridge, other_node, + "Unable to merge regions", NULL); + return NULL; + } + + regiondata_inc_rc(other); + + // Merge state into the root. + if (HAS_METADATA(other)) { + regiondata* other_data = REGION_DATA_CAST(other); + other_data->lrc += self->lrc; + other_data->osc += self->osc; + other_data->is_open |= self->is_open; + other_data->is_dirty |= self->is_dirty; + } + + // remove information from self + self->lrc = 0; + self->osc = 0; + self->is_open = false; + self->is_dirty = false; + + self->parent = Py_region_ptr_with_tags(other); + REGION_PTR_SET_TAG(self->parent, Py_METADATA_MERGE_TAG); + // No decref, since this is a weak reference. Otherwise we would get + // a cycle between the `regiondata` as a non GC'ed object and the bridge. 
+ self->bridge = NULL; + Py_RETURN_NONE; +} +#define regiondata_merge(self, other) \ + (regiondata_merge(self, REGION_PTR_CAST(other))); + +int _Py_IsLocal(PyObject *op) { + return IS_LOCAL_REGION(Py_REGION(op)); +} + +int _Py_IsImmutable(PyObject *op) +{ + return IS_IMMUTABLE_REGION(Py_REGION(op)); +} +int _Py_IsCown(PyObject *op) +{ + return Py_REGION(op) == _Py_COWN; +} + +Py_region_ptr_t _Py_REGION(PyObject *ob) { + if (!ob) { + return REGION_PTR_CAST(NULL); + } + + Py_region_ptr_t field_value = Py_region_ptr(Py_REGION_FIELD(ob)); + if (!HAS_METADATA(field_value)) { + return field_value; + } + + Py_region_ptr_t region = regiondata_get_merge_tree_root(field_value); + // Update the region if we're not pointing to the root of the merge tree. + // This can allow freeing of non root regions and speedup future lookups. + if (region != field_value) { + // We keep the tags, since the owning region stays the same. + Py_region_ptr_t tags = Py_region_ptr(Py_REGION_FIELD(ob)) & (~Py_REGION_MASK); + _Py_SET_TAGGED_REGION(ob, Py_region_ptr_with_tags(region | tags)); + } + + return region; +} + +void _Py_SET_TAGGED_REGION(PyObject *ob, Py_region_ptr_with_tags_t region) { + // Here we access the field directly, since we want to update the RC of the + // regions we're actually holding and not the root of the merge tree. + Py_region_ptr_t old_region = Py_region_ptr(Py_REGION_FIELD(ob)); + + ob->ob_region = region; + + // Update the RC of the region + regiondata_inc_rc(Py_region_ptr(region)); + regiondata_dec_rc(old_region); +} /** * Simple implementation of stack for tracing during make immutable. 
@@ -20,7 +511,7 @@ typedef struct stack_s { node* head; } stack; -stack* stack_new(void){ +static stack* stack_new(void){ stack* s = (stack*)malloc(sizeof(stack)); if(s == NULL){ return NULL; @@ -31,24 +522,23 @@ stack* stack_new(void){ return s; } -bool stack_push(stack* s, PyObject* object){ +static bool stack_push(stack* s, PyObject* object){ node* n = (node*)malloc(sizeof(node)); if(n == NULL){ + // FIXME: This DECREF should only be used by MakeImmutable, since + // `add_to_region` and other functions only use weak refs. Py_DECREF(object); // Should we also free the stack? return true; } - _Py_VPYDBG("pushing "); - _Py_VPYDBGPRINT(object); - _Py_VPYDBG(" [rc=%ld]\n", object->ob_refcnt); n->object = object; n->next = s->head; s->head = n; return false; } -PyObject* stack_pop(stack* s){ +static PyObject* stack_pop(stack* s){ if(s->head == NULL){ return NULL; } @@ -61,7 +551,7 @@ PyObject* stack_pop(stack* s){ return object; } -void stack_free(stack* s){ +static void stack_free(stack* s){ while(s->head != NULL){ PyObject* op = stack_pop(s); Py_DECREF(op); @@ -70,26 +560,227 @@ void stack_free(stack* s){ free(s); } -bool stack_empty(stack* s){ +static bool stack_empty(stack* s){ return s->head == NULL; } -void stack_print(stack* s){ - _Py_VPYDBG("stack: "); +__attribute__((unused)) +static void stack_print(stack* s){ node* n = s->head; while(n != NULL){ - _Py_VPYDBGPRINT(n->object); - _Py_VPYDBG("[rc=%ld]\n", n->object->ob_refcnt); n = n->next; } } -bool is_c_wrapper(PyObject* obj){ +static bool is_c_wrapper(PyObject* obj){ return PyCFunction_Check(obj) || Py_IS_TYPE(obj, &_PyMethodWrapper_Type) || Py_IS_TYPE(obj, &PyWrapperDescr_Type); } +// Start of a linked list of bridge objects used to check for external uniqueness +// Bridge objects appear in this list if they are captured +#define CAPTURED_SENTINEL ((regiondata*) 0xc0defefe) +regiondata* captured = CAPTURED_SENTINEL; + +/** + * Enable the region check. 
+ */ +void _Py_notify_regions_in_use(void) +{ + // Do not re-enable, if we have detected a fault. + if (!invariant_error_occurred) + invariant_do_region_check = true; +} + +PyObject* _Py_EnableInvariant(void) +{ + // Disable failure as program has explicitly requested invariant to be checked again. + invariant_error_occurred = false; + // Re-enable region check + invariant_do_region_check = true; + return Py_None; +} + +/** + * Set the global variables for a failure. + * This allows the interpreter to inspect what has failed. + */ +static void emit_invariant_error(PyObject* src, PyObject* tgt, const char* msg) +{ + const char *tgt_region_name = get_region_name(tgt); + const char *src_region_name = get_region_name(src); + PyObject *src_type_repr = PyObject_Repr(PyObject_Type(src)); + const char *src_desc = src_type_repr ? PyUnicode_AsUTF8(src_type_repr) : "<>"; + PyObject *tgt_type_repr = PyObject_Repr(PyObject_Type(tgt)); + const char *tgt_desc = tgt_type_repr ? PyUnicode_AsUTF8(tgt_type_repr) : "<>"; + PyObject* formatted = PyUnicode_FromFormat( + "Error: Invalid edge %p (%s in %s) -> %p (%s in %s) %s\n", + src, src_desc, src_region_name, tgt, tgt_desc, tgt_region_name, msg); + + // If the formatting failes, we have bigger problems + if (!formatted) { + return; + } + + const char* formatted_str = PyUnicode_AsUTF8(formatted); + throw_region_error(src, tgt, formatted_str, Py_None); + + Py_DECREF(formatted); +} + +// Lifted from gcmodule.c +typedef struct _gc_runtime_state GCState; +#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head) +#define GC_NEXT _PyGCHead_NEXT +#define GC_PREV _PyGCHead_PREV +#define FROM_GC(g) ((PyObject *)(((char *)(g))+sizeof(PyGC_Head))) + +/* A traversal callback for _Py_CheckRegionInvariant. + - tgt is the target of the reference we are checking, and + - src(_void) is the source of the reference we are checking. 
+*/ +static int +visit_invariant_check(PyObject *tgt, void *src_void) +{ + PyObject *src = _PyObject_CAST(src_void); + + Py_region_ptr_t src_region_ptr = Py_REGION(src); + Py_region_ptr_t tgt_region_ptr = Py_REGION(tgt); + // Internal references are always allowed + if (src_region_ptr == tgt_region_ptr) + return 0; + + // Anything is allowed to point to immutable + if (Py_IsImmutable(tgt)) + return 0; + // Borrowed references are unrestricted + if (Py_IsLocal(src)) + return 0; + // References to cowns are unrestricted + if (Py_IsCown(tgt)) + return 0; + // Since tgt is not immutable, src also may not be as immutable may not point to mutable + if (Py_IsImmutable(src)) { + emit_invariant_error(src, tgt, "Reference from immutable object to mutable target"); + return 0; + } + + // Cross-region references must be to a bridge + if (!_Py_is_bridge_object(tgt)) { + emit_invariant_error(src, tgt, "Reference from object in one region into another region"); + return 0; + } + + regiondata* src_region = REGION_DATA_CAST(src_region_ptr); + // Region objects may be stored in cowns + if (IS_COWN_REGION(src_region)) { + return 0; + } + + regiondata* tgt_region = REGION_DATA_CAST(tgt_region_ptr); + // Check if region is already added to captured list + if (tgt_region->next != NULL) { + // Bridge object was already captured + emit_invariant_error(src, tgt, "Reference to bridge is not externally unique"); + return 0; + } + // Forbid cycles in the region topology + if (regiondata_has_ancestor(src_region, tgt_region)) { + emit_invariant_error(src, tgt, "Regions create a cycle with subregions"); + return 0; + } + + // First discovery of bridge -- add to list of captured bridge objects + tgt_region->next = captured; + captured = tgt_region; + + return 0; +} + +static void invariant_reset_captured_list(void) { + // Reset the captured list + while (captured != CAPTURED_SENTINEL) { + regiondata* m = captured; + captured = m->next; + m->next = NULL; + } +} + +/** + * This uses checks that the 
region topology is valid. + * + * It is currently implemented using the GC data. This + * means that not all objects are traversed as some objects + * are considered to not participate in cycles, and hence + * do not need to be understood for the cycle detector. + * + * This is not ideal for the region invariant, but is a good + * first approximation. We could actually walk the heap + * in a subsequent more elaborate invariant check. + * + * Returns non-zero if the invariant is violated. + */ +int _Py_CheckRegionInvariant(PyThreadState *tstate) +{ + // Check if we should perform the region invariant check + if(!invariant_do_region_check){ + return 0; + } + + // Use the GC data to find all the objects, and traverse them to + // confirm all their references satisfy the region invariant. + GCState *gcstate = &tstate->interp->gc; + + // There is an cyclic doubly linked list per generation of all the objects + // in that generation. + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + PyGC_Head *containers = GEN_HEAD(gcstate, i); + PyGC_Head *gc = GC_NEXT(containers); + // Walk doubly linked list of objects. + for (; gc != containers; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + // Local can point to anything. No invariant needed + if (Py_IsLocal(op)) + continue; + // Functions are complex. + // Removing from invariant initially. + // TODO provide custom traverse here. + if (PyFunction_Check(op)) + continue; + + // TODO the immutable code ignores c_wrappers + // review if this is correct. + if (is_c_wrapper(op)) + continue; + + // Use traverse proceduce to visit each field of the object. + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + (visitproc)visit_invariant_check, + op); + + // Also need to visit the type of the object + // As this isn't covered by the traverse. + PyObject* type_op = PyObject_Type(op); + visit_invariant_check(type_op, op); + Py_DECREF(type_op); + + // If we detected an error, stop so we don't + // write too much. 
+ // TODO: The first error might not be the most useful. + // So might not need to build all error edges as a structure. + if (invariant_error_occurred) { + invariant_reset_captured_list(); + return 1; + } + } + } + + invariant_reset_captured_list(); + return 0; +} + #define _Py_VISIT_FUNC_ATTR(attr, frontier) do { \ - if(attr != NULL && !_Py_IsImmutable(attr)){ \ + if(attr != NULL && !Py_IsImmutable(attr)){ \ Py_INCREF(attr); \ if(stack_push(frontier, attr)){ \ return PyErr_NoMemory(); \ @@ -97,21 +788,16 @@ bool is_c_wrapper(PyObject* obj){ } \ } while(0) -PyObject* make_global_immutable(PyObject* globals, PyObject* name) +static PyObject* make_global_immutable(PyObject* globals, PyObject* name) { PyObject* value = PyDict_GetItem(globals, name); // value.rc = x - _Py_VPYDBG("value("); - _Py_VPYDBGPRINT(value); - _Py_VPYDBG(") -> "); _PyDict_SetKeyImmutable((PyDictObject*)globals, name); - if(!_Py_IsImmutable(value)){ - _Py_VPYDBG("pushed\n"); + if(!Py_IsImmutable(value)){ Py_INCREF(value); return value; }else{ - _Py_VPYDBG("immutable\n"); Py_RETURN_NONE; } } @@ -127,7 +813,7 @@ PyObject* make_global_immutable(PyObject* globals, PyObject* name) * just those, and prevent those keys from being updated in the global dictionary * from this point onwards. 
*/ -PyObject* walk_function(PyObject* op, stack* frontier) +static PyObject* make_function_immutable(PyObject* op, stack* frontier) { PyObject* builtins; PyObject* globals; @@ -141,9 +827,6 @@ PyObject* walk_function(PyObject* op, stack* frontier) bool check_globals = false; _PyObject_ASSERT(op, PyFunction_Check(op)); - _Py_VPYDBG("function: "); - _Py_VPYDBGPRINT(op); - _Py_VPYDBG("[rc=%ld]\n", Py_REFCNT(op)); _Py_SetImmutable(op); @@ -185,25 +868,17 @@ PyObject* walk_function(PyObject* op, stack* frontier) } Py_INCREF(f_ptr); // fp.rc = x + 1 - _Py_VPYDBG("function: adding captured vars/funcs/builtins\n"); while(!stack_empty(f_stack)){ f_ptr = stack_pop(f_stack); // fp.rc = x + 1 _PyObject_ASSERT(f_ptr, PyCode_Check(f_ptr)); f_code = (PyCodeObject*)f_ptr; - _Py_VPYDBG("analysing code: "); - _Py_VPYDBGPRINT(f_code->co_name); - _Py_VPYDBG("\n"); size = 0; if (f_code->co_names != NULL) size = PySequence_Fast_GET_SIZE(f_code->co_names); - _Py_VPYDBG("Enumerating %ld names\n", size); for(Py_ssize_t i = 0; i < size; i++){ PyObject* name = PySequence_Fast_GET_ITEM(f_code->co_names, i); // name.rc = x - _Py_VPYDBG("name "); - _Py_VPYDBGPRINT(name); - _Py_VPYDBG(": "); if(PyUnicode_CompareWithASCIIString(name, "globals") == 0){ // if the code calls the globals() builtin, then any @@ -224,23 +899,19 @@ PyObject* walk_function(PyObject* op, stack* frontier) } } }else if(PyDict_Contains(builtins, name)){ - _Py_VPYDBG("builtin\n"); _PyDict_SetKeyImmutable((PyDictObject*)builtins, name); PyObject* value = PyDict_GetItem(builtins, name); // value.rc = x - if(!_Py_IsImmutable(value)){ + if(!Py_IsImmutable(value)){ _Py_SetImmutable(value); } }else if(PyDict_Contains(module_dict, name)){ PyObject* value = PyDict_GetItem(module_dict, name); // value.rc = x - _Py_VPYDBG("module("); - _Py_VPYDBGPRINT(value); - _Py_VPYDBG(") -> "); _PyDict_SetKeyImmutable((PyDictObject*)module_dict, name); - if(!_Py_IsImmutable(value)){ + if(!Py_IsImmutable(value)){ Py_INCREF(value); // value.rc = x + 
1 if(stack_push(frontier, value)){ stack_free(f_stack); @@ -248,25 +919,18 @@ PyObject* walk_function(PyObject* op, stack* frontier) return PyErr_NoMemory(); } }else{ - _Py_VPYDBG("immutable\n"); } }else{ - _Py_VPYDBG("instance\n"); // TODO assert that it is an instance variable } } size = PySequence_Fast_GET_SIZE(f_code->co_consts); - _Py_VPYDBG("Enumerating %ld consts\n", size); for(Py_ssize_t i = 0; i < size; i++){ PyObject* value = PySequence_Fast_GET_ITEM(f_code->co_consts, i); // value.rc = x - _Py_VPYDBG("const "); - _Py_VPYDBGPRINT(value); - _Py_VPYDBG(": "); - if(!_Py_IsImmutable(value)){ + if(!Py_IsImmutable(value)){ Py_INCREF(value); // value.rc = x + 1 if(PyCode_Check(value)){ - _Py_VPYDBG("nested_func\n"); _Py_SetImmutable(value); @@ -276,7 +940,6 @@ PyObject* walk_function(PyObject* op, stack* frontier) return PyErr_NoMemory(); } }else{ - _Py_VPYDBG("pushed\n"); if(stack_push(frontier, value)){ stack_free(f_stack); @@ -285,16 +948,11 @@ PyObject* walk_function(PyObject* op, stack* frontier) } } }else{ - _Py_VPYDBG("immutable\n"); } if(check_globals && PyUnicode_Check(value)){ - _Py_VPYDBG("checking if"); - _Py_VPYDBGPRINT(value); - _Py_VPYDBG(" is a global: "); PyObject* name = value; if(PyDict_Contains(globals, name)){ - _Py_VPYDBG(" true "); value = make_global_immutable(globals, name); if(!Py_IsNone(value)){ if(stack_push(frontier, value)){ @@ -304,7 +962,6 @@ PyObject* walk_function(PyObject* op, stack* frontier) } } }else{ - _Py_VPYDBG("false\n"); } } @@ -319,18 +976,13 @@ PyObject* walk_function(PyObject* op, stack* frontier) size = 0; if(f->func_closure != NULL) size = PySequence_Fast_GET_SIZE(f->func_closure); - _Py_VPYDBG("Enumerating %ld closure vars to check for global names\n", size); for(Py_ssize_t i=0; i < size; ++i){ PyObject* cellvar = PySequence_Fast_GET_ITEM(f->func_closure, i); // cellvar.rc = x PyObject* value = PyCell_GET(cellvar); // value.rc = x - _Py_VPYDBG("cellvar("); - _Py_VPYDBGPRINT(value); - _Py_VPYDBG(") is "); 
if(PyUnicode_Check(value)){ PyObject* name = value; if(PyDict_Contains(globals, name)){ - _Py_VPYDBG("a global "); value = make_global_immutable(globals, name); if(!Py_IsNone(value)){ if(stack_push(frontier, value)){ @@ -340,10 +992,8 @@ PyObject* walk_function(PyObject* op, stack* frontier) } } }else{ - _Py_VPYDBG("not a global\n"); } }else{ - _Py_VPYDBG("not a global\n"); } } } @@ -360,12 +1010,9 @@ PyObject* walk_function(PyObject* op, stack* frontier) } \ } while(0) -int _makeimmutable_visit(PyObject* obj, void* frontier) +static int _makeimmutable_visit(PyObject* obj, void* frontier) { - _Py_VPYDBG("visit("); - _Py_VPYDBGPRINT(obj); - _Py_VPYDBG(") region: %lu rc: %ld\n", Py_REGION(obj), Py_REFCNT(obj)); - if(!_Py_IsImmutable(obj)){ + if(!Py_IsImmutable(obj)){ if(stack_push((stack*)frontier, obj)){ PyErr_NoMemory(); return -1; @@ -377,11 +1024,18 @@ int _makeimmutable_visit(PyObject* obj, void* frontier) PyObject* _Py_MakeImmutable(PyObject* obj) { - _Py_VPYDBG(">> makeimmutable("); - _Py_VPYDBGPRINT(obj); - _Py_VPYDBG(") region: %lu rc: %ld\n", Py_REGION(obj), Py_REFCNT(obj)); - if(_Py_IsImmutable(obj) && _Py_IsImmutable(Py_TYPE(obj))){ - return obj; + if (!obj || _Py_IsCown(obj)) { + Py_RETURN_NONE; + } + + // We have started using regions, so notify to potentially enable checks. + _Py_notify_regions_in_use(); + + // Some built-in objects are direclty created immutable. However, their types + // might be created in a mutable state. This therefore requres an additional + // check to see if the type is also immutable. 
+ if(Py_IsImmutable(obj) && Py_IsImmutable(Py_TYPE(obj))){ + Py_RETURN_NONE; } stack* frontier = stack_new(); @@ -401,25 +1055,15 @@ PyObject* _Py_MakeImmutable(PyObject* obj) traverseproc traverse; PyObject* type_op = NULL; - _Py_VPYDBG("item: "); - _Py_VPYDBGPRINT(item); - if(_Py_IsImmutable(item)){ - _Py_VPYDBG(" already immutable!\n"); + if(Py_IsImmutable(item)){ // Direct access like this is not recommended, but will be removed in the future as // this is just for debugging purposes. - if(type->ob_base.ob_base.ob_region != _Py_IMMUTABLE){ + if (Py_REGION(&type->ob_base.ob_base) != _Py_IMMUTABLE) { // Why do we need to handle the type here, surely what ever made this immutable already did that? - // Log so we can investigate. - _Py_VPYDBG("type "); - _Py_VPYDBGPRINT(type_op); - _Py_VPYDBG(" not immutable! but object is: "); - _Py_VPYDBGPRINT(item); - _Py_VPYDBG("\n"); } goto handle_type; } - _Py_VPYDBG("\n"); _Py_SetImmutable(item); @@ -429,21 +1073,19 @@ PyObject* _Py_MakeImmutable(PyObject* obj) } if(PyFunction_Check(item)){ - _Py_MAKEIMMUTABLE_CALL(walk_function, item, frontier); + _Py_MAKEIMMUTABLE_CALL(make_function_immutable, item, frontier); goto handle_type; } traverse = type->tp_traverse; if(traverse != NULL){ - _Py_VPYDBG("implements tp_traverse\n"); if(traverse(item, (visitproc)_makeimmutable_visit, frontier)){ Py_DECREF(item); stack_free(frontier); return NULL; } }else{ - _Py_VPYDBG("does not implements tp_traverse\n"); // TODO: (mjp comment) These functions causes every character of // a string to become an immutable object, which is is not the // desired behavior. Commenting so we can discuss. 
I believe @@ -456,7 +1098,7 @@ PyObject* _Py_MakeImmutable(PyObject* obj) handle_type: type_op = PyObject_Type(item); // type_op.rc = x + 1 - if (!_Py_IsImmutable(type_op)){ + if (!Py_IsImmutable(type_op)){ // Previously this included a check for is_leaf_type, but if (stack_push(frontier, type_op)) { @@ -475,9 +1117,878 @@ PyObject* _Py_MakeImmutable(PyObject* obj) stack_free(frontier); - _Py_VPYDBGPRINT(obj); - _Py_VPYDBG(" region: %lu rc: %ld \n", Py_REGION(obj), Py_REFCNT(obj)); - _Py_VPYDBG("<< makeimmutable complete\n\n"); - return obj; -} \ No newline at end of file + Py_RETURN_NONE; +} + +typedef enum region_error_id { + /* Adding this object to a region or creating this reference would + * create a reference that points to a contained(non-bridge object) + * inside another region. + */ + ERR_CONTAINED_OBJ_REF, + /* Adding this object to a region or creating this reference would + * create a cycle in the region topology. + */ + ERR_CYCLE_CREATION, + /* Adding this object to a region or creating this reference would + * isn't possible as the referenced bridge object already has a parent + * region. + */ + ERR_SHARED_CUSTODY, + /* Functions can reference to global variables. That's why they need + * special handling, as can be seen in `_Py_MakeImmutable`. + * For now an error is emitted to see when this comes up and if + * `make_function_immutable` can be reused. + */ + ERR_WIP_FUNCTIONS, +} region_error_id; + +/* An error that occurred in `add_to_region`. The struct contains all + * informaiton needed to construct an error message or handle the error + * differently. + */ +typedef struct regionerror { + /* The source of the reference that created the region error. + * + * A weak reference, can be made into a strong reference with `Py_INCREF` + */ + PyObject* src; + /* The target of the reference that created the region error. 
+ * + * A weak reference, can be made into a strong reference with `Py_INCREF` + */ + PyObject* tgt; + /* This ID indicates what kind of error occurred. + */ + region_error_id id; +} regionerror; + +/* Used by `_add_to_region_visit` to handle errors. The first argument is + * the error information. The second argument is supplementary data + * passed along by `add_to_region`. + */ +typedef int (*handle_add_to_region_error)(regionerror *, void *); + +/* This takes the region error and emits it as a `RegionError` to the + * user. This function will always return `false` to stop the propagation + * from `add_to_region` + * + * This function borrows both arguments. The memory has to be managed + * the caller. + */ +static int emit_region_error(regionerror *error) { + const char* msg = NULL; + + switch (error->id) + { + case ERR_CONTAINED_OBJ_REF: + msg = "References to objects in other regions are forbidden"; + break; + case ERR_CYCLE_CREATION: + msg = "Regions are not allowed to create cycles"; + break; + case ERR_SHARED_CUSTODY: + msg = "Regions can only have one parent at a time"; + break; + case ERR_WIP_FUNCTIONS: + msg = "WIP: Functions in regions are not supported yet"; + break; + default: + assert(false && "unreachable?"); + break; + } + throw_region_error(error->src, error->tgt, msg, NULL); + + // We never want to continue once an error has been emitted. + return -1; +} + +typedef struct addtoregionvisitinfo { + stack* pending; + // An optional stack to collect newly added subregions + stack* new_sub_regions; + // The source object of the reference. This is used to create + // better error message + PyObject* src; +} addtoregionvisitinfo; + +/// Adds the `target` object to the region of the `src` object stored +/// in the `addtoregionvisitinfo*` instance provided via `info_void`. +/// +/// This function can fail: +/// - If no memory is available to push nodes on the stacks of +/// `addtoregionvisitinfo`. 
+/// - If it's not possible to add the object to the region. +static int _add_to_region_visit(PyObject* target, void* info_void) +{ + addtoregionvisitinfo *info = _Py_CAST(addtoregionvisitinfo *, info_void); + + // Region objects are allowed to reference immutable objects. Immutable + // objects are only allowed to reference other immutable objects and cowns. + // we therefore don't need to traverse them. + if (Py_IsImmutable(target)) { + return 0; + } + + // References to cowns are unrestricted; cowns are opaque so + // do not need travsersing. + if (Py_IsCown(target)) { + return 0; + } + + // C wrappers can propergate through the entire system and draw + // in a lot of unwanted objects. Since c wrappers don't have mutable + // data, we just make it immutable and have the immutability impl + // handle it. We then have an edge from our region to an immutable + // object which is again valid. + if (is_c_wrapper(target)) { + _Py_MakeImmutable(target); + return 0; + } + + regiondata* source_region = Py_REGION_DATA(info->src); + if (Py_IsLocal(target)) { + // Add reference to the object, + // minus one for the reference we just followed + source_region->lrc += Py_REFCNT(target) - 1; + Py_SET_REGION(target, source_region); + + if (stack_push(info->pending, target)) { + PyErr_NoMemory(); + return -1; + } + return 0; + } + + // The target was previously in the local region but has already been + // added to the region by a previous iteration. We therefore only need + // to adjust the LRC + if (Py_REGION_DATA(target) == source_region) { + // -1 for the refernce we just followed + source_region->lrc -= 1; + return 0; + } + + // We push it onto the stack to be added to the region and traversed. + // The actual addition of the object is done in `add_to_region`. We keep + // it in the local region, to indicate to `add_to_region` that the object + // should actually be processed. 
+ if (Py_IsLocal(target)) { + // The actual region update and write checks are done in the + // main body of `add_to_region` + if (stack_push(info->pending, target)) { + PyErr_NoMemory(); + return -1; + } + return 0; + } + + // At this point, we know that target is in another region. + // If target is in a different region, it has to be a bridge object. + // References to contained objects are forbidden. + if (!_Py_is_bridge_object(target)) { + regionerror err = {.src = info->src, .tgt = target, + .id = ERR_CONTAINED_OBJ_REF }; + return emit_region_error(&err); + } + + // The target is a bridge object from another region. We now need to + // if it already has a parent. + regiondata *target_region = Py_REGION_DATA(target); + if (regiondata_has_parent(target_region)) { + regionerror err = {.src = info->src, .tgt = target, + .id = ERR_SHARED_CUSTODY}; + return emit_region_error(&err); + } + + // Make sure that the new subregion relation won't create a cycle + regiondata* region = Py_REGION_DATA(info->src); + if (regiondata_has_ancestor(region, target_region)) { + regionerror err = {.src = info->src, .tgt = target, + .id = ERR_CYCLE_CREATION}; + return emit_region_error(&err); + } + + // From the previous checks we know that `target` is the bridge object + // of a free region. Thus we can make it a sub region and allow the + // reference. + // + // `set_parent` will also ensure that the `osc` counter is updated. + regiondata_set_parent(target_region, region); + if (info->new_sub_regions) { + if (stack_push(info->new_sub_regions, target)) { + PyErr_NoMemory(); + return -1; + } + } + + return 0; +} + +// This function visits all outgoing reference from `item` including the +// type. +// +// It will return `false` if the given `visit` function fails. 
+// (Or if it's called on a function, this is a limitation of the current +// implementation which should be lifted soon-ish) +static int visit_object(PyObject *item, visitproc visit, void* info) { + if (PyFunction_Check(item)) { + // FIXME: This is a temporary error. It should be replaced by + // proper handling of moving the function into the region + regionerror err = {.src = NULL, + .tgt = item, .id = ERR_WIP_FUNCTIONS }; + emit_region_error(&err); + return false; + } else { + PyTypeObject *type = Py_TYPE(item); + traverseproc traverse = type->tp_traverse; + if (traverse != NULL) { + if (traverse(item, visit, info)) { + return false; + } + } + } + + // Visit the type manually, since it's not included in the normal + // `tp_treverse`. + PyObject* type_ob = _PyObject_CAST(Py_TYPE(item)); + // Visit will return 0 if everything was okayw + return ((visit)(type_ob, info) == 0); +} + +// Add the transitive closure of objects in the local region reachable from obj to region +static PyObject *add_to_region(PyObject *obj, Py_region_ptr_t region) +{ + if (!obj || _Py_IsCown(obj)) { + Py_RETURN_NONE; + } + + // Make sure there are no pending exceptions that would be overwritten + // by us. + PyThreadState *tstate = _PyThreadState_GET(); + if (_PyErr_Occurred(tstate)) { + return NULL; + } + + // The current implementation assumes region is a valid pointer. This + // restriction can be lifted if needed + assert(HAS_METADATA(region)); + regiondata *region_data = REGION_DATA_CAST(region); + + // Early return if the object is already in the region or immutable + if (Py_REGION(obj) == region || Py_IsImmutable(obj)) { + Py_RETURN_NONE; + } + + // Mark the region as open, since we're adding stuff to it. 
+ regiondata_open(region_data); + + addtoregionvisitinfo info = { + .pending = stack_new(), + .new_sub_regions = NULL, + // `src` is reassigned each iteration + .src = _PyObject_CAST(region_data->bridge), + }; + if (info.pending == NULL) { + return PyErr_NoMemory(); + } + + // The visit call is used to correctly add the object or + // add it to the pending stack, for further processing. + if (_add_to_region_visit(obj, &info)) { + stack_free(info.pending); + return NULL; + } + + while (!stack_empty(info.pending)) { + PyObject *item = stack_pop(info.pending); + + // Add `info.src` for better error messages + info.src = item; + + if (!visit_object(item, (visitproc)_add_to_region_visit, &info)) { + stack_free(info.pending); + return NULL; + } + } + + stack_free(info.pending); + + Py_RETURN_NONE; +} + +int _Py_is_bridge_object(PyObject *op) { + Py_region_ptr_t region = Py_REGION(op); + if (IS_LOCAL_REGION(region) || IS_IMMUTABLE_REGION(region) || IS_COWN_REGION(region)) { + return false; + } + + // It's not yet clear how immutability will interact with region objects. + // It's likely that the object will remain in the object topology but + // will use the properties of a bridge object. This therefore checks if + // the object is equal to the regions bridge object rather than checking + // that the type is `PyRegionObject` + return ((Py_region_ptr_t)((regiondata*)region)->bridge == (Py_region_ptr_t)op); +} + +/// This function attempts to close a region. It does this, by first merging +/// it into the local region and then reconstructing the region from the +/// given bridge object. All reachable objects will be added to the region, +/// similar to how `add_to_region` works. +/// +/// This function will also attempt to close open subregions, as that's +/// needed to close the given region. Closed subregions will remain closed +/// if possible. +/// +/// This function returns `-1` if any errors occurred. 
This can be due to +/// memory problems, region errors or problems with releasing cowns not owned +/// by the current thread. `0` only indicates that the function didn't error. +/// `regiondata_is_open()` should be used to check the region status. +static int try_close(PyRegionObject *root_bridge) { + addtoregionvisitinfo info = { + .pending = stack_new(), + .new_sub_regions = stack_new(), + // `src` is reassigned each iteration + .src = NULL, + }; + if (!info.pending || !info.new_sub_regions) { + goto fail; + } + + if (stack_push(info.new_sub_regions, _PyObject_CAST(root_bridge))) { + PyErr_NoMemory(); + goto fail; + } + + // The root region can have to have two local references, one from the + // owning reference and one from the `self` argument + Py_ssize_t root_region_lrc_limit; + regiondata *root_data = Py_REGION_DATA(root_bridge); + if (regiondata_has_parent(root_data) || root_data->cown) { + root_region_lrc_limit = 1; + } else { + root_region_lrc_limit = 2; + } + + while (!stack_empty(info.new_sub_regions)) { + PyObject *bridge = stack_pop(info.new_sub_regions); + assert(Py_is_bridge_object(bridge)); + regiondata* old_data = Py_REGION_DATA(bridge); + + // One from the owning reference + Py_ssize_t rc_limit = 1; + Py_ssize_t lrc_limit = 0; + + // The root bridge has different limits since it's currently used + // as an argument for this method. + if (bridge == _PyObject_CAST(root_bridge)) { + rc_limit += 1; + lrc_limit = root_region_lrc_limit; + } + + // The *LRC* and *is_open* status is currently not updated when references + // to the bridge are created. This means that the bridge might have multiple + // unknown references. + // + // Using the object RC is an over approximation, since internal cycles from + // objects to the bridge object will also increase the RC thereby tricking + // this check into opening it again. + if (Py_REFCNT(bridge) > rc_limit) { + regiondata_open(Py_REGION_DATA(bridge)); + } + + // If it's closed there is nothing we need to do. 
+ if (!regiondata_is_open(old_data)) { + continue; + } + + // Create the new `regiondata*` + regiondata* new_data = (regiondata*)calloc(1, sizeof(regiondata)); + if (!new_data) { + PyErr_NoMemory(); + goto fail; + } + + PyRegionObject* bridge_obj = _Py_CAST(PyRegionObject *, bridge); + // Increase the RC for the reference given to the `metadata` field of the + // `PyRegionObject` object. + // + // The RC of the old value is directly decreased. The RC of `old_data` + // will remain `>= 1` until the region field of the bridge object is + // updated by `Py_SET_REGION(bridge, new_data);` This ensures that + // `old_data` stays valid while all the data is transferred to `new_data` + regiondata_dec_rc(bridge_obj->metadata); + bridge_obj->metadata = new_data; + regiondata_inc_rc(new_data); + + new_data->bridge = bridge_obj; + Py_XSETREF(new_data->name, old_data->name); + regiondata_open(new_data); + regiondata_set_parent(new_data, regiondata_get_parent(old_data)); + new_data->cown = old_data->cown; + old_data->cown = NULL; + + // Merge the old region data into local. This has to be done after the + // created of the `new_data` to prevent the parent from closing + // premeturely when the old data gets detached from it. + regiondata_set_parent(old_data, NULL); + regiondata_merge(old_data, _Py_LOCAL_REGION); + old_data = NULL; + + // This region update also triggers an RC decrease on `old_data`. + // afterwards it might be deallocated. This has to happen after + // all data has been transferred. + Py_SET_REGION(bridge, new_data); + new_data->lrc += Py_REFCNT(bridge); + // Only subtract 1 from the LRC if the reference comes from a parent. + // Owning references from the local region should still count towards + // the LRC. 
+ if (regiondata_has_parent(new_data) || new_data->cown) { + new_data->lrc -= 1; + } + + if (stack_push(info.pending, bridge)) { + // No more memory, make sure the region is marked as dirty thereby + // preventing it from being closed in an inconsitent state. + regiondata_mark_as_dirty(Py_REGION_DATA(root_bridge)); + goto fail; + } + + // Re-add everything to the current region + while (!stack_empty(info.pending)) { + PyObject *item = stack_pop(info.pending); + + // Add `info.src` for better error messages + info.src = item; + + if (!visit_object(item, (visitproc)_add_to_region_visit, &info)) { + // The system is out of memory, or an object couldn't be added + // to the region. + // + // Either way, this means that the LRC of the region can't be trusted. + regiondata_mark_as_dirty(Py_REGION_DATA(root_bridge)); + goto fail; + } + } + + // Mark the region as clean + regiondata_mark_as_not_dirty(new_data); + + // The LRC will never decrease after this point. If the region is open + // due to the LRC it will remain open and the close fails. + if (new_data->lrc > lrc_limit) { + break; + } + + // Update the open status and make sure the parent knows + if (new_data->osc == 0) { + if (regiondata_close(new_data) != 0) { + // See `regiondata_close` for when this can fail. + // In either case, this region has just been cleaned and should + // be in a consistent state. + goto fail; + } + } + } + + root_data = Py_REGION_DATA(root_bridge); + if (root_data->lrc <= root_region_lrc_limit && root_data->osc == 0) { + if (regiondata_close(root_data) != 0) { + // See `regiondata_close` for when this can fail. + // In either case, this region has just been cleaned and should + // be in a consistent state. 
+ goto fail; + } + } + + stack_free(info.pending); + stack_free(info.new_sub_regions); + return 0; + +fail: + if (info.pending) { + stack_free(info.pending); + } + if (info.new_sub_regions) { + stack_free(info.new_sub_regions); + } + return -1; +} + +static void PyRegion_dealloc(PyRegionObject *self) { + // Name is immutable and not in our region. + + // The object region has already been reset. + // We now need to update the RC of our metadata field. + if (self->metadata) { + regiondata* data = self->metadata; + self->metadata = NULL; + data->bridge = NULL; + regiondata_dec_rc(data); + } + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_UnTrack(_PyObject_CAST(self)); + Py_TRASHCAN_BEGIN(self, PyRegion_dealloc); + if (self->dict) { + // We need to clear the ownership, since this dictionary might be + // returned to an object pool rather than freed. This would result + // in an error if the dictionary has the previous region. + // TODO: revisit in #16 + Py_SET_REGION(self->dict, _Py_LOCAL_REGION); + Py_CLEAR(self->dict); + } + + PyObject_GC_Del(self); + Py_DECREF(tp); + Py_TRASHCAN_END +} + +static int PyRegion_init(PyRegionObject *self, PyObject *args, PyObject *kwds) { + // TODO: should not be needed in the future + _Py_notify_regions_in_use(); + _Py_MakeImmutable(_PyObject_CAST(Py_TYPE(self))); + + static char *kwlist[] = {"name", NULL}; + self->metadata = (regiondata*)calloc(1, sizeof(regiondata)); + if (!self->metadata) { + PyErr_NoMemory(); + return -1; + } + + // Make sure the internal reference is also counted. 
+ regiondata_inc_rc(self->metadata); + + self->metadata->bridge = self; + + // Make the region an owner of the bridge object + Py_SET_REGION(self, self->metadata); + + // Freeze the region type to share it with other regions + _Py_MakeImmutable(_PyObject_CAST(Py_TYPE(self))); + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|U", kwlist, &self->metadata->name)) + return -1; + if (self->metadata->name) { + Py_XINCREF(self->metadata->name); + // Freeze the name and it's type. Short strings in Python are interned + // by default. This means that `id("AB") == id("AB")`. We therefore + // need to either clone the name object or freeze it to share it + // across regions. Freezing should be safe, since `+=` and other + // operators return new strings and keep the old one intact + // + // FIXME: Implicit freezing should take care of this instead + _Py_MakeImmutable(self->metadata->name); + } + + return 0; +} + +static int PyRegion_traverse(PyRegionObject *self, visitproc visit, void *arg) { + // `metadata` is first allocated by `PyRegion_init`, so it can still be + // NULL here (`PyRegion_dealloc` guards against the same case). + if (self->metadata) { + Py_VISIT(self->metadata->name); + } + Py_VISIT(self->dict); + return 0; +} + +static int PyRegion_clear(PyRegionObject *self) { + // `metadata` is first allocated by `PyRegion_init` and may still be NULL. + if (self->metadata) { + Py_CLEAR(self->metadata->name); + } + Py_CLEAR(self->dict); + return 0; +} + +// is_open method (returns True if the region is open, otherwise False) +// The ignored argument is required for this function's type to be +// compatible with PyCFunction +static PyObject *PyRegion_is_open(PyRegionObject *self, PyObject *ignored) { + // FIXME: What is the behavior of a `PyRegionObject` that has been merged into another region?
+ assert(Py_is_bridge_object(_PyObject_CAST(self)) && "FIXME: When does this happend and what should it do?"); + return PyBool_FromLong(_Py_CAST(long, regiondata_is_open(self->metadata))); +} + +// Open method (sets the region to "open") +// The ignored argument is required for this function's type to be +// compatible with PyCFunction +static PyObject *PyRegion_open(PyRegionObject *self, PyObject *ignored) { + // `Py_REGION()` will fetch the root region of the merge tree. + // this might be different from the region in `self->metadata`. + regiondata_open(Py_REGION_DATA(self)); + Py_RETURN_NONE; // Return None (standard for methods with no return value) +} + +int _PyRegion_is_closed(PyObject* self) { + return PyRegion_is_open((PyRegionObject *)self, NULL) == Py_False; +} + +// Close method (attempts to set the region to "closed") +// TODO: integrate with #19 and associated PRs +// The ignored argument is required for this function's type to be +// compatible with PyCFunction +static PyObject *PyRegion_close(PyRegionObject *self, PyObject *ignored) { + if (PyRegion_is_closed(self)) { + Py_RETURN_NONE; // Double close is OK + } + + // Attempt to close the region + if (try_close(self) != 0) { + if (!PyErr_Occurred()) { + // try_close did not run out of memory but failed to close the region + PyErr_Format(PyExc_RegionError, "Attempting to close the region failed"); + } + return NULL; + } + + // Check if the region is now closed + if (regiondata_is_open(Py_REGION(self))) { + PyErr_Format(PyExc_RegionError, "Attempting to close the region failed"); + return NULL; + } + + // Return None (standard for methods with no return value) + Py_RETURN_NONE; +} + +// try_close method (Attempts to close the region) +// Returns a bool telling whether the region is closed afterwards, or NULL +// with an exception set if `try_close` reported an error. +static PyObject *PyRegion_try_close(PyRegionObject *self, PyObject *args) { + assert(Py_is_bridge_object(self) && "self is not a bridge object"); + // Propagate potential errors + if (try_close(self) != 0) { + if (!PyErr_Occurred()) { + // `try_close` has failure paths that return -1 without raising. + // A NULL return must carry an exception, so report the failure + // the same way `PyRegion_close` does. + PyErr_Format(PyExc_RegionError, "Attempting to close the region failed"); + } + return NULL; + } + + // Check if the region was closed +
return PyBool_FromLong(_Py_CAST(long, !regiondata_is_open(Py_REGION(self)))); +} + +// Adds args object to self region +static PyObject *PyRegion_add_object(PyRegionObject *self, PyObject *args) { + if (!args) { + Py_RETURN_NONE; + } + + return add_to_region(args, Py_REGION(self)); +} + +// Remove args object to self region +static PyObject *PyRegion_remove_object(PyRegionObject *self, PyObject *args) { + if (!args) { + Py_RETURN_NONE; + } + + regiondata* md = Py_REGION_DATA(self); + if (Py_REGION(args) == (Py_region_ptr_t) md) { + Py_SET_REGION(args, _Py_LOCAL_REGION); + Py_RETURN_NONE; + } else { + PyErr_SetString(PyExc_RuntimeError, "Object not a member of region!"); + return NULL; + } +} + +// Return True if args object is member of self region +static PyObject *PyRegion_owns_object(PyRegionObject *self, PyObject *args) { + if (Py_REGION(self) == Py_REGION(args)) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +// Builds the `repr()` string of a region from its region data. +static PyObject *PyRegion_repr(PyRegionObject *self) { + regiondata* data = Py_REGION_DATA(self); +#ifdef NDEBUG + // NOTE(review): NDEBUG is defined when assertions are compiled out, so + // this detailed form is what those builds print -- confirm that this + // is the intended way around. + // The counters are cast so that the arguments always match `%zd`. + return PyUnicode_FromFormat( + "Region(lrc=%zd, osc=%zd, name=%S, is_open=%s)", + (Py_ssize_t)data->lrc, + (Py_ssize_t)data->osc, + data->name ? data->name : Py_None, + data->is_open ? "yes" : "no" + ); +#else + // Assertions enabled: simple representation + return PyUnicode_FromFormat( + "Region(name=%S, is_open=%s)", + data->name ? data->name : Py_None, + data->is_open ? "yes" : "no" + ); +#endif +} + +// Define the RegionType with methods +static PyMethodDef PyRegion_methods[] = { + {"open", (PyCFunction)PyRegion_open, METH_NOARGS, "Open the region."}, + {"close", (PyCFunction)PyRegion_close, METH_NOARGS, "Attempt to close the region."}, + {"is_open", (PyCFunction)PyRegion_is_open, METH_NOARGS, "Check if the region is open."}, + {"try_close", (PyCFunction)PyRegion_try_close, METH_NOARGS, "Attempt to close the region."}, + // Temporary methods for testing.
These will be removed or at least renamed once + // the write barrier is done. + {"add_object", (PyCFunction)PyRegion_add_object, METH_O, "Add object to the region."}, + {"remove_object", (PyCFunction)PyRegion_remove_object, METH_O, "Remove object from the region."}, + {"owns_object", (PyCFunction)PyRegion_owns_object, METH_O, "Check if object is owned by the region."}, + {NULL} // Sentinel +}; + + +PyTypeObject PyRegion_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "Region", /* tp_name */ + sizeof(PyRegionObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PyRegion_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)PyRegion_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + "TODO =^.^=", /* tp_doc */ + (traverseproc)PyRegion_traverse, /* tp_traverse */ + (inquiry)PyRegion_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + PyRegion_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(PyRegionObject, dict), /* tp_dictoffset */ + (initproc)PyRegion_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + +static const char *get_region_name(PyObject* obj) { + if (_Py_IsLocal(obj)) { + return "Default"; + } else if (Py_IsImmutable(obj)) { + return "Immutable"; + } else if (_Py_IsCown(obj)) { + return "Cown"; + } else { + const regiondata *md = Py_REGION_DATA(obj); + return md->name + ? 
PyUnicode_AsUTF8(md->name) + : ""; + } +} + +// x.f = y ==> _Pyrona_AddReference(src=x, tgt=y) +bool _Py_RegionAddReference(PyObject *src, PyObject *tgt) { + if (Py_REGION(src) == Py_REGION(tgt)) { + // Nothing to do -- intra-region references are always permitted + return true; + } + + if (Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + // Nothing to do -- adding a ref to an immutable or a cown is always permitted + return true; + } + + if (_Py_IsLocal(src)) { + // Record the borrowed reference in the LRC of the target region + // _Py_VPYDBG("Added borrowed ref %p --> %p (owner: '%s')\n", tgt, new_ref, get_region_name(tgt)); + Py_REGION_DATA(tgt)->lrc += 1; + return true; + } + + // Try slurp emphemerally owned object into the region of the target object + // _Py_VPYDBG("Added owning ref %p --> %p (owner: '%s')\n", tgt, new_ref, get_region_name(tgt)); + return add_to_region(tgt, Py_REGION(src)) == Py_None; +} + +// Used to add a reference from a local object that might not have been created yet +// to tgt. +void _Py_RegionAddLocalReference(PyObject *tgt) { + // Only need to increment the LRC of the target region + // if it is not local, immutable, or a cown. + if (_Py_IsLocal(tgt) || Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + return; + } + + Py_REGION_DATA(tgt)->lrc += 1; +} + +// Convenience function for moving multiple references into tgt at once +bool _Py_RegionAddReferences(PyObject *src, int tgtc, ...) 
{ + va_list args; + va_start(args, tgtc); + + for (int i = 0; i < tgtc; i++) { + int res = _Py_RegionAddReference(src, va_arg(args, PyObject*)); + if (!res) { + // Every `va_start` has to be paired with a `va_end`, + // also when bailing out early. + va_end(args); + return false; + } + } + + va_end(args); + return true; +} + +void _PyRegion_set_cown_parent(PyObject* bridge, PyObject* cown) { + assert(Py_is_bridge_object(bridge)); + regiondata* data = Py_REGION_DATA(bridge); + Py_XINCREF(cown); + Py_XSETREF(data->cown, cown); +} + +void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { + if (Py_REGION(src) == Py_REGION(tgt)) { + // Nothing to do -- intra-region references have no accounting. + return; + } + + // If the target is local, then so must the source be. So this should + // be covered by the previous check. + assert(!_Py_IsLocal(tgt)); + + if (_Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + // Nothing to do -- removing a ref to an immutable or a cown has no additional accounting. + return; + } + + regiondata* tgt_md = Py_REGION_DATA(tgt); + if (_Py_IsLocal(src)) { + // Dec LRC of the previously referenced region + // TODO should this decrement be a function, if it hits zero, + // then a region could become unreachable. + tgt_md->lrc -= 1; + return; + } + + // This must be a parent reference, so we need to remove the parent reference. + // FIXME(Pyrona): We might want to allow the `tgt_parent_md` to be NULL. + // This would prevent an exception if someone calls remove reference twice. + // We could also make this a dynamic check, which is lenient by default but + // can be turned strict by a flag. + regiondata* src_md = Py_REGION_DATA(src); + regiondata* tgt_parent_md = REGION_DATA_CAST(Py_region_ptr(tgt_md->parent)); + if (tgt_parent_md != src_md) { + // TODO: Could `dirty` mean this isn't an error? + throw_region_error(src, tgt, "(in WB/remove_ref)", Py_None); + } + + // Unparent the region.
+ regiondata_set_parent(tgt_md, NULL); +} + +PyObject *_PyCown_close_region(PyObject* ob) { + if (Py_TYPE(ob) == &PyRegion_Type) { + // Attempt to close the region + return PyRegion_close(_Py_CAST(PyRegionObject*, ob), NULL); + } else { + PyErr_SetString(PyExc_RegionError, "Attempted to close a region through a non-bridge object"); + return NULL; + } +} diff --git a/Objects/setobject.c b/Objects/setobject.c index a53ac7d389e668..74dc478b34b9cf 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -2588,5 +2588,6 @@ static PyTypeObject _PySetDummy_Type = { static PyObject _dummy_struct = { _PyObject_EXTRA_INIT { _Py_IMMORTAL_REFCNT }, - &_PySetDummy_Type + &_PySetDummy_Type, + (Py_region_ptr_with_tags_t) {_Py_IMMUTABLE} }; diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index e6776ac92b669c..a424b089f42d72 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -100,7 +100,8 @@ PyTypeObject PyEllipsis_Type = { PyObject _Py_EllipsisObject = { _PyObject_EXTRA_INIT { _Py_IMMORTAL_REFCNT }, - &PyEllipsis_Type + &PyEllipsis_Type, + (Py_region_ptr_with_tags_t) {_Py_IMMUTABLE} }; diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 45e09573d6d0d2..e043054197ae05 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -124,6 +124,14 @@ PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) return -1; } + // TODO: Pyrona: Possibly optimise this case as tuples + // should always be in local when they are assigned. 
+ if (!Py_REGIONADDREFERENCE(op, newitem)){ + Py_XDECREF(newitem); + // Error set by region add test + return -1; + } + if (i < 0 || i >= Py_SIZE(op)) { Py_XDECREF(newitem); PyErr_SetString(PyExc_IndexError, diff --git a/PC/python3dll.c b/PC/python3dll.c index 1f4d006b8ad856..c204f04ff381b0 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -817,6 +817,7 @@ EXPORT_DATA(PyExc_ModuleNotFoundError) EXPORT_DATA(PyExc_NameError) EXPORT_DATA(PyExc_NotADirectoryError) EXPORT_DATA(PyExc_NotImplementedError) +EXPORT_DATA(PyExc_NotWriteableError) EXPORT_DATA(PyExc_OSError) EXPORT_DATA(PyExc_OverflowError) EXPORT_DATA(PyExc_PendingDeprecationWarning) @@ -824,6 +825,7 @@ EXPORT_DATA(PyExc_PermissionError) EXPORT_DATA(PyExc_ProcessLookupError) EXPORT_DATA(PyExc_RecursionError) EXPORT_DATA(PyExc_ReferenceError) +EXPORT_DATA(PyExc_RegionError) EXPORT_DATA(PyExc_ResourceWarning) EXPORT_DATA(PyExc_RuntimeError) EXPORT_DATA(PyExc_RuntimeWarning) @@ -844,7 +846,6 @@ EXPORT_DATA(PyExc_UnicodeTranslateError) EXPORT_DATA(PyExc_UnicodeWarning) EXPORT_DATA(PyExc_UserWarning) EXPORT_DATA(PyExc_ValueError) -EXPORT_DATA(PyExc_NotWriteableError) EXPORT_DATA(PyExc_Warning) EXPORT_DATA(PyExc_WindowsError) EXPORT_DATA(PyExc_ZeroDivisionError) diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 31b94b81f5e889..68dd51666731f7 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -131,6 +131,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 3366289ccd05ef..0994cdbf0d1ccd 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -103,6 +103,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 48d882a803a998..2e5e879a47e819 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -455,6 +455,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters 
b/PCbuild/pythoncore.vcxproj.filters index 4e4ba7b0b63330..f51fa339810746 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -995,6 +995,9 @@ Objects + + Objects + Objects diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 7d92f94fccdd70..bfbd9cfaa5d53c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -11,8 +11,9 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_ceval.h" // _PyEval_Vector() -#include "pycore_regions.h" // _Py_IMMUTABLE +#include "pycore_regions.h" // _Py_IMMUTABLE, PY_REGION() #include "pycore_dict.h" // _PyDict_SetGlobalImmutable() +#include "regions.h" // Py_IsImmutable() #include "clinic/bltinmodule.c.h" @@ -2755,7 +2756,7 @@ builtin_isimmutable(PyObject *module, PyObject *obj) _Py_VPYDBG("isimmutable("); _Py_VPYDBGPRINT(obj); _Py_VPYDBG(") region: %lu\n", Py_REGION(obj)); - return PyBool_FromLong(_Py_IsImmutable(obj)); + return PyBool_FromLong(Py_IsImmutable(obj)); } @@ -2770,11 +2771,24 @@ Make 'obj' and its entire reachable object graph immutable. static PyObject * builtin_makeimmutable(PyObject *module, PyObject *obj) -/*[clinic end generated code: output=4e665122542dfd24 input=21a50256fa4fb099]*/ +/*[clinic end generated code: output=4e665122542dfd24 input=bec4cf1797c848d4]*/ { return Py_MakeImmutable(obj); } +/*[clinic input] +enableinvariant as builtin_enableinvariant + +Enable the checking of the region invariant. 
+[clinic start generated code]*/ + +static PyObject * +builtin_enableinvariant_impl(PyObject *module) +/*[clinic end generated code: output=a3a27509957788c2 input=cf5922b1eb45ef0e]*/ +{ + return Py_EnableInvariant(); +} + typedef struct { PyObject_HEAD Py_ssize_t tuplesize; @@ -3061,6 +3075,7 @@ static PyMethodDef builtin_methods[] = { BUILTIN_DELATTR_METHODDEF BUILTIN_DIR_METHODDEF BUILTIN_DIVMOD_METHODDEF + BUILTIN_ENABLEINVARIANT_METHODDEF BUILTIN_EVAL_METHODDEF BUILTIN_EXEC_METHODDEF BUILTIN_FORMAT_METHODDEF @@ -3120,6 +3135,8 @@ static struct PyModuleDef builtinsmodule = { NULL }; +extern PyTypeObject PyRegion_Type; +extern PyTypeObject PyCown_Type; PyObject * _PyBuiltin_Init(PyInterpreterState *interp) @@ -3181,6 +3198,9 @@ _PyBuiltin_Init(PyInterpreterState *interp) SETBUILTIN("tuple", &PyTuple_Type); SETBUILTIN("type", &PyType_Type); SETBUILTIN("zip", &PyZip_Type); + SETBUILTIN("Region", &PyRegion_Type); + SETBUILTIN("Cown", &PyCown_Type); + debug = PyBool_FromLong(config->optimization_level == 0); if (PyDict_SetItemString(dict, "__debug__", debug) < 0) { Py_DECREF(debug); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index aacf2b5c2e2c4f..6f448dcf75b0c5 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -7,6 +7,7 @@ #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _Py_RunGC() #include "pycore_pymem.h" // _PyMem_IsPtrFreed() +#include "pycore_regions.h" // _Py_CheckRegionInvariant() /* Notes about the implementation: @@ -1056,6 +1057,13 @@ _Py_HandlePending(PyThreadState *tstate) struct _ceval_runtime_state *ceval = &runtime->ceval; struct _ceval_state *interp_ceval_state = &tstate->interp->ceval; +#ifdef Py_REGION_INVARIANT + /* Check the region invariant if required. 
*/ + if (_Py_CheckRegionInvariant(tstate) != 0) { + return -1; + } +#endif + /* Pending signals */ if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) { if (handle_signals(tstate) != 0) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index fccf9088cbd131..3029cd340a0d42 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -88,13 +88,25 @@ #endif +#ifdef Py_REGION_INVARIANT /* Do interpreter dispatch accounting for tracing and instrumentation */ #define DISPATCH() \ { \ + if (_Py_CheckRegionInvariant(tstate) != 0) \ + goto error; \ NEXTOPARG(); \ PRE_DISPATCH_GOTO(); \ DISPATCH_GOTO(); \ } +#else +/* Do interpreter dispatch accounting for tracing and instrumentation */ +#define DISPATCH() \ + { \ + NEXTOPARG(); \ + PRE_DISPATCH_GOTO(); \ + DISPATCH_GOTO(); \ + } +#endif #define DISPATCH_SAME_OPARG() \ { \ diff --git a/Python/clinic/bltinmodule.c.h b/Python/clinic/bltinmodule.c.h index 0ffde42568666d..8f9edc96f78e5a 100644 --- a/Python/clinic/bltinmodule.c.h +++ b/Python/clinic/bltinmodule.c.h @@ -1423,8 +1423,26 @@ PyDoc_STRVAR(builtin_makeimmutable__doc__, "makeimmutable($module, obj, /)\n" "--\n" "\n" -"Make \'obj\' and its entire graph immutable."); +"Make \'obj\' and its entire reachable object graph immutable."); #define BUILTIN_MAKEIMMUTABLE_METHODDEF \ {"makeimmutable", (PyCFunction)builtin_makeimmutable, METH_O, builtin_makeimmutable__doc__}, -/*[clinic end generated code: output=356f1513888beba0 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(builtin_enableinvariant__doc__, +"enableinvariant($module, /)\n" +"--\n" +"\n" +"Enable the checking of the region invariant."); + +#define BUILTIN_ENABLEINVARIANT_METHODDEF \ + {"enableinvariant", (PyCFunction)builtin_enableinvariant, METH_NOARGS, builtin_enableinvariant__doc__}, + +static PyObject * +builtin_enableinvariant_impl(PyObject *module); + +static PyObject * +builtin_enableinvariant(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return builtin_enableinvariant_impl(module); 
+} +/*[clinic end generated code: output=2dcfa0885e1e7a45 input=a9049054013a1b77]*/ diff --git a/Python/errors.c b/Python/errors.c index 698faec546526b..27ff884c4b934e 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -1956,8 +1956,8 @@ _PyErr_WriteToImmutable(const char* filename, int lineno, PyObject* obj) PyObject* string; PyThreadState *tstate = _PyThreadState_GET(); if (!_PyErr_Occurred(tstate)) { - string = PyUnicode_FromFormat("object of type %s is immutable (in region %" PRIuPTR ") at %s:%d", - obj->ob_type->tp_name, obj->ob_region, filename, lineno); + string = PyUnicode_FromFormat("object of type %s is immutable at %s:%d", + obj->ob_type->tp_name, filename, lineno); if (string != NULL) { _PyErr_SetObject(tstate, PyExc_NotWriteableError, string); Py_DECREF(string); diff --git a/Python/instrumentation.c b/Python/instrumentation.c index a6ff7a8a98506c..86ac2e90b628fd 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -19,13 +19,15 @@ PyObject _PyInstrumentation_DISABLE = { .ob_refcnt = _Py_IMMORTAL_REFCNT, - .ob_type = &PyBaseObject_Type + .ob_type = &PyBaseObject_Type, + .ob_region = (Py_region_ptr_with_tags_t){_Py_IMMUTABLE} }; PyObject _PyInstrumentation_MISSING = { .ob_refcnt = _Py_IMMORTAL_REFCNT, - .ob_type = &PyBaseObject_Type + .ob_type = &PyBaseObject_Type, + .ob_region = (Py_region_ptr_with_tags_t){_Py_IMMUTABLE} }; static const int8_t EVENT_FOR_OPCODE[256] = { diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index ed4a0ac2dd32de..7b9d6c46bdb3e0 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -284,6 +284,7 @@ static const char* _Py_stdlib_module_names[] = { "unicodedata", "unittest", "urllib", +"using", "uu", "uuid", "venv", diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 6a7c14ebb220a8..d563d8313e3590 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -306,6 +306,10 @@ 
Modules/timemodule.c init_timezone YEAR - Objects/bytearrayobject.c - _PyByteArray_empty_string - Objects/complexobject.c - c_1 - Objects/exceptions.c - static_exceptions - +Objects/exceptions.c - _PyExc_NotWriteableError - +Objects/exceptions.c - PyExc_NotWriteableError - +Objects/exceptions.c - _PyExc_RegionError - +Objects/exceptions.c - PyExc_RegionError - Objects/genobject.c - ASYNC_GEN_IGNORED_EXIT_MSG - Objects/genobject.c - NON_INIT_CORO_MSG - Objects/longobject.c - _PyLong_DigitValue - @@ -712,3 +716,22 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - + +## Region Type Info this is constant +## Why do we have three of these? Surely it should just be in one file? +Objects/object.c - PyRegion_Type - +Objects/regions.c - PyRegion_Type - +Objects/cown.c - PyRegion_Type - +Python/bltinmodule.c - PyRegion_Type - +Objects/object.c - PyCown_Type - +Objects/cown.c - PyCown_Type - +Python/bltinmodule.c - PyCown_Type - + +## Regions Debug Info for Invariant +## Not to remain global, and should become localised to an interpreter +Objects/regions.c - invariant_do_region_check - +Objects/regions.c - invariant_error_src - +Objects/regions.c - invariant_error_tgt - +Objects/regions.c - invariant_error_occurred - +Objects/regions.c - captured - + diff --git a/configure b/configure index b6f90bcd8c7300..af5d2e2b75fd02 100755 --- a/configure +++ b/configure @@ -1078,6 +1078,7 @@ enable_shared with_static_libpython enable_profiling with_pydebug +with_region_invariant with_trace_refs enable_pystats with_assertions @@ -1850,6 +1851,8 @@ Optional Packages: do not build libpythonMAJOR.MINOR.a and do not install python.o (default is yes) --with-pydebug build with Py_DEBUG defined (default is no) + --with-region-invariant enable region invariant for debugging purpose + (default is yes) --with-trace-refs enable tracing references for debugging purpose (default is no) --with-assertions build with C 
assertions enabled (default is no) @@ -8075,6 +8078,30 @@ printf "%s\n" "no" >&6; } fi +# Check for --with-region-invariant +# --with-region-invariant +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-region-invariant" >&5 +printf %s "checking for --with-region-invariant... " >&6; } + +# Check whether --with-region-invariant was given. +if test ${with_region_invariant+y} +then : + withval=$with_region_invariant; +else $as_nop + with_region_invariant=yes + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_region_invariant" >&5 +printf "%s\n" "$with_region_invariant" >&6; } + +if test "$with_region_invariant" = "yes" +then + +printf "%s\n" "#define Py_REGION_INVARIANT 1" >>confdefs.h + +fi + # Check for --with-trace-refs # --with-trace-refs { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-trace-refs" >&5 diff --git a/configure.ac b/configure.ac index ba768aea930714..a476ba833e6ab3 100644 --- a/configure.ac +++ b/configure.ac @@ -1689,6 +1689,21 @@ else AC_MSG_RESULT([no]); Py_DEBUG='false' fi], [AC_MSG_RESULT([no])]) +# Check for --with-region-invariant +# --with-region-invariant +AC_MSG_CHECKING([for --with-region-invariant]) +AC_ARG_WITH([region-invariant], + [AS_HELP_STRING([--with-region-invariant], [enable region invariant for debugging purpose (default is yes)])], + [], [with_region_invariant=yes] +) +AC_MSG_RESULT([$with_region_invariant]) + +if test "$with_region_invariant" = "yes" +then + AC_DEFINE([Py_REGION_INVARIANT], [1], + [Define if you want to enable region invariant for debugging purpose]) +fi + # Check for --with-trace-refs # --with-trace-refs AC_MSG_CHECKING([for --with-trace-refs]) diff --git a/pyconfig.h.in b/pyconfig.h.in index ada9dccfef1084..2dd38014570b5c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1615,6 +1615,9 @@ SipHash13: 3, externally defined: 0 */ #undef Py_HASH_ALGORITHM +/* Define if you want to enable region invariant for debugging purpose */ +#undef Py_REGION_INVARIANT + /* 
Define if you want to enable internal statistics gathering. */ #undef Py_STATS