diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index dc923d9..71fd475 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -77,9 +77,9 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Build CPP extension with clang++ run: | - export CXX=$(brew --prefix llvm@15)/bin/clang++ - export LDFLAGS="-L/usr/local/opt/libomp/lib" - export CPPFLAGS="-I/usr/local/opt/libomp/include" + export CXX=$(brew --prefix llvm@18)/bin/clang++ + export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" + export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" pip install -e .[dev] - name: Test with pytest run: | diff --git a/.github/workflows/version.yml b/.github/workflows/version.yml index 6bd24fa..c334c91 100644 --- a/.github/workflows/version.yml +++ b/.github/workflows/version.yml @@ -1,10 +1,6 @@ name: Display version -on: - push: - branches: [ "dev", "main", "alpha", "beta" ] - pull_request: - branches: [ "dev", "main", "alpha", "beta" ] +on: [push, pull_request] permissions: contents: read @@ -26,8 +22,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install build "setuptools-git-versioning>=2,<3" + pip install build "setuptools-git-versioning>=2,<3" numpy numba pip install torch --index-url https://download.pytorch.org/whl/cpu - name: Display version run: | - setuptools-git-versioning -v >> $GITHUB_STEP_SUMMARY \ No newline at end of file + setuptools-git-versioning -vv >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a1cc84d..89cd037 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,19 +4,19 @@ requires = [ "setuptools-git-versioning>=2.0,<3", "wheel", "torch", + "numba", "numpy", ] -build-backend = "setuptools.build_meta" +build-backend = "setuptools.build_meta:__legacy__" [tool.setuptools-git-versioning] enabled = true # change the file path version_file = 
"torchlpc/VERSION.txt" -count_commits_from_version_file = true # <--- enable commits tracking -dev_template = "{tag}.{branch}{ccount}" # suffix for versions will be .dev -dirty_template = "{tag}.{branch}{ccount}" # same thing here -# Temporarily disable branch formatting due to issues with regex in _version.py -# branch_formatter = "torchlpc._version:format_branch_name" +count_commits_from_version_file = true # <--- enable commits tracking +dev_template = "{tag}.{branch}{ccount}" # suffix for versions will be .dev +dirty_template = "{tag}.{branch}{ccount}" # same thing here +branch_formatter = "torchlpc._version:format_branch_name" [tool.setuptools.package-data] # include VERSION file to a package @@ -29,6 +29,7 @@ exclude = ["tests", "tests.*"] [tool.setuptools] # this package will read some included files in runtime, avoid installing it as .zip zip-safe = false +license-files = ["LICENSE"] [project] dynamic = ["version"] @@ -39,8 +40,8 @@ authors = [{ name = "Chin-Yun Yu", email = "chin-yun.yu@qmul.ac.uk" }] maintainers = [{ name = "Chin-Yun Yu", email = "chin-yun.yu@qmul.ac.uk" }] description = "Fast, efficient, and differentiable time-varying LPC filtering in PyTorch." readme = "README.md" -license = "MIT" -license-files = ["LICENSE"] +license = { text = "MIT" } + classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", diff --git a/torchlpc/csrc/scan_cpu.cpp b/torchlpc/csrc/scan_cpu.cpp index 6b47cad..a789d9d 100644 --- a/torchlpc/csrc/scan_cpu.cpp +++ b/torchlpc/csrc/scan_cpu.cpp @@ -6,26 +6,29 @@ #include #include -extern "C" { -/* Creates a dummy empty _C module that can be imported from Python. 
- The import from Python will load the .so associated with this extension - built from this file, so that all the TORCH_LIBRARY calls below are run.*/ -PyObject *PyInit__C(void) { - static struct PyModuleDef module_def = { - PyModuleDef_HEAD_INIT, - "_C", /* name of module */ - NULL, /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, - or -1 if the module keeps state in global variables. */ - NULL, /* methods */ - }; - return PyModule_Create(&module_def); -} +extern "C" +{ + /* Creates a dummy empty _C module that can be imported from Python. + The import from Python will load the .so associated with this extension + built from this file, so that all the TORCH_LIBRARY calls below are run.*/ + PyObject *PyInit__C(void) + { + static struct PyModuleDef module_def = { + PyModuleDef_HEAD_INIT, + "_C", /* name of module */ + NULL, /* module documentation, may be NULL */ + -1, /* size of per-interpreter state of the module, + or -1 if the module keeps state in global variables. 
*/ + NULL, /* methods */ + }; + return PyModule_Create(&module_def); + } } template void scan_cpu(const at::Tensor &input, const at::Tensor &weights, - const at::Tensor &initials, const at::Tensor &output) { + const at::Tensor &initials, const at::Tensor &output) +{ TORCH_CHECK(input.dim() == 2, "Input must be 2D"); TORCH_CHECK(initials.dim() == 1, "Initials must be 1D"); TORCH_CHECK(weights.sizes() == input.sizes(), @@ -50,39 +53,33 @@ void scan_cpu(const at::Tensor &input, const at::Tensor &weights, auto T = input.size(1); auto total_size = input.numel(); - std::pair buffer[total_size]; - const scalar_t *input_ptr = input_contiguous.const_data_ptr(); const scalar_t *initials_ptr = initials_contiguous.const_data_ptr(); const scalar_t *weights_ptr = weights_contiguous.const_data_ptr(); scalar_t *output_ptr = output.mutable_data_ptr(); - std::transform(weights_ptr, weights_ptr + total_size, input_ptr, buffer, - [](const scalar_t &a, const scalar_t &b) { - return std::make_pair(a, b); - }); - - at::parallel_for(0, n_batch, 1, [&](int64_t start, int64_t end) { - for (auto b = start; b < end; b++) { - std::inclusive_scan( - buffer + b * T, buffer + (b + 1) * T, buffer + b * T, - [](const std::pair &a, - const std::pair &b) { - return std::make_pair(a.first * b.first, - a.second * b.first + b.second); - }, - std::make_pair((scalar_t)1.0, initials_ptr[b])); - } - }); - - std::transform( - buffer, buffer + total_size, output_ptr, - [](const std::pair &a) { return a.second; }); + at::parallel_for(0, n_batch, 1, [&](int64_t start, int64_t end) + { + for (auto b = start; b < end; b++) + { + auto initial = initials_ptr[b]; + auto weights_offset = weights_ptr + b * T; + auto input_offset = input_ptr + b * T; + auto output_offset = output_ptr + b * T; + for (int64_t t = 0; t < T; t++) + { + auto w = weights_offset[t]; + auto x = input_offset[t]; + initial = initial * w + x; + output_offset[t] = initial; + } + }; }); } template -void lpc_cpu_core(const torch::Tensor &a, const 
torch::Tensor &padded_out) { +void lpc_cpu_core(const torch::Tensor &a, const torch::Tensor &padded_out) +{ // Ensure input dimensions are correct TORCH_CHECK(a.dim() == 3, "a must be 3-dimensional"); TORCH_CHECK(padded_out.dim() == 2, "out must be 2-dimensional"); @@ -106,24 +103,27 @@ void lpc_cpu_core(const torch::Tensor &a, const torch::Tensor &padded_out) { const scalar_t *a_ptr = a_contiguous.const_data_ptr(); scalar_t *out_ptr = padded_out.mutable_data_ptr(); - at::parallel_for(0, B, 1, [&](int64_t start, int64_t end) { - for (auto b = start; b < end; b++) { - auto out_offset = b * (T + order) + order; - auto a_offset = b * T * order; - for (int64_t t = 0; t < T; t++) { - scalar_t y = out_ptr[out_offset + t]; - for (int64_t i = 0; i < order; i++) { - y -= a_ptr[a_offset + t * order + i] * - out_ptr[out_offset + t - i - 1]; + at::parallel_for(0, B, 1, [&](int64_t start, int64_t end) + { + for (auto b = start; b < end; b++) + { + auto out_offset = out_ptr + b * (T + order) + order; + auto a_offset = a_ptr + b * T * order; + for (int64_t t = 0; t < T; t++) + { + scalar_t y = out_offset[t]; + for (int64_t i = 0; i < order; i++) + { + y -= a_offset[t * order + i] * out_offset[t - i - 1]; } - out_ptr[out_offset + t] = y; + out_offset[t] = y; } - } - }); + } }); } at::Tensor scan_cpu_wrapper(const at::Tensor &input, const at::Tensor &weights, - const at::Tensor &initials) { + const at::Tensor &initials) +{ TORCH_CHECK(input.is_floating_point() || input.is_complex(), "Input must be floating point or complex"); TORCH_CHECK(initials.scalar_type() == input.scalar_type(), @@ -135,12 +135,14 @@ at::Tensor scan_cpu_wrapper(const at::Tensor &input, const at::Tensor &weights, AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES( input.scalar_type(), "scan_cpu", - [&] { scan_cpu(input, weights, initials, output); }); + [&] + { scan_cpu(input, weights, initials, output); }); return output; } at::Tensor lpc_cpu(const at::Tensor &x, const at::Tensor &a, - const 
at::Tensor &zi) +{ TORCH_CHECK(x.is_floating_point() || x.is_complex(), "Input must be floating point or complex"); TORCH_CHECK(a.scalar_type() == x.scalar_type(), @@ -156,16 +158,19 @@ at::Tensor lpc_cpu(const at::Tensor &x, const at::Tensor &a, auto out = at::cat({zi.flip(1), x}, 1).contiguous(); AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES( - x.scalar_type(), "lpc_cpu", [&] { lpc_cpu_core(a, out); }); + x.scalar_type(), "lpc_cpu", [&] + { lpc_cpu_core(a, out); }); return out.slice(1, zi.size(1), out.size(1)).contiguous(); } -TORCH_LIBRARY(torchlpc, m) { +TORCH_LIBRARY(torchlpc, m) +{ m.def("torchlpc::scan(Tensor a, Tensor b, Tensor c) -> Tensor"); m.def("torchlpc::lpc(Tensor a, Tensor b, Tensor c) -> Tensor"); } -TORCH_LIBRARY_IMPL(torchlpc, CPU, m) { +TORCH_LIBRARY_IMPL(torchlpc, CPU, m) +{ m.impl("scan", &scan_cpu_wrapper); m.impl("lpc", &lpc_cpu); }