From bb8402ef062b31704c366ba705c887aaea379a30 Mon Sep 17 00:00:00 2001 From: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> Date: Fri, 24 Feb 2023 17:19:49 -0800 Subject: [PATCH 1/5] Add DT_DEFAULT_CHUNK_SIZE and DT_DEFAULT_MIN_ITERS_PER_THREAD to control thread pool defaults (#3426) - add `DT_DEFAULT_CHUNK_SIZE` to control default chunk size for `parallel_for_static`; - add `DT_DEFAULT_MIN_ITERS_PER_THREAD` to control default minimum number of iterations per thread. --- src/core/parallel/api_primitives.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/core/parallel/api_primitives.h b/src/core/parallel/api_primitives.h index 7e41b3644f..83e3e9453f 100644 --- a/src/core/parallel/api_primitives.h +++ b/src/core/parallel/api_primitives.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2023 H2O.ai // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,14 +19,20 @@ #include "utils/assert.h" namespace dt { -size_t num_threads_in_pool(); +#ifndef DT_DEFAULT_CHUNK_SIZE + #define DT_DEFAULT_CHUNK_SIZE 1000 +#endif +#ifndef DT_DEFAULT_MIN_ITERS_PER_THREAD + #define DT_DEFAULT_MIN_ITERS_PER_THREAD 1000 +#endif +size_t num_threads_in_pool(); class ChunkSize { size_t value; public: - explicit ChunkSize(size_t sz = 1000 /* Default chunk size */) { + explicit ChunkSize(size_t sz = DT_DEFAULT_CHUNK_SIZE) { value = sz? sz : 1; } ChunkSize(const ChunkSize&) = default; @@ -68,7 +74,7 @@ class NThreads { * iterations per thread. */ NThreads nthreads_from_niters(size_t niters, - size_t min_iters_per_thread = 1000, + size_t min_iters_per_thread = DT_DEFAULT_MIN_ITERS_PER_THREAD, bool parallel_ok = true); From 43cdcd2ec08163aa12eb6cb700a86575cbbe68fa Mon Sep 17 00:00:00 2001 From: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> Date: Sat, 25 Feb 2023 21:42:01 -0800 Subject: [PATCH 2/5] Make `.to_numpy()` to honor number of rows for zero-column frames (#3429) Closes #3427 --- docs/releases/v1.1.0.rst | 3 +++ src/core/frame/to_numpy.cc | 8 ++++---- tests/frame/test-to-numpy.py | 24 +++++++++++++++++++++++- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index bbcbff8902..71719f33c4 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -62,6 +62,9 @@ -[fix] Fixed creating frames from numpy arrays, that contain unicode strings. [#3420] + -[fix] :meth:`.to_numpy()` will now create a correctly shaped array + in the case of zero-column frames. [#3427] + -[api] Converting a column of :attr:`void ` type into pandas now produces a pandas ``object`` column filled with ``None``s. Converting such column back into datatable produces a ``void`` column again. 
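As a concrete illustration of the void-type round trip described in the entry above, the new behaviour can be sketched as follows. This is a minimal sketch, not part of the patch; it assumes a datatable build that already contains this change, that pandas is installed, and that datatable assigns the `void` type to an all-None column automatically.

```python
import datatable as dt

# A column built entirely from Nones is given the `void` type.
DT = dt.Frame([None, None, None])

# Converting to pandas now yields an `object` column filled with None values.
PD = DT.to_pandas()
assert PD.iloc[:, 0].tolist() == [None, None, None]

# Converting that pandas column back into datatable produces `void` again.
DT2 = dt.Frame(PD)
print(DT2)   # expected to show a single 3-row column of type void
```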
[#3063] diff --git a/src/core/frame/to_numpy.cc b/src/core/frame/to_numpy.cc index 2f943a2db2..107002f6e3 100644 --- a/src/core/frame/to_numpy.cc +++ b/src/core/frame/to_numpy.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2018-2022 H2O.ai +// Copyright 2018-2023 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -71,8 +71,8 @@ oobj Frame::to_numpy(const PKArgs& args) { col.cast_inplace(target_type); } auto res = to_numpy_impl( - Frame::oframe(new DataTable({col}, DataTable::default_names)), - c_contiguous + Frame::oframe(new DataTable({col}, DataTable::default_names)), + c_contiguous ); return res.invoke("reshape", {oint(col.nrows())}); } @@ -103,7 +103,7 @@ static oobj to_numpy_impl(oobj frame, bool c_contiguous) { size_t ncols = dt->ncols(); if (ncols == 0) { otuple shape(2); - shape.set(0, oint(0)); + shape.set(0, frame.get_attr("nrows").to_pyint()); shape.set(1, oint(0)); return numpy.invoke("empty", {shape}); } diff --git a/tests/frame/test-to-numpy.py b/tests/frame/test-to-numpy.py index 99c479f07c..cb4d1bce3f 100644 --- a/tests/frame/test-to-numpy.py +++ b/tests/frame/test-to-numpy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- #------------------------------------------------------------------------------- -# Copyright 2018-2022 H2O.ai +# Copyright 2018-2023 H2O.ai # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -45,6 +45,28 @@ def test_empty_frame(): assert a.tolist() == [] +@numpy_test +def test_empty_frame_rows(): + # See issue #3427 + N = 5 + DT = dt.Frame() + DT.nrows = N + assert DT.shape == (N, 0) + NP = DT.to_numpy() + assert NP.shape == (N, 0) + assert NP.tolist() == [[] for _ in range(N)] + + +@numpy_test +def test_empty_frame_cols(): + M = 5 + DT = dt.Frame([[] for _ in range(M)]) + assert DT.shape == (0, M) + NP = DT.to_numpy() + assert NP.shape == (0, M) + assert NP.tolist() == [] + + @numpy_test def test_tonumpy0(np): d0 = dt.Frame([1, 3, 5, 7, 9]) From ea65a55acbd70250b3a0761fd0c61b14bfbe4d5c Mon Sep 17 00:00:00 2001 From: Chathrinda Ranasinghe Date: Tue, 28 Feb 2023 11:05:30 +0530 Subject: [PATCH 3/5] [DevOps] Snyk Integration (#3423) In this PR we - created a new organization in [Snyk](https://app.snyk.io/org/py_data_table). - created a new service account for [snyk token](https://app.snyk.io/org/py_data_table/manage/service-accounts). 
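Referring back to [PATCH 2/5] for a moment, the user-visible effect of the change in `to_numpy_impl()` shown above can be sketched with a short snippet. This is illustrative only; it assumes numpy is installed and a datatable build that includes the fix.

```python
import datatable as dt

DT = dt.Frame()    # zero columns, zero rows
DT.nrows = 5       # resize to five rows while keeping zero columns

NP = DT.to_numpy()
# Before the patch the resulting array had shape (0, 0); with the fix the
# row count is honoured and the shape becomes (5, 0).
print(NP.shape)
```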
**PARENT ISSUES** - https://github.com/h2oai/h2o-ops/issues/241 - https://github.com/h2oai/h2o-ops/issues/248 --- .github/workflows/snyk-scan.yml | 66 +++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/snyk-scan.yml diff --git a/.github/workflows/snyk-scan.yml b/.github/workflows/snyk-scan.yml new file mode 100644 index 0000000000..5b665245e9 --- /dev/null +++ b/.github/workflows/snyk-scan.yml @@ -0,0 +1,66 @@ +name: Snyk Security Vulnerability Scan + +on: + workflow_dispatch: + pull_request: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' + branches: + - 'main' + +jobs: + snyk_scan_test: + if: ${{ github.event_name == 'pull_request' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + + - uses: snyk/actions/setup@master + + - uses: actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Check changed Deps files + uses: tj-actions/changed-files@v35 + id: changed-files + with: + files: | + **/requirements*.txt + + - name: Snyk scan for Python dependencies + if: steps.changed-files.outputs.any_changed == 'true' + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + run: | + for file in ${{ steps.changed-files.outputs.all_changed_files }}; do + python3 -m pip install -r $file + snyk test --command=python3 --package-manager=pip --file=$file -d --skip-unresolved + done + + snyk_scan_monitor: + if: ${{ github.event_name == 'push' }} + runs-on: ubuntu-latest + steps: + - name: Extract github branch/tag name + shell: bash + run: echo "ref=$(echo ${GITHUB_REF##*/})" >> $GITHUB_OUTPUT + id: extract_ref + + - uses: actions/checkout@master + + - uses: snyk/actions/setup@master + + - uses: actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Snyk scan for Python dependencies + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + run: | + for file in requirements*.txt; do + python3 -m pip install -r $file + snyk monitor --command=python3 --skip-unresolved --org=py_data_table --remote-repo-url=datatable/${{ steps.extract_ref.outputs.ref }} --package-manager=pip --file=$file --project-name=PY-DATA-TABLE/datatable/${{ steps.extract_ref.outputs.ref }}/$file -d --fail-on=all + done From b1dc7d8abb7592cf30318a69e8dd3d3f2d68a9cf Mon Sep 17 00:00:00 2001 From: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> Date: Tue, 28 Feb 2023 18:56:14 -0800 Subject: [PATCH 4/5] Minor improvements to thread pool (#3432) - throw runtime exception for nested thread pools in the case of `DT_DISABLE`; - remove unnecessary include. --- src/core/parallel/job_idle.cc | 3 --- src/core/parallel/thread_team.cc | 14 +++++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/core/parallel/job_idle.cc b/src/core/parallel/job_idle.cc index 26dfb4045b..cbe2f76eff 100644 --- a/src/core/parallel/job_idle.cc +++ b/src/core/parallel/job_idle.cc @@ -20,9 +20,6 @@ // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ #include "utils/assert.h" -#ifndef DT_DISABLE - #include "utils/exceptions.h" -#endif #include "parallel/api.h" #include "parallel/job_idle.h" #include "parallel/thread_pool.h" diff --git a/src/core/parallel/thread_team.cc b/src/core/parallel/thread_team.cc index b7622efd58..e7798950e1 100644 --- a/src/core/parallel/thread_team.cc +++ b/src/core/parallel/thread_team.cc @@ -23,7 +23,12 @@ #include "parallel/thread_pool.h" #include "parallel/thread_job.h" #include "parallel/thread_team.h" -#include "utils/exceptions.h" +#ifdef DT_DISABLE + #include +#else + #include "utils/exceptions.h" +#endif + namespace dt { @@ -34,8 +39,11 @@ ThreadTeam::ThreadTeam(size_t nth, ThreadPool* pool) barrier_counter {0} { if (thpool->current_team) { - #ifndef DT_DISABLE - throw RuntimeError() << "Unable to create a nested thread team"; + std::string msg = "Unable to create a nested thread team"; + #ifdef DT_DISABLE + throw std::runtime_error(msg); + #else + throw RuntimeError() << msg; #endif } thpool->current_team = this; From 6a3b51bda0b524668ba75dacd0e4daf215b5d2de Mon Sep 17 00:00:00 2001 From: Oleksiy <35204136+oleksiyskononenko@users.noreply.github.com> Date: Wed, 1 Mar 2023 16:02:41 -0800 Subject: [PATCH 5/5] Drop python `3.7` support (#3435) Closes #3434 --- ci/Jenkinsfile.groovy | 54 ++-------------------------------------- ci/appveyor.yml | 34 ------------------------- ci/ext.py | 5 ++-- ci/xbuild/extension.py | 2 +- docs/releases/v1.1.0.rst | 11 +++++--- docs/start/install.rst | 6 ++--- 6 files changed, 17 insertions(+), 95 deletions(-) diff --git a/ci/Jenkinsfile.groovy b/ci/Jenkinsfile.groovy index 499de488af..77b03f6f3b 100644 --- a/ci/Jenkinsfile.groovy +++ b/ci/Jenkinsfile.groovy @@ -213,7 +213,7 @@ ansiColor('xterm') { -e DT_BUILD_NUMBER=${DT_BUILD_NUMBER} \ --entrypoint /bin/bash \ ${DOCKER_IMAGE_X86_64_MANYLINUX} \ - -c "env && /opt/python/cp37-cp37m/bin/python3.7 ci/ext.py sdist" + -c "env && /opt/python/cp38-cp38/bin/python3.8 ci/ext.py sdist" """ sh """ echo "--------- _build_info.py --------------------" @@ -251,7 +251,6 @@ ansiColor('xterm') { -c "cd /dot && \ ls -la && \ ls -la src/datatable && \ - /opt/python/cp37-cp37m/bin/python3.7 ci/ext.py wheel --audit && \ /opt/python/cp38-cp38/bin/python3.8 ci/ext.py debugwheel --audit && \ /opt/python/cp38-cp38/bin/python3.8 ci/ext.py wheel --audit && \ /opt/python/cp39-cp39/bin/python3.9 ci/ext.py wheel --audit && \ @@ -259,7 +258,6 @@ ansiColor('xterm') { /opt/python/cp311-cp311/bin/python3.11 ci/ext.py wheel --audit && \ echo '===== Py3.8 Debug =====' && unzip -p dist/*debug*.whl datatable/_build_info.py && \ mv dist/*debug*.whl . 
&& \ - echo '===== Py3.7 =====' && unzip -p dist/*cp37*.whl datatable/_build_info.py && \ echo '===== Py3.8 =====' && unzip -p dist/*cp38*.whl datatable/_build_info.py && \ echo '===== Py3.9 =====' && unzip -p dist/*cp39*.whl datatable/_build_info.py && \ echo '===== Py3.10 =====' && unzip -p dist/*cp310*.whl datatable/_build_info.py && \ @@ -286,9 +284,6 @@ ansiColor('xterm') { "DT_BUILD_SUFFIX=${DT_BUILD_SUFFIX}", "DT_BUILD_NUMBER=${DT_BUILD_NUMBER}"]) { sh """ - source /Users/jenkins/datatable_envs/py37/bin/activate - python ci/ext.py wheel - deactivate source /Users/jenkins/datatable_envs/py38/bin/activate python ci/ext.py wheel deactivate @@ -301,7 +296,6 @@ ansiColor('xterm') { source /Users/jenkins/datatable_envs/py311/bin/activate python ci/ext.py wheel deactivate - echo '===== Py3.7 =====' && unzip -p dist/*cp37*.whl datatable/_build_info.py echo '===== Py3.8 =====' && unzip -p dist/*cp38*.whl datatable/_build_info.py echo '===== Py3.9 =====' && unzip -p dist/*cp39*.whl datatable/_build_info.py echo '===== Py3.10 =====' && unzip -p dist/*cp310*.whl datatable/_build_info.py @@ -341,7 +335,6 @@ ansiColor('xterm') { -c "cd /dot && \ ls -la && \ ls -la src/datatable && \ - /opt/python/cp37-cp37m/bin/python3.7 ci/ext.py wheel --audit && \ /opt/python/cp38-cp38/bin/python3.8 ci/ext.py debugwheel --audit && \ /opt/python/cp38-cp38/bin/python3.8 ci/ext.py wheel --audit && \ /opt/python/cp39-cp39/bin/python3.9 ci/ext.py wheel --audit && \ @@ -349,7 +342,6 @@ ansiColor('xterm') { /opt/python/cp311-cp311/bin/python3.11 ci/ext.py wheel --audit && \ echo '===== Py3.8 Debug =====' && unzip -p dist/*debug*.whl datatable/_build_info.py && \ mv dist/*debug*.whl . && \ - echo '===== Py3.7 =====' && unzip -p dist/*cp37*.whl datatable/_build_info.py && \ echo '===== Py3.8 =====' && unzip -p dist/*cp38*.whl datatable/_build_info.py && \ echo '===== Py3.9 =====' && unzip -p dist/*cp39*.whl datatable/_build_info.py && \ echo '===== Py3.10 =====' && unzip -p dist/*cp310*.whl datatable/_build_info.py && \ @@ -373,20 +365,6 @@ ansiColor('xterm') { if (!params.DISABLE_ALL_TESTS) { def testStages = [:] testStages << - namedStage('Test x86_64-manylinux-py37', { stageName, stageDir -> - node(NODE_LINUX) { - buildSummary.stageWithSummary(stageName, stageDir) { - cleanWs() - dumpInfo() - dir(stageDir) { - unstash 'datatable-sources' - unstash 'x86_64-manylinux-wheels' - test_in_docker("x86_64-manylinux-py37", "37", - DOCKER_IMAGE_X86_64_MANYLINUX) - } - } - } - }) << namedStage('Test x86_64-manylinux-py38-debug', doPy38Tests, { stageName, stageDir -> node(NODE_LINUX) { buildSummary.stageWithSummary(stageName, stageDir) { @@ -457,20 +435,6 @@ ansiColor('xterm') { } } }) << - namedStage('Test ppc64le-manylinux-py37', doPpcTests, { stageName, stageDir -> - node(NODE_PPC) { - buildSummary.stageWithSummary(stageName, stageDir) { - cleanWs() - dumpInfo() - dir(stageDir) { - unstash 'datatable-sources' - unstash 'ppc64le-manylinux-wheels' - test_in_docker("ppc64le-manylinux-py37", "37", - DOCKER_IMAGE_PPC64LE_MANYLINUX) - } - } - } - }) << namedStage('Test ppc64le-manylinux-py38-debug', doPpcTests && doPy38Tests, { stageName, stageDir -> node(NODE_PPC) { buildSummary.stageWithSummary(stageName, stageDir) { @@ -541,19 +505,6 @@ ansiColor('xterm') { } } }) << - namedStage('Test x86_64-macos-py37', { stageName, stageDir -> - node(NODE_MACOS) { - buildSummary.stageWithSummary(stageName, stageDir) { - cleanWs() - dumpInfo() - dir(stageDir) { - unstash 'datatable-sources' - unstash 'x86_64-macos-wheels' - test_macos('37') - 
} - } - } - }) << namedStage('Test x86_64-macos-py38', doPy38Tests, { stageName, stageDir -> node(NODE_MACOS) { buildSummary.stageWithSummary(stageName, stageDir) { @@ -756,7 +707,7 @@ ansiColor('xterm') { // used as a prefix for the test-report file name. // // pyver -// python version string, such as "37" or "310" +// python version string, such as "38" or "310" // // docker_image // Name of the docker container where the tests will be run @@ -824,7 +775,6 @@ def test_in_docker(String testtag, String pyver, String docker_image) { def get_python_for_docker(String pyver, String image) { if (image == DOCKER_IMAGE_X86_64_MANYLINUX || image == DOCKER_IMAGE_PPC64LE_MANYLINUX) { - if (pyver == "37") return "/opt/python/cp37-cp37m/bin/python3.7" if (pyver == "38") return "/opt/python/cp38-cp38/bin/python3.8" if (pyver == "39") return "/opt/python/cp39-cp39/bin/python3.9" if (pyver == "310") return "/opt/python/cp310-cp310/bin/python3.10" diff --git a/ci/appveyor.yml b/ci/appveyor.yml index c39ec05b30..97e4afb4de 100644 --- a/ci/appveyor.yml +++ b/ci/appveyor.yml @@ -183,40 +183,6 @@ build_script: - # ======================================================================= - # Build and test wheel for Python 3.7 - # ======================================================================= - - $env:PATH = "C:/Python37-x64;C:/Python37-x64/Scripts;$DEFAULT_PATH" - - python -V - - python ci/ext.py wheel - - $DT_WHEEL = ls dist/*-cp37-*.whl - - echo "DT_WHEEL = $DT_WHEEL" - - echo "----- _build_info.py for Python 3.7 ------------------------------" - - cat src/datatable/_build_info.py - - echo "------------------------------------------------------------------" - - python -m pip install --upgrade pip - - python -m pip install $DT_WHEEL - - python -m pip install pytest docutils pandas pyarrow - - python -m pytest -ra --maxfail=10 -Werror -vv --showlocals ./tests/ - - if(!$?) { Exit $LASTEXITCODE } - - python -m pip uninstall -y $DT_WHEEL - - - # ======================================================================= # Build and test wheel for Python 3.8 # ======================================================================= diff --git a/ci/ext.py b/ci/ext.py index cbb8c10029..2bd2f32d59 100644 --- a/ci/ext.py +++ b/ci/ext.py @@ -412,9 +412,10 @@ def get_meta(): "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Information Analysis", ], diff --git a/ci/xbuild/extension.py b/ci/xbuild/extension.py index 22b259ac0e..8c6684eada 100644 --- a/ci/xbuild/extension.py +++ b/ci/xbuild/extension.py @@ -344,7 +344,7 @@ def xbuild_version(self): def pyabi(self): """ Python `SOABI` config variable, which may look something like - 'cpython-37m-darwin'. If this does not correspond to the value + 'cpython-38m-darwin'. If this does not correspond to the value stored in .xbuild, then all sources will be rebuilt. 
""" return sysconfig.get_config_var("SOABI") diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 71719f33c4..b7c6554d88 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -192,11 +192,16 @@ General ------- - -[api] Datatable no longer supports Python 3.6, because Python 3.6 itself - has reached its end of life on 2021-12-23 and will no longer be - supported. If you are still using Python 3.6, please consider upgrading. + -[api] Datatable no longer supports Python 3.6, because it + has reached its end of life on 2021-12-23 and will no longer be supported. + If you are still using Python 3.6, please consider upgrading. [#3376] + -[api] Datatable no longer supports Python 3.7, because it + has reached its end of life on 2023-06-27 and will no longer be supported. + If you are still using Python 3.7, please consider upgrading. + [#3434] + -[new] Added properties :attr:`.is_array `, :attr:`.is_boolean `, :attr:`.is_categorical `, diff --git a/docs/start/install.rst b/docs/start/install.rst index 79c0181b9a..86c3becdc0 100644 --- a/docs/start/install.rst +++ b/docs/start/install.rst @@ -9,13 +9,13 @@ This page describes how to install ``datatable`` on various systems. Prerequisites ------------- -Python 3.7+ is required. Generally, we will support each version of Python +Python 3.8+ is required. Generally, we will support each version of Python until its official `end of life`_. You can verify your python version via .. code-block:: console $ python --version - Python 3.7.10 + Python 3.8.10 In addition, we recommend using ``pip`` version 20.3+, especially if you're planning to install datatable from the source, or if you are on a Unix machine. @@ -233,7 +233,7 @@ know how to resolve them. If none of these help you, please ask a question on ``Python.h: no such file or directory`` when compiling from source Your Python distribution was shipped without the ``Python.h`` header file. This has been observed on certain Linux machines. You would need to install - a Python package with a ``-dev`` suffix, for example ``python3.7-dev``. + a Python package with a ``-dev`` suffix, for example ``python3.8-dev``. ``fatal error: 'sys/mman.h' file not found`` on macOS In order to compile from source on mac computers, you need to have Xcode