Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RPP Rain augmentation on HOST and HIP #463

Open
wants to merge 26 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
aa1af83
Add Intial u8 implementation for Rain
HazarathKumarM Aug 19, 2024
085e243
Add I8 implementation and Changes based on the Review comments
HazarathKumarM Aug 20, 2024
2558cfa
Initial HIP implementation
HazarathKumarM Aug 23, 2024
e8ae15b
Merge remote-tracking branch 'TOT/develop' into hk/rain
HazarathKumarM Aug 23, 2024
4c89f7a
Add test case for Rain in HIP test suite
HazarathKumarM Aug 23, 2024
395bdc6
minor code cleanup
HazarathKumarM Aug 23, 2024
bffe622
Modified func names and removed unnecessary code
HazarathKumarM Aug 26, 2024
bfc3592
Resolve Review comments
HazarathKumarM Aug 28, 2024
fba3773
replaced pinned memory with HIP memory for Rain Layer computation
HazarathKumarM Aug 29, 2024
32799ed
Modified RGB Rain Mask to planar Rain Mask in HIP
HazarathKumarM Aug 30, 2024
ed1d83c
Address review comments
HazarathKumarM Sep 2, 2024
8a8ab59
Add Rain compute function
HazarathKumarM Sep 2, 2024
000ab57
Add version changes and Resolve review comments
HazarathKumarM Sep 2, 2024
6aac89c
fix build warnings
HazarathKumarM Sep 2, 2024
21aaa2c
Fix the outputs of f16 toggle variants
HazarathKumarM Sep 2, 2024
fdd622d
Revert Rain width changes
HazarathKumarM Sep 3, 2024
b482e96
Merge remote-tracking branch 'abishek/develop' into hk/rain
HazarathKumarM Sep 3, 2024
655d841
Fix pln3 outputs for u8 and i8 bitdepths
HazarathKumarM Sep 3, 2024
9003435
Resolve review comments
HazarathKumarM Sep 4, 2024
3a23fa4
Modified load and store routines for planar cases
HazarathKumarM Sep 4, 2024
d940131
Resolve review comments
HazarathKumarM Oct 22, 2024
b3030c9
Merge remote-tracking branch 'develop' into hk/rain
HazarathKumarM Oct 22, 2024
3ff4958
Modify docs image
HazarathKumarM Oct 23, 2024
f6e845c
Merge remote-tracking branch 'ar/opt_rain' into hk/rain
HazarathKumarM Oct 23, 2024
d479c5c
Merge pull request #319 from HazarathKumarM/hk/rain
r-abishek Oct 23, 2024
19a1557
Fix versioning
r-abishek Oct 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/rpp/en/latest](https://rocm.docs.amd.com/projects/rpp/en/latest)

## RPP 1.9.8 (Unreleased)

### Changes

RPP Rain augmentation on HOST and HIP

## RPP 1.9.3 (unreleased)

### Changes
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ endif()
set(CMAKE_CXX_STANDARD 17)

# RPP Version
set(VERSION "1.9.3")
set(VERSION "1.9.8")

# Set Project Version and Language
project(rpp VERSION ${VERSION} LANGUAGES CXX)
Expand Down
Binary file added docs/data/doxygenInputs/img640x480.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion include/rpp_version.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extern "C" {
// NOTE: IMPORTANT: Match the version with CMakelists.txt version
#define RPP_VERSION_MAJOR 1
#define RPP_VERSION_MINOR 9
#define RPP_VERSION_PATCH 3
#define RPP_VERSION_PATCH 8
#ifdef __cplusplus
}
#endif
Expand Down
54 changes: 54 additions & 0 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,60 @@ RppStatus rppt_glitch_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst
RppStatus rppt_glitch_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptChannelOffsets *rgbOffsets, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Rain augmentation on HOST backend for a NCHW/NHWC layout tensor
* \details The rain augmentation simulates a rain effect for a batch of RGB (3-channel) / greyscale (1-channel) images with an NHWC/NCHW tensor layout.<br>
* <b> NOTE: This augmentation gives a more realistic Rain output when all images in a batch are of similar / same sizes </b> <br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be the same depth as srcPtr.
* \image html img640x480.png Sample Input
* \image html effects_augmentations_rain_img640x480.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param [in] rainPercentage The percentage of the rain effect to be applied (0 <= rainPercentage <= 100)
* \param [in] rainWidth Width of the rain drops in pixels. To be tuned by user depending on size of the image.
* \param [in] rainHeight Height of the rain drops in pixels. To be tuned by user depending on size of the image.
* \param [in] slantAngle Slant angle of the rain drops (positive value for right slant, negative for left slant). A single Rpp32f value representing the slant of the rain drops in degrees.
* Values range from [-90, 90], where -90 represents extreme left slant, 0 is vertical, and 90 is extreme right slant.
* \param [in] alpha An array of alpha blending values to be used for blending the rainLayer and the input image for each image in the batch (0 <= alpha <= 1 for each image in the batch).
* \param [in] roiTensorPtrSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_rain_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f rainPercentage, Rpp32u rainWidth, Rpp32u rainHeight, Rpp32f slantAngle, Rpp32f *alpha, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Rain augmentation on HIP backend for a NCHW/NHWC layout tensor
* \details The rain augmentation simulates a rain effect for a batch of RGB (3-channel) / greyscale (1-channel) images with an NHWC/NCHW tensor layout.<br>
* <b> NOTE: This augmentation gives a more realistic Rain output when all images in a batch are of similar / same sizes </b> <br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be the same depth as srcPtr.
* \image html img640x480.png Sample Input
* \image html effects_augmentations_rain_img640x480.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param [in] rainPercentage The percentage of the rain effect to be applied (0 <= rainPercentage <= 100)
* \param [in] rainWidth Width of the rain drops in pixels. To be tuned by user depending on size of the image.
* \param [in] rainHeight Height of the rain drops in pixels. To be tuned by user depending on size of the image.
* \param [in] slantAngle Slant angle of the rain drops (positive value for right slant, negative for left slant). A single Rpp32f value representing the slant of the rain drops in degrees.
* Values range from [-90, 90], where -90 represents extreme left slant, 0 is vertical, and 90 is extreme right slant.
* \param [in] alpha An array of alpha blending values in pinned / HIP memory, used for blending the rainLayer and the input image for each image in the batch (0 <= alpha <= 1 for each image in the batch).
* \param [in] roiTensorPtrSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_rain_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f rainPercentage, Rpp32u rainWidth, Rpp32u rainHeight, Rpp32f slantAngle, Rpp32f *alpha, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Pixelate augmentation on HOST backend for a NCHW/NHWC layout tensor
* \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
Expand Down
25 changes: 25 additions & 0 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6563,6 +6563,31 @@ inline void compute_transpose4x8_avx(__m256 *pSrc, __m128 *pDst)
pDst[7] = _mm256_extractf128_ps(pSrc[3], 1); /* extract [P08|P16|P24|P32] */
}

// Alpha-blends six __m256 registers in p1 (48 floats) with a two-register rain layer in p2, in place.
// For every register: p1[i] = p1[i] + (p2[i % 2] - p1[i]) * pMul, i.e. the rain layer pair p2[0], p2[1]
// is reused for each consecutive pair of p1 registers.
inline void compute_rain_48_host(__m256 *p1, __m256 *p2, __m256 &pMul)
{
    for (int regIdx = 0; regIdx < 6; regIdx++)
    {
        // even registers blend with p2[0], odd registers blend with p2[1]
        __m256 pDiff = _mm256_sub_ps(p2[regIdx & 1], p1[regIdx]);
        p1[regIdx] = _mm256_fmadd_ps(pDiff, pMul, p1[regIdx]);
    }
}

// Alpha-blends four __m256 registers in p1 (32 floats) with the four matching registers in p2, in place.
// For every register: p1[i] = p1[i] + (p2[i] - p1[i]) * pMul.
inline void compute_rain_32_host(__m256 *p1, __m256 *p2, __m256 &pMul)
{
    for (int regIdx = 0; regIdx < 4; regIdx++)
    {
        __m256 pDiff = _mm256_sub_ps(p2[regIdx], p1[regIdx]);
        p1[regIdx] = _mm256_fmadd_ps(pDiff, pMul, p1[regIdx]);
    }
}

// Alpha-blends three __m256 registers in p1 (24 floats) with a single rain layer register p2, in place.
// For every register: p1[i] = p1[i] + (p2 - p1[i]) * pMul, with the same p2 used for all three.
inline void compute_rain_24_host(__m256 *p1, __m256 p2, __m256 &pMul)
{
    for (int regIdx = 0; regIdx < 3; regIdx++)
    {
        __m256 pDiff = _mm256_sub_ps(p2, p1[regIdx]);
        p1[regIdx] = _mm256_fmadd_ps(pDiff, pMul, p1[regIdx]);
    }
}

// Compute hanning window
inline RPP_HOST_DEVICE void hann_window(Rpp32f *output, Rpp32s windowSize)
{
Expand Down
30 changes: 30 additions & 0 deletions src/include/cpu/rpp_cpu_simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,24 @@ inline void rpp_load24_f32pkd3_to_f32pln3_avx(Rpp32f *srcPtr, __m256 *p)
p[2] = _mm256_setr_m128(p128[2], p128[6]);
}

// Loads 24 interleaved (3-element stride) f16 values from srcPtr, widens them to f32 and de-interleaves
// them into three AVX registers: p[c] = { srcPtr[c], srcPtr[c + 3], ..., srcPtr[c + 21] } for c = 0, 1, 2.
// Each step loads only the 4 f16 values (64 bits) that _mm_cvtph_ps actually converts, instead of a full
// 128-bit load whose upper half was discarded; this keeps the read window as tight as possible.
// NOTE: the final load at srcPtr + 21 still touches srcPtr[24], one element past the 24 values consumed.
inline void rpp_load24_f16pkd3_to_f32pln3_avx(Rpp16f *srcPtr, __m256 *p)
{
    __m128 p128[8];

    // first 4 groups of 3: rows of 4 f32 values, transposed so that p128[0..2] each hold one channel
    p128[0] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr)));
    p128[1] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 3)));
    p128[2] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 6)));
    p128[3] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 9)));
    _MM_TRANSPOSE4_PS(p128[0], p128[1], p128[2], p128[3]);

    // next 4 groups of 3, handled the same way
    p128[4] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 12)));
    p128[5] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 15)));
    p128[6] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 18)));
    p128[7] = _mm_cvtph_ps(_mm_loadl_epi64(reinterpret_cast<const __m128i *>(srcPtr + 21)));
    _MM_TRANSPOSE4_PS(p128[4], p128[5], p128[6], p128[7]);

    // stitch low/high halves; p128[3] and p128[7] hold the spill-over 4th column and are dropped
    p[0] = _mm256_setr_m128(p128[0], p128[4]);
    p[1] = _mm256_setr_m128(p128[1], p128[5]);
    p[2] = _mm256_setr_m128(p128[2], p128[6]);
}

inline void rpp_load24_f32pkd3_to_f32pln3_mirror_avx(Rpp32f *srcPtr, __m256 *p)
{
__m128 p128[8];
Expand Down Expand Up @@ -1555,6 +1573,13 @@ inline void rpp_load24_f32pln3_to_f32pln3_avx(Rpp32f *srcPtrR, Rpp32f *srcPtrG,
p[2] = _mm256_loadu_ps(srcPtrB);
}

// Loads 8 f16 values from each of the three plane pointers and widens them to f32:
// p[0] <- srcPtrR[0..7], p[1] <- srcPtrG[0..7], p[2] <- srcPtrB[0..7].
inline void rpp_load24_f16pln3_to_f32pln3_avx(Rpp16f *srcPtrR, Rpp16f *srcPtrG, Rpp16f *srcPtrB, __m256 *p)
{
    Rpp16f *srcPtrPlane[3] = {srcPtrR, srcPtrG, srcPtrB};
    for (int plane = 0; plane < 3; plane++)
    {
        // unaligned 128-bit load = 8 packed f16 values
        __m128i pxHalf = _mm_loadu_si128(reinterpret_cast<const __m128i *>(srcPtrPlane[plane]));
        p[plane] = _mm256_cvtph_ps(pxHalf);
    }
}

inline void rpp_load24_f32pln3_to_f32pln3_mirror_avx(Rpp32f *srcPtrR, Rpp32f *srcPtrG, Rpp32f *srcPtrB, __m256 *p)
{
__m256i pxMask = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
Expand Down Expand Up @@ -1647,6 +1672,11 @@ inline void rpp_load8_f32_to_f32_avx(Rpp32f *srcPtr, __m256 *p)
p[0] = _mm256_loadu_ps(srcPtr);
}

// Loads 8 consecutive f16 values from srcPtr (unaligned) and widens them to 8 f32 values in p[0].
inline void rpp_load8_f16_to_f32_avx(Rpp16f *srcPtr, __m256 *p)
{
    __m128i pxHalf = _mm_loadu_si128(reinterpret_cast<const __m128i *>(srcPtr));
    p[0] = _mm256_cvtph_ps(pxHalf);
}

inline void rpp_load8_f32_to_f32_mirror_avx(Rpp32f *srcPtr, __m256 *p)
{
__m256i pxMask = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
Expand Down
1 change: 1 addition & 0 deletions src/modules/cpu/host_tensor_effects_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,6 @@ SOFTWARE.
#include "kernel/vignette.hpp"
#include "kernel/resize.hpp" //pixelate dependency
#include "kernel/erase.hpp"
#include "kernel/rain.hpp"

#endif // HOST_TENSOR_EFFECTS_AUGMENTATIONS_HPP
Loading