@@ -23,52 +23,6 @@ THE SOFTWARE.
23
23
#include " resize_kernels.h"
24
24
#include " roc_video_dec.h"
25
25
26
- #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
27
- /* *
28
- * @brief low level HIP kernel for Resize using tex2d
29
- *
30
- * @tparam YuvUnitx2
31
- * @param tex_y - text2D object Y pointer
32
- * @param tex_uv - text2D object UV pointer
33
- * @param p_dst - dst Y pointer
34
- * @param p_dst_uv - dst UV pointer
35
- * @param pitch - dst pitch
36
- * @param width - dst width
37
- * @param height - dst height
38
- * @param fx_scale - xscale
39
- * @param fy_scale - yscale
40
- * @return
41
- */
42
-
43
- template <typename YuvUnitx2>
44
- static __global__ void ResizeHip (hipTextureObject_t tex_y, hipTextureObject_t tex_uv,
45
- uint8_t *p_dst, uint8_t *p_dst_uv, int pitch, int width, int height,
46
- float fx_scale, float fy_scale)
47
- {
48
- int ix = blockIdx.x * blockDim.x + threadIdx.x ,
49
- iy = blockIdx.y * blockDim.y + threadIdx.y ;
50
-
51
- if (ix >= width / 2 || iy >= height / 2 ) {
52
- return ;
53
- }
54
-
55
- int x = ix * 2 , y = iy * 2 ;
56
- typedef decltype (YuvUnitx2::x) YuvUnit;
57
- const int max_yuv_value = (1 << (sizeof (YuvUnit) * 8 )) - 1 ;
58
- *(YuvUnitx2 *)(p_dst + y * pitch + x * sizeof (YuvUnit)) = YuvUnitx2 {
59
- (YuvUnit)(tex2D<float >(tex_y, x * fx_scale, y * fy_scale) * max_yuv_value),
60
- (YuvUnit)(tex2D<float >(tex_y, (x + 1 ) * fx_scale, y * fy_scale) * max_yuv_value)
61
- };
62
- y++;
63
- *(YuvUnitx2 *)(p_dst + y * pitch + x * sizeof (YuvUnit)) = YuvUnitx2 {
64
- (YuvUnit)(tex2D<float >(tex_y, x * fx_scale, y * fy_scale) * max_yuv_value),
65
- (YuvUnit)(tex2D<float >(tex_y, (x + 1 ) * fx_scale, y * fy_scale) * max_yuv_value)
66
- };
67
- float2 uv = tex2D<float2>(tex_uv, ix * fx_scale, iy * fy_scale + 0 .5f );
68
- *(YuvUnitx2 *)(p_dst_uv + iy * pitch + ix * 2 * sizeof (YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * max_yuv_value), (YuvUnit)(uv.y * max_yuv_value) };
69
- }
70
- #endif
71
-
72
26
/* *
73
27
* @brief low level HIP kernel for Resize using nearest neighbor interpolation
74
28
*
@@ -118,40 +72,9 @@ static __global__ void ResizeHip(uint8_t *p_src, uint8_t *p_src_uv, int src_pitc
118
72
119
73
template <typename YuvUnitx2>
120
74
static void Resize (unsigned char *p_dst, unsigned char * p_dst_uv, int dst_pitch, int dst_width, int dst_height,
121
- unsigned char *p_src, unsigned char *p_src_uv, int src_pitch, int src_width, int src_height, hipStream_t hip_stream) {
122
- #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
123
- hipResourceDesc res_desc = {};
124
- res_desc.resType = hipResourceTypePitch2D;
125
- res_desc.res .pitch2D .devPtr = p_src;
126
- res_desc.res .pitch2D .desc = hipCreateChannelDesc<decltype (YuvUnitx2::x)>();
127
- res_desc.res .pitch2D .width = src_width;
128
- res_desc.res .pitch2D .height = src_height;
129
- res_desc.res .pitch2D .pitchInBytes = src_pitch;
130
-
131
- hipTextureDesc tex_desc = {};
132
- tex_desc.filterMode = hipFilterModeLinear;
133
- tex_desc.readMode = hipReadModeNormalizedFloat;
134
-
135
- hipTextureObject_t tex_y=0 ;
136
- HIP_API_CALL (hipCreateTextureObject (&tex_y, &res_desc, &tex_desc, NULL ));
137
-
138
- res_desc.res .pitch2D .devPtr = p_src_uv;
139
- res_desc.res .pitch2D .desc = hipCreateChannelDesc<YuvUnitx2>();
140
- res_desc.res .pitch2D .width = src_width >> 1 ;
141
- res_desc.res .pitch2D .height = src_height / 2 ;
142
-
143
- hipTextureObject_t tex_uv=0 ;
144
- HIP_API_CALL (hipCreateTextureObject (&tex_uv, &res_desc, &tex_desc, NULL ));
145
-
146
- ResizeHip<YuvUnitx2> <<<dim3 ((dst_width + 31 ) / 32 , (dst_height + 31 ) / 32 ), dim3 (16 , 16 ), 0 , hip_stream >>>(tex_y, tex_uv, p_dst, p_dst_uv,
147
- dst_pitch, dst_width, dst_height, 1 .0f * src_width / dst_width, 1 .0f * src_height / dst_height);
148
-
149
- HIP_API_CALL (hipDestroyTextureObject (tex_y));
150
- HIP_API_CALL (hipDestroyTextureObject (tex_uv));
151
- #else
75
+ unsigned char *p_src, unsigned char *p_src_uv, int src_pitch, int src_width, int src_height, hipStream_t hip_stream) {
152
76
ResizeHip<YuvUnitx2> <<<dim3 ((dst_width + 31 ) / 32 , (dst_height + 31 ) / 32 ), dim3 (16 , 16 ), 0 , hip_stream >>>(p_src, p_src_uv, src_pitch, p_dst, p_dst_uv,
153
77
dst_pitch, dst_width, dst_height, 1 .0f * src_width / dst_width, 1 .0f * src_height / dst_height);
154
- #endif
155
78
}
156
79
157
80
void ResizeNv12 (unsigned char *p_dst_nv12, int dst_pitch, int dst_width, int dst_height, unsigned char *p_src_nv12,
@@ -171,34 +94,6 @@ void ResizeP016(unsigned char *p_dst_p016, int dst_pitch, int dst_width, int dst
171
94
return Resize<ushort2>(p_dst_p016, p_dst_uv, dst_pitch, dst_width, dst_height, p_src_p016, p_src_uv, src_pitch, src_width, src_height, hip_stream);
172
95
}
173
96
174
- #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
175
- static __global__ void Scale_tex2D (hipTextureObject_t tex_src, uint8_t *p_dst, int pitch, int width,
176
- int height, float fx_scale, float fy_scale) {
177
- int x = blockIdx.x * blockDim.x + threadIdx.x ,
178
- y = blockIdx.y * blockDim.y + threadIdx.y ;
179
-
180
- if (x >= width || y >= height) {
181
- return ;
182
- }
183
-
184
- *(unsigned char *)(p_dst + (y * pitch) + x) = (unsigned char )(fminf ((tex2D<float >(tex_src, x * fx_scale, y * fy_scale)) * 255 .0f , 255 .0f ));
185
- }
186
-
187
- static __global__ void Scale_UV_tex2D (hipTextureObject_t tex_src, uint8_t *p_dst, int pitch, int width,
188
- int height, float fx_scale, float fy_scale) {
189
- int x = blockIdx.x * blockDim.x + threadIdx.x ,
190
- y = blockIdx.y * blockDim.y + threadIdx.y ;
191
-
192
- if (x >= width || y >= height){
193
- return ;
194
- }
195
- float2 uv = tex2D<float2>(tex_src, x * fx_scale, y * fy_scale);
196
- uchar2 dst_uv = uchar2{ (unsigned char )(fminf (uv.x * 255 .0f , 255 .0f )), (unsigned char )(fminf (uv.y * 255 .0f , 255 .0f )) };
197
-
198
- *(uchar2*)(p_dst + (y * pitch) + 2 * x) = dst_uv;
199
- }
200
- #endif
201
-
202
97
static __global__ void Scale (uint8_t *p_src, int src_pitch, uint8_t *p_dst, int pitch, int width,
203
98
int height, float fx_scale, float fy_scale) {
204
99
int x = blockIdx.x * blockDim.x + threadIdx.x ,
@@ -244,41 +139,6 @@ static __global__ void Scale_UV(uint8_t *p_src, int src_pitch, uint8_t *p_dst, i
244
139
void ResizeYUVHipLaunchKernel (uint8_t *dp_dst, int dst_pitch, int dst_width, int dst_height, uint8_t *dp_src, int src_pitch,
245
140
int src_width, int src_height, bool b_resize_uv, hipStream_t hip_stream) {
246
141
247
- #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
248
- hipResourceDesc res_desc = {};
249
- res_desc.resType = hipResourceTypePitch2D;
250
- res_desc.res .pitch2D .devPtr = dp_src;
251
- res_desc.res .pitch2D .desc = b_resize_uv ? hipCreateChannelDesc<uchar2>() : hipCreateChannelDesc<unsigned char >();
252
- res_desc.res .pitch2D .width = src_width;
253
- res_desc.res .pitch2D .height = src_height;
254
- res_desc.res .pitch2D .pitchInBytes = src_pitch;
255
-
256
- hipTextureDesc tex_desc = {};
257
- tex_desc.filterMode = hipFilterModeLinear;
258
- tex_desc.readMode = hipReadModeNormalizedFloat;
259
-
260
- tex_desc.addressMode [0 ] = hipAddressModeClamp;
261
- tex_desc.addressMode [1 ] = hipAddressModeClamp;
262
- tex_desc.addressMode [2 ] = hipAddressModeClamp;
263
-
264
- hipTextureObject_t tex_src = 0 ;
265
- HIP_API_CALL (hipCreateTextureObject (&tex_src, &res_desc, &tex_desc, NULL ));
266
-
267
- dim3 blockSize (16 , 16 , 1 );
268
- dim3 gridSize (((uint32_t )dst_width + blockSize.x - 1 ) / blockSize.x , ((uint32_t )dst_height + blockSize.y - 1 ) / blockSize.y , 1 );
269
-
270
- if (b_resize_uv){
271
- Scale_UV_tex2D <<<gridSize, blockSize, 0 , hip_stream >>>(tex_src, dp_dst,
272
- dst_pitch, dst_width, dst_height, 1 .0f * src_width / dst_width, 1 .0f * src_height / dst_height);
273
- }
274
- else {
275
- Scale_tex2D <<<gridSize, blockSize, 0 , hip_stream >>>(tex_src, dp_dst,
276
- dst_pitch, dst_width, dst_height, 1 .0f * src_width / dst_width, 1 .0f * src_height / dst_height);
277
- }
278
-
279
- HIP_API_CALL (hipGetLastError ());
280
- HIP_API_CALL (hipDestroyTextureObject (tex_src));
281
- #else
282
142
dim3 blockSize (16 , 16 , 1 );
283
143
dim3 gridSize (((uint32_t )dst_width + blockSize.x - 1 ) / blockSize.x , ((uint32_t )dst_height + blockSize.y - 1 ) / blockSize.y , 1 );
284
144
@@ -290,8 +150,6 @@ void ResizeYUVHipLaunchKernel(uint8_t *dp_dst, int dst_pitch, int dst_width, int
290
150
Scale <<<gridSize, blockSize, 0 , hip_stream >>>(dp_src, src_pitch, dp_dst,
291
151
dst_pitch, dst_width, dst_height, 1 .0f * src_width / dst_width, 1 .0f * src_height / dst_height);
292
152
}
293
- #endif
294
-
295
153
}
296
154
297
155
void ResizeYUV420 (uint8_t *p_dst_y,
0 commit comments