@@ -627,7 +627,7 @@ static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, con
627627 else
628628 {
629629 unsigned short *out = (unsigned short *)outImage;
630- out[(startRow + r) * outImageCols + c] = (unsigned short )(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
630+ out[(startRow + r) * outImageCols / sizeof ( unsigned short ) + c] = (unsigned short )(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
631631 }
632632 }
633633 }
@@ -670,7 +670,7 @@ static void CTCountOfBitsChangedSegment(DT *LRImage, DT *HRImage, const int rows
670670 else
671671 {
672672 unsigned short *out = (unsigned short *)outImage;
673- out[(startRow + r) * outImageCols + c] = (unsigned short )(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
673+ out[(startRow + r) * outImageCols / sizeof ( unsigned short ) + c] = (unsigned short )(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
674674 }
675675 }
676676 }
@@ -808,6 +808,8 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
808808{
809809 VideoDataType *inY;
810810 VideoDataType *outY;
811+ int pix_bytes = int ((gBitDepth + 7 ) / 8 );
812+
811813 for (int passIdx = 0 ; passIdx < gPasses ; passIdx++)
812814 {
813815#ifdef MEASURE_TIME
@@ -846,7 +848,7 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
846848 // the step of gIppCtx.segZones[passIdx][threadIdx].inYUpscaled is equal to the outY->width
847849 const int rows = outY->height ;
848850 const int cols = outY->width ;
849- const int step = outY->width ;
851+ const int step = outY->width * pix_bytes ;
850852
851853 // 1. Prepare cheap up-scaled 32f data
852854 IppStatus status = ippStsNoErr;
@@ -887,7 +889,7 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
887889 ippiConvert_8u32f_C1R (pDst, cols,
888890 pSeg32f, cols * sizeof (float ), {(int )cols, segRows});
889891 else
890- ippiConvert_16u32f_C1R ((Ipp16u *)pDst, cols ,
892+ ippiConvert_16u32f_C1R ((Ipp16u *)pDst, step ,
891893 pSeg32f, cols * sizeof (float ), {(int )cols, segRows});
892894
893895 // 2. Run hashing
@@ -900,24 +902,24 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
900902 {
901903 // it needs to do memcpy line by line when the line size of outY->pData is not equal to pDst's line size.
902904 if (step == outY->step ) {
903- memcpy (outY->pData , pDst, outY->step * gLoopMargin + gLoopMargin );
905+ memcpy (outY->pData , pDst, outY->step * gLoopMargin + gLoopMargin * pix_bytes );
904906 } else {
905907 for (int i = 0 ; i < gLoopMargin ; i++) {
906908 memcpy (outY->pData + i * outY->step , pDst + i * step, step);
907909 }
908- memcpy (outY->pData + gLoopMargin * outY->step , pDst + gLoopMargin * step, gLoopMargin );
910+ memcpy (outY->pData + gLoopMargin * outY->step , pDst + gLoopMargin * step, gLoopMargin * pix_bytes );
909911 }
910912 }
911913 if (endRow == rows)
912914 {
913915 if (step == outY->step ) {
914- memcpy (outY->pData + (rows - gLoopMargin ) * step - gLoopMargin ,
915- pDst + (segRows - gLoopMargin ) * step - gLoopMargin ,
916- outY->step * gLoopMargin + gLoopMargin );
916+ memcpy (outY->pData + (rows - gLoopMargin ) * step - gLoopMargin * pix_bytes ,
917+ pDst + (segRows - gLoopMargin ) * step - gLoopMargin * pix_bytes ,
918+ outY->step * gLoopMargin + gLoopMargin * pix_bytes );
917919 } else {
918- memcpy (outY->pData + (rows - gLoopMargin - 1 ) * outY->step + outY->width - gLoopMargin ,
919- pDst + (segRows - gLoopMargin ) * step - gLoopMargin ,
920- gLoopMargin );
920+ memcpy (outY->pData + (rows - gLoopMargin - 1 ) * outY->step + ( outY->width - gLoopMargin ) * pix_bytes ,
921+ pDst + (segRows - gLoopMargin ) * step - gLoopMargin * pix_bytes ,
922+ gLoopMargin * pix_bytes );
921923
922924 for (int i = gLoopMargin ; i > 0 ; i--) {
923925 memcpy (outY->pData + (rows - i) * outY->step ,
@@ -1030,22 +1032,22 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
10301032 else
10311033 {
10321034 unsigned short *out = (unsigned short *)outY->pData ;
1033- out[r * outY->step + c + pix] = (unsigned short )(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
1035+ out[r * outY->step / sizeof ( unsigned short ) + c + pix] = (unsigned short )(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
10341036 }
10351037 }
10361038 }
10371039 }
10381040 }
10391041 // Copy right border pixels for this row and left border pixels for next row
10401042 if (step == outY->step ) {
1041- memcpy (outY->pData + r * step - gLoopMargin , pDst + rOffset * step - gLoopMargin , 2 * gLoopMargin );
1043+ memcpy (outY->pData + r * step - gLoopMargin * pix_bytes , pDst + rOffset * step - gLoopMargin * pix_bytes , 2 * gLoopMargin * pix_bytes );
10421044 } else {
1043- memcpy (outY->pData + (r -1 ) * outY->step + outY->width - gLoopMargin ,
1044- pDst + rOffset * step - gLoopMargin ,
1045- gLoopMargin );
1045+ memcpy (outY->pData + (r -1 ) * outY->step + ( outY->width - gLoopMargin ) * pix_bytes ,
1046+ pDst + rOffset * step - gLoopMargin * pix_bytes ,
1047+ gLoopMargin * pix_bytes );
10461048 memcpy (outY->pData + r * outY->step ,
10471049 pDst + rOffset * step,
1048- gLoopMargin );
1050+ gLoopMargin * pix_bytes );
10491051 }
10501052 }
10511053 // 3. Run CT-Blending
0 commit comments