Skip to content

Commit 29bdb3a

Browse files
committed
Fix segmentation fault with 10-bit input.
We need to account for the size of a pixel in bytes when doing memcpy. Signed-off-by: Xiaoxia Liang <xiaoxia.liang@intel.com>
1 parent e501f14 commit 29bdb3a

2 files changed

Lines changed: 22 additions & 20 deletions

File tree

Library/Raisr.cpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, con
627627
else
628628
{
629629
unsigned short *out = (unsigned short *)outImage;
630-
out[(startRow + r) * outImageCols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
630+
out[(startRow + r) * outImageCols / sizeof(unsigned short) + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
631631
}
632632
}
633633
}
@@ -670,7 +670,7 @@ static void CTCountOfBitsChangedSegment(DT *LRImage, DT *HRImage, const int rows
670670
else
671671
{
672672
unsigned short *out = (unsigned short *)outImage;
673-
out[(startRow + r) * outImageCols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
673+
out[(startRow + r) * outImageCols / sizeof(unsigned short) + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
674674
}
675675
}
676676
}
@@ -808,6 +808,8 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
808808
{
809809
VideoDataType *inY;
810810
VideoDataType *outY;
811+
int pix_bytes = int((gBitDepth + 7) / 8);
812+
811813
for (int passIdx = 0; passIdx < gPasses; passIdx++)
812814
{
813815
#ifdef MEASURE_TIME
@@ -846,7 +848,7 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
846848
// the step of gIppCtx.segZones[passIdx][threadIdx].inYUpscaled is equal to the outY->width
847849
const int rows = outY->height;
848850
const int cols = outY->width;
849-
const int step = outY->width;
851+
const int step = outY->width * pix_bytes;
850852

851853
// 1. Prepare cheap up-scaled 32f data
852854
IppStatus status = ippStsNoErr;
@@ -887,7 +889,7 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
887889
ippiConvert_8u32f_C1R(pDst, cols,
888890
pSeg32f, cols * sizeof(float), {(int)cols, segRows});
889891
else
890-
ippiConvert_16u32f_C1R((Ipp16u *)pDst, cols,
892+
ippiConvert_16u32f_C1R((Ipp16u *)pDst, step,
891893
pSeg32f, cols * sizeof(float), {(int)cols, segRows});
892894

893895
// 2. Run hashing
@@ -900,24 +902,24 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
900902
{
901903
// it needs to do memcpy line by line when the line size of outY->pData is not equal to pDst's line size.
902904
if (step == outY->step) {
903-
memcpy(outY->pData, pDst, outY->step * gLoopMargin + gLoopMargin);
905+
memcpy(outY->pData, pDst, outY->step * gLoopMargin + gLoopMargin * pix_bytes);
904906
} else {
905907
for (int i = 0; i < gLoopMargin; i++) {
906908
memcpy(outY->pData + i * outY->step, pDst + i * step, step);
907909
}
908-
memcpy(outY->pData + gLoopMargin * outY->step, pDst + gLoopMargin * step, gLoopMargin);
910+
memcpy(outY->pData + gLoopMargin * outY->step, pDst + gLoopMargin * step, gLoopMargin * pix_bytes);
909911
}
910912
}
911913
if (endRow == rows)
912914
{
913915
if (step == outY->step) {
914-
memcpy(outY->pData + (rows - gLoopMargin) * step - gLoopMargin,
915-
pDst + (segRows - gLoopMargin) * step - gLoopMargin,
916-
outY->step * gLoopMargin + gLoopMargin);
916+
memcpy(outY->pData + (rows - gLoopMargin) * step - gLoopMargin * pix_bytes,
917+
pDst + (segRows - gLoopMargin) * step - gLoopMargin * pix_bytes,
918+
outY->step * gLoopMargin + gLoopMargin * pix_bytes);
917919
} else {
918-
memcpy(outY->pData + (rows - gLoopMargin - 1) * outY->step + outY->width - gLoopMargin,
919-
pDst + (segRows - gLoopMargin) * step - gLoopMargin,
920-
gLoopMargin);
920+
memcpy(outY->pData + (rows - gLoopMargin - 1) * outY->step + (outY->width - gLoopMargin) * pix_bytes,
921+
pDst + (segRows - gLoopMargin) * step - gLoopMargin * pix_bytes,
922+
gLoopMargin * pix_bytes);
921923

922924
for (int i = gLoopMargin; i > 0; i--) {
923925
memcpy(outY->pData + (rows - i) * outY->step,
@@ -1030,22 +1032,22 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
10301032
else
10311033
{
10321034
unsigned short *out = (unsigned short *)outY->pData;
1033-
out[r * outY->step + c + pix] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
1035+
out[r * outY->step / sizeof(unsigned short) + c + pix] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
10341036
}
10351037
}
10361038
}
10371039
}
10381040
}
10391041
// Copy right border pixels for this row and left border pixels for next row
10401042
if (step == outY->step) {
1041-
memcpy(outY->pData + r * step - gLoopMargin, pDst + rOffset * step - gLoopMargin, 2 * gLoopMargin);
1043+
memcpy(outY->pData + r * step - gLoopMargin * pix_bytes, pDst + rOffset * step - gLoopMargin * pix_bytes, 2 * gLoopMargin * pix_bytes);
10421044
} else {
1043-
memcpy(outY->pData + (r -1 ) * outY->step + outY->width - gLoopMargin,
1044-
pDst + rOffset * step - gLoopMargin,
1045-
gLoopMargin);
1045+
memcpy(outY->pData + (r -1 ) * outY->step + (outY->width - gLoopMargin) * pix_bytes,
1046+
pDst + rOffset * step - gLoopMargin * pix_bytes,
1047+
gLoopMargin * pix_bytes);
10461048
memcpy(outY->pData + r * outY->step,
10471049
pDst + rOffset * step,
1048-
gLoopMargin);
1050+
gLoopMargin * pix_bytes);
10491051
}
10501052
}
10511053
// 3. Run CT-Blending

Library/Raisr_AVX256.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ static void CTCountOfBitsChangedSegment_AVX256_32f(float *LRImage, float *HRImag
147147
val_epu16 = _mm256_packus_epi32(val_epi32,val_epi32);
148148
perm_epu = _mm256_permute4x64_epi64(val_epu16, 0x88);
149149
unsigned short *out = (unsigned short *)outImage;
150-
_mm_storeu_si128((__m128i *) &out[(startRow + r) * outImageCols + c], _mm256_extractf128_si256(perm_epu, 0));
150+
_mm_storeu_si128((__m128i *) &out[(startRow + r) * outImageCols / sizeof(unsigned short) + c], _mm256_extractf128_si256(perm_epu, 0));
151151
}
152152
}
153153

@@ -174,7 +174,7 @@ static void CTCountOfBitsChangedSegment_AVX256_32f(float *LRImage, float *HRImag
174174
}
175175
else {
176176
unsigned short *out = (unsigned short *)outImage;
177-
out[(startRow + r) * outImageCols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
177+
out[(startRow + r) * outImageCols / sizeof(unsigned short) + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
178178
}
179179
}
180180
}

0 commit comments

Comments
 (0)