Skip to content

Commit 99f5056

Browse files
committed
Fix the bug that raisr can't work well in some resolutions
Signed-off-by: Xiaoxia Liang <xiaoxia.liang@intel.com>
1 parent ba9f968 commit 99f5056

1 file changed

Lines changed: 52 additions & 18 deletions

File tree

Library/Raisr.cpp

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,8 @@ static void CTCountOfBitsChanged_AVX2(float *LRImage, float *HRImage, float *out
809809
// LRImage: cheap up scaled. HRImage: RAISR refined. outImage: output buffer in 8u.
810810
// rows: rows of LRImage/HRImage. startRow: seg start row. blendingZone: zone to run blending.
811811
// cols: stride for buffers in DT type.
812-
static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, const int rows, const int startRow, const std::pair<int, int> blendingZone, unsigned char *outImage, const int cols)
812+
// outImageCols: stride for outImage buffer
813+
static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, const int rows, const int startRow, const std::pair<int, int> blendingZone, unsigned char *outImage, const int cols, const int outImageCols)
813814
{
814815
int rowStartOffset = blendingZone.first - startRow;
815816
int rowEndOffset = blendingZone.second - startRow;
@@ -841,12 +842,12 @@ static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, con
841842
// convert 32f to 8bit/10bit
842843
if (gBitDepth == 8)
843844
{
844-
outImage[(startRow + r) * cols + c] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
845+
outImage[(startRow + r) * outImageCols + c] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
845846
}
846847
else
847848
{
848849
unsigned short *out = (unsigned short *)outImage;
849-
out[(startRow + r) * cols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
850+
out[(startRow + r) * outImageCols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
850851
}
851852
}
852853
}
@@ -855,8 +856,9 @@ static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, con
855856
// LRImage: cheap up scaled. HRImage: RAISR refined. outImage: output buffer in 8u.
856857
// rows: rows of LRImage/HRImage. startRow: seg start row. blendingZone: zone to run blending.
857858
// cols: stride for buffers in DT type.
859+
// outImageCols: stride for outImage buffer
858860
template <typename DT>
859-
static void CTCountOfBitsChangedSegment(DT *LRImage, DT *HRImage, const int rows, const int startRow, const std::pair<int, int> blendingZone, unsigned char *outImage, const int cols)
861+
static void CTCountOfBitsChangedSegment(DT *LRImage, DT *HRImage, const int rows, const int startRow, const std::pair<int, int> blendingZone, unsigned char *outImage, const int cols, const int outImageCols)
860862
{
861863
// run census transform on a CTwindowSize * CTwindowSize block centered by [r, c]
862864
int rowStartOffset = blendingZone.first - startRow;
@@ -883,12 +885,12 @@ static void CTCountOfBitsChangedSegment(DT *LRImage, DT *HRImage, const int rows
883885
// convert 32f to 8bit/10bit
884886
if (gBitDepth == 8)
885887
{
886-
outImage[(startRow + r) * cols + c] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
888+
outImage[(startRow + r) * outImageCols + c] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
887889
}
888890
else
889891
{
890892
unsigned short *out = (unsigned short *)outImage;
891-
out[(startRow + r) * cols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
893+
out[(startRow + r) * outImageCols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
892894
}
893895
}
894896
}
@@ -1224,9 +1226,12 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
12241226
outY = gIntermediateY; // change the output to VideoDataType gIntermediateY to save the output of 1st pass
12251227
}
12261228

1229+
// step means the line size in a frame; for video, the line size should be a multiple of the CPU alignment (16 or 32 bytes),
1230+
// so outY->step may be greater than or equal to outY->width.
1231+
// The step of gIppCtx.segZones[passIdx][threadIdx].inYUpscaled is equal to outY->width.
12271232
const int rows = outY->height;
12281233
const int cols = outY->width;
1229-
const int step = outY->step;
1234+
const int step = outY->width;
12301235

12311236
// 1. Prepare cheap up-scaled 32f data
12321237
IppStatus status = ippStsNoErr;
@@ -1267,8 +1272,8 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
12671272
ippiConvert_8u32f_C1R(pDst, cols,
12681273
pSeg32f, cols * sizeof(float), {(int)cols, segRows});
12691274
else
1270-
ippiConvert_16u32f_C1R((Ipp16u *)pDst, step,
1271-
pSeg32f, cols * sizeof(float), {(int)cols, segRows});
1275+
ippiConvert_16u32f_C1R((Ipp16u *)pDst, cols,
1276+
pSeg32f, cols * sizeof(float), {(int)cols, segRows});
12721277

12731278
// 2. Run hashing
12741279
// Update startRow, endRow for hashing algo
@@ -1278,13 +1283,33 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
12781283
// Handle top and bottom borders
12791284
if (startRow == 0)
12801285
{
1281-
memcpy(outY->pData, pDst, step * gLoopMargin + gLoopMargin);
1286+
// The memcpy must be done line by line when the line size of outY->pData is not equal to pDst's line size.
1287+
if (step == outY->step) {
1288+
memcpy(outY->pData, pDst, outY->step * gLoopMargin + gLoopMargin);
1289+
} else {
1290+
for (int i = 0; i < gLoopMargin; i++) {
1291+
memcpy(outY->pData + i * outY->step, pDst + i * step, step);
1292+
}
1293+
memcpy(outY->pData + gLoopMargin * outY->step, pDst + gLoopMargin * step, gLoopMargin);
1294+
}
12821295
}
12831296
if (endRow == rows)
12841297
{
1285-
memcpy(outY->pData + (rows - gLoopMargin) * step - gLoopMargin,
1286-
pDst + (segRows - gLoopMargin) * step - gLoopMargin,
1287-
step * gLoopMargin + gLoopMargin);
1298+
if (step == outY->step) {
1299+
memcpy(outY->pData + (rows - gLoopMargin) * step - gLoopMargin,
1300+
pDst + (segRows - gLoopMargin) * step - gLoopMargin,
1301+
outY->step * gLoopMargin + gLoopMargin);
1302+
} else {
1303+
memcpy(outY->pData + (rows - gLoopMargin - 1) * outY->step + outY->width - gLoopMargin,
1304+
pDst + (segRows - gLoopMargin) * step - gLoopMargin,
1305+
gLoopMargin);
1306+
1307+
for (int i = gLoopMargin; i > 0; i--) {
1308+
memcpy(outY->pData + (rows - i) * outY->step,
1309+
pDst + (segRows - i) * step,
1310+
step);
1311+
}
1312+
}
12881313
}
12891314
memcpy(pRaisr32f, pSeg32f, sizeof(float) * cols * segRows);
12901315

@@ -1353,27 +1378,36 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
13531378
val += 0.5; // to round the value
13541379
if (gBitDepth == 8)
13551380
{
1356-
outY->pData[r * cols + c + pix] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
1381+
outY->pData[r * outY->step + c + pix] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
13571382
}
13581383
else
13591384
{
13601385
unsigned short *out = (unsigned short *)outY->pData;
1361-
out[r * cols + c + pix] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
1386+
out[r * outY->step + c + pix] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
13621387
}
13631388
}
13641389
}
13651390
}
13661391
}
13671392
// Copy right border pixels for this row and left border pixels for next row
1368-
memcpy(outY->pData + r * step - gLoopMargin, pDst + rOffset * step - gLoopMargin, 2 * gLoopMargin);
1393+
if (step == outY->step) {
1394+
memcpy(outY->pData + r * step - gLoopMargin, pDst + rOffset * step - gLoopMargin, 2 * gLoopMargin);
1395+
} else {
1396+
memcpy(outY->pData + (r -1 ) * outY->step + outY->width - gLoopMargin,
1397+
pDst + rOffset * step - gLoopMargin,
1398+
gLoopMargin);
1399+
memcpy(outY->pData + r * outY->step,
1400+
pDst + rOffset * step,
1401+
gLoopMargin);
1402+
}
13691403
}
13701404
// 3. Run CT-Blending
13711405
if (blendingMode == CountOfBitsChanged)
13721406
{
13731407
int segStart = gIppCtx.segZones[passIdx][threadIdx].scaleStartRow;
1374-
CTCountOfBitsChangedSegment<float>(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[passIdx][threadIdx].blendingStartRow, gIppCtx.segZones[passIdx][threadIdx].blendingEndRow}, outY->pData, cols);
1408+
CTCountOfBitsChangedSegment<float>(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[passIdx][threadIdx].blendingStartRow, gIppCtx.segZones[passIdx][threadIdx].blendingEndRow}, outY->pData, cols, outY->step);
13751409
// No improvement with AVX2
1376-
// CTCountOfBitsChangedSegment_AVX2(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[threadIdx].blendingStartRow, gIppCtx.segZones[threadIdx].blendingEndRow}, outY->pData, cols);
1410+
// CTCountOfBitsChangedSegment_AVX2(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[threadIdx].blendingStartRow, gIppCtx.segZones[threadIdx].blendingEndRow}, outY->pData, cols, outY->step);
13771411
}
13781412

13791413
threadStatus[threadIdx] = 1;

0 commit comments

Comments
 (0)