Skip to content

Commit 8a7a645

Browse files
committed
Fix non-UTF-8 characters in UTF-8 encoding
1 parent 65b0c57 commit 8a7a645

1 file changed

Lines changed: 10 additions & 2 deletions

File tree

src/spss/readstat_sav_read.c

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -981,9 +981,17 @@ static readstat_error_t sav_parse_machine_integer_info_record(const void *data,
981981
// but the field only has room for two bytes). So to prevent the client
982982
// from receiving an invalid byte sequence, we ram everything through
983983
// our iconv machinery.
984-
iconv_t converter = iconv_open(dst_charset, src_charset);
984+
985+
// Try with //IGNORE suffix first to handle invalid byte sequences gracefully
986+
char dst_with_ignore[256];
987+
snprintf(dst_with_ignore, sizeof(dst_with_ignore), "%s//IGNORE", dst_charset);
988+
iconv_t converter = iconv_open(dst_with_ignore, src_charset);
985989
if (converter == (iconv_t)-1) {
986-
return READSTAT_ERROR_UNSUPPORTED_CHARSET;
990+
// Fallback to without //IGNORE if not supported
991+
converter = iconv_open(dst_charset, src_charset);
992+
if (converter == (iconv_t)-1) {
993+
return READSTAT_ERROR_UNSUPPORTED_CHARSET;
994+
}
987995
}
988996
if (ctx->converter) {
989997
iconv_close(ctx->converter);

0 commit comments

Comments
 (0)