Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit 7759765

Browse files
committed
encoding: Fix encoding lookup with xmlOpenCharEncodingHandler
Make xmlOpenCharEncodingHandler call xmlParseCharEncoding first so we prefer our own handlers for names like "UTF8". Only UTF-16 needs an exception. Make callers check the return value instead of testing the handler pointer: for UTF-8, a NULL handler doesn't mean an error. Remove unnecessary UTF-8 check from htmlFindOutputEncoder. Don't try to look up ASCII handler since the HTML handler is always available. Fix return code of xmlParseCharEncoding. Should fix #744.
1 parent 5ff3794 commit 7759765

3 files changed

Lines changed: 34 additions & 32 deletions

File tree

HTMLtree.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -385,22 +385,17 @@ htmlFindOutputEncoder(const char *encoding) {
385385
xmlCharEncodingHandler *handler = NULL;
386386

387387
if (encoding != NULL) {
388-
xmlCharEncoding enc;
388+
int res;
389389

390-
enc = xmlParseCharEncoding(encoding);
391-
if (enc != XML_CHAR_ENCODING_UTF8) {
392-
xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
393-
if (handler == NULL)
394-
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
395-
}
390+
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1,
391+
&handler);
392+
if (res != XML_ERR_OK)
393+
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
396394
} else {
397395
/*
398-
* Fallback to HTML or ASCII when the encoding is unspecified
396+
* Fallback to HTML when the encoding is unspecified
399397
*/
400-
if (handler == NULL)
401-
xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler);
402-
if (handler == NULL)
403-
xmlOpenCharEncodingHandler("ascii", /* output */ 1, &handler);
398+
xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler);
404399
}
405400

406401
return(handler);

encoding.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,7 +1160,7 @@ xmlParseCharEncoding(const char* name)
11601160
if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
11611161
if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
11621162

1163-
return(XML_CHAR_ENCODING_ERROR);
1163+
return(XML_CHAR_ENCODING_NONE);
11641164
}
11651165

11661166
/**
@@ -1930,9 +1930,7 @@ int
19301930
xmlOpenCharEncodingHandler(const char *name, int output,
19311931
xmlCharEncodingHandler **out) {
19321932
const char *nalias;
1933-
const char *norig;
19341933
xmlCharEncoding enc;
1935-
int ret;
19361934

19371935
if (out == NULL)
19381936
return(XML_ERR_ARGUMENT);
@@ -1944,22 +1942,27 @@ xmlOpenCharEncodingHandler(const char *name, int output,
19441942
/*
19451943
* Do the alias resolution
19461944
*/
1947-
norig = name;
19481945
nalias = xmlGetEncodingAlias(name);
19491946
if (nalias != NULL)
19501947
name = nalias;
19511948

1952-
ret = xmlFindHandler(name, output, out);
1953-
if (*out != NULL)
1954-
return(0);
1955-
if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1956-
return(ret);
1957-
19581949
/*
1959-
* Fallback using the canonical names
1950+
* UTF-16 needs the built-in handler which is only available via
1951+
* xmlFindHandler.
19601952
*/
1961-
enc = xmlParseCharEncoding(norig);
1962-
return(xmlLookupCharEncodingHandler(enc, out));
1953+
if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF16") == 0) {
1954+
name = "UTF-16";
1955+
} else if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-16") != 0) {
1956+
enc = xmlParseCharEncoding(name);
1957+
if (enc != XML_CHAR_ENCODING_NONE) {
1958+
int res = xmlLookupCharEncodingHandler(enc, out);
1959+
1960+
if (res != XML_ERR_UNSUPPORTED_ENCODING)
1961+
return(res);
1962+
}
1963+
}
1964+
1965+
return(xmlFindHandler(name, output, out));
19631966
}
19641967

19651968
/**

xmlsave.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ xmlNewSaveCtxt(const char *encoding, int options)
343343

344344
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1,
345345
&ret->handler);
346-
if (ret->handler == NULL) {
346+
if (res != XML_ERR_OK) {
347347
xmlSaveErr(NULL, res, NULL, encoding);
348348
xmlFreeSaveCtxt(ret);
349349
return(NULL);
@@ -801,7 +801,7 @@ static int xmlSaveSwitchEncoding(xmlSaveCtxtPtr ctxt, const char *encoding) {
801801
int res;
802802

803803
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
804-
if (handler == NULL) {
804+
if (res != XML_ERR_OK) {
805805
xmlSaveErr(buf, res, NULL, encoding);
806806
return(-1);
807807
}
@@ -2669,7 +2669,7 @@ xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr,
26692669

26702670
res = xmlOpenCharEncodingHandler(txt_encoding, /* output */ 1,
26712671
&conv_hdlr);
2672-
if (conv_hdlr == NULL) {
2672+
if (res != XML_ERR_OK) {
26732673
xmlSaveErr(NULL, res, NULL, txt_encoding);
26742674
return;
26752675
}
@@ -2783,8 +2783,10 @@ xmlDocFormatDump(FILE *f, xmlDocPtr cur, int format) {
27832783
encoding = (const char *) cur->encoding;
27842784

27852785
if (encoding != NULL) {
2786-
xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
2787-
if (handler == NULL) {
2786+
int res;
2787+
2788+
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
2789+
if (res != XML_ERR_OK) {
27882790
xmlFree((char *) cur->encoding);
27892791
cur->encoding = NULL;
27902792
encoding = NULL;
@@ -2920,8 +2922,10 @@ xmlSaveFormatFileEnc( const char * filename, xmlDocPtr cur,
29202922
encoding = (const char *) cur->encoding;
29212923

29222924
if (encoding != NULL) {
2923-
xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
2924-
if (handler == NULL)
2925+
int res;
2926+
2927+
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
2928+
if (res != XML_ERR_OK)
29252929
return(-1);
29262930
}
29272931

0 commit comments

Comments
 (0)