Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit b3579ea

Browse files
committed
encoding: Restore old lookup order in xmlOpenCharEncodingHandler
When looking up encodings with xmlLookupCharEncodingHandler, the returned handler can have a different name than the one requested (different capitalization, internal aliases). This should eventually be fixed. For now, we revert part of commit 5b893fa, start the lookup with xmlFindHandler, and add an explicit check for UTF-8. This should fix the encoding name issue mentioned in #749.
1 parent 411eeed commit b3579ea

2 files changed

Lines changed: 22 additions & 17 deletions

File tree

encoding.c

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1160,7 +1160,7 @@ xmlParseCharEncoding(const char* name)
11601160
if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
11611161
if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
11621162

1163-
return(XML_CHAR_ENCODING_NONE);
1163+
return(XML_CHAR_ENCODING_ERROR);
11641164
}
11651165

11661166
/**
@@ -1935,7 +1935,9 @@ int
19351935
xmlOpenCharEncodingHandler(const char *name, int output,
19361936
xmlCharEncodingHandler **out) {
19371937
const char *nalias;
1938+
const char *norig;
19381939
xmlCharEncoding enc;
1940+
int ret;
19391941

19401942
if (out == NULL)
19411943
return(XML_ERR_ARGUMENT);
@@ -1944,30 +1946,32 @@ xmlOpenCharEncodingHandler(const char *name, int output,
19441946
if (name == NULL)
19451947
return(XML_ERR_ARGUMENT);
19461948

1949+
if ((xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-8") == 0) ||
1950+
(xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF8") == 0))
1951+
return(XML_ERR_OK);
1952+
19471953
/*
19481954
* Do the alias resolution
19491955
*/
1956+
norig = name;
19501957
nalias = xmlGetEncodingAlias(name);
19511958
if (nalias != NULL)
19521959
name = nalias;
19531960

1961+
ret = xmlFindHandler(name, output, out);
1962+
if (*out != NULL)
1963+
return(0);
1964+
if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1965+
return(ret);
1966+
19541967
/*
1955-
* UTF-16 needs the built-in handler which is only available via
1956-
* xmlFindHandler.
1968+
* Fallback using the canonical names
1969+
*
1970+
* TODO: We should make sure that the name of the returned
1971+
* handler equals norig.
19571972
*/
1958-
if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF16") == 0) {
1959-
name = "UTF-16";
1960-
} else if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-16") != 0) {
1961-
enc = xmlParseCharEncoding(name);
1962-
if (enc != XML_CHAR_ENCODING_NONE) {
1963-
int res = xmlLookupCharEncodingHandler(enc, out);
1964-
1965-
if (res != XML_ERR_UNSUPPORTED_ENCODING)
1966-
return(res);
1967-
}
1968-
}
1969-
1970-
return(xmlFindHandler(name, output, out));
1973+
enc = xmlParseCharEncoding(norig);
1974+
return(xmlLookupCharEncodingHandler(enc, out));
19711975
}
19721976

19731977
/**

testparser.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ testUnsupportedEncoding(void) {
4949
xmlFreeDoc(doc);
5050

5151
error = xmlGetLastError();
52-
if (error->code != XML_ERR_UNSUPPORTED_ENCODING ||
52+
if (error == NULL ||
53+
error->code != XML_ERR_UNSUPPORTED_ENCODING ||
5354
error->level != XML_ERR_WARNING ||
5455
strcmp(error->message, "Unsupported encoding: #unsupported\n") != 0)
5556
{

0 commit comments

Comments
 (0)