Skip to content

Commit 1313353

Browse files
committed
add escape char support
1 parent 459f646 commit 1313353

7 files changed

Lines changed: 51 additions & 22 deletions

File tree

bin/options.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@
6161
"--quiet": {
6262
"desc": "If any error happens, quit the process quietly rather than log out the error. Default is false.",
6363
"type": "boolean"
64+
},
65+
"--escape":{
66+
"desc":"Escape character used in quoted columns. Default is double quote (\") according to RFC 4180. Change to backslash (\\) or another character to suit your data.",
67+
"type":"string"
6468
}
6569
},
6670
"examples": [

libs/core/Converter.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ function Converter(params,options) {
1515
constructResult: true, //set to false to not construct result in memory. suitable for big csv data
1616
delimiter: ',', // change the delimiter of csv columns. An array may be used to specify potential delimiters. e.g. [",","|",";"]
1717
quote: '"', //quote for a column containing delimiter.
18+
escape:'"', //escape char for quoted column
1819
trim: true, //trim column's space characters
1920
checkType: true, //whether check column type
2021
toArrayString: false, //stream down stringified json array instead of string of json. (useful if downstream is file writer etc)

libs/core/utils.js

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
module.exports = {
33
getDelimiter: getDelimiter, // Handle auto delimiter: return explicitly specified delimiter or try auto detect
44
rowSplit: rowSplit, //Split a csv row to an array based on delimiter and quote
5-
isToogleQuote: isToogleQuote, //returns if a segmenthas even number of quotes
6-
twoDoubleQuote: twoDoubleQuote //converts two double quotes to one
5+
isToogleQuote: isToogleQuote //returns true if a segment has an odd number of quotes
76
}
8-
var cachedRegExp = {};
7+
98
var defaulDelimiters=[",","|","\t",";",":"];
109
function getDelimiter(rowStr,param) {
1110
var checker;
@@ -29,13 +28,17 @@ function getDelimiter(rowStr,param) {
2928
}
3029
function isQuoteOpen(str,param){
3130
var quote=param.quote;
32-
return str[0] === quote && (str[1]!==quote || str[1]===quote && (str[2] === quote || str.length ===2));
31+
var escape=param.escape;
32+
return str[0] === quote && (
33+
str[1]!==quote ||
34+
str[1]===escape && (str[2] === quote || str.length ===2));
3335
}
3436
function isQuoteClose(str,param){
3537
var quote=param.quote;
3638
var count=0;
3739
var idx=str.length-1;
38-
while (str[idx] === quote){
40+
var escape=param.escape;
41+
while (str[idx] === quote || str[idx]===escape){
3942
idx--;
4043
count++;
4144
}
@@ -47,6 +50,7 @@ function rowSplit(rowStr, param) {
4750
}
4851
var quote=param.quote;
4952
var trim=param.trim;
53+
var escape=param.escape;
5054
if (param.needCheckDelimiter===true){
5155
param.delimiter=getDelimiter(rowStr,param);
5256
param.needCheckDelimiter=false;
@@ -70,7 +74,7 @@ function rowSplit(rowStr, param) {
7074
e=e.substr(1);
7175
if (isQuoteClose(e,param)){ //quote close
7276
e=e.substring(0,e.length-1);
73-
e=twoDoubleQuote(e,quote);
77+
e=_escapeQuote(e,quote,escape);
7478
row.push(e);
7579
continue;
7680
}else{
@@ -87,7 +91,7 @@ function rowSplit(rowStr, param) {
8791
inquote=false;
8892
e=e.substr(0,len-1);
8993
quoteBuff+=delimiter+e;
90-
quoteBuff=twoDoubleQuote(quoteBuff,quote);
94+
quoteBuff=_escapeQuote(quoteBuff,quote,escape);
9195
if (trim){
9296
quoteBuff=quoteBuff.trimRight();
9397
}
@@ -125,25 +129,28 @@ function rowSplit(rowStr, param) {
125129
return row;
126130
}
127131

128-
function _getRegExpObj(quote) {
129-
if (cachedRegExp[quote]) {
130-
return cachedRegExp[quote];
131-
} else {
132-
cachedRegExp[quote] = {
133-
single: new RegExp(quote, 'g'),
134-
double: new RegExp(quote + quote, 'g')
135-
}
136-
return _getRegExpObj(quote);
137-
}
138-
}
132+
133+
var cachedRegExp = {};
139134

140135
function isToogleQuote(segment, quote) {
141-
var reg = _getRegExpObj(quote).single;
136+
var key="s|"+quote;
137+
if ( cachedRegExp[key]=== undefined){
138+
cachedRegExp[key]=new RegExp(quote, 'g');
139+
}
140+
var reg = cachedRegExp[key];
142141
var match = segment.match(reg);
143142
return match && match.length % 2 !== 0;
144143
}
145144

146-
function twoDoubleQuote(segment, quote) {
147-
var regExp = _getRegExpObj(quote).double;
145+
function _escapeQuote(segment, quote,escape) {
146+
147+
var key="es|"+quote+"|"+escape;
148+
if (cachedRegExp[key] === undefined){
149+
if (escape ==="\\"){
150+
escape="\\\\";
151+
}
152+
cachedRegExp[key]=new RegExp(escape+quote,'g');
153+
}
154+
var regExp = cachedRegExp[key];
148155
return segment.replace(regExp, quote);
149156
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"email": "t3dodson@gmail.com"
1919
}
2020
],
21-
"version": "1.0.2",
21+
"version": "1.0.3",
2222
"keywords": [
2323
"csv",
2424
"csvtojson",

readme.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ Following parameters are supported:
160160
* **maxRowLength**: the maximum number of characters a csv row may have. 0 means unlimited. If the maximum is exceeded, the parser will emit an "error" of "row_exceed". If the csv data may be corrupted, set this to a number such as 65535 so the parser won't consume excessive memory. default: 0
161161
* **checkColumn**: whether to check that the number of columns in a row is the same as the number of headers. If the column count does not match the header count, an error of "mismatched_column" will be emitted. default: false
162162
* **eol**: End of line character. If omitted, the parser will attempt to retrieve it from the first chunk of CSV data. If no valid eol is found, the operating system eol will be used.
163+
* **escape**: escape character used in quoted columns. Default is double quote (") according to RFC 4180. Change to backslash (\) or another character to suit your data.
163164

164165
All parameters can be used in Command Line tool. see
165166

test/data/dataWithSlashEscape

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
id,raw
2+
0,"{\"hello\":\"world\",\"test\":true}"

test/testCSVConverter2.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,20 @@ describe("CSV Converter", function() {
240240
done();
241241
});
242242
});
243+
it ("should process escape chars",function(done){
244+
var test_converter = new Converter({
245+
escape:"\\"
246+
});
247+
248+
var testData = __dirname + "/data/dataWithSlashEscape";
249+
var rs = fs.createReadStream(testData);
250+
test_converter.on("end_parsed",function(res){
251+
assert.equal(res[0].raw.hello,"world");
252+
assert.equal(res[0].raw.test,true);
253+
done();
254+
});
255+
rs.pipe(test_converter);
256+
});
243257
// it ("should convert big csv",function(done){
244258
// // var rs=fs.createReadStream(__dirname+"/data/large-csv-sample.csv");
245259
// var rs=fs.createReadStream("/Users/kxiang/tmp/csvdata");

0 commit comments

Comments
 (0)