Skip to content

Commit 5b9f35e

Browse files
committed
add preProcessHook
1 parent 6503255 commit 5b9f35e

6 files changed

Lines changed: 75 additions & 15 deletions

File tree

libs/core/Converter.js

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ var utils = require("./utils.js");
1010
var async = require("async");
1111

1212
function Converter(params) {
13-
Transform.call(this,{
14-
highWaterMark:1024
15-
}); //TODO what does this do? -->This calls the constructor of Transform and initialise anything the Transform needs.(like var initialisation)
13+
Transform.call(this);
1614
var _param = {
1715
constructResult: true, //set to false to not construct result in memory. suitable for big csv data
1816
delimiter: ',', // change the delimiter of csv columns. It is able to use an array to specify potencial delimiters. e.g. [",","|",";"]
@@ -190,16 +188,25 @@ Converter.prototype.flushBuffer = function() {
190188
}
191189
this.checkAndFlush();
192190
}
193-
var size = 0;
191+
Converter.prototype.preProcessRaw=function(data,cb){
192+
cb(data);
193+
}
194194

195195
Converter.prototype._transformNoFork = function(data, encoding, cb) {
196-
size += data.length;
197196
if (this.param.toArrayString && this.started === false) {
198197
this.started = true;
199198
this.push("[" + eol, "utf8");
200199
}
201-
var lines = this.toCSVLines(this.toLines(data, encoding)); //lines of csv
202-
this.processCSVLines(lines, cb);
200+
data=data.toString("utf8");
201+
var self=this;
202+
this.preProcessRaw(data,function(d){
203+
if (d && d.length>0){
204+
var lines = self.toCSVLines(self.toLines(d)); //lines of csv
205+
self.processCSVLines(lines, cb);
206+
}else{
207+
cb();
208+
}
209+
})
203210
// async.eachLimit(lines,1,function(line,scb){
204211
// this.push(line.data);
205212
// scb();
@@ -232,18 +239,17 @@ Converter.prototype.processCSVLines = function(csvLines, cb) {
232239
}
233240
}.bind(this), cb);
234241
}
235-
Converter.prototype.toLines = function(data, encoding) {
236-
if (encoding === "buffer") {
237-
encoding = "utf8";
238-
}
239-
data = this._lineBuffer + data.toString(encoding);
242+
Converter.prototype.toLines = function(data) {
243+
data = this._lineBuffer + data;
240244
var eol = this.getEol(data);
241245
return data.split(eol);
242246
}
247+
var lineNumber=0;
243248
Converter.prototype.toCSVLines = function(fileLines, last) {
244249
var recordLine = "";
245250
var lines = [];
246251
while (fileLines.length > 1) {
252+
lineNumber++;
247253
var line = fileLines.shift();
248254
lines = lines.concat(this._line(line));
249255
}

libs/core/Processor.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ function Processor(params) {
3232
this.runningWorker = 0;
3333
this.flushCb = null;
3434
if (this.param.workerNum > 1) {
35-
for (var i = 0; i < this.param.workerNum; i++) {
35+
for (var i = 0; i < this.param.workerNum-1; i++) {
3636
var worker = new Worker(this.param, false);
3737
// worker.on("error",onError);
3838
this.addWorker(worker);

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"email": "t3dodson@gmail.com"
1919
}
2020
],
21-
"version": "0.5.12",
21+
"version": "0.5.13",
2222
"keywords": [
2323
"csv",
2424
"csvtojson",

readme.md

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ All you need nodejs csv to json converter.
2727
* [Synchronouse Transformer](#synchronouse-transformer)
2828
* [Asynchronouse Transformer](#asynchronouse-transformer)
2929
* [Convert to other data type](#convert-to-other-data-type)
30+
* [Hooks](#hooks)
3031
* [Events](#events)
3132
* [Flags](#flags)
3233
* [Big CSV File Streaming](#big-csv-file)
@@ -300,6 +301,43 @@ It will be converted to:
300301
}
301302
```
302303

304+
# Hooks
305+
## preProcessRaw
306+
This hook is called whenever the parser receives a chunk of data from the upstream source, and it allows developers to modify that data before it is parsed. For example:
307+
```js
308+
/*
309+
CSV data:
310+
a,b,c,d,e
311+
12,e3,fb,w2,dd
312+
*/
313+
314+
var conv=new Converter();
315+
conv.preProcessRaw=function(data,cb){
316+
//change the first occurrence of 12 to 23 (a string pattern replaces only the first match)
317+
cb(data.replace("12","23"));
318+
}
319+
conv.fromString(csv,function(err,json){
320+
//json:{a:23 ....}
321+
})
322+
```
323+
By default, preProcessRaw passes the data from the source to the callback unchanged:
324+
```js
325+
Converter.prototype.preProcessRaw=function(data,cb){
326+
cb(data);
327+
}
328+
```
329+
This hook is also useful for sanitising or preparing the CSV data stream before it is parsed:
330+
```js
331+
var headWhiteSpaceRemoved=false;
332+
conv.preProcessRaw=function(data,cb){
333+
if (!headWhiteSpaceRemoved){
334+
data=data.replace(/^\s+/,"");
335+
cb(data);
336+
}else{
337+
cb(data);
338+
}
339+
}
340+
```
303341

304342
# Events
305343

@@ -727,7 +765,7 @@ The parameter of Parse function is a JSON object. It contains following fields:
727765

728766
## 0.5.12
729767
* Added support for scientific notation number support (#100)
730-
* Added "off" option to quote parameter
768+
* Added "off" option to quote parameter
731769

732770
## 0.5.4
733771
* Added new feature: accept special delimiter "auto" and array

test/data/quoteTolerant

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"Style","Description","SKU","BusinessAreaCode","BusinessAreaDescription","DepartmentCode","DepartmentDescription","ClassCode","ClassDescription","ColorCode","ColorDescription","SizeCode","SizeDescription","PrimaryMaterialCode","PrimaryMaterialDescription","VendorCode","VendorDescription","CurrentRetail"
2+
"503951518","OVERSIZE ROUND GOLD PENDANT ON 32" THICK BLACK CORD","00505039515193","W12","Jewelry","1231","Jewelry","123111","D2 New Jewelry Necklace","70","GOLD","100","1 Size","065","Costume/Imitation","3522","KENNETH JAY LANE, INC.","79.0000"

test/testCSVConverter2.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,20 @@ describe("CSV Converter", function() {
177177
});
178178
rs.pipe(conv);
179179
})
180+
it ("should pre process data in the line",function(done){
181+
var testData = __dirname + "/data/quoteTolerant";
182+
var rs = fs.createReadStream(testData);
183+
var conv=new Converter();
184+
conv.preProcessRaw=function(d,cb){
185+
d=d.replace('32"','32""');
186+
cb(d);
187+
}
188+
conv.on("end_parsed",function(res){
189+
assert(res[0].Description.indexOf('32"')>-1);
190+
done();
191+
});
192+
rs.pipe(conv);
193+
})
180194
// it ("should convert big csv",function(done){
181195
// // var rs=fs.createReadStream(__dirname+"/data/large-csv-sample.csv");
182196
// var rs=fs.createReadStream("/Users/kxiang/tmp/csvdata");

0 commit comments

Comments
 (0)