Skip to content

Commit 36cf101

Browse files
committed
Added ignoreColumns and includeColumns features.
Added ignoreColumns and includeColumns features that will accept an array of column indices that specify specific columns to ignore or include during processing. The change was made within rowSplit.js and columns are included or ignored after being split. Test cases have been added to testRowSplit.js. readme.md has been updated, however, I did not update the change log.
1 parent 263353a commit 36cf101

5 files changed

Lines changed: 64 additions & 21 deletions

File tree

bin/options.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@
6464
"--escape":{
6565
"desc":"escape character used in quoted column. Default is double quote (\") according to RFC4180. Change to back slash (\\) or other chars for your own case.",
6666
"type":"string"
67+
},
68+
"--ignoreColumns": {
69+
"desc": "Columns to ignore on input. e.g. --ignoreColumns=# --ignoreColumns='[0,4,5]' ",
70+
"type": "~object"
71+
},
72+
"--includeColumns": {
73+
"desc": "Columns to include on input. e.g. --includeColumns=# --includeColumns='[0,4,5]' ",
74+
"type": "~object"
6775
}
6876
},
6977
"examples": [
@@ -72,4 +80,4 @@
7280
"cat <csvfile> | csvtojson",
7381
"csvtojson <csvfilepath> --checkType=false --trim=false --delimiter=#"
7482
]
75-
}
83+
}

libs/core/defParam.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ module.exports=function(params){
22
var _param = {
33
constructResult: true, //set to false to not construct result in memory. suitable for big csv data
44
delimiter: ',', // change the delimiter of csv columns. An array can be used to specify potential delimiters. e.g. [",","|",";"]
5+
ignoreColumns: [], // columns to ignore upon input.
6+
includeColumns: [], // columns to include upon input.
57
quote: '"', //quote for a column containing delimiter.
68
trim: true, //trim column's space characters
79
checkType: true, //whether check column type
@@ -40,4 +42,4 @@ function getEnv(key,def){
4042
}else{
4143
return def;
4244
}
43-
}
45+
}

libs/core/rowSplit.js

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,23 @@ module.exports=function rowSplit(rowStr, param) {
1717
}
1818
var delimiter=param.delimiter;
1919
var rowArr = rowStr.split(delimiter);
20+
if(param.ignoreColumns instanceof Array && param.ignoreColumns.length > 0) {
21+
param.ignoreColumns.sort(function(a,b){ return b - a; });
22+
for(var irow = 0; irow < param.ignoreColumns.length; irow++) {
23+
if(param.ignoreColumns[irow] >= 0) {
24+
rowArr.splice(param.ignoreColumns[irow], 1);
25+
}
26+
}
27+
}
28+
if(param.includeColumns instanceof Array && param.includeColumns.length > 0) {
29+
var cleanRowArr = [];
30+
for(var irow = 0; irow < param.includeColumns.length; irow++) {
31+
if(param.includeColumns[irow] >= 0) {
32+
cleanRowArr.push(rowArr[param.includeColumns[irow]]);
33+
}
34+
}
35+
rowArr = cleanRowArr;
36+
}
2037
if (quote ==="off"){
2138
return {cols:rowArr,closed:true};
2239
}
@@ -75,14 +92,14 @@ module.exports=function rowSplit(rowStr, param) {
7592
// }
7693
// return {cols:row,closed:true};
7794
// }
78-
95+
7996
}
8097

8198
function isQuoteOpen(str,param){
8299
var quote=param.quote;
83100
var escape=param.escape;
84101
return str[0] === quote && (
85-
str[1]!==quote ||
102+
str[1]!==quote ||
86103
str[1]===escape && (str[2] === quote || str.length ===2));
87104
}
88105
function isQuoteClose(str,param){
@@ -106,7 +123,7 @@ function twoDoubleQuote(str,quote){
106123
}
107124
var cachedRegExp = {}
108125
function _escapeQuote(segment, quote,escape) {
109-
126+
110127
var key="es|"+quote+"|"+escape;
111128
if (cachedRegExp[key] === undefined){
112129
if (escape ==="\\"){

readme.md

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Nodejs csv to json converter. Fully featured:
1717

1818
## Major update v1.1.0
1919

20-
Version 1.1.0 has added new features and optimised lib performance. It also introduced simpler APIs to use. Thus readme is re-written to adapt the preferred new APIs. The lib will support old APIs. To review the old readme please [click here](https://github.com/Keyang/node-csvtojson/blob/develop/readme-old.md).
20+
Version 1.1.0 has added new features and optimised lib performance. It also introduced simpler APIs to use. Thus readme is re-written to adapt the preferred new APIs. The lib will support old APIs. To review the old readme please [click here](https://github.com/Keyang/node-csvtojson/blob/develop/readme-old.md).
2121

2222
* [Performance Optimisation](https://github.com/Keyang/node-csvtojson/blob/develop/docs/performance.md#performance-optimisation): V1.1.0 is 30%-50% faster
2323
* Better error tolerance
@@ -174,7 +174,7 @@ Convert csv file and save result to json file:
174174
$ csvtojson source.csv > converted.json
175175
```
176176

177-
Use multiple cpu-cores:
177+
Use multiple cpu-cores:
178178

179179
```
180180
$ csvtojson --workerNum=4 source.csv > converted.json
@@ -200,7 +200,7 @@ const converter=csv(params) //params see below Parameters section
200200

201201
```
202202

203-
In above, `converter` is an instance of Converter which is a subclass of node.js `Transform` class.
203+
In above, `converter` is an instance of Converter which is a subclass of node.js `Transform` class.
204204

205205
* [Parameters](#parameters)
206206
* [Events](#events)
@@ -219,8 +219,8 @@ In above, `converter` is an instance of Converter which is a subclass of node.js
219219

220220
```js
221221
const csv=require('csvtojson')
222-
const converter=csv(parserParameters, streamOptions)
223-
```
222+
const converter=csv(parserParameters, streamOptions)
223+
```
224224
Both arguments are optional.
225225

226226
For `Stream Options` please read [Stream Option](https://nodejs.org/api/stream.html#stream_new_stream_transform_options) from Node.JS
@@ -231,14 +231,14 @@ For `Stream Options` please read [Stream Option](https://nodejs.org/api/stream.h
231231
const converter=csv({
232232
noheader:true,
233233
trim:true,
234-
})
234+
})
235235
```
236236
Following parameters are supported:
237237

238238
* **delimiter**: delimiter used for separating columns. Use "auto" if delimiter is unknown in advance, in this case, delimiter will be auto-detected (by best attempt). Use an array to give a list of potential delimiters e.g. [",","|","$"]. default: ","
239239
* **quote**: If a column contains the delimiter, a quote character can be used to surround the column content. e.g. "hello, world" won't be split into two columns while parsing. Setting this to "off" will ignore all quotes. default: " (double quote)
240240
* **trim**: Indicate if parser trim off spaces surrounding column content. e.g. " content " will be trimmed to "content". Default: true
241-
* **checkType**: This parameter turns on and off whether check field type. default is true.
241+
* **checkType**: This parameter turns on and off whether check field type. default is true.
242242
* **toArrayString**: Stringify the stream output to JSON array. This is useful when pipe output to a file which expects stringified JSON array. default is false and only stringified JSON (without []) will be pushed to downstream.
243243
* **ignoreEmpty**: Ignore the empty value in CSV columns. If a column value is not given, set this to true to skip them. Default: false.
244244
* **workerNum**: Number of worker processes. The worker process will use multi-cores to help process CSV data. Set to number of Core to improve the performance of processing large csv file. Keep 1 for small csv files. Default 1.
@@ -249,6 +249,8 @@ Following parameters are supported:
249249
* **checkColumn**: whether to check that the column number of a row is the same as headers. If the column number mismatches the headers number, an error of "mismatched_column" will be emitted. default: false
250250
* **eol**: End of line character. If omitted, parser will attempt retrieve it from first chunk of CSV data. If no valid eol found, then operation system eol will be used.
251251
* **escape**: escape character used in quoted column. Default is double quote (") according to RFC4180. Change to back slash (\\) or other chars for your own case.
252+
* **includeColumns**: This parameter instructs the parser to include only those columns as specified by an array of column indexes. Example: [0,2,3] will parse and include only columns 0, 2, and 3 in the JSON output.
253+
* **ignoreColumns**: This parameter instructs the parser to ignore columns as specified by an array of column indexes. Example: [1,3,5] will ignore columns 1, 3, and 5 and will not return them in the JSON output.
252254

253255
All parameters can be used in Command Line tool.
254256

@@ -311,7 +313,7 @@ csv()
311313
})
312314
```
313315

314-
Note that if `error` being emitted, the process will stop as node.js will automatically `unpipe()` upper-stream and chained down-stream<sup>1</sup>. This will cause `end` / `end_parsed` event never being emitted because `end` event is only emitted when all data being consumed <sup>2</sup>.
316+
Note that if `error` being emitted, the process will stop as node.js will automatically `unpipe()` upper-stream and chained down-stream<sup>1</sup>. This will cause `end` / `end_parsed` event never being emitted because `end` event is only emitted when all data being consumed <sup>2</sup>.
315317

316318
1. [Node.JS Readable Stream](https://github.com/nodejs/node/blob/master/lib/_stream_readable.js#L572-L583)
317319
2. [Writable end Event](https://nodejs.org/api/stream.html#stream_event_end)
@@ -367,7 +369,7 @@ csv()
367369
cb(newData);
368370
})
369371
.on('json',(jsonObj)=>{
370-
372+
371373
});
372374
```
373375

@@ -385,7 +387,7 @@ csv()
385387
return fileLineString
386388
})
387389
.on('json',(jsonObj)=>{
388-
390+
389391
});
390392
```
391393

@@ -464,7 +466,7 @@ Using csvtojson to convert, the result would be like:
464466
},
465467
"description": "Awesome castle"
466468
}]
467-
```
469+
```
468470

469471
### No nested JSON
470472

@@ -490,7 +492,7 @@ csv({flatKeys:true})
490492

491493
1. First row of csv source. Use first row of csv source as header row. This is default.
492494
2. If first row of csv source is header row but it is incorrect and need to be replaced. Use `headers:[]` and `noheader:false` parameters.
493-
3. If original csv source has no header row but the header definition can be defined. Use `headers:[]` and `noheader:true` parameters.
495+
3. If original csv source has no header row but the header definition can be defined. Use `headers:[]` and `noheader:true` parameters.
494496
4. If original csv source has no header row and the header definition is unknown. Use `noheader:true`. This will automatically add `fieldN` header to csv cells
495497

496498

@@ -545,7 +547,7 @@ See [here](https://github.com/Keyang/node-csvtojson/blob/develop/docs/performanc
545547

546548
There are some limitations when using multi-core feature:
547549

548-
* Does not support if a column contains line break.
550+
* Does not support if a column contains line break.
549551

550552
#Change Log
551553

@@ -641,5 +643,3 @@ There are some limitations when using multi-core feature:
641643
* Deprecated applyWebServer
642644
* Added construct parameter for Converter Class
643645
* Converter Class now works as a proper stream object
644-
645-

test/testRowSplit.js

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,20 @@ describe("RowSplit function",function(){
2020
assert.equal(res.closed,true);
2121
assert.equal(res.cols[2],'csvtojson,a"\nwesome')
2222
})
23-
})
23+
it ("should allow columns to be ignored on csv line",function(){
24+
var str="hello,world,csvtojson,awesome,great";
25+
var res=func(str,defParam({ignoreColumns:[0,3,2]}));
26+
assert.equal(res.cols.length,2);
27+
assert.equal(res.cols[0], "world");
28+
assert.equal(res.cols[1], "great");
29+
assert.equal(res.closed,true);
30+
})
31+
it ("should include only requested columns on csv line",function(){
32+
var str="hello,world,csvtojson,awesome,great";
33+
var res=func(str,defParam({includeColumns:[0,3,2]}));
34+
assert.equal(res.cols.length,3);
35+
assert.equal(res.cols[0], "hello");
36+
assert.equal(res.cols[1], "awesome");
37+
assert.equal(res.closed,true);
38+
})
39+
})

0 commit comments

Comments
 (0)