Skip to content

Commit c0a24db

Browse files
committed
Trim newlines from descriptions and attribute values.
1 parent 32d6d7d commit c0a24db

7 files changed

Lines changed: 71 additions & 12 deletions

File tree

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
3+
biojava-adam Biojava and ADAM integration.
4+
Copyright (c) 2017-2019 held jointly by the individual authors.
5+
6+
This library is free software; you can redistribute it and/or modify it
7+
under the terms of the GNU Lesser General Public License as published
8+
by the Free Software Foundation; either version 3 of the License, or (at
9+
your option) any later version.
10+
11+
This library is distributed in the hope that it will be useful, but WITHOUT
12+
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14+
License for more details.
15+
16+
You should have received a copy of the GNU Lesser General Public License
17+
along with this library; if not, write to the Free Software Foundation,
18+
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19+
20+
> http://www.fsf.org/licensing/licenses/lgpl.html
21+
> http://www.opensource.org/licenses/lgpl-license.php
22+
23+
*/
24+
package org.biojava.nbio.adam.convert;
25+
26+
/**
 * Utility methods for the convert package.
 *
 * @author  Michael Heuer
 */
final class ConvertUtils {

    /**
     * Matches one or more consecutive linebreak sequences. Compiled once here
     * because <code>String.replaceAll</code> would recompile the expression on
     * every call.
     */
    private static final java.util.regex.Pattern LINEBREAKS = java.util.regex.Pattern.compile("\\R+");

    /**
     * Private no-arg constructor; this class only provides static methods.
     */
    private ConvertUtils() {
        // empty
    }

    /**
     * Return the specified value with each run of one or more newlines
     * replaced by a single space and whitespace trimmed from the
     * beginning and end.
     *
     * <p>Newlines are replaced rather than deleted so that words on either
     * side of a line break are not joined together. Only runs of linebreaks
     * are collapsed; spaces adjacent to a line break are left as is.</p>
     *
     * @param value value to trim, may be null
     * @return the specified value with runs of newlines replaced by a single
     *    space and whitespace trimmed from the beginning and end, or
     *    <code>null</code> if <code>value</code> is null
     */
    static String trimNewlines(final String value) {
        if (value == null) {
            return null;
        }
        return LINEBREAKS.matcher(value).replaceAll(" ").trim();
    }
}

src/main/java/org/biojava/nbio/adam/convert/DnaSequenceToFeatures.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
package org.biojava.nbio.adam.convert;
2525

26+
import static org.biojava.nbio.adam.convert.ConvertUtils.trimNewlines;
27+
2628
import java.util.ArrayList;
2729
import java.util.HashMap;
2830
import java.util.Iterator;
@@ -130,10 +132,10 @@ public List<Feature> convert(final DNASequence dnaSequence,
130132
}
131133

132134
if (feature.getShortDescription() != null) {
133-
fb.setName(feature.getShortDescription());
135+
fb.setName(trimNewlines(feature.getShortDescription()));
134136
}
135137
else if (feature.getDescription() != null) {
136-
fb.setName(feature.getDescription());
138+
fb.setName(trimNewlines(feature.getDescription()));
137139
}
138140
else {
139141
fb.clearName();
@@ -146,7 +148,7 @@ else if (feature.getDescription() != null) {
146148
List<Qualifier> value = entry.getValue();
147149
StringBuilder sb = new StringBuilder();
148150
for (Iterator<Qualifier> i = value.iterator(); i.hasNext(); ) {
149-
sb.append(i.next().getValue());
151+
sb.append(trimNewlines(i.next().getValue()));
150152
if (i.hasNext()) {
151153
sb.append(",");
152154
}

src/main/java/org/biojava/nbio/adam/convert/DnaSequenceToSequence.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
package org.biojava.nbio.adam.convert;
2525

26+
import static org.biojava.nbio.adam.convert.ConvertUtils.trimNewlines;
27+
2628
import javax.annotation.concurrent.Immutable;
2729

2830
import org.bdgenomics.convert.AbstractConverter;
@@ -64,7 +66,7 @@ public Sequence convert(final DNASequence dnaSequence,
6466

6567
Sequence.Builder sb = Sequence.newBuilder()
6668
.setName(dnaSequence.getAccession().toString())
67-
.setDescription(dnaSequence.getDescription())
69+
.setDescription(trimNewlines(dnaSequence.getDescription()))
6870
.setAlphabet(Alphabet.DNA)
6971
.setSequence(dnaSequence.getSequenceAsString().toUpperCase())
7072
.setLength((long) dnaSequence.getLength());

src/main/java/org/biojava/nbio/adam/convert/ProteinSequenceToFeatures.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
package org.biojava.nbio.adam.convert;
2525

26+
import static org.biojava.nbio.adam.convert.ConvertUtils.trimNewlines;
27+
2628
import java.util.ArrayList;
2729
import java.util.HashMap;
2830
import java.util.Iterator;
@@ -130,10 +132,10 @@ public List<Feature> convert(final ProteinSequence proteinSequence,
130132
}
131133

132134
if (feature.getShortDescription() != null) {
133-
fb.setName(feature.getShortDescription());
135+
fb.setName(trimNewlines(feature.getShortDescription()));
134136
}
135137
else if (feature.getDescription() != null) {
136-
fb.setName(feature.getDescription());
138+
fb.setName(trimNewlines(feature.getDescription()));
137139
}
138140
else {
139141
fb.clearName();
@@ -146,7 +148,7 @@ else if (feature.getDescription() != null) {
146148
List<Qualifier> value = entry.getValue();
147149
StringBuilder sb = new StringBuilder();
148150
for (Iterator<Qualifier> i = value.iterator(); i.hasNext(); ) {
149-
sb.append(i.next().getValue());
151+
sb.append(trimNewlines(i.next().getValue()));
150152
if (i.hasNext()) {
151153
sb.append(",");
152154
}

src/main/java/org/biojava/nbio/adam/convert/ProteinSequenceToSequence.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
package org.biojava.nbio.adam.convert;
2525

26+
import static org.biojava.nbio.adam.convert.ConvertUtils.trimNewlines;
27+
2628
import javax.annotation.concurrent.Immutable;
2729

2830
import org.bdgenomics.convert.AbstractConverter;
@@ -64,7 +66,7 @@ public Sequence convert(final ProteinSequence proteinSequence,
6466

6567
Sequence.Builder sb = Sequence.newBuilder()
6668
.setName(proteinSequence.getAccession().toString())
67-
.setDescription(proteinSequence.getDescription())
69+
.setDescription(trimNewlines(proteinSequence.getDescription()))
6870
.setAlphabet(Alphabet.PROTEIN)
6971
.setSequence(proteinSequence.getSequenceAsString().toUpperCase())
7072
.setLength((long) proteinSequence.getLength());

src/main/java/org/biojava/nbio/adam/convert/RnaSequenceToFeatures.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
package org.biojava.nbio.adam.convert;
2525

26+
import static org.biojava.nbio.adam.convert.ConvertUtils.trimNewlines;
27+
2628
import java.util.ArrayList;
2729
import java.util.HashMap;
2830
import java.util.Iterator;
@@ -130,10 +132,10 @@ public List<Feature> convert(final RNASequence rnaSequence,
130132
}
131133

132134
if (feature.getShortDescription() != null) {
133-
fb.setName(feature.getShortDescription());
135+
fb.setName(trimNewlines(feature.getShortDescription()));
134136
}
135137
else if (feature.getDescription() != null) {
136-
fb.setName(feature.getDescription());
138+
fb.setName(trimNewlines(feature.getDescription()));
137139
}
138140
else {
139141
fb.clearName();
@@ -146,7 +148,7 @@ else if (feature.getDescription() != null) {
146148
List<Qualifier> value = entry.getValue();
147149
StringBuilder sb = new StringBuilder();
148150
for (Iterator<Qualifier> i = value.iterator(); i.hasNext(); ) {
149-
sb.append(i.next().getValue());
151+
sb.append(trimNewlines(i.next().getValue()));
150152
if (i.hasNext()) {
151153
sb.append(",");
152154
}

src/main/java/org/biojava/nbio/adam/convert/RnaSequenceToSequence.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
package org.biojava.nbio.adam.convert;
2525

26+
import static org.biojava.nbio.adam.convert.ConvertUtils.trimNewlines;
27+
2628
import javax.annotation.concurrent.Immutable;
2729

2830
import org.bdgenomics.convert.AbstractConverter;
@@ -64,7 +66,7 @@ public Sequence convert(final RNASequence rnaSequence,
6466

6567
Sequence.Builder sb = Sequence.newBuilder()
6668
.setName(rnaSequence.getAccession().toString())
67-
.setDescription(rnaSequence.getDescription())
69+
.setDescription(trimNewlines(rnaSequence.getDescription()))
6870
.setAlphabet(Alphabet.RNA)
6971
.setSequence(rnaSequence.getSequenceAsString().toUpperCase())
7072
.setLength((long) rnaSequence.getLength());

0 commit comments

Comments
 (0)