Skip to content

Commit 1ecbdee

Browse files
committed
impl column select api
1 parent 4d19602 commit 1ecbdee

1 file changed

Lines changed: 67 additions & 1 deletion

File tree

src/main/kotlin/kscript/text/Tables.kt

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package kscript.text
22

3+
import kscript.stopIfNot
4+
35
/**
46
* Utility methods to allow for awk-like data processing using kscript.
57
*
@@ -36,7 +38,7 @@ fun Sequence<String>.split(separator: String = "\t"): Sequence<Row> {
3638
return this.map { it.split(separator) }
3739
}
3840

39-
/** awk-like convenience wrapper around columns->map->print */
41+
/**
 * awk-like convenience wrapper: splits every line at [separator], applies [rule] to each resulting row
 * and hands the produced strings to `print()`.
 */
fun Sequence<String>.awk(separator: String = "\t", rule: (Row) -> String) = split(separator).map(rule).print()
4143

4244

@@ -65,5 +67,69 @@ fun Sequence<Row>.print(separator: String = "\t") = join(separator).print()
6567
/** Prints every row of this list on its own line using `println`. */
fun List<Row>.print() {
    for (row in this) println(row)
}
6668

6769

70+
//
71+
// Column Select
72+
//
73+
74+
75+
/**
 * Internal representation of column selection indices. Usually not used directly but rather via [with] and
 * [without].
 *
 * @property indices the column indices collected so far; an empty array when none are given.
 */
abstract class ColSelect(
    val indices: Array<Int> = emptyArray()
) {
    /** Extends the selection by every index contained in [range]. */
    abstract fun and(range: IntRange): ColSelect

    /** Extends the selection by the single index [column]. */
    abstract fun and(column: Int): ColSelect
}
81+
82+
/**
 * Positive column selection: the collected indices name the columns to keep.
 *
 * Each `and` call returns a new [PosSelect] with the additional indices appended after the existing ones.
 */
class PosSelect(arrayOf: Array<Int>) : ColSelect(arrayOf) {
    /** Returns a new selection with [column] appended. */
    override fun and(column: Int): PosSelect = PosSelect(indices + column)

    /** Returns a new selection with all indices of [range] appended in iteration order. */
    override fun and(range: IntRange): PosSelect = PosSelect(indices + range.toList())
}
86+
87+
/**
 * Negative column selection: the collected indices name the columns to leave out.
 *
 * Each `and` call returns a new [NegSelect] with the additional indices appended after the existing ones.
 */
class NegSelect(arrayOf: Array<Int>) : ColSelect(arrayOf) {
    /** Returns a new selection with [column] appended. */
    override fun and(column: Int): NegSelect = NegSelect(indices + column)

    /** Returns a new selection with all indices of [range] appended in iteration order. */
    override fun and(range: IntRange): NegSelect = NegSelect(indices + range.toList())
}
91+
92+
/**
 * Starts building a positive column selection that contains just [index].
 * Further columns can be chained with `and`; use [without] to start an exclusion instead.
 */
fun with(index: Int): PosSelect {
    return PosSelect(arrayOf(index))
}
94+
95+
/** Starts building a positive column selection that contains every index of [range]. */
fun with(range: IntRange): PosSelect {
    val columns: Array<Int> = range.toList().toTypedArray()
    return PosSelect(columns)
}
96+
/** Starts building a negative column selection that excludes just [index]. */
fun without(index: Int): NegSelect {
    return NegSelect(arrayOf(index))
}
97+
/** Starts building a negative column selection that excludes every index of [range]. */
fun without(range: IntRange): NegSelect {
    val columns: Array<Int> = range.toList().toTypedArray()
    return NegSelect(columns)
}
98+
99+
100+
/**
 * Decides whether the column at [colIndex] survives the given selection.
 *
 * For a [PosSelect] the column is kept when its index is listed; for any other selector it is kept when its
 * index is not listed.
 */
private fun retainColumn(selectIndex: ColSelect, colIndex: Int): Boolean {
    val listed = colIndex in selectIndex.indices

    return when (selectIndex) {
        is PosSelect -> listed
        else -> !listed
    }
}
105+
106+
/**
 * Select or remove columns by providing an index-vector.
 *
 * When every value in [colIndices] is greater than zero (this includes the empty case) the values are wrapped
 * in a [PosSelect]; otherwise they are wrapped, as given, in a [NegSelect]. The actual work is delegated to
 * the [ColSelect]-based overload. If the values are neither all positive nor all negative, the check handed
 * to `stopIfNot` fails with the message below.
 *
 * More complex selectors can be constructed with [with] and [without], which implement a
 * [builder](https://en.wikipedia.org/wiki/Builder_pattern).
 */
fun Sequence<Row>.select(vararg colIndices: Int): Sequence<Row> {
    val allPositive = colIndices.all { it > 0 }
    val allNegative = colIndices.all { it < 0 }

    stopIfNot(allPositive || allNegative) {
        " Can not mix positive and negative selections"
    }

    val boxed = colIndices.toTypedArray()
    val selector: ColSelect = if (allPositive) PosSelect(boxed) else NegSelect(boxed)

    return select(selector)
}
119+
120+
/**
 * Selects or removes columns of every row according to [columns].
 *
 * Column indices are 1-based.
 *
 * - For a [PosSelect] each row is rebuilt from the listed columns in the order given by the selector, so
 *   columns can be reordered or repeated. Every listed index is looked up with `row[index - 1]`.
 * - For any other selector (i.e. [NegSelect]) the listed columns are dropped and the remaining ones keep
 *   their original order. The sign of an exclusion index is ignored, so `-2` and `2` both drop the second
 *   column; this keeps selectors built from negative vararg indices working.
 *
 * @param columns the selector describing which columns to keep or to drop.
 * @return a lazily evaluated sequence of the transformed rows.
 */
fun Sequence<Row>.select(columns: ColSelect): Sequence<Row> {
    // more efficient but does not allow to change the order
    // return map { it.filterIndexed { index, _ -> retainColumn(columns, index + 1) } }

    return if (columns is PosSelect) {
        // positive selection: selector indices are 1-based, row positions are 0-based
        map { row -> columns.indices.map { row[it - 1] } }
    } else {
        // negative selection: normalise the sign once, outside the per-row lambda
        val dropped = columns.indices.map { if (it < 0) -it else it }.toSet()

        // filterIndexed reports 0-based positions, so shift by +1 to compare with the 1-based indices
        map { row -> row.filterIndexed { index, _ -> (index + 1) !in dropped } }
    }
}
132+
133+
68134
// todo add krangl ColNames interface here
69135

0 commit comments

Comments
 (0)