-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f11e2a8
commit 20d52ce
Showing
9 changed files
with
130 additions
and
2 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# klite-csv | ||
|
||
Provides simple CSV parsing and generation classes | ||
|
||
* [CSVGenerator](src/CSVGenerator.kt) | ||
* [CSVParser](src/CSVParser.kt) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package klite.csv | ||
|
||
import java.io.OutputStream | ||
import java.sql.ResultSet | ||
import kotlin.text.Charsets.UTF_8 | ||
|
||
/**
 * Writes CSV rows to [out], encoding each row as UTF-8 and terminating it with `\n`.
 *
 * The [bom] bytes are written to [out] as soon as the generator is constructed
 * (pass an empty array to omit them).
 *
 * @param out stream that receives the generated bytes; it is neither flushed nor closed here
 * @param separator text placed between the values of a row
 * @param bom bytes written once before any row; defaults to the encoded U+FEFF byte order mark
 */
open class CSVGenerator(val out: OutputStream, val separator: String = ",", bom: ByteArray = "\uFEFF".toByteArray()) {
  init { out.write(bom) }

  // Compiled once per generator instead of once per value.
  private val quotable = "[\\s\"';,]".toRegex()

  /**
   * Writes one row built from [values] (each passed through [transform]) and returns
   * this generator so that calls can be chained.
   */
  fun row(vararg values: Any?) = this.apply {
    out.write(values.joinToString(separator, postfix = "\n", transform = ::transform).toByteArray(UTF_8))
  }

  /**
   * Converts a single value to its CSV text:
   * - `null` becomes an empty field;
   * - numbers use `toString()`, with `.` replaced by `,` when the separator is `;`;
   * - strings containing whitespace, a quote character, `;`, `,` or the configured
   *   separator are wrapped in double quotes with embedded double quotes doubled;
   * - anything else is converted with `toString()` and then treated as a string.
   */
  protected open fun transform(o: Any?): String = when (o) {
    // Must be handled explicitly: falling through to the generic branch with null would recurse forever.
    null -> ""
    is Number -> if (separator == ";") o.toString().replace(".", ",") else o.toString()
    is String -> if (needsQuoting(o)) "\"${o.replace("\"", "\"\"")}\"" else o
    else -> transform(o.toString())
  }

  /** True when [value] cannot be written verbatim without confusing a CSV reader. */
  private fun needsQuoting(value: String) =
    quotable.containsMatchIn(value) || (separator.isNotEmpty() && value.contains(separator))

  /** Writes the column names reported by the metadata of [rs] as a row. */
  private fun sqlHeader(rs: ResultSet) = row(*(1..rs.metaData.columnCount).map { rs.metaData.getColumnName(it) }.toTypedArray())

  /**
   * Writes the current row of [rs], preceded by a header row when the cursor is on the first row.
   * The cursor is not advanced here, so this is meant to be called once per row by the caller.
   */
  fun sqlDump(rs: ResultSet) {
    // NOTE(review): JDBC describes isFirst as optional for forward-only result sets - confirm the drivers in use support it.
    if (rs.isFirst) sqlHeader(rs)
    row(*(1..rs.metaData.columnCount).map { rs.getObject(it) }.toTypedArray())
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package klite.csv | ||
|
||
import java.io.InputStream | ||
|
||
/**
 * Parses CSV text line by line into maps keyed by the header row.
 *
 * Fields may be wrapped in double quotes, in which case they can contain the separator
 * and doubled double quotes (`""`) that stand for a single `"`.
 * Input is split with `lineSequence()`, so a quoted field cannot span several lines.
 *
 * @param separator literal text separating fields; regex metacharacters in it have no special meaning
 * @param skipBOM when true, a leading U+FEFF byte order mark is removed from the first line if present
 */
class CSVParser(separator: String = ",", private val skipBOM: Boolean = true) {
  // A field starts at the beginning of the line or right after a separator. The separator is
  // checked with a lookbehind rather than consumed, so an empty first field is still reported.
  // Group 1 is the content of a quoted field, group 2 an unquoted field.
  private val splitter = Regex.escape(separator).let { sep ->
    """(?:^|(?<=$sep))(?:"([^"]*(?:""[^"]*)*)"|([^"$sep]*))""".toRegex()
  }

  /**
   * Reads [stream] as text and lazily returns one map per data line, keyed by the values
   * of the first (header) line. An empty stream yields an empty sequence.
   *
   * The stream is consumed as the returned sequence is iterated and is not closed here.
   *
   * @throws IndexOutOfBoundsException during iteration when a line has more fields than the header
   */
  fun parse(stream: InputStream): Sequence<Map<String, String>> {
    val lines = stream.bufferedReader().lineSequence().iterator()
    if (!lines.hasNext()) return emptySequence()
    // Strip the BOM only when it is really there, so files without one keep their first characters.
    val headerLine = lines.next().let { if (skipBOM) it.removePrefix("\uFEFF") else it }
    val header = splitLine(headerLine).toList()
    return lines.asSequence().map { line ->
      splitLine(line).withIndex().associate { (index, value) -> header[index] to value }
    }
  }

  /** Splits a single [line] into unquoted field values, turning `""` back into `"`. */
  internal fun splitLine(line: String) = splitter.findAll(line).map { match ->
    val (quoted, plain) = match.destructured
    quoted.ifEmpty { plain }.replace("\"\"", "\"")
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package klite.csv | ||
|
||
import ch.tutteli.atrium.api.fluent.en_GB.toEqual | ||
import ch.tutteli.atrium.api.verbs.expect | ||
import klite.d | ||
import org.junit.jupiter.api.Test | ||
import java.io.ByteArrayOutputStream | ||
|
||
/** Checks the exact bytes produced by [CSVGenerator], decoded as UTF-8. */
class CSVGeneratorTest {
  val out = ByteArrayOutputStream()

  @Test fun `generate with comma`() {
    CSVGenerator(out).apply {
      row("column1", "column2")
      row("Hello", "World")
      row(1.25, 2)
    }

    // Decode explicitly as UTF-8: the generator always writes UTF-8, whatever the platform default is.
    expect(out.toString("UTF-8")).toEqual("\uFEFFcolumn1,column2\nHello,World\n1.25,2\n")
  }

  @Test fun `generate with semicolon for Estonian`() {
    CSVGenerator(out, separator = ";").apply {
      row("Hello", "World", "OÜ \"Mets ja koer\";xxx")
      row(1.25, 2, 3.75.d)
    }

    // The output starts with the default BOM and every row, including the last one, ends with a newline.
    expect(out.toString("UTF-8")).toEqual(
      "\uFEFFHello;World;\"OÜ \"\"Mets ja koer\"\";xxx\"\n1,25;2;3,75\n"
    )
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package klite.csv | ||
|
||
import ch.tutteli.atrium.api.fluent.en_GB.toEqual | ||
import ch.tutteli.atrium.api.verbs.expect | ||
import org.junit.jupiter.api.Test | ||
|
||
/** Exercises [CSVParser] with a semicolon separator and BOM skipping turned off. */
class CSVParserTest {
  val parser = CSVParser(separator = ";", skipBOM = false)

  /** Quoted fields may contain the separator, and doubled quotes collapse to a single one. */
  @Test fun quotes() {
    val withSeparatorInQuotes = parser.splitLine("""1;2;"hello; world";4""").toList()
    expect(withSeparatorInQuotes).toEqual(listOf("1", "2", "hello; world", "4"))

    val withEscapedQuotes = parser.splitLine("\"\"\"Aare Mägi FLORES\"\"\";101;;R").toList()
    expect(withEscapedQuotes).toEqual(listOf("\"Aare Mägi FLORES\"", "101", "", "R"))
  }

  /** A header line followed by one data line produces a single map keyed by the header names. */
  @Test fun parse() {
    val csv = """
      nimi;ariregistri_kood;ettevotja_oiguslik_vorm;ettevotja_oigusliku_vormi_alaliik;kmkr_nr;ettevotja_staatus;ettevotja_staatus_tekstina;ettevotja_esmakande_kpv;ettevotja_aadress;asukoht_ettevotja_aadressis;asukoha_ehak_kood;asukoha_ehak_tekstina;indeks_ettevotja_aadressis;ads_adr_id;ads_ads_oid;ads_normaliseeritud_taisaadress;teabesysteemi_link
      001 Kinnisvara OÜ;12652512;Osaühing;;EE101721589;R;Registrisse kantud;25.04.2014;;Õismäe tee 78-9;0176;Haabersti linnaosa, Tallinn, Harju maakond;13513;2182337;;Harju maakond, Tallinn, Haabersti linnaosa, Õismäe tee 78-9;https://ariregister.rik.ee/est/company/12652512
    """.trimIndent()

    val expectedRow = mapOf(
      "nimi" to "001 Kinnisvara OÜ",
      "ariregistri_kood" to "12652512",
      "ettevotja_oiguslik_vorm" to "Osaühing",
      "ettevotja_oigusliku_vormi_alaliik" to "",
      "kmkr_nr" to "EE101721589",
      "ettevotja_staatus" to "R",
      "ettevotja_staatus_tekstina" to "Registrisse kantud",
      "ettevotja_esmakande_kpv" to "25.04.2014",
      "ettevotja_aadress" to "",
      "asukoht_ettevotja_aadressis" to "Õismäe tee 78-9",
      "asukoha_ehak_kood" to "0176",
      "asukoha_ehak_tekstina" to "Haabersti linnaosa, Tallinn, Harju maakond",
      "indeks_ettevotja_aadressis" to "13513",
      "ads_adr_id" to "2182337",
      "ads_ads_oid" to "",
      "ads_normaliseeritud_taisaadress" to "Harju maakond, Tallinn, Haabersti linnaosa, Õismäe tee 78-9",
      "teabesysteemi_link" to "https://ariregister.rik.ee/est/company/12652512"
    )

    val rows = parser.parse(csv.byteInputStream()).toList()
    expect(rows.first()).toEqual(expectedRow)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters