From e0729bc6be3b0c26007031e8c5d74554bf88005d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Reppen?= Date: Mon, 25 Mar 2019 10:09:37 +0100 Subject: [PATCH 1/4] =?UTF-8?q?Lisenser=20og=20lenker=20til=20datasett=20p?= =?UTF-8?q?=C3=A5=20Geonorge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Kloner nå data fra https://github.com/Artsdatabanken/nin-data - Leser CSV direkte - Mapper dynamisk kolonner fra CSV-fil. Nye kolonner resulterer i nye nøkler --- .gitignore | 3 +- lib/csv.js | 13 ++++++ lib/git.js | 18 ++++++++ package.json | 1 + steg/01_nedlasting/inn_nin-data.js | 5 +++ steg/05/organisasjon.js | 47 +++++++++++++-------- steg/09/full_med_graf.js | 66 +++++++++++++++++++++++------- yarn.lock | 5 +++ 8 files changed, 125 insertions(+), 33 deletions(-) create mode 100644 lib/csv.js create mode 100644 lib/git.js create mode 100644 steg/01_nedlasting/inn_nin-data.js diff --git a/.gitignore b/.gitignore index f26b7d71..cb8d8990 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ build/ node_modules/ .vscode/ kildedata/raster_index.json -log/ \ No newline at end of file +log/ +nin-data/ diff --git a/lib/csv.js b/lib/csv.js new file mode 100644 index 00000000..8540abd5 --- /dev/null +++ b/lib/csv.js @@ -0,0 +1,13 @@ +const parse = require("csv-parse/lib/sync") +const fs = require("fs") +var JSONStream = require("JSONStream") + +function les(csvFilePath) { + const input = fs.readFileSync(csvFilePath) + const records = parse(input, { + columns: true + }) + return records +} + +module.exports = { les } diff --git a/lib/git.js b/lib/git.js new file mode 100644 index 00000000..af7d8c48 --- /dev/null +++ b/lib/git.js @@ -0,0 +1,18 @@ +const execSync = require("child_process").execSync +const fs = require("fs") + +function git(cmd, args = "") { + execSync("git " + cmd + " " + args) +} + +function clone(url, destFolder) { + if (fs.existsSync(destFolder)) pull(destFolder) + // only latest version + else git(`-C ${destFolder} clone --depth=1 ${url}`) +} + +function pull(destFolder) { + git(`-C ${destFolder} pull`) +} + +module.exports = { clone } diff --git a/package.json b/package.json index 05a0e3e6..ce7e8cdc 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "JSONStream": "^1.3.5", "babel-eslint": "^10.0.0", "csv": "^5.1.1", + "csv-parse": "^4.3.4", "decompress-zip": "^0.2.2", "eslint-config-airbnb-base": "^13.1.0", "fs-extra": "^7.0.1", diff --git a/steg/01_nedlasting/inn_nin-data.js b/steg/01_nedlasting/inn_nin-data.js new file mode 100644 index 00000000..008f5cb9 --- /dev/null +++ b/steg/01_nedlasting/inn_nin-data.js @@ -0,0 +1,5 @@ +const config = require("../../config") +const git = require("../../lib/git") + +// Download "Natur i Norge" data kildedata +git.clone("https://github.com/Artsdatabanken/nin-data.git", "nin-data") diff --git a/steg/05/organisasjon.js b/steg/05/organisasjon.js index 71b2ebe5..7c150d34 100644 --- a/steg/05/organisasjon.js +++ b/steg/05/organisasjon.js @@ -2,26 +2,39 @@ const io = require("../../lib/io") const log = require("log-less-fancy")() const config = require("../../config") const typesystem = require("@artsdatabanken/typesystem") +const csv = require("../../lib/csv") +const organisasjonTilKode = {} let organisasjon = io.lesKildedatafil("Datakilde/organisasjon") -let datasett = io.lesKildedatafil("Datakilde/datasett") +Object.entries(organisasjon).forEach(([kode, o]) => { + organisasjonTilKode[o.tittel.nb] = kode + organisasjonTilKode[kode] = kode + organisasjonTilKode[kode.replace("OR-", "")] = kode +}) -function lagRelasjonTilDatasett(kilde) { - Object.keys(datasett).forEach(key => { - const o = organisasjon[key] - const sett = datasett[key] - o.relasjon = sett.map(s => { - return { - kode: s, - kant: "Datasett", - kantRetur: "Datakilde", - kantReturFraAlleBarna: true, - erSubset: true - } +let datasett = csv.les("./nin-data/Natur_i_Norge/datasett.csv") +// Sorter slik at mer spesifikke koder kommer sist og overstyrer generell datakilde +datasett.sort((a, b) => a.Datasett.length - b.Datasett.length) +lagRelasjonTilDatasett(datasett) +io.skrivDatafil(__filename, organisasjon) + +function lagRelasjonTilDatasett(datasett) { + datasett.forEach(ds => { + const { Datasett: kode, Dataleverandør, ...datasett } = ds + const orgkode = organisasjonTilKode[Dataleverandør] + if (!orgkode) throw new Error("Ukjent dataleverandør: ", Dataleverandør) + const o = organisasjon[orgkode] + if (!o.relasjon) o.relasjon = [] + const rel = { + kode: kode, + kant: "Datasett", + kantRetur: "Datakilde", + kantReturFraAlleBarna: true, + erSubset: true + } + Object.entries(datasett).forEach(([key, value]) => { + rel[key.toLowerCase()] = value }) + o.relasjon.push(rel) }) } - -lagRelasjonTilDatasett() - -io.skrivDatafil(__filename, organisasjon) diff --git a/steg/09/full_med_graf.js b/steg/09/full_med_graf.js index 7dac7836..56d29a75 100644 --- a/steg/09/full_med_graf.js +++ b/steg/09/full_med_graf.js @@ -7,10 +7,13 @@ const typesystem = require("@artsdatabanken/typesystem") let full = io.lesDatafil("full") let hierarki = io.lesDatafil("kodehierarki") const barnAv = hierarki.barn +const skalPropageresNed = [] + Object.keys(full).forEach(kode => lagGrafkoblinger(kode, full[kode])) Object.keys(full).forEach(kode => lagGradientPåSegSelv(kode, full[kode])) Object.keys(full).forEach(kode => lagGrafGradientkoblinger(kode, full[kode])) Object.keys(full).forEach(kode => propagerGradientTilRelasjon(kode, full[kode])) +propagerGrafkoblinger() io.skrivDatafil(__filename, full) @@ -48,23 +51,27 @@ function lagGrafkobling(kodeFra, kodeTil, kant, metadata, erSubset) { } if (!nodeFra.graf) nodeFra.graf = {} - if (!nodeFra.graf[kant]) nodeFra.graf[kant] = {} let kobling = Object.assign({}, metadata, tilBarn(nodeTil)) kobling.type = nodeTil.type + if (kobling.kant === "Datasett" && nodeFra.graf["Datakilde"]) return + if (!nodeFra.graf[kant]) nodeFra.graf[kant] = {} if (nodeTil.type === "flagg" && kobling.kant !== "Datasett") { if (!nodeFra.flagg) nodeFra.flagg = {} nodeFra.flagg[kodeTil] = { tittel: nodeTil.tittel } - } else { - kobling.erSubset = erSubset - delete kobling.kode - delete kobling.kant - delete kobling.kantRetur - delete kobling.kantReturFraAlleBarna - nodeFra.graf[kant][kodeTil] = kobling + return true } + + kobling.erSubset = erSubset + delete kobling.kode + delete kobling.kant + delete kobling.kantRetur + delete kobling.kantReturFraAlleBarna + nodeFra.graf[kant][kodeTil] = kobling + return true } + function lagGrafkoblingerTilAlleBarna( kodeFra, kodeTil, @@ -72,10 +79,33 @@ function lagGrafkoblingerTilAlleBarna( metadata, erSubset ) { - const barna = barnAv[kodeFra] || [] - lagGrafkobling(kodeFra, kodeTil, kant, metadata, erSubset) - barna.forEach(kodeFraBarn => { - lagGrafkoblingerTilAlleBarna(kodeFraBarn, kodeTil, kant, metadata, erSubset) + if (lagGrafkobling(kodeFra, kodeTil, kant, metadata, erSubset)) { + const barna = barnAv[kodeFra] || [] + barna.forEach(kodeFraBarn => { + lagGrafkoblingerTilAlleBarna( + kodeFraBarn, + kodeTil, + kant, + metadata, + erSubset + ) + }) + } +} + +function propagerGrafkoblinger() { + skalPropageresNed.forEach(e => { + if (e.kode === "NN-NA-BS") debugger + const barna = barnAv[e.kode] || [] + barna.forEach(barnkode => + lagGrafkoblingerTilAlleBarna( + barnkode, + e.tilKode, + e.kantRetur, + e, + e.erSubset + ) + ) }) } @@ -85,9 +115,15 @@ function lagGrafkoblinger(kode, node) { if (!e.kode) throw new Error("Mangler kode " + e.kode) lagGrafkobling(kode, e.kode, e.kant, e, e.erSubset) if (e.kantRetur) { - if (e.kantReturFraAlleBarna) { - lagGrafkoblingerTilAlleBarna(e.kode, kode, e.kantRetur, e, e.erSubset) - } else lagGrafkobling(e.kode, kode, e.kantRetur, e, false) + lagGrafkobling( + e.kode, + kode, + e.kantRetur, + e, + e.kantReturFraAlleBarna && e.erSubset + ) + if (e.kantReturFraAlleBarna) + skalPropageresNed.push({ ...e, tilKode: kode }) } }) delete node.relasjon diff --git a/yarn.lock b/yarn.lock index bdb34303..5358ac40 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1876,6 +1876,11 @@ csv-parse@^4.3.0: resolved "https://registry.yarnpkg.com/csv-parse/-/csv-parse-4.3.1.tgz#b2b92a4068106d6713952050593b2be06ec1a577" integrity sha512-1V98UTtfefu8yKdYIGX1LFhfE2yMllveq2uCBay5y4ybfTzvW6I4M6r8Yc2YnKJdJBUig5ksEMh/bLqKg4vEMQ== +csv-parse@^4.3.4: + version "4.3.4" + resolved "https://registry.yarnpkg.com/csv-parse/-/csv-parse-4.3.4.tgz#fc896c170ebbdf6fb286de85c41bbaea4973d25f" + integrity sha512-M1R4WL+vt81+GnkKzi0s1qQM6WXvHQKDecNkpozzAEG8LHvIW9bq5eBnOKFQn50fTuAos7JodBh/07MK+J6G2Q== + csv-stringify@^5.1.2: version "5.3.0" resolved "https://registry.yarnpkg.com/csv-stringify/-/csv-stringify-5.3.0.tgz#ff2dfafa6fcccd455ff5039be9c202475aa3bbe0" From aca1b752e59384070780dc9f998722fb3838a804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Reppen?= Date: Mon, 25 Mar 2019 10:20:32 +0100 Subject: [PATCH 2/4] Create the git checkout directory if necessary --- lib/git.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/git.js b/lib/git.js index af7d8c48..1097870d 100644 --- a/lib/git.js +++ b/lib/git.js @@ -8,7 +8,10 @@ function git(cmd, args = "") { function clone(url, destFolder) { if (fs.existsSync(destFolder)) pull(destFolder) // only latest version - else git(`-C ${destFolder} clone --depth=1 ${url}`) + else { + fs.mkdirSync(destFolder) + git(`-C ${destFolder} clone --depth=1 ${url}`) + } } function pull(destFolder) { From 27d05001330e3e8c1c0073e842eadb982f1afb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Reppen?= Date: Mon, 25 Mar 2019 10:33:05 +0100 Subject: [PATCH 3/4] Les fra nin-data git checkout dir --- steg/01_nedlasting/inn_nin-data.js | 2 +- steg/05/organisasjon.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/steg/01_nedlasting/inn_nin-data.js b/steg/01_nedlasting/inn_nin-data.js index 008f5cb9..5c04280c 100644 --- a/steg/01_nedlasting/inn_nin-data.js +++ b/steg/01_nedlasting/inn_nin-data.js @@ -2,4 +2,4 @@ const config = require("../../config") const git = require("../../lib/git") // Download "Natur i Norge" data kildedata -git.clone("https://github.com/Artsdatabanken/nin-data.git", "nin-data") +git.clone("https://github.com/Artsdatabanken/nin-data.git", ".") diff --git a/steg/05/organisasjon.js b/steg/05/organisasjon.js index 7c150d34..a214738f 100644 --- a/steg/05/organisasjon.js +++ b/steg/05/organisasjon.js @@ -12,7 +12,7 @@ Object.entries(organisasjon).forEach(([kode, o]) => { organisasjonTilKode[kode.replace("OR-", "")] = kode }) -let datasett = csv.les("./nin-data/Natur_i_Norge/datasett.csv") +let datasett = csv.les("nin-data/Natur_i_Norge/datasett.csv") // Sorter slik at mer spesifikke koder kommer sist og overstyrer generell datakilde datasett.sort((a, b) => a.Datasett.length - b.Datasett.length) lagRelasjonTilDatasett(datasett) From 1f92b553be83d2fe27b24ac06ec1c66c1e3b75a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Reppen?= Date: Mon, 25 Mar 2019 10:38:19 +0100 Subject: [PATCH 4/4] =?UTF-8?q?Kan=20ikke=20spesifisere=20checkout=20path?= =?UTF-8?q?=20uten=20tr=C3=B8bbel,=20skipper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/git.js | 2 +- steg/01_nedlasting/inn_nin-data.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/git.js b/lib/git.js index 1097870d..2b671507 100644 --- a/lib/git.js +++ b/lib/git.js @@ -10,7 +10,7 @@ function clone(url, destFolder) { // only latest version else { fs.mkdirSync(destFolder) - git(`-C ${destFolder} clone --depth=1 ${url}`) + git(`clone --depth=1 ${url}`) } } diff --git a/steg/01_nedlasting/inn_nin-data.js b/steg/01_nedlasting/inn_nin-data.js index 5c04280c..008f5cb9 100644 --- a/steg/01_nedlasting/inn_nin-data.js +++ b/steg/01_nedlasting/inn_nin-data.js @@ -2,4 +2,4 @@ const config = require("../../config") const git = require("../../lib/git") // Download "Natur i Norge" data kildedata -git.clone("https://github.com/Artsdatabanken/nin-data.git", ".") +git.clone("https://github.com/Artsdatabanken/nin-data.git", "nin-data")