From 4ce317435c4b94da615c16f84514135fb0cacdeb Mon Sep 17 00:00:00 2001 From: jeremy Date: Sat, 26 Oct 2024 14:39:00 +0200 Subject: [PATCH] doc: wip --- README.md | 35 +++++++++ docs/.vitepress/config.ts | 4 +- docs/changelog.md | 6 +- docs/guide/detail.md | 16 +++- docs/guide/usage.md | 155 +++++++++++++++++++++++++++++++++++++- src/types.ts | 2 - 6 files changed, 205 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e69de29..a765ca2 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,35 @@ +# ETL for Node.js + +ETL (Extract, Transform, Load) is a process used to extract information from multiple sources, transform it according to your needs, and then load it into a target system, such as a database. + +**Extract (Source):** +This step involves collecting data from various sources, which may include databases, files (JSON, CSV, XLS, etc.), APIs, etc. + +**Transform:** +The extracted data is often in different formats or does not meet the requirements of the target system. The transformation adjusts and formats the data to make it consistent and usable. +This may include data type conversion, data cleaning (removing duplicates, correcting errors), data enrichment, and applying business rules. + +**Load (Destination):** +After transformation, the data is sent to the destination, such as a database, file, API, etc. +This step can be done incrementally (only adding new data) or by reloading the entire dataset. 
+ + +## Installation + +```sh +npm install @jrmc/etl +``` + +## Run + +```ts +import etl from '@jrmc/etl' + +await etl.run({ + source: UserSource, + transform: UserTransform, // optional + destination: UserDestination, +}) +``` + +view Documentation \ No newline at end of file diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 5fba570..685edce 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -22,8 +22,8 @@ export default defineConfig({ { text: 'Samples', items: [ - { text: 'DB to CSV', link: '/samples/getting-started' }, - { text: 'xlsx to db', link: '/samples/usage' }, + { text: 'DB to CSV', link: '/samples/db-to-csv' }, + { text: 'xlsx to db', link: '/samples/xlsx-to-db' }, ] }, { diff --git a/docs/changelog.md b/docs/changelog.md index 41f297c..131013b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1 +1,5 @@ -# changelog \ No newline at end of file +# changelog + +## 1.0.0 + +- first version \ No newline at end of file diff --git a/docs/guide/detail.md b/docs/guide/detail.md index ef71e9d..a359efd 100644 --- a/docs/guide/detail.md +++ b/docs/guide/detail.md @@ -12,10 +12,18 @@ export interface Etl { ## Type ```ts -type EtlAttributes = { - source: LazyImport | AsyncIterator - transform?: LazyImport | AsyncWithData - destination: LazyImport | AsyncWithData +export type LazyImport = () => Promise +export type AsyncIterator = () => AsyncIterableIterator +export type AsyncWithData = (data: any) => Promise + +export type SourceEtl = LazyImport | [LazyImport, options: Object] | AsyncIterator +export type TransformEtl = LazyImport | AsyncWithData +export type DestinationEtl = LazyImport | [LazyImport, options: Object] | AsyncWithData + +export type EtlAttributes = { + source: SourceEtl + transform?: TransformEtl + destination: DestinationEtl } ``` diff --git a/docs/guide/usage.md b/docs/guide/usage.md index 1ffada9..299ecf2 100644 --- a/docs/guide/usage.md +++ b/docs/guide/usage.md @@ -19,7 +19,7 @@ await 
etl.run({ ``` ```ts [user_array_source.js] -import { Source } from '@jrmc/etl' +import { Source } from '@jrmc/etl/types' export default class UserArraySource extends Source { async *each() { @@ -36,7 +36,7 @@ export default class UserArraySource extends Source { ``` ```ts [user_array_to_db_transform.js] -import { Transform } from '@jrmc/etl' +import { Transform } from '@jrmc/etl/types' type User = { firstname: string @@ -55,7 +55,7 @@ export default class UserArrayToDbTransform extends Transform { ``` ```ts [user_db_destination.js] -import { Destination } from '@jrmc/etl' +import { Destination } from '@jrmc/etl/types' export default class UserDbDestination extends Destination { async write(row: any) { @@ -66,6 +66,78 @@ export default class UserDbDestination extends Destination { ::: +## by [LazyImport](/guide/detail#type) with options + +::: code-group + + +```ts [main.js] +import etl from '@jrmc/etl' + +const UserSource = () => import('./user_array_source.js') +const UserDestination = () => import('./user_db_destination.js') + +await etl.run({ + source: [UserSource, { + data: [ + { lastname: 'Doe', firstname: 'John', age: 30 }, + { lastname: 'Doe', firstname: 'Jane', age: 25 }, + ] + }], + destination: [UserDestination, { + age: 22 + }], +}) +``` + +```ts [user_array_source.js] +import { Source } from '@jrmc/etl/types' + +type Options = Record> + +export default class TestWithOptionsSource implements Source { + #data: Array + + constructor(options: Options) { + this.#data = options.data || []; + } + + async *each() { + for (let item of this.#data) { + yield item + } + } +} +``` + +```ts [user_db_destination.js] +import { Destination } from '@jrmc/etl/types' + +type Options = { + age: number +} + +type Person = { + firstname: string + lastname: string + age: number +} + +export default class TestWithOptionsDestination implements Destination { + #age: number | null + + constructor(options: Options) { + this.#age = options.age || null; + } + + async write(row: 
Person) { + User.create({ lastname: row.lastname, firstname: row.firstname, age: this.#age ? this.#age : row.age }) + } +} +``` + +::: + ## by Functions Use [AsyncIterator and AsyncWithData](/guide/detail#type) functions @@ -94,4 +166,79 @@ await etl.run({ User.create(row) }, }) -``` \ No newline at end of file +``` + +## get results + +The `run` method returns an array if the Destination class returns a result. + +::: code-group + +```ts [main.js] +import etl from '@jrmc/etl' + +const UserSource = () => import('./user_array_source.js') +const UserTransform = () => import('./user_array_to_db_transform.js') +const UserDestination = () => import('./user_db_destination.js') + +const results = await etl.run({ + source: UserSource, + transform: UserTransform, + destination: UserDestination, +}) + +/* +return : +[ + { name: 'John Doe', age: 30 }, + { name: 'Jane Doe', age: 25 }, +] +*/ +``` + +```ts [user_array_source.js] +import { Source } from '@jrmc/etl/types' + +export default class UserArraySource extends Source { + async *each() { + const dataArray = [ + { lastname: 'Doe', firstname: 'John', age: 30 }, + { lastname: 'Doe', firstname: 'Jane', age: 25 }, + ] + + for (let item of dataArray) { + yield item + } + } +} +``` + +```ts [user_array_to_db_transform.js] +import { Transform } from '@jrmc/etl/types' + +type User = { + firstname: string + lastname: string + age: number +} + +export default class UserArrayToDbTransform extends Transform { + async process(row: User) { + return { + name: `${row.firstname} ${row.lastname}`, + age: row.age, + } + } +} +``` + +```ts [user_db_destination.js] +import { Destination } from '@jrmc/etl/types' + +export default class UserDbDestination extends Destination { + async write(row: any) { + return row + } +} + +::: \ No newline at end of file diff --git a/src/types.ts b/src/types.ts index 0c12bce..8386257 100644 --- a/src/types.ts +++ b/src/types.ts @@ -11,11 +11,9 @@ export type TransformEtl = LazyImport | AsyncWithData export type 
DestinationEtl = LazyImport | [LazyImport, options: Object] | AsyncWithData export type EtlAttributes = { - preProcess?: () => Promise source: SourceEtl transform?: TransformEtl destination: DestinationEtl - postProcess?: () => Promise } export interface Etl {