-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Batch version of the pipeline (#102)
* Initial commit for batch version of the pipeline * Read toll booth entries and exits from bigquery * Calculate TollBoothStats and TotalVehicleTime in batch * Batch calculates hourly and daily stats instead of ten minutes granularity from streaming part * Resolves TODOs in batch job test * Better vehicle registration aligned to batch and streaming * Vehicles with expired registrations in batch * Diagnostic in batch * Fix formatting * Fix formatting
- Loading branch information
Showing
51 changed files
with
1,077 additions
and
474 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
109 changes: 109 additions & 0 deletions
109
...ion/src/main/scala/org/mkuthan/streamprocessing/toll/application/batch/TollBatchJob.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
package org.mkuthan.streamprocessing.toll.application.batch | ||
|
||
import com.spotify.scio.ContextAndArgs | ||
|
||
import org.joda.time.Duration | ||
|
||
import org.mkuthan.streamprocessing.infrastructure._ | ||
import org.mkuthan.streamprocessing.infrastructure.bigquery.RowRestriction | ||
import org.mkuthan.streamprocessing.infrastructure.bigquery.RowRestriction.PartitionDateRestriction | ||
import org.mkuthan.streamprocessing.infrastructure.bigquery.StorageReadConfiguration | ||
import org.mkuthan.streamprocessing.shared._ | ||
import org.mkuthan.streamprocessing.toll.domain.booth.TollBoothEntry | ||
import org.mkuthan.streamprocessing.toll.domain.booth.TollBoothExit | ||
import org.mkuthan.streamprocessing.toll.domain.booth.TollBoothStats | ||
import org.mkuthan.streamprocessing.toll.domain.registration.VehicleRegistration | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.TotalVehicleTime | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.TotalVehicleTimeDiagnostic | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.VehiclesWithExpiredRegistration | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.VehiclesWithExpiredRegistrationDiagnostic | ||
|
||
// Batch entry point of the toll pipeline: reads toll booth entries, toll booth exits and vehicle | ||
// registrations from BigQuery for config.effectiveDate, derives the aggregates below and writes each | ||
// result to BigQuery. The *IoId identifiers are not defined in this object; presumably they come from | ||
// TollBatchJobIo - confirm. | ||
object TollBatchJob extends TollBatchJobIo { | ||
|
||
// Used as the window for hourly booth stats and as the duration passed to | ||
// TotalVehicleTime.calculateInSessionWindow (presumably the session gap, see the *OneHourGap* ids - confirm). | ||
private val OneHour = Duration.standardHours(1) | ||
|
||
// Used as the window for daily booth stats, expired-registration detection and both diagnostics. | ||
private val OneDay = Duration.standardDays(1) | ||
|
||
// Builds the whole pipeline from the command-line arguments and then runs it. | ||
def main(mainArgs: Array[String]): Unit = { | ||
val (sc, args) = ContextAndArgs(mainArgs) | ||
|
||
val config = TollBatchJobConfig.parse(args) | ||
|
||
// read toll booth entries and toll booth exits | ||
// both reads pass a DateColumnRestriction on the record's partitioning column and config.effectiveDate | ||
val boothEntryRecords = sc.readFromBigQuery( | ||
EntryTableIoId, | ||
config.entryTable, | ||
StorageReadConfiguration().withRowRestriction( | ||
RowRestriction.DateColumnRestriction(TollBoothEntry.PartitioningColumnName, config.effectiveDate) | ||
) | ||
) | ||
val boothEntries = TollBoothEntry.decodeRecord(boothEntryRecords) | ||
|
||
val boothExitRecords = sc.readFromBigQuery( | ||
ExitTableIoId, | ||
config.exitTable, | ||
StorageReadConfiguration().withRowRestriction( | ||
RowRestriction.DateColumnRestriction(TollBoothExit.PartitioningColumnName, config.effectiveDate) | ||
) | ||
) | ||
val boothExits = TollBoothExit.decodeRecord(boothExitRecords) | ||
|
||
// read vehicle registrations | ||
// unlike entries and exits, this read uses PartitionDateRestriction instead of a date-column restriction | ||
val vehicleRegistrationRecords = | ||
sc.readFromBigQuery( | ||
VehicleRegistrationTableIoId, | ||
config.vehicleRegistrationTable, | ||
StorageReadConfiguration().withRowRestriction( | ||
PartitionDateRestriction(config.effectiveDate) | ||
) | ||
) | ||
|
||
// decoding also receives config.effectiveDate; how it is used is not visible here | ||
val vehicleRegistrations = VehicleRegistration.decodeRecord(vehicleRegistrationRecords, config.effectiveDate) | ||
|
||
// calculate toll booth stats | ||
// the same booth entries are aggregated twice: in one-hour and in one-day fixed windows | ||
val boothStatsHourly = TollBoothStats.calculateInFixedWindow(boothEntries, OneHour) | ||
TollBoothStats | ||
.encode(boothStatsHourly) | ||
.writeBoundedToBigQuery(EntryStatsHourlyTableIoId, config.entryStatsHourlyPartition) | ||
|
||
val boothStatsDaily = TollBoothStats.calculateInFixedWindow(boothEntries, OneDay) | ||
TollBoothStats | ||
.encode(boothStatsDaily) | ||
.writeBoundedToBigQuery(EntryStatsDailyTableIoId, config.entryStatsDailyPartition) | ||
|
||
// calculate total vehicle times | ||
// the calculation returns a pair: the results and a diagnostic collection, written separately below | ||
val (totalVehicleTimes, totalVehicleTimesDiagnostic) = | ||
TotalVehicleTime.calculateInSessionWindow(boothEntries, boothExits, OneHour) | ||
TotalVehicleTime | ||
.encodeRecord(totalVehicleTimes) | ||
.writeBoundedToBigQuery(TotalVehicleTimeOneHourGapTableIoId, config.totalVehicleTimeOneHourGapPartition) | ||
|
||
// diagnostics are summed by key in one-day fixed windows before being converted to records | ||
// NOTE(review): totalVehicleTimeDiagnosticOneHourGapTable is declared as a BigQueryPartition in | ||
// TollBatchJobConfig despite the "Table" suffix | ||
totalVehicleTimesDiagnostic | ||
.sumByKeyInFixedWindow(windowDuration = OneDay) | ||
.mapWithTimestamp(TotalVehicleTimeDiagnostic.toRecord) | ||
.writeBoundedToBigQuery( | ||
TotalVehicleTimeDiagnosticOneHourGapTableIoId, | ||
config.totalVehicleTimeDiagnosticOneHourGapTable | ||
) | ||
|
||
// calculate vehicles with expired registrations | ||
// booth entries are combined with vehicle registrations in one-day fixed windows; again a pair is returned | ||
val (vehiclesWithExpiredRegistration, vehiclesWithExpiredRegistrationDiagnostic) = | ||
VehiclesWithExpiredRegistration.calculateInFixedWindow(boothEntries, vehicleRegistrations, OneDay) | ||
VehiclesWithExpiredRegistration | ||
.encodeRecord(vehiclesWithExpiredRegistration) | ||
.writeBoundedToBigQuery( | ||
VehiclesWithExpiredRegistrationDailyTableIoId, | ||
config.vehiclesWithExpiredRegistrationDailyPartition | ||
) | ||
|
||
// same diagnostic handling as for total vehicle times: sum by key per day, map to records, write | ||
vehiclesWithExpiredRegistrationDiagnostic | ||
.sumByKeyInFixedWindow(windowDuration = OneDay) | ||
.mapWithTimestamp(VehiclesWithExpiredRegistrationDiagnostic.toRecord) | ||
.writeBoundedToBigQuery( | ||
VehiclesWithExpiredRegistrationDiagnosticDailyTableIoId, | ||
config.vehiclesWithExpiredRegistrationDiagnosticDailyPartition | ||
) | ||
|
||
// run the pipeline; binding to _ explicitly discards the value returned by sc.run() | ||
val _ = sc.run() | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
...c/main/scala/org/mkuthan/streamprocessing/toll/application/batch/TollBatchJobConfig.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.mkuthan.streamprocessing.toll.application.batch | ||
|
||
import com.spotify.scio.Args | ||
|
||
import org.joda.time.LocalDate | ||
|
||
import org.mkuthan.streamprocessing.infrastructure.bigquery.BigQueryPartition | ||
import org.mkuthan.streamprocessing.infrastructure.bigquery.BigQueryTable | ||
import org.mkuthan.streamprocessing.toll.domain.booth.TollBoothEntry | ||
import org.mkuthan.streamprocessing.toll.domain.booth.TollBoothExit | ||
import org.mkuthan.streamprocessing.toll.domain.booth.TollBoothStats | ||
import org.mkuthan.streamprocessing.toll.domain.registration.VehicleRegistration | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.TotalVehicleTime | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.TotalVehicleTimeDiagnostic | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.VehiclesWithExpiredRegistration | ||
import org.mkuthan.streamprocessing.toll.domain.vehicle.VehiclesWithExpiredRegistrationDiagnostic | ||
|
||
// Configuration of the toll batch job: the effective date, three typed input tables (entries, exits, | ||
// vehicle registrations) and six typed output partitions (hourly/daily stats, expired registrations, | ||
// total vehicle times and the two diagnostics). | ||
// NOTE(review): totalVehicleTimeDiagnosticOneHourGapTable is typed as BigQueryPartition although its | ||
// name ends with "Table", unlike the other outputs which end with "Partition" - confirm whether a rename | ||
// is wanted (callers use the current name). | ||
case class TollBatchJobConfig( | ||
effectiveDate: LocalDate, | ||
entryTable: BigQueryTable[TollBoothEntry.Record], | ||
exitTable: BigQueryTable[TollBoothExit.Record], | ||
vehicleRegistrationTable: BigQueryTable[VehicleRegistration.Record], | ||
entryStatsHourlyPartition: BigQueryPartition[TollBoothStats.Record], | ||
entryStatsDailyPartition: BigQueryPartition[TollBoothStats.Record], | ||
vehiclesWithExpiredRegistrationDailyPartition: BigQueryPartition[VehiclesWithExpiredRegistration.Record], | ||
vehiclesWithExpiredRegistrationDiagnosticDailyPartition: BigQueryPartition[ | ||
VehiclesWithExpiredRegistrationDiagnostic.Record | ||
], | ||
totalVehicleTimeOneHourGapPartition: BigQueryPartition[TotalVehicleTime.Record], | ||
totalVehicleTimeDiagnosticOneHourGapTable: BigQueryPartition[TotalVehicleTimeDiagnostic.Record] | ||
) | ||
|
||
// Companion holding the factory that builds TollBatchJobConfig from Scio command-line arguments. | ||
object TollBatchJobConfig { | ||
// Reads every setting with args.required and assembles the config. | ||
// The "effectiveDate" argument is parsed with Joda-Time LocalDate.parse and reused as the date of | ||
// every output partition created with BigQueryPartition.daily. | ||
// NOTE(review): presumably a missing argument or an unparseable date fails here with an exception - | ||
// confirm against the Scio Args and Joda-Time documentation. | ||
def parse(args: Args): TollBatchJobConfig = { | ||
val effectiveDate = LocalDate.parse(args.required("effectiveDate")) | ||
TollBatchJobConfig( | ||
effectiveDate = effectiveDate, | ||
// inputs: plain table references taken from the arguments | ||
entryTable = BigQueryTable(args.required("entryTable")), | ||
exitTable = BigQueryTable(args.required("exitTable")), | ||
vehicleRegistrationTable = BigQueryTable(args.required("vehicleRegistrationTable")), | ||
// outputs: each argument names a table, combined with effectiveDate via BigQueryPartition.daily | ||
entryStatsHourlyPartition = BigQueryPartition.daily(args.required("entryStatsHourlyTable"), effectiveDate), | ||
entryStatsDailyPartition = BigQueryPartition.daily(args.required("entryStatsDailyTable"), effectiveDate), | ||
vehiclesWithExpiredRegistrationDailyPartition = | ||
BigQueryPartition.daily(args.required("vehiclesWithExpiredRegistrationDailyTable"), effectiveDate), | ||
vehiclesWithExpiredRegistrationDiagnosticDailyPartition = | ||
BigQueryPartition.daily(args.required("vehiclesWithExpiredRegistrationDiagnosticDailyTable"), effectiveDate), | ||
totalVehicleTimeOneHourGapPartition = | ||
BigQueryPartition.daily(args.required("totalVehicleTimeOneHourGapTable"), effectiveDate), | ||
totalVehicleTimeDiagnosticOneHourGapTable = | ||
BigQueryPartition.daily(args.required("totalVehicleTimeDiagnosticOneHourGapTable"), effectiveDate) | ||
) | ||
} | ||
} |
Oops, something went wrong.