Skip to content

Commit

Permalink
Traversable instead of Iterable
Browse files Browse the repository at this point in the history
  • Loading branch information
propi committed Feb 16, 2018
1 parent 4e0e2dd commit 370378a
Show file tree
Hide file tree
Showing 27 changed files with 301 additions and 338 deletions.
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ name := "easyminer-discretization"

organization := "eu.easyminer"

version := "1.0"
version := "1.1.0"

scalaVersion := "2.11.8"
scalaVersion := "2.12.3"

scalacOptions += "-feature"
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
package eu.easyminer.discretization.algorithm

import eu.easyminer.discretization.impl.{ExclusiveIntervalBound, InclusiveIntervalBound, IntervalFrequency}
import eu.easyminer.discretization.impl.{IntervalBound, IntervalFrequency}

/**
* Created by propan on 31. 3. 2017.
*/
trait CutpointsResolver {

def resolveCutpoints(intervals: collection.mutable.ArrayBuffer[IntervalFrequency]) = {
def resolveCutpoints(intervals: collection.mutable.ArrayBuffer[IntervalFrequency]): Unit = {
for (i <- 0 until (intervals.length - 1)) {
val leftInterval = intervals(i)
val rightInterval = intervals(i + 1)
val mergedCutPoint = (leftInterval.interval.maxValue.value + rightInterval.interval.minValue.value) / 2.0
intervals.update(i, leftInterval.copy(interval = leftInterval.interval.copy(maxValue = ExclusiveIntervalBound(mergedCutPoint))))
intervals.update(i + 1, rightInterval.copy(interval = rightInterval.interval.copy(minValue = InclusiveIntervalBound(mergedCutPoint))))
intervals.update(i, leftInterval.copy(interval = leftInterval.interval.copy(maxValue = IntervalBound.Exclusive(mergedCutPoint))))
intervals.update(i + 1, rightInterval.copy(interval = rightInterval.interval.copy(minValue = IntervalBound.Inclusive(mergedCutPoint))))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ trait Discretization[T] {

implicit val n: Numeric[T]

def discretize(data: Iterable[T]): Seq[impl.Interval]
def discretize(data: Traversable[T]): Traversable[impl.Interval]

}

Expand All @@ -25,7 +25,7 @@ object Discretization {

}

def apply[T](dt: DiscretizationTask)(implicit n: Numeric[T]) = {
def apply[T](dt: DiscretizationTask)(implicit n: Numeric[T]): Discretization[T] = {
validate(dt)
dt match {
case dt: EquidistanceDiscretizationTask =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@ import eu.easyminer.discretization.algorithm.DiscretizationTaskValidator.Excepti
import eu.easyminer.discretization.impl.{AbsoluteSupport, RelativeSupport, Support}
import eu.easyminer.discretization.task.{EquidistanceDiscretizationTask, EquifrequencyDiscretizationTask, EquisizeDiscretizationTask}


/**
* Created by propan on 2. 4. 2017.
*/
trait DiscretizationTaskValidator[T <: DiscretizationTask] {

protected def throwIfFalse(msg: String)(f: => Boolean) = if (!f) throw new InvalidDiscretizationTask(msg)

def validate(dt: T): Unit

}
Expand All @@ -25,25 +22,19 @@ object DiscretizationTaskValidator {

}

def apply[T <: DiscretizationTask](dt: T)(implicit validator: DiscretizationTaskValidator[T]) = validator.validate(dt)
def throwIfFalse(msg: String)(f: => Boolean): Unit = if (!f) throw new InvalidDiscretizationTask(msg)

implicit val discretizationTaskValidator = new DiscretizationTaskValidator[DiscretizationTask] {
def validate(dt: DiscretizationTask): Unit = throwIfFalse("Buffer size must be greater than 31 bytes.")(dt.getBufferSize >= 32)
}
def apply[T <: DiscretizationTask](dt: T)(implicit validator: DiscretizationTaskValidator[T]): Unit = validator.validate(dt)

implicit val equidistanceDiscretizationTaskValidator = new DiscretizationTaskValidator[EquidistanceDiscretizationTask] {
def validate(dt: EquidistanceDiscretizationTask): Unit = throwIfFalse("Number of bins must be greater than zero.")(dt.getNumberOfBins > 0)
}
implicit val discretizationTaskValidator: DiscretizationTaskValidator[DiscretizationTask] = (dt: DiscretizationTask) => throwIfFalse("Buffer size must be greater than 31 bytes.")(dt.getBufferSize >= 32)

implicit val equifrequencyDiscretizationTaskValidator = new DiscretizationTaskValidator[EquifrequencyDiscretizationTask] {
def validate(dt: EquifrequencyDiscretizationTask): Unit = throwIfFalse("Number of bins must be greater than zero.")(dt.getNumberOfBins > 0)
}
implicit val equidistanceDiscretizationTaskValidator: DiscretizationTaskValidator[EquidistanceDiscretizationTask] = (dt: EquidistanceDiscretizationTask) => throwIfFalse("Number of bins must be greater than zero.")(dt.getNumberOfBins > 0)

implicit val equifrequencyDiscretizationTaskValidator: DiscretizationTaskValidator[EquifrequencyDiscretizationTask] = (dt: EquifrequencyDiscretizationTask) => throwIfFalse("Number of bins must be greater than zero.")(dt.getNumberOfBins > 0)

implicit val equisizeDiscretizationTaskValidator = new DiscretizationTaskValidator[EquisizeDiscretizationTask] {
def validate(dt: EquisizeDiscretizationTask): Unit = (dt.getMinSupport: Support) match {
case AbsoluteSupport(s) => throwIfFalse("Absolute support must be greater than 1.")(s > 1)
case RelativeSupport(s) => throwIfFalse("Relative support must be greater than zero and lower than 1")(s > 0 && s < 1)
}
implicit val equisizeDiscretizationTaskValidator: DiscretizationTaskValidator[EquisizeDiscretizationTask] = (dt: EquisizeDiscretizationTask) => (dt.getMinSupport: Support) match {
case AbsoluteSupport(s) => throwIfFalse("Absolute support must be greater than 1.")(s > 1)
case RelativeSupport(s) => throwIfFalse("Relative support must be greater than zero and lower than 1")(s > 0 && s < 1)
}

}
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
package eu.easyminer.discretization.algorithm

import eu.easyminer.discretization.impl.{ExclusiveIntervalBound, InclusiveIntervalBound, Interval}
import eu.easyminer.discretization.{impl, _}
import eu.easyminer.discretization.impl
import eu.easyminer.discretization.impl.{Interval, IntervalBound}

/**
* Created by propan on 17. 3. 2017.
*/
class EquidistantIntervals[T] private[algorithm](bins: Int)(implicit val n: Numeric[T]) extends Discretization[T] {

def discretize(data: Iterable[T]): Seq[impl.Interval] = data.iterator
.map(x => (x, x))
.reduceOption((x, y) => n.min(x._1, y._1) -> n.max(x._2, y._2))
.map(x => n.toDouble(x._1) -> n.toDouble(x._2))
.toList
.flatMap { case (min, max) =>
val intervalSize = (max - min) / bins
for (binNumber <- 0 until bins) yield {
val leftBound = InclusiveIntervalBound(min + intervalSize * binNumber)
val rightBound = if (binNumber + 1 == bins) InclusiveIntervalBound(max) else ExclusiveIntervalBound(leftBound.value + intervalSize)
Interval(leftBound, rightBound)
}
def discretize(data: Traversable[T]): Traversable[impl.Interval] = new Traversable[impl.Interval] {
def foreach[U](f: Interval => U): Unit = {
data.view
.map(x => (x, x))
.reduceOption((x, y) => n.min(x._1, y._1) -> n.max(x._2, y._2))
.map(x => n.toDouble(x._1) -> n.toDouble(x._2))
.toIterator
.flatMap { case (min, max) =>
val intervalSize = (max - min) / bins
for (binNumber <- 0 until bins) yield {
val leftBound = IntervalBound.Inclusive(min + intervalSize * binNumber)
val rightBound = if (binNumber + 1 == bins) IntervalBound.Inclusive(max) else IntervalBound.Exclusive(leftBound.value + intervalSize)
Interval(leftBound, rightBound)
}
}.foreach(f)
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package eu.easyminer.discretization.algorithm
import eu.easyminer.discretization.algorithm.CutpointsResolver._
import eu.easyminer.discretization.algorithm.Discretization.Exceptions.IllegalTypeOfIterable
import eu.easyminer.discretization.algorithm.IntervalSmoothing._
import eu.easyminer.discretization.impl.sorting.SortedIterable
import eu.easyminer.discretization.impl.sorting.SortedTraversable
import eu.easyminer.discretization.impl.{InclusiveIntervalBound, Interval, IntervalFrequency, ValueFrequency}

/**
Expand Down Expand Up @@ -60,14 +60,14 @@ class EquifrequentIntervals[T] private[algorithm](bins: Int)(implicit val n: Num
}


def discretize(data: Iterable[T]): Seq[Interval] = data match {
case data: SortedIterable[T] =>
def discretize(data: Traversable[T]): Traversable[Interval] = data match {
case data: SortedTraversable[T] =>
val optimalFrequency = countOptimalFrequency(data)
val intervals = searchIntervals(data, optimalFrequency)
smoothIntervals(intervals, data, 1000000)(canItMoveLeft(optimalFrequency))(canItMoveRight(optimalFrequency))
resolveCutpoints(intervals)
intervals.iterator.map(_.interval).toList
case _ => throw new IllegalTypeOfIterable(classOf[SortedIterable[T]], data.getClass)
case _ => throw new IllegalTypeOfIterable(classOf[SortedTraversable[T]], data.getClass)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import eu.easyminer.discretization.algorithm.CutpointsResolver._
import eu.easyminer.discretization.algorithm.Discretization.Exceptions.IllegalTypeOfIterable
import eu.easyminer.discretization.algorithm.IntervalSmoothing._
import eu.easyminer.discretization.impl._
import eu.easyminer.discretization.impl.sorting.SortedIterable
import eu.easyminer.discretization.impl.sorting.SortedTraversable

/**
* Created by propan on 31. 3. 2017.
Expand Down Expand Up @@ -53,13 +53,13 @@ class EquisizedIntervals[T] private[algorithm](minSupport: Support)(implicit val
}

def discretize(data: Iterable[T]): Seq[Interval] = data match {
case data: SortedIterable[T] =>
case data: SortedTraversable[T] =>
val optimalFrequency = countOptimalFrequency(data)
val intervals = searchIntervals(data, optimalFrequency)
smoothIntervals(intervals, data, 1000000)(canItMoveLeft(optimalFrequency))(canItMoveRight(optimalFrequency))
resolveCutpoints(intervals)
intervals.iterator.map(_.interval).toList
case _ => throw new IllegalTypeOfIterable(classOf[SortedIterable[T]], data.getClass)
case _ => throw new IllegalTypeOfIterable(classOf[SortedTraversable[T]], data.getClass)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package eu.easyminer.discretization.algorithm

import java.util

import eu.easyminer.discretization.impl.sorting.SortedIterable
import eu.easyminer.discretization.impl.sorting.SortedTraversable
import eu.easyminer.discretization.impl.{InclusiveIntervalBound, IntervalFrequency, ValueFrequency}
import eu.easyminer.discretization.util.NumericByteArray._

Expand All @@ -11,7 +11,7 @@ import eu.easyminer.discretization.util.NumericByteArray._
*/
trait IntervalSmoothing {

def smoothIntervals[T](intervals: collection.mutable.ArrayBuffer[IntervalFrequency], records: SortedIterable[T], bufferSize: Int)
def smoothIntervals[T](intervals: collection.mutable.ArrayBuffer[IntervalFrequency], records: SortedTraversable[T], bufferSize: Int)
(canItMoveLeft: (ValueFrequency[T], IntervalFrequency, IntervalFrequency) => Boolean)
(canItMoveRight: (ValueFrequency[T], IntervalFrequency, IntervalFrequency) => Boolean)
(implicit n: Numeric[T]): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import java.io.File
import eu.easyminer.discretization
import eu.easyminer.discretization.algorithm.{Discretization, EquidistantIntervals, EquifrequentIntervals, EquisizedIntervals}
import eu.easyminer.discretization.impl.IterableConversions._
import eu.easyminer.discretization.impl.sorting.{ReversableSortedIterable, SortedInMemoryNumericIterable, SortedPersistentNumericIterable}
import eu.easyminer.discretization.impl.sorting.{ReversableSortedTraversable, SortedInMemoryNumericTraversable, SortedPersistentNumericTraversable}
import eu.easyminer.discretization.{Discretizable, DiscretizationTask}

import scala.language.implicitConversions
Expand All @@ -22,22 +22,22 @@ object DefaultDiscretization extends Discretizable {
private def doWithNumeric[A <: Number, B](discretizationTask: DiscretizationTask, data: java.lang.Iterable[A])(implicit n: Numeric[B]): Array[discretization.Interval] = {
lazy val file = Stream.continually(new File(Random.alphanumeric.take(8).mkString)).find(!_.exists()).get
lazy val directory = new File("./")
implicit val sn: A => B = javaNumberToScalaNumber[A, B] _
implicit val c: java.util.Iterator[A] => Iterator[B] = javaIteratorToIterator[A, B] _
implicit val sn: A => B = javaNumberToScalaNumber[A, B]
implicit val c: java.util.Iterator[A] => Iterator[B] = javaIteratorToIterator[A, B]
val dt = Discretization(discretizationTask)
dt match {
case dt: EquidistantIntervals[B] => dt.discretize(data.asScala)
case _: EquifrequentIntervals[B] | _: EquisizedIntervals[B] => data match {
case data: discretization.SortedIterable[A] with discretization.PersistentIterable[A] =>
SortedPersistentNumericIterable[B, Seq[Interval]](data, file)(dt.discretize)
SortedPersistentNumericTraversable[B, Seq[Interval]](data, file)(dt.discretize)
case data: discretization.InMemoryIterable[A] =>
dt.discretize(SortedInMemoryNumericIterable(data.iterator(), discretizationTask.getBufferSize))
dt.discretize(SortedInMemoryNumericTraversable(data.iterator(), discretizationTask.getBufferSize))
case data: discretization.ReversableSortedIterable[A] =>
dt.discretize(data: ReversableSortedIterable[B])
dt.discretize(data: ReversableSortedTraversable[B])
case data: discretization.SortedIterable[A] =>
SortedPersistentNumericIterable[B, Seq[Interval]](data, file)(dt.discretize)
SortedPersistentNumericTraversable[B, Seq[Interval]](data, file)(dt.discretize)
case _ =>
SortedPersistentNumericIterable[B, Seq[Interval]](data.iterator(), directory, discretizationTask.getBufferSize)(dt.discretize)
SortedPersistentNumericTraversable[B, Seq[Interval]](data.iterator(), directory, discretizationTask.getBufferSize)(dt.discretize)
}
case _ => Array()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ object Interval {

def getRightBoundValue: java.lang.Double = interval.maxValue.value

def isLeftBoundClosed: java.lang.Boolean = interval.minValue.isInstanceOf[InclusiveIntervalBound]
def isLeftBoundClosed: java.lang.Boolean = interval.minValue.isInstanceOf[IntervalBound.Inclusive]

def isRightBoundClosed: java.lang.Boolean = interval.maxValue.isInstanceOf[InclusiveIntervalBound]
def isRightBoundClosed: java.lang.Boolean = interval.maxValue.isInstanceOf[IntervalBound.Inclusive]

def isLeftBoundOpened: java.lang.Boolean = !isLeftBoundClosed

Expand All @@ -27,6 +27,4 @@ object Interval {

implicit def seqIntervalsToArrayJavaIntervals(intervals: Seq[Interval]): Array[discretization.Interval] = intervals.iterator.map(x => x: discretization.Interval).toArray

}

case class IntervalFrequency(interval: Interval, frequency: Int)
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ sealed trait IntervalBound {
val value: Double
}

case class InclusiveIntervalBound(value: Double) extends IntervalBound
object IntervalBound {

case class ExclusiveIntervalBound(value: Double) extends IntervalBound
case class Inclusive(value: Double) extends IntervalBound

case class Exclusive(value: Double) extends IntervalBound

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package eu.easyminer.discretization.impl

/**
* Pairs an [[Interval]] with an integer frequency.
*
* NOTE(review): the frequency is presumably the number of data values that fall
* into the interval -- confirm against the code that builds these instances.
*
* Created by Vaclav Zeman on 16. 2. 2018.
*
* @param interval  the interval being described
* @param frequency integer frequency associated with the interval
*/
case class IntervalFrequency(interval: Interval, frequency: Int)
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ trait IterableConversions {
implicit class PimpedJavaIterable[A <: Number](it: java.lang.Iterable[A]) {

def asScala[B](implicit n: Numeric[B], numberToScalaNumber: A => B): Iterable[B] = {
implicit val c: java.util.Iterator[A] => Iterator[B] = javaIteratorToIterator[A, B] _
implicit val c: java.util.Iterator[A] => Iterator[B] = javaIteratorToIterator[A, B]
it match {
case it: ReversableSortedIterable[A] => it: sorting.ReversableSortedIterable[B]
case it: SortedIterable[A] => it: sorting.SortedIterable[B]
case it: ReversableSortedIterable[A] => it: sorting.ReversableSortedTraversable[B]
case it: SortedIterable[A] => it: sorting.SortedTraversable[B]
case _ => new Iterable[B] {
def iterator: Iterator[B] = it.iterator()
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package eu.easyminer.discretization.impl

import java.io._

import eu.easyminer.discretization.util.NumericByteArray._
import eu.easyminer.discretization.util.PersistentTraversableOps._

/**
 * A `Traversable` of numeric values that is tied to a file on disk.
 *
 * What `foreach` does depends on whether `file` exists at the moment it is called:
 *  - if the file exists, elements are obtained from `inputStreamTraversable` over a
 *    `FileInputStream` on `file`, and `f` is applied to each of them;
 *  - otherwise `outputStreamTraversable(col, new FileOutputStream(file))` is invoked
 *    and its result is discarded.
 *
 * NOTE(review): in the "file does not exist" branch `f` is never applied by this class
 * itself. Presumably `outputStreamTraversable` persists `col` into the file so that a
 * later traversal takes the first branch -- confirm against `PersistentTraversableOps`.
 *
 * The constructor is private, so instances cannot be created from outside this class
 * and its companion.
 *
 * Created by propan on 17. 3. 2017.
 *
 * @param col  source collection handed to `outputStreamTraversable`
 * @param file file that is checked for existence, read from, or written to
 * @param n    numeric evidence for `T`
 */
class PersistentNumericTraversable[T] private(col: Traversable[T], file: File)(implicit n: Numeric[T]) extends Traversable[T] {
  //implicit private val b2n: Array[Byte] => T = byteArrayToNumber[T]
  def foreach[U](f: T => U): Unit = {
    if (file.exists()) {
      // File already present: stream its content and feed every element to f.
      inputStreamTraversable[T](new FileInputStream(file)).foreach(f)
    } else {
      // File missing: hand the source collection and an output stream to the helper.
      outputStreamTraversable(col, new FileOutputStream(file))
    }
  }
}

object PersistentNumericTraversable {

  /**
   * Runs `f` against a [[PersistentNumericTraversable]] built from `col` and `file`,
   * and deletes `file` afterwards.
   *
   * The deletion happens in a `finally` clause, so it is attempted whether `f`
   * returns normally or throws. The boolean result of `File.delete()` is ignored.
   *
   * @param col  source collection wrapped by the persistent traversable
   * @param file backing file; deleted once `f` has finished
   * @param f    computation to run on the persistent traversable
   * @param n    numeric evidence for `A`
   * @return the value produced by `f`
   */
  def apply[A, B](col: Traversable[A], file: File)(f: Traversable[A] => B)(implicit n: Numeric[A]): B = {
    val persisted = new PersistentNumericTraversable(col, file)
    try f(persisted) finally file.delete()
  }

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.easyminer.discretization.impl

import eu.easyminer.discretization.impl.sorting.SortedIterable
import eu.easyminer.discretization.impl.sorting.SortedTraversable

import scala.language.implicitConversions

Expand All @@ -11,7 +11,7 @@ case class ValueFrequency[T](value: T, frequency: Int)

object ValueFrequency {

implicit def sortedIterableToValueFrequencyIterable[T](it: SortedIterable[T])(implicit n: Numeric[T]): Iterable[ValueFrequency[T]] = new Iterable[ValueFrequency[T]] {
implicit def sortedIterableToValueFrequencyIterable[T](it: SortedTraversable[T])(implicit n: Numeric[T]): Iterable[ValueFrequency[T]] = new Iterable[ValueFrequency[T]] {
def iterator: Iterator[ValueFrequency[T]] = new Iterator[ValueFrequency[T]] {
val _it = it.iterator
var lastValue = Option.empty[T]
Expand Down
Loading

0 comments on commit 370378a

Please sign in to comment.