diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/MapPasswordFinder.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/MapPasswordFinder.scala
new file mode 100644
index 0000000..37965c5
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/MapPasswordFinder.scala
@@ -0,0 +1,44 @@
+package chriszen.multihash
+
+import java.io.{InputStreamReader, BufferedReader, InputStream}
+import java.security.MessageDigest
+import scala.collection.mutable
+import javax.xml.bind.DatatypeConverter
+
+/**
+ * [[PasswordFinder]] that keeps the whole dictionary in memory, keyed by the
+ * Base64-encoded digest of each password, so that a lookup is a plain map read.
+ */
+class MapPasswordFinder extends PasswordFinder {
+
+  private val map = new mutable.HashMap[String, String]()
+
+  /**
+   * Reads one password per line from `inputStream` and stores it under the
+   * Base64 encoding of its digest. Entries from earlier calls are kept.
+   *
+   * The stream is left open: it stays owned by the caller.
+   *
+   * @param inputStream   dictionary source, one password per line
+   * @param charsetName   charset used to decode the stream and to encode each password before hashing
+   * @param hashAlgorithm `java.security.MessageDigest` algorithm name
+   */
+  override def loadDictionary(inputStream: InputStream,
+                              charsetName: String = "UTF-8",
+                              hashAlgorithm: String = "SHA-256"): Unit = {
+
+    val reader = new BufferedReader(new InputStreamReader(inputStream, charsetName))
+    // digest(bytes) resets the instance afterwards, so one MessageDigest serves every line.
+    val digest = MessageDigest.getInstance(hashAlgorithm)
+
+    Iterator.continually(reader.readLine()).takeWhile(_ != null).foreach { password =>
+      // Encode with the requested charset rather than the platform default, so the
+      // hash of a non-ASCII password does not depend on the machine's locale.
+      val hash = DatatypeConverter.printBase64Binary(digest.digest(password.getBytes(charsetName)))
+      map.put(hash, password)
+    }
+  }
+
+  /** Returns the password whose Base64-encoded digest equals `hash`, if one was loaded. */
+  override def findPassword(hash: String): Option[String] = map.get(hash)
+}
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/MultiHashEval.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/MultiHashEval.scala
new file mode 100644
index 0000000..369d65e
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/MultiHashEval.scala
@@ -0,0 +1,108 @@
+package chriszen.multihash
+
+import java.io.{InputStreamReader, BufferedReader, InputStream, FileInputStream}
+import scala.io.Source
+import javax.xml.bind.DatatypeConverter
+import java.security.MessageDigest
+import scala.collection.mutable.ArrayBuffer
+
+/**
+ * Command-line benchmark: loads a dictionary into a [[MapPasswordFinder]] and counts
+ * how many of a list of Base64-encoded hashes it can resolve.
+ *
+ * Optional arguments: the file of hashes (one per line) and the dictionary file. When one
+ * is missing, the classpath resource named by `dictResource` is used in its place.
+ */
+object MultiHashEval {
+  val hashAlgorithm = "SHA-256"
+  val charsetName = "UTF-8"
+  val dictResource = "/cain.txt"
+
+  def main(args: Array[String]): Unit = {
+
+    // The hashes are traversed twice (looked up, then counted), so they are read into a
+    // Vector up front: the one-shot Iterator returned by Source.getLines would already be
+    // exhausted by the time its size is reported.
+    val hashes: Vector[String] =
+      if (args.length > 0) {
+        val source = Source.fromFile(args(0), charsetName)
+        try source.getLines().toVector finally source.close()
+      } else {
+        val dictionary = openResource(dictResource)
+        try loadHashesFromDictionary(dictionary, charsetName, hashAlgorithm).toVector
+        finally dictionary.close()
+      }
+
+    val inputStream: InputStream =
+      if (args.length > 1) new FileInputStream(args(1)) else openResource(dictResource)
+
+    val passwordFinder = new MapPasswordFinder
+
+    // The stream is opened here, so it is released here once the dictionary is loaded.
+    try {
+      time("Loading dictionary ...") {
+        passwordFinder.loadDictionary(inputStream, charsetName, hashAlgorithm)
+      }
+    } finally inputStream.close()
+
+    time("Looking for passwords ...") {
+      val hits = hashes.count(hash => passwordFinder.findPassword(hash).isDefined)
+
+      println(s"Found $hits passwords out of ${hashes.size}")
+    }
+
+    /*time("Looking for passwords ...") {
+      hashes.foreach { hash =>
+        passwordFinder.findPassword(hash).foreach { password =>
+          println(s"The password for '$hash' is $password")
+        }
+      }
+    }*/
+  }
+
+  /**
+   * Hashes every line of `inputStream` and returns the Base64-encoded digests in input order.
+   *
+   * The stream is left open: it stays owned by the caller.
+   *
+   * @param inputStream   dictionary source, one password per line
+   * @param charsetName   charset used to decode the stream and to encode each password before hashing
+   * @param hashAlgorithm `java.security.MessageDigest` algorithm name
+   */
+  def loadHashesFromDictionary(inputStream: InputStream,
+                               charsetName: String = "UTF-8",
+                               hashAlgorithm: String = "SHA-256"): ArrayBuffer[String] = {
+
+    val reader = new BufferedReader(new InputStreamReader(inputStream, charsetName))
+    // digest(bytes) resets the instance afterwards, so one MessageDigest serves every line.
+    val digest = MessageDigest.getInstance(hashAlgorithm)
+
+    val hashes = new ArrayBuffer[String]()
+
+    Iterator.continually(reader.readLine()).takeWhile(_ != null).foreach { password =>
+      // Encode with the requested charset rather than the platform default.
+      hashes += DatatypeConverter.printBase64Binary(digest.digest(password.getBytes(charsetName)))
+    }
+
+    hashes
+  }
+
+  /**
+   * Prints `title`, evaluates `code`, prints the elapsed wall-clock time and returns the result of `code`.
+   *
+   * Adapted from http://stackoverflow.com/questions/15436593/how-to-measure-and-display-the-running-time-of-a-single-test
+   */
+  def time[T](title: String)(code: => T): T = {
+    println(title)
+    val start = System.currentTimeMillis
+    val x = code
+    val elapsed = ((System.currentTimeMillis - start) / 1000.0)
+    println("Done in %.3f secs" format elapsed)
+    x
+  }
+
+  /** Opens a classpath resource, failing with a clear error instead of a later NullPointerException. */
+  private def openResource(name: String): InputStream =
+    Option(getClass.getResourceAsStream(name)).getOrElse(
+      throw new java.io.FileNotFoundException(s"Classpath resource not found: $name"))
+}
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/PasswordFinder.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/PasswordFinder.scala
new file mode 100644
index 0000000..518a2af
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/multihash/PasswordFinder.scala
@@ -0,0 +1,12 @@
+package chriszen.multihash
+
+import java.io.InputStream
+/** Resolves hashes back to passwords, using a dictionary that is loaded beforehand. */
+abstract class PasswordFinder {
+  /** Loads a dictionary from `inputStream` so that later `findPassword` calls can use it. */
+  def loadDictionary(inputStream: InputStream,
+                     charsetName: String = "UTF-8",
+                     hashAlgorithm: String = "SHA-256"): Unit
+  /** Returns the password for `hash`, or `None` when it is not known. */
+  def findPassword(hash: String): Option[String]
+}
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/Hash.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/Hash.scala
new file mode 100644
index 0000000..3bf45ef
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/Hash.scala
@@ -0,0 +1,32 @@
+package chriszen.singlehash
+
+import javax.xml.bind.DatatypeConverter
+
+/** Factory methods for [[Hash]]. */
+object Hash {
+  /** Wraps raw digest bytes (the array is not copied). */
+  def apply(b: Array[Byte]): Hash = new Hash(b)
+
+  /** Decodes a Base64 string into a [[Hash]]. */
+  def fromBase64(s: String): Hash = new Hash(DatatypeConverter.parseBase64Binary(s))
+}
+
+/**
+ * A digest value compared by content rather than by array identity.
+ *
+ * @param bytes raw digest bytes
+ */
+class Hash(val bytes: Array[Byte]) {
+
+  /** Two hashes are equal when their byte arrays have the same length and contents. */
+  override def equals(other: Any): Boolean = other match {
+    case that: Hash => java.util.Arrays.equals(bytes, that.bytes)
+    case _          => false
+  }
+
+  // Must agree with equals, otherwise equal hashes misbehave as keys of hash-based collections.
+  override def hashCode: Int = java.util.Arrays.hashCode(bytes)
+
+  /** Base64 rendering of the digest bytes. */
+  override def toString: String = DatatypeConverter.printBase64Binary(bytes)
+}
\ No newline at end of file
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/SingleHashEval.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/SingleHashEval.scala
new file mode 100644
index 0000000..79a0dd0
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/SingleHashEval.scala
@@ -0,0 +1,34 @@
+package chriszen.singlehash
+
+import java.io.FileInputStream
+
+/**
+ * Command-line entry point that searches a dictionary for the password matching one hash.
+ *
+ * Optional arguments: the Base64-encoded target hash and the dictionary file.
+ */
+object SingleHashEval {
+  val hashAlgorithm = "SHA-256"
+  val charsetName = "UTF-8"
+
+  def main(args: Array[String]): Unit = {
+
+    val defaultTargetHash = "tMWuKulh/ojRKCG9+UWxVILvAhcD4fkAzL4aJ/It8H8="
+
+    val targetHash = if (args.length > 0) args(0) else defaultTargetHash
+
+    val inputStream = if (args.length > 1)
+      new FileInputStream(args(1))
+    else
+      getClass.getResourceAsStream("/cain.txt")
+
+    val passwordFinder = new chriszen.singlehash.solution1.PasswordFinder
+
+    // The stream is opened here, so it is released here once the search is over.
+    val password =
+      try passwordFinder.findPassword(targetHash, inputStream, charsetName, hashAlgorithm)
+      finally if (inputStream != null) inputStream.close()
+
+    println(password.fold(s"The password for '$targetHash' has not been found :-(")(found => s"The password for '$targetHash' is $found"))
+  }
+}
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution1/PasswordFinder.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution1/PasswordFinder.scala
new file mode 100644
index 0000000..fe6cca2
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution1/PasswordFinder.scala
@@ -0,0 +1,43 @@
+package chriszen.singlehash.solution1
+
+import java.io.{InputStreamReader, BufferedReader, InputStream}
+import java.security.MessageDigest
+import javax.xml.bind.DatatypeConverter
+import chriszen.singlehash.Hash
+
+/**
+ * This is the simple solution that just calculates the hash for each dictionary key and compares with the target.
+ *
+ * It is more efficient than the solution2 for a single target search.
+ */
+class PasswordFinder {
+
+  /**
+   * Scans the dictionary line by line and returns the first password whose digest equals the target.
+   *
+   * Reading stops at the first match. The stream is left open: it stays owned by the caller.
+   *
+   * @param targetHashString Base64-encoded digest to look for
+   * @param inputStream      dictionary source, one password per line
+   * @param charsetName      charset used to decode the stream and to encode each password before hashing
+   * @param hashAlgorithm    `java.security.MessageDigest` algorithm name
+   * @return the matching password, or `None` when no line matches
+   */
+  def findPassword(
+    targetHashString: String,
+    inputStream: InputStream,
+    charsetName: String = "UTF-8",
+    hashAlgorithm: String = "SHA-256"
+  ): Option[String] = {
+
+    val reader = new BufferedReader(new InputStreamReader(inputStream, charsetName))
+    val targetHash = Hash.fromBase64(targetHashString)
+    // digest(bytes) resets the instance afterwards, so one MessageDigest serves every line.
+    val digest = MessageDigest.getInstance(hashAlgorithm)
+
+    // Each line is encoded with the requested charset rather than the platform default.
+    Iterator.continually(reader.readLine())
+      .takeWhile(_ != null)
+      .find(key => targetHash == new Hash(digest.digest(key.getBytes(charsetName))))
+  }
+}
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution2/HashedTST.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution2/HashedTST.scala
new file mode 100644
index 0000000..b52ba47
--- /dev/null
+++ b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution2/HashedTST.scala
@@ -0,0 +1,180 @@
+package chriszen.singlehash.solution2
+
+import java.security.MessageDigest
+import chriszen.singlehash.Hash
+
+/**
+ * Immutable hashed ternary search tree.
+ *
+ * http://en.wikipedia.org/wiki/Ternary_search_tree
+ *
+ * It keeps the digest and the previously calculated hash for each node
+ * so the calculation of the hash for strings with common prefixes is optimized.
+ *
+ * The hash algorithm is hard-coded and requires recompilation to change it.
+ */
+
+object HashedTST {
+  val Algorithm = "SHA-256"
+  val Empty: HashedTST = new HashedTST {}
+}
+
+/** Tree interface; the members defined here implement the empty tree. */
+trait HashedTST {
+
+  def isEmpty: Boolean = true
+
+  /**
+   * Inserts the part of `s` that starts at `index` as a fresh chain of nodes.
+   *
+   * @param s          string being inserted
+   * @param index      position in `s` of the character handled by this call
+   * @param prevDigest digest state covering `s` before `index`; it is cloned, never updated
+   * @return the tree replacing this position and the hash of `s`, which is `None` only
+   *         when `index` is past the end of `s`
+   */
+  def put(s: String, index: Int, prevDigest: MessageDigest): (HashedTST, Option[Hash]) = {
+    if (index >= s.length)
+      (HashedTST.Empty, None)
+    else {
+      val currentChar = s.charAt(index)
+
+      val digest = prevDigest.clone.asInstanceOf[MessageDigest]
+
+      // NOTE(review): toByte keeps only the low 8 bits of the character, so the hash can only
+      // equal the digest of the encoded string when every character encodes to that one
+      // byte (ASCII does). Confirm whether non-ASCII passwords need to be supported.
+      digest.update(currentChar.toByte)
+
+      val (subtree, foundHash) = put(s, index + 1, digest)
+
+      // On the last character, finalise a clone so the digest stored in the node stays reusable.
+      val nextHash = if (index == s.length - 1)
+        Some(new Hash(digest.clone.asInstanceOf[MessageDigest].digest()))
+      else None
+
+      (new HashedTSTNode(HashedTST.Empty, subtree, HashedTST.Empty, currentChar,
+        /*s.substring(0, index + 1),*/ digest, nextHash), foundHash orElse nextHash)
+    }
+  }
+
+  /** Inserts `s` and returns the resulting tree together with the hash of `s`. */
+  def put(s: String): (HashedTST, Hash) = {
+    val (nextTree, foundHash) = put(s, 0, createDigest)
+    // No character is visited for the empty string; its hash is the digest of no input at all.
+    (nextTree, foundHash.getOrElse(new Hash(createDigest.digest())))
+  }
+
+  def get(key: String, index: Int): Option[Hash] = None
+  def get(key: String): Option[Hash] = None
+
+  def createDigest: MessageDigest = MessageDigest.getInstance(HashedTST.Algorithm)
+
+  def toStringHelper(sb: StringBuilder, margin: String): Unit = {
+    sb.append("-\n")
+  }
+
+  override def toString: String = "Empty"
+}
+
+/**
+ * Non-empty tree position.
+ *
+ * @param left   subtree holding the characters smaller than `key`
+ * @param mid    subtree holding the continuations of the prefix that ends with `key`
+ * @param right  subtree holding the characters greater than `key`
+ * @param key    character held by this node
+ * @param digest digest state after the prefix that ends at this node; only ever cloned
+ * @param hash   hash of that prefix, present once the prefix has been inserted as a whole string
+ */
+class HashedTSTNode(
+  val left: HashedTST,
+  val mid: HashedTST,
+  val right: HashedTST,
+  val key: Char,
+  //val path: String,
+  val digest: MessageDigest,
+  val hash: Option[Hash]
+  ) extends HashedTST {
+
+  override def isEmpty() = false
+
+  /**
+   * Inserts the part of `s` that starts at `index`, rebuilding only the nodes on the path taken.
+   * The stored digest and any hash already recorded on a node are carried over to its copy.
+   */
+  override def put(s: String, index: Int, prevDigest: MessageDigest): (HashedTST, Option[Hash]) = {
+    if (index >= s.length)
+      // Nothing left to insert: keep this subtree rather than replacing it with the empty tree.
+      (this, None)
+    else {
+      val currentChar = s.charAt(index)
+
+      if (currentChar == key) {
+        if (index == s.length - 1) {
+          // `s` ends on this node: reuse the recorded hash or finalise a clone of the stored digest.
+          val ownHash = hash.getOrElse(new Hash(digest.clone.asInstanceOf[MessageDigest].digest()))
+          (new HashedTSTNode(left, mid, right, key, /*path,*/ digest, Some(ownHash)), Some(ownHash))
+        }
+        else {
+          val (subtree, foundHash) = mid.put(s, index + 1, digest)
+          (new HashedTSTNode(left, subtree, right, key, /*path,*/ digest, hash), foundHash)
+        }
+      }
+      else if (currentChar < key) {
+        val (subtree, foundHash) = left.put(s, index, prevDigest)
+        (new HashedTSTNode(subtree, mid, right, key, /*path,*/ digest, hash), foundHash)
+      }
+      else {//if (currentChar > key)
+        val (subtree, foundHash) = right.put(s, index, prevDigest)
+        (new HashedTSTNode(left, mid, subtree, key, /*path,*/ digest, hash), foundHash)
+      }
+    }
+  }
+
+  /** Looks up the hash recorded for `s`, matching its characters from `index` onwards. */
+  override def get(s: String, index: Int): Option[Hash] = {
+    if (index >= s.length)
+      None
+    else {
+      val currentChar = s.charAt(index)
+      if (currentChar < key)
+        left.get(s, index)
+      else if (currentChar > key)
+        right.get(s, index)
+      else if (index < s.length - 1)
+        // The following character lives below this node, not in this node itself.
+        mid.get(s, index + 1)
+      else
+        hash
+    }
+  }
+
+  override def get(key: String): Option[Hash] = {
+    get(key, 0)
+  }
+
+  override def toStringHelper(sb: StringBuilder, margin: String): Unit = {
+    //sb.append(s"[$key, $path")
+    sb.append(s"[$key")
+    sb.append(hash.fold("]")(h => ", " + h.toString + "]"))
+    sb.append("\n")
+
+    // Empty children are left out of the dump.
+    def appendChild(label: String, child: HashedTST): Unit =
+      if (child != HashedTST.Empty) {
+        sb.append(s"${margin}$label: ")
+        child.toStringHelper(sb, margin + " ")
+      }
+
+    appendChild("l", left)
+    appendChild("m", mid)
+    appendChild("r", right)
+  }
+
+  override def toString: String = {
+    val sb = new StringBuilder
+    toStringHelper(sb, "")
+    sb.toString()
+  }
+}
diff --git a/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution2/PasswordFinder.scala b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution2/PasswordFinder.scala
new file mode 100644
index 0000000..d02c19d
--- /dev/null
+++
b/micro/backlog/001/chris_zen/src/main/scala/chriszen/singlehash/solution2/PasswordFinder.scala
@@ -0,0 +1,119 @@
+package chriszen.singlehash.solution2
+
+import java.io.{InputStreamReader, BufferedReader, InputStream}
+import scala.util.Random
+import chriszen.singlehash.Hash
+
+/**
+ * This solution uses a Ternary Search Tree to save the pre-calculated digests for all the substrings already found.
+ *
+ * This would be a good solution in case that many target hashes were checked
+ * but for a single one the immutability (and its associated heap use) has a big penalty
+ */
+
+class PasswordFinder {
+
+  //TODO HashedTST does not support defining the hash algorithm yet
+
+  /**
+   * Inserts the dictionary lines into a [[HashedTST]] in input order and returns the first
+   * one whose hash equals the target. Reading stops at the first match.
+   *
+   * @param targetHashString Base64-encoded digest to look for
+   * @param inputStream      dictionary source, one password per line; left open for the caller
+   * @param charsetName      charset used to decode the stream
+   * @param hashAlgorithm    currently ignored (see the TODO above): the tree always uses `HashedTST.Algorithm`
+   * @return the matching password, or `None` when no line matches
+   */
+  def findPassword(
+    targetHashString: String,
+    inputStream: InputStream,
+    charsetName: String = "UTF-8",
+    hashAlgorithm: String = "SHA-256"
+  ): Option[String] = {
+
+    val matchesTarget = newMatcher(targetHashString)
+
+    readLines(inputStream, charsetName).find(matchesTarget)
+  }
+
+  /**
+   * Same search as [[findPassword]], but the lines go through a shuffle buffer of `bufSize`
+   * entries so that they reach the tree in random order.
+   *
+   * @param targetHashString Base64-encoded digest to look for
+   * @param inputStream      dictionary source, one password per line; left open for the caller
+   * @param charsetName      charset used to decode the stream
+   * @param hashAlgorithm    currently ignored: the tree always uses `HashedTST.Algorithm`
+   * @param bufSize          capacity of the shuffle buffer; must be positive
+   * @return the matching password, or `None` when no line matches
+   * @throws IllegalArgumentException if `bufSize` is not positive
+   */
+  def findPasswordWithRandomization(
+    targetHashString: String,
+    inputStream: InputStream,
+    charsetName: String = "UTF-8",
+    hashAlgorithm: String = "SHA-256",
+    bufSize: Int = 1000): Option[String] = {
+
+    require(bufSize > 0, s"bufSize must be positive but was $bufSize")
+
+    val lines = readLines(inputStream, charsetName)
+    val matchesTarget = newMatcher(targetHashString)
+
+    // we use a shuffle buffer to keep the tree as balanced as possible
+    val buffer = new Array[String](bufSize)
+    val r = new Random
+
+    // fill the buffer; a line is only read while there is room for it, so none is dropped
+    var filled = 0
+    while (filled < bufSize && lines.hasNext) {
+      buffer(filled) = lines.next()
+      filled += 1
+    }
+
+    var password: Option[String] = None
+
+    // grow the tree while keeping the buffer filled: the drawn word is replaced by the next line
+    while (password.isEmpty && lines.hasNext) {
+      val nextIndex = r.nextInt(filled)
+      val nextWord = buffer(nextIndex)
+      buffer(nextIndex) = lines.next()
+
+      if (matchesTarget(nextWord)) password = Some(nextWord)
+    }
+
+    // flush the buffer: the drawn word is replaced by the last one and the buffer shrinks
+    while (password.isEmpty && filled > 0) {
+      val nextIndex = r.nextInt(filled)
+      val nextWord = buffer(nextIndex)
+      filled -= 1
+      buffer(nextIndex) = buffer(filled)
+
+      if (matchesTarget(nextWord)) password = Some(nextWord)
+    }
+
+    password
+  }
+
+  /** Lazily decodes `inputStream` into its lines; nothing is read until the iterator is consumed. */
+  private def readLines(inputStream: InputStream, charsetName: String): Iterator[String] = {
+    val reader = new BufferedReader(new InputStreamReader(inputStream, charsetName))
+    Iterator.continually(reader.readLine()).takeWhile(_ != null)
+  }
+
+  /**
+   * Builds the test applied to each word: it inserts the word into a tree private to the
+   * returned function and tells whether the hash obtained from the tree equals the target.
+   */
+  private def newMatcher(targetHashString: String): String => Boolean = {
+    val targetHash = Hash.fromBase64(targetHashString)
+    var tree: HashedTST = HashedTST.Empty
+
+    key => {
+      val (nextTree, foundHash) = tree.put(key)
+      tree = nextTree
+      targetHash equals foundHash
+    }
+  }
+}