diff --git a/pom.xml b/pom.xml
index eb2c9e8..dd3a4b5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -97,7 +97,8 @@
- 8.2.1
+
+ 8.2.2
Partially Collapsed Parallel LDA
diff --git a/src/main/java/cc/mallet/util/LDAUtils.java b/src/main/java/cc/mallet/util/LDAUtils.java
index da29a95..80b9a3c 100644
--- a/src/main/java/cc/mallet/util/LDAUtils.java
+++ b/src/main/java/cc/mallet/util/LDAUtils.java
@@ -98,16 +98,41 @@ public static Pipe buildSerialPipe(String stoplistFile) {
}
+ /** Builds the pipe with no target alphabet and {@code raw = false}. */
public static Pipe buildSerialPipe(String stoplistFile, Alphabet dataAlphabet) {
- return buildSerialPipe(stoplistFile, dataAlphabet, null);
+ return buildSerialPipe(stoplistFile, dataAlphabet, null, false);
}
- public static Pipe buildSerialPipe(String stoplistFile, Alphabet dataAlphabet, LabelAlphabet targetAlphabet) {
+ /**
+ * Builds the pipe without a target alphabet; {@code raw} selects RawTokenizer
+ * instead of SimpleTokenizerLarge and leaves the {@code csl} pipe out.
+ */
+ public static Pipe buildSerialPipe(String stoplistFile, Alphabet dataAlphabet, boolean raw) {
+ return buildSerialPipe(stoplistFile, dataAlphabet, null, raw);
+ }
+
+ /**
+ * Kept for source compatibility with callers of the pre-existing three-argument
+ * overload; equivalent to passing {@code raw = false}.
+ */
+ public static Pipe buildSerialPipe(String stoplistFile, Alphabet dataAlphabet, LabelAlphabet targetAlphabet) {
+ return buildSerialPipe(stoplistFile, dataAlphabet, targetAlphabet, false);
+ }
+
+ public static Pipe buildSerialPipe(String stoplistFile, Alphabet dataAlphabet, LabelAlphabet targetAlphabet, boolean raw) {
int maxBufSize = 10000;
- SimpleTokenizerLarge tokenizer = null;
- if(stoplistFile==null) {
- tokenizer = new SimpleTokenizerLarge(new HashSet(), maxBufSize);
+ Pipe tokenizer = null;
+ if(raw) {
+ if(stoplistFile==null) {
+ tokenizer = new RawTokenizer(new HashSet(), maxBufSize);
+ } else {
+ tokenizer = new RawTokenizer(new File(stoplistFile), maxBufSize);
+ }
} else {
- tokenizer = new SimpleTokenizerLarge(new File(stoplistFile), maxBufSize);
+ if(stoplistFile==null) {
+ tokenizer = new SimpleTokenizerLarge(new HashSet(), maxBufSize);
+ } else {
+ tokenizer = new SimpleTokenizerLarge(new File(stoplistFile), maxBufSize);
+ }
}
ArrayList pipes = new ArrayList();
@@ -130,7 +155,7 @@ public static Pipe buildSerialPipe(String stoplistFile, Alphabet dataAlphabet, L
Target2Label ttl = new Target2Label (tAlphabet);
- pipes.add(csl);
+ if(!raw) pipes.add(csl);
pipes.add(tokenizer);
pipes.add(sl2fs);
pipes.add(ttl);