Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add slf4j log and load user dict from InputStream #66

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,23 @@
<artifactId>commons-lang3</artifactId>
<version>3.3.1</version>
</dependency>
<!-- SLF4J API: compile-time logging facade used by WordDictionary/FinalSeg (@Slf4j) -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<!-- Simple SLF4J binding so log output is visible when running the unit tests;
     test scope keeps it off the published compile classpath -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
<scope>test</scope>
</dependency>
<!-- Lombok generates the static `log` field for @Slf4j at compile time only,
     hence provided scope (no runtime dependency) -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.16.20</version>
<scope>provided</scope>
</dependency>
</dependencies>

<build>
Expand Down
104 changes: 64 additions & 40 deletions src/main/java/com/huaban/analysis/jieba/WordDictionary.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.huaban.analysis.jieba;

import lombok.extern.slf4j.Slf4j;

import java.io.BufferedReader;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
Expand All @@ -17,6 +19,7 @@
import java.util.Set;


@Slf4j
public class WordDictionary {
private static WordDictionary singleton;
private static final String MAIN_DICT = "/dict.txt";
Expand Down Expand Up @@ -54,7 +57,7 @@ public static WordDictionary getInstance() {
*/
public void init(Path configFile) {
String abspath = configFile.toAbsolutePath().toString();
System.out.println("initialize user dictionary:" + abspath);
log.info("initialize user dictionary: {}", abspath);
synchronized (WordDictionary.class) {
if (loadedPath.contains(abspath))
return;
Expand All @@ -63,14 +66,12 @@ public void init(Path configFile) {
try {
stream = Files.newDirectoryStream(configFile, String.format(Locale.getDefault(), "*%s", USER_DICT_SUFFIX));
for (Path path: stream){
System.err.println(String.format(Locale.getDefault(), "loading dict %s", path.toString()));
log.info("loading dict {}", path);
singleton.loadUserDict(path);
}
loadedPath.add(abspath);
} catch (IOException e) {
// TODO Auto-generated catch block
// e.printStackTrace();
System.err.println(String.format(Locale.getDefault(), "%s: load user dict failure!", configFile.toString()));
log.error("{}: load user dict failure", configFile, e);
}
}
}
Expand Down Expand Up @@ -110,19 +111,15 @@ public void loadDict() {
entry.setValue((Math.log(entry.getValue() / total)));
minFreq = Math.min(entry.getValue(), minFreq);
}
System.out.println(String.format(Locale.getDefault(), "main dict load finished, time elapsed %d ms",
System.currentTimeMillis() - s));
}
catch (IOException e) {
System.err.println(String.format(Locale.getDefault(), "%s load failure!", MAIN_DICT));
}
finally {
log.info("main dict load finished, time elapsed {} ms", System.currentTimeMillis() - s);
} catch (IOException e) {
log.error("{} load failure", MAIN_DICT, e);
} finally {
try {
if (null != is)
is.close();
}
catch (IOException e) {
System.err.println(String.format(Locale.getDefault(), "%s close failure!", MAIN_DICT));
} catch (IOException e) {
log.error("{} close failure!", MAIN_DICT, e);
}
}
}
Expand All @@ -143,38 +140,65 @@ public void loadUserDict(Path userDict) {
loadUserDict(userDict, StandardCharsets.UTF_8);
}


public void loadUserDict(Path userDict, Charset charset) {
public void loadUserDict(Path userDict, Charset charset) {
BufferedReader br = null;
try {
BufferedReader br = Files.newBufferedReader(userDict, charset);
long s = System.currentTimeMillis();
int count = 0;
while (br.ready()) {
String line = br.readLine();
String[] tokens = line.split("[\t ]+");

if (tokens.length < 1) {
// Ignore empty line
continue;
log.info("to read user dict {}", userDict);
br = Files.newBufferedReader(userDict, charset);
loadUserDict(br);
} catch (IOException e) {
log.error("load user dict {} failure!", userDict, e);
} finally {
if (br != null) {
try {
br.close();
} catch (IOException e) {
log.error("close BufferedReader failure!", e);
}

String word = tokens[0];

double freq = 3.0d;
if (tokens.length == 2)
freq = Double.valueOf(tokens[1]);
word = addWord(word);
freqs.put(word, Math.log(freq / total));
count++;
}
System.out.println(String.format(Locale.getDefault(), "user dict %s load finished, tot words:%d, time elapsed:%dms", userDict.toString(), count, System.currentTimeMillis() - s));
br.close();
}
catch (IOException e) {
System.err.println(String.format(Locale.getDefault(), "%s: load user dict failure!", userDict.toString()));
}

/**
 * Loads a user dictionary from an arbitrary stream (e.g. a classpath resource),
 * decoding it as UTF-8 and delegating the line parsing to
 * {@link #loadUserDict(BufferedReader)}.
 *
 * @param is source of dictionary lines; wrapped in a reader that this method
 *           always closes, and therefore the stream itself is closed too
 */
public void loadUserDict(InputStream is) {
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
    try {
        log.info("to read user dict from InputStream");
        loadUserDict(reader);
    } catch (IOException readFailure) {
        // Best-effort load: a broken stream must not take the segmenter down.
        log.error("load user dict failure!", readFailure);
    } finally {
        try {
            reader.close();
        } catch (IOException closeFailure) {
            log.error("close BufferedReader failure!", closeFailure);
        }
    }
}

/**
 * Parses user-dictionary lines of the form {@code word [freq]} (whitespace- or
 * tab-separated) from the given reader and registers each word with its
 * log-scaled frequency. The reader is NOT closed here; callers own it.
 *
 * @param br reader positioned at the start of the dictionary content
 * @throws IOException if reading from the underlying stream fails
 */
public void loadUserDict(BufferedReader br) throws IOException {
    long start = System.currentTimeMillis();
    int count = 0;
    String line;
    // readLine() == null is the reliable EOF signal; BufferedReader.ready()
    // only says whether a read would block, so it can stop early on slow
    // streams (e.g. network InputStreams) and drop dictionary entries.
    while ((line = br.readLine()) != null) {
        // "".split("[\t ]+") returns [""] (length 1), so a length check can
        // never catch blank lines; test the line itself to avoid inserting
        // the empty string as a word.
        if (line.trim().isEmpty())
            continue;

        String[] tokens = line.split("[\t ]+");
        String word = tokens[0];

        double freq = 3.0d; // default frequency when the line omits one
        if (tokens.length == 2)
            freq = Double.parseDouble(tokens[1]); // parseDouble: no boxing
        word = addWord(word);
        freqs.put(word, Math.log(freq / total));
        count++;
    }
    log.info("user dict load finished, total words: {}, time elapsed: {} ms", count, System.currentTimeMillis() - start);
}


public DictSegment getTrie() {
return this._dict;
Expand Down
15 changes: 7 additions & 8 deletions src/main/java/com/huaban/analysis/jieba/viterbi/FinalSeg.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
import com.huaban.analysis.jieba.CharacterUtil;
import com.huaban.analysis.jieba.Pair;
import com.huaban.analysis.jieba.Node;
import lombok.extern.slf4j.Slf4j;


@Slf4j
public class FinalSeg {
private static FinalSeg singleInstance;
private static final String PROB_EMIT = "/prob_emit.txt";
Expand Down Expand Up @@ -90,21 +92,18 @@ private void loadModel() {
values.put(tokens[0].charAt(0), Double.valueOf(tokens[1]));
}
}
}
catch (IOException e) {
System.err.println(String.format(Locale.getDefault(), "%s: load model failure!", PROB_EMIT));
}
finally {
} catch (IOException e) {
log.error("{}: load model failure!", PROB_EMIT, e);
} finally {
try {
if (null != is)
is.close();
}
catch (IOException e) {
System.err.println(String.format(Locale.getDefault(), "%s: close failure!", PROB_EMIT));
log.error("{}: close failure!", PROB_EMIT, e);
}
}
System.out.println(String.format(Locale.getDefault(), "model load finished, time elapsed %d ms.",
System.currentTimeMillis() - s));
log.info("model load finished, time elapsed {} ms.", System.currentTimeMillis() - s);
}


Expand Down
16 changes: 8 additions & 8 deletions src/test/java/com/huaban/analysis/jieba/JiebaSegmenterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
*/
package com.huaban.analysis.jieba;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.List;
Expand All @@ -14,13 +13,16 @@
import org.junit.Test;

import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
* @author matrix
*
*/
public class JiebaSegmenterTest extends TestCase {
private Logger logger = LoggerFactory.getLogger(JiebaSegmenterTest.class);
private JiebaSegmenter segmenter = new JiebaSegmenter();
String[] sentences =
new String[] {
Expand Down Expand Up @@ -133,7 +135,7 @@ protected void tearDown() throws Exception {
public void testCutForSearch() {
for (String sentence : sentences) {
List<SegToken> tokens = segmenter.process(sentence, SegMode.SEARCH);
System.out.print(String.format(Locale.getDefault(), "\n%s\n%s", sentence, tokens.toString()));
logger.info("\n{}\n{}", sentence, tokens);
}
}

Expand All @@ -142,7 +144,7 @@ public void testCutForSearch() {
public void testCutForIndex() {
for (String sentence : sentences) {
List<SegToken> tokens = segmenter.process(sentence, SegMode.INDEX);
System.out.print(String.format(Locale.getDefault(), "\n%s\n%s", sentence, tokens.toString()));
logger.info("\n{}\n{}", sentence, tokens);
}
}

Expand All @@ -159,7 +161,7 @@ public void testBugSentence() {
"干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 " };
for (String sentence : bugs) {
List<SegToken> tokens = segmenter.process(sentence, SegMode.SEARCH);
System.out.print(String.format(Locale.getDefault(), "\n%s\n%s", sentence, tokens.toString()));
logger.info("\n{}\n{}", sentence, tokens);
}
}

Expand All @@ -176,8 +178,7 @@ public void testSegmentSpeed() {
wordCount += sentence.length();
}
long elapsed = (System.currentTimeMillis() - start);
System.out.println(String.format(Locale.getDefault(), "time elapsed:%d, rate:%fkb/s, sentences:%.2f/s", elapsed,
(length * 1.0) / 1024.0f / (elapsed * 1.0 / 1000.0f), wordCount * 1000.0f / (elapsed * 1.0)));
logger.info("time elapsed: {}, rate: {}kb/s, sentences: {}/s", elapsed, length / 1024.0 / (elapsed / 1000.0), wordCount * 1000.0 / elapsed);
}


Expand All @@ -193,7 +194,6 @@ public void testLongTextSegmentSpeed() {
wordCount += sentence.length();
}
long elapsed = (System.currentTimeMillis() - start);
System.out.println(String.format(Locale.getDefault(), "time elapsed:%d, rate:%fkb/s, sentences:%.2f/s", elapsed,
(length * 1.0) / 1024.0f / (elapsed * 1.0 / 1000.0f), wordCount * 1000.0f / (elapsed * 1.0)));
logger.info("time elapsed: {}, rate: {}kb/s, sentences: {}/s", elapsed, length / 1024.0 / (elapsed / 1000.0), wordCount * 1000.0 / elapsed);
}
}