Skip to content

Commit

Permalink
Publish first version
Browse files Browse the repository at this point in the history
  • Loading branch information
SongRb committed Jun 24, 2018
1 parent 1655915 commit a895130
Show file tree
Hide file tree
Showing 33 changed files with 2,403 additions and 3,192 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@

/OneNotePageSearcher/bin
/OneNotePageSearcher/obj
/OneNotePageSearcher/deep_learning.xml
/OneNotePageSearcher/NLP.one (On 11-19-2017).one
/.vs
102 changes: 56 additions & 46 deletions OneNotePageSearcher/NetLuceneProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,104 +15,116 @@ namespace OneNotePageSearcher
internal class NetLuceneProvider
{
private readonly string _indexPath = "LuceneIndex";

private readonly string _indexByDocumentPath = "LuceneIndex";

private int _maxDoc;
private readonly Analyzer _analyzer;

private IndexWriter writer;

private int _searchLimit = 1000;

private Directory directory;
private Directory indexDirectory= null;

public bool debug;

public NetLuceneProvider()
public NetLuceneProvider(bool overwrite)
{
_analyzer = new StandardAnalyzer(Version.LUCENE_30);
SetWorkingDirectory();
}

public void SetWorkingDirectory()
{
directory = FSDirectory.Open(_indexPath);
indexDirectory = FSDirectory.Open(_indexPath);
}

public void SetUpIndexer()
public void SetUpReader()
{
var indexReader = IndexReader.Open(directory, true);
var indexReader = IndexReader.Open(indexDirectory, true);
_maxDoc = indexReader.MaxDoc;
Console.WriteLine(_maxDoc);
if (debug) Console.WriteLine(_maxDoc);
indexReader.Dispose();
directory.Dispose();

indexDirectory.Dispose();
}

public void SetUpWriter()
public void SetUpWriter(bool overwrite = true)
{
writer = new IndexWriter(directory, _analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
writer = new IndexWriter(indexDirectory, _analyzer, overwrite, IndexWriter.MaxFieldLength.UNLIMITED);
}


public void CloseWriter()
{
writer.Optimize();
//Close the writer
writer.Flush(true, true, true);
writer.Dispose();
directory.Dispose();
indexDirectory.Dispose();
}

public void AddDocument(List<Tuple<string, string>> documentList)
public void AddDocumentList(List<Tuple<string, string>> documentList)
{
Directory directory = FSDirectory.Open(_indexPath);

foreach (var docPair in documentList)
{
AddTextToIndex(docPair.Item1, docPair.Item2);
}


}

//private static void Main(string[] args)
//{
// var netLuceneProvider = new NetLuceneProvider();
// var docList = new List<Tuple<string, string>>();

// docList.Add(new Tuple<string, string>("1", "科学"));
// docList.Add(new Tuple<string, string>("2", "中国科学院"));
// docList.Add(new Tuple<string, string>("3", "国科大"));
// docList.Add(new Tuple<string, string>("4", "科技是第一发展力"));
// docList.Add(new Tuple<string, string>("5", "语文和数学科目"));
// docList.Add(new Tuple<string, string>("6", "特搜科"));

// netLuceneProvider.SetUpIndexer();
// netLuceneProvider.AddDocument(docList);
public void AddDocument(Tuple<string,string> doc)
{
AddTextToIndex(doc.Item1, doc.Item2);
}

// var result = netLuceneProvider.Search("科");
// foreach (var resTuple in result)
// {
// Console.WriteLine(resTuple.ToString());
// }
public void DeleteDocumentByID(string id)
{
writer.DeleteDocuments(new Term("id", id));
}

public HashSet<String> GetAllValuesByField(string field_name)
{
IndexReader reader = IndexReader.Open(indexDirectory,true);
TermEnum terms = reader.Terms();
HashSet<String> uniqueTerms = new HashSet<String>();
while (terms.Next())
{
var term = terms.Term;
if (term.Field.Equals(field_name))
{
uniqueTerms.Add(term.Text);
}
}
return uniqueTerms;
}

// Console.ReadLine();
//}
private void Main(string[] args)
{
var lucene = new NetLuceneProvider(true);
lucene.SetWorkingDirectory();
if (debug) Console.WriteLine(String.Join(",", lucene.GetAllValuesByField("id")));
if (debug) Console.Read();
}

public List<Tuple<string, string, float>> Search(string q)
{
//Setup searcher
Directory directory = FSDirectory.Open(_indexPath);
var searcher = new IndexSearcher(directory);
//Directory directory = FSDirectory.Open(_indexPath);

SetWorkingDirectory();

var searcher = new IndexSearcher(indexDirectory);
var parser = new QueryParser(Version.LUCENE_30, "postBody", _analyzer);

var query = parser.Parse(q);
var hits = searcher.Search(query, _searchLimit);

Console.WriteLine(hits.TotalHits);
if (debug) Console.WriteLine(hits.TotalHits);

var results = hits.TotalHits;

var resultList = new List<Tuple<string, string, float>>();

Console.WriteLine("Found {0} results", results);
if (debug) Console.WriteLine("Found {0} results", results);
for (var i = 0; i < results && i< _searchLimit; i++)
{
var doc = searcher.Doc(hits.ScoreDocs[i].Doc);
Expand All @@ -126,16 +138,14 @@ public List<Tuple<string, string, float>> Search(string q)

//Clean up everything
searcher.Dispose();
directory.Dispose();
indexDirectory.Dispose();
return resultList;
}



private void AddTextToIndex(string id, string text)
{
var doc = new Document();
doc.Add(new Field("id", id, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.ANALYZED));
writer.AddDocument(doc);
}
Expand Down
Loading

0 comments on commit a895130

Please sign in to comment.