Skip to content

Commit

Permalink
Merge pull request #32 from yhshu/main
Browse files (browse the repository at this point in the history)
Quick fix for the ColBERTv2 issue
  • Loading branch information
bernaljg authored Jul 4, 2024
2 parents 026514a + 6f3be2a commit 1075a0d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
4 changes: 2 additions & 2 deletions src/hipporag.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ def __init__(self, corpus_name='hotpotqa', extraction_model='openai', extraction

if self.linking_retriever_name == 'colbertv2':
if self.dpr_only is False or self.doc_ensemble:
- colbertv2_index(self.phrases.tolist(), self.corpus_name, 'phrase', self.colbert_config['phrase_index_name'], overwrite='reuse')
+ colbertv2_index(self.phrases.tolist(), self.corpus_name, 'phrase', self.colbert_config['phrase_index_name'], overwrite=True)
with Run().context(RunConfig(nranks=1, experiment="phrase", root=self.colbert_config['root'])):
config = ColBERTConfig(root=self.colbert_config['root'], )
self.phrase_searcher = Searcher(index=self.colbert_config['phrase_index_name'], config=config, verbose=0)
if self.doc_ensemble or dpr_only:
- colbertv2_index(self.dataset_df['paragraph'].tolist(), self.corpus_name, 'corpus', self.colbert_config['doc_index_name'], overwrite='reuse')
+ colbertv2_index(self.dataset_df['paragraph'].tolist(), self.corpus_name, 'corpus', self.colbert_config['doc_index_name'], overwrite=True)
with Run().context(RunConfig(nranks=1, experiment="corpus", root=self.colbert_config['root'])):
config = ColBERTConfig(root=self.colbert_config['root'], )
self.corpus_searcher = Searcher(index=self.colbert_config['doc_index_name'], config=config, verbose=0)
Expand Down
9 changes: 5 additions & 4 deletions src/ircot_hipporag.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,11 @@ def reason_step(dataset, few_shot: list, query: str, passages: list, thoughts: l

client = init_langchain_model(args.llm, args.llm_model)
llm_model_name_processed = args.llm_model.replace('/', '_').replace('.', '_')
- if args.llm_model == 'gpt-3.5-turbo-1106': # Default OpenIE system
- colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'}
- else:
- colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}_{args.llm_model}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'}
+ # if args.llm_model == 'gpt-3.5-turbo-1106': # Default OpenIE system
+ # colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'}
+ # else:
+ # colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}_{llm_model_name_processed}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'}
+ colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'}

rag = HippoRAG(args.dataset, args.llm, args.llm_model, args.retriever, doc_ensemble=doc_ensemble, node_specificity=not (args.wo_node_spec), sim_threshold=args.sim_threshold,
colbert_config=colbert_configs, dpr_only=dpr_only, graph_alg=args.graph_alg, damping=args.damping, recognition_threshold=args.recognition_threshold)
Expand Down

0 comments on commit 1075a0d

Please sign in to comment.