diff --git a/src/hipporag.py b/src/hipporag.py index 5890db0..64b6b90 100644 --- a/src/hipporag.py +++ b/src/hipporag.py @@ -116,12 +116,12 @@ def __init__(self, corpus_name='hotpotqa', extraction_model='openai', extraction if self.linking_retriever_name == 'colbertv2': if self.dpr_only is False or self.doc_ensemble: - colbertv2_index(self.phrases.tolist(), self.corpus_name, 'phrase', self.colbert_config['phrase_index_name'], overwrite='reuse') + colbertv2_index(self.phrases.tolist(), self.corpus_name, 'phrase', self.colbert_config['phrase_index_name'], overwrite=True) with Run().context(RunConfig(nranks=1, experiment="phrase", root=self.colbert_config['root'])): config = ColBERTConfig(root=self.colbert_config['root'], ) self.phrase_searcher = Searcher(index=self.colbert_config['phrase_index_name'], config=config, verbose=0) if self.doc_ensemble or dpr_only: - colbertv2_index(self.dataset_df['paragraph'].tolist(), self.corpus_name, 'corpus', self.colbert_config['doc_index_name'], overwrite='reuse') + colbertv2_index(self.dataset_df['paragraph'].tolist(), self.corpus_name, 'corpus', self.colbert_config['doc_index_name'], overwrite=True) with Run().context(RunConfig(nranks=1, experiment="corpus", root=self.colbert_config['root'])): config = ColBERTConfig(root=self.colbert_config['root'], ) self.corpus_searcher = Searcher(index=self.colbert_config['doc_index_name'], config=config, verbose=0) diff --git a/src/ircot_hipporag.py b/src/ircot_hipporag.py index 648f0e0..8b9caa6 100644 --- a/src/ircot_hipporag.py +++ b/src/ircot_hipporag.py @@ -133,10 +133,11 @@ def reason_step(dataset, few_shot: list, query: str, passages: list, thoughts: l client = init_langchain_model(args.llm, args.llm_model) llm_model_name_processed = args.llm_model.replace('/', '_').replace('.', '_') - if args.llm_model == 'gpt-3.5-turbo-1106': # Default OpenIE system - colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'} - else: - colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}_{args.llm_model}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'} + # if args.llm_model == 'gpt-3.5-turbo-1106': # Default OpenIE system + # colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'} + # else: + # colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}_{llm_model_name_processed}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'} + colbert_configs = {'root': f'data/lm_vectors/colbert/{args.dataset}', 'doc_index_name': 'nbits_2', 'phrase_index_name': 'nbits_2'} rag = HippoRAG(args.dataset, args.llm, args.llm_model, args.retriever, doc_ensemble=doc_ensemble, node_specificity=not (args.wo_node_spec), sim_threshold=args.sim_threshold, colbert_config=colbert_configs, dpr_only=dpr_only, graph_alg=args.graph_alg, damping=args.damping, recognition_threshold=args.recognition_threshold)