|
254 | 254 | "start_time": "2026-05-26T08:59:55.782346Z" |
255 | 255 | } |
256 | 256 | }, |
257 | | - "source": "from haystack import Document, Pipeline, super_component\nfrom haystack.components.joiners import DocumentJoiner\nfrom haystack.components.embedders import SentenceTransformersTextEmbedder\nfrom haystack.components.rankers import SentenceTransformersSimilarityRanker\nfrom haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\nfrom haystack.document_stores.in_memory import InMemoryDocumentStore\n\nfrom datasets import load_dataset\n\n\n@super_component\nclass HybridRetrieverWithRanker:\n def __init__(\n self,\n document_store: InMemoryDocumentStore,\n embedder_model: str = \"BAAI/bge-small-en-v1.5\",\n ranker_model: str = \"BAAI/bge-reranker-base\",\n ):\n # Create the components\n embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n bm25_retriever = InMemoryBM25Retriever(document_store)\n text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n document_joiner = DocumentJoiner()\n ranker = SentenceTransformersSimilarityRanker(ranker_model)\n\n # Create the pipeline\n self.pipeline = Pipeline()\n self.pipeline.add_component(\"text_embedder\", text_embedder)\n self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n self.pipeline.add_component(\"document_joiner\", document_joiner)\n self.pipeline.add_component(\"ranker\", ranker)\n\n # Connect the components\n self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n self.pipeline.connect(\"document_joiner\", \"ranker\")\n\n # Define input mapping\n self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}", |
| 257 | + "source": "from haystack import Document, Pipeline, super_component\nfrom haystack.components.joiners import DocumentJoiner\nfrom haystack.components.embedders import SentenceTransformersTextEmbedder\nfrom haystack.components.rankers import SentenceTransformersSimilarityRanker\nfrom haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\nfrom haystack.document_stores.in_memory import InMemoryDocumentStore\n\nfrom datasets import load_dataset\n\n\n@super_component\nclass HybridRetrieverWithRanker:\n def __init__(\n self,\n document_store: InMemoryDocumentStore,\n embedder_model: str = \"BAAI/bge-small-en-v1.5\",\n ranker_model: str = \"BAAI/bge-reranker-base\",\n ):\n # Create the components\n embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n bm25_retriever = InMemoryBM25Retriever(document_store)\n text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n document_joiner = DocumentJoiner()\n ranker = SentenceTransformersSimilarityRanker(model=ranker_model)\n\n # Create the pipeline\n self.pipeline = Pipeline()\n self.pipeline.add_component(\"text_embedder\", text_embedder)\n self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n self.pipeline.add_component(\"document_joiner\", document_joiner)\n self.pipeline.add_component(\"ranker\", ranker)\n\n # Connect the components\n self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n self.pipeline.connect(\"document_joiner\", \"ranker\")\n\n # Define input mapping\n self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}", |
258 | 258 | "outputs": [], |
259 | 259 | "execution_count": null |
260 | 260 | }, |
|
502 | 502 | "start_time": "2026-05-26T09:01:35.685324Z" |
503 | 503 | } |
504 | 504 | }, |
505 | | - "source": "from haystack import Document, Pipeline, super_component\nfrom haystack.components.joiners import DocumentJoiner\nfrom haystack.components.embedders import SentenceTransformersTextEmbedder\nfrom haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\nfrom haystack.components.rankers import SentenceTransformersSimilarityRanker\nfrom haystack.document_stores.in_memory import InMemoryDocumentStore\n\n\n@super_component\nclass AdvancedHybridRetriever:\n def __init__(\n self,\n document_store: InMemoryDocumentStore,\n embedder_model: str = \"BAAI/bge-small-en-v1.5\",\n ranker_model: str = \"BAAI/bge-reranker-base\",\n ):\n # Create the components\n embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n bm25_retriever = InMemoryBM25Retriever(document_store)\n text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n document_joiner = DocumentJoiner()\n ranker = SentenceTransformersSimilarityRanker(ranker_model)\n\n # Create the pipeline\n self.pipeline = Pipeline()\n self.pipeline.add_component(\"text_embedder\", text_embedder)\n self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n self.pipeline.add_component(\"document_joiner\", document_joiner)\n self.pipeline.add_component(\"ranker\", ranker)\n\n # Connect the components\n self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n self.pipeline.connect(\"document_joiner\", \"ranker\")\n\n # Define input and output mappings\n self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}\n\n # Expose outputs from multiple components, including non-leaf components\n self.output_mapping = {\n \"bm25_retriever.documents\": \"bm25_documents\",\n \"embedding_retriever.documents\": \"embedding_documents\",\n \"document_joiner.documents\": \"joined_documents\",\n \"ranker.documents\": \"ranked_documents\",\n \"text_embedder.embedding\": \"query_embedding\",\n }", |
| 505 | + "source": "from haystack import Document, Pipeline, super_component\nfrom haystack.components.joiners import DocumentJoiner\nfrom haystack.components.embedders import SentenceTransformersTextEmbedder\nfrom haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\nfrom haystack.components.rankers import SentenceTransformersSimilarityRanker\nfrom haystack.document_stores.in_memory import InMemoryDocumentStore\n\n\n@super_component\nclass AdvancedHybridRetriever:\n def __init__(\n self,\n document_store: InMemoryDocumentStore,\n embedder_model: str = \"BAAI/bge-small-en-v1.5\",\n ranker_model: str = \"BAAI/bge-reranker-base\",\n ):\n # Create the components\n embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n bm25_retriever = InMemoryBM25Retriever(document_store)\n text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n document_joiner = DocumentJoiner()\n ranker = SentenceTransformersSimilarityRanker(model=ranker_model)\n\n # Create the pipeline\n self.pipeline = Pipeline()\n self.pipeline.add_component(\"text_embedder\", text_embedder)\n self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n self.pipeline.add_component(\"document_joiner\", document_joiner)\n self.pipeline.add_component(\"ranker\", ranker)\n\n # Connect the components\n self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n self.pipeline.connect(\"document_joiner\", \"ranker\")\n\n # Define input and output mappings\n self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}\n\n # Expose outputs from multiple components, including non-leaf components\n self.output_mapping = {\n \"bm25_retriever.documents\": \"bm25_documents\",\n \"embedding_retriever.documents\": \"embedding_documents\",\n \"document_joiner.documents\": \"joined_documents\",\n \"ranker.documents\": \"ranked_documents\",\n \"text_embedder.embedding\": \"query_embedding\",\n }", |
506 | 506 | "outputs": [], |
507 | 507 | "execution_count": null |
508 | 508 | }, |
|
0 commit comments