|
10 | 10 | "\n", |
11 | 11 | "- **Level**: Intermediate\n", |
12 | 12 | "- **Time to complete**: 20 minutes\n", |
13 | | - "- **Concepts and Components Used**: [`@super_component`](https://docs.haystack.deepset.ai/docs/supercomponents), [`Pipeline`](https://docs.haystack.deepset.ai/docs/pipelines), [`SentenceTransformersTextEmbedder`](https://docs.haystack.deepset.ai/docs/sentencetransformerstextembedder), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`InMemoryEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever), [`TransformersSimilarityRanker`](https://docs.haystack.deepset.ai/docs/transformerssimilarityranker)\n", |
| 13 | + "- **Concepts and Components Used**: [`@super_component`](https://docs.haystack.deepset.ai/docs/supercomponents), [`Pipeline`](https://docs.haystack.deepset.ai/docs/pipelines), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`SentenceTransformersTextEmbedder`](https://docs.haystack.deepset.ai/docs/sentencetransformerstextembedder), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`InMemoryEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever), [`TransformersSimilarityRanker`](https://docs.haystack.deepset.ai/docs/transformerssimilarityranker)\n", |
14 | 14 | "- **Goal**: After completing this tutorial, you'll have learned how to create custom SuperComponents using the `@super_component` decorator to simplify complex pipelines and make them reusable as components." |
15 | 15 | ] |
16 | 16 | }, |
|
103 | 103 | "outputs": [], |
104 | 104 | "source": [ |
105 | 105 | "from haystack import Document, Pipeline, super_component\n", |
| 106 | + "from haystack.components.joiners import DocumentJoiner\n", |
106 | 107 | "from haystack.components.embedders import SentenceTransformersTextEmbedder\n", |
107 | 108 | "from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\n", |
108 | 109 | "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", |
|
117 | 118 | " embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n", |
118 | 119 | " bm25_retriever = InMemoryBM25Retriever(document_store)\n", |
119 | 120 | " text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n", |
| 121 | + " document_joiner = DocumentJoiner(join_mode=\"reciprocal_rank_fusion\")\n", |
120 | 122 | "\n", |
121 | 123 | " # Create the pipeline\n", |
122 | 124 | " self.pipeline = Pipeline()\n", |
123 | 125 | " self.pipeline.add_component(\"text_embedder\", text_embedder)\n", |
124 | 126 | " self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n", |
125 | 127 | " self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n", |
| 128 | + " self.pipeline.add_component(\"document_joiner\", document_joiner)\n", |
126 | 129 | "\n", |
127 | 130 | " # Connect the components\n", |
128 | | - " self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")" |
| 131 | + " self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n", |
| 132 | + " self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n", |
| 133 | + " self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")" |
129 | 134 | ] |
130 | 135 | }, |
131 | 136 | { |
|
214 | 219 | "\n", |
215 | 220 | "\n", |
216 | 221 | "If we define an input mapping like `{\"query\": [\"text_embedder.text\", \"bm25_retriever.query\"]}`, we can call `retriever.run(query=query)`, and the query will automatically be routed to both the text embedder's `text` input and the BM25 retriever's `query` input.\n", |
217 | | - "" |
| 222 | + "\n", |
| 223 | + "You can also specify how the outputs should be exposed through `output_mapping`. For example, the output mapping `{\"document_joiner.documents\": \"documents\"}` means that the documents produced by the `document_joiner` will be returned under the name `documents` when you call `retriever.run(...)`." |
218 | 224 | ] |
219 | 225 | }, |
220 | 226 | { |
|
237 | 243 | "outputs": [], |
238 | 244 | "source": [ |
239 | 245 | "from haystack import Document, Pipeline, super_component\n", |
| 246 | + "from haystack.components.joiners import DocumentJoiner\n", |
240 | 247 | "from haystack.components.embedders import SentenceTransformersTextEmbedder\n", |
241 | 248 | "from haystack.components.rankers import TransformersSimilarityRanker\n", |
242 | 249 | "from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\n", |
|
257 | 264 | " embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n", |
258 | 265 | " bm25_retriever = InMemoryBM25Retriever(document_store)\n", |
259 | 266 | " text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n", |
| 267 | + " document_joiner = DocumentJoiner()\n", |
260 | 268 | " ranker = TransformersSimilarityRanker(ranker_model)\n", |
261 | 269 | "\n", |
262 | 270 | " # Create the pipeline\n", |
263 | 271 | " self.pipeline = Pipeline()\n", |
264 | 272 | " self.pipeline.add_component(\"text_embedder\", text_embedder)\n", |
265 | 273 | " self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n", |
266 | 274 | " self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n", |
| 275 | + " self.pipeline.add_component(\"document_joiner\", document_joiner)\n", |
267 | 276 | " self.pipeline.add_component(\"ranker\", ranker)\n", |
268 | 277 | "\n", |
269 | 278 | " # Connect the components\n", |
270 | 279 | " self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n", |
| 280 | + " self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n", |
| 281 | + " self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n", |
| 282 | + " self.pipeline.connect(\"document_joiner\", \"ranker\")\n", |
271 | 283 | "\n", |
272 | 284 | " # Define input mapping\n", |
273 | 285 | " self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}" |
|
614 | 626 | "outputs": [], |
615 | 627 | "source": [ |
616 | 628 | "from haystack import Document, Pipeline, super_component\n", |
| 629 | + "from haystack.components.joiners import DocumentJoiner\n", |
617 | 630 | "from haystack.components.embedders import SentenceTransformersTextEmbedder\n", |
618 | 631 | "from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\n", |
619 | 632 | "from haystack.components.rankers import TransformersSimilarityRanker\n", |
|
632 | 645 | " embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n", |
633 | 646 | " bm25_retriever = InMemoryBM25Retriever(document_store)\n", |
634 | 647 | " text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n", |
| 648 | + " document_joiner = DocumentJoiner()\n", |
635 | 649 | " ranker = TransformersSimilarityRanker(ranker_model)\n", |
636 | 650 | "\n", |
637 | 651 | " # Create the pipeline\n", |
638 | 652 | " self.pipeline = Pipeline()\n", |
639 | 653 | " self.pipeline.add_component(\"text_embedder\", text_embedder)\n", |
640 | 654 | " self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n", |
641 | 655 | " self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n", |
| 656 | + " self.pipeline.add_component(\"document_joiner\", document_joiner)\n", |
642 | 657 | " self.pipeline.add_component(\"ranker\", ranker)\n", |
643 | 658 | "\n", |
644 | 659 | " # Connect the components\n", |
645 | 660 | " self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n", |
| 661 | + " self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n", |
| 662 | + " self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n", |
| 663 | + " self.pipeline.connect(\"document_joiner\", \"ranker\")\n", |
646 | 664 | "\n", |
647 | 665 | " # Define input and output mappings\n", |
648 | 666 | " self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}\n", |
|
651 | 669 | " self.output_mapping = {\n", |
652 | 670 | " \"bm25_retriever.documents\": \"bm25_documents\",\n", |
653 | 671 | " \"embedding_retriever.documents\": \"embedding_documents\",\n", |
| 672 | + " \"document_joiner.documents\": \"joined_documents\",\n", |
654 | 673 | " \"ranker.documents\": \"ranked_documents\",\n", |
655 | 674 | " \"text_embedder.embedding\": \"query_embedding\",\n", |
656 | 675 | " }" |
|
0 commit comments