Skip to content

Commit c5ea845

Browse files
fix: revert breaking changes in 32 and 44
1 parent f8fe644 commit c5ea845

2 files changed

Lines changed: 35 additions & 10 deletions

File tree

tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"\n",
1111
"- **Level**: Beginner\n",
1212
"- **Time to complete**: 15 minutes\n",
13-
"- **Components Used**: [`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore), [`DocumentLanguageClassifier`](https://docs.haystack.deepset.ai/docs/documentlanguageclassifier), [`MetadataRouter`](https://docs.haystack.deepset.ai/docs/metadatarouter), [`DocumentWriter`](https://docs.haystack.deepset.ai/docs/documentwriter), [`TextLanguageRouter`](https://docs.haystack.deepset.ai/docs/textlanguagerouter), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`ChatPromptBuilder`](https://docs.haystack.deepset.ai/docs/chatpromptbuilder), [`OpenAIChatGenerator`](https://docs.haystack.deepset.ai/docs/openaichatgenerator)\n",
13+
"- **Components Used**: [`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore), [`DocumentLanguageClassifier`](https://docs.haystack.deepset.ai/docs/documentlanguageclassifier), [`MetadataRouter`](https://docs.haystack.deepset.ai/docs/metadatarouter), [`DocumentWriter`](https://docs.haystack.deepset.ai/docs/documentwriter), [`TextLanguageRouter`](https://docs.haystack.deepset.ai/docs/textlanguagerouter), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`ChatPromptBuilder`](https://docs.haystack.deepset.ai/docs/chatpromptbuilder), [`OpenAIChatGenerator`](https://docs.haystack.deepset.ai/docs/openaichatgenerator)\n",
1414
"- **Goal**: After completing this tutorial, you'll have learned how to build a Haystack pipeline to classify documents based on the (human) language they were written in.\n",
1515
"- Optionally, at the end you'll also incorporate language classification and query routing into a RAG pipeline, so you can query documents based on the language a question was written in."
1616
]
@@ -385,6 +385,7 @@
385385
"outputs": [],
386386
"source": [
387387
"from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n",
388+
"from haystack.components.joiners import DocumentJoiner\n",
388389
"from haystack.components.builders import ChatPromptBuilder\n",
389390
"from haystack.components.generators.chat import OpenAIChatGenerator\n",
390391
"from haystack.dataclasses import ChatMessage\n",
@@ -417,6 +418,7 @@
417418
"Create a new `Pipeline`. Add the following components:\n",
418419
"- `TextLanguageRouter`\n",
419420
"- `InMemoryBM25Retriever`. You'll need a retriever per language, since each language has its own `DocumentStore`.\n",
421+
"- `DocumentJoiner`\n",
420422
"- `ChatPromptBuilder`\n",
421423
"- `OpenAIChatGenerator`\n",
422424
"\n",
@@ -439,15 +441,17 @@
439441
" - en_retriever: InMemoryBM25Retriever\n",
440442
" - fr_retriever: InMemoryBM25Retriever\n",
441443
" - es_retriever: InMemoryBM25Retriever\n",
444+
" - joiner: DocumentJoiner\n",
442445
" - prompt_builder: ChatPromptBuilder\n",
443446
" - llm: OpenAIChatGenerator\n",
444447
"🛤️ Connections\n",
445448
" - router.en -> en_retriever.query (str)\n",
446449
" - router.fr -> fr_retriever.query (str)\n",
447450
" - router.es -> es_retriever.query (str)\n",
448-
" - en_retriever.documents -> prompt_builder.documents (List[Document])\n",
449-
" - fr_retriever.documents -> prompt_builder.documents (List[Document])\n",
450-
" - es_retriever.documents -> prompt_builder.documents (List[Document])\n",
451+
" - en_retriever.documents -> joiner.documents (List[Document])\n",
452+
" - fr_retriever.documents -> joiner.documents (List[Document])\n",
453+
" - es_retriever.documents -> joiner.documents (List[Document])\n",
454+
" - joiner.documents -> prompt_builder.documents (List[Document])\n",
451455
" - prompt_builder.prompt -> llm.messages (List[ChatMessage])"
452456
]
453457
},
@@ -462,16 +466,18 @@
462466
"rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=en_document_store), name=\"en_retriever\")\n",
463467
"rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=fr_document_store), name=\"fr_retriever\")\n",
464468
"rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=es_document_store), name=\"es_retriever\")\n",
469+
"rag_pipeline.add_component(instance=DocumentJoiner(), name=\"joiner\")\n",
465470
"rag_pipeline.add_component(instance=ChatPromptBuilder(template=prompt_template), name=\"prompt_builder\")\n",
466471
"rag_pipeline.add_component(instance=OpenAIChatGenerator(), name=\"llm\")\n",
467472
"\n",
468473
"\n",
469474
"rag_pipeline.connect(\"router.en\", \"en_retriever.query\")\n",
470475
"rag_pipeline.connect(\"router.fr\", \"fr_retriever.query\")\n",
471476
"rag_pipeline.connect(\"router.es\", \"es_retriever.query\")\n",
472-
"rag_pipeline.connect(\"en_retriever\", \"prompt_builder.documents\")\n",
473-
"rag_pipeline.connect(\"fr_retriever\", \"prompt_builder.documents\")\n",
474-
"rag_pipeline.connect(\"es_retriever\", \"prompt_builder.documents\")\n",
477+
"rag_pipeline.connect(\"en_retriever\", \"joiner\")\n",
478+
"rag_pipeline.connect(\"fr_retriever\", \"joiner\")\n",
479+
"rag_pipeline.connect(\"es_retriever\", \"joiner\")\n",
480+
"rag_pipeline.connect(\"joiner.documents\", \"prompt_builder.documents\")\n",
475481
"rag_pipeline.connect(\"prompt_builder.prompt\", \"llm.messages\")"
476482
]
477483
},

tutorials/44_Creating_Custom_SuperComponents.ipynb

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"\n",
1111
"- **Level**: Intermediate\n",
1212
"- **Time to complete**: 20 minutes\n",
13-
"- **Concepts and Components Used**: [`@super_component`](https://docs.haystack.deepset.ai/docs/supercomponents), [`Pipeline`](https://docs.haystack.deepset.ai/docs/pipelines), [`SentenceTransformersTextEmbedder`](https://docs.haystack.deepset.ai/docs/sentencetransformerstextembedder), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`InMemoryEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever), [`TransformersSimilarityRanker`](https://docs.haystack.deepset.ai/docs/transformerssimilarityranker)\n",
13+
"- **Concepts and Components Used**: [`@super_component`](https://docs.haystack.deepset.ai/docs/supercomponents), [`Pipeline`](https://docs.haystack.deepset.ai/docs/pipelines), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`SentenceTransformersTextEmbedder`](https://docs.haystack.deepset.ai/docs/sentencetransformerstextembedder), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`InMemoryEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever), [`TransformersSimilarityRanker`](https://docs.haystack.deepset.ai/docs/transformerssimilarityranker)\n",
1414
"- **Goal**: After completing this tutorial, you'll have learned how to create custom SuperComponents using the `@super_component` decorator to simplify complex pipelines and make them reusable as components."
1515
]
1616
},
@@ -103,6 +103,7 @@
103103
"outputs": [],
104104
"source": [
105105
"from haystack import Document, Pipeline, super_component\n",
106+
"from haystack.components.joiners import DocumentJoiner\n",
106107
"from haystack.components.embedders import SentenceTransformersTextEmbedder\n",
107108
"from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\n",
108109
"from haystack.document_stores.in_memory import InMemoryDocumentStore\n",
@@ -117,15 +118,19 @@
117118
" embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n",
118119
" bm25_retriever = InMemoryBM25Retriever(document_store)\n",
119120
" text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n",
121+
" document_joiner = DocumentJoiner(join_mode=\"reciprocal_rank_fusion\")\n",
120122
"\n",
121123
" # Create the pipeline\n",
122124
" self.pipeline = Pipeline()\n",
123125
" self.pipeline.add_component(\"text_embedder\", text_embedder)\n",
124126
" self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n",
125127
" self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n",
128+
" self.pipeline.add_component(\"document_joiner\", document_joiner)\n",
126129
"\n",
127130
" # Connect the components\n",
128-
" self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")"
131+
" self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n",
132+
" self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n",
133+
" self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")"
129134
]
130135
},
131136
{
@@ -214,7 +219,8 @@
214219
"\n",
215220
"\n",
216221
"If we define an input mapping like `{\"query\": [\"text_embedder.text\", \"bm25_retriever.query\"]}`, we can call `retriever.run(query=query)`, and the query will automatically be routed to both the text embedder's `text` input and the BM25 retriever's `query` input.\n",
217-
""
222+
"\n",
223+
"You can also specify how the outputs should be exposed through `output_mapping`. For example, output mapping `{\"document_joiner.documents\": \"documents\"}` means that the documents produced by the `document_joiner` will be returned under the name `documents` when you call `retriever.run(...)`."
218224
]
219225
},
220226
{
@@ -237,6 +243,7 @@
237243
"outputs": [],
238244
"source": [
239245
"from haystack import Document, Pipeline, super_component\n",
246+
"from haystack.components.joiners import DocumentJoiner\n",
240247
"from haystack.components.embedders import SentenceTransformersTextEmbedder\n",
241248
"from haystack.components.rankers import TransformersSimilarityRanker\n",
242249
"from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\n",
@@ -257,17 +264,22 @@
257264
" embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n",
258265
" bm25_retriever = InMemoryBM25Retriever(document_store)\n",
259266
" text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n",
267+
" document_joiner = DocumentJoiner()\n",
260268
" ranker = TransformersSimilarityRanker(ranker_model)\n",
261269
"\n",
262270
" # Create the pipeline\n",
263271
" self.pipeline = Pipeline()\n",
264272
" self.pipeline.add_component(\"text_embedder\", text_embedder)\n",
265273
" self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n",
266274
" self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n",
275+
" self.pipeline.add_component(\"document_joiner\", document_joiner)\n",
267276
" self.pipeline.add_component(\"ranker\", ranker)\n",
268277
"\n",
269278
" # Connect the components\n",
270279
" self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n",
280+
" self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n",
281+
" self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n",
282+
" self.pipeline.connect(\"document_joiner\", \"ranker\")\n",
271283
"\n",
272284
" # Define input mapping\n",
273285
" self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}"
@@ -614,6 +626,7 @@
614626
"outputs": [],
615627
"source": [
616628
"from haystack import Document, Pipeline, super_component\n",
629+
"from haystack.components.joiners import DocumentJoiner\n",
617630
"from haystack.components.embedders import SentenceTransformersTextEmbedder\n",
618631
"from haystack.components.retrievers import InMemoryBM25Retriever, InMemoryEmbeddingRetriever\n",
619632
"from haystack.components.rankers import TransformersSimilarityRanker\n",
@@ -632,17 +645,22 @@
632645
" embedding_retriever = InMemoryEmbeddingRetriever(document_store)\n",
633646
" bm25_retriever = InMemoryBM25Retriever(document_store)\n",
634647
" text_embedder = SentenceTransformersTextEmbedder(embedder_model)\n",
648+
" document_joiner = DocumentJoiner()\n",
635649
" ranker = TransformersSimilarityRanker(ranker_model)\n",
636650
"\n",
637651
" # Create the pipeline\n",
638652
" self.pipeline = Pipeline()\n",
639653
" self.pipeline.add_component(\"text_embedder\", text_embedder)\n",
640654
" self.pipeline.add_component(\"embedding_retriever\", embedding_retriever)\n",
641655
" self.pipeline.add_component(\"bm25_retriever\", bm25_retriever)\n",
656+
" self.pipeline.add_component(\"document_joiner\", document_joiner)\n",
642657
" self.pipeline.add_component(\"ranker\", ranker)\n",
643658
"\n",
644659
" # Connect the components\n",
645660
" self.pipeline.connect(\"text_embedder\", \"embedding_retriever\")\n",
661+
" self.pipeline.connect(\"bm25_retriever\", \"document_joiner\")\n",
662+
" self.pipeline.connect(\"embedding_retriever\", \"document_joiner\")\n",
663+
" self.pipeline.connect(\"document_joiner\", \"ranker\")\n",
646664
"\n",
647665
" # Define input and output mappings\n",
648666
" self.input_mapping = {\"query\": [\"text_embedder.text\", \"bm25_retriever.query\", \"ranker.query\"]}\n",
@@ -651,6 +669,7 @@
651669
" self.output_mapping = {\n",
652670
" \"bm25_retriever.documents\": \"bm25_documents\",\n",
653671
" \"embedding_retriever.documents\": \"embedding_documents\",\n",
672+
" \"document_joiner.documents\": \"joined_documents\",\n",
654673
" \"ranker.documents\": \"ranked_documents\",\n",
655674
" \"text_embedder.embedding\": \"query_embedding\",\n",
656675
" }"

0 commit comments

Comments
 (0)