使用 Qdrant 和 n8n 的混合搜索,法律 AI:索引
高级
这是一个自动化工作流,包含 37 个节点。主要使用 If、Set、Limit、Merge、SplitOut 等节点。 基于 Qdrant 和 n8n 的混合搜索,法律 AI:索引
前置要求
- Qdrant 服务器连接信息
- 可能需要目标 API 的认证凭证
使用的节点 (37 个)
分类
未分类
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "FnlDCNDV3x4pYVyC",
"meta": {
"instanceId": "d975180a7308eb9e1d0eb6c8833136580b02ced551ba46ad477d3b76dff98527",
"templateId": "self-building-ai-agent",
"templateCredsSetupCompleted": true
},
"name": "使用 Qdrant 和 n8n 的混合搜索,法律 AI:索引",
"tags": [],
"nodes": [
{
"id": "2556a724-93f9-4ecc-8112-10458fea8b3e",
"name": "创建集合",
"type": "n8n-nodes-qdrant.qdrant",
"position": [
560,
368
],
"parameters": {
"vectors": "{\n \"mxbai_large\": \n {\n \"size\": 1024,\n \"distance\": \"Cosine\"\n }\n}",
"operation": "createCollection",
"shardNumber": {},
"sparseVectors": "{\n \"bm25\": \n {\n \"modifier\": \"idf\"\n }\n}",
"collectionName": "legalQA_test",
"requestOptions": {},
"replicationFactor": {},
"writeConsistencyFactor": {}
},
"credentials": {
"qdrantApi": {
"id": "LVjhdCt8pAJjLyt5",
"name": "Qdrant account 2"
}
},
"typeVersion": 1
},
{
"id": "c4c7120a-aff6-4bdd-880b-903761b88af8",
"name": "检查集合是否存在",
"type": "n8n-nodes-qdrant.qdrant",
"position": [
208,
288
],
"parameters": {
"operation": "collectionExists",
"collectionName": "legalQA_test",
"requestOptions": {}
},
"credentials": {
"qdrantApi": {
"id": "LVjhdCt8pAJjLyt5",
"name": "Qdrant account 2"
}
},
"typeVersion": 1
},
{
"id": "0639e81c-130c-4fd0-a4df-80509c2f0aaf",
"name": "如果",
"type": "n8n-nodes-base.if",
"position": [
400,
288
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "loose"
},
"combinator": "and",
"conditions": [
{
"id": "d67b3ed7-aea5-4307-86f0-76c06a9da5fa",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.result.exists }}",
"rightValue": "true"
}
]
},
"looseTypeValidation": true
},
"typeVersion": 2.2
},
{
"id": "c454200a-9216-4e69-88cf-bcb3f93b65f0",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1056,
192
],
"parameters": {
"width": 592,
"height": 864,
"content": "## 将法律数据集索引到 Qdrant 进行混合检索"
},
"typeVersion": 1
},
{
"id": "03b3d5c1-cbed-43c6-8d2a-241c8a04d79d",
"name": "从 HuggingFace 索引数据集",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-368,
768
],
"parameters": {},
"typeVersion": 1
},
{
"id": "8e97d7e3-1daf-4cb8-89ea-6235b0d5f8ad",
"name": "全部分割",
"type": "n8n-nodes-base.splitOut",
"position": [
256,
944
],
"parameters": {
"options": {},
"fieldToSplitOut": "splits"
},
"typeVersion": 1
},
{
"id": "4e9a2449-ef56-4f76-b6b6-9195a591e2a8",
"name": "获取数据集分割",
"type": "n8n-nodes-base.httpRequest",
"position": [
64,
944
],
"parameters": {
"url": "https://datasets-server.huggingface.co/splits",
"options": {},
"sendQuery": true,
"queryParameters": {
"parameters": [
{
"name": "dataset",
"value": "={{ $json.dataset }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "4227306b-4008-4d3a-a233-404d12729114",
"name": "逐行划分",
"type": "n8n-nodes-base.splitOut",
"position": [
640,
944
],
"parameters": {
"options": {},
"fieldToSplitOut": "rows"
},
"typeVersion": 1
},
{
"id": "8d9b6c80-00ff-48c5-a9aa-75318c10e080",
"name": "Loop Over Batches",
"type": "n8n-nodes-base.splitInBatches",
"position": [
2640,
496
],
"parameters": {
"options": {
"reset": false
},
"batchSize": 8
},
"executeOnce": false,
"typeVersion": 3
},
{
"id": "987ee18a-78b8-46f4-be12-5897176784e0",
"name": "Aggregate a Batch",
"type": "n8n-nodes-base.aggregate",
"position": [
2976,
512
],
"parameters": {
"options": {},
"aggregate": "aggregateAllItemData",
"destinationFieldName": "batch"
},
"typeVersion": 1
},
{
"id": "5a11322c-665d-41e4-86fa-b7a0b16a4c75",
"name": "Upsert Points",
"type": "n8n-nodes-qdrant.qdrant",
"position": [
3232,
512
],
"parameters": {
"points": "=[\n {{\n $json.batch.map(i => \n ({ \n \"id\": i.idx,\n \"payload\": { \n \"text\": i.text, \n \"ids_qa\": i.ids_qa\n },\n \"vector\": {\n \"mxbai_large\": {\n \"text\": i.text,\n \"model\": \"mixedbread-ai/mxbai-embed-large-v1\"\n },\n \"bm25\": {\n \"text\": i.text,\n \"model\": \"qdrant/bm25\",\n \"options\": {\n \"avg_len\": i.avg_len\n }\n }\n }\n }).toJsonString()\n )\n }}\n]",
"resource": "point",
"operation": "upsertPoints",
"collectionName": {
"__rl": true,
"mode": "list",
"value": "legalQA_test",
"cachedResultName": "legalQA_test"
},
"requestOptions": {}
},
"credentials": {
"qdrantApi": {
"id": "LVjhdCt8pAJjLyt5",
"name": "Qdrant account 2"
}
},
"typeVersion": 1
},
{
"id": "a4d4ed4a-b24a-4dba-895c-46964d2915be",
"name": "限制",
"type": "n8n-nodes-base.limit",
"position": [
1440,
1264
],
"parameters": {
"maxItems": 500
},
"typeVersion": 1
},
{
"id": "3d45c4b2-c3da-4add-9256-a9cdba062637",
"name": "合并",
"type": "n8n-nodes-base.merge",
"position": [
2224,
784
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineAll"
},
"typeVersion": 3.2
},
{
"id": "8a5ba479-f1b1-4bdf-8934-ff39dfa384dd",
"name": "Sum them Up",
"type": "n8n-nodes-base.summarize",
"position": [
1856,
1264
],
"parameters": {
"options": {},
"fieldsToSummarize": {
"values": [
{
"field": "words_in_text",
"aggregation": "sum"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "dced86c8-5dfb-4718-89ce-707997268382",
"name": "Get the Average Text Length",
"type": "n8n-nodes-base.set",
"position": [
2064,
1264
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "0f436085-17d6-4131-8e6d-7ffee50b60be",
"name": "avg_len",
"type": "number",
"value": "={{ $json.sum_words_in_text / 500 }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "c6de3504-36f4-47b9-8a1d-7df398284e8e",
"name": "Loop Over Batches1",
"type": "n8n-nodes-base.splitInBatches",
"position": [
2640,
1312
],
"parameters": {
"options": {
"reset": false
},
"batchSize": 8
},
"executeOnce": false,
"typeVersion": 3
},
{
"id": "19e6b91d-f03a-4cb7-afd9-a148eb724877",
"name": "Upsert Points1",
"type": "n8n-nodes-qdrant.qdrant",
"position": [
4192,
1312
],
"parameters": {
"points": "=[\n {{\n $json.batch.map(i => \n ({ \n \"id\": i.idx,\n \"payload\": { \n \"text\": i.text, \n \"ids_qa\": i.ids_qa\n },\n \"vector\": {\n \"open_ai_small\": i.embedding,\n \"bm25\": {\n \"text\": i.text,\n \"model\": \"qdrant/bm25\",\n \"options\": {\n \"avg_len\": i.avg_len\n }\n }\n }\n }).toJsonString()\n )\n }}\n]",
"resource": "point",
"operation": "upsertPoints",
"collectionName": {
"__rl": true,
"mode": "list",
"value": "legalQA_openAI_test",
"cachedResultName": "legalQA_openAI_test"
},
"requestOptions": {}
},
"credentials": {
"qdrantApi": {
"id": "LVjhdCt8pAJjLyt5",
"name": "Qdrant account 2"
}
},
"typeVersion": 1
},
{
"id": "1b4ceeb5-fa40-4544-a4f8-cfd9860de452",
"name": "Create Collection1",
"type": "n8n-nodes-qdrant.qdrant",
"position": [
3008,
1840
],
"parameters": {
"vectors": "{\n \"open_ai_small\": \n {\n \"size\": 1536,\n \"distance\": \"Cosine\"\n }\n}",
"operation": "createCollection",
"shardNumber": {},
"sparseVectors": "{\n \"bm25\": \n {\n \"modifier\": \"idf\"\n }\n}",
"collectionName": "legalQA_openAI_test",
"requestOptions": {},
"replicationFactor": {},
"writeConsistencyFactor": {}
},
"credentials": {
"qdrantApi": {
"id": "LVjhdCt8pAJjLyt5",
"name": "Qdrant account 2"
}
},
"typeVersion": 1
},
{
"id": "948b1d9a-a529-4919-bb99-63ce30e2e2a5",
"name": "Check Collection Exists1",
"type": "n8n-nodes-qdrant.qdrant",
"position": [
2608,
1744
],
"parameters": {
"operation": "collectionExists",
"collectionName": "legalQA_openAI_test",
"requestOptions": {}
},
"credentials": {
"qdrantApi": {
"id": "LVjhdCt8pAJjLyt5",
"name": "Qdrant account 2"
}
},
"typeVersion": 1
},
{
"id": "e73d6246-e782-4293-bd57-ccd9a9276e06",
"name": "条件判断1",
"type": "n8n-nodes-base.if",
"position": [
2816,
1744
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "loose"
},
"combinator": "and",
"conditions": [
{
"id": "d67b3ed7-aea5-4307-86f0-76c06a9da5fa",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.result.exists }}",
"rightValue": "true"
}
]
},
"looseTypeValidation": true
},
"typeVersion": 2.2
},
{
"id": "7809aff3-02d1-45e4-949d-b251b37be7ef",
"name": "合并1",
"type": "n8n-nodes-base.merge",
"position": [
3680,
1312
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineByPosition"
},
"typeVersion": 3.2
},
{
"id": "d68cf8a5-400f-41e3-b8bf-3a3e71ff1985",
"name": "拆分输出",
"type": "n8n-nodes-base.splitOut",
"position": [
3520,
1104
],
"parameters": {
"options": {},
"fieldToSplitOut": "data"
},
"typeVersion": 1
},
{
"id": "cdac0c35-6aa9-441a-9859-3f3bfa8e3521",
"name": "Get OpenAI embeddings",
"type": "n8n-nodes-base.httpRequest",
"position": [
3344,
1104
],
"parameters": {
"url": "https://api.openai.com/v1/embeddings",
"method": "POST",
"options": {},
"sendBody": true,
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "input",
"value": "={{ $json.batch.map(item => item.text) }}"
},
{
"name": "model",
"value": "text-embedding-3-small"
}
]
},
"nodeCredentialType": "openAiApi"
},
"credentials": {
"openAiApi": {
"id": "GXLfVfRQpzF795qr",
"name": "OpenAi account 2"
}
},
"typeVersion": 4.2
},
{
"id": "3a5ba038-021f-4cfc-8d59-189357309479",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
592
],
"parameters": {
"color": 5,
"width": 1344,
"height": 528,
"content": "## Get Dataset from Hugging Face\n\nFetching a sample dataset from Hugging Face using the [Dataset Viewer API](https://huggingface.co/docs/dataset-viewer/quick_start).\n**Dataset:** [LegalQAEval from isaacus](https://huggingface.co/datasets/isaacus/LegalQAEval).\n\n1. **Retrieve dataset splits**. \n2. **Fetch all items with pagination** \n - Apply [pagination in HTTP node](https://docs.n8n.io/code/cookbook/http-node/pagination/#enable-pagination) to retrieve the full dataset. \n3. **Deduplicate text chunks** \n - The dataset contains duplicate `text` chunks, since multiple questions may belong to each passage. \n - Deduplicate before indexing into Qdrant to avoid storing duplicates. \n - Aggregate the corresponding **question–answer IDs** so they can be reused later during retrieval evaluation. \n4. **Format data for batching** (embeddings inference & indexing to Qdrant) \n"
},
"typeVersion": 1
},
{
"id": "4f9d02bb-6474-4448-9eab-5bc599cc2587",
"name": "Get Dataset Rows (Pagination)",
"type": "n8n-nodes-base.httpRequest",
"position": [
448,
944
],
"parameters": {
"url": "=https://datasets-server.huggingface.co/rows",
"options": {
"pagination": {
"pagination": {
"parameters": {
"parameters": [
{
"name": "offset",
"value": "={{ $pageCount * 100 }}"
}
]
},
"requestInterval": 1000,
"completeExpression": "={{ $pageCount * 100 > $response.body.num_rows_total}}\n",
"paginationCompleteWhen": "other"
}
}
},
"sendQuery": true,
"queryParameters": {
"parameters": [
{
"name": "dataset",
"value": "={{ $json.dataset }}"
},
{
"name": "config",
"value": "={{ $json.config }}"
},
{
"name": "split",
"value": "={{ $json.split }}"
},
{
"name": "length",
"value": "=100"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "d1b63d11-d424-44ca-8ca9-843eb488235a",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1424,
1024
],
"parameters": {
"color": 5,
"width": 800,
"height": 416,
"content": "## Estimate Average Length of Text Chunks\n\nAverage length of texts in the dataset is a part of the [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) formula used for keyword-based retrieval.\n\n1. **Select a subsample** \n2. **Count words per text chunk** \n3. **Compute average length** \n - Calculate the mean across all chunks in the subsample. \n - This value will be used as the **average document length (avg_len)** parameter in BM25."
},
"typeVersion": 1
},
{
"id": "b16cbdd6-789c-4b21-8755-502e089ca547",
"name": "便签3",
"type": "n8n-nodes-base.stickyNote",
"position": [
16,
-128
],
"parameters": {
"color": 5,
"width": 1088,
"height": 640,
"content": "## Create [Qdrant Collection](https://qdrant.tech/documentation/concepts/collections/) for Hybrid Search\nThe collection used for **Hybrid Search** is configured here with two types of vectors:\n\n**1. [Dense Vectors](https://qdrant.tech/documentation/concepts/vectors/#dense-vectors)**\nIn this pipeline, we're using the [**mxbai-embed-large-v1**](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1) embedding model through Qdrant's Cloud Inference. Hence, we need to specify during the collection configuration its:\n- **Dimensions**: 1024 \n- **Similarity metric**: `cosine`\n\n\n**2. [Sparse Vectors](https://qdrant.tech/documentation/concepts/vectors/#sparse-vectors)**\nQdrant’s main mechanism for setting up **keyword-based retrieval**. \nFor example, you can set up retrieval with:\n - [**BM25**](https://en.wikipedia.org/wiki/Okapi_BM25) (used in this pipeline);\n - Qdrant provides an [**`IDF` modifier**](https://qdrant.tech/documentation/concepts/indexing/#idf-modifier) for sparse vectors. This enables Qdrant to calculate **inverse document frequency (IDF)** statistics on the server side. These statistics evaluate the importance of keywords, for example, in BM25. \n - SPLADE, miniCOIL and other sparse neural retrievers. \n\n"
},
"typeVersion": 1
},
{
"id": "3f4cedea-edeb-4796-967b-d75b95fd4aad",
"name": "便签4",
"type": "n8n-nodes-base.stickyNote",
"position": [
2544,
288
],
"parameters": {
"color": 5,
"width": 960,
"height": 480,
"content": "## (Option №1) Index Text Chunks to Qdrant Using [Cloud Inference](https://qdrant.tech/documentation/cloud/inference/)\n\n- **Embed & upsert text chunks in batches** \n - **Dense embeddings inference + upsert handled by Qdrant node**, it takes care of generating embeddings and inserting them into the collection. \n - **Sparse representations for BM25** are created automatically under the hood by Qdrant. \n"
},
"typeVersion": 1
},
{
"id": "67fc6b7c-9168-4214-94cd-3c2d68e477cc",
"name": "便签5",
"type": "n8n-nodes-base.stickyNote",
"position": [
2528,
1552
],
"parameters": {
"color": 7,
"width": 688,
"height": 448,
"content": "## (Option №2) 1. Configure a Collection for OpenAI Embeddings & BM25 Retrieval\nSince [`text-embedding-3-small`] OpenAI embeddings have a different dimensionality (1536) than mxbai embeddings (1024), you need to account for this when configuring the collection. \n \nFor simplicity, create a **separate collection** dedicated to OpenAI embeddings. This collection will be used to index texts in this block. "
},
"typeVersion": 1
},
{
"id": "ed76cf94-3b3b-4c8f-af1f-2ea5f7096785",
"name": "便签6",
"type": "n8n-nodes-base.stickyNote",
"position": [
2512,
864
],
"parameters": {
"color": 5,
"width": 1872,
"height": 1152,
"content": "## (Option №2) Index Text Chunks to Qdrant Using External Embedding Provider (OpenAI)\n*Don't forget to create and configure a separate collection for OpenAI’s [`text-embedding-3-small`](https://platform.openai.com/docs/models/text-embedding-3-small) embeddings.*\n\n1. **Embed texts in batches** with OpenAI's [`text-embedding-3-small`](https://platform.openai.com/docs/models/text-embedding-3-small), generating dense vectors. \n\n2. **Upsert batches to Qdrant:**\n- Pass pre-embedded by OpenAi dense vectors to Qdrant;\n- Sparse representations for BM25 are created automatically under the hood by Qdrant. "
},
"typeVersion": 1
},
{
"id": "5eb0cbf7-a151-4bf4-a180-914909a04901",
"name": "Restructure for Deduplicating",
"type": "n8n-nodes-base.set",
"position": [
816,
944
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "961c95d9-c803-404b-b4b6-cb66a8a33928",
"name": "id_qa",
"type": "string",
"value": "={{ $json.row.id }}"
},
{
"id": "00f4a104-8515-49fe-a094-89d22a2ead05",
"name": "text",
"type": "string",
"value": "={{ $json.row.text }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "e3f582f9-aad1-47a4-83a8-1e0127b78ce9",
"name": "Restructure for Batching",
"type": "n8n-nodes-base.set",
"position": [
1200,
944
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "23528728-83f3-4f11-9d66-feddc3bf27d1",
"name": "idx",
"type": "number",
"value": "={{ $itemIndex }}"
},
{
"id": "f663bae7-ff0c-440f-9a57-cb363322fc9c",
"name": "text",
"type": "string",
"value": "={{ $json.text }}"
},
{
"id": "bfb956b4-d5e2-46b2-b41a-850a4e00765f",
"name": "ids_qa",
"type": "array",
"value": "={{ $json.appended_id_qa }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "74568439-a6ab-4f4e-acc5-9a0784d6c1d2",
"name": "Deduplicate Texts",
"type": "n8n-nodes-base.summarize",
"position": [
1008,
944
],
"parameters": {
"options": {},
"fieldsToSplitBy": "text",
"fieldsToSummarize": {
"values": [
{
"field": "id_qa",
"aggregation": "append"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "b65a9c60-44e1-465c-99f4-1d33428e5c4a",
"name": "Calculate #words in Each Text",
"type": "n8n-nodes-base.set",
"position": [
1648,
1264
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "29dc2299-fb1e-4b0a-bff1-0a3e88f7eb03",
"name": "words_in_text",
"type": "number",
"value": "={{ $json.text.trim().split(/\\s+/).length }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "f778e469-8a74-47fe-a854-7da473156f87",
"name": "获取简报",
"type": "n8n-nodes-base.set",
"position": [
2912,
1104
],
"parameters": {
"options": {}
},
"typeVersion": 3.4
},
{
"id": "5a66c3c1-2c6b-4280-b7cb-514f2ae5c720",
"name": "Aggregate a Batch to Embed",
"type": "n8n-nodes-base.aggregate",
"position": [
3088,
1216
],
"parameters": {
"options": {},
"aggregate": "aggregateAllItemData",
"destinationFieldName": "batch"
},
"typeVersion": 1
},
{
"id": "1e4971c7-c41f-4e7b-b9a1-c777193578c7",
"name": "Aggregate a Batch to Upsert",
"type": "n8n-nodes-base.aggregate",
"position": [
3952,
1312
],
"parameters": {
"options": {},
"aggregate": "aggregateAllItemData",
"destinationFieldName": "batch"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {
"Index Dataset from HuggingFace": [
{
"json": {
"dataset": "isaacus/LegalQAEval"
}
}
]
},
"settings": {
"executionOrder": "v1"
},
"versionId": "fc4f19dc-4bac-4a41-944d-2c3d0b469e33",
"connections": {
"If": {
"main": [
[],
[
{
"node": "Create Collection",
"type": "main",
"index": 0
}
]
]
},
"If1": {
"main": [
[],
[
{
"node": "Create Collection1",
"type": "main",
"index": 0
}
]
]
},
"Limit": {
"main": [
[
{
"node": "Calculate #words in Each Text",
"type": "main",
"index": 0
}
]
]
},
"Merge": {
"main": [
[
{
"node": "Loop Over Batches",
"type": "main",
"index": 0
},
{
"node": "Loop Over Batches1",
"type": "main",
"index": 0
}
]
]
},
"Merge1": {
"main": [
[
{
"node": "Aggregate a Batch to Upsert",
"type": "main",
"index": 0
}
]
]
},
"Split Out": {
"main": [
[
{
"node": "Merge1",
"type": "main",
"index": 0
}
]
]
},
"Sum them Up": {
"main": [
[
{
"node": "Get the Average Text Length",
"type": "main",
"index": 0
}
]
]
},
"Upsert Points": {
"main": [
[
{
"node": "Loop Over Batches",
"type": "main",
"index": 0
}
]
]
},
"Divide Per Row": {
"main": [
[
{
"node": "Restructure for Deduplicating",
"type": "main",
"index": 0
}
]
]
},
"Upsert Points1": {
"main": [
[
{
"node": "Loop Over Batches1",
"type": "main",
"index": 0
}
]
]
},
"Aggregate a Batch": {
"main": [
[
{
"node": "Upsert Points",
"type": "main",
"index": 0
}
]
]
},
"Create Collection": {
"main": [
[]
]
},
"Deduplicate Texts": {
"main": [
[
{
"node": "Restructure for Batching",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Batches": {
"main": [
[],
[
{
"node": "Aggregate a Batch",
"type": "main",
"index": 0
}
]
]
},
"Get Dataset Splits": {
"main": [
[
{
"node": "Split Them All Out",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Batches1": {
"main": [
[
{
"node": "Edit Fields",
"type": "main",
"index": 0
}
],
[
{
"node": "Merge1",
"type": "main",
"index": 1
},
{
"node": "Aggregate a Batch to Embed",
"type": "main",
"index": 0
}
]
]
},
"Split Them All Out": {
"main": [
[
{
"node": "Get Dataset Rows (Pagination)",
"type": "main",
"index": 0
}
]
]
},
"Get OpenAI embeddings": {
"main": [
[
{
"node": "Split Out",
"type": "main",
"index": 0
}
]
]
},
"Check Collection Exists": {
"main": [
[
{
"node": "If",
"type": "main",
"index": 0
}
]
]
},
"Check Collection Exists1": {
"main": [
[
{
"node": "If1",
"type": "main",
"index": 0
}
]
]
},
"Restructure for Batching": {
"main": [
[
{
"node": "Limit",
"type": "main",
"index": 0
},
{
"node": "Merge",
"type": "main",
"index": 0
}
]
]
},
"Aggregate a Batch to Embed": {
"main": [
[
{
"node": "Get OpenAI embeddings",
"type": "main",
"index": 0
}
]
]
},
"Aggregate a Batch to Upsert": {
"main": [
[
{
"node": "Upsert Points1",
"type": "main",
"index": 0
}
]
]
},
"Get the Average Text Length": {
"main": [
[
{
"node": "Merge",
"type": "main",
"index": 1
}
]
]
},
"Calculate #words in Each Text": {
"main": [
[
{
"node": "Sum them Up",
"type": "main",
"index": 0
}
]
]
},
"Get Dataset Rows (Pagination)": {
"main": [
[
{
"node": "Divide Per Row",
"type": "main",
"index": 0
}
]
]
},
"Restructure for Deduplicating": {
"main": [
[
{
"node": "Deduplicate Texts",
"type": "main",
"index": 0
}
]
]
},
"Index Dataset from HuggingFace": {
"main": [
[
{
"node": "Get Dataset Splits",
"type": "main",
"index": 0
},
{
"node": "Check Collection Exists",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
这是一个高级难度的通用自动化工作流。适合高级用户,包含 16+ 个节点的复杂工作流
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
使用 Qdrant 和 n8n 的混合搜索,法律 AI:检索
基于 Qdrant 和 n8n 的混合搜索,法律 AI:检索
Set
Merge
Filter
+7
17 节点Jenny
在可视化参考库中探索n8n节点
在可视化参考库中探索n8n节点
If
Ftp
Set
+93
113 节点I versus AI
Other
(Duc)深度研究市场模板
集成PerplexityAI研究和OpenAI内容的多层级WordPress博客生成器
If
Set
Xml
+28
132 节点Daniel Ng
AI
灵活新闻聚合器 - 多源集成、AI分析和可设置频道
多源新闻策展系统,集成Mistral AI分析、摘要和自定义频道
If
Set
Xml
+32
120 节点Hybroht
Content Creation
WordPress博客自动化专业版(SEO主题)v2
WordPress自动博客专业版 - SEO主题内容自动化机器
If
Set
Xml
+21
63 节点Daniel Ng
AI
WordPress博客自动化专业版(深度研究)v1
WordPress自动博客专业版 - 含深度研究的内容自动化机器
If
Set
Xml
+24
77 节点Daniel Ng
AI