网络爬虫:将网站转换为AI就绪的Markdown格式并存储到Google表格
高级
这是一个自动化工作流,包含 22 个节点。主要使用 Set、Html、Filter、Switch、Markdown 等节点。 网络爬虫:将网站转换为AI就绪的Markdown格式并存储到Google表格
前置要求
- 可能需要目标 API 的认证凭证
- Google Sheets API 凭证
使用的节点 (22 个)
分类
未分类
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "3d7eb9567ae690bf8c9bba1cb43396e6e40c18e15eb5889cf9673ed1713da6db",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "349e50cf-75b8-432c-818e-63f1ff3ead34",
"name": "概述笔记",
"type": "n8n-nodes-base.stickyNote",
"position": [
1696,
3104
],
"parameters": {
"color": 4,
"width": 600,
"height": 1112,
"content": "# 用于 AI 知识库的自动化网站爬虫"
},
"typeVersion": 1
},
{
"id": "eb43d67c-01fc-4d83-bb2c-099938a57468",
"name": "注意:触发器和设置",
"type": "n8n-nodes-base.stickyNote",
"position": [
2512,
3072
],
"parameters": {
"color": 6,
"width": 556,
"height": 176,
"content": "## 🖱️ 触发器与设置节点"
},
"typeVersion": 1
},
{
"id": "3c8581cb-46cd-4f25-af5a-c52bc2f463c6",
"name": "Set Website",
"type": "n8n-nodes-base.set",
"position": [
2688,
3296
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "a652f57e-210e-421e-b20b-781d6f4dc240",
"name": "website_url",
"type": "string",
"value": "https://example.com"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "18201858-7764-4a14-9f6b-12e36eaf158b",
"name": "Manual Trigger",
"type": "n8n-nodes-base.manualTrigger",
"position": [
2496,
3296
],
"parameters": {},
"typeVersion": 1
},
{
"id": "b7435481-bed3-439f-933c-1c5e0142ad5c",
"name": "Scrape Homepage",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
2880,
3296
],
"parameters": {
"url": "={{ $json.website_url }}",
"options": {
"redirect": {
"redirect": {}
},
"allowUnauthorizedCerts": false
}
},
"executeOnce": false,
"typeVersion": 4.2,
"alwaysOutputData": false
},
{
"id": "ce13710d-24ca-47d4-a25c-8890c1592947",
"name": "注意:首页抓取",
"type": "n8n-nodes-base.stickyNote",
"position": [
3168,
3488
],
"parameters": {
"color": 5,
"width": 396,
"height": 192,
"content": "## 🌐 首页抓取节点"
},
"typeVersion": 1
},
{
"id": "61a60f2c-f032-4b46-83ba-405df0ce05df",
"name": "Extract Links from HTML",
"type": "n8n-nodes-base.html",
"position": [
3088,
3296
],
"parameters": {
"options": {
"trimValues": true,
"cleanUpText": true
},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "links",
"attribute": "href",
"cssSelector": "a",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "582eeae0-fec0-4548-9c78-7c05ac5aaebc",
"name": "Split Links",
"type": "n8n-nodes-base.splitOut",
"position": [
3296,
3296
],
"parameters": {
"options": {},
"fieldToSplitOut": "links"
},
"typeVersion": 1
},
{
"id": "17d59531-4d51-4494-8ae9-e91b81851a0b",
"name": "Remove Duplicate Links",
"type": "n8n-nodes-base.removeDuplicates",
"position": [
3520,
3296
],
"parameters": {
"options": {}
},
"typeVersion": 2
},
{
"id": "d50fa2a9-1a58-4dad-8bd0-cfbd31aeae91",
"name": "Filter Real Hyperlinks",
"type": "n8n-nodes-base.filter",
"position": [
3696,
3296
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "bd6c6da6-8af7-4809-b6cd-01a38d71953b",
"operator": {
"type": "string",
"operation": "startsWith"
},
"leftValue": "={{ $json.links }}",
"rightValue": "https://"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "cb121b70-a14a-4cbd-a54c-e55c6fc235b7",
"name": "注意:链接处理",
"type": "n8n-nodes-base.stickyNote",
"position": [
3216,
3056
],
"parameters": {
"color": 2,
"width": 556,
"height": 224,
"content": "## 🔄 链接处理节点"
},
"typeVersion": 1
},
{
"id": "d69c0dc2-2c4c-474b-ba11-3d79e1390b12",
"name": "Separate Images and Links",
"type": "n8n-nodes-base.switch",
"position": [
2480,
3680
],
"parameters": {
"rules": {
"values": [
{
"outputKey": "Images",
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "16724958-4eea-489d-b494-3d76a3ba2562",
"operator": {
"type": "string",
"operation": "regex"
},
"leftValue": "={{ $json.links }}",
"rightValue": "=^https?:\\/\\/.*\\.(?:png|jpe?g|gif|webp|bmp|svg|ico)(?:\\?.*)?$"
}
]
},
"renameOutput": true
},
{
"outputKey": "Links",
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "816392f0-96db-4134-8bee-4b74688ff929",
"operator": {
"type": "string",
"operation": "notRegex"
},
"leftValue": "={{ $json.links }}",
"rightValue": "=^https?:\\/\\/.*\\.(?:png|jpe?g|gif|webp|bmp|svg|ico)(?:\\?.*)?$"
}
]
},
"renameOutput": true
}
]
},
"options": {}
},
"typeVersion": 3.2
},
{
"id": "23896343-575e-4956-8e95-3b5e6e4c8ae7",
"name": "Aggregate Images",
"type": "n8n-nodes-base.aggregate",
"position": [
2736,
3504
],
"parameters": {
"options": {},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "links"
}
]
}
},
"typeVersion": 1
},
{
"id": "fcad347b-60d7-4fa2-9b02-e96c2f27116d",
"name": "Aggregate Links",
"type": "n8n-nodes-base.aggregate",
"position": [
2736,
3696
],
"parameters": {
"options": {},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "links"
}
]
}
},
"typeVersion": 1
},
{
"id": "fc5d6ce1-1765-4768-a9c7-de3677e8109d",
"name": "Scrape Content Links",
"type": "n8n-nodes-base.httpRequest",
"position": [
2736,
3872
],
"parameters": {
"url": "={{ $json.links }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "0d4b6a4e-b6cb-4e6c-9a22-bd0dc6a72027",
"name": "注意:内容抓取",
"type": "n8n-nodes-base.stickyNote",
"position": [
2320,
3984
],
"parameters": {
"color": 5,
"width": 428,
"height": 224,
"content": "## 📄 内容抓取与聚合节点"
},
"typeVersion": 1
},
{
"id": "349e5f7c-c81b-467b-a59b-ea40a47226f0",
"name": "Convert to Markdown",
"type": "n8n-nodes-base.markdown",
"position": [
2944,
3872
],
"parameters": {
"html": "={{ $json.data }}",
"options": {}
},
"typeVersion": 1
},
{
"id": "24f22a31-03a3-4faf-81f4-3c38c0956ee4",
"name": "Aggregate Scraped Content",
"type": "n8n-nodes-base.aggregate",
"position": [
3136,
3872
],
"parameters": {
"options": {},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "data"
}
]
}
},
"typeVersion": 1
},
{
"id": "a4d34aab-1af2-4196-85f5-1a2d832969dd",
"name": "Add Images to Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
2944,
3504
],
"parameters": {
"columns": {
"value": {
"Images": "={{ $json.links.join('\\n\\n') }}",
"Website": "={{ $('Set Website').item.json.website_url }}"
},
"schema": [
{
"id": "Website",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Website",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Links",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Links",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Scraped Content",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Scraped Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Images",
"type": "string",
"display": true,
"required": false,
"displayName": "Images",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Website"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": "your-sheet-name",
"documentId": "your-document-id"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "ZVbWK0SlohYDlZYO",
"name": "Ewere"
}
},
"typeVersion": 4.7
},
{
"id": "6afbfad8-b80f-4a0d-81b4-9138cc2af46a",
"name": "Add Links to Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
2944,
3696
],
"parameters": {
"columns": {
"value": {
"Links": "={{ $json.links.join('\\n\\n') }}",
"Website": "={{ $('Set Website').item.json.website_url }}"
},
"schema": [
{
"id": "Website",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Website",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Links",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Links",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Scraped Content",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Scraped Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Images",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Images",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Website"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": "your-sheet-name",
"documentId": "your-document-id"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "ZVbWK0SlohYDlZYO",
"name": "Ewere"
}
},
"typeVersion": 4.7
},
{
"id": "35ae2c30-a93a-4fd2-82b6-07d2f4c56c88",
"name": "Add Scraped Content to Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
3344,
3872
],
"parameters": {
"columns": {
"value": {
"Website": "={{ $('Set Website').item.json.website_url }}",
"Scraped Content": "={{ $json.data.join('\\n\\n').slice(0, 50000) }}"
},
"schema": [
{
"id": "Website",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Website",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Links",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Links",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Scraped Content",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Scraped Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Images",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Images",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Website"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": "your-sheet-name",
"documentId": "your-document-id"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "ZVbWK0SlohYDlZYO",
"name": "Ewere"
}
},
"typeVersion": 4.7
},
{
"id": "c3f7b022-db11-400c-baaa-77392acfb991",
"name": "注意:表格集成",
"type": "n8n-nodes-base.stickyNote",
"position": [
3232,
4048
],
"parameters": {
"color": 3,
"width": 444,
"height": 176,
"content": "## 📊 表格集成节点"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"Set Website": {
"main": [
[
{
"node": "Scrape Homepage",
"type": "main",
"index": 0
}
]
]
},
"Split Links": {
"main": [
[
{
"node": "Remove Duplicate Links",
"type": "main",
"index": 0
}
]
]
},
"Manual Trigger": {
"main": [
[
{
"node": "Set Website",
"type": "main",
"index": 0
}
]
]
},
"Aggregate Links": {
"main": [
[
{
"node": "Add Links to Sheet",
"type": "main",
"index": 0
}
]
]
},
"Scrape Homepage": {
"main": [
[
{
"node": "Extract Links from HTML",
"type": "main",
"index": 0
}
]
]
},
"Aggregate Images": {
"main": [
[
{
"node": "Add Images to Sheet",
"type": "main",
"index": 0
}
]
]
},
"Convert to Markdown": {
"main": [
[
{
"node": "Aggregate Scraped Content",
"type": "main",
"index": 0
}
]
]
},
"Scrape Content Links": {
"main": [
[
{
"node": "Convert to Markdown",
"type": "main",
"index": 0
}
]
]
},
"Filter Real Hyperlinks": {
"main": [
[
{
"node": "Separate Images and Links",
"type": "main",
"index": 0
}
]
]
},
"Remove Duplicate Links": {
"main": [
[
{
"node": "Filter Real Hyperlinks",
"type": "main",
"index": 0
}
]
]
},
"Extract Links from HTML": {
"main": [
[
{
"node": "Split Links",
"type": "main",
"index": 0
}
]
]
},
"Aggregate Scraped Content": {
"main": [
[
{
"node": "Add Scraped Content to Sheet",
"type": "main",
"index": 0
}
]
]
},
"Separate Images and Links": {
"main": [
[
{
"node": "Aggregate Images",
"type": "main",
"index": 0
}
],
[
{
"node": "Aggregate Links",
"type": "main",
"index": 0
},
{
"node": "Scrape Content Links",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
这是一个高级难度的通用自动化工作流。适合高级用户,包含 16+ 个节点的复杂工作流
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
在可视化参考库中探索n8n节点
在可视化参考库中探索n8n节点
If
Ftp
Set
+93
113 节点 · I versus AI
Other
API架构提取器
API架构提取器
If
Set
Code
+22
88 节点 · Polina Medvedieva
Engineering
(Duc)深度研究市场模板
集成PerplexityAI研究和OpenAI内容的多层级WordPress博客生成器
If
Set
Xml
+28
132 节点 · Daniel Ng
AI
灵活新闻聚合器 - 多源集成、AI分析和可设置频道
多源新闻策展系统,集成Mistral AI分析、摘要和自定义频道
If
Set
Xml
+32
120 节点 · Hybroht
Content Creation
潜在客户开发与邮件工作流
使用Google Maps、SendGrid和AI自动化B2B潜在客户开发与邮件营销
If
Set
Code
+21
141 节点 · Ezema Kingsley Chibuzo
Lead Generation
使用GPT-4.1、Outlook和Mem.ai自动化Microsoft Teams会议分析
使用GPT-4.1、Outlook和Mem.ai自动化Microsoft Teams会议分析
If
Set
Code
+19
61 节点 · Wayne Simpson
HR
工作流信息
难度等级
高级
节点数量:22
分类:-
节点类型:12
作者
Daniel Nkencho
@daniel-automates · AI Automation Consultant | Helping Business Owners Implement AI Systems for Growth and Lead Gen
外部链接
在 n8n.io 上查看 →
分享此工作流