网站地图页面提取器
高级
这是一个 Market Research、Multimodal AI 领域的自动化工作流,包含 19 个节点,主要使用 If、Set、Code、Filter、FormTrigger 等节点。功能概述(网站地图页面提取器):发现、清理网站 URL,并将其保存至 Google Sheets。
前置要求
- 可能需要目标 API 的认证凭证
- Google Sheets API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "ytgCDUiHYhFkJqlY",
"meta": {
"instanceId": "bc8ca75c203589705ae2e446cad7181d6f2a7cc1766f958ef9f34810e53b8cb2",
"templateCredsSetupCompleted": true
},
"name": "网站地图页面提取器",
"tags": [],
"nodes": [
{
"id": "2464b9f1-f0fe-41df-9941-acad5d5dedb9",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-256,
928
],
"parameters": {
"color": 2,
"height": 336,
"content": "## 构建网站地图 URL:"
},
"typeVersion": 1
},
{
"id": "1b1b00a9-6825-4bae-b74a-6c86d8972299",
"name": "便签 1",
"type": "n8n-nodes-base.stickyNote",
"position": [
304,
560
],
"parameters": {
"color": 2,
"width": 400,
"height": 432,
"content": "## 筛选和提取网站地图文件"
},
"typeVersion": 1
},
{
"id": "e0c1c670-5e7f-4146-a605-e5972d786cf8",
"name": "便签 2",
"type": "n8n-nodes-base.stickyNote",
"position": [
720,
560
],
"parameters": {
"color": 2,
"width": 432,
"height": 432,
"content": "## 获取网站地图 XML 并提取页面 URL"
},
"typeVersion": 1
},
{
"id": "c95c2d0b-2a6b-471a-b168-1f33d4dd6cb4",
"name": "便签 3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1488,
688
],
"parameters": {
"color": 2,
"width": 208,
"height": 304,
"content": "将每个爬取的页面 URL 附加到 List_Of_All_URLs 工作表中,通过自动匹配现有条目避免重复。"
},
"typeVersion": 1
},
{
"id": "1b2c2bde-4edc-4dee-97c1-28f1b1ee8b94",
"name": "便签 5",
"type": "n8n-nodes-base.stickyNote",
"position": [
288,
1152
],
"parameters": {
"color": 2,
"width": 256,
"height": 304,
"content": "向每个生成的网站地图 URL 发送 HTTP 请求并获取原始响应,以检查网站地图是否存在且包含有效数据。"
},
"typeVersion": 1
},
{
"id": "c9f75b84-334a-4926-bdc4-62e59e605b97",
"name": "便签6",
"type": "n8n-nodes-base.stickyNote",
"position": [
1184,
640
],
"parameters": {
"color": 2,
"width": 272,
"height": 352,
"content": "## 排除网站地图 URL:"
},
"typeVersion": 1
},
{
"id": "5ae6ec48-76f1-44ca-8387-ce5f9a84ac9d",
"name": "便签7",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1248,
784
],
"parameters": {
"width": 464,
"height": 640,
"content": "## 📌 自动化摘要:"
},
"typeVersion": 1
},
{
"id": "342fb312-ba7a-427e-907e-b57a206b6252",
"name": "便签 4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-672,
928
],
"parameters": {
"color": 2,
"width": 400,
"height": 336,
"content": "## 表单输入与 URL 准备"
},
"typeVersion": 1
},
{
"id": "e69d955f-2212-4e5b-afa8-998cd18e11f1",
"name": "输入网站 URL",
"type": "n8n-nodes-base.formTrigger",
"position": [
-640,
1088
],
"webhookId": "8a207eb4-74f2-4577-9fe6-98e87f2e9a0d",
"parameters": {
"options": {},
"formTitle": "Sitemap Page Extractor",
"formFields": {
"values": [
{
"fieldLabel": "Website URL"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "4d33e736-fb1d-462f-bad2-46a68bc1b837",
"name": "准备网站 URL",
"type": "n8n-nodes-base.set",
"position": [
-432,
1088
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "2a310b45-ec77-41dd-9436-f3b58b7df477",
"name": "url",
"type": "string",
"value": "={{ $json[\"Website URL\"] }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "604a9837-436b-4701-9d17-cb59cb2a2099",
"name": "构建网站地图 URL",
"type": "n8n-nodes-base.code",
"position": [
-192,
1088
],
"parameters": {
"jsCode": "const inputData = $input.first().json;\nlet baseUrl = inputData.url || inputData.website_url || '';\n\nif (!baseUrl) {\n throw new Error(\"No URL provided\");\n}\n\nbaseUrl = baseUrl.replace(/\\/$/, '');\n\nlet domain = '';\nif (baseUrl.includes('://')) {\n const parts = baseUrl.split('/');\n domain = parts[0] + '//' + parts[2];\n} else {\n domain = 'https://' + baseUrl;\n}\n\nconst urls = [\n `${domain}/robots.txt`,\n `${domain}/sitemap.xml`,\n `${domain}/sitemap_index.xml`,\n `${domain}/sitemap-index.xml`,\n `${domain}/sitemap1.xml`,\n `${domain}/sitemap/sitemap.xml`,\n `${domain}/sitemaps/sitemap.xml`\n];\n\nreturn urls.map(url => ({ sitemap_url: url }));"
},
"typeVersion": 2
},
{
"id": "ad781031-ef3e-4378-92d0-837d3e8fbaf2",
"name": "网站地图 URL 检查",
"type": "n8n-nodes-base.splitInBatches",
"position": [
80,
1088
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "8ca63a55-44bc-4e72-906b-a6e2ed5f9c31",
"name": "获取网站地图数据",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
368,
1280
],
"parameters": {
"url": "={{ $json.sitemap_url }}",
"options": {
"response": {
"response": {
"responseFormat": "text"
}
}
}
},
"typeVersion": 4.2
},
{
"id": "02698f76-9606-426d-b953-3f2adfd80a6f",
"name": "筛选非空网站地图响应",
"type": "n8n-nodes-base.if",
"position": [
336,
832
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "bf0c94e8-a8c6-4419-9b18-cf5d1a01e577",
"operator": {
"type": "string",
"operation": "notEmpty",
"singleValue": true
},
"leftValue": "={{ $json.data }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "2554e396-8d0b-44c5-ab1e-91a192751acd",
"name": "提取网站地图 URL",
"type": "n8n-nodes-base.code",
"position": [
560,
816
],
"parameters": {
"jsCode": "const allUrls = [];\n\n$input.all().forEach(item => {\n const content = item.json.data || '';\n\n const robotMatches = [...content.matchAll(/Sitemap:\\s*(\\S+)/gi)];\n robotMatches.forEach(match => {\n allUrls.push(match[1]);\n });\n\n const locMatches = [...content.matchAll(/<loc>\\s*(.*?)\\s*<\\/loc>/gi)];\n locMatches.forEach(match => {\n allUrls.push(match[1]);\n });\n});\n\nconst uniqueUrls = [...new Set(allUrls)];\n\nreturn uniqueUrls.map(url => ({ json: { sitemap_url: url }}));"
},
"typeVersion": 2
},
{
"id": "95f24edc-bef3-481d-954c-f4245af73faa",
"name": "获取网站地图页面 XML",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
784,
816
],
"parameters": {
"url": "={{ $json.sitemap_url }}",
"method": "=GET",
"options": {
"response": {
"response": {
"responseFormat": "text"
}
}
}
},
"typeVersion": 4.2
},
{
"id": "134f2ded-0200-44a7-8df1-343d76cf16d0",
"name": "从网站地图提取页面 URL",
"type": "n8n-nodes-base.code",
"position": [
1008,
816
],
"parameters": {
"jsCode": "const urls = new Set();\n\n$input.all().forEach(item => {\n const content = item.json.data || '';\n\n const xmlMatches = [...content.matchAll(/<loc>(.*?)<\\/loc>/gi)];\n xmlMatches.forEach(match => {\n const url = match[1].trim();\n urls.add(url);\n });\n\n const htmlMatches = [...content.matchAll(/<a\\s[^>]*href=[\"']([^\"']+)[\"']/gi)];\n htmlMatches.forEach(match => {\n const url = match[1].trim();\n if (url.startsWith('/') || url.startsWith('http')) {\n urls.add(url);\n }\n });\n});\n\nif (urls.size === 0) {\n return [{ json: { message: \"No URLs found in XML or HTML content\" }}];\n}\n\nreturn Array.from(urls).map(url => ({\n json: { page_url: url }\n}));"
},
"typeVersion": 2
},
{
"id": "9708b71a-34c3-42fd-9a3c-15ad7d4cd7c3",
"name": "排除网站地图 URL",
"type": "n8n-nodes-base.filter",
"position": [
1296,
816
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "303d0665-ac2d-452b-8678-a05e53a7372b",
"operator": {
"type": "string",
"operation": "notContains"
},
"leftValue": "={{ $json.page_url }}",
"rightValue": "sitemap"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "535579fc-dcad-4386-920c-6f8857a0bd70",
"name": "将页面 URL 保存到工作表",
"type": "n8n-nodes-base.googleSheets",
"position": [
1568,
816
],
"parameters": {
"columns": {
"value": {},
"schema": [],
"mappingMode": "defineBelow",
"matchingColumns": [
"List URLs"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": "List_Of_All_URLs",
"documentId": "YOUR_GOOGLE_SHEET_URL"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "Aam5AW9oxNhvIGd2",
"name": "Shiv@incrementors.com - Google Sheets"
}
},
"typeVersion": 4.6
}
],
"active": false,
"pinData": {},
"settings": {
"callerPolicy": "any",
"executionOrder": "v1"
},
"versionId": "ff2c3009-f913-4cd2-9042-a36d056b6f90",
"connections": {
"输入网站 URL": {
"main": [
[
{
"node": "准备网站 URL",
"type": "main",
"index": 0
}
]
]
},
"网站地图 URL 检查": {
"main": [
[
{
"node": "筛选非空网站地图响应",
"type": "main",
"index": 0
}
],
[
{
"node": "获取网站地图数据",
"type": "main",
"index": 0
}
]
]
},
"构建网站地图 URL": {
"main": [
[
{
"node": "网站地图 URL 检查",
"type": "main",
"index": 0
}
]
]
},
"获取网站地图数据": {
"main": [
[
{
"node": "网站地图 URL 检查",
"type": "main",
"index": 0
}
]
]
},
"准备网站 URL": {
"main": [
[
{
"node": "构建网站地图 URL",
"type": "main",
"index": 0
}
]
]
},
"提取网站地图 URL": {
"main": [
[
{
"node": "获取网站地图页面 XML",
"type": "main",
"index": 0
}
]
]
},
"获取网站地图页面 XML": {
"main": [
[
{
"node": "从网站地图提取页面 URL",
"type": "main",
"index": 0
}
]
]
},
"排除网站地图 URL": {
"main": [
[
{
"node": "将页面 URL 保存到工作表",
"type": "main",
"index": 0
}
]
]
},
"从网站地图提取页面 URL": {
"main": [
[
{
"node": "排除网站地图 URL",
"type": "main",
"index": 0
}
]
]
},
"筛选非空网站地图响应": {
"main": [
[
{
"node": "提取网站地图 URL",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
这是一个高级难度的工作流,适用于 Market Research、Multimodal AI 等场景。工作流较为复杂,共包含 19 个节点,适合高级用户使用。
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Google Sheets API)需要您自行配置凭证,部分服务可能需要您自行付费。
相关工作流推荐
多平台价格查找器:使用 Bright Data 和 Telegram 抓取价格
多平台价格查找器:使用 Bright Data、Claude AI 和 Telegram 抓取价格
If
Code
Wait
+9
85 节点Incrementors
Market Research
博客发布器 – 完整的AI驱动内容研究、创作、优化与发布自动化
使用 Gemini、Ideogram AI 和 WordPress 自动化博客创建与发布
If
Set
Code
+9
35 节点Incrementors
Content Creation
基于AI的潜在客户生成(Apollo、LinkedIn研究和4步个性化邮件)
基于AI的潜在客户生成:使用Apollo、LinkedIn研究和4步个性化邮件
If
Set
Code
+8
30 节点Gain FLow AI
Lead Generation
WordPress博客自动化专业版(深度研究)v2.1市场
使用GPT-4o、Perplexity AI和多语言支持自动化SEO优化的博客创建
If
Set
Xml
+27
125 节点Daniel Ng
Content Creation
MetaAds创意洞察研究员v1.4
使用Google Vision和Video Intelligence API分析Meta广告创意
If
Set
Code
+9
32 节点Kirill Khatkevich
Market Research
使用GPT-5 nano和Google Sheets抓取网站并回答问题
使用GPT-5 nano和Google Sheets抓取网站并回答问题
If
Set
Xml
+17
44 节点Oriol Seguí
Market Research