Univer Clipsheet - 看我手把手教你从零开始构建自己的爬虫插件(2)
相关信息:
前言
1. 手动选择元素
"@univer-clipsheet-core/ui": "workspace:*"
import { ElementInspectService } from '@univer-clipsheet-core/ui';
// Single inspector instance used by the snippets in this section.
const elementInspectService = new ElementInspectService();

// Register a listener that runs whenever a page element is clicked
// while inspection is active.
elementInspectService.shadowComponent.onInspectElement(function (element) {
    console.log('Inspect Element:', element);
});

// Turn element inspection on from a timer task (0 ms delay) rather than synchronously.
setTimeout(function () {
    elementInspectService.shadowComponent.activate();
}, 0);
/**
 * Returns the final entry of `arr`.
 * For an empty array the lookup yields `undefined` at runtime, so callers
 * should truthiness-check the result before using it.
 */
const last = <T>(arr: T[]) => {
    const finalIndex = arr.length - 1;
    return arr[finalIndex];
};
elementInspectService.shadowComponent.onInspectElement((element) => {
    // Last entry of the <table> matches for the clicked element
    // (the nearest matched table, per the original notes).
    const nearestTable = last(checkElementTable(element));
    // Last entry of the table-like matches, as an ExtractionParams object.
    const nearestTableLike = last(checkElementApproximationTable(element));

    // Runs for every clicked page element.
    console.log('Inspect Element:', element);

    // A real <table> match wins: build the sheet straight from the element.
    if (nearestTable) {
        const tableSheet = generateSheetByElement(nearestTable as HTMLTableElement);
        // Log the generated sheet data, then the table element it came from.
        console.log('Inspect Table:', tableSheet);
        console.log('Inspect Table success with element:', nearestTable);
        return;
    }

    // Otherwise fall back to the table-like structure, if one was matched.
    if (nearestTableLike) {
        const tableLikeSheet = generateSheetByExtractionParams(nearestTableLike);
        // Log the generated sheet data, then the matched table-like element.
        console.log('Inspect Table:', tableLikeSheet);
        console.log('Inspect Table success with element:', nearestTableLike.element);
        return;
    }

    // Neither kind of match was found for this element.
    console.log('Not found table with element', element);
});
上面的代码都有注释,可以了解具体做了些什么:主要是对点击的元素进行最近的 table 标签匹配,或者类 table 元素的匹配,匹配成功后生成 initialSheet 数据对象。
2. Ajax响应拦截
/**
 * Replaces `window.XMLHttpRequest` and `window.fetch` with wrappers that also
 * report every completed response to `onResponse`.
 *
 * - XHR: once `readyState` reaches 4, `onResponse` receives `xhr.response` as-is.
 * - fetch: `onResponse` receives the JSON-decoded body of a clone of the
 *   response; bodies that are not valid JSON are skipped.
 *
 * The page still gets the genuine XHR instance and the original fetch promise,
 * so its own success and error handling is unaffected.
 *
 * @param onResponse Called with each intercepted response payload.
 */
function interceptRequest(onResponse: (response: any) => void) {
    const XHR = XMLHttpRequest;
    const _fetch = fetch;

    // Intercept XMLHttpRequest.
    // A constructor that returns an object makes `new innerXHR()` evaluate to
    // that object, so callers receive a real XHR with our listener attached.
    // TypeScript cannot type a plain function as constructable, hence the cast.
    const innerXHR = function () {
        const xhr = new XHR();
        // Deliberately synchronous: an async listener would turn a throwing
        // callback into an unhandled promise rejection.
        xhr.addEventListener('readystatechange', () => {
            if (xhr.readyState === 4) {
                onResponse(xhr.response);
            }
        }, false);
        return xhr;
    } as unknown as typeof XMLHttpRequest;
    innerXHR.prototype = XHR.prototype;
    // Copy the enumerable own properties of the native constructor onto the
    // wrapper so static lookups keep working on the replaced global.
    for (const [key, val] of Object.entries(XHR)) {
        Reflect.set(innerXHR, key, val);
    }

    // Reads a clone of the response as JSON and reports it. Every failure
    // (network error, non-JSON body, throwing callback) is swallowed here on
    // purpose: interception is best-effort and must never surface as an extra
    // unhandled rejection on top of the page's own error handling.
    const reportJson = async (pending: Promise<Response>): Promise<void> => {
        try {
            const response = await pending;
            if (response instanceof Response) {
                onResponse(await response.clone().json());
            }
        } catch {
            // Best-effort only; the caller still observes the original promise.
        }
    };

    // Intercept fetch.
    const innerFetch: typeof _fetch = (resource, initOptions) => {
        const fetchedResponse = _fetch(resource, initOptions);
        void reportJson(fetchedResponse);
        return fetchedResponse;
    };

    window.XMLHttpRequest = innerXHR;
    window.fetch = innerFetch;
}
/**
 * Produces a plain-JSON copy of `response` by round-tripping it through
 * `JSON.stringify` / `JSON.parse`.
 *
 * Returns `null` whenever either step throws (for example on circular
 * structures, or on `undefined`, which does not stringify to valid JSON text).
 */
function serializeToJSON(response: unknown) {
    try {
        const encoded = JSON.stringify(response);
        return JSON.parse(encoded);
    } catch {
        return null;
    }
}
// Forward every intercepted response to the content script via window messaging.
interceptRequest((payload) => {
    const message = {
        type: 'AJAX_INTERCEPT_MESSAGE',
        // Reduce the payload to plain JSON before posting it.
        response: serializeToJSON(payload),
    };
    postMessage(message);
});
"build:ajax-interceptor": "npx esbuild src/ajax-interceptor.ts --bundle --outfile=public/ajax-interceptor.js"
/**
 * Starts the AJAX interceptor: injects the script at `scriptSrc` into the page
 * and, once it has loaded, relays every `AJAX_INTERCEPT_MESSAGE` window message
 * to `onMessage`.
 *
 * @param scriptSrc URL of the interceptor script to inject.
 * @param onMessage Called with the `response` field of each intercepted message.
 * @returns A function that stops listening and removes the injected script element.
 */
function startAjaxIntercept(scriptSrc: string, onMessage: (message: unknown) => void) {
    const handleWindowMessage = (event: MessageEvent) => {
        // Only accept messages this window posted to itself; other frames and
        // windows can post arbitrary data here.
        if (event.source !== window) {
            return;
        }
        // `event.data` can be anything (including null), so narrow before reading fields.
        const message: unknown = event.data;
        if (typeof message !== 'object' || message === null) {
            return;
        }
        const { type, response } = message as { type?: unknown; response?: unknown };
        if (type === 'AJAX_INTERCEPT_MESSAGE') {
            onMessage(response);
        }
    };

    const script = document.createElement('script');
    script.src = scriptSrc;
    script.onload = () => {
        window.addEventListener('message', handleWindowMessage);
    };
    // `document.body` is null when this runs before the body has been parsed.
    (document.body ?? document.documentElement).appendChild(script);

    return () => {
        // Prevent a late load event from subscribing after disposal, then undo
        // both side effects: the message listener and the injected element.
        script.onload = null;
        window.removeEventListener('message', handleWindowMessage);
        script.remove();
    };
}
// Inject the bundled interceptor and try to turn each intercepted response into sheets.
startAjaxIntercept(chrome.runtime.getURL('content/ajax-interceptor.js'), (payload) => {
    // Falsy payloads (e.g. null) carry nothing to convert.
    if (!payload) {
        return;
    }

    console.log('AJAX response', payload);

    const sheets = ajaxJsonToTable([payload as UnknownJson]);
    // Only report when at least one sheet was produced from the response.
    if (sheets.length > 0) {
        console.log('AJAX sheets from response', sheets);
    }
});
结语
- 想直接体验Univer Clipsheet功能的可直接下载商店版本: https://chromewebstore.google.com/detail/univer-clipsheet-an-ai-dr/mbcpbomfebacllmjjefeifejbbibbope
- 有任何问题或建议,也可以直接到我们的 GitHub 仓库提 issue: https://github.com/dream-num/univer-clipsheet