Inside a web worker, I have an html string like:
在web worker中,我有一个html字符串,如:
"<div id='foo'> <img src='bar'></img> <ul id='baz'></ul> </div>"
Is there any library I can import to easily access id and src attributes of the different tags ? Is regex the only way inside a worker ?
我可以导入任何库来轻松访问不同标签的id和src属性吗?正则表达式是工人内部的唯一途径吗?
1 个解决方案
#1
2
There are two ways to solve this problem efficiently:
有两种方法可以有效地解决这个问题:
Regex
With the risk of getting false positives, you can use something like:
有获得误报的风险,您可以使用以下内容:
var pattern = /<img [^>]*?src=(["'])((?:[^"']+|(?!\1)["'])*)(\1)/i;
var match = string.match(pattern);
var src = match ? match[2] : '';
Built-in parser & messaging
If getting the HTML right is a critical requirement, just let the browser parse the HTML, by passing the string to the caller. Here's a full example:
如果正确获取HTML是一项关键要求,只需让浏览器通过将字符串传递给调用者来解析HTML。这是一个完整的例子:
Caller:
呼叫者:
var worker = new Worker('worker.js');
worker.addEventListener('message', function(e) {
if (!e.data) return;
if (e.data.method === 'getsrc') {
// Unlike document.createElement, etc, the following method does not
// load the image when the HTML is parsed
var doc = document.implementation.createHTMLDocument('');
doc.body.innerHTML = e.data.data;
var images = doc.getElementsByTagName('img');
var result = [];
for (var i=0; i<images.length; i++) {
result.push(images[i].getAttribute('src'));
}
worker.postMessage({
messageID: e.data.messageID,
result: result
});
} else if (e.data.method === 'debug') {
console.log(e.data.data);
}
});
worker.js
worker.js
// A simple generic messaging API
var callbacks = {};
var lastMessageID = 0;
addEventListener('message', function(e) {
if (callbacks[e.data.messageID]) {
callbacks[e.data.messageID](e.data.result);
}
});
function sendRequest(method, data, callback) {
var messageID = ++lastMessageID;
if (callback) callbacks[messageID] = callback;
postMessage({
method: method,
data: data,
messageID: messageID
});
}
// Example:
sendRequest('getsrc',
'<img src="foo.png">' +
"<img src='bar.png'>" +
'<textarea><img src="should.not.be.visible"></textarea>',
function(result) {
sendRequest('debug', 'Received: ' + result.join(', '));
}
);
#1
2
There are two ways to solve this problem efficiently:
有两种方法可以有效地解决这个问题:
Regex
With the risk of getting false positives, you can use something like:
有获得误报的风险,您可以使用以下内容:
var pattern = /<img [^>]*?src=(["'])((?:[^"']+|(?!\1)["'])*)(\1)/i;
var match = string.match(pattern);
var src = match ? match[2] : '';
Built-in parser & messaging
If getting the HTML right is a critical requirement, just let the browser parse the HTML, by passing the string to the caller. Here's a full example:
如果正确获取HTML是一项关键要求,只需让浏览器通过将字符串传递给调用者来解析HTML。这是一个完整的例子:
Caller:
呼叫者:
var worker = new Worker('worker.js');
worker.addEventListener('message', function(e) {
if (!e.data) return;
if (e.data.method === 'getsrc') {
// Unlike document.createElement, etc, the following method does not
// load the image when the HTML is parsed
var doc = document.implementation.createHTMLDocument('');
doc.body.innerHTML = e.data.data;
var images = doc.getElementsByTagName('img');
var result = [];
for (var i=0; i<images.length; i++) {
result.push(images[i].getAttribute('src'));
}
worker.postMessage({
messageID: e.data.messageID,
result: result
});
} else if (e.data.method === 'debug') {
console.log(e.data.data);
}
});
worker.js
worker.js
// A simple generic messaging API
var callbacks = {};
var lastMessageID = 0;
addEventListener('message', function(e) {
if (callbacks[e.data.messageID]) {
callbacks[e.data.messageID](e.data.result);
}
});
function sendRequest(method, data, callback) {
var messageID = ++lastMessageID;
if (callback) callbacks[messageID] = callback;
postMessage({
method: method,
data: data,
messageID: messageID
});
}
// Example:
sendRequest('getsrc',
'<img src="foo.png">' +
"<img src='bar.png'>" +
'<textarea><img src="should.not.be.visible"></textarea>',
function(result) {
sendRequest('debug', 'Received: ' + result.join(', '));
}
);