input
输入
books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters
the idea is to split the string by dots but ignore those inside square brackets
我们的想法是用点分割字符串但忽略方括号内的字符串
so after splitting there should be an array
所以分裂后应该有一个数组
[
'books',
'copies',
'[read_by.[p_id="65784"].page=5468]',
'text',
'[paragraph="20"]',
'letters'
]
I already looked at this answer, but it doesn't work with nested square brackets, which is what I need. Also, I'm using JavaScript, so negative lookbehinds are not supported.
我已经看过这个答案,但它不适用于嵌套方括号,这是我需要的。我也使用javascript,因此不支持负面的lookbehinds。
Help is much appreciated.
非常感谢帮助。
Edit 1: expand example
编辑1:展开示例
2 个解决方案
#1
1
This isn't possible with a regex in JavaScript, because JavaScript regexes cannot match nested structures. You need to use the good old method: a stack.
使用Javascript中的无法匹配嵌套结构的正则表达式是不可能的。你需要使用好的旧方法:堆栈。
// Split a dot-separated path on the dots that lie outside of any [...] group.
// Brackets may be nested, so the current nesting depth is tracked while scanning.
var text = 'books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters';
var result = [];
var segment = ''; // characters collected for the segment currently being read
var depth = 0;    // number of '[' seen that have not yet been closed by ']'
var pos = 0;
while (pos < text.length) {
  var ch = text.charAt(pos);
  pos += 1;
  if (ch === '.' && depth === 0) {
    // A dot at depth 0 ends the current segment; the dot itself is discarded.
    result.push(segment);
    segment = '';
  } else {
    if (ch === '[') {
      depth += 1;
    } else if (ch === ']') {
      depth -= 1;
    }
    // Brackets, nested dots and ordinary characters all stay in the segment.
    segment += ch;
  }
}
// The last segment has no dot after it, so it is pushed after the scan.
result.push(segment);
console.log(result);
#2
0
You need to write a parser for this since a JavaScript regex does not support regex recursion, nor balanced constructs.
您需要为此编写解析器,因为JavaScript正则表达式不支持正则表达式递归,也不支持平衡结构。
The point of these functions is that they keep a stack — here simply a depth counter (`level`, `openBrackets`) — of opening delimiters (in your case, `[`) and then check the stack state: if the stack is not empty, the `.` that was found is considered to be inside the brackets and is simply appended to the current match. Otherwise, when the stack is empty, the `.` is considered to be outside the brackets and is used as a split point (the current value is appended to the output array, `result` or `ret`).
这些函数的要点是:它们维护一个记录左分隔符(在你的例子中是 [)的堆栈(level、openBrackets),然后检查堆栈状态:如果堆栈不为空,则找到的 . 被视为位于括号内,因此只是追加到当前匹配中;否则,当堆栈为空时,找到的 . 被视为位于括号之外,因此用作拆分点(当前值被追加到输出数组 result 或 ret 中)。
/**
 * Splits a string on every "." that is not enclosed in square brackets.
 * Brackets may be nested; a dot is a separator only at nesting depth 0.
 *
 * Empty segments produced by a leading dot or by consecutive dots are kept,
 * while an empty segment after a trailing dot is dropped. An empty input
 * yields an empty array.
 *
 * @param {string} string - The dot-separated text to split.
 * @returns {string[]} The segments, in order, without the separating dots.
 */
function splitByDotsOutsideBrackets(string){
  var openBrackets = 0, ret = [], start = 0, i, ch;
  for (i = 0; i < string.length; i++) {
    ch = string.charAt(i);
    if (ch === '[') {
      openBrackets++;
    } else if (ch === ']') {
      // Never let the depth go negative: a stray "]" would otherwise make
      // every later top-level dot look nested and disable splitting.
      if (openBrackets > 0) openBrackets--;
    } else if (ch === '.' && openBrackets === 0) {
      // Cut from the remembered start instead of re-slicing the remainder,
      // so the input is scanned once and the parameter is never reassigned.
      ret.push(string.substring(start, i));
      start = i + 1;
    }
  }
  // Emit whatever follows the last separator, unless nothing follows it.
  if (start < string.length) ret.push(string.substring(start));
  return ret;
}
// Example: split the sample path from the question and log the array of segments.
var res = splitByDotsOutsideBrackets(
  'books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters'
);
console.log(res);
Or another variation:
或另一种变化:
/**
 * Splits a string on the dots that are not inside (possibly nested) square
 * brackets. Empty segments are never emitted, and a "]" with no matching "["
 * leaves the nesting depth at zero.
 *
 * @param {string} str - The dot-separated text to split.
 * @returns {string[]} The non-empty segments, in order.
 */
function splitOnDotsOutsideNestedBrackets(str) {
  var pieces = [];
  var depth = 0;        // count of currently open '[' brackets
  var segmentStart = 0; // index where the segment being read begins
  var pos;
  var ch;

  // Records the text from segmentStart up to (not including) `end`,
  // skipping it when that range is empty.
  function emit(end) {
    if (segmentStart < end) {
      pieces.push(str.substring(segmentStart, end));
    }
  }

  for (pos = 0; pos < str.length; pos += 1) {
    ch = str[pos];
    if (ch === '[') {
      depth += 1;
    } else if (ch === ']') {
      if (depth > 0) {
        depth -= 1;
      }
    } else if (ch === '.' && depth === 0) {
      // Top-level dot: close the current segment and start the next one after it.
      emit(pos);
      segmentStart = pos + 1;
    }
  }

  // After the loop `pos` is the end of the scanned input; flush the tail.
  emit(pos);
  return pieces;
}
// Example: run the splitter on the sample path from the question and print the segments.
var s = 'books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters';
console.log(
  splitOnDotsOutsideNestedBrackets(s)
);
Adapted from one of my previous answers.
改编自我以前的一个答案。
#1
1
This isn't possible with a regex in JavaScript, because JavaScript regexes cannot match nested structures. You need to use the good old method: a stack.
使用Javascript中的无法匹配嵌套结构的正则表达式是不可能的。你需要使用好的旧方法:堆栈。
// Split a dot-separated path on the dots that lie outside of any [...] group.
// Brackets may be nested, so the current nesting depth is tracked while scanning.
var text = 'books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters';
var result = [];
var segment = ''; // characters collected for the segment currently being read
var depth = 0;    // number of '[' seen that have not yet been closed by ']'
var pos = 0;
while (pos < text.length) {
  var ch = text.charAt(pos);
  pos += 1;
  if (ch === '.' && depth === 0) {
    // A dot at depth 0 ends the current segment; the dot itself is discarded.
    result.push(segment);
    segment = '';
  } else {
    if (ch === '[') {
      depth += 1;
    } else if (ch === ']') {
      depth -= 1;
    }
    // Brackets, nested dots and ordinary characters all stay in the segment.
    segment += ch;
  }
}
// The last segment has no dot after it, so it is pushed after the scan.
result.push(segment);
console.log(result);
#2
0
You need to write a parser for this since a JavaScript regex does not support regex recursion, nor balanced constructs.
您需要为此编写解析器,因为JavaScript正则表达式不支持正则表达式递归,也不支持平衡结构。
The point of these functions is that they keep a stack — here simply a depth counter (`level`, `openBrackets`) — of opening delimiters (in your case, `[`) and then check the stack state: if the stack is not empty, the `.` that was found is considered to be inside the brackets and is simply appended to the current match. Otherwise, when the stack is empty, the `.` is considered to be outside the brackets and is used as a split point (the current value is appended to the output array, `result` or `ret`).
这些函数的要点是:它们维护一个记录左分隔符(在你的例子中是 [)的堆栈(level、openBrackets),然后检查堆栈状态:如果堆栈不为空,则找到的 . 被视为位于括号内,因此只是追加到当前匹配中;否则,当堆栈为空时,找到的 . 被视为位于括号之外,因此用作拆分点(当前值被追加到输出数组 result 或 ret 中)。
/**
 * Splits a string on every "." that is not enclosed in square brackets.
 * Brackets may be nested; a dot is a separator only at nesting depth 0.
 *
 * Empty segments produced by a leading dot or by consecutive dots are kept,
 * while an empty segment after a trailing dot is dropped. An empty input
 * yields an empty array.
 *
 * @param {string} string - The dot-separated text to split.
 * @returns {string[]} The segments, in order, without the separating dots.
 */
function splitByDotsOutsideBrackets(string){
  var openBrackets = 0, ret = [], start = 0, i, ch;
  for (i = 0; i < string.length; i++) {
    ch = string.charAt(i);
    if (ch === '[') {
      openBrackets++;
    } else if (ch === ']') {
      // Never let the depth go negative: a stray "]" would otherwise make
      // every later top-level dot look nested and disable splitting.
      if (openBrackets > 0) openBrackets--;
    } else if (ch === '.' && openBrackets === 0) {
      // Cut from the remembered start instead of re-slicing the remainder,
      // so the input is scanned once and the parameter is never reassigned.
      ret.push(string.substring(start, i));
      start = i + 1;
    }
  }
  // Emit whatever follows the last separator, unless nothing follows it.
  if (start < string.length) ret.push(string.substring(start));
  return ret;
}
// Example: split the sample path from the question and log the array of segments.
var res = splitByDotsOutsideBrackets(
  'books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters'
);
console.log(res);
Or another variation:
或另一种变化:
/**
 * Splits a string on the dots that are not inside (possibly nested) square
 * brackets. Empty segments are never emitted, and a "]" with no matching "["
 * leaves the nesting depth at zero.
 *
 * @param {string} str - The dot-separated text to split.
 * @returns {string[]} The non-empty segments, in order.
 */
function splitOnDotsOutsideNestedBrackets(str) {
  var pieces = [];
  var depth = 0;        // count of currently open '[' brackets
  var segmentStart = 0; // index where the segment being read begins
  var pos;
  var ch;

  // Records the text from segmentStart up to (not including) `end`,
  // skipping it when that range is empty.
  function emit(end) {
    if (segmentStart < end) {
      pieces.push(str.substring(segmentStart, end));
    }
  }

  for (pos = 0; pos < str.length; pos += 1) {
    ch = str[pos];
    if (ch === '[') {
      depth += 1;
    } else if (ch === ']') {
      if (depth > 0) {
        depth -= 1;
      }
    } else if (ch === '.' && depth === 0) {
      // Top-level dot: close the current segment and start the next one after it.
      emit(pos);
      segmentStart = pos + 1;
    }
  }

  // After the loop `pos` is the end of the scanned input; flush the tail.
  emit(pos);
  return pieces;
}
// Example: run the splitter on the sample path from the question and print the segments.
var s = 'books.copies.[read_by.[p_id="65784"].page=5468].text.[paragraph="20"].letters';
console.log(
  splitOnDotsOutsideNestedBrackets(s)
);
Adapted from one of my previous answers.
改编自我以前的一个答案。