regexp incorrectly split: 按逗号分隔,但不拆分括号内的逗号(( 和 [ 两种括号都要考虑)

时间:2021-04-25 21:41:34

The regex idea: split on a comma that is not followed by any characters and then ) or ]. Moreover, both kinds of brackets, ( and [, should be considered. Assumption: the string contains valid (balanced) brackets.

regex思想:按逗号分隔,但该逗号后面不能跟着任意字符再接 ) 或 ]。此外,( 和 [ 两种括号都应该考虑。假设:字符串中的括号是有效(成对)的。

Here is my function:

这是我的函数:

/**
 * Log the pieces obtained by splitting `str` with the question's regex.
 *
 * The regex splits on a comma unless the text after it is a run of word
 * characters, `|`, whitespace and commas that ends in `)` or `]`. Any other
 * character between the comma and the closing bracket (for example `%`, `-`,
 * `.` or `(`) defeats the lookahead, which is why the inputs listed under
 * "Problems" are split incorrectly.
 *
 * @param {string} str - Text to split.
 * @returns {undefined} Nothing; the resulting array is written to the console.
 */
function spl(str) {
    const pieces = str.split(/\,(?!(?:[\w|\s]*\,?)*[\)\]])/);
    console.log(pieces);
}

Problems:

问题:

incorrectly: spl("tpr(7,4%), nitrita sals (sals- 1.2%, konservants E250)");
incorrectly: spl("tpr(7,4%), nitri(a,b,c[a,b])ta sals (sals- 1.2%, konservants E250),fsfs");
incorrectly: if there are brackets within brackets

Here are some examples:

下面是例子:

 str = "a,b (c,d,e)";
// expected: split into the strings "a" and "b (c,d,e)"

 str = "a,b [c,d,e]";
 // expected: split into the strings "a" and "b [c,d,e]"

 str = "tpr(7,4%), nitrita sals (sals- 1.2%, konservants E250)";
 // expected: split into "tpr(7,4%)" and "nitrita sals (sals- 1.2%, konservants E250)"

 str = "tpr(7,4%), nitri(a,b,c[a,b])ta sals (sals- 1.2%, konservants E250),fsfs";
 // expected: split into "tpr(7,4%)", "nitri(a,b,c[a,b])ta sals (sals- 1.2%, konservants E250)" and "fsfs"

  str = "šokolāde 47% (cukurs, kakao sviests, (SOJAS); vanilīns), pulv";
  // expected: split into two strings: "šokolāde 47% (cukurs, kakao sviests, (SOJAS); vanilīns)" and "pulv"

2 个解决方案

#1


2  

You can use negative lookahead assertion as in this regex:

您可以使用这个regex中的否定前视断言:

/,\s*(?![^()]*\)|[^\]\[]*\])/

RegEx Demo

RegEx演示

  • ,\s*: Match a comma followed by 0 or more whitespace characters
  • ,\s*:匹配一个逗号及其后的 0 个或多个空白字符
  • (?![^()]*\)|[^\]\[]*\]): is a negative lookahead expression, that asserts that we don't have a ) ahead following 0 or more non-round-bracket characters or don't have a ] ahead following 0 or more non-square-bracket characters.
  • (?![^()]*\)|[^\]\[]*\]):是一个否定前视(negative lookahead)表达式,断言逗号之后既不存在“0 个或多个非圆括号字符后跟 )”,也不存在“0 个或多个非方括号字符后跟 ]”。

PS: Note that this regex solution works with non-nested and unescaped brackets only. To handle nested brackets of the same type, use parser code like the function below.

注意,这个regex解决方案仅适用于非嵌套且未转义的括号。要处理相同类型的嵌套括号,请使用类似下面的解析器代码。

// Sample inputs taken from the question: commas inside (...) or [...] must
// not act as separators, including when the brackets are nested.
const arr = [
  'a,b (c,d,e)',
  'a,b [c,d,e]',
  'tpr(7,4%), nitrita sals (sals- 1.2%, konservants E250)',
  'tpr(7,4%), nitri(a,b,c[a,b])ta sals (sals- 1.2%, konservants E250),fsfs',
  'šokolāde 47% (cukurs, kakao sviests, (SOJAS); vanilīns), pulv',
];

// Print each input next to the array splitComma produces for it.
for (const input of arr) {
  console.log("*** Processing:", input, "=>", splitComma(input));
}

/**
 * Split `str` on every comma that is not inside round or square brackets.
 *
 * Brackets may nest to any depth and the two kinds may be mixed. Each piece
 * is trimmed of surrounding whitespace. A closing bracket with no matching
 * opener of its own kind is kept as ordinary text instead of driving the
 * depth negative, so it can neither suppress later top-level splits nor
 * cancel out an open bracket of the other kind.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} The trimmed pieces, in order; always at least one element.
 */
function splitComma(str) {
  const result = [];
  let roundDepth = 0;
  let squareDepth = 0;
  let piece = '';

  for (let i = 0, len = str.length; i < len; i++) {
    const ch = str.charAt(i);

    // Only a comma seen outside every bracket is a separator.
    if (ch === ',' && roundDepth === 0 && squareDepth === 0) {
      result.push(piece.trim());
      piece = '';
      continue;
    }

    piece += ch;

    if (ch === '(') {
      roundDepth++;
    } else if (ch === '[') {
      squareDepth++;
    } else if (ch === ')' && roundDepth > 0) {
      roundDepth--;
    } else if (ch === ']' && squareDepth > 0) {
      squareDepth--;
    }
  }

  // The text after the last separator (possibly empty) is the final piece.
  result.push(piece.trim());
  return result;
}

#2


1  

If you need to handle nesting, you're going to need to parse before you process; regex alone isn't going to handle it for you. This is not the best parser in the world, but it's off the top of my head and an example of the kind of thing you can write to handle this. Or just find a parser that's meant to handle this kind of use case.

如果您需要处理嵌套,那么您需要在处理之前先进行解析,仅靠regex无法为您处理它。这并不是世界上最好的解析器,只是我随手写出来的,用来示范处理这类问题时可以编写的代码。或者直接找一个专门处理这种用例的解析器。

// Sample input: commas appear both outside and inside (...) and [...] groups, and a (...) group is nested in the [...] group.
var test = "m,oo (ba,a) mbo,ool [sdf,lkj (sdfl,kj)] sd,fjk"
            
            /**
             * Cut `str` into consecutive pieces and mark the bracketed ones as excluded.
             *
             * A piece that starts at an opener and runs to its matching closer (both
             * included) gets `exclude: true`; all other text gets `exclude: false`.
             * Openers of the same kind nested inside an excluded region are counted, so
             * the region ends at the matching closer rather than at the first closer.
             * Pairs whose opener equals their closer (e.g. quotes) do not nest.
             * Joining every `str` in order reproduces the input.
             *
             * @param {string} str - Text to cut up.
             * @param {Array<string[]>} exclusionPairs - `[opener, closer]` pairs of single characters.
             * @returns {Array<{str: string, exclude: boolean}>} The pieces, in input order.
             */
            function groupStr(str, exclusionPairs){
                // opener -> closer; a Map cannot be hit by inherited Object.prototype keys
                const closerByOpener = new Map(exclusionPairs.map((pair) => [pair[0], pair[1]]))
                const arrayOfPieces = []
                let piece = ''
                let opener = null
                let closer = null
                let depth = 0

                // Emit the text gathered so far (if any) as one piece.
                const flush = (exclude) => {
                    if(piece.length){
                        arrayOfPieces.push({ str: piece, exclude })
                        piece = ''
                    }
                }

                // Iterating the split array replaces repeated shift() calls, each of
                // which may have to move every remaining element.
                for(const ch of str.split('')){
                    if(depth > 0){
                        piece += ch
                        // Closer is tested first so that opener === closer pairs still terminate.
                        if(ch === closer){
                            depth--
                            if(depth === 0) flush(true)
                        } else if(ch === opener){
                            depth++
                        }
                    } else if(closerByOpener.get(ch)){
                        flush(false)
                        piece = ch
                        opener = ch
                        closer = closerByOpener.get(ch)
                        depth = 1
                    } else {
                        piece += ch
                    }
                }

                // Trailing text, including a bracketed region that never closes, is not excluded.
                flush(false)

                console.log(arrayOfPieces)

                return arrayOfPieces
            }
            
            // Cut the sample string into bracketed (excluded) and plain pieces.
            let result = groupStr(test, [
                ["(", ")"],
                ["[", "]"]
            ])

            // Keep excluded pieces whole; split every other piece on its commas.
            let splitArray = []
            for (const { str, exclude } of result) {
                if (exclude) {
                    splitArray.push(str)
                } else {
                    splitArray.push(...str.split(","))
                }
            }

            console.log(splitArray)

#1


2  

You can use negative lookahead assertion as in this regex:

您可以使用这个regex中的否定前视断言:

/,\s*(?![^()]*\)|[^\]\[]*\])/

RegEx Demo

RegEx演示

  • ,\s*: Match a comma followed by 0 or more whitespace characters
  • ,\s*:匹配一个逗号及其后的 0 个或多个空白字符
  • (?![^()]*\)|[^\]\[]*\]): is a negative lookahead expression, that asserts that we don't have a ) ahead following 0 or more non-round-bracket characters or don't have a ] ahead following 0 or more non-square-bracket characters.
  • (?![^()]*\)|[^\]\[]*\]):是一个否定前视(negative lookahead)表达式,断言逗号之后既不存在“0 个或多个非圆括号字符后跟 )”,也不存在“0 个或多个非方括号字符后跟 ]”。

PS: Note that this regex solution works with non-nested and unescaped brackets only. To handle nested brackets of the same type, use parser code like the function below.

注意,这个regex解决方案仅适用于非嵌套且未转义的括号。要处理相同类型的嵌套括号,请使用类似下面的解析器代码。

// Sample inputs taken from the question: commas inside (...) or [...] must
// not act as separators, including when the brackets are nested.
const arr = [
  'a,b (c,d,e)',
  'a,b [c,d,e]',
  'tpr(7,4%), nitrita sals (sals- 1.2%, konservants E250)',
  'tpr(7,4%), nitri(a,b,c[a,b])ta sals (sals- 1.2%, konservants E250),fsfs',
  'šokolāde 47% (cukurs, kakao sviests, (SOJAS); vanilīns), pulv',
];

// Print each input next to the array splitComma produces for it.
for (const input of arr) {
  console.log("*** Processing:", input, "=>", splitComma(input));
}

/**
 * Split `str` on every comma that is not inside round or square brackets.
 *
 * Brackets may nest to any depth and the two kinds may be mixed. Each piece
 * is trimmed of surrounding whitespace. A closing bracket with no matching
 * opener of its own kind is kept as ordinary text instead of driving the
 * depth negative, so it can neither suppress later top-level splits nor
 * cancel out an open bracket of the other kind.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} The trimmed pieces, in order; always at least one element.
 */
function splitComma(str) {
  const result = [];
  let roundDepth = 0;
  let squareDepth = 0;
  let piece = '';

  for (let i = 0, len = str.length; i < len; i++) {
    const ch = str.charAt(i);

    // Only a comma seen outside every bracket is a separator.
    if (ch === ',' && roundDepth === 0 && squareDepth === 0) {
      result.push(piece.trim());
      piece = '';
      continue;
    }

    piece += ch;

    if (ch === '(') {
      roundDepth++;
    } else if (ch === '[') {
      squareDepth++;
    } else if (ch === ')' && roundDepth > 0) {
      roundDepth--;
    } else if (ch === ']' && squareDepth > 0) {
      squareDepth--;
    }
  }

  // The text after the last separator (possibly empty) is the final piece.
  result.push(piece.trim());
  return result;
}

#2


1  

If you need to handle nesting, you're going to need to parse before you process; regex alone isn't going to handle it for you. This is not the best parser in the world, but it's off the top of my head and an example of the kind of thing you can write to handle this. Or just find a parser that's meant to handle this kind of use case.

如果您需要处理嵌套,那么您需要在处理之前先进行解析,仅靠regex无法为您处理它。这并不是世界上最好的解析器,只是我随手写出来的,用来示范处理这类问题时可以编写的代码。或者直接找一个专门处理这种用例的解析器。

// Sample input: commas appear both outside and inside (...) and [...] groups, and a (...) group is nested in the [...] group.
var test = "m,oo (ba,a) mbo,ool [sdf,lkj (sdfl,kj)] sd,fjk"
            
            /**
             * Cut `str` into consecutive pieces and mark the bracketed ones as excluded.
             *
             * A piece that starts at an opener and runs to its matching closer (both
             * included) gets `exclude: true`; all other text gets `exclude: false`.
             * Openers of the same kind nested inside an excluded region are counted, so
             * the region ends at the matching closer rather than at the first closer.
             * Pairs whose opener equals their closer (e.g. quotes) do not nest.
             * Joining every `str` in order reproduces the input.
             *
             * @param {string} str - Text to cut up.
             * @param {Array<string[]>} exclusionPairs - `[opener, closer]` pairs of single characters.
             * @returns {Array<{str: string, exclude: boolean}>} The pieces, in input order.
             */
            function groupStr(str, exclusionPairs){
                // opener -> closer; a Map cannot be hit by inherited Object.prototype keys
                const closerByOpener = new Map(exclusionPairs.map((pair) => [pair[0], pair[1]]))
                const arrayOfPieces = []
                let piece = ''
                let opener = null
                let closer = null
                let depth = 0

                // Emit the text gathered so far (if any) as one piece.
                const flush = (exclude) => {
                    if(piece.length){
                        arrayOfPieces.push({ str: piece, exclude })
                        piece = ''
                    }
                }

                // Iterating the split array replaces repeated shift() calls, each of
                // which may have to move every remaining element.
                for(const ch of str.split('')){
                    if(depth > 0){
                        piece += ch
                        // Closer is tested first so that opener === closer pairs still terminate.
                        if(ch === closer){
                            depth--
                            if(depth === 0) flush(true)
                        } else if(ch === opener){
                            depth++
                        }
                    } else if(closerByOpener.get(ch)){
                        flush(false)
                        piece = ch
                        opener = ch
                        closer = closerByOpener.get(ch)
                        depth = 1
                    } else {
                        piece += ch
                    }
                }

                // Trailing text, including a bracketed region that never closes, is not excluded.
                flush(false)

                console.log(arrayOfPieces)

                return arrayOfPieces
            }
            
            // Cut the sample string into bracketed (excluded) and plain pieces.
            let result = groupStr(test, [
                ["(", ")"],
                ["[", "]"]
            ])

            // Keep excluded pieces whole; split every other piece on its commas.
            let splitArray = []
            for (const { str, exclude } of result) {
                if (exclude) {
                    splitArray.push(str)
                } else {
                    splitArray.push(...str.split(","))
                }
            }

            console.log(splitArray)