How do I make a regular expression to evaluate the following string?
如何制作正则表达式来评估以下字符串?
TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC
and extract the pattern CTCCT.
并提取模式CTCCT。
The pattern must be 3 C's and 2 T's in any order.
模式必须是3 C和2 T的任何顺序。
I tried /[C | T]{5}/ but it matches CCCCT and TCCCC
我试过 /[C | T]{5}/,但它会匹配 CCCCT 和 TCCCC
Thanks in Advance.
提前致谢。
2 个解决方案
#1
2
Compute all permutations of "CTCCT" and concatenate them to a regex:
计算“CTCCT”的所有排列并将它们连接到正则表达式:
CCCTT|CCTCT|CCTTC|CTCCT|CTCTC|CTTCC|TCCCT|TCCTC|TCTCC|TTCCC
This pattern can be optimized:
这种模式可以优化:
C(?:C(?:T(?:CT|TC)|CTT)|T(?:C(?:CT|TC)|TCC))|T(?:C(?:C(?:CT|TC)|TCC)|TCCC)
// Demo: find the first 5-character run made of three C's and two T's.
// The pattern is the factored form of the ten permutations of "CCCTT".
var regex = /C(?:C(?:T(?:CT|TC)|CTT)|T(?:C(?:CT|TC)|TCC))|T(?:C(?:C(?:CT|TC)|TCC)|TCCC)/g;
var string = "TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC";
// exec() returns the match array (or null); because of the g flag it also
// advances regex.lastIndex past the match.
console.log(regex.exec(string));
This pattern doesn't find overlapping matches, e.g. there would be only one match in CCCTTCCC.
此模式无法找到重叠的匹配,例如在 CCCTTCCC 中只会找到一个匹配。
To find overlapping matches, use lookahead:
要查找重叠匹配,请使用前瞻:
C(?=C(?=T(?=CT|TC)|CTT)|T(?=C(?=CT|TC)|TCC))|T(?=C(?=C(?=CT|TC)|TCC)|TCCC)
// Demo: report every 5-character window of three C's and two T's, including
// overlapping ones. Each alternative consumes only the window's first
// character; the other four are checked inside lookaheads, so the next search
// resumes one character later.
// `var` is used at top level because neighbouring snippets redeclare the same names.
var regex = /C(?=C(?=T(?=CT|TC)|CTT)|T(?=C(?=CT|TC)|TCC))|T(?=C(?=C(?=CT|TC)|TCC)|TCCC)/g;
var string = "CCCTTCCC";
// Declared explicitly: assigning to an undeclared `match` creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
var match;
while ((match = regex.exec(string)) !== null) {
  // match[0] is only one character long, so slice the full window by hand.
  console.log(match.index, string.substring(match.index, match.index + 5));
}
Regex can only deal with a fairly limited number of permutations. If you want to match segments of possibly arbitrary size, use a non-regex solution:
正则表达式只能处理相当有限数量的排列。如果要匹配可能任意大小的段,请使用非正则表达式解决方案:
/**
 * Finds the first 5-character window of `str` that holds exactly three 'C'
 * and two 'T' characters, in any order.
 *
 * Keeps running counts for the current window, so each character is examined
 * at most twice (once entering, once leaving).
 *
 * @param {string} str - Sequence to scan (matching is case-sensitive).
 * @returns {number} Start index of the first matching window, or -1 if none.
 */
function c3t2_optimized(str) {
  let cCount = 0;
  let tCount = 0;

  for (let end = 0; end < str.length; end += 1) {
    // Character entering the window on the right.
    switch (str.charAt(end)) {
      case 'C':
        cCount += 1;
        break;
      case 'T':
        tCount += 1;
        break;
      default:
        break;
    }

    const start = end - 4;

    // Once more than five characters have been read, drop the one that
    // just slid out of the window on the left.
    if (start > 0) {
      switch (str.charAt(start - 1)) {
        case 'C':
          cCount -= 1;
          break;
        case 'T':
          tCount -= 1;
          break;
        default:
          break;
      }
    }

    if (cCount === 3 && tCount === 2) {
      return start;
    }
  }

  return -1;
}

// Demo: print the start index of the first match in the sample sequence.
var string = "TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC";
console.log(c3t2_optimized(string));
Or the same as above, just as a generator stepping through all possibly overlapping matches:
或者与上面相同,就像一个生成器逐步完成所有可能重叠的匹配:
/**
 * Lazily yields the start index of every 5-character window of `str` that
 * holds exactly three 'C' and two 'T' characters, overlapping windows included.
 *
 * @param {string} str - Sequence to scan (matching is case-sensitive).
 * @yields {number} Start index of each matching window, in ascending order.
 */
function* c3t2_optimized(str) {
  // Running counts of 'C' and 'T' inside the window that ends at index i.
  let c = 0;
  let t = 0;
  for (let i = 0; i < str.length; ++i) {
    const last = str.charAt(i);
    if (last === 'C') ++c;
    else if (last === 'T') ++t;
    if (i > 4) {
      // The window is already five wide: remove the character that left it.
      const first = str.charAt(i - 5);
      if (first === 'C') --c;
      else if (first === 'T') --t;
    }
    if (c === 3 && t === 2) yield i - 4;
  }
}

// Demo: print each match position together with the matched window.
var string = "CCCTTCCC";
// `const i` declares the loop variable; the undeclared `i` used before was an
// implicit global and throws a ReferenceError in strict mode / ES modules.
for (const i of c3t2_optimized(string)) {
  console.log(i, string.substring(i, i + 5));
}
Performance comparison: https://jsfiddle.net/24qguege/7/
性能比较:https://jsfiddle.net/24qguege/7/
Firefox 47:
- 68.83ms - regex (see above)
- 97.51ms - non-regex (see above)
- 9582.39ms - Andrew Rueckert's answer (better readability)
68.83ms - 正则表达式(见上文)
97.51ms - 非正则表达式(见上文)
9582.39ms - Andrew Rueckert的回答(更好的可读性)
#2
3
This isn't the type of problem that is easily solved using Regular Expressions. It can be solved fairly straightforwardly with a simple function, however:
这不是那种可以用正则表达式轻松解决的问题。不过,它可以通过一个简单的函数相当直接地解决:
/**
 * Finds the first 5-character window of `str` that consists of exactly three
 * C's and two T's in any order, ignoring case.
 *
 * @param {string} str - Sequence to scan.
 * @returns {number|false} Start index of the first matching window, or
 *   `false` when there is none. Index 0 is a valid result, so compare the
 *   return value with `=== false` rather than testing truthiness.
 */
function c3t2(str) {
  const lowerCaseStr = str.toLowerCase();
  // `let` declares the loop variable; the undeclared `index` used before was
  // an implicit global and throws a ReferenceError in strict mode / ES modules.
  for (let index = 0; index + 5 <= str.length; index++) {
    // Sorting the window's characters gives a canonical form, so a single
    // comparison covers every permutation of "ccctt".
    const canonical = lowerCaseStr
      .substring(index, index + 5)
      .split("")
      .sort()
      .join("");
    if (canonical === "ccctt") {
      return index;
    }
  }
  return false;
}
#1
2
Compute all permutations of "CTCCT" and concatenate them to a regex:
计算“CTCCT”的所有排列并将它们连接到正则表达式:
CCCTT|CCTCT|CCTTC|CTCCT|CTCTC|CTTCC|TCCCT|TCCTC|TCTCC|TTCCC
This pattern can be optimized:
这种模式可以优化:
C(?:C(?:T(?:CT|TC)|CTT)|T(?:C(?:CT|TC)|TCC))|T(?:C(?:C(?:CT|TC)|TCC)|TCCC)
// Demo: find the first 5-character run made of three C's and two T's.
// The pattern is the factored form of the ten permutations of "CCCTT".
var regex = /C(?:C(?:T(?:CT|TC)|CTT)|T(?:C(?:CT|TC)|TCC))|T(?:C(?:C(?:CT|TC)|TCC)|TCCC)/g;
var string = "TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC";
// exec() returns the match array (or null); because of the g flag it also
// advances regex.lastIndex past the match.
console.log(regex.exec(string));
This pattern doesn't find overlapping matches, e.g. there would be only one match in CCCTTCCC.
此模式无法找到重叠的匹配,例如在 CCCTTCCC 中只会找到一个匹配。
To find overlapping matches, use lookahead:
要查找重叠匹配,请使用前瞻:
C(?=C(?=T(?=CT|TC)|CTT)|T(?=C(?=CT|TC)|TCC))|T(?=C(?=C(?=CT|TC)|TCC)|TCCC)
// Demo: report every 5-character window of three C's and two T's, including
// overlapping ones. Each alternative consumes only the window's first
// character; the other four are checked inside lookaheads, so the next search
// resumes one character later.
// `var` is used at top level because neighbouring snippets redeclare the same names.
var regex = /C(?=C(?=T(?=CT|TC)|CTT)|T(?=C(?=CT|TC)|TCC))|T(?=C(?=C(?=CT|TC)|TCC)|TCCC)/g;
var string = "CCCTTCCC";
// Declared explicitly: assigning to an undeclared `match` creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
var match;
while ((match = regex.exec(string)) !== null) {
  // match[0] is only one character long, so slice the full window by hand.
  console.log(match.index, string.substring(match.index, match.index + 5));
}
Regex can only deal with a fairly limited number of permutations. If you want to match segments of possibly arbitrary size, use a non-regex solution:
正则表达式只能处理相当有限数量的排列。如果要匹配可能任意大小的段,请使用非正则表达式解决方案:
/**
 * Finds the first 5-character window of `str` that holds exactly three 'C'
 * and two 'T' characters, in any order.
 *
 * Keeps running counts for the current window, so each character is examined
 * at most twice (once entering, once leaving).
 *
 * @param {string} str - Sequence to scan (matching is case-sensitive).
 * @returns {number} Start index of the first matching window, or -1 if none.
 */
function c3t2_optimized(str) {
  let cCount = 0;
  let tCount = 0;

  for (let end = 0; end < str.length; end += 1) {
    // Character entering the window on the right.
    switch (str.charAt(end)) {
      case 'C':
        cCount += 1;
        break;
      case 'T':
        tCount += 1;
        break;
      default:
        break;
    }

    const start = end - 4;

    // Once more than five characters have been read, drop the one that
    // just slid out of the window on the left.
    if (start > 0) {
      switch (str.charAt(start - 1)) {
        case 'C':
          cCount -= 1;
          break;
        case 'T':
          tCount -= 1;
          break;
        default:
          break;
      }
    }

    if (cCount === 3 && tCount === 2) {
      return start;
    }
  }

  return -1;
}

// Demo: print the start index of the first match in the sample sequence.
var string = "TGATGCCGTCCCCTCAACTTGAGTGCTCCTAATGCGTTGC";
console.log(c3t2_optimized(string));
Or the same as above, just as a generator stepping through all possibly overlapping matches:
或者与上面相同,就像一个生成器逐步完成所有可能重叠的匹配:
/**
 * Lazily yields the start index of every 5-character window of `str` that
 * holds exactly three 'C' and two 'T' characters, overlapping windows included.
 *
 * @param {string} str - Sequence to scan (matching is case-sensitive).
 * @yields {number} Start index of each matching window, in ascending order.
 */
function* c3t2_optimized(str) {
  // Running counts of 'C' and 'T' inside the window that ends at index i.
  let c = 0;
  let t = 0;
  for (let i = 0; i < str.length; ++i) {
    const last = str.charAt(i);
    if (last === 'C') ++c;
    else if (last === 'T') ++t;
    if (i > 4) {
      // The window is already five wide: remove the character that left it.
      const first = str.charAt(i - 5);
      if (first === 'C') --c;
      else if (first === 'T') --t;
    }
    if (c === 3 && t === 2) yield i - 4;
  }
}

// Demo: print each match position together with the matched window.
var string = "CCCTTCCC";
// `const i` declares the loop variable; the undeclared `i` used before was an
// implicit global and throws a ReferenceError in strict mode / ES modules.
for (const i of c3t2_optimized(string)) {
  console.log(i, string.substring(i, i + 5));
}
Performance comparison: https://jsfiddle.net/24qguege/7/
性能比较:https://jsfiddle.net/24qguege/7/
Firefox 47:
- 68.83ms - regex (see above)
- 97.51ms - non-regex (see above)
- 9582.39ms - Andrew Rueckert's answer (better readability)
68.83ms - 正则表达式(见上文)
97.51ms - 非正则表达式(见上文)
9582.39ms - Andrew Rueckert的回答(更好的可读性)
#2
3
This isn't the type of problem that is easily solved using Regular Expressions. It can be solved fairly straightforwardly with a simple function, however:
这不是那种可以用正则表达式轻松解决的问题。不过,它可以通过一个简单的函数相当直接地解决:
/**
 * Finds the first 5-character window of `str` that consists of exactly three
 * C's and two T's in any order, ignoring case.
 *
 * @param {string} str - Sequence to scan.
 * @returns {number|false} Start index of the first matching window, or
 *   `false` when there is none. Index 0 is a valid result, so compare the
 *   return value with `=== false` rather than testing truthiness.
 */
function c3t2(str) {
  const lowerCaseStr = str.toLowerCase();
  // `let` declares the loop variable; the undeclared `index` used before was
  // an implicit global and throws a ReferenceError in strict mode / ES modules.
  for (let index = 0; index + 5 <= str.length; index++) {
    // Sorting the window's characters gives a canonical form, so a single
    // comparison covers every permutation of "ccctt".
    const canonical = lowerCaseStr
      .substring(index, index + 5)
      .split("")
      .sort()
      .join("");
    if (canonical === "ccctt") {
      return index;
    }
  }
  return false;
}