排序数组元素(带数字的字符串),自然排序

时间:2022-10-01 15:59:53

I have an array like;

我有一个这样的数组:

["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"]

And need to sort it so it appears like;

并且需要对它进行排序以使它看起来像;

["IL0 Foo", "IL3 Bob says hello", "IL10 Baz", "PI0 Bar"]

I have tried a sort function;

我尝试过排序功能;

/**
 * Plain comparator built on the `<` and `>` operators.
 * Strings are compared character by character, so digit runs are not
 * treated as numbers ("IL10 ..." is ordered before "IL3 ...").
 *
 * @param {*} a - first value
 * @param {*} b - second value
 * @returns {number} -1 when a < b, 1 when a > b, otherwise 0
 */
function compare(a,b) {
  return a < b ? -1 : (a > b ? 1 : 0);
}

but this gives the order

但这样得到的顺序是

["IL0 Foo", "IL10 Baz", "IL3 Bob says hello", "PI0 Bar"]

I have tried to think of a regex that will work but can't get my head around it.
If it helps the format will always be 2 letters, x amount of numbers, then any number of characters.

我试着构思一个可行的正则表达式,但始终没有头绪。补充一点,也许有帮助:格式始终是2个字母,接着是若干位数字,然后是任意数量的字符。

8 个解决方案

#1


78  

This is called "natural sort" and can be implemented in JS like this:

这被称为“自然排序”,可以在JS中实现,如下所示:

/**
 * Natural-order comparator for strings.
 *
 * Each string is cut into alternating runs of digits and non-digits.
 * Runs are compared pairwise: digit runs by numeric value, text runs with
 * localeCompare. A digit run compared with a text run sorts first because
 * text runs carry Infinity as their numeric key.
 *
 * @param {string} a - first string
 * @param {string} b - second string
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
function naturalCompare(a, b) {
    // Turn a string into a list of [numericKey, textKey] pairs, one per run.
    function tokenize(str) {
        var chunks = [];
        str.replace(/(\d+)|(\D+)/g, function(match, digits, text) {
            chunks.push([digits || Infinity, text || ""]);
        });
        return chunks;
    }

    var left = tokenize(a);
    var right = tokenize(b);
    var shared = Math.min(left.length, right.length);

    for (var i = 0; i < shared; i++) {
        // Infinity - Infinity is NaN (falsy), so two text runs fall through
        // to the textual comparison below.
        var diff = left[i][0] - right[i][0];
        if (!diff) {
            diff = left[i][1].localeCompare(right[i][1]);
        }
        if (diff) {
            return diff;
        }
    }

    // All shared runs are equal: the string with fewer runs sorts first.
    return left.length - right.length;
}

/////////////////////////

// Sample names: numeric suffixes of different lengths, names with and
// without extensions, purely numeric names and the empty string.
// Declared with `var` because assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
var test = [
    "img12.png",
    "img10.png",
    "img2.png",
    "img1.png",
    "img101.png",
    "img101a.png",
    "abc10.jpg",
    "abc10",
    "abc2.jpg",
    "20.jpg",
    "20",
    "abc",
    "abc2",
    ""
];

// Sort in place with the natural-order comparator, then print the result
// as JSON indented by 3 spaces (null = no replacer).
test.sort(naturalCompare);
document.write("<pre>" + JSON.stringify(test, null, 3));

To sort in reverse order, just swap the arguments:

要按相反顺序排序,只需交换参数:

// Descending order: naturalCompare receives its two arguments swapped.
test.sort(function(a, b) { return naturalCompare(b, a) })

or simply

或简单地说

// Sort ascending in place, then reverse the same array in place.
test = test.sort(naturalCompare).reverse();

#2


5  

// Splits an entry into <letters><digits><rest>.
var re = /([a-z]+)(\d+)(.+)/i;
var arr = ["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"];

/**
 * Comparator for entries shaped like <letters><digits><rest>.
 * Orders by the letters, then by the numeric value of the digits, then by
 * the rest of the string. Entries that do not match the pattern are placed
 * after all matching entries and ordered among themselves by plain string
 * comparison.
 *
 * @param {string} a - first entry
 * @param {string} b - second entry
 * @returns {number} -1, 0 or 1
 */
function compareByPattern(a, b) {
    var ma = a.match(re),
        mb = b.match(re);

    // match() returns null for entries that do not fit the pattern; handle
    // that here instead of failing on ma[1] / mb[1].
    if (!ma || !mb) {
        if (ma) return -1;
        if (mb) return 1;
        return a > b ? 1 : a < b ? -1 : 0;
    }

    var a_num = parseInt(ma[2], 10),
        b_num = parseInt(mb[2], 10);

    if (ma[1] !== mb[1]) return ma[1] > mb[1] ? 1 : -1;
    if (a_num !== b_num) return a_num > b_num ? 1 : -1;
    // The remainder must yield -1 / 0 / 1 as well; a bare boolean
    // (a_rem > b_rem) can never report "a sorts before b".
    if (ma[3] !== mb[3]) return ma[3] > mb[3] ? 1 : -1;
    return 0;
}

// sort() works in place and returns the same array, so order === arr.
var order = arr.sort(compareByPattern);

#3


4  

You could use String#localeCompare with options

您可以将String#localeCompare与选项一起使用

sensitivity

灵敏度

Which differences in the strings should lead to non-zero result values. Possible values are:

字符串中的哪些差异应导致非零结果值。可能的值是:

  • "base": Only strings that differ in base letters compare as unequal. Examples: a ≠ b, a = á, a = A.
  • “base”:只有基本字母不同的字符串才会比较为不相等。示例:a≠b,a =á,a = A.
  • "accent": Only strings that differ in base letters or accents and other diacritic marks compare as unequal. Examples: a ≠ b, a ≠ á, a = A.
  • “accent”:只有基本字母或重音及其他变音符号不同的字符串才会比较为不相等。示例:a≠b,a≠á,a = A.
  • "case": Only strings that differ in base letters or case compare as unequal. Examples: a ≠ b, a = á, a ≠ A.
  • “case”:只有基本字母或大小写不同的字符串才会比较为不相等。示例:a≠b,a =á,a≠A。
  • "variant": Strings that differ in base letters, accents and other diacritic marks, or case compare as unequal. Other differences may also be taken into consideration. Examples: a ≠ b, a ≠ á, a ≠ A.
  • “variant”:基本字母、重音和其他变音符号或大小写不同的字符串都会比较为不相等。其他差异也可能被考虑在内。示例:a≠b,a≠á,a≠A。

The default is "variant" for usage "sort"; it's locale dependent for usage "search".

用途为“sort”时默认值是“variant”;用途为“search”时默认值取决于语言环境。

numeric

数字

Whether numeric collation should be used, such that "1" < "2" < "10". Possible values are true and false; the default is false. This option can be set through an options property or through a Unicode extension key; if both are provided, the options property takes precedence. Implementations are not required to support this property.

是否应使用数字校对,例如“1”<“2”<“10”。可能的值为true和false;默认值为false。可以通过options属性或Unicode扩展键设置此选项;如果两者都提供,则options属性优先。实现不需要支持此属性。

// Sample data in the question's format.
var array = ["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"];

// numeric: digit runs are compared by value ("3" before "10").
// sensitivity 'base': only base-letter differences make strings unequal.
var collationOptions = { numeric: true, sensitivity: 'base' };

// Comparator delegating to localeCompare with the default locale.
function byNaturalOrder(a, b) {
    return a.localeCompare(b, undefined, collationOptions);
}

array.sort(byNaturalOrder);

console.log(array);

#4


3  

I liked georg's solution a lot, but I needed underscores ("_") to sort before numbers. Here's how I modified his code:

我很喜欢georg的解决方案,但我需要让下划线(“_”)排在数字之前。这是我修改他的代码的方式:

// One chunk per match: a run of underscores, a run of digits, or a run of
// anything else.
var chunkRgx = /(_+)|([0-9]+)|([^0-9_]+)/g;

/**
 * Natural-order comparator with a separate key for underscore runs.
 *
 * Every chunk becomes [underscoreKey, numericKey, textKey]; chunks that are
 * not underscores get "0" as underscoreKey, chunks that are not digits get
 * Infinity as numericKey. Keys are compared left to right. Placing
 * underscore runs ahead of numbers relies on localeCompare ordering "_"
 * before "0".
 *
 * @param {string} a - first string
 * @param {string} b - second string
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
function naturalCompare(a, b) {
    // Cut a string into its list of three-key chunks.
    function tokenize(str) {
        var chunks = [];
        str.replace(chunkRgx, function(match, underscores, digits, text) {
            chunks.push([underscores || "0", digits || Infinity, text || ""]);
        });
        return chunks;
    }

    var left = tokenize(a);
    var right = tokenize(b);
    var shared = Math.min(left.length, right.length);

    for (var i = 0; i < shared; i++) {
        var l = left[i];
        var r = right[i];

        var diff = l[0].localeCompare(r[0]);
        if (!diff) {
            // NaN (Infinity - Infinity) is falsy and falls through to the text key.
            diff = l[1] - r[1];
        }
        if (!diff) {
            diff = l[2].localeCompare(r[2]);
        }
        if (diff) {
            return diff;
        }
    }

    // All shared chunks are equal: the string with fewer chunks sorts first.
    return left.length - right.length;
}

/////////////////////////

// Sample names, including several with underscores in different positions.
// Declared with `var` because assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
var test = [
    "img12.png",
    "img10.png",
    "img2.png",
    "img1.png",
    "img101.png",
    "img101a.png",
    "abc10.jpg",
    "abc10",
    "abc2.jpg",
    "20.jpg",
    "20",
    "abc",
    "abc2",
    "_abc",
    "_ab_c",
    "_ab__c",
    "_abc_d",
    "ab_",
    "abc_",
    "_ab_cd",
    ""
];

// Sort in place with the comparator, then print the result as JSON
// indented by 3 spaces (null = no replacer).
test.sort(naturalCompare);
document.write("<pre>" + JSON.stringify(test, null, 3));

#5


3  

Pad numbers in string with leading zeros, then sort normally.

用前导零填充字符串中的数字,然后正常排序。

/**
 * Natural-order comparator that zero-pads every digit run and then compares
 * the padded strings with localeCompare.
 *
 * The pad width is the length of the longest digit run found in either
 * argument, so digit runs of any length are padded without being truncated.
 *
 * @param {*} a - first value (converted to a string)
 * @param {*} b - second value (converted to a string)
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
var naturalSort = function (a, b) {
    var left = '' + a;
    var right = '' + b;

    // The space keeps the last digit run of `left` from merging with the
    // first digit run of `right`.
    var runs = (left + ' ' + right).match(/\d+/g) || [];
    var width = 0;
    for (var i = 0; i < runs.length; i++) {
        if (runs[i].length > width) {
            width = runs[i].length;
        }
    }

    // Left-pad a digit run with zeros up to the common width.
    function pad(n) {
        return new Array(width - n.length + 1).join('0') + n;
    }

    return left.replace(/\d+/g, pad).localeCompare(right.replace(/\d+/g, pad));
}

/**
 * Natural-order comparator that relies on localeCompare's numeric collation
 * with the 'en' locale.
 *
 * @param {*} a - first value (converted to a string)
 * @param {*} b - second value (converted to a string)
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
var naturalSortModern = function (a, b) {
    var left = '' + a;
    var right = '' + b;
    var options = { numeric: true };
    return left.localeCompare(right, 'en', options);
}

// Sort the question's sample array with the zero-padding comparator and print it.
console.dir((["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"].sort(naturalSort)));

// Same sample array, sorted with the localeCompare-based comparator.
console.dir((["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"].sort(naturalSortModern)));

#6


2  

You could do a regex like this to get non-numeric and numeric parts of the string:

你可以像这样做一个正则表达式来获得字符串的非数字和数字部分:

var s = "foo124bar23";
// Each match is either a run of non-digits or a run of digits; with the g
// flag, match() returns all of them as an array.
s.match(/[^\d]+|\d+/g)

returns: ["foo", "124" , "bar" , "23"]

返回:[“foo”,“124”,“bar”,“23”]

Then in your compare function you can iterate through the parts of the two strings comparing them part-by-part. The first non-matching part determines the result of the overall comparison. For each part, check if the part starts with a digit and if so parse it as a number before doing the comparison.

然后在比较函数中,您可以遍历两个字符串的各个部分,逐个进行比较。第一个不相等的部分决定整体比较的结果。对于每个部分,检查它是否以数字开头,如果是,则在比较之前先将其解析为数字。

#7


1  

Add one more alternative (why not):

添加一个替代方案(为什么不):

var ary = ["IL0 Foo", "PI0 Bar", "IL10 Hello", "IL10 Baz", "IL3 Bob says hello"];

/**
 * Break an entry into its three components.
 * "IL10 Bar" => ['IL', 10, ' Bar']  (the third part keeps its leading whitespace)
 *
 * @param {string} i - entry to split; a falsy value is treated as ''
 * @returns {Array} [letters, number, rest], or [] when the entry does not
 *     match <letters><digits><whitespace><anything>
 */
function getParts(i){
    i = i || '';
    var parts = i.match(/^([a-z]+)([0-9]+)(\s.*)$/i);
    if (parts){
        return [
            parts[1],
            parseInt(parts[2], 10),
            parts[3]
        ];
    }
    return []; // erroneous
}

/**
 * Comparator ordering entries by letters, then number, then remaining text.
 * Entries that cannot be parsed are moved to the end of the array; two
 * unparsable entries compare as equal.
 *
 * @param {string} a - first entry
 * @param {string} b - second entry
 * @returns {number} -1, 0 or 1
 */
function compareByParts(a, b){
    // grab the parts
    var _a = getParts(a),
        _b = getParts(b);

    // trouble parsing (both fail = no shift, otherwise
    // move the troubled element to the end of the array)
    if (_a.length === 0 && _b.length === 0) return 0;
    if (_a.length === 0) return 1;  // a is unparsable: a goes after b
    if (_b.length === 0) return -1; // b is unparsable: a goes before b

    // compare letters, then number, then remaining string; the first
    // component that differs decides
    for (var k = 0; k < 3; k++) {
        if (_a[k] < _b[k]) return -1;
        if (_a[k] > _b[k]) return 1;
    }

    // exact match
    return 0;
}

ary.sort(compareByParts);

jsfiddle example

jsfiddle的例子

#8


0  

Not pretty, but check the first two char codes. If all equal parse and compare the numbers:

不漂亮,但可以先检查前两个字符的字符代码。如果它们都相等,再解析并比较数字:

var arr = ["IL0 Foo", "IL10 Baz", "IL3 Bob says hello", "PI0 Bar"];
arr.sort(function (a1, b1) {
    // Numeric value of the first digit run in each entry.
    var numA = parseInt(a1.match(/\d+/g)[0], 10),
        numB = parseInt(b1.match(/\d+/g)[0], 10);

    // Compare the char codes of the first two characters, in order.
    for (var pos = 0; pos < 2; pos++) {
        var codeA = a1.charCodeAt(pos),
            codeB = b1.charCodeAt(pos);
        if (codeA > codeB) return 1;
        if (codeB > codeA) return -1;
    }

    // Both leading characters are equal: the numbers decide.
    if (numA < numB) return -1;
    if (numA > numB) return 1;
    return 0;
});

Example

#1


78  

This is called "natural sort" and can be implemented in JS like this:

这被称为“自然排序”,可以在JS中实现,如下所示:

/**
 * Natural-order comparator for strings.
 *
 * Each string is cut into alternating runs of digits and non-digits.
 * Runs are compared pairwise: digit runs by numeric value, text runs with
 * localeCompare. A digit run compared with a text run sorts first because
 * text runs carry Infinity as their numeric key.
 *
 * @param {string} a - first string
 * @param {string} b - second string
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
function naturalCompare(a, b) {
    // Turn a string into a list of [numericKey, textKey] pairs, one per run.
    function tokenize(str) {
        var chunks = [];
        str.replace(/(\d+)|(\D+)/g, function(match, digits, text) {
            chunks.push([digits || Infinity, text || ""]);
        });
        return chunks;
    }

    var left = tokenize(a);
    var right = tokenize(b);
    var shared = Math.min(left.length, right.length);

    for (var i = 0; i < shared; i++) {
        // Infinity - Infinity is NaN (falsy), so two text runs fall through
        // to the textual comparison below.
        var diff = left[i][0] - right[i][0];
        if (!diff) {
            diff = left[i][1].localeCompare(right[i][1]);
        }
        if (diff) {
            return diff;
        }
    }

    // All shared runs are equal: the string with fewer runs sorts first.
    return left.length - right.length;
}

/////////////////////////

// Sample names: numeric suffixes of different lengths, names with and
// without extensions, purely numeric names and the empty string.
// Declared with `var` because assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
var test = [
    "img12.png",
    "img10.png",
    "img2.png",
    "img1.png",
    "img101.png",
    "img101a.png",
    "abc10.jpg",
    "abc10",
    "abc2.jpg",
    "20.jpg",
    "20",
    "abc",
    "abc2",
    ""
];

// Sort in place with the natural-order comparator, then print the result
// as JSON indented by 3 spaces (null = no replacer).
test.sort(naturalCompare);
document.write("<pre>" + JSON.stringify(test, null, 3));

To sort in reverse order, just swap the arguments:

要按相反顺序排序,只需交换参数:

// Descending order: naturalCompare receives its two arguments swapped.
test.sort(function(a, b) { return naturalCompare(b, a) })

or simply

或简单地说

// Sort ascending in place, then reverse the same array in place.
test = test.sort(naturalCompare).reverse();

#2


5  

// Splits an entry into <letters><digits><rest>.
var re = /([a-z]+)(\d+)(.+)/i;
var arr = ["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"];

/**
 * Comparator for entries shaped like <letters><digits><rest>.
 * Orders by the letters, then by the numeric value of the digits, then by
 * the rest of the string. Entries that do not match the pattern are placed
 * after all matching entries and ordered among themselves by plain string
 * comparison.
 *
 * @param {string} a - first entry
 * @param {string} b - second entry
 * @returns {number} -1, 0 or 1
 */
function compareByPattern(a, b) {
    var ma = a.match(re),
        mb = b.match(re);

    // match() returns null for entries that do not fit the pattern; handle
    // that here instead of failing on ma[1] / mb[1].
    if (!ma || !mb) {
        if (ma) return -1;
        if (mb) return 1;
        return a > b ? 1 : a < b ? -1 : 0;
    }

    var a_num = parseInt(ma[2], 10),
        b_num = parseInt(mb[2], 10);

    if (ma[1] !== mb[1]) return ma[1] > mb[1] ? 1 : -1;
    if (a_num !== b_num) return a_num > b_num ? 1 : -1;
    // The remainder must yield -1 / 0 / 1 as well; a bare boolean
    // (a_rem > b_rem) can never report "a sorts before b".
    if (ma[3] !== mb[3]) return ma[3] > mb[3] ? 1 : -1;
    return 0;
}

// sort() works in place and returns the same array, so order === arr.
var order = arr.sort(compareByPattern);

#3


4  

You could use String#localeCompare with options

您可以将String#localeCompare与选项一起使用

sensitivity

灵敏度

Which differences in the strings should lead to non-zero result values. Possible values are:

字符串中的哪些差异应导致非零结果值。可能的值是:

  • "base": Only strings that differ in base letters compare as unequal. Examples: a ≠ b, a = á, a = A.
  • “base”:只有基本字母不同的字符串才会比较为不相等。示例:a≠b,a =á,a = A.
  • "accent": Only strings that differ in base letters or accents and other diacritic marks compare as unequal. Examples: a ≠ b, a ≠ á, a = A.
  • “accent”:只有基本字母或重音及其他变音符号不同的字符串才会比较为不相等。示例:a≠b,a≠á,a = A.
  • "case": Only strings that differ in base letters or case compare as unequal. Examples: a ≠ b, a = á, a ≠ A.
  • “case”:只有基本字母或大小写不同的字符串才会比较为不相等。示例:a≠b,a =á,a≠A。
  • "variant": Strings that differ in base letters, accents and other diacritic marks, or case compare as unequal. Other differences may also be taken into consideration. Examples: a ≠ b, a ≠ á, a ≠ A.
  • “variant”:基本字母、重音和其他变音符号或大小写不同的字符串都会比较为不相等。其他差异也可能被考虑在内。示例:a≠b,a≠á,a≠A。

The default is "variant" for usage "sort"; it's locale dependent for usage "search".

用途为“sort”时默认值是“variant”;用途为“search”时默认值取决于语言环境。

numeric

数字

Whether numeric collation should be used, such that "1" < "2" < "10". Possible values are true and false; the default is false. This option can be set through an options property or through a Unicode extension key; if both are provided, the options property takes precedence. Implementations are not required to support this property.

是否应使用数字校对,例如“1”<“2”<“10”。可能的值为true和false;默认值为false。可以通过options属性或Unicode扩展键设置此选项;如果两者都提供,则options属性优先。实现不需要支持此属性。

// Sample data in the question's format.
var array = ["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"];

// numeric: digit runs are compared by value ("3" before "10").
// sensitivity 'base': only base-letter differences make strings unequal.
var collationOptions = { numeric: true, sensitivity: 'base' };

// Comparator delegating to localeCompare with the default locale.
function byNaturalOrder(a, b) {
    return a.localeCompare(b, undefined, collationOptions);
}

array.sort(byNaturalOrder);

console.log(array);

#4


3  

I liked georg's solution a lot, but I needed underscores ("_") to sort before numbers. Here's how I modified his code:

我很喜欢georg的解决方案,但我需要让下划线(“_”)排在数字之前。这是我修改他的代码的方式:

// One chunk per match: a run of underscores, a run of digits, or a run of
// anything else.
var chunkRgx = /(_+)|([0-9]+)|([^0-9_]+)/g;

/**
 * Natural-order comparator with a separate key for underscore runs.
 *
 * Every chunk becomes [underscoreKey, numericKey, textKey]; chunks that are
 * not underscores get "0" as underscoreKey, chunks that are not digits get
 * Infinity as numericKey. Keys are compared left to right. Placing
 * underscore runs ahead of numbers relies on localeCompare ordering "_"
 * before "0".
 *
 * @param {string} a - first string
 * @param {string} b - second string
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
function naturalCompare(a, b) {
    // Cut a string into its list of three-key chunks.
    function tokenize(str) {
        var chunks = [];
        str.replace(chunkRgx, function(match, underscores, digits, text) {
            chunks.push([underscores || "0", digits || Infinity, text || ""]);
        });
        return chunks;
    }

    var left = tokenize(a);
    var right = tokenize(b);
    var shared = Math.min(left.length, right.length);

    for (var i = 0; i < shared; i++) {
        var l = left[i];
        var r = right[i];

        var diff = l[0].localeCompare(r[0]);
        if (!diff) {
            // NaN (Infinity - Infinity) is falsy and falls through to the text key.
            diff = l[1] - r[1];
        }
        if (!diff) {
            diff = l[2].localeCompare(r[2]);
        }
        if (diff) {
            return diff;
        }
    }

    // All shared chunks are equal: the string with fewer chunks sorts first.
    return left.length - right.length;
}

/////////////////////////

// Sample names, including several with underscores in different positions.
// Declared with `var` because assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
var test = [
    "img12.png",
    "img10.png",
    "img2.png",
    "img1.png",
    "img101.png",
    "img101a.png",
    "abc10.jpg",
    "abc10",
    "abc2.jpg",
    "20.jpg",
    "20",
    "abc",
    "abc2",
    "_abc",
    "_ab_c",
    "_ab__c",
    "_abc_d",
    "ab_",
    "abc_",
    "_ab_cd",
    ""
];

// Sort in place with the comparator, then print the result as JSON
// indented by 3 spaces (null = no replacer).
test.sort(naturalCompare);
document.write("<pre>" + JSON.stringify(test, null, 3));

#5


3  

Pad numbers in string with leading zeros, then sort normally.

用前导零填充字符串中的数字,然后正常排序。

/**
 * Natural-order comparator that zero-pads every digit run and then compares
 * the padded strings with localeCompare.
 *
 * The pad width is the length of the longest digit run found in either
 * argument, so digit runs of any length are padded without being truncated.
 *
 * @param {*} a - first value (converted to a string)
 * @param {*} b - second value (converted to a string)
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
var naturalSort = function (a, b) {
    var left = '' + a;
    var right = '' + b;

    // The space keeps the last digit run of `left` from merging with the
    // first digit run of `right`.
    var runs = (left + ' ' + right).match(/\d+/g) || [];
    var width = 0;
    for (var i = 0; i < runs.length; i++) {
        if (runs[i].length > width) {
            width = runs[i].length;
        }
    }

    // Left-pad a digit run with zeros up to the common width.
    function pad(n) {
        return new Array(width - n.length + 1).join('0') + n;
    }

    return left.replace(/\d+/g, pad).localeCompare(right.replace(/\d+/g, pad));
}

/**
 * Natural-order comparator that relies on localeCompare's numeric collation
 * with the 'en' locale.
 *
 * @param {*} a - first value (converted to a string)
 * @param {*} b - second value (converted to a string)
 * @returns {number} negative, zero or positive, usable with Array.prototype.sort
 */
var naturalSortModern = function (a, b) {
    var left = '' + a;
    var right = '' + b;
    var options = { numeric: true };
    return left.localeCompare(right, 'en', options);
}

// Sort the question's sample array with the zero-padding comparator and print it.
console.dir((["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"].sort(naturalSort)));

// Same sample array, sorted with the localeCompare-based comparator.
console.dir((["IL0 Foo", "PI0 Bar", "IL10 Baz", "IL3 Bob says hello"].sort(naturalSortModern)));

#6


2  

You could do a regex like this to get non-numeric and numeric parts of the string:

你可以像这样做一个正则表达式来获得字符串的非数字和数字部分:

var s = "foo124bar23";
// Each match is either a run of non-digits or a run of digits; with the g
// flag, match() returns all of them as an array.
s.match(/[^\d]+|\d+/g)

returns: ["foo", "124" , "bar" , "23"]

返回:[“foo”,“124”,“bar”,“23”]

Then in your compare function you can iterate through the parts of the two strings comparing them part-by-part. The first non-matching part determines the result of the overall comparison. For each part, check if the part starts with a digit and if so parse it as a number before doing the comparison.

然后在比较函数中,您可以遍历两个字符串的各个部分,逐个进行比较。第一个不相等的部分决定整体比较的结果。对于每个部分,检查它是否以数字开头,如果是,则在比较之前先将其解析为数字。

#7


1  

Add one more alternative (why not):

添加一个替代方案(为什么不):

var ary = ["IL0 Foo", "PI0 Bar", "IL10 Hello", "IL10 Baz", "IL3 Bob says hello"];

/**
 * Break an entry into its three components.
 * "IL10 Bar" => ['IL', 10, ' Bar']  (the third part keeps its leading whitespace)
 *
 * @param {string} i - entry to split; a falsy value is treated as ''
 * @returns {Array} [letters, number, rest], or [] when the entry does not
 *     match <letters><digits><whitespace><anything>
 */
function getParts(i){
    i = i || '';
    var parts = i.match(/^([a-z]+)([0-9]+)(\s.*)$/i);
    if (parts){
        return [
            parts[1],
            parseInt(parts[2], 10),
            parts[3]
        ];
    }
    return []; // erroneous
}

/**
 * Comparator ordering entries by letters, then number, then remaining text.
 * Entries that cannot be parsed are moved to the end of the array; two
 * unparsable entries compare as equal.
 *
 * @param {string} a - first entry
 * @param {string} b - second entry
 * @returns {number} -1, 0 or 1
 */
function compareByParts(a, b){
    // grab the parts
    var _a = getParts(a),
        _b = getParts(b);

    // trouble parsing (both fail = no shift, otherwise
    // move the troubled element to the end of the array)
    if (_a.length === 0 && _b.length === 0) return 0;
    if (_a.length === 0) return 1;  // a is unparsable: a goes after b
    if (_b.length === 0) return -1; // b is unparsable: a goes before b

    // compare letters, then number, then remaining string; the first
    // component that differs decides
    for (var k = 0; k < 3; k++) {
        if (_a[k] < _b[k]) return -1;
        if (_a[k] > _b[k]) return 1;
    }

    // exact match
    return 0;
}

ary.sort(compareByParts);

jsfiddle example

jsfiddle的例子

#8


0  

Not pretty, but check the first two char codes. If all equal parse and compare the numbers:

不漂亮,但可以先检查前两个字符的字符代码。如果它们都相等,再解析并比较数字:

var arr = ["IL0 Foo", "IL10 Baz", "IL3 Bob says hello", "PI0 Bar"];
arr.sort(function (a1, b1) {
    // Numeric value of the first digit run in each entry.
    var numA = parseInt(a1.match(/\d+/g)[0], 10),
        numB = parseInt(b1.match(/\d+/g)[0], 10);

    // Compare the char codes of the first two characters, in order.
    for (var pos = 0; pos < 2; pos++) {
        var codeA = a1.charCodeAt(pos),
            codeB = b1.charCodeAt(pos);
        if (codeA > codeB) return 1;
        if (codeB > codeA) return -1;
    }

    // Both leading characters are equal: the numbers decide.
    if (numA < numB) return -1;
    if (numA > numB) return 1;
    return 0;
});

Example