限制 .split() 拆分的次数，而不是截断结果数组

Question

真的，和标题所说的差不多。

假设你有这个字符串：

var theString = "a=b=c=d";

现在，当您运行 theString.split("=") 时，结果如预期的那样 ["a", "b", "c", "d"]。当然，当您运行 theString.split("=", 2) 时，您会得到 ["a", "b"]，在阅读 the MDN page for String#split() 之后对我来说很有意义。

然而，我正在寻找的行为更像是 Java 的 String#split()：而不是正常构建数组，然后返回第一个 n 元素，它构建第一个 n-1 个匹配项的数组，然后将所有剩余字符添加为数组的最后一个元素。有关更好的描述，请参阅 the relevant docs。

如何在 JavaScript 中获得这种效果？

我正在寻找与 Java 实现类似的最佳性能答案，尽管它的实际工作方式可能不同。

我本想贴出自己的尝试，但我完全不知道该如何着手实现它。

Answer 1

我会使用这样的东西：

/**
 * Split `string` on `separator` into at most `n` pieces, Java-style:
 * the first `n - 1` pieces are split off and everything that follows is
 * kept, separators included, as the last piece.
 *
 * @param {string} string - Text to split.
 * @param {string} separator - Literal separator (also used to re-join the tail).
 * @param {number} n - Maximum number of pieces to return.
 * @returns {string[]} The pieces; the full split when it already fits in `n`.
 */
function JavaSplit(string, separator, n) {
    const pieces = string.split(separator);
    if (pieces.length <= n) {
        return pieces;
    }
    // Index of the first piece that belongs to the re-joined tail.
    const cut = n - 1;
    const tail = pieces.slice(cut).join(separator);
    return [...pieces.slice(0, cut), tail];
}

我们在这里做的是：

完全拆分字符串
按照说明取前 n-1 个元素。
重新加入剩余元素。
将它们附加到第 2 步中的数组并返回。

有人可能会合理地认为您可以将所有这些调用链接在一起，但 .push() 会改变一个数组而不是返回一个新数组。这样做对你来说也更容易一些。

Answer 2

另一种可能的实现方式：

/**
 * Split `s` on a literal separator at most `limit` times and keep whatever
 * follows the last split, unsplit, as the final element.
 *
 * @param {string} s - String to split.
 * @param {string} separator - Literal separator; its length is used to find
 *   where the remainder starts, so a RegExp is not supported.
 * @param {number} [limit] - Maximum number of leading pieces to cut off.
 *   When omitted, the string is split completely.
 * @returns {string[]} Up to `limit` leading pieces plus the remainder, if any.
 */
function split(s, separator, limit) {
  // split the initial string using limit
  const arr = s.split(separator, limit);
  // Nothing was cut off (e.g. a limit of 0): the whole input is the remainder.
  if (arr.length === 0) {
    return [s];
  }
  // Length of the input covered by the pieces and the separators between them.
  const consumed = arr.join(separator).length;
  // Only append a remainder when input is actually left over; otherwise the
  // string had no more than `limit` pieces and there is nothing to add.
  if (consumed < s.length) {
    arr.push(s.substring(consumed + separator.length));
  }
  return arr;
}

Fiddle 是 here.

Answer 3

如果你想要 Java 实现的完全等价物（没有错误检查或保护条款等）：

/**
 * Split `str` at the first `n` matches of `sep`, leaving the rest of the
 * string unsplit as the final element. Matching stops as soon as the limit is
 * reached, so the full split is never computed.
 *
 * @param {string} str - String to split.
 * @param {RegExp} sep - Separator pattern. A global copy is used internally,
 *   so the caller's regex (and its `lastIndex`) is left untouched.
 * @param {number} n - Maximum number of splits; a negative value splits at
 *   every match.
 * @returns {string[]} At most `n + 1` pieces.
 */
function split(str, sep, n) {
    // exec() only walks through the string on a global regex, and reusing the
    // caller's object would leak lastIndex state between calls.
    const flags = sep.flags.includes('g') ? sep.flags : sep.flags + 'g';
    const matcher = new RegExp(sep.source, flags);
    const out = [];
    // Start of the text that has not been emitted yet.
    let tailStart = 0;

    while (n--) {
        const match = matcher.exec(str);
        // No separator left: stop instead of dereferencing null.
        if (match === null) {
            break;
        }
        out.push(str.slice(tailStart, match.index));
        tailStart = matcher.lastIndex;
    }

    out.push(str.slice(tailStart));
    return out;
}

console.log(split("a=b=c=d", /=/g, 2)); // ['a', 'b', 'c=d']

正如您在问题中提到的那样，这样做的额外好处是无需预先计算完整拆分。

Answer 4

您是否在寻找更接近 PHP 的 explode 的内容？

这是我设计的一个方法：

/**
 * PHP-style explode: split this string on `sep` into at most `n` pieces, with
 * the unsplit remainder appended to the last piece.
 *
 * @param {string|RegExp} sep - Separator. For a RegExp, every match is assumed
 *   to be exactly one character long.
 * @param {number} [n] - Maximum number of pieces; when omitted the string is
 *   split completely.
 * @returns {string[]} The pieces.
 */
String.prototype.explode = function(sep, n) {
  const arr = this.split(sep, n);
  // The limit was reached only if the n-th slot got filled.
  if (arr[n - 1] !== undefined) {
    // Re-joining with the real separator gives the exact offset just past the
    // last piece, whatever the separator's length. A RegExp cannot be used to
    // join, so fall back to a one-character placeholder.
    const joiner = typeof sep === 'string' ? sep : ' ';
    arr[n - 1] += this.substring(arr.join(joiner).length);
  }
  return arr;
};

此方法像平常一样拆分字符串，判断是否达到了限制，然后使用 substring 把最后一次拆分之后的文本追加到最后一个元素上（对数组使用任意单个字符作为分隔符进行 join，再取结果的 length，就能直接得到最后一次拆分之后第一个字符的偏移量）。

这个方法的用法和split一样：

// Compare the built-in split (which drops everything past the limit) with
// explode on the same input and limit.
// NOTE: these bare assignments create implicit globals; declare the names
// with const/let before running this in strict-mode or module code.
str = 'my/uri/needs/to/be/split';
splitResult = str.split('/', 4);
explodeResult = str.explode('/', 4);
console.log(splitResult);
console.log(explodeResult);

// The following will be written to the console:
// splitResult:   ["my", "uri", "needs", "to"]
// explodeResult: ["my", "uri", "needs", "to/be/split"]

当然，这也可以改写成一个普通函数：

/**
 * PHP-style explode as a standalone function: split `str` on `sep` into at
 * most `n` pieces, with the unsplit remainder appended to the last piece.
 *
 * @param {string} str - String to split.
 * @param {string|RegExp} sep - Separator. For a RegExp, every match is assumed
 *   to be exactly one character long.
 * @param {number} [n] - Maximum number of pieces; when omitted the string is
 *   split completely.
 * @returns {string[]} The pieces.
 */
function explode(str, sep, n) {
  const arr = str.split(sep, n);
  // The limit was reached only if the n-th slot got filled.
  if (arr[n - 1] !== undefined) {
    // Re-joining with the real separator gives the exact offset just past the
    // last piece. A RegExp cannot be used to join, so fall back to a
    // one-character placeholder.
    const joiner = typeof sep === 'string' ? sep : ' ';
    // Read the remainder from `str`; a plain function has no string `this`.
    arr[n - 1] += str.substring(arr.join(joiner).length);
  }
  return arr;
}

// Call the standalone function with a limit of 4 pieces.
// NOTE: these bare assignments create implicit globals; declare the names
// with const/let before running this in strict-mode or module code.
str = 'my/uri/needs/to/be/split';
explodeResult = explode(str, '/', 4);

Answer 5

如果您想用更少的行数并避免循环：

// Cut off the first `limit` pieces, then append everything that follows them
// as one final element.
const theString = "some=string=with=separators";
const limit = 2;
const parts = theString.split('=', limit);
// Characters held by the pieces, plus one per single-character '=' consumed,
// is where the remainder starts.
const pieceChars = parts.reduce((total, piece) => total + piece.length, 0);
const remainder = theString.slice(pieceChars + limit);
parts.push(remainder);

Answer 6

// Everything before the first "=" goes to `first`; the remaining pieces are
// re-joined into `second`.
const theString = "a=b=c=d";
const pieces = theString.split("=");
const first = pieces[0];
const rest = pieces.slice(1);
const second = rest.join("=");
console.log(first, second);

如果您使用的是 ECMA 2015，则只需要 2 行。

Answer 7

这是我的实现：

/**
 * Split this string on `delim` into at most `count` pieces; the last piece
 * holds the unsplit remainder. An empty remainder is not appended.
 *
 * @param {string} delim - Literal delimiter. For anything that is not a
 *   string, the whole string is returned as a single-element array.
 * @param {number} [count] - Maximum number of pieces. When it is missing, not
 *   a finite number, or below 2, the string is split completely.
 * @returns {string[]} The pieces.
 */
String.prototype.splitRemainder = function(delim, count) {
  if (typeof delim !== 'string') {
    return this.split();
  }

  // Number.isFinite is false for non-numbers, NaN and Infinity, none of which
  // can be turned into a meaningful split limit.
  if (!Number.isFinite(count) || count < 2) {
    return this.split(delim);
  }

  // `count` pieces means `count - 1` splits.
  const splits = count - 1;
  const parts = this.split(delim, splits);
  // The remainder starts after the pieces, the delimiters between them and the
  // delimiter that follows the last piece; using the delimiter's real length
  // keeps this correct for multi-character and empty delimiters.
  const remainder = this.slice(parts.join(delim).length + delim.length);

  if (remainder.length > 0) {
    parts.push(remainder);
  }

  return parts;
};

// Expected output: ["dasd", "asds", "asds", "asdasd asdasdas"]
console.log("dasd asds asds asdasd asdasdas".splitRemainder(" ", 4));
// Expected output: ["hello", "to-you-too"]
console.log("hello-to-you-too".splitRemainder("-",2));

请注意，这不是最有效的实施方式。因此，如果您正在寻找最有效的解决方案，这不是它。

Answer 8

Asad 的非常出色，因为它允许使用可变长度的 RegExp 分隔符（例如 /\s+/g，沿任意长度的空格拆分，包括换行符）。但是，它有几个问题。

如果分隔符没有设置全局标志，它将无法正常工作。
exec 可能返回 null，从而导致出错。当分隔符没有出现在输入字符串中时，就会发生这种情况。
如果限制大于分隔点，您最终会在字符串上循环并产生可能出乎意料的结果。
限制是必需的，因此没有简单的方法来找到最大拆分数。

以下内容解决了这些问题，同时性能也一样：

/**
 * Split a string with a RegExp separator an optionally limited number of times.
 * The text after the last performed split is kept whole as the final element.
 * @param {string} input
 * @param {RegExp|string} separator - Pattern to split on; a string is compiled
 *   as a pattern. The caller's RegExp object itself is never modified.
 * @param {number} [limit] - Maximum number of splits. If not included, splits
 *   the maximum times
 * @returns {string[]} At most `limit + 1` pieces
 */
function split(input, separator, limit) {
  // Ensure the separator is global while keeping the caller's other flags:
  // new RegExp(regex, 'g') would replace flags such as `i` or `m` with just `g`.
  if (separator instanceof RegExp) {
    const flags = separator.flags.includes('g') ? separator.flags : `${separator.flags}g`;
    separator = new RegExp(separator.source, flags);
  } else {
    separator = new RegExp(separator, 'g');
  }
  // Allow the limit argument to be excluded
  limit = limit ?? -1;

  const output = [];
  // Start of the text that has not been emitted yet.
  let finalIndex = 0;

  while (limit) {
    const search = separator.exec(input);
    if (search === null) {
      break;
    }
    if (search[0] === '') {
      // exec() does not move lastIndex past an empty match; step over it so
      // the loop always makes progress.
      separator.lastIndex += 1;
      // As in String#split, an empty match at the start of the current piece
      // or at the end of the input is not a split point.
      if (search.index === finalIndex || search.index >= input.length) {
        continue;
      }
    }
    output.push(input.slice(finalIndex, search.index));
    finalIndex = search.index + search[0].length;
    limit -= 1;
  }

  output.push(input.slice(finalIndex));

  return output;
}

// Usage examples. `limit` is the maximum number of splits, so a call returns
// at most limit + 1 pieces.
split("foo bar baz quux", /\s+/, 3)
// ["foo", "bar", "baz", "quux"]
split("foo bar baz quux", /\s+/, 2)
// ["foo", "bar", "baz quux"]
split("foo bar baz quux", /\s+/, 1)
// ["foo", "bar baz quux"]
split("foo bar baz quux", /\s+/, 0)
// ["foo bar baz quux"]

// A higher limit than possible splits
split("foo bar baz quux", /\s+/, 4)
// ["foo", "bar", "baz", "quux"]

// A split that doesn't exist
split("foo bar baz quux", /p/, 2)
// ["foo bar baz quux"]

// Not providing a limit finds the maximum splits
split("foo bar baz quux", /\s+/)
// ["foo", "bar", "baz", "quux"]

备注：

在生产代码中，建议不要修改函数参数。这里的 separator 和 limit 都被重新赋值了。如果需要，您可以在函数顶部创建新变量来避免这种情况。我没有这样做，是为了让示例代码保持简短。这不是生产代码。

我没有包含任何防御性代码来检查函数参数类型。这对于生产代码来说是一件好事，或者考虑使用 TypeScript ;)

最初，如果提供的分隔符没有设置全局标志，我会抛出一个 Error。请参阅下面的评论，了解为什么更适合替用户添加全局标志而不是抛出错误。感谢 @Stephen P. 的建议。

限制 .split() 拆分的次数，而不是截断结果数组

Limiting the times that .split() splits, rather than truncating the resulting array

javascript

split

query-string