如何提取特定字符前后嵌套括号的内容？

Question

在以下字符串中：

(10+10)*2*((1+1)*1)√(16)+(12*12)+2

我正在尝试将 ((1+1)*1)√(16) 替换为 nthroot(16,(1+1)*1)。
具体来说，我想提取 √ 字符两侧第一组括号中的所有内容。
括号本身可以包含多层括号和许多不同的符号。
语言是 JavaScript.

我尝试了一些东西，比如 <str>.replace(/\((.+)\)√\((.+)\)/g, 'nthroot($2,$1)')
但是我学习正则表达式的每一次尝试都失败了，我想不通。

Answer 1

我认为您目前无法使用 Javascript 中的正则表达式以一般方式解决此问题，因为您无法递归匹配平衡括号。

就我个人而言，我会通过将文本拆分为其组成字符、构建括号组并通过某种逻辑将所有内容重新组合在一起来解决这个问题。例如：

// Splits `text` into top-level groups: every balanced parenthesised run
// becomes one group, every character outside parentheses becomes its own
// single-character group.
function groupByParentheses(text) {
    const groups = [];
    // Nesting depth must start at 0 (not null), otherwise characters that
    // precede the first parenthesis are never emitted as their own groups.
    let depth = 0;
    let group = '';

    for (const char of text) {
        if (char === '(') depth++;
        if (char === ')') depth--;

        group += char;

        // Depth 0 means the current group is complete: either a balanced
        // parenthesised run or a single character outside parentheses.
        if (depth === 0) {
            groups.push(group);
            group = '';
        }
    }

    // Unbalanced input leaves a partial group behind; keep it so that no
    // part of the original text is silently dropped.
    if (group !== '') {
        groups.push(group);
    }

    return groups;
}

// Rewrites every `(a)√(b)` in `text` as `nthroot(b, (a))`, where both operands
// are balanced parenthesised groups. Everything else is copied unchanged.
function replaceRootsWithNthroot(text) {
    const groups = groupByParentheses(text);
    let result = '';

    for (let i = 0; i < groups.length; i++) {
        // Only look at groups[i + 2] once we know it exists; reading it
        // unconditionally throws when the text ends in a parenthesised group.
        const isRoot = i + 2 < groups.length && groups[i + 1] === '√';
        const hasParGroups =
            isRoot && groups[i][0] === '(' && groups[i + 2][0] === '(';

        if (hasParGroups) {
            // Drop only the outermost pair of parentheses of the radicand.
            const stripped = groups[i + 2].replace(/^\(|\)$/g, '');
            result += `nthroot(${stripped}, ${groups[i]})`;
            // Skip the root symbol and the radicand group just consumed.
            i += 2;
        } else {
            // Append non-root groups as they are.
            result += groups[i];
        }
    }

    return result;
}

const text = '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
const changedText = replaceRootsWithNthroot(text);

console.log('Before:', text, '\n', 'After:', changedText);

不过，这段代码算不上漂亮。 ;)

Answer 2

解析任务，如 OP 所要求的，不能仅由正则表达式涵盖。

特别是，要正确解析令牌中的嵌套括号，需要一个简单的、不依赖正则表达式的自定义解析过程。更重要的是，对于 OP 的用例，需要分别从左侧和右侧的令牌（它们由 √ 分隔）中解析出正确/有效的带括号表达式。

一种可能的方法是基于单个 split/reduce 任务与一些专门的辅助函数的协作...

// Returns the balanced parenthesized expression that starts at the very
// beginning of `token`, found by counting parentheses from left to right.
// Returns '' when `token` does not start with '(' or when the opening
// parenthesis is never closed.
function createFirstValidParenthesizedExpression(token) {
  if (token[0] !== '(') {
    return '';
  }
  let depth = 0;

  for (let pos = 0; pos < token.length; pos += 1) {
    const symbol = token[pos];

    if (symbol === '(') {
      depth += 1;
    } else if (symbol === ')') {
      depth -= 1;
    }
    // Depth back at zero: the parenthesis opened at index 0 just closed.
    if (depth === 0) {
      return token.slice(0, pos + 1);
    }
  }
  // Ran out of characters while still inside parentheses.
  return '';
}
// Returns the balanced parenthesized expression that ends at the very
// end of `token`, found by counting parentheses from right to left.
// Returns '' when `token` does not end with ')' or when the closing
// parenthesis has no matching opening one.
function createFirstValidParenthesizedExpressionFromRight(token) {
  if (token.slice(-1) !== ')') {
    return '';
  }
  let depth = 0;

  for (let pos = token.length - 1; pos >= 0; pos -= 1) {
    const symbol = token[pos];

    if (symbol === ')') {
      depth += 1;
    } else if (symbol === '(') {
      depth -= 1;
    }
    // Depth back at zero: the trailing parenthesis found its opener.
    if (depth === 0) {
      return token.slice(pos);
    }
  }
  // Reached the start of the token while still inside parentheses.
  return '';
}

// Escapes every character of `expression` that has a special meaning in a
// regular expression, so the result can be embedded in a `RegExp` pattern
// and match the expression literally.
//
// The replacement string must contain a real backslash ('\\$&'); a lone
// '\$&' is just '$&' and would return the input unchanged. Besides the
// basic math operators this also covers '^', '.', and the remaining regex
// metacharacters, which can equally show up inside a math expression.
function escapeExpressionChars(expression) {
  return expression.replace(/[-.*+?^${}()|[\]\\/]/g, '\\$&');
}

// Reducer that joins the two tokens surrounding one '√' into a single string
// in which the operands next to the '√' are rewritten as `nthroot(right,left)`.
//
// - `leftHandToken`: text to the left of '√'; its trailing operand becomes
//   the second `nthroot` argument.
// - `rightHandToken`: text to the right of '√'; its leading operand becomes
//   the first `nthroot` argument.
// Returns the left remainder + `nthroot(...)` + the right remainder.
function createNthRootExpression(leftHandToken, rightHandToken) {
  const leftToken = leftHandToken.trim();
  const rightToken = rightHandToken.trim();

  // patterns that match partial 'nthroot' expressions
  // which are free of parentheses.
  const regXSimpleLeftHandExpression = /[\d*/]+$/;
  // Parenthesized right-hand operands are left entirely to the balanced
  // parser below: a greedy pattern such as /^\([^+-]*\)/ over-matches input
  // like '(16)*(2)' and would swallow both groups.
  const regXSimpleRightHandExpression = /^[\d*/]+/;

  // operand directly to the left of '√': either a parentheses-free run
  // at the end of the token or a balanced parenthesized expression.
  const leftHandExpression =
    leftToken.match(regXSimpleLeftHandExpression)?.[0] ||
    createFirstValidParenthesizedExpressionFromRight(leftToken);

  // operand directly to the right of '√': either a parentheses-free run
  // at the start of the token or a balanced parenthesized expression.
  const rightHandExpression =
    rightToken.match(regXSimpleRightHandExpression)?.[0] ||
    createFirstValidParenthesizedExpression(rightToken);

  // Both operands are, by construction, a suffix respectively a prefix of
  // their token, so they can be cut off by length. This avoids building a
  // RegExp from expression text, which needs error-prone escaping.
  const leftRemainder = leftToken.slice(
    0,
    leftToken.length - leftHandExpression.length,
  );
  const rightRemainder = rightToken.slice(rightHandExpression.length);

  return [
    leftRemainder,
    `nthroot(${rightHandExpression},${leftHandExpression})`,
    rightRemainder,
  ].join('');
}

const sampleExpressionOriginal =
  '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
const sampleExpressionEdgeCase =
  '(10+10)*2*((1+1)*1)√16+(12*12)+2√(4*(1+2))+3';

// Demo runs as [banner, expression] pairs; each run logs the expression,
// its '√'-split tokens, and the tokens reduced by `createNthRootExpression`.
const demoRuns = [
  ["+++ processing the OP's expression +++", sampleExpressionOriginal],
  ["+++ processing a more edge case like expression +++", sampleExpressionEdgeCase],
];

demoRuns.forEach(([banner, expression], runIndex) => {
  // Separator line between consecutive runs (not before the first one).
  if (runIndex > 0) {
    console.log('\n');
  }
  const tokens = expression.split('√');

  console.log(banner);
  console.log('original value ...\n', expression);
  console.log('original value, after split ...', tokens);
  console.log(
    'value, after "nthroot" creation ...\n',
    tokens.reduce(createNthRootExpression)
  );
});

.as-console-wrapper { min-height: 100%!important; top: 0; }

如何提取特定字符前后嵌套括号的内容？

How to extract the content of also nested parentheses before and after a specific character?

javascript

regex

parsing

nested

parentheses