根据带有占位符的模式过滤字符串数组

Question

至少一个月来，我一直在努力解决这个问题（每天都在试！）。我搜索了 Stack Overflow，一遍又一遍地查阅 MDN 上关于数组、字符串、正则表达式等的参考文档，但都没有帮助。我对正则表达式略有了解，但这个问题超出了我的能力范围。我相信这里有人能用一行代码解决它，所以我一直拖到快要把电脑扔出窗外时才来求助。我很想自己找到解决办法，但我就是做不到。

我正在玩一个密码游戏，游戏里用随机字母对一首诗或一个故事进行某种“编码”。我大概不需要在这里详细描述它，但以防万一，这里有一张图片。

所以我认为创建一个表单会是很好的练习：你可以在其中输入由字母、数字以及代表未知字母的“?”组合而成的模式。在图片中，你会看到一个用“YACAZ”表示的单词，其中有两个 A，所以你知道这两个位置上的字母是相同的。因此在我的函数中，可以使用 0 到 9 的任意数字作为占位符；沿用同一个例子，你会输入“?1a1?”。

这是我目前拥有的。每次我尝试遍历 regex 给我的数组时，我都会在同一个地方结束，尝试 - 但失败 - 将两组嵌套数组相互比较。无论我如何尝试将它们分解并进行比较，它最终都会变成一个无法正常运行的巨大混乱。我可以获得占位符索引，但那又怎样？

我不反对使用 lodash，但我对它的经验很少。也许它能帮助解决这个问题？不过它应该做不了原生（vanilla）JavaScript 做不到的事情，对吧？

const words = [
  { word: 'bargain', score: 1700 },
  { word: 'balloon', score: 1613 },
  { word: 'bastion', score: 1299 },
  { word: 'babylon', score: 634 },
  { word: 'based on', score: 425 },
  { word: 'bassoon', score: 371 },
  { word: 'baldwin', score: 359 },
  { word: 'bahrain', score: 318 },
  { word: 'balmain', score: 249 },
  { word: 'basilan', score: 218 },
  { word: 'bang on', score: 209 },
  { word: 'baseman', score: 204 },
  { word: 'batsman', score: 204 },
  { word: 'bakunin', score: 143 },
  { word: 'barchan', score: 135 },
  { word: 'bastian', score: 133 },
  { word: 'balagan', score: 118 },
  { word: 'balafon', score: 113 },
  { word: 'bank on', score: 113 },
  { word: 'ballpen', score: 111 },
]

const input = 'ba1122n' // those are numeric 1's, not lowercase L's

//matching words from the list above should be 'balloon' and 'bassoon', using the input 'ba1122n'.

/**
 * Filters a word list by a pattern that may contain placeholders.
 *
 * Pattern syntax (compared position by position against the whole word):
 * - `?` stands for any single character;
 * - a digit `0`..`9` is a placeholder: every position carrying the same digit
 *   must hold the same character within one word;
 * - any other character must match the word's character exactly.
 *
 * A word can only match when it has exactly as many characters as the pattern.
 *
 * @param {Array<{word: string}|string>} a - Candidate entries; either objects
 *   with a `word` property or plain strings. Entries without a string word
 *   are skipped.
 * @param {string} b - The pattern, e.g. `'ba1122n'`.
 * @returns {{match: boolean, results: Array, error: string}} `results` holds
 *   the matching entries in their original order, `match` tells whether there
 *   is at least one, and `error` is a non-empty message when the arguments
 *   are unusable (in which case `results` is empty).
 */
export const stringDiff = (a, b) => {
  const results = [];

  // Report bad input through the `error` field instead of throwing, so the
  // caller always receives the same result shape.
  if (!Array.isArray(a) || typeof b !== 'string') {
    return {
      match: false,
      results,
      error: 'stringDiff expects an array of words and a pattern string',
    };
  }

  // Spread splits by code point, so one pattern symbol lines up with one
  // visible character of the word.
  const patternSymbols = [...b];
  const isPlaceholder = (symbol) => /^[0-9]$/.test(symbol);

  const fitsPattern = (word) => {
    const letters = [...word];
    if (letters.length !== patternSymbols.length) {
      return false;
    }

    // Remembers which character each placeholder digit was first seen with.
    const letterByPlaceholder = new Map();

    return patternSymbols.every((symbol, position) => {
      const letter = letters[position];
      if (symbol === '?') {
        return true;
      }
      if (!isPlaceholder(symbol)) {
        return symbol === letter;
      }
      if (!letterByPlaceholder.has(symbol)) {
        letterByPlaceholder.set(symbol, letter);
        return true;
      }
      return letterByPlaceholder.get(symbol) === letter;
    });
  };

  for (const entry of a) {
    const word = typeof entry === 'string' ? entry : entry?.word;
    if (typeof word === 'string' && fitsPattern(word)) {
      results.push(entry);
    }
  }

  return { match: results.length > 0, results, error: '' };
};

// Run the matcher on the sample word list with the sample pattern.
// The returned object is not used here.
stringDiff(words,input)

Answer 1

正如我在上面的评论中所说，我仍然不太确定下面提供的方法是否真的满足了提问者（OP）的目标。

但如果问题是要根据自定义的替换（replacement/substitute）模式创建正则表达式，然后仅用这个正则表达式过滤单词列表（甚至还可以捕获对应的字符），那么可以尝试以下代码。

不过它有一个限制：用于描述自定义占位符模式的数字范围被限制在 1 到 9（0 被排除在外），因为这正好对应正则表达式捕获组的定义/限制（以及访问它们的方式）。

/**
 * Builds a regular expression from a substitute pattern.
 *
 * Pattern syntax:
 * - `?` matches any single character;
 * - a digit `1`..`9` is a placeholder: its first occurrence becomes a capture
 *   group for one word character, every later occurrence becomes a back
 *   reference to that group;
 * - any other character (including `0`) matches itself literally.
 *
 * Examples:
 * - `ba1122n`  becomes `/ba(\w)\1(\w)\2n/`
 * - `?1a1?`    becomes `/.(\w)a\1./`
 * - `?1b22a1?` becomes `/.(\w)b(\w)\2a\1./`
 *
 * The resulting expression has no flags and is not anchored, so it also
 * matches when the pattern occurs inside a longer string.
 *
 * @param {*} pattern - The substitute pattern; coerced with `String()`.
 * @returns {RegExp} The regular expression equivalent to the pattern.
 */
function createRegexFromSubstitutePattern(pattern) {
  // Capture groups are numbered by their position in the expression, so the
  // group number must follow the order in which placeholders first appear,
  // not the numeric value of the placeholder digit.
  const groupNumberByPlaceholder = new Map();

  const source = [...String(pattern)]
    .map((char) => {
      if (char === '?') {
        return '.';
      }
      if (char >= '1' && char <= '9') {
        if (groupNumberByPlaceholder.has(char)) {
          return `\\${groupNumberByPlaceholder.get(char)}`;
        }
        groupNumberByPlaceholder.set(char, groupNumberByPlaceholder.size + 1);
        return '(\\w)';
      }
      if (char === '0') {
        // A bare `0` right after a back reference would be read as part of
        // the reference number (`\1` + `0` = `\10`); a character class keeps
        // it a separate literal.
        return '[0]';
      }
      // Escape regex metacharacters so literal pattern text stays literal.
      return char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    })
    .join('');

  return new RegExp(source);
}

const wordList = [
  { word: 'bargain', score: 1700 },
  { word: 'balloon', score: 1613 },
  { word: 'bastion', score: 1299 },
  { word: 'babylon', score: 634 },
  { word: 'based on', score: 425 },
  { word: 'bassoon', score: 371 },
  { word: 'baldwin', score: 359 },
  { word: 'bahrain', score: 318 },
  { word: 'balmain', score: 249 },
  { word: 'basilan', score: 218 },
  { word: 'bang on', score: 209 },
  { word: 'baseman', score: 204 },
  { word: 'batsman', score: 204 },
  { word: 'bakunin', score: 143 },
  { word: 'barchan', score: 135 },
  { word: 'bastian', score: 133 },
  { word: 'balagan', score: 118 },
  { word: 'balafon', score: 113 },
  { word: 'bank on', score: 113 },
  { word: 'ballpen', score: 111 },
];
const input = 'ba1122n';

// Compile the sample pattern once; the lookups below reuse this expression.
const regXWord = createRegexFromSubstitutePattern(input);

// Collects the entries of the word list whose word satisfies the expression.
const findMatchingItems = () =>
  wordList.filter(({ word }) => regXWord.test(word));

// First the matching entries themselves ...
console.log('filter word list ...', findMatchingItems());

// ... then, for each matching entry, the match result including its captures.
console.log(
  "filter word list and map each word's match and captures ...",
  findMatchingItems().map(({ word }) => word.match(regXWord))
);

// Show the expression generated for each of the sample patterns.
for (const samplePattern of ['ba1122n', '?1a1?', '?1b22a1?']) {
  console.log(
    `createRegexFromSubstitutePattern('${samplePattern}')`,
    createRegexFromSubstitutePattern(samplePattern)
  );
}

/* Forces elements with class as-console-wrapper to at least full height, pinned to the top.
   NOTE(review): presumably this targets the snippet runner's console panel - confirm. */
.as-console-wrapper { min-height: 100%!important; top: 0; }

根据带有占位符的模式过滤字符串数组

Filter array of strings based on a pattern with placeholders

javascript

regex

arrays

string

reduce