如何通过许多不同的类别列表对数据项列表进行分类,其中每个列表包含几个不同的类别值?

How does one categorize a list of data items via many different category lists where each list contains several distinct category values?

我是 JavaScript 的新手,所以我什至不知道从哪里开始。请有人可以帮助我。我有我尝试过的东西,如下所示,但它与我在下面显示的所需输出完全不同

我有这份配料表以及数量和价值:

const Ingris = [
  {
    val: "onion,",
    amount: "1",
  },
  {
    val: "paprika",
    amount: "½ tsp",
  },
  {
    val: "yogurt",
    amount: "1/2 Cup",
  },
  {
    val: "fine sea salt",
    amount: "½ tsp  ",
  },
];

我想根据以下变量对它们进行分类:

var spices = ["paprika", "parsley", "peppermint", "poppy seed", "rosemary"];
var meats = ["steak", "ground beef", "stewing beef", "roast beef", "ribs", "chicken"];
var dairy = ["milk", "egg", "cheese", "yogurt"];
var produce = ["peppers", "radishes", "onions", "Tomato"];

这就是我想要得到的:

    // desired output:
    
    const ShoppingList = [
      {
        produceOutput: [
          {
            val: "garlic, minced",
            amount: "8 cloves ",
          },
        ],
        spicesOutput: [
          {
            val: "paprika",
            amount: "½ tsp  ",
          },
          {
            val: "onion",
            amount: "1",
          },
        ],
//The ingredient only goes in here if the value is not in the categories

        NoCategoryOutput: [
          {
            val: "fine sea salt",
            amount: "½ tsp",
          },
        ],
      },
    ];

我写了一个正则表达式来检查这些值,但它不起作用:它无法把 "Paprika" 与 "paprika"、"greek yogurt" 与 "yogurt" 这样的值识别为同一种配料。有人可以帮我吗?

// NOTE(review): this is the asker's non-working attempt, kept verbatim.
// It fills a local `data` array but never returns it, so a caller always
// receives `undefined`.
const Categorize = (term) => {
  let data = []
  if (term) {
    // The callback destructures a property named `Ingris` from each list item.
    // The items of the `Ingris` list shown earlier only carry `val` and
    // `amount`, so inside the callback `Ingris` is `undefined` and shadows the
    // outer list.
    const newData = Ingris.filter(({ Ingris }) => {
      if (RegExp(term, "gim").exec(Ingris))
        // NOTE(review): `ingridients` is not declared anywhere in the visible
        // code — presumably a typo for the ingredient list; confirm.
        return ingridients.filter(({ amount }) => RegExp(term, "gim").exec(amount))
          .length;
    });
    // The whole filtered array is pushed as a single nested element.
    data.push(newData)
  } else {
    data = []
  }
};

您可以将搜索数组更改为带有 i 标志的正则表达式以进行不区分大小写的搜索,并将成分 val 转换为两边都带有通配符的正则表达式(以防万一是复数或有附加信息):

const Ingris = [
  {
val: "onion,",
amount: "1",
  },
  {
val: "paprika",
amount: "½ tsp",
  },
  {
val: "yogurt",
amount: "1/2 Cup",
  },
  {
val: "fine sea salt",
amount: "½ tsp  ",
  },
];
var spices = [/paprika/i, /parsley/i, /peppermint/i, /poppy seed/i, /rosemary/i];
var meats = [/steak/i, /ground beef/i, /stewing beef/i, /roast beef/i, /ribs/i, /chicken/i];
var dairy = [/milk/i, /egg/i, /cheese/i, /yogurt/i];
var produce = [/pepper/i, /radish/i, /onion/i, /Tomato/i];

/**
 * Checks an ingredient string against a list of search patterns.
 *
 * @param {Array<RegExp|string>} array - Patterns handed to `String.prototype.match`.
 * @param {string} ingredient - The ingredient text to test.
 * @returns {string|undefined} The ingredient itself as soon as one pattern
 *   matches, otherwise `undefined`.
 */
function shoppingList(array, ingredient) {
  for (const pattern of array) {
    const hit = ingredient.match(pattern);
    if (hit !== null) {
      return ingredient;
    }
  }
  return undefined;
}


/**
 * Sorts every entry of `Ingris` into the output list of the first category
 * whose patterns match its `val`, and logs the resulting shopping list.
 *
 * Previously every categorized ingredient ended up in `produceOutput`, no
 * matter which list had matched; each category now has its own output key.
 * `produceOutput` and `NoCategoryOutput` are still always present.
 *
 * Relies on the surrounding `Ingris`, `spices`, `meats`, `dairy`, `produce`
 * lists and on `shoppingList(patterns, text)`.
 */
function Categorize() {
  // Output key paired with the pattern list that feeds it; order decides
  // which category wins when an ingredient could match more than one.
  const categories = [
    ['spicesOutput', spices],
    ['meatsOutput', meats],
    ['dairyOutput', dairy],
    ['produceOutput', produce],
  ];

  const ShoppingList = {
    spicesOutput: [],
    meatsOutput: [],
    dairyOutput: [],
    produceOutput: [],
    NoCategoryOutput: [],
  };

  for (const item of Ingris) {
    // The patterns are unanchored, so the raw value can be tested directly;
    // no need to wrap it in a pseudo-regex string first.
    const matched = categories.find(([, patterns]) => shoppingList(patterns, item.val));
    const targetKey = matched ? matched[0] : 'NoCategoryOutput';
    ShoppingList[targetKey].push(item);
  }

  console.log(ShoppingList);
}

   Categorize();

如果您希望这对复数成分和单数成分都有效,您必须确保搜索数组中的值都是单数形式(即使用 /onion/ 而不是 "onions")。

这是否回答了您的问题?

可以在下一个提供的示例代码下方找到对所选方法的非常详细的解释。

const ingredientList = [{
  "amount": "1",
  "val": "packet pasta"
}, {
  "val": "Chicken breast"
}, {
  "val": "Ground ginger"
}, {
  "amount": "8 cloves",
  "val": "garlic, minced"
}, {
  "amount": "1",
  "val": "onion"
}, {
  "amount": "½ tsp",
  "val": "paprika"
}, {
  "amount": "1 Chopped",
  "val": "Tomato"
}, {
  "amount": "1/2 Cup",
  "val": "yogurt"
}, {
  "amount": "1/2 teaspoon",
  "val": "heavy cream"
}, {
  "amount": "½ tsp",
  "val": "fine sea salt"
}];

const spiceList = ["paprika", "parsley", "peppermint", "poppy seed", "rosemary"];
const meatList = ["steak", "ground beef", "stewing beef", "roast beef", "ribs", "chicken breast"];
const dairyList = ["milk", "eggs", "egg", "cheese", "yogurt", "cream"];
const produceList = ["peppers", "pepper", "radishes", "radish", "onions", "onion", "Tomatos", "Tomato", "Garlic", "Ginger"];


/**
 * Reducer callback that files `item` under the first matching category.
 *
 * @param {object} collector - Accumulator/config object carrying:
 *   `descriptorList` (array of `{ targetKey, valueList }`),
 *   `uncategorizableKey` (key used when no category matches),
 *   `itemSourceKey` (property of `item` whose value gets compared),
 *   `index` (object the grouped lists are written into) and, optionally,
 *   `isEqualCategoryValues(itemValue, categoryValue)`.
 * @param {object} item - The list item currently being processed.
 * @returns {object} The same `collector`, so it can be used with `reduce`.
 */
function groupItemByCategoryDescriptorAndSourceKey(collector, item) {
  const { descriptorList, uncategorizableKey, itemSourceKey, index } = collector;

  // Fallback comparison: trim, collapse whitespace runs, ignore letter case.
  const normalize = (text) => text.trim().replace(/\s+/g, ' ').toLowerCase();
  const defaultEquals = (itemValue, categoryValue) =>
    normalize(itemValue) === normalize(categoryValue);

  const equals = (typeof collector.isEqualCategoryValues === 'function')
    ? collector.isEqualCategoryValues
    : defaultEquals;

  const value = item[itemSourceKey];

  // Stays on the fallback key unless one of the descriptors claims the value.
  let targetKey = uncategorizableKey;

  // `some` stops at the first descriptor whose value list contains a match.
  descriptorList.some((descriptor) => {
    const hasMatch = descriptor.valueList.some(
      (categoryValue) => equals(value, categoryValue)
    );
    if (hasMatch) {
      targetKey = descriptor.targetKey;
    }
    return hasMatch;
  });

  if (!index[targetKey]) {
    index[targetKey] = [];
  }
  index[targetKey].push(item);

  return collector;
}


console.log(
  'Shopping List :', JSON.parse(JSON.stringify([ // in order to get rid of SO specific object reference logs.
  ingredientList.reduce(groupItemByCategoryDescriptorAndSourceKey, {

      descriptorList: [{
        targetKey: 'spicesOutput',
        valueList: spiceList
      }, {
        targetKey: 'meatsOutput',
        valueList: meatList
      }, {
        targetKey: 'dairyOutput',
        valueList: dairyList
      }, {
        targetKey: 'produceOutput',
        valueList: produceList
      }],
      uncategorizableKey: 'noCategoryOutput',

      // isEqualCategoryValues: anyCustomImplementationWhichDeterminesEqualityOfTwoCategoryValues
      itemSourceKey: 'val',
      index: {}

  }).index]))
);


/**
 * Custom equality check between an item value and a category value.
 *
 * Both values are normalized (trimmed, whitespace runs collapsed, lower-cased).
 * They count as equal when the normalized strings are identical or when the
 * category value occurs inside the item value as a whole word or word group,
 * e.g. "Ground ginger" vs "Ginger".
 *
 * @param {string} itemValue - Value taken from the processed list item.
 * @param {string} categoryValue - Value taken from a category list.
 * @returns {boolean} Whether the two values are considered equal.
 */
function isEqualCategoryValues(itemValue, categoryValue) {
  const normalize = (text) => text.trim().replace(/\s+/g, ' ').toLowerCase();

  const normalizedItem = normalize(itemValue);
  const normalizedCategory = normalize(categoryValue);

  if (normalizedItem === normalizedCategory) {
    return true;
  }
  // An empty category would otherwise produce `\b\b` and match almost anything.
  if (normalizedCategory === '') {
    return false;
  }
  // Escape metacharacters so the category value is matched literally.
  const escapedCategory = normalizedCategory.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // The backslash must be doubled: inside a string literal '\b' is a backspace
  // character, not the regex word-boundary assertion.
  return new RegExp(`\\b${escapedCategory}\\b`).test(normalizedItem);
}

console.log(
  'Shopping List (custom method for equality of category values) :', JSON.parse(JSON.stringify([
  ingredientList.reduce(groupItemByCategoryDescriptorAndSourceKey, {

      descriptorList: [{
        targetKey: 'spicesOutput',
        valueList: spiceList
      }, {
        targetKey: 'meatsOutput',
        valueList: meatList
      }, {
        targetKey: 'dairyOutput',
        valueList: dairyList
      }, {
        targetKey: 'produceOutput',
        valueList: produceList
      }],
      uncategorizableKey: 'noCategoryOutput',

      isEqualCategoryValues,
      itemSourceKey: 'val',
      index: {}

  }).index]))
);
.as-console-wrapper { min-height: 100%!important; top: 0; }

方法

OP 提供的问题看起来很像一个(相当复杂的)reduce 任务,从成分项目列表到 index/map,该任务具有成分源列表的不同目标列表项目。

从我的角度来看,将此 reduce-result 作为唯一项添加到数组中是有问题的。

const shoppingListIndex = {
  produceOutput: [{
    val: "garlic, minced",
    amount: "8 cloves ",
  }],
  spicesOutput: [{
    // ...
  }],
  NoCategoryOutput: [{
    val: "fine sea salt",
    amount: "½ tsp",
  }]
};

// ... instead of ...

const ShoppingList = [{
  produceOutput: [{
    // ...
  }],
  spicesOutput: [{
    // ...
  }],
  NoCategoryOutput: [{
    // ...
  }]
}];

任何直接的方法都会以某种方式逐步选择一个成分项目,然后再次针对每个项目搜索每个给定的类别列表,直到该成分项目的 val 值与第一个最佳类别项目匹配为止当前类别列表。

这个任务可以通过减少功能来概括。为了更加通用,这样的实现不应该对(或者不应该“知道”)环境以及所涉及列表的名称和数量等做出任何假设

因此这样的实现必须是抽象的和可配置的。这意味着应该清楚如何将 OP 的问题分解为这样的抽象和配置。

reduce 方法的 accumulator(累加器)可以用作 config(配置)或 collector(收集器)对象。

因此,为了不依赖类别列表的数量及其名称,确实向 collector 提供了类别描述符对象列表。执行将 know/identify 这个配置项作为 descriptorList.

此外,为了灵活命名成分项目的类别目标列表,这样的描述符项目不仅携带可能匹配的类别值列表,而且还具有属性用于目标列表的名称 ...

通用 reduce 任务的可能用例可能类似于下一个代码示例...

ingredientList.reduce(groupItemByCategoryDescriptorAndSourceKey, {

  descriptorList: [{
    targetKey: 'spicesOutput',
    valueList: spiceList // the OP's category list example.
  }, {
    targetKey: 'meatsOutput',
    valueList: meatList // the OP's category list example.
  }, {
    targetKey: 'dairyOutput',
    valueList: dairyList // the OP's category list example.
  }, {
    targetKey: 'produceOutput',
    valueList: produceList // the OP's category list example.
  }]
});

此外,完全通用的 reduce 任务的配置必须为任何源列表项提供 属性 名称(键),以便将其值与任何类别值进行比较提供的类别值列表。实现将 know/identify 这个配置项作为 itemSourceKey.

另一个必要的配置项是uncategorizableKey。它的值将作为无法分类的源列表项的特殊列表的键(意味着在所有提供的类别列表中找不到匹配项)。

将有一个可选的 isEqualCategoryValues 配置键。如果提供,此 属性 指的是确定两个类别值是否相等的自定义函数;其第一个 itemValue 参数保存当前处理的源列表项的引用,其第二个 categoryValue 参数保存当前处理的类别列表的当前处理值的引用。

最后是 index,它始终是一个空对象文字,也是 reduce 进程将其结果写入的引用。

因此,通用 reduce 任务的完整用例可能看起来类似于下一个代码示例...

const shoppingListIndex =
  ingredientList.reduce(groupItemByCategoryDescriptorAndSourceKey, {

    descriptorList: [{
      targetKey: 'spicesOutput',
      valueList: spiceList
    }, {
      targetKey: 'meatsOutput',
      valueList: meatList
    }, {
      targetKey: 'dairyOutput',
      valueList: dairyList
    }, {
      targetKey: 'produceOutput',
      valueList: produceList
    }],
    uncategorizableKey: 'noCategoryOutput',

    isEqualCategoryValues,
    itemSourceKey: 'val',
    index: {}

  }).index;

比较/确定相等性

现在将通用计算部分与案例特定配置分开,我们必须关注如何确定两个值的相等性,对于给定的示例,成分项目的 val 值在一个一方面,另一方面,OP 的一个类别数组中列出的许多值。

例如 { ... "val": "onion" ... } 应该在 produceList 的 "onion" 中找到对应项,而 { ... "val": "Chicken breast" ... } 应该在 meatList 的 "chicken breast" 中找到对应项。

以 "Chicken breast" 与 "chicken breast" 为例,很明显,比较过程必须先把两个操作数都转换为各自的规范化形式。在这里 toLowerCase 已经足够,但为了稳妥起见,还应处理空白字符:先对值执行 trim,再把其余任何空白序列替换为单个空格字符。

因此,一个已经足够好的平等标准比较可能看起来像...

/**
 * Default equality check: two values are equal when they are identical after
 * trimming, collapsing whitespace runs to a single space and lower-casing.
 *
 * @param {string} itemValue - Value taken from the processed list item.
 * @param {string} categoryValue - Value taken from a category list.
 * @returns {boolean} Whether the normalized values are identical.
 */
function isEqualCategoryValues(itemValue, categoryValue) {

  itemValue = itemValue.trim().replace((/\s+/g), ' ').toLowerCase();
  categoryValue = categoryValue.trim().replace((/\s+/g), ' ').toLowerCase();

  return (itemValue === categoryValue);
}

... 事实上,这是作为 reducer 函数的内部部分实现的回退,以防没有为 reducer 的 collector/config 对象提供用于确定相等性的自定义函数。

对于任何写得不太精确的成分和类别值,这种天真的值相等性检查确实会立即失败,就像示例代码中的那些... "Ground ginger" vs "Ginger" 来自 produceList, ... "heavy cream" vs "cream" 来自 dairyList, ... "garlic, minced" vs "Garlic" 再次来自 produceList.

很明显,为了完全涵盖 OP 的 needs/requirements/acceptance 标准,需要更好的定制平等检查。但现在解决问题归结为只提供一个定制的函数也很好,它只解决了如何准确确定值相等性的一部分。

现在手头已有 "ground ginger" 与 "ginger" 的规范化形式;考虑到一个字符串值中可能出现两个以上由空白和/或单词边界分隔或终止的单词,一种有效的方法是基于正则表达式(RegExp)进行匹配:

// Demonstrates that a word-boundary match finds "ginger" inside
// "ground ginger", once with a regex literal and once with a RegExp built
// from strings. In string literals the backslash has to be doubled: '\b'
// alone is a backspace character, not the `\b` word-boundary assertion.
console.log(
  "(/\\bginger\\b/).test('ground ginger') ?",
  (/\bginger\b/).test('ground ginger')
);
console.log(
  "RegExp('\\\\b' + 'ginger' + '\\\\b', 'i').test('ground ginger') ?",
  RegExp('\\b' + 'ginger' + '\\b', 'i').test('ground ginger')
);
.as-console-wrapper { min-height: 100%!important; top: 0; }

因此,可靠地涵盖 OP 用例的自定义 isEqualCategoryValues 函数的实现几乎与内部使用的默认相等性检查相同。它还具有基于 RegExp 的检查功能,有时会构建和测试正确的正则表达式,就像本段上方的可执行示例代码所演示的那样。

完整的自定义实现可能看起来像这样...

/**
 * Custom equality check between an item value and a category value.
 *
 * After normalizing both values (trim, collapse whitespace, lower-case) they
 * are equal when identical, or when the category value appears in the item
 * value delimited by word boundaries ("heavy cream" vs "cream").
 *
 * @param {string} itemValue - Value taken from the processed list item.
 * @param {string} categoryValue - Value taken from a category list.
 * @returns {boolean} Whether the two values are considered equal.
 */
function isEqualCategoryValues(itemValue, categoryValue) {
  const normalize = (text) => text.trim().replace(/\s+/g, ' ').toLowerCase();

  const normalizedItem = normalize(itemValue);
  const normalizedCategory = normalize(categoryValue);

  if (normalizedItem === normalizedCategory) {
    return true;
  }
  // Guard: an empty pattern between two `\b` would match nearly every item.
  if (normalizedCategory === '') {
    return false;
  }
  // Treat the category value as literal text, not as regex syntax.
  const escapedCategory = normalizedCategory.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // '\\b' yields the regex token `\b`; a single-backslash '\b' would be a
  // backspace character and the pattern would never match.
  return new RegExp(`\\b${escapedCategory}\\b`).test(normalizedItem);
}

Reduce 逻辑/实现

已经了解了原因(通用的 reduce 任务但配置灵活,因此能够处理各种各样的用例)以及如何使用 reduce 函数收集器配置 ...

const shoppingListIndex =
  ingredientList.reduce(groupItemByCategoryDescriptorAndSourceKey, {

    descriptorList: [{ /* ... */ }, { /* ... */ }/*, ... */],
    uncategorizableKey: 'noCategoryOutput',
    isEqualCategoryValues,
    itemSourceKey: 'val',
    index: {}

  }).index;

... 现在可以通过字面上的 »The Approach« 部分中的文字继续 reduce 逻辑的实际实现

再次阅读本节,可以得出一个完全由嵌套的 some 任务构建的解决方案。some 的本质是在找到第一个匹配项(返回布尔值 true)时尽快结束搜索(中断迭代循环),这正是解决 OP 问题所需要的;之所以要嵌套,是因为必须在一个由类别值列表组成的列表中为某个值查找匹配项。

由于基于some的方法的检测功能不仅要确保“提前退出”,而且还需要提供有关第二个比较值的信息,必须使用callback function's this context作为数据载体。

最外层的基于some的检测方法解决了编写/收集找到的类别的额外任务。因此这个方法可以命名为 doesBoundValueMatchCategoryAndWhichIsIt 并且它的用法很可能类似于下一个代码示例 ...

// iterate the (descriptor) list of category lists.

const isCategorizable = descriptorList.some(
  doesBoundValueMatchCategoryAndWhichIsIt,
  { value: item[itemSourceKey] }
);

正如大家所见,整个 some 堆栈的最终 return 值是否表明(成分)值是否可以分类(或不分类)。

doesBoundValueMatchCategoryAndWhichIsIt 的实现可能与此类似...

function doesBoundValueMatchCategoryAndWhichIsIt(descriptor) {

  // iterate the current category list.
    
  // boolean return value
  const isMatchingValue = descriptor.valueList.some(
    doesBoundValueEqualCategoryValue, this
  );

  // act upon the return value.
  //
  // - push the item of the related value- match
  //   into the corresponding category list (create
  //   the latter in case it did not yet exist).

  if (isMatchingValue) { // ... and which is it?
    const categoryKey = descriptor.targetKey;

    currentCategoryList = (
      index[categoryKey] ||
      (index[categoryKey] = [])
    );
    currentCategoryList.push(item);
  }

  // forces "early exit" in case of being `true`.
  return isMatchingValue;
}

本节以 doesBoundValueEqualCategoryValue 的实现收尾。此函数把绑定在 this 上的当前(成分)项目值以及作为第一个参数传入的当前类别值转发给相等性判断函数(后者是自定义实现或内部默认实现)...

function doesBoundValueEqualCategoryValue(categoryValue) {
  return isEqualCategoryValues(this.value, categoryValue);
}

最后,如果当前处理的(成分)item-value 无法分类,则该项目将被推入由收集器 uncategorizableKey 属性标识的列表。

就是这样。感谢阅读。

奖金(自以为是)

考虑到 OP 的另一个相关问题...... ... and 那里......人们得到了一些强大的东西,比如下一个基于 reduce 的可配置流程链......

const ingredientList = [
  '1 packet pasta',
  'Chicken breast',
  'Ground ginger',
  '8 cloves garlic, minced',
  '1 onion',
  '½ tsp paprika',
  '1 Chopped Tomato',
  '1/2 Cup yogurt',
  '1/2 teaspoon heavy cream',
  '½ tsp fine sea salt'
];
const measuringUnitList = [
  'tbsp', 'tablespoons', 'tablespoon', 'tsp', 'teaspoons', 'teaspoon', 'chopped',
  'oz', 'ounces', 'ounce', 'fl. oz', 'fl. ounces', 'fl. ounce', 'fluid ounces', 'fluid ounce',
  'cups', 'cup', 'qt', 'quarts', 'quart', 'pt', 'pints', 'pint', 'gal', 'gallons', 'gallon',
  'ml', 'milliliter', 'l', 'liter',
  'g', 'gram', 'kg', 'kilogram'
];
const spiceList = ["paprika", "parsley", "peppermint", "poppy seed", "rosemary"];
const meatList = ["steak", "ground beef", "stewing beef", "roast beef", "ribs", "chicken breast"];
const dairyList = ["milk", "eggs", "egg", "cheese", "yogurt", "cream"];
const produceList = ["peppers", "pepper", "radishes", "radish", "onions", "onion", "Tomatos", "Tomato", "Garlic", "Ginger"];


/**
 * Custom equality check between an item value and a category value.
 *
 * Values are normalized first (trim, collapse whitespace, lower-case). They
 * are equal when identical, or when the category value is contained in the
 * item value as a whole word or word group ("garlic, minced" vs "Garlic").
 *
 * @param {string} itemValue - Value taken from the processed list item.
 * @param {string} categoryValue - Value taken from a category list.
 * @returns {boolean} Whether the two values are considered equal.
 */
function isEqualCategoryValues(itemValue, categoryValue) {
  const normalize = (text) => text.trim().replace(/\s+/g, ' ').toLowerCase();

  const normalizedItem = normalize(itemValue);
  const normalizedCategory = normalize(categoryValue);

  if (normalizedItem === normalizedCategory) {
    return true;
  }
  // Without this guard an empty category would build `\b\b`, which matches
  // any item containing a word character.
  if (normalizedCategory === '') {
    return false;
  }
  // Escape regex metacharacters so the category value is matched verbatim.
  const escapedCategory = normalizedCategory.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Doubled backslashes are required; '\b' in a string literal is a backspace.
  return new RegExp(`\\b${escapedCategory}\\b`).test(normalizedItem);
}


console.log('Ingredient List :', ingredientList);

console.log(
  'Shopping List Index :', JSON.parse(JSON.stringify( // in order to get rid of SO specific object reference logs.
  ingredientList.reduce(collectNamedCaptureGroupData, {

      regXPrimary: createUnitCentricCapturingRegX(measuringUnitList),
      regXSecondary: unitlessCapturingRegX,
      defaultKey: 'val',
      list: []

    }).list.reduce(groupItemByCategoryDescriptorAndSourceKey, {

      descriptorList: [{
        targetKey: 'spicesOutput',
        valueList: spiceList
      }, {
        targetKey: 'meatsOutput',
        valueList: meatList
      }, {
        targetKey: 'dairyOutput',
        valueList: dairyList
      }, {
        targetKey: 'produceOutput',
        valueList: produceList
      }],
      uncategorizableKey: 'noCategoryOutput',

      isEqualCategoryValues,
      itemSourceKey: 'val',
      index: {}

  }).index))
);
.as-console-wrapper { min-height: 100%!important; top: 0; }
<script>
  //  [

  /**
   * Turns plain text into a regex source fragment that matches it literally,
   * while letting every whitespace run in the text match any whitespace run.
   *
   * @param {string} text - The literal search text, e.g. "fl. oz".
   * @returns {string} Regex source, e.g. `fl\.\s+oz`.
   */
  function escapeRegExpSearchString(text) {
    // The replacement strings need doubled backslashes: '\$&' is just '$&'
    // (nothing gets escaped) and '\s+' is the literal text 's+'.
    return text
      .replace(/[-[\]{}()*+?.,\\^$|#]/g, '\\$&')
      .replace(/\s+/g, '\\s+');
  }

  // 

  /**
   * Builds a case-insensitive regex that splits an ingredient line into the
   * named groups `amount` (everything up to and including a measuring unit)
   * and `val` (the remainder).
   *
   * see: [https://regex101.com/r/7bmGXN/1/]
   * e.g. (/^(?<amount>.*?\s*\b(?:tsp|...|fl\.\s*ounces|fl\.\s*ounce|cup))\b\s*(?<val>.*)$/i)
   *
   * @param {string[]} unitList - Measuring units; earlier entries take
   *   precedence in the alternation, so list longer variants first.
   * @returns {RegExp} The capturing regex.
   */
  function createUnitCentricCapturingRegX(unitList) {
    const options = unitList
      .map((unit) => escapeRegExpSearchString(unit))
      .join('|')
      // Make the whitespace after an abbreviation dot optional, so that both
      // "fl. oz" and "fl.oz" are recognized.
      .replace(/\\\.\\s\+/g, '\\.\\s*');

    return RegExp('^(?<amount>.*?\\s*\\b(?:' + options + '))\\b\\s*(?<val>.*)$', 'i');
  }
  const unitlessCapturingRegX = (/^(?<amount>¼|½|¾|\d+\/\d+|\d+)\s*(?<val>.*)$/);


  /**
   * Reducer callback that parses one ingredient line into a plain object and
   * appends it to `collector.list`.
   *
   * @param {object} collector - Carries `regXPrimary` and `regXSecondary`
   *   (regexes tried in that order), `defaultKey` (property name used when
   *   neither regex yields named groups) and `list` (the output array).
   * @param {string} item - The raw ingredient line.
   * @returns {object} The same `collector`, for use with `reduce`.
   */
  function collectNamedCaptureGroupData(collector, item) {
    const text = item.trim();
    const { regXPrimary, regXSecondary, defaultKey, list } = collector;

    const match = regXPrimary.exec(text) || regXSecondary.exec(text);

    let entry;
    if (match && match.groups) {
      // Copy the named groups into an ordinary object.
      entry = Object.assign({}, match.groups);
    } else {
      // Nothing could be captured: keep the whole line under the default key.
      entry = { [defaultKey]: text };
    }
    list.push(entry);

    return collector;
  }

  // 

  /**
   * Reducer callback that files `item` under the first matching category.
   *
   * @param {object} collector - Accumulator/config object carrying:
   *   `descriptorList` (array of `{ targetKey, valueList }`),
   *   `uncategorizableKey` (key used when no category matches),
   *   `itemSourceKey` (property of `item` whose value gets compared),
   *   `index` (object the grouped lists are written into) and, optionally,
   *   `isEqualCategoryValues(itemValue, categoryValue)`.
   * @param {object} item - The list item currently being processed.
   * @returns {object} The same `collector`, so it can be used with `reduce`.
   */
  function groupItemByCategoryDescriptorAndSourceKey(collector, item) {
    const { descriptorList, uncategorizableKey, itemSourceKey, index } = collector;

    // Fallback comparison: trim, collapse whitespace runs, ignore letter case.
    const normalize = (text) => text.trim().replace(/\s+/g, ' ').toLowerCase();
    const defaultEquals = (itemValue, categoryValue) =>
      normalize(itemValue) === normalize(categoryValue);

    const equals = (typeof collector.isEqualCategoryValues === 'function')
      ? collector.isEqualCategoryValues
      : defaultEquals;

    const value = item[itemSourceKey];

    // Stays on the fallback key unless one of the descriptors claims the value.
    let targetKey = uncategorizableKey;

    // `some` stops at the first descriptor whose value list contains a match.
    descriptorList.some((descriptor) => {
      const hasMatch = descriptor.valueList.some(
        (categoryValue) => equals(value, categoryValue)
      );
      if (hasMatch) {
        targetKey = descriptor.targetKey;
      }
      return hasMatch;
    });

    if (!index[targetKey]) {
      index[targetKey] = [];
    }
    index[targetKey].push(item);

    return collector;
  }
</script>