填充数组中缺失的数值

Fill missing numeric values in an array

假设我有一个数组 [0, null, null, 3, null, null, null, 11]

我想用基于上一个和下一个已知数字(和索引?)的数字填充空值,所以我得到 [0, 1, 2, 3, 5, 7, 9, 11]。最有效的方法是什么?

我的想法是:先数出两个已知数字之间有多少个空值,再据此算出每一步的步长。但不同的已知数字对之间,步长会各不相同。

我正在制作一个图表,其中可能缺少一些值,因此我必须填写可能的值。

这是我试过的方法,但我认为它非常低效且混乱。我更愿意使用 ramda.js 或一些功能方法。

// Sample input: `null` marks a missing value that should be interpolated.
const data = [0, null, null, 3, null, null, null, 11]

/**
 * Computes the per-slot increment needed to move linearly from the last known
 * value to the next known (non-null) value found in `arr`.
 *
 * The array is scanned by index instead of recursing on `arr.slice(1)`, so a
 * long run of nulls neither copies the array over and over nor overflows the
 * call stack.
 *
 * @param {Array<number|null>} arr - Values starting at the current gap.
 * @param {number} [lastKnown=0] - Known value that precedes `arr`.
 * @param {number} [counter=1] - Number of slots between `lastKnown` and `arr[0]`.
 * @returns {number} The step size, or NaN when `arr` has no non-null entry.
 */
const getStep = (arr, lastKnown = 0, counter = 1) => {
    let offset = 0
    while (offset < arr.length && arr[offset] === null) {
        offset += 1
    }
    // Past the end, arr[offset] is undefined and the result is NaN, exactly
    // what the recursive version produced for an array with no known value.
    return (arr[offset] - lastKnown) / (counter + offset)
}

// Interpolation state carried from one map iteration to the next.
let lastKnown = null   // most recent known or already-filled value
let currentStep = null // step of the gap being filled; null outside a gap

const filledData = data.map((value, index) => {
    const isGap = value === null

    // A known value closes any open gap and becomes the new anchor.
    if (!isGap) {
        lastKnown = value
        currentStep = null
        return value
    }

    if (currentStep === null) {
        // First slot of a gap: look ahead once to find the step size.
        currentStep = getStep(data.slice(index), lastKnown)
    } else {
        // Later slots: advance the anchor to the value filled previously.
        lastKnown = lastKnown + currentStep
    }

    return currentStep + lastKnown
})

console.log(filledData)

// 更新:我已将其中一个回答标记为正确答案,但如果您对解决方案感兴趣,一定要看看这里的所有回答,其中有一些很有趣的想法。

一种使用 for 循环和计数的方法:

// Linearly interpolates runs of null inside `arr`, in place.
// A run with no known value on its left (leading nulls) is left untouched:
// without a left anchor the interval is NaN and the fill would be written to
// the bogus property arr["NaN"] rather than to any real slot.
let skips = 0; // nulls seen since the last known value
let last;      // index of the last known value; undefined until one is seen

for (let i = 0; i < arr.length; i++) {
  const current = arr[i];

  // If null, just increment the skip count
  if (current === null) {
    skips++;
    continue;
  }

  // If there are skipped spots with a known value on their left, fill them
  if (skips > 0 && last !== undefined) {
    // Interval is based on the skip count and the difference between current and last
    const interval = (current - arr[last]) / (skips + 1);

    // Fill in the missing spots in the original array
    for (let j = 1; j <= skips; j++) {
      arr[last + j] = arr[last] + (interval * j);
    }
  }

  last = i;  // update last valid index
  skips = 0; // reset skip count
}

您可以遍历数组;每当遇到 null 值时,向前查找下一个已知数字并确定空缺的长度,然后用线性插值把这段空缺填满。

var array = [0, null, null, 3, null, null, null, 11],
    i = 0, j, delta;

// Walk the array; whenever a null is met, look ahead to the next known value
// and fill the whole gap in place with evenly spaced values.
while (i < array.length) {
    if (array[i] === null) {
        // j stops on the first non-null slot after the gap
        for (j = i + 1; array[j] === null; j++);
        // per-slot increment from the value before the gap to the value after it
        delta = (array[j] - array[i - 1]) / (j - i + 1);
        // each filled slot builds on the slot written just before it
        for (; i < j; i++) {
            array[i] = delta + array[i - 1];
        }
    } else {
        i++;
    }
}

console.log(array);

ES6 版本:使用闭包保存三个状态——下一个具有数值的索引、上一个实际的值(已知值或刚填充的值),以及当前值缺失时需要累加的增量。

var array = [0, null, null, 3, null, null, null, 11],
    result = array.map((() => {
        // State kept in the closure between callback invocations:
        let nextKnown; // index where the current gap ends
        let previous;  // last value returned (known or filled)
        let step;      // increment applied inside the current gap

        return (value, index, source) => {
            if (value !== null) {
                previous = value;
                return previous;
            }
            // First null of a gap (or no gap measured yet): look ahead once.
            if (!(index < nextKnown)) {
                nextKnown = index + 1;
                while (nextKnown < source.length && source[nextKnown] === null) {
                    nextKnown++;
                }
                step = (source[nextKnown] - previous) / (nextKnown - index + 1);
            }
            previous += step;
            return previous;
        };
    })());

console.log(result);

这是我使用 ramda 的快速解决方案:

// Fills the null entries of `xs` by linear interpolation, using Ramda only.
// Five parallel lists are built (value, position inside the null run, run
// length, next known value, previous known value) and zipped into one tuple
// per element, from which each missing value is computed.
const xs = [0, null, null, 3, null, null, null, 11]
const scanWithIndex = R.addIndex(R.scan)
const notNil = R.complement(R.isNil)
const mapWithIndex = R.addIndex(R.map)
// merges two lists of tuples pairwise by concatenating the tuples
const zipArrays = R.zipWith(R.concat)

// for each element: how many consecutive nulls end at it (0 for a non-null)
const consNulls = R.drop(1, R.scan((acc, x) => R.isNil(x) ? (acc + 1) : 0, 0, xs))

// for each element: length of the null run it belongs to (0 for a non-null)
const consNullsSeqLens = R.drop(1, scanWithIndex((acc, x, ind) =>{
  // still inside a run whose length was already found: carry it forward
  if (x !== 0 && acc !== 0) return acc
  // otherwise measure the distance from here to the next non-null entry
  const rest = R.drop(ind, consNulls)
  return R.findIndex(R.equals(0), rest)
}, 0, consNulls))

// previous non null value for each el; the scan seed (0) is not dropped, so
// entry i is the last non-null value strictly before element i
const prevNonNulls = R.scan((acc, x) => R.isNil(x) ? acc : x, 0, xs)

// next non null value for each el (the element itself when it is not null)
const nextNonNulls = mapWithIndex((x, ind) => {
  const rest = R.drop(ind, xs)
  return R.find(notNil, rest)
}, xs)

// function to calculate missing values based on zipped arrays:
// a null becomes prev + seqN equal steps towards next; other values pass through
const calculateMissingValue = ([x, seqN, seqLen, next, prev]) =>
  R.isNil(x) ? prev + (next - prev) / (seqLen + 1) * seqN : x 

R.map(
  calculateMissingValue,
  // zips 5 lists together
  zipArrays(
    // `zipWith` must be qualified like every other Ramda call here; the bare
    // name only resolves where Ramda's functions are exposed as globals
    R.zipWith(R.append, consNullsSeqLens, R.zip(xs, consNulls)),
    R.zip(nextNonNulls,prevNonNulls)
 )
)

Repl link

虽然 @bubulik42 的回答表明您可以使用 Ramda 来做到这一点,但我不确定 Ramda 在这里能有多大帮助。(免责声明:我是 Ramda 的作者之一。)

我的第一遍是这样的:

// Replaces every run of nil values that lies between two known values with
// evenly spaced numbers. The reducer threads {vals, prev, nilCount} through
// the fold: the output so far, the last known value and the length of the
// nil run currently open.
const intersperseNulls = pipe(
  reduce(
    ({vals, prev, nilCount}, curr) => {
      // A nil only lengthens the open run.
      if (isNil(curr)) {
        return {vals: vals, prev: prev, nilCount: nilCount + 1};
      }
      // No open run: the known value is appended as is.
      if (nilCount < 1) {
        return {vals: append(curr, vals), prev: curr, nilCount: 0};
      }
      // Close the run: emit nilCount values stepping from prev towards curr.
      const filler = times(n => prev + (n + 1) * (curr - prev) / (nilCount + 1), nilCount);
      return {vals: append(curr, concat(vals, filler)), prev: curr, nilCount: 0};
    },
    {vals: [], prev: undefined, nilCount: 0},
  ),
  prop('vals')
);

这里使用了函数式风格中常见的 reduce 调用,但用法有些奇怪:它把状态放进一个对象,随每次迭代一路传递,而不是使用一个简单的累加器。请注意,去掉 Ramda 的部分之后,代码看起来是多么相似:

// Returns the `c` values that split the interval from `b` to `e` into `c + 1`
// equal parts, excluding both endpoints.
const steps = (b, e, c) => {
  const span = e - b;
  const parts = c + 1;
  const results = [];
  let i = 0;
  while (i < c) {
    i += 1;
    // i is already 1-based here, matching (index + 1) of a 0-based loop
    results.push(b + i * span / parts);
  }
  return results;
};

// Walks `array` once and replaces every run of nil entries that sits between
// two known values with the evenly spaced values produced by `steps`.
// A run that is still open when the array ends contributes nothing.
const intersperseNulls = array => {
  let vals = [];    // output built so far
  let prev;         // last known value; undefined until one is seen
  let nilCount = 0; // length of the nil run currently open

  array.forEach(curr => {
    if (curr == null) {
      nilCount += 1;
      return;
    }
    if (nilCount >= 1) {
      vals = vals.concat(steps(prev, curr, nilCount));
    }
    vals = vals.concat(curr);
    prev = curr;
    nilCount = 0;
  });

  return vals;
};

只有 times 难以替代。

但最后,我更喜欢 @Nina Scholz 的回答。它更简单,更易于阅读,也没有耍任何花招。

您可以在 Ramda REPL 中看到这些代码的运行效果。

另一种方法是将输入数组转换为 "segments" 列表,捕获每个段的起始值、结束值和大小。然后,您可以使用 R.chain 在每个段的开始值和结束值之间以线性步长构建列表。

const input = [0, null, null, 3, null, null, null, 11]

// Recursively converts the sparse list of numbers into a list of segments.
// Each segment records the value it starts from, the next non-nil value and
// the number of slots from one to the other. A tail with no further non-nil
// value yields no segment.
const segmentNull = xs => {
  if (xs.length === 0) return []

  const [head, ...tail] = xs
  const gap = R.takeWhile(R.isNil, tail)
  const remaining = R.dropWhile(R.isNil, tail)
  if (remaining.length === 0) return []

  const segment = { start: head, end: remaining[0], count: gap.length + 1 }
  return R.prepend(segment, segmentNull(remaining))
}

// segmentNull(input)
//=> [{"count": 3, "end": 3, "start": 0}, {"count": 4, "end": 11, "start": 3}]

// Builds `count` evenly spaced values that lead from `start` to `end`;
// `start` itself is excluded and `end` is the last value produced.
const linearRange = (start, end, count) => {
  const span = end - start
  const valueAt = n => span * (n + 1) / count + start
  return R.times(valueAt, count)
}

// linearRange(3, 11, 4)
//=> [5, 7, 9, 11]

// Expands every segment into its linear range and flattens the ranges into a
// single list of values.
const buildListFromSegments = R.chain(segment =>
  linearRange(segment.start, segment.end, segment.count))

// buildListFromSegments(segmentNull(input))
//=> [1, 2, 3, 5, 7, 9, 11]
//    ^-- note the leading 0 is missing

// The segment expansion never emits the very first input value, so it is put
// back at the front of the result here.
const fn = xs => {
  const first = xs[0]
  const rest = buildListFromSegments(segmentNull(xs))
  return R.prepend(first, rest)
}

console.log(fn(input))
<script src="//cdnjs.cloudflare.com/ajax/libs/ramda/0.25.0/ramda.min.js"></script>

复杂度为 O(n*m) 的解决方案,其中 n 是所有元素的数量,m 是空值的数量。

算法假定索引 0 和索引 length - 1(即首尾两个位置)处始终是有效数字。

/**
 * Returns a copy of `a` in which every run of missing values that lies
 * between two known values is replaced by linearly interpolated numbers.
 *
 * The result always has the same length as the input. Missing values with no
 * known value on one side (leading or trailing runs) are left as they are,
 * and an empty input yields an empty array. The input is not modified.
 *
 * @param {Array<number|null|undefined>} a - Values; null/undefined mark gaps.
 * @returns {Array<number|null|undefined>} The filled copy.
 */
function fillInTheBlanks(a){
  const result = a.slice();
  let prevIndex = -1; // index of the last known value, -1 until one is seen

  for (let i = 0; i < a.length; i++) {
    if (a[i] == null) continue;

    const gap = i - prevIndex; // slots from the previous known value to this one
    if (prevIndex >= 0 && gap > 1) {
      const start = a[prevIndex];
      const step = (a[i] - start) / gap;
      for (let k = 1; k < gap; k++) {
        result[prevIndex + k] = start + k * step;
      }
    }
    prevIndex = i;
  }
  return result;
}
// Demo: fill the sample array and print the result as JSON.
var arr = [0, null, null, 3, null, null, null, 11];
var res = fillInTheBlanks(arr);

console.log(JSON.stringify(res));

稍微扩展一下问题:Fill missing numeric values in an array?

以下代码会以最自然的方式,根据数组中其他数字来填充所有的零值(0)。

创建具有自然增量的参考尺度。

/**
 * Array Zero Values Natural fill.
 * Creates a referential scale, like a ruler: every 0 in `array` is treated as
 * a missing value and replaced using the non-zero values around it.
 *
 * - Zeros between two known values are interpolated linearly.
 * - Zeros before the first known value are extrapolated with the slope of the
 *   first segment; zeros after the last known value with the slope of the
 *   last segment.
 * - With a single known value the slope is 0, so every slot takes that value.
 * - With no known value at all (empty or all-zero input) a copy is returned.
 *
 * The result always has the same length as the input, and every number in it
 * is rounded to 4 decimal places.
 *
 * @param {number[]} array - Input values, 0 marking a missing value.
 * @returns {number[]} A new array with the zeros filled in.
 */
const naturalFill = (array) => {
    const round = (value) => +value.toFixed(4);

    // Indexes of the known (non-zero) values.
    const keys = [];
    for (let i = 0; i < array.length; i++) {
        if (array[i] !== 0) {
            keys.push(i);
        }
    }
    if (keys.length === 0) {
        return [...array];
    }

    const firstKey = keys[0];
    const lastKey = keys[keys.length - 1];
    const missing = [];
    let firstIncrement = 0;
    let lastIncrement = 0;

    // Each pair of neighbouring keys contributes the left key's value and the
    // interpolated values up to, but excluding, the right key.
    for (let i = 0; i < keys.length - 1; i++) {
        const slots = keys[i + 1] - keys[i];
        const min = array[keys[i]];
        const max = array[keys[i + 1]];
        const increment = (max - min) / slots;
        if (i === 0) {
            firstIncrement = increment;
        }
        lastIncrement = increment;
        for (let y = 0; y < slots; y++) {
            missing.push(round(min + increment * y));
        }
    }

    // Slots before the first key, stepping backwards from its value.
    const upfill = Array.from({ length: firstKey }, (_, y) =>
        round(array[firstKey] - firstIncrement * (firstKey - y)));
    // The last key itself plus every slot after it, up to the input length.
    const downfill = Array.from({ length: array.length - lastKey }, (_, y) =>
        round(array[lastKey] + lastIncrement * y));

    return [...upfill, ...missing, ...downfill];
};

// Example 1: zeros before, between and after the known values are filled in
console.log(
    naturalFill( [0, 0, 14, 0, 107, 0, 314, 0, 400, 0, 832, 987, 0, 0] )
)
// Example 2, generate array of epoch intervals
// NOTE(review): Date.now() is presumably in milliseconds, in which case
// 60*60*24 is 86,400 ms (about 86 seconds), not one day. Confirm the intended span.
console.log(
   naturalFill( [0,0, Date.now()-60*60*24, 0,0,0,0,0, Date.now(), 0,0,0] )
)

这可能在很多方面都有用,比如创建图表参考。

或者简单地从任何关键步骤点测量先前缩放的对象。

我们可以用它来生成顺序时间戳,如示例2。