在 Google 表格中实现外连接

Outer Join in Google Sheets

我有 2 组数据,想对它们进行外连接(即结果包含两组中的所有数据;对于只出现在其中一组的数据,另一组对应的单元格留空)。

我看过

但我就是无法正常工作。

我已经把数据简化到最基本的形式,以便先让它跑通。基本上两个数据集都有年和月(我把它们合并成一个 ID,每个年月组合在每个数据集中只出现一次)。

我的输出应该类似于以绿色突出显示的数据集(我手动输入)

但我从公式中得到的只是一个错误(如第一个屏幕截图所示) 或者一堆#VALUE!

这是我的示例 sheet 的 link。 https://docs.google.com/spreadsheets/d/1Iyhi7WKAA6g0hWpgl33fOe8q78MtzsATq4khnqcAT-w/edit?usp=sharing

这让我抓狂,因为我不完全理解数组公式是如何工作的

试试这个:

=arrayformula( 
  unique( 
    { 

      B2:E5, 
      iferror( 
        vlookup( 
          A2:A5, 
          G2:K5, 
          column(J2:K5) - column(G2) + 1, 
          false 
        ) 
      ); 

      H3:I5, 
      iferror( 
        vlookup( 
          G3:G5, 
          A3:E5, 
          column(D3:E5) - column(A3) + 1, 
          false 
        ) 
      ), 
      J3:K5 

    } 
  ) 
)

如果您需要更多功能,Formulas by Top Contributors 附加组件包括 SQL 连接函数。

Google QUERY() 函数非常强大并且仿照 SQL 但它不是完整的实现。因此,我决定编写一个自定义函数来模拟 SQL JOINs,Inner、Left、Right 和 Full。

// Handle to the active spreadsheet, fetched once when the script loads. DENORMALIZE() uses it
// to turn named ranges and A1-notation strings into arrays of cell values.
const ss = SpreadsheetApp.getActiveSpreadsheet();

/**
 * Combines two ranges on a common key. Can be used standalone or wrapped in QUERY() to
 * simulate SQL joins (Inner, Left, Right, Full).
 *
 * @param {(string|Array<Array>)} range1 - the main table as a named range, A1 notation or a 2-D array
 * @param {(string|Array<Array>)} range2 - the related table as a named range, A1 notation or a 2-D array
 * @param {number} primaryKey - column of the unique identifier in the main table, columns start with "1"
 * @param {number} foreignKey - column in the related table that joins to the main table, columns start with "1"
 * @param {string} joinType - "Inner", "Left", "Right" or "Full"; optional, defaults to "Inner";
 *   case insensitive, surrounding whitespace is ignored
 * @returns {Array<Array>} the joined rows as a two dimensional array
 * @customfunction
 *
 * Result Set Example:
 *
 * =QUERY(DENORMALIZE("Employees","Orders",1,3), "SELECT * WHERE Col2 = 'Davolio' AND Col8=2", FALSE)
 *
 * |EmpID|LastName|FirstName|OrderID|CustomerID|EmpID|OrderDate|ShipperID|
 * |:----|:-------|:--------|:------|:---------|:----|:--------|:--------|
 * |1    |Davolio |Nancy    |10285  |63        |1    |8/20/1996|2        |
 * |1    |Davolio |Nancy    |10292  |81        |1    |8/28/1996|2        |
 * |1    |Davolio |Nancy    |10304  |80        |1    |9/12/1996|2        |
 * etc.
 *
 * Other Examples:
 * =DENORMALIZE("Employees","Orders",1,3)
 * =DENORMALIZE("Employees","Orders",1,3,"full")
 * =QUERY(DENORMALIZE("Employees","Orders",1,3,"left"), "SELECT * ", FALSE)
 * =QUERY(DENORMALIZE("Employees","Orders",1,3), "SELECT * WHERE Col2 = 'Davolio'", FALSE)
 * =DENORMALIZE("Orders","OrderDetails",1,2)
 * // multiple joins
 * =DENORMALIZE("Employees",DENORMALIZE("Orders","OrderDetails",1,2),1,3)
 * =DENORMALIZE(DENORMALIZE("Employees","Orders",1,3),"OrderDetails",1,2)
 * =QUERY(DENORMALIZE("Employees",DENORMALIZE("Orders","OrderDetails",1,2),1,3), "SELECT *", FALSE)
 * =QUERY(DENORMALIZE(DENORMALIZE("Employees","Orders",1,3),"OrderDetails",4,2), "SELECT *", FALSE)
 *
 * Join Types:
 * (INNER) JOIN: Returns records that have matching values in both tables
 * LEFT (OUTER) JOIN: Returns all records from the left table, and the matched records from the right table
 * RIGHT (OUTER) JOIN: Returns all records from the right table, and the matched records from the left table
 * FULL (OUTER) JOIN: Returns all records when there is a match in either left or right table
 *
 * Note: the most common join is INNER which is why that is the default join type.
 * An unrecognised join type produces the INNER result.
 *
 * Row order of the result: matched rows in the order of range2, then (LEFT/FULL) the unmatched
 * rows of range1 padded with blanks on the right, then (RIGHT/FULL) the unmatched rows of range2
 * padded with blanks on the left.
 *
 * General:
 * This algorithm avoids nested loops by building a hash table (a Map) from each primary key value
 * to the index of the row that holds it, so every row of range2 is matched with a single lookup.
 * Keys are compared without type coercion: the number 1 and the text "1" are different keys.
 * primaryKey is expected to be unique in range1; if a key repeats, only its first row is joined.
 *
 * There is minimal testing in DENORMALIZE() to validate parameters.
 *
 * Author/Coder/Tester: John Agusta, 03/28/2021, Raleigh, NC USA
 *
 * License: Follows the GNU General Public License (GNU GPL or simply GPL), a series of widely-used free
 * software licenses that guarantee end users the freedom to run, study, share, and modify the software.
 *
 * http://www.gnu.org/licenses/gpl.html
 *
 *
 * Note: DENORMALIZE() can simulate multiple joins by nesting DENORMALIZE() functions as needed.
 *
 * Nesting is theoretically possible to unlimited depth, although only a few levels are normally used
 * in practical programs as performance will degrade accordingly.
 *
 * DENORMALIZE(range1, range2, primaryKey, foreignKey, joinType)
 *
 */
function DENORMALIZE(range1, range2, primaryKey, foreignKey, joinType) {
  // An omitted or blank join type falls back to the most common join, INNER.
  const join =
    joinType === undefined || joinType === null || joinType === ""
      ? "INNER"
      : String(joinType).trim().toUpperCase();

  // Sheets numbers columns from 1; JavaScript arrays are zero-based.
  const leftKeyCol = primaryKey - 1;
  const rightKeyCol = foreignKey - 1;

  // Arrays are used as-is. Strings are resolved against the spreadsheet: A1 notation when they
  // contain ":", otherwise a named range.
  const loadTable = (range, label) => {
    if (Array.isArray(range)) {
      return range;
    }
    if (typeof range !== "string") {
      throw new Error(`${label} must be a named range, A1 notation or a two-dimensional array`);
    }
    if (range.includes(":")) {
      return ss.getRange(range).getValues();
    }
    const namedRange = ss.getRangeByName(range);
    if (!namedRange) {
      // getRangeByName() yields null for an unknown name; fail with a readable message instead.
      throw new Error(`${label}: no named range called "${range}"`);
    }
    return namedRange.getValues();
  };

  const leftRows = loadTable(range1, "range1");
  const rightRows = loadTable(range2, "range2");

  // Blank rows shaped like each table, used to pad the missing side of outer-join rows.
  // An empty table (e.g. a nested join that matched nothing) has width 0.
  const blankLeft = new Array(leftRows.length > 0 ? leftRows[0].length : 0).fill("");
  const blankRight = new Array(rightRows.length > 0 ? rightRows[0].length : 0).fill("");

  // Hash table: primary key value -> index of the first left row carrying it.
  const firstLeftIndex = new Map();
  leftRows.forEach((leftRow, rowIndex) => {
    const key = leftRow[leftKeyCol];
    if (!firstLeftIndex.has(key)) {
      firstLeftIndex.set(key, rowIndex);
    }
  });

  // Inner join: one output row per right row whose foreign key exists on the left.
  const joined = [];
  const rightKeys = new Set();
  for (const rightRow of rightRows) {
    const key = rightRow[rightKeyCol];
    rightKeys.add(key);
    if (firstLeftIndex.has(key)) {
      joined.push(leftRows[firstLeftIndex.get(key)].concat(rightRow));
    }
  }

  // LEFT / FULL: add left rows whose key never appears on the right.
  if (join === "LEFT" || join === "FULL") {
    for (const leftRow of leftRows) {
      if (!rightKeys.has(leftRow[leftKeyCol])) {
        joined.push(leftRow.concat(blankRight));
      }
    }
  }

  // RIGHT / FULL: add right rows whose key never appears on the left.
  if (join === "RIGHT" || join === "FULL") {
    for (const rightRow of rightRows) {
      if (!firstLeftIndex.has(rightRow[rightKeyCol])) {
        joined.push(blankLeft.concat(rightRow));
      }
    }
  }

  return joined;
}

/**
 * Builds the lookup list used for joining: the value found in column `key` of each of the
 * first `lenRange` rows of `aRange`, in row order, so a value's position is its row index.
 *
 * @param {Array<Array>} aRange - the table rows
 * @param {number} lenRange - how many rows to read from the top of aRange
 * @param {number} key - zero-based column holding the key
 * @returns {Array} the key values, one per row read
 */
function getHT(aRange, lenRange, key) {
  const keys = [];
  let row = 0;
  while (row < lenRange) {
    keys.push(aRange[row][key]);
    row += 1;
  }
  return keys;
}

/**
 * Creates an array of empty strings, used as blank padding for rows that have no match.
 *
 * @param {number} length - how many empty strings to produce
 * @returns {Array<string>} a new array filled with ""
 */
function MakeArray(length) {
  const blanks = [];
  while (blanks.length < length) {
    blanks.push("");
  }
  return blanks;
}

/**
 * Supplies a fallback for an omitted argument.
 *
 * @param {*} valueToCheck - the value that may be undefined
 * @param {*} valueToDefault - what to return when valueToCheck is undefined
 * @returns {*} valueToCheck unless it is undefined, in which case valueToDefault
 */
function DefaultTo(valueToCheck, valueToDefault) {
  if (typeof valueToCheck === "undefined") {
    return valueToDefault;
  }
  return valueToCheck;
}

/**
 * Scans the rows of a two-dimensional array for the first row whose searchCol entry equals
 * searchVal. The comparison is loose (==), so the number 1 matches the text "1".
 *
 * Without returnCol the result is the index of the matching row, or -1 when nothing matches.
 * With returnCol the result is that column's value from the matching row, or an empty string
 * when nothing matches.
 *
 * @param {Array<Array>} aValues - the array to scan
 * @param {string} searchVal - the value to look for
 * @param {number} searchCol - the array column to search
 * @param {number} returnCol - optional, the array column to return if specified, otherwise array index is returned
 * @returns {(number|*)} array index of the value found, or the array value specified by returnCol
 * @customfunction
 */
function ASCAN(aValues, searchVal, searchCol, returnCol) {
  const wantsIndex = typeof returnCol === "undefined";
  const rowCount = aValues.length;
  let row = 0;
  while (row < rowCount) {
    if (aValues[row][searchCol] == searchVal) {
      return wantsIndex ? row : aValues[row][returnCol];
    }
    row += 1;
  }
  return wantsIndex ? -1 : "";
}

我有一个 sheet,这里有示例:

https://script.google.com/home/projects/1aQDY3Y0rOj0VrViLffYfARP9rp2j9jQ0XpUcFvye8XnxvkHy3Qr6_d0_/edit