使用 Google Sheets Re2 正则表达式语法提取匹配地址
Extract matching addresses using Google Sheets Re2 regular expression syntax
我正在尝试提取 Google 表格单元格中公式中出现的所有 cell/range 地址。
当然,公式本身可能非常复杂。我尝试了很多模式,这些模式在在线正则表达式测试工具中有效,但在 Google Sheets 的 RE2 中无效。
以下示例显示了两个问题。也许我误读了匹配结果,但据我了解是 4 个匹配项。
公式(忽略逻辑):
=A:B4+$BC+$DE2+F2:G2
正则表达式:
(($?[A-Z]+$?\d+)(:($?[A-Z]+$?\d+))?)
预期结果:
[A:B4,$BC,$DE2,F2:G2]
Here(如果我没有误读结果)看起来不错。
我不确定显示的组匹配是否也被认为是匹配“4个匹配,287步”
但是在 Google 表格中,返回的全部是第 1 个匹配项的结果:
[A:B4,A,:B4,B4]
忽略其他匹配项
所以我想问题是如何将正则表达式转换为 re2 语法?
更新:
跟着player0的评论,可能我没说清楚。
这只是一个简单的例子,以隔离我遇到的其他问题。这只是一个字符串,包含一些相对和绝对格式的地址。
但是,我正在寻找一个更广泛的通用解决方案,它将适合任何可能包含公式和对其他 sheet 的引用的可能公式。例如:
=(STDEVA(Sheet1!B2:B5)+sum($A:$A))*B2
这里的预期结果是Sheet1!B2:B5,$A:$A,B2
此公式包含两个公式并引用了另一个sheet。
在这里仍然忽略命名范围和我目前无法想到的其他公式可能的引用。
此外,方括号 [] 无关紧要,它只是显示结果的方式,实际上是从日志中复制的,因为它全部在脚本中完成。
好像可以用
[A-Z$]+\d+(?::[A-Z$]\d+)?
尝试:
=INDEX(SUBSTITUTE(TEXTJOIN(",", 1,
IFNA(REGEXEXTRACT(SPLIT(SUBSTITUTE(FORMULATEXT(A3), "'", "♥"),
"+-*/^()=<>&"),
"(?:.+!)?[A-Z$]+\d+(?::[A-Z$](?:\d+)?)?|(?:.+!)?[A-Z$]:[A-Z$]+"))), "♥", "'"))
或更长:
=INDEX(SUBSTITUTE(TEXTJOIN(",", 1,
IFNA(IFNA(REGEXEXTRACT(SPLIT(SUBSTITUTE(FORMULATEXT(A3), "'", "♥"),
"+-*/^()=<>"), "(?:.+!)?[A-Z$]+\d+(?::[A-Z$](?:\d+)?)?"),
REGEXEXTRACT(SPLIT(SUBSTITUTE(FORMULATEXT(A3), "'", "♥"),
"+-*/^()=<>"), "(?:.+!)?[A-Z$]:[A-Z$]+")))), "♥", "'"))
我想出了一个更好的方法,不用拆分,就是使用 /g 标志。
但是,这只在脚本中有效,而无法通过表格内置的正则表达式函数(即 REGEXEXTRACT)实现,因为我不知道如何在单元格中编写一个既包含 /g 标志、又能被 REGEXEXTRACT 接受为有效正则表达式的字符串。
代码如下:
/**
 * Find all predecessor (precedent) cells of the input range.
 *
 * Reads the formula of `rng`, extracts every token that looks like an
 * A1-notation address, extracts a wider token list used to detect named
 * ranges, and hands both lists to `analyzeMatch` to build the final list.
 * Intermediate and final results are written with `Logger.log`.
 *
 * @param {Object} rng - Object exposing `getFormula()` that returns the
 *     formula text (presumably an Apps Script Range - confirm with callers).
 * @returns {Array} The list produced by `analyzeMatch`.
 */
function findPredecessor(rng){
  const formulaText = rng.getFormula();

  /* addMatchesRegex
   * 1st alternative: optional sheet prefix + cell address, optionally followed
   *   by ":" and a second cell address (A1, $A$1, Sheet1!B2:B5, ...).
   * 2nd alternative: optional sheet prefix + whole-column range
   *   (A:A, $A:$A, Sheet2!b:B, ...).
   * Also matches named ranges whose name looks like letters followed by
   *   digits (e.g. NameRange1).
   * Does not match named ranges whose name contains a dot or does not end
   *   with digits.
   * "$" is escaped everywhere: an unescaped "$" is an end-of-input anchor,
   *   not the literal absolute-reference marker.
   */
  const addMatchesRegex = /(([\w .'!]+)?(\$?[A-Z]+\$?\d+\b)(:(\$?[A-Z]+\$?\d+))?)|([\w .'!]+)?\$?[A-Z]+:\$?[A-Z]+/gi;
  // match() yields null when nothing matches; normalize to an empty list.
  const addMatches = formulaText.match(addMatchesRegex) || [];
  Logger.log("%s add matched: %s",addMatches.length,addMatches);

  /* fullMatchRegex
   * A looser version of addMatchesRegex that also returns strings such as
   * 1. SUM, IFERROR, etc - built-in sheet functions.
   * 2. Named ranges (including names containing a dot).
   * Every part is optional, so the result also contains empty strings.
   */
  const fullMatchRegex = /(([\w .'!]+)?([$A-Z.\d]*)(:(\$?[A-Z]+\$?\d*))?)/gi;
  const fullMatches = formulaText.match(fullMatchRegex) || [];
  Logger.log("Full matches list: %s",fullMatches);

  const namedRangesAdd = analyzeMatch(addMatches,fullMatches);
  Logger.log("%s total predecessors: %s",namedRangesAdd.length,namedRangesAdd);
  return namedRangesAdd;
}
/**
 * Combine the two regex match lists into one list of predecessor addresses.
 *
 * For every named range returned by
 * `SpreadsheetApp.getActive().getNamedRanges()`:
 *  - if its name appears in the de-duplicated `addMatches`, the name is
 *    removed from that list and replaced by the range's full address;
 *  - otherwise, if its name appears in `fullMatches`, the range's full
 *    address is added.
 * Whatever is left of the de-duplicated `addMatches` is appended at the end.
 *
 * @param {Array} addMatches - A1-notation addresses plus named ranges whose
 *     name looks like an address (letters followed by digits). `null` or
 *     `undefined` (what `String.prototype.match` yields when nothing
 *     matches) is treated as an empty list.
 * @param {Array} fullMatches - Wider token list: everything in `addMatches`
 *     plus all named-range names and irrelevant tokens such as function
 *     names and empty strings. `null`/`undefined` is treated as empty.
 * @returns {Array} Named-range addresses (formatted as 'Sheet'!A1Notation)
 *     followed by the remaining unique entries of `addMatches`.
 */
function analyzeMatch(addMatches,fullMatches){
  // Tolerate the null that match() returns when nothing matched.
  const tokens = fullMatches || [];
  // A Set keeps first-occurrence order while dropping duplicates.
  const uniqueMatches = [...new Set(addMatches || [])];
  // Full matched addresses to be returned
  const mAddresses = [];

  const namedRanges = SpreadsheetApp.getActive().getNamedRanges();
  namedRanges.forEach(function(item){
    const name = item.getName();
    // Look in the narrow list first, then fall back to the wide one.
    const i = uniqueMatches.indexOf(name);
    if (i === -1 && !tokens.includes(name)){
      return;
    }
    // Only resolve the range for names the formula actually references.
    const rng = item.getRange();
    mAddresses.push(`'${rng.getSheet().getName()}'!${rng.getA1Notation()}`);
    if (i > -1){
      // The name was matched as if it were an address; drop the raw name.
      uniqueMatches.splice(i,1);
    }
  });

  // Add all remaining matched addresses to the final list
  mAddresses.push(...uniqueMatches);
  return mAddresses;
}
让它变得有点复杂的是 NamedRanges(命名范围)。
此代码会进行匹配和分析,并返回一个包含所有前驱(被引用)单元格地址的列表,其中也包括 NamedRanges 对应的地址。
我正在尝试提取 Google 表格单元格中公式中出现的所有 cell/range 地址。
当然,公式本身可能非常复杂。我尝试了很多模式,这些模式在在线正则表达式测试工具中有效,但在 Google Sheets 的 RE2 中无效。
以下示例显示了两个问题。也许我误读了匹配结果,但据我了解是 4 个匹配项。
公式(忽略逻辑):
=A:B4+$BC+$DE2+F2:G2
正则表达式:
(($?[A-Z]+$?\d+)(:($?[A-Z]+$?\d+))?)
预期结果:
[A:B4,$BC,$DE2,F2:G2]
Here(如果我没有误读结果)看起来不错。 我不确定显示的组匹配是否也被认为是匹配“4个匹配,287步”
但是在 Google 表格中,返回的全部是第 1 个匹配项的结果:
[A:B4,A,:B4,B4]
忽略其他匹配项 所以我想问题是如何将正则表达式转换为 re2 语法?
更新: 跟着player0的评论,可能我没说清楚。 这只是一个简单的例子,以隔离我遇到的其他问题。这只是一个字符串,包含一些相对和绝对格式的地址。 但是,我正在寻找一个更广泛的通用解决方案,它将适合任何可能包含公式和对其他 sheet 的引用的可能公式。例如:
=(STDEVA(Sheet1!B2:B5)+sum($A:$A))*B2
这里的预期结果是Sheet1!B2:B5,$A:$A,B2
此公式包含两个公式并引用了另一个sheet。 在这里仍然忽略命名范围和我目前无法想到的其他公式可能的引用。 此外,方括号 [] 无关紧要,它只是显示结果的方式,实际上是从日志中复制的,因为它全部在脚本中完成。
好像可以用
[A-Z$]+\d+(?::[A-Z$]\d+)?
尝试:
=INDEX(SUBSTITUTE(TEXTJOIN(",", 1,
IFNA(REGEXEXTRACT(SPLIT(SUBSTITUTE(FORMULATEXT(A3), "'", "♥"),
"+-*/^()=<>&"),
"(?:.+!)?[A-Z$]+\d+(?::[A-Z$](?:\d+)?)?|(?:.+!)?[A-Z$]:[A-Z$]+"))), "♥", "'"))
或更长:
=INDEX(SUBSTITUTE(TEXTJOIN(",", 1,
IFNA(IFNA(REGEXEXTRACT(SPLIT(SUBSTITUTE(FORMULATEXT(A3), "'", "♥"),
"+-*/^()=<>"), "(?:.+!)?[A-Z$]+\d+(?::[A-Z$](?:\d+)?)?"),
REGEXEXTRACT(SPLIT(SUBSTITUTE(FORMULATEXT(A3), "'", "♥"),
"+-*/^()=<>"), "(?:.+!)?[A-Z$]:[A-Z$]+")))), "♥", "'"))
我想出了一个更好的方法,不用拆分,就是使用 /g 标志。
但是,这只在脚本中有效,而无法通过表格内置的正则表达式函数(即 REGEXEXTRACT)实现,因为我不知道如何在单元格中编写一个既包含 /g 标志、又能被 REGEXEXTRACT 接受为有效正则表达式的字符串。
代码如下:
/**
 * Find all predecessor (precedent) cells of the input range.
 *
 * Reads the formula of `rng`, extracts every token that looks like an
 * A1-notation address, extracts a wider token list used to detect named
 * ranges, and hands both lists to `analyzeMatch` to build the final list.
 * Intermediate and final results are written with `Logger.log`.
 *
 * @param {Object} rng - Object exposing `getFormula()` that returns the
 *     formula text (presumably an Apps Script Range - confirm with callers).
 * @returns {Array} The list produced by `analyzeMatch`.
 */
function findPredecessor(rng){
  const formulaText = rng.getFormula();

  /* addMatchesRegex
   * 1st alternative: optional sheet prefix + cell address, optionally followed
   *   by ":" and a second cell address (A1, $A$1, Sheet1!B2:B5, ...).
   * 2nd alternative: optional sheet prefix + whole-column range
   *   (A:A, $A:$A, Sheet2!b:B, ...).
   * Also matches named ranges whose name looks like letters followed by
   *   digits (e.g. NameRange1).
   * Does not match named ranges whose name contains a dot or does not end
   *   with digits.
   * "$" is escaped everywhere: an unescaped "$" is an end-of-input anchor,
   *   not the literal absolute-reference marker.
   */
  const addMatchesRegex = /(([\w .'!]+)?(\$?[A-Z]+\$?\d+\b)(:(\$?[A-Z]+\$?\d+))?)|([\w .'!]+)?\$?[A-Z]+:\$?[A-Z]+/gi;
  // match() yields null when nothing matches; normalize to an empty list.
  const addMatches = formulaText.match(addMatchesRegex) || [];
  Logger.log("%s add matched: %s",addMatches.length,addMatches);

  /* fullMatchRegex
   * A looser version of addMatchesRegex that also returns strings such as
   * 1. SUM, IFERROR, etc - built-in sheet functions.
   * 2. Named ranges (including names containing a dot).
   * Every part is optional, so the result also contains empty strings.
   */
  const fullMatchRegex = /(([\w .'!]+)?([$A-Z.\d]*)(:(\$?[A-Z]+\$?\d*))?)/gi;
  const fullMatches = formulaText.match(fullMatchRegex) || [];
  Logger.log("Full matches list: %s",fullMatches);

  const namedRangesAdd = analyzeMatch(addMatches,fullMatches);
  Logger.log("%s total predecessors: %s",namedRangesAdd.length,namedRangesAdd);
  return namedRangesAdd;
}
/**
 * Combine the two regex match lists into one list of predecessor addresses.
 *
 * For every named range returned by
 * `SpreadsheetApp.getActive().getNamedRanges()`:
 *  - if its name appears in the de-duplicated `addMatches`, the name is
 *    removed from that list and replaced by the range's full address;
 *  - otherwise, if its name appears in `fullMatches`, the range's full
 *    address is added.
 * Whatever is left of the de-duplicated `addMatches` is appended at the end.
 *
 * @param {Array} addMatches - A1-notation addresses plus named ranges whose
 *     name looks like an address (letters followed by digits). `null` or
 *     `undefined` (what `String.prototype.match` yields when nothing
 *     matches) is treated as an empty list.
 * @param {Array} fullMatches - Wider token list: everything in `addMatches`
 *     plus all named-range names and irrelevant tokens such as function
 *     names and empty strings. `null`/`undefined` is treated as empty.
 * @returns {Array} Named-range addresses (formatted as 'Sheet'!A1Notation)
 *     followed by the remaining unique entries of `addMatches`.
 */
function analyzeMatch(addMatches,fullMatches){
  // Tolerate the null that match() returns when nothing matched.
  const tokens = fullMatches || [];
  // A Set keeps first-occurrence order while dropping duplicates.
  const uniqueMatches = [...new Set(addMatches || [])];
  // Full matched addresses to be returned
  const mAddresses = [];

  const namedRanges = SpreadsheetApp.getActive().getNamedRanges();
  namedRanges.forEach(function(item){
    const name = item.getName();
    // Look in the narrow list first, then fall back to the wide one.
    const i = uniqueMatches.indexOf(name);
    if (i === -1 && !tokens.includes(name)){
      return;
    }
    // Only resolve the range for names the formula actually references.
    const rng = item.getRange();
    mAddresses.push(`'${rng.getSheet().getName()}'!${rng.getA1Notation()}`);
    if (i > -1){
      // The name was matched as if it were an address; drop the raw name.
      uniqueMatches.splice(i,1);
    }
  });

  // Add all remaining matched addresses to the final list
  mAddresses.push(...uniqueMatches);
  return mAddresses;
}
让它变得有点复杂的是 NamedRanges(命名范围)。 此代码会进行匹配和分析,并返回一个包含所有前驱(被引用)单元格地址的列表,其中也包括 NamedRanges 对应的地址。