Confluence wiki 标记 - table 使用正则表达式确定行类型
Confluence wiki markup - table rows type determination with Regex
在 confluence wiki v6.0 中有两种不同的 table 类型。
所以我很难通过第一行来确定 table 类型
(我使用正则表达式 new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+") 检测换行,
并用 Matches 把 table 拆分成行。)
Table 的行可能如下所示:
如果它是 header 行:
|| heading 1 || heading 2 || heading 3 ||
如果它是常规行:
| cell A1 | cell A2 | cell A3 |
如果它是垂直 table 的行:
||Heading |cell B2 | cell B3 |
我尝试使用这样的表达式 ^(\|\|.*?\|),
但发现它也会匹配 header 行。
之后,考虑到 header 的标记方式,我又尝试了 ^(\|\|.*?\|\|),
但它无法判断某一行是否为常规行。
那么,是否可以使用 Regex 来确定行类型,或者至少判断某一行是否为垂直行?
还是最好编写逐步处理每一行的代码?
下面是不使用正则表达式、用 javascript 编写的实现,
大致如下。
简单的字符串扫描器
/**
 * Scanner: a minimal forward-only character cursor over a string.
 *
 * Instance fields:
 *   currentString - the input split into an array of single UTF-16 code units
 *   position      - index of the current character (starts at 0)
 *   errorList     - formatted error messages recorded through error()
 */
var Scanner = (function () {
    /**
     * @constructor
     * @param {string} text - Text to scan.
     */
    function Scanner(text) {
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];

        /**
         * Returns the character under the cursor without advancing.
         * @returns {string|number} The current character, or -1 once the
         *     cursor is past the last character.
         */
        this.getChar = function () {
            var me = this;
            if (me.position < me.currentString.length) {
                return me.currentString[me.position];
            }
            return -1;
        };

        /**
         * Advances the cursor by one character. When the cursor is already
         * past the end, the position is left unchanged and an
         * "EOL reached" error is recorded instead.
         */
        this.nextChar = function () {
            var me = this;
            if (me.position < me.currentString.length) {
                me.position++;
                return;
            }
            me.error("EOL reached");
        };

        /**
         * Records a formatted error message, tagged with the current
         * position, in errorList.
         * @param {string} errorMsg - Description of the problem.
         */
        this.error = function (errorMsg) {
            var me = this,
                error = "Error at position " + me.position + "\nMessage: " + errorMsg + ".\n";
            // push must be *called*; indexing it (push[error]) records nothing.
            me.errorList.push(error);
        };

        return this;
    }

    return Scanner;
})();
简单的解析器
/**
LINE ::= { CELL }
CELL ::= '|' CELL1
CELL1 ::= HEADER_CELL | REGULAR_CELL
HEADER_CELL ::= '|' TEXT
REGULAR_CELL ::= TEXT
*/
/**
 * Parser for a single wiki table row such as
 * "||Heading |cell B2 | cell B3 |".
 *
 * Relies on two names defined elsewhere in this file: Scanner (character
 * cursor with getChar/nextChar/error) and TableCellType (cell kind lookup).
 *
 * Instance fields:
 *   scanner   - Scanner over the row currently being parsed
 *   rawText   - the row text passed to parseRow
 *   cellsData - raw (untrimmed) cells collected by the last parseRow call
 *
 * @constructor
 */
function RowParser() {
    this.scanner = {};
    this.rawText = "";
    this.cellsData = [];
    return this;
}

RowParser.prototype = {
    /**
     * Parses a row and stores the resulting cells in cellsData.
     * Problems are recorded through scanner.error rather than thrown.
     * @param {string} row - Row markup.
     */
    parseRow: function (row) {
        var me = this;
        me.scanner = new Scanner(row);
        me.rawText = row;
        me.cellsData = [];
        me.proceedNext();
    },

    /**
     * LINE ::= { CELL }
     * Consumes cells for as long as the cursor sits on '|'. This loop is the
     * only driver of the parse, so stack depth does not grow with the number
     * of cells. Anything other than end of input after the last cell is
     * reported as an error.
     */
    proceedNext: function () {
        var me = this,
            scanner = me.scanner;
        while (scanner.getChar() === '|') {
            me.proceedCell();
        }
        if (scanner.getChar() !== -1) {
            scanner.error("EOL expected, " + scanner.getChar() + " got");
        }
    },

    /**
     * CELL ::= '|' CELL1
     * Consumes the leading '|' and dispatches on the cell kind.
     */
    proceedCell: function () {
        var me = this,
            scanner = me.scanner;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            me.proceedHeaderCell();
        }
    },

    /**
     * CELL1 ::= HEADER_CELL | REGULAR_CELL
     * A second '|' right after the first marks a header cell.
     */
    proceedHeaderCell: function () {
        var me = this;
        if (me.scanner.getChar() === '|') {
            me.onHeaderCell();
        } else {
            me.onRegularCell();
        }
    },

    /**
     * HEADER_CELL ::= '|' TEXT
     * Consumes the second '|' and reads the header text.
     */
    onHeaderCell: function () {
        var me = this,
            scanner = me.scanner;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            me.proceedInnerText(TableCellType.info.Header);
        } else {
            // Report the character actually under the cursor.
            scanner.error("Expected '|' got " + scanner.getChar() + ".");
        }
    },

    /**
     * REGULAR_CELL ::= TEXT
     */
    onRegularCell: function () {
        this.proceedInnerText(TableCellType.info.Regular);
    },

    /**
     * Reads characters up to (not including) the next '|' or end of input and
     * appends one cell record to cellsData. Parsing of the following cells is
     * continued by the loop in proceedNext, not from here.
     * @param {number} cellType - Cell kind id (a TableCellType.info value).
     */
    proceedInnerText: function (cellType) {
        var me = this,
            scanner = me.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];
        while (scanner.getChar() !== '|' && scanner.getChar() !== -1) {
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }
        me.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
    },

    /**
     * Returns the parsed cells with surrounding whitespace trimmed.
     *
     * A row's closing '|' always produces a trailing zero-length cell, so a
     * row whose last cell is not empty (or that produced no cells at all) is
     * missing its close tag: an error is recorded and undefined is returned.
     * Zero-length cells are dropped from the result. The records returned are
     * copies, so cellsData is left untouched and repeated calls give the same
     * answer.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     */
    getRowData: function () {
        var me = this,
            data = me.cellsData,
            result = [],
            cell,
            i;
        if (data.length === 0 || data[data.length - 1].text.length !== 0) {
            me.scanner.error("No close tag at row " + me.rawText + ".");
            return;
        }
        for (i = 0; i < data.length; i++) {
            cell = data[i];
            if (cell.text.length !== 0) {
                result.push({
                    typeId: cell.typeId,
                    type: cell.type,
                    text: cell.text.trim()
                });
            }
        }
        return result;
    }
};
上面用到的 CellTypeEnum:
/**
 * Cell kind registry.
 *   info - symbolic name -> numeric id
 *   data - descriptor records ({id, name}), one per kind
 */
var TableCellType = {
    info: {
        Regular: 10,
        Header: 20
    },
    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],
    /**
     * Looks up the descriptor whose id strictly equals the given id.
     * @param {number} id - Cell kind id.
     * @returns {{id: number, name: string}|undefined} The first matching
     *     descriptor, or undefined when no descriptor has that id.
     */
    getValueById: function (id) {
        function hasRequestedId(entry) {
            return entry.id === id;
        }
        return this.data.filter(hasRequestedId)[0];
    }
};
用法:
// Example: feed markup to a RowParser and collect the parsed cells.
// The sample text holds two table rows separated by "\n"; it is handed to
// parseRow as a single string.
var row = "||AAA||BBB||CCC||\n|Hi|all|people!|",
    rowParser = new RowParser(),
    result;
rowParser.parseRow(row);
result = rowParser.getRowData();
在 confluence wiki v6.0 中有两种不同的 table 类型。
所以我很难通过第一行来确定 table 类型
(我使用正则表达式 new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+") 检测换行,
并用 Matches 把 table 拆分成行。)
Table 的行可能如下所示:
如果它是 header 行:
|| heading 1 || heading 2 || heading 3 ||
如果它是常规行:
| cell A1 | cell A2 | cell A3 |
如果它是垂直 table 的行:
||Heading |cell B2 | cell B3 |
我尝试使用这样的表达式 ^(\|\|.*?\|),
但发现它也会匹配 header 行。
之后,考虑到 header 的标记方式,我又尝试了 ^(\|\|.*?\|\|),
但它无法判断某一行是否为常规行。
那么,是否可以使用 Regex 来确定行类型,或者至少判断某一行是否为垂直行?
还是最好编写逐步处理每一行的代码?
下面是不使用正则表达式、用 javascript 编写的实现,
大致如下。
简单的字符串扫描器
/**
 * Scanner: a minimal forward-only character cursor over a string.
 *
 * Instance fields:
 *   currentString - the input split into an array of single UTF-16 code units
 *   position      - index of the current character (starts at 0)
 *   errorList     - formatted error messages recorded through error()
 */
var Scanner = (function () {
    /**
     * @constructor
     * @param {string} text - Text to scan.
     */
    function Scanner(text) {
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];

        /**
         * Returns the character under the cursor without advancing.
         * @returns {string|number} The current character, or -1 once the
         *     cursor is past the last character.
         */
        this.getChar = function () {
            var me = this;
            if (me.position < me.currentString.length) {
                return me.currentString[me.position];
            }
            return -1;
        };

        /**
         * Advances the cursor by one character. When the cursor is already
         * past the end, the position is left unchanged and an
         * "EOL reached" error is recorded instead.
         */
        this.nextChar = function () {
            var me = this;
            if (me.position < me.currentString.length) {
                me.position++;
                return;
            }
            me.error("EOL reached");
        };

        /**
         * Records a formatted error message, tagged with the current
         * position, in errorList.
         * @param {string} errorMsg - Description of the problem.
         */
        this.error = function (errorMsg) {
            var me = this,
                error = "Error at position " + me.position + "\nMessage: " + errorMsg + ".\n";
            // push must be *called*; indexing it (push[error]) records nothing.
            me.errorList.push(error);
        };

        return this;
    }

    return Scanner;
})();
简单的解析器
/**
LINE ::= { CELL }
CELL ::= '|' CELL1
CELL1 ::= HEADER_CELL | REGULAR_CELL
HEADER_CELL ::= '|' TEXT
REGULAR_CELL ::= TEXT
*/
/**
 * Parser for a single wiki table row such as
 * "||Heading |cell B2 | cell B3 |".
 *
 * Relies on two names defined elsewhere in this file: Scanner (character
 * cursor with getChar/nextChar/error) and TableCellType (cell kind lookup).
 *
 * Instance fields:
 *   scanner   - Scanner over the row currently being parsed
 *   rawText   - the row text passed to parseRow
 *   cellsData - raw (untrimmed) cells collected by the last parseRow call
 *
 * @constructor
 */
function RowParser() {
    this.scanner = {};
    this.rawText = "";
    this.cellsData = [];
    return this;
}

RowParser.prototype = {
    /**
     * Parses a row and stores the resulting cells in cellsData.
     * Problems are recorded through scanner.error rather than thrown.
     * @param {string} row - Row markup.
     */
    parseRow: function (row) {
        var me = this;
        me.scanner = new Scanner(row);
        me.rawText = row;
        me.cellsData = [];
        me.proceedNext();
    },

    /**
     * LINE ::= { CELL }
     * Consumes cells for as long as the cursor sits on '|'. This loop is the
     * only driver of the parse, so stack depth does not grow with the number
     * of cells. Anything other than end of input after the last cell is
     * reported as an error.
     */
    proceedNext: function () {
        var me = this,
            scanner = me.scanner;
        while (scanner.getChar() === '|') {
            me.proceedCell();
        }
        if (scanner.getChar() !== -1) {
            scanner.error("EOL expected, " + scanner.getChar() + " got");
        }
    },

    /**
     * CELL ::= '|' CELL1
     * Consumes the leading '|' and dispatches on the cell kind.
     */
    proceedCell: function () {
        var me = this,
            scanner = me.scanner;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            me.proceedHeaderCell();
        }
    },

    /**
     * CELL1 ::= HEADER_CELL | REGULAR_CELL
     * A second '|' right after the first marks a header cell.
     */
    proceedHeaderCell: function () {
        var me = this;
        if (me.scanner.getChar() === '|') {
            me.onHeaderCell();
        } else {
            me.onRegularCell();
        }
    },

    /**
     * HEADER_CELL ::= '|' TEXT
     * Consumes the second '|' and reads the header text.
     */
    onHeaderCell: function () {
        var me = this,
            scanner = me.scanner;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            me.proceedInnerText(TableCellType.info.Header);
        } else {
            // Report the character actually under the cursor.
            scanner.error("Expected '|' got " + scanner.getChar() + ".");
        }
    },

    /**
     * REGULAR_CELL ::= TEXT
     */
    onRegularCell: function () {
        this.proceedInnerText(TableCellType.info.Regular);
    },

    /**
     * Reads characters up to (not including) the next '|' or end of input and
     * appends one cell record to cellsData. Parsing of the following cells is
     * continued by the loop in proceedNext, not from here.
     * @param {number} cellType - Cell kind id (a TableCellType.info value).
     */
    proceedInnerText: function (cellType) {
        var me = this,
            scanner = me.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];
        while (scanner.getChar() !== '|' && scanner.getChar() !== -1) {
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }
        me.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
    },

    /**
     * Returns the parsed cells with surrounding whitespace trimmed.
     *
     * A row's closing '|' always produces a trailing zero-length cell, so a
     * row whose last cell is not empty (or that produced no cells at all) is
     * missing its close tag: an error is recorded and undefined is returned.
     * Zero-length cells are dropped from the result. The records returned are
     * copies, so cellsData is left untouched and repeated calls give the same
     * answer.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     */
    getRowData: function () {
        var me = this,
            data = me.cellsData,
            result = [],
            cell,
            i;
        if (data.length === 0 || data[data.length - 1].text.length !== 0) {
            me.scanner.error("No close tag at row " + me.rawText + ".");
            return;
        }
        for (i = 0; i < data.length; i++) {
            cell = data[i];
            if (cell.text.length !== 0) {
                result.push({
                    typeId: cell.typeId,
                    type: cell.type,
                    text: cell.text.trim()
                });
            }
        }
        return result;
    }
};
上面用到的 CellTypeEnum:
/**
 * Cell kind registry.
 *   info - symbolic name -> numeric id
 *   data - descriptor records ({id, name}), one per kind
 */
var TableCellType = {
    info: {
        Regular: 10,
        Header: 20
    },
    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],
    /**
     * Looks up the descriptor whose id strictly equals the given id.
     * @param {number} id - Cell kind id.
     * @returns {{id: number, name: string}|undefined} The first matching
     *     descriptor, or undefined when no descriptor has that id.
     */
    getValueById: function (id) {
        function hasRequestedId(entry) {
            return entry.id === id;
        }
        return this.data.filter(hasRequestedId)[0];
    }
};
用法:
// Example: feed markup to a RowParser and collect the parsed cells.
// The sample text holds two table rows separated by "\n"; it is handed to
// parseRow as a single string.
var row = "||AAA||BBB||CCC||\n|Hi|all|people!|",
    rowParser = new RowParser(),
    result;
rowParser.parseRow(row);
result = rowParser.getRowData();