Confluence wiki 标记 - table 使用正则表达式确定行类型

Question

在 confluence wiki v6.0 中有两种不同的 table 类型。

因此我很难仅凭第一行来确定 table 的类型（我用一个检测换行的正则表达式 new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+"); 把 table 按行拆分，并通过 Matches 取得各行，但接下来仍需要判断每一行的类型）。

Table 行可能如下所示：

如果它是 header 行：

|| heading 1 || heading 2 || heading 3 ||

如果它是常规行：

| cell A1 | cell A2 | cell A3 |

如果它是垂直 table 的行：

||Heading |cell B2 | cell B3 |

我尝试使用这样的表达式 ^(\|\|.*?\|) 但发现它也适用于 headers。

考虑到 header 的标记特点，之后我又尝试了 ^(\|\|.*?\|\|)，但如果它是常规行，这个表达式就不适用了。

那么是否可以通过使用Regex来实现行类型的确定，或者至少可以说是垂直行？

或者最好写一些逐步处理行的东西？

Answer 1

下面是不使用正则表达式、用 JavaScript 编写的实现，大致如下：

简单的字符串扫描器

/**
 * Minimal character scanner over a string.
 *
 * Exposes the current character, a one-step cursor advance and an error
 * log that records the cursor position alongside each message.
 */
var Scanner = (function(){
    /**
     * @constructor
     * @param {string} text - Text to scan; it is split into an array of
     *     characters stored in `currentString`.
     */
    function Scanner(text){
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];

        /**
         * Returns the character under the cursor without advancing.
         *
         * @returns {string|number} The current character, or -1 once the
         *     cursor is at or past the end of the input.
         */
        this.getChar = function(){
            var me = this;

            if(me.position < me.currentString.length){
                return me.currentString[me.position];
            }

            return -1;
        };

        /**
         * Advances the cursor by one character. When the cursor is already
         * at the end of the input the position is left unchanged and an
         * "EOL reached" error is recorded instead.
         */
        this.nextChar = function(){
            var me = this;

            if(me.position < me.currentString.length){
                me.position++;
                return;
            }

            me.error("EOL reached");
        };

        /**
         * Appends a formatted error entry to `errorList`.
         *
         * @param {string} errorMsg - Description of the problem.
         */
        this.error = function(errorMsg){
            var me = this,
                error = "Error at position " + me.position +"\nMessage: "+errorMsg+".\n";

            // push must be *called*; indexing it with [] silently drops the entry.
            me.errorList.push(error);
        };

        return this;
    }

    return Scanner;

})();

简单的解析器

 /**
     LINE ::= { CELL }

     CELL ::= '|' CELL1
     CELL1 ::= HEADER_CELL | REGULAR_CELL

     HEADER_CELL ::=  '|'  TEXT
     REGULAR_CELL ::=  TEXT

 */

 /**
  * Parser for one line of table markup made of `|`-delimited cells.
  *
  * A cell opened by `||` is recorded as a header cell, a cell opened by a
  * single `|` as a regular cell. Call `parseRow` first, then `getRowData`
  * to obtain the parsed cells. Problems are reported through the scanner's
  * `error` method rather than thrown.
  *
  * @constructor
  */
 function RowParser(){
    this.scanner = {};
    this.rawText = "";
    this.cellsData = [];

    return this;
}

RowParser.prototype = {
    /**
     * Parses `row`, replacing any state left over from a previous call.
     *
     * @param {string} row - Markup text to parse.
     */
    parseRow: function(row){
        var me = this;

        me.scanner = new Scanner(row);
        me.rawText = row;
        me.cellsData = [];

        me.proceedNext();
    },

    /**
     * LINE ::= { CELL }
     *
     * Consumes cells for as long as the current character is `|`. If the
     * scanner is not at the end of the input afterwards, an error is
     * recorded on the scanner.
     */
    proceedNext: function(){
        var me = this,
            scanner = me.scanner;

        // This loop is the only driver of iteration: each proceedCell call
        // consumes exactly one cell and returns, so stack depth stays
        // constant regardless of how many cells the row contains.
        while(scanner.getChar() === '|'){
            me.proceedCell();
        }

        if (scanner.getChar() !== -1)
        {
            scanner.error("EOL expected, "+ scanner.getChar() +" got");
        }
    },

    /**
     * CELL ::= '|' CELL1
     *
     * Consumes the opening `|` and hands over to the cell-kind dispatch.
     * Does nothing when the current character is not `|`.
     */
    proceedCell: function(){
        var me = this,
            scanner = me.scanner;

        if(scanner.getChar() === '|'){
            scanner.nextChar();
            me.proceedHeaderCell();
        }
    },

    /**
     * CELL1 ::= HEADER_CELL | REGULAR_CELL
     *
     * A second `|` right after the opening one selects a header cell;
     * anything else selects a regular cell.
     */
    proceedHeaderCell: function(){
        var me = this,
            scanner = me.scanner;

        if(scanner.getChar() === '|'){
            me.onHeaderCell();
        } else {
            me.onRegularCell();
        }
    },

    /**
     * HEADER_CELL ::= '|' TEXT
     *
     * Consumes the second `|` and reads the cell text as a header cell.
     * Records an error on the scanner when the current character is not `|`.
     */
    onHeaderCell: function(){
        var me = this,
            scanner = me.scanner,
            cellType = TableCellType.info;

        if(scanner.getChar() === '|'){
            scanner.nextChar();
            me.proceedInnerText(cellType.Header);
        }else{
            scanner.error("Expected '|' got "+ scanner.getChar() +".");
        }
    },

    /**
     * REGULAR_CELL ::= TEXT
     *
     * Reads the cell text as a regular cell.
     */
    onRegularCell:function(){
        this.proceedInnerText(TableCellType.info.Regular);
    },

    /**
     * Collects characters up to the next `|` or the end of the input and
     * appends the resulting cell to `cellsData`.
     *
     * @param {number} cellType - Cell type id, resolved through
     *     `TableCellType.getValueById`.
     */
    proceedInnerText: function(cellType){
        var me = this,
            scanner = me.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];

        while(scanner.getChar() !== '|' && scanner.getChar() !== -1){
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }

        me.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
    },

    /**
     * Returns the parsed cells with zero-length cells removed and the
     * remaining texts trimmed (the stored cell objects are trimmed in place).
     *
     * The row is considered closed when at least one zero-length cell was
     * parsed, which is what a trailing `|` or `||` produces. Cells that
     * contain only whitespace are kept and come out with an empty `text`.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     *     The cells, or `undefined` after recording a "No close tag" error
     *     when no zero-length cell exists.
     */
    getRowData: function(){
        var me = this,
            scanner = me.scanner,
            data = me.cellsData;

        var filteredData = data.filter(function(el){
            return el.text.length !== 0;
        });

        // Nothing was filtered out, so no zero-length cell was parsed.
        if(filteredData.length === data.length){
            scanner.error("No close tag at row "+ me.rawText +".");
            return;
        }

        for (var i = 0; i < filteredData.length; i++) {
            filteredData[i].text = filteredData[i].text.trim();
        }

        return filteredData;
    }
};

上面代码中用到的单元格类型枚举（TableCellType）：

/**
 * Cell type enumeration.
 *
 * `info` maps symbolic names to numeric ids; `data` holds one descriptor
 * (`id`, `name`) per id.
 */
var TableCellType = {
    info: {
        Regular: 10,
        Header: 20
    },

    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],

    /**
     * Looks up the descriptor whose `id` strictly equals the given value.
     *
     * @param {number} id - Cell type id.
     * @returns {{id: number, name: string}|undefined} The first matching
     *     descriptor from `data`, or `undefined` when none matches.
     */
    getValueById: function(id){
        var entries = this.data,
            index;

        for (index = 0; index < entries.length; index++) {
            if (entries[index].id === id) {
                return entries[index];
            }
        }

        return undefined;
    }
};

用法：

// Demo: parse a markup string and read back the resulting cells.
var rowParser = new RowParser();
// NOTE: this string holds two table rows joined by "\n". The parser gives the
// newline no special meaning, so the "\n" between the `||` that closes the
// first row and the `|` that opens the second is read as the text of one
// more header cell.
var row = "||AAA||BBB||CCC||\n|Hi|all|people!|";
rowParser.parseRow(row);
// Cells come back trimmed, so the "\n" cell appears with an empty `text`
// between the "CCC" and "Hi" cells.
var result = rowParser.getRowData();

Confluence wiki 标记 - table 使用正则表达式确定行类型

Confluence wiki markup - table rows type determination with Regex

c#

regex

confluence

wiki-markup