Confluence wiki markup - table rows type determination with Regex

265 Views Asked by At

In confluence wiki v6.0 there are two different table types.

So I'm struggling to determine the table type from its first row. (I split the table into rows by detecting the new line with a regex like this: new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+"); and then take the rows from Matches.)

Table row could look like:

like this, if it is a header row:

|| heading 1 || heading 2 || heading 3 ||

like this, if it is a regular row:

| cell A1 | cell A2 | cell A3 |

and like this, if it is a row of a vertical table:

||Heading |cell B2 | cell B3 |

I tried to use expression like this ^(\|\|.*?\|) but found out that it works for headers as well.

After that I tried this one, ^(\|\|.*?\|\|), relying on the header markup, but it does not help to tell whether the row is a regular row.

So is it possible to determine the row type with a regex, or at least to tell whether a row is a vertical-table row?

Or is it better to write something that processes the row step by step?

1

There is 1 best solution below

0
On BEST ANSWER

I wrote it without regex, in JavaScript. It looks like this:

Simple string scanner

/**
 * Minimal forward-only character scanner.
 *
 * Instance fields:
 *   currentString - the input split into an array of single characters
 *   position      - index of the character that getChar() will return next
 *   errorList     - messages collected through error(), oldest first
 */
var Scanner = (function () {
    /**
     * @constructor
     * @param {string} text Text to scan.
     */
    function Scanner(text) {
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];

        return this;
    }

    /**
     * Look at the current character without consuming it.
     *
     * @returns {string|number} The character at `position`, or -1 once the
     *     end of the input has been reached.
     */
    Scanner.prototype.getChar = function () {
        if (this.position < this.currentString.length) {
            return this.currentString[this.position];
        }

        return -1;
    };

    /**
     * Advance to the next character. Calling it when the input is already
     * exhausted leaves `position` untouched and records an "EOL reached" error.
     */
    Scanner.prototype.nextChar = function () {
        if (this.position < this.currentString.length) {
            this.position++;
            return;
        }

        this.error("EOL reached");
    };

    /**
     * Record an error message tagged with the current position.
     *
     * @param {string} errorMsg Description of the problem.
     */
    Scanner.prototype.error = function (errorMsg) {
        var message = "Error at position " + this.position + "\nMessage: " + errorMsg + ".\n";

        // Must be a real call on the instance list: the message has to end up
        // in errorList, and no global variable may be created along the way.
        this.errorList.push(message);
    };

    return Scanner;
})();

Simple parser

 /**
     LINE ::= { CELL }

     CELL ::= '|' CELL1
     CELL1 ::= HEADER_CELL | REGULAR_CELL

     HEADER_CELL ::=  '|'  TEXT
     REGULAR_CELL ::=  TEXT

 */

 /**
  * Parser for a single table row written in wiki markup, following the
  * grammar  LINE ::= { CELL },  CELL ::= '|' ( '|' TEXT | TEXT ).
  *
  * Relies on `Scanner` and `TableCellType` being in scope when the methods run.
  *
  * Instance fields:
  *   scanner   - Scanner over the row currently being parsed
  *   rawText   - the row text handed to parseRow()
  *   cellsData - cells collected so far: { typeId, type, text } with untrimmed text
  *
  * @constructor
  */
 function RowParser() {
    this.scanner = {};
    this.rawText = "";
    this.cellsData = [];

    return this;
}

RowParser.prototype = {
    /**
     * Parse one row and store its cells in `cellsData`.
     * Problems are recorded in `scanner.errorList`; nothing is thrown.
     *
     * @param {string} row Text of a single table row.
     */
    parseRow: function (row) {
        this.scanner = new Scanner(row);
        this.rawText = row;
        this.cellsData = [];

        this.proceedNext();
    },

    /**
     * LINE ::= { CELL } - consume cells while the current character is '|',
     * then require that the whole input has been used up.
     */
    proceedNext: function () {
        var scanner = this.scanner;

        // This loop is the only driver of the parse: each proceedCell() call
        // consumes exactly one cell, so stack depth stays constant no matter
        // how many cells the row has.
        while (scanner.getChar() === '|') {
            this.proceedCell();
        }

        if (scanner.getChar() !== -1) {
            scanner.error("EOL expected, " + scanner.getChar() + " got");
        }
    },

    /**
     * CELL ::= '|' CELL1 - consume the opening bar, then decide the cell kind.
     */
    proceedCell: function () {
        var scanner = this.scanner;

        if (scanner.getChar() === '|') {
            scanner.nextChar();
            this.proceedHeaderCell();
        }
    },

    /**
     * CELL1 ::= HEADER_CELL | REGULAR_CELL - a second bar marks a header cell.
     */
    proceedHeaderCell: function () {
        if (this.scanner.getChar() === '|') {
            this.onHeaderCell();
        } else {
            this.onRegularCell();
        }
    },

    /**
     * HEADER_CELL ::= '|' TEXT - consume the second bar and read the text.
     * Records an error if the current character is not '|'.
     */
    onHeaderCell: function () {
        var scanner = this.scanner;

        if (scanner.getChar() === '|') {
            scanner.nextChar();
            this.proceedInnerText(TableCellType.info.Header);
        } else {
            scanner.error("Expected '|' got " + scanner.getChar() + ".");
        }
    },

    /**
     * REGULAR_CELL ::= TEXT - read the text of a regular cell.
     */
    onRegularCell: function () {
        this.proceedInnerText(TableCellType.info.Regular);
    },

    /**
     * TEXT - read characters up to the next '|' or the end of input and
     * append the resulting cell to `cellsData`.
     *
     * @param {number} cellType Id from TableCellType.info.
     */
    proceedInnerText: function (cellType) {
        var scanner = this.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];

        while (scanner.getChar() !== '|' && scanner.getChar() !== -1) {
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }

        this.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
    },

    /**
     * Return the parsed cells with their text trimmed.
     *
     * A properly closed row ends with a bar, which the parser sees as one
     * final cell without content. That final cell is the close marker: it is
     * checked after trimming (so a trailing space or "\r" does not count as
     * content) and is left out of the result. Every other cell is returned,
     * including cells whose text is empty.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     *     Fresh cell objects, or undefined (with an error recorded on the
     *     scanner) when the row has no closing bar.
     */
    getRowData: function () {
        var cells = this.cellsData,
            lastCell = cells[cells.length - 1],
            rowCells = [],
            i;

        if (!lastCell || lastCell.text.trim().length !== 0) {
            this.scanner.error("No close tag at row " + this.rawText + ".");
            return;
        }

        // Copy instead of trimming in place so cellsData keeps the raw text.
        for (i = 0; i < cells.length - 1; i++) {
            rowCells.push({
                typeId: cells[i].typeId,
                type: cells[i].type,
                text: cells[i].text.trim()
            });
        }

        return rowCells;
    }
};

The TableCellType enum mentioned above

/**
 * Cell type registry: `info` maps symbolic names to numeric ids and `data`
 * holds the descriptor ({ id, name }) that belongs to each id.
 */
var TableCellType = {
    info: { Regular: 10, Header: 20 },

    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],

    /**
     * Find the descriptor whose id is strictly equal to the given one.
     *
     * @param {number} id One of the values from `info`.
     * @returns {{id: number, name: string}|undefined} The first matching
     *     entry of `data`, or undefined when no entry matches.
     */
    getValueById: function (id) {
        var match;

        // `some` stops at the first hit, which is the entry to hand back.
        this.data.some(function (entry) {
            if (entry.id === id) {
                match = entry;
                return true;
            }
            return false;
        });

        return match;
    }
};

Usage:

var rowParser = new RowParser();
var row = "||AAA||BBB||CCC||\n|Hi|all|people!|";

// RowParser handles exactly one table row: it stops cell text only at '|' or
// at the end of input, so a line break left inside the string would be read
// as the text of an extra cell. Split the markup into lines first and parse
// them one at a time; `result` then holds one array of cells per row.
var result = row.split(/\r\n|\r|\n/).map(function (line) {
    rowParser.parseRow(line);
    return rowParser.getRowData();
});