用正则表达式捕获每行中第一个引号内的文本组

Regex capture first text group within quotes per line

我正在编写一个简单的语法高亮器,需要捕获每行中第一个带引号的词组的全部文本(包括引号本身)。我该如何调整正则表达式才能做到这一点?目前的写法会匹配到引号内的每一组文本,但我只需要每行的第一组。

这是我找到的两个用于匹配引号内文本的正则表达式: (\"[^\"]*\") (\".*?[^\]\")

我只是想用 PySide 制作一个简单的 JSON 语法高亮器。

import os
import sys
from PySide2 import QtCore, QtGui, QtWidgets

class SourceEditor(QtWidgets.QPlainTextEdit):
    """Plain-text editor widget that uses a fixed-pitch font and highlighting."""

    def __init__(self, parent=None):
        """Create the editor, apply the editor font and attach a highlighter.

        Args:
            parent: Optional parent widget, forwarded to ``QPlainTextEdit``.
        """
        super(SourceEditor, self).__init__(parent)

        # 10pt fixed-pitch Courier so the displayed text lines up in columns.
        editorFont = QtGui.QFont()
        editorFont.setPointSize(10)
        editorFont.setFixedPitch(True)
        editorFont.setFamily('Courier')
        self.setFont(editorFont)

        # Keep a reference on the widget; the highlighter works on this
        # editor's document.
        self.highlighter = Highlighter(self.document())


class Highlighter(QtGui.QSyntaxHighlighter):
    """Syntax highlighter for JSON-like text that may contain C-style comments.

    Three kinds of spans are formatted:

    * ``//`` single-line comments,
    * double-quoted strings,
    * ``/* ... */`` comments, which may continue across several blocks.
    """

    # Block states stored via setCurrentBlockState(): the second one marks
    # a block that ends while a /* comment is still open.
    _STATE_NORMAL = 0
    _STATE_IN_COMMENT = 1

    def __init__(self, parent=None):
        """Build the per-block rules and the multi-line comment delimiters.

        Args:
            parent: Object forwarded to ``QSyntaxHighlighter`` (the editor
                in this file passes its document).
        """
        super(Highlighter, self).__init__(parent)

        # List of (QRegExp, QTextCharFormat) pairs applied to every block,
        # in order; a later rule overrides an earlier one where they overlap.
        self.highlightingRules = []

        singleLineCommentFormat = QtGui.QTextCharFormat()
        singleLineCommentFormat.setFontItalic(True)
        singleLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))
        self.highlightingRules.append(
            (QtCore.QRegExp("//[^\n]*"), singleLineCommentFormat))

        # Same look as single-line comments; applied separately in
        # highlightBlock() because these comments can span blocks.
        self.multiLineCommentFormat = QtGui.QTextCharFormat()
        self.multiLineCommentFormat.setFontItalic(True)
        self.multiLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))

        quotationFormat = QtGui.QTextCharFormat()
        quotationFormat.setForeground(QtGui.QColor(230, 145, 100))
        self.highlightingRules.append(
            (QtCore.QRegExp('"[^"]*"'), quotationFormat))

        # Raw strings: "\*" is not a valid Python escape sequence, so the
        # non-raw spelling triggers an invalid-escape warning. The pattern
        # text handed to QRegExp is unchanged.
        self.commentStartExpression = QtCore.QRegExp(r"/\*")
        self.commentEndExpression = QtCore.QRegExp(r"\*/")

    def highlightBlock(self, text):
        """Apply all formats to one block of text.

        Args:
            text: The content of the block being highlighted.
        """
        # Pass 1: single-block rules. Every match of every rule is formatted.
        for pattern, charFormat in self.highlightingRules:
            expression = QtCore.QRegExp(pattern)
            index = expression.indexIn(text)
            while index >= 0:
                length = expression.matchedLength()
                self.setFormat(index, length, charFormat)
                index = expression.indexIn(text, index + length)

        # Pass 2: /* ... */ comments. Assume the block ends outside a
        # comment until an unterminated one is found below.
        self.setCurrentBlockState(self._STATE_NORMAL)

        # If the previous block ended inside a comment, this block starts
        # inside it (index 0); otherwise look for the first "/*".
        startIndex = 0
        if self.previousBlockState() != self._STATE_IN_COMMENT:
            startIndex = self.commentStartExpression.indexIn(text)

        while startIndex >= 0:
            endIndex = self.commentEndExpression.indexIn(text, startIndex)

            if endIndex == -1:
                # No terminator here: the comment runs to the end of the
                # block and carries over into the next one.
                self.setCurrentBlockState(self._STATE_IN_COMMENT)
                commentLength = len(text) - startIndex
            else:
                commentLength = (endIndex - startIndex
                                 + self.commentEndExpression.matchedLength())

            self.setFormat(startIndex, commentLength,
                           self.multiLineCommentFormat)
            startIndex = self.commentStartExpression.indexIn(
                text, startIndex + commentLength)


# Demo entry point: show a SourceEditor pre-filled with sample JSON-like text.
if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    window = SourceEditor()
    # NOTE(review): the original called ``style.setStyle(widget=window)`` here,
    # but no ``style`` name is imported or defined in this file, so the script
    # raised NameError before the window was shown. Restore the call together
    # with the matching import if that module is available.
    # The sample text below is demo input for the highlighter and is kept
    # exactly as written.
    window.setPlainText('''
        [
            {
                "group": "Simple",
                "name": "Simple",
                "category name": "Apps",
                "icon": "Simple.svg",
                "paths": [
                    {   
                        "path": "notepad.exe"
                    }
                ]
            },
            // some comment here
            {
                "group": "Simple",
                "name": "Simple",
                "category name": "Simple",
                "icon": "Simple.svg"
                "paths": [
                    {   
                        "path": "notepad",
                        "args": "notepad.py" 
                    },
                    {   
                        "path": "run.exe",
                    }
                ]
            }
        ]
        ''')
    window.resize(640, 512)
    window.show()
    sys.exit(app.exec_())

类似的问题:我该如何只捕获数字,而不包含其后的逗号?目前使用的是 (\d+),

[
    {
        "description": null,
        "entity": {
            "id": 343,
            "name": "07010",
            "type": "Shot"
        },
        "id": 1673,
        "project": {
            "id": 9,
            "name": "test10",
        }
    }
]

使用捕获组来返回它:

^[ \t]*("[^"]*")

参见 regex proof

解释

NODE EXPLANATION
^ the beginning of the string
[ \t]* any character of: ' ', '\t' (tab) (0 or more times (matching the most amount possible))
( group and capture to \1:
" '"'
[^"]* any character except: '"' (0 or more times (matching the most amount possible))
" '"'
) end of \1