用正则表达式捕获每行中第一个引号内的文本组
Regex capture first text group within quotes per line
我正在编写一个简单的语法高亮器,需要捕获每行中第一个带引号的词(包括引号本身)。我该如何调整正则表达式才能做到这一点?目前的写法会匹配每一组引号内的文本,但我只需要每行的第一组。
这是我找到的两个用于匹配引号内文本的正则表达式:
(\"[^\"]*\")
(\".*?[^\\]\")
我只是想用 PySide 制作一个简单的 JSON 语法高亮器。
import os
import sys
from PySide2 import QtCore, QtGui, QtWidgets
class SourceEditor(QtWidgets.QPlainTextEdit):
    """Plain-text editor using a fixed-pitch Courier font with highlighting.

    On construction the widget installs a 10 pt Courier font and attaches a
    ``Highlighter`` to its own document.
    """

    def __init__(self, parent=None):
        """Create the editor, set its font and attach the highlighter.

        Args:
            parent: Optional parent passed through to ``QPlainTextEdit``.
        """
        super(SourceEditor, self).__init__(parent)
        self.setFont(self._build_font())
        # The highlighter is constructed with this editor's document and is
        # kept as an attribute of the editor.
        self.highlighter = Highlighter(self.document())

    @staticmethod
    def _build_font():
        """Return the 10 pt fixed-pitch Courier font used by the editor."""
        editor_font = QtGui.QFont()
        editor_font.setFamily('Courier')
        editor_font.setFixedPitch(True)
        editor_font.setPointSize(10)
        return editor_font
class Highlighter(QtGui.QSyntaxHighlighter):
    """Highlight ``//`` comments, ``/* ... */`` comments and quoted strings.

    Single-line rules are stored in ``highlightingRules`` as
    ``(QRegExp, QTextCharFormat)`` pairs and applied in insertion order.
    ``/* ... */`` comments may span several text blocks; the block state is
    used to remember whether a block ends inside an unterminated comment.
    """

    # Block-state values: 0 = block ends outside a comment,
    # 1 = block ends inside an unterminated /* ... */ comment.
    _STATE_NORMAL = 0
    _STATE_IN_COMMENT = 1

    def __init__(self, parent=None):
        """Build the highlighting rules and the comment delimiters.

        Args:
            parent: Optional parent (the editor passes its text document)
                forwarded to ``QSyntaxHighlighter``.
        """
        super(Highlighter, self).__init__(parent)
        self.highlightingRules = []

        # "//" comments: grey italic, up to the end of the line.
        singleLineCommentFormat = QtGui.QTextCharFormat()
        singleLineCommentFormat.setFontItalic(True)
        singleLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))
        self.highlightingRules.append(
            (QtCore.QRegExp("//[^\n]*"), singleLineCommentFormat))

        # "/* ... */" comments share the same look but are handled
        # separately in highlightBlock() because they can span blocks.
        self.multiLineCommentFormat = QtGui.QTextCharFormat()
        self.multiLineCommentFormat.setFontItalic(True)
        self.multiLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))

        # Every double-quoted run on a line.
        quotationFormat = QtGui.QTextCharFormat()
        quotationFormat.setForeground(QtGui.QColor(230, 145, 100))
        self.highlightingRules.append(
            (QtCore.QRegExp('"[^"]*"'), quotationFormat))

        # Raw strings: "\*" is not a valid Python escape sequence, so the
        # non-raw spelling triggers an invalid-escape warning.
        self.commentStartExpression = QtCore.QRegExp(r"/\*")
        self.commentEndExpression = QtCore.QRegExp(r"\*/")

    def highlightBlock(self, text):
        """Apply all formats to one block of text.

        Args:
            text: The contents of the block being highlighted.
        """
        # Single-line rules: format every non-overlapping match.
        for pattern, char_format in self.highlightingRules:
            index = pattern.indexIn(text)
            while index >= 0:
                length = pattern.matchedLength()
                self.setFormat(index, length, char_format)
                index = pattern.indexIn(text, index + length)

        self.setCurrentBlockState(self._STATE_NORMAL)

        opener = self.commentStartExpression
        closer = self.commentEndExpression

        if self.previousBlockState() == self._STATE_IN_COMMENT:
            # The comment was opened in an earlier block, so it covers this
            # block from its first character and the closer may be anywhere.
            startIndex = 0
            searchFrom = 0
        else:
            startIndex = opener.indexIn(text)
            # Look for the closer only after the opener, so that the "*" in
            # "/*/" is not counted as both the opener's and the closer's.
            searchFrom = startIndex + opener.matchedLength()

        while startIndex >= 0:
            endIndex = closer.indexIn(text, searchFrom)
            if endIndex == -1:
                # Unterminated: the comment runs to the end of the block and
                # continues into the next one.
                self.setCurrentBlockState(self._STATE_IN_COMMENT)
                commentLength = len(text) - startIndex
            else:
                commentLength = endIndex - startIndex + closer.matchedLength()
            self.setFormat(startIndex, commentLength,
                           self.multiLineCommentFormat)
            startIndex = opener.indexIn(text, startIndex + commentLength)
            searchFrom = startIndex + opener.matchedLength()
if __name__ == '__main__':
    # Demo entry point: show a SourceEditor pre-filled with sample text.
    app = QtWidgets.QApplication(sys.argv)
    window = SourceEditor()
    # No custom styling is applied here: this file neither imports nor
    # defines a ``style`` helper, so calling one would raise NameError
    # before the window is shown.
    window.setPlainText('''
[
{
"group": "Simple",
"name": "Simple",
"category name": "Apps",
"icon": "Simple.svg",
"paths": [
{
"path": "notepad.exe"
}
]
},
// some comment here
{
"group": "Simple",
"name": "Simple",
"category name": "Simple",
"icon": "Simple.svg"
"paths": [
{
"path": "notepad",
"args": "notepad.py"
},
{
"path": "run.exe",
}
]
}
]
''')
    window.resize(640, 512)
    window.show()
    # Run the event loop and use its return value as the exit status.
    sys.exit(app.exec_())
类似的问题……我该如何只捕获数字,而不包含其后的逗号?
(\d+),
[
{
"description": null,
"entity": {
"id": 343,
"name": "07010",
"type": "Shot"
},
"id": 1673,
"project": {
"id": 9,
"name": "test10",
}
}
]
使用捕获组来返回它:
^[ \t]*("[^"]*")
参见 regex proof。
解释
NODE
EXPLANATION
^
the beginning of the string
[ \t]*
any character of: ' ', '\t' (tab) (0 or more times (matching the most amount possible))
(
group and capture to \1:
"
'"'
[^"]*
any character except: '"' (0 or more times (matching the most amount possible))
"
'"'
)
end of \1
我正在编写一个简单的语法高亮器,需要捕获每行中第一个带引号的词(包括引号本身)。我该如何调整正则表达式才能做到这一点?目前的写法会匹配每一组引号内的文本,但我只需要每行的第一组。
这是我找到的两个用于匹配引号内文本的正则表达式:
(\"[^\"]*\")
(\".*?[^\\]\")
我只是想用 PySide 制作一个简单的 JSON 语法高亮器。
import os
import sys
from PySide2 import QtCore, QtGui, QtWidgets
class SourceEditor(QtWidgets.QPlainTextEdit):
    """Plain-text editor using a fixed-pitch Courier font with highlighting.

    On construction the widget installs a 10 pt Courier font and attaches a
    ``Highlighter`` to its own document.
    """

    def __init__(self, parent=None):
        """Create the editor, set its font and attach the highlighter.

        Args:
            parent: Optional parent passed through to ``QPlainTextEdit``.
        """
        super(SourceEditor, self).__init__(parent)
        self.setFont(self._build_font())
        # The highlighter is constructed with this editor's document and is
        # kept as an attribute of the editor.
        self.highlighter = Highlighter(self.document())

    @staticmethod
    def _build_font():
        """Return the 10 pt fixed-pitch Courier font used by the editor."""
        editor_font = QtGui.QFont()
        editor_font.setFamily('Courier')
        editor_font.setFixedPitch(True)
        editor_font.setPointSize(10)
        return editor_font
class Highlighter(QtGui.QSyntaxHighlighter):
    """Highlight ``//`` comments, ``/* ... */`` comments and quoted strings.

    Single-line rules are stored in ``highlightingRules`` as
    ``(QRegExp, QTextCharFormat)`` pairs and applied in insertion order.
    ``/* ... */`` comments may span several text blocks; the block state is
    used to remember whether a block ends inside an unterminated comment.
    """

    # Block-state values: 0 = block ends outside a comment,
    # 1 = block ends inside an unterminated /* ... */ comment.
    _STATE_NORMAL = 0
    _STATE_IN_COMMENT = 1

    def __init__(self, parent=None):
        """Build the highlighting rules and the comment delimiters.

        Args:
            parent: Optional parent (the editor passes its text document)
                forwarded to ``QSyntaxHighlighter``.
        """
        super(Highlighter, self).__init__(parent)
        self.highlightingRules = []

        # "//" comments: grey italic, up to the end of the line.
        singleLineCommentFormat = QtGui.QTextCharFormat()
        singleLineCommentFormat.setFontItalic(True)
        singleLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))
        self.highlightingRules.append(
            (QtCore.QRegExp("//[^\n]*"), singleLineCommentFormat))

        # "/* ... */" comments share the same look but are handled
        # separately in highlightBlock() because they can span blocks.
        self.multiLineCommentFormat = QtGui.QTextCharFormat()
        self.multiLineCommentFormat.setFontItalic(True)
        self.multiLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))

        # Every double-quoted run on a line.
        quotationFormat = QtGui.QTextCharFormat()
        quotationFormat.setForeground(QtGui.QColor(230, 145, 100))
        self.highlightingRules.append(
            (QtCore.QRegExp('"[^"]*"'), quotationFormat))

        # Raw strings: "\*" is not a valid Python escape sequence, so the
        # non-raw spelling triggers an invalid-escape warning.
        self.commentStartExpression = QtCore.QRegExp(r"/\*")
        self.commentEndExpression = QtCore.QRegExp(r"\*/")

    def highlightBlock(self, text):
        """Apply all formats to one block of text.

        Args:
            text: The contents of the block being highlighted.
        """
        # Single-line rules: format every non-overlapping match.
        for pattern, char_format in self.highlightingRules:
            index = pattern.indexIn(text)
            while index >= 0:
                length = pattern.matchedLength()
                self.setFormat(index, length, char_format)
                index = pattern.indexIn(text, index + length)

        self.setCurrentBlockState(self._STATE_NORMAL)

        opener = self.commentStartExpression
        closer = self.commentEndExpression

        if self.previousBlockState() == self._STATE_IN_COMMENT:
            # The comment was opened in an earlier block, so it covers this
            # block from its first character and the closer may be anywhere.
            startIndex = 0
            searchFrom = 0
        else:
            startIndex = opener.indexIn(text)
            # Look for the closer only after the opener, so that the "*" in
            # "/*/" is not counted as both the opener's and the closer's.
            searchFrom = startIndex + opener.matchedLength()

        while startIndex >= 0:
            endIndex = closer.indexIn(text, searchFrom)
            if endIndex == -1:
                # Unterminated: the comment runs to the end of the block and
                # continues into the next one.
                self.setCurrentBlockState(self._STATE_IN_COMMENT)
                commentLength = len(text) - startIndex
            else:
                commentLength = endIndex - startIndex + closer.matchedLength()
            self.setFormat(startIndex, commentLength,
                           self.multiLineCommentFormat)
            startIndex = opener.indexIn(text, startIndex + commentLength)
            searchFrom = startIndex + opener.matchedLength()
if __name__ == '__main__':
    # Demo entry point: show a SourceEditor pre-filled with sample text.
    app = QtWidgets.QApplication(sys.argv)
    window = SourceEditor()
    # No custom styling is applied here: this file neither imports nor
    # defines a ``style`` helper, so calling one would raise NameError
    # before the window is shown.
    window.setPlainText('''
[
{
"group": "Simple",
"name": "Simple",
"category name": "Apps",
"icon": "Simple.svg",
"paths": [
{
"path": "notepad.exe"
}
]
},
// some comment here
{
"group": "Simple",
"name": "Simple",
"category name": "Simple",
"icon": "Simple.svg"
"paths": [
{
"path": "notepad",
"args": "notepad.py"
},
{
"path": "run.exe",
}
]
}
]
''')
    window.resize(640, 512)
    window.show()
    # Run the event loop and use its return value as the exit status.
    sys.exit(app.exec_())
类似的问题……我该如何只捕获数字,而不包含其后的逗号?
(\d+),
[
{
"description": null,
"entity": {
"id": 343,
"name": "07010",
"type": "Shot"
},
"id": 1673,
"project": {
"id": 9,
"name": "test10",
}
}
]
使用捕获组来返回它:
^[ \t]*("[^"]*")
参见 regex proof。
解释
| NODE | EXPLANATION |
|---|---|
| `^` | the beginning of the string |
| `[ \t]*` | any character of: ' ', '\t' (tab) (0 or more times (matching the most amount possible)) |
| `(` | group and capture to \1: |
| `"` | '"' |
| `[^"]*` | any character except: '"' (0 or more times (matching the most amount possible)) |
| `"` | '"' |
| `)` | end of \1 |