`str.format()` 中的 Unicode 错误
Unicode error in `str.format()`
我正在尝试 运行 以下脚本,该脚本扫描 *.csproj
文件并检查 Visual Studio 解决方案中的项目依赖关系,但我收到以下错误。我已经尝试了各种codec
和encode/decode
和u''
组合,无济于事...
(变音符号 是 的目的,我打算保留它们)。
Traceback (most recent call last):
File "E:[=11=] GIT\SolutionDependencies.py", line 44, in <module>
references = GetProjectReferences("MiotecGit")
File "E:[=11=] GIT\SolutionDependencies.py", line 40, in GetProjectReferences
outputline = u'"{}" -> "{}"'.format(projectName, referenceName)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 19: ordinal not in range(128)
import glob
import os
import fnmatch
import re
import subprocess
import codecs
gvtemplate = """
digraph g {
rankdir = "LR"
#####
}
""".strip()
def GetProjectFiles(rootFolder):
result = []
for root, dirnames, filenames in os.walk(rootFolder):
for filename in fnmatch.filter(filenames, "*.csproj"):
result.append(os.path.join(root, filename))
return result
def GetProjectName(path):
result = os.path.splitext(os.path.basename(path))[0]
return result
def GetProjectReferences(rootFolder):
result = []
projectFiles = GetProjectFiles(rootFolder)
for projectFile in projectFiles:
projectName = GetProjectName(projectFile)
with codecs.open(projectFile, 'r', "utf-8") as pfile:
content = pfile.read()
references = re.findall("<ProjectReference.*?</ProjectReference>", content, re.DOTALL)
for reference in references:
referenceProject = re.search('"([^"]*?)"', reference).group(1)
referenceName = GetProjectName(referenceProject)
outputline = u'"{}" -> "{}"'.format(projectName, referenceName)
result.append(outputline)
return result
references = GetProjectReferences("MiotecGit")
output = u"\n".join(*references)
with codecs.open("output.gv", "w", 'utf-8') as outputfile:
outputfile.write(gvtemplate.replace("#####", output))
graphvizpath = glob.glob(r"C:\Program Files*\Graphviz*\bin\dot.*")[0]
command = '{} -Gcharset=latin1 -T pdf -o "output.pdf" "output.gv"'.format(graphvizpath)
subprocess.call(command)
当 Python 2.x 尝试在 Unicode 上下文中使用字节字符串时,它会自动尝试使用 ascii
将字节字符串 decode
转换为 Unicode 字符串编解码器。虽然 ascii
编解码器是一个安全的选择,但它通常不起作用。
对于 Windows 环境,mbcs
编解码器将 select Windows 用于 8 位字符的代码页。您可以自己显式解码字符串。
outputline = u'"{}" -> "{}"'.format(projectName.decode('mbcs'), referenceName.decode('mbcs'))
我正在尝试 运行 以下脚本,该脚本扫描 *.csproj
文件并检查 Visual Studio 解决方案中的项目依赖关系,但我收到以下错误。我已经尝试了各种codec
和encode/decode
和u''
组合,无济于事...
(变音符号 是 的目的,我打算保留它们)。
Traceback (most recent call last): File "E:[=11=] GIT\SolutionDependencies.py", line 44, in <module> references = GetProjectReferences("MiotecGit") File "E:[=11=] GIT\SolutionDependencies.py", line 40, in GetProjectReferences outputline = u'"{}" -> "{}"'.format(projectName, referenceName) UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 19: ordinal not in range(128)
import glob
import os
import fnmatch
import re
import subprocess
import codecs
gvtemplate = """
digraph g {
rankdir = "LR"
#####
}
""".strip()
def GetProjectFiles(rootFolder):
result = []
for root, dirnames, filenames in os.walk(rootFolder):
for filename in fnmatch.filter(filenames, "*.csproj"):
result.append(os.path.join(root, filename))
return result
def GetProjectName(path):
result = os.path.splitext(os.path.basename(path))[0]
return result
def GetProjectReferences(rootFolder):
result = []
projectFiles = GetProjectFiles(rootFolder)
for projectFile in projectFiles:
projectName = GetProjectName(projectFile)
with codecs.open(projectFile, 'r', "utf-8") as pfile:
content = pfile.read()
references = re.findall("<ProjectReference.*?</ProjectReference>", content, re.DOTALL)
for reference in references:
referenceProject = re.search('"([^"]*?)"', reference).group(1)
referenceName = GetProjectName(referenceProject)
outputline = u'"{}" -> "{}"'.format(projectName, referenceName)
result.append(outputline)
return result
references = GetProjectReferences("MiotecGit")
output = u"\n".join(*references)
with codecs.open("output.gv", "w", 'utf-8') as outputfile:
outputfile.write(gvtemplate.replace("#####", output))
graphvizpath = glob.glob(r"C:\Program Files*\Graphviz*\bin\dot.*")[0]
command = '{} -Gcharset=latin1 -T pdf -o "output.pdf" "output.gv"'.format(graphvizpath)
subprocess.call(command)
当 Python 2.x 尝试在 Unicode 上下文中使用字节字符串时,它会自动尝试使用 ascii
将字节字符串 decode
转换为 Unicode 字符串编解码器。虽然 ascii
编解码器是一个安全的选择,但它通常不起作用。
对于 Windows 环境,mbcs
编解码器将 select Windows 用于 8 位字符的代码页。您可以自己显式解码字符串。
outputline = u'"{}" -> "{}"'.format(projectName.decode('mbcs'), referenceName.decode('mbcs'))