需要简单的文件比较而无需差异
Need simple file comparison without diffing
我的服务器运行的是 Windows 2008。
我一直使用 Araxis 做差异比较,并且很喜欢它。但现在我有一个比较特殊的用例:我需要对服务器上的两个文件夹进行递归的文件比较,这两个文件夹中各有数十万个文件。我已经知道其中 99% 的文件是相同的;只有少数文件存在于其中一棵文件夹树中,而不存在于另一棵中。我需要一种快速找出这些“差异文件”(difference files)的方法。我已经试过 Araxis,但由于文件数量巨大,速度太慢了。我猜 WinMerge 等其他工具也会同样慢,因为它们会尝试对每个文件的内容做差异比较,而我并不需要这样做。
重要提示:不需要对文件做文本或二进制比较,我要找的不是内容差异。我需要的是逐个文件夹地快速比较,找出哪些文件存在于一棵树中而不存在于另一棵树中。差异工具都太慢了,因为它们会去比较文件内容,而不是简单地检查哪些文件只存在于其中一处。我要找的工具只需比较每个文件夹中的文件名,不需要更深入。
速度是最重要的,"internal contents diffing"是不必要的。
有这样的工具吗?
REM List, without copying anything, the files XCOPY would copy from directory1 to directory2.
REM   /L  only displays the files that would be copied (no copy is performed).
REM   /S  recurses into subdirectories (empty ones are skipped).
REM "echo.|" pipes a blank line into XCOPY's standard input - presumably so that any
REM interactive prompt is answered automatically; verify on the target system.
echo.|xcopy /L /s "directory1\*" "directory2"
这应该会给出 dir1 中与 dir2 不同的文件列表。
我也需要类似的东西,这个批处理文件就是我想出来的。我先在一棵小树上试一下,看看效果如何。
@ECHO OFF
REM compare-folder: very basic "exists in one tree but not in the other" comparison
REM of two folder trees. Only names are compared; file contents are never read.
REM
REM Usage:   compare-folder [Folder 1] [Folder 2]
REM Returns: 0 on success or when the usage text is shown, 1 when a SUBST
REM          mapping or unmapping fails.
REM
REM How it works: both folders are mapped to temporary drive letters with SUBST,
REM so that a path relative to one root can be tested against the other root by
REM simply swapping the drive letter.
SETLOCAL

REM Change these drive letters to ones that aren't already in use!
SET subst_letter_one=Y
SET subst_letter_two=Z

REM If exactly two arguments not specified then print help
IF [%1] NEQ [] IF [%2] NEQ [] IF [%3] EQU [] GOTO BEGIN
ECHO Usage: compare-folder [Folder 1] [Folder 2]
ECHO.
ECHO Very basic "exists in one but not in the other" type comparison of folder trees
ECHO Example: compare-folder "D:\somepath" "D:\anotherpath"
ECHO.
ECHO Before first-run, edit the two variables in the head of this batch file
ECHO with spare drive letters that SUBST can use.
EXIT /B 0

:BEGIN
REM Use the SUBST command to map the two folders to temporary drive letters.
REM Failures are handled at labels (not inside parenthesised blocks) so that a
REM ")" in a folder argument cannot terminate the block early.
SUBST %subst_letter_one%: %1
IF %ERRORLEVEL% NEQ 0 GOTO MAP_ONE_FAILED
SUBST %subst_letter_two%: %2
IF %ERRORLEVEL% NEQ 0 GOTO MAP_TWO_FAILED

REM Main
ECHO.
ECHO ==================== FOLDER 1 ======================
ECHO.
ECHO Files and/or folders absent from %1 are listed below:
ECHO.
REM DIR /B /S prints full paths such as "Z:\sub\file.txt"; splitting on ":" leaves
REM "\sub\file.txt" in %%i, which is then tested under the other drive letter.
REM The path is quoted so that names containing spaces are tested correctly.
FOR /F "tokens=2* delims=:" %%i IN ('DIR %subst_letter_two%:\ /A /B /S /O:N') DO (
    IF NOT EXIST "%subst_letter_one%:%%i" ECHO %%i
)
ECHO.
ECHO ==================== FOLDER 2 ======================
ECHO.
ECHO Files and/or folders absent from %2 are listed below:
ECHO.
FOR /F "tokens=2* delims=:" %%i IN ('DIR %subst_letter_one%:\ /A /B /S /O:N') DO (
    IF NOT EXIST "%subst_letter_two%:%%i" ECHO %%i
)

REM Unmap temporary drive letters. Both are always attempted so that a failure
REM on the first letter does not leave the second mapping behind.
SET unmap_failed=0
SUBST %subst_letter_one%: /D
IF %ERRORLEVEL% NEQ 0 (
    ECHO Problem unmapping letter %subst_letter_one%.
    SET unmap_failed=1
)
SUBST %subst_letter_two%: /D
IF %ERRORLEVEL% NEQ 0 (
    ECHO Problem unmapping letter %subst_letter_two%.
    SET unmap_failed=1
)
IF %unmap_failed% NEQ 0 (
    ECHO Exiting
    EXIT /B 1
)
ENDLOCAL
EXIT /B 0

:MAP_ONE_FAILED
ECHO Problem mapping %1 to drive letter %subst_letter_one%.
ECHO Did you try to map an already-used drive letter?
ECHO Exiting
EXIT /B 1

:MAP_TWO_FAILED
REM The first mapping succeeded, so release it before bailing out.
SUBST %subst_letter_one%: /D
ECHO Problem mapping %2 to drive letter %subst_letter_two%.
ECHO Did you try to map an already-used drive letter?
ECHO Exiting
EXIT /B 1
我最终用 C# 为此编写了一个自定义工具,完整的源代码如下。它运行得很好,我已经在生产服务器上多次使用它了。它唯一的目标是比较两棵文件夹树,并把仅存在于源中的文件/文件夹添加到目标中。它不会去比较两边都存在的文件:如果同名文件在两棵树中的相同相对位置都存在,就会被忽略。
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using System.Configuration;
using System.IO;
namespace SyncFolders {
    /// <summary>
    /// Console tool that performs a one-way, additive sync of two folder trees:
    /// every file or folder that exists under the source root but not at the same
    /// relative location under the destination root is created/copied there.
    /// Items that already exist at the destination are never compared or overwritten.
    /// </summary>
    class Program {
        // Both roots are read from the application's appSettings section.
        private static string rootSourcePath = ConfigurationManager.AppSettings["RootSourcePath"];
        private static string rootDestinationPath = ConfigurationManager.AppSettings["RootDestinationPath"];

        // Number of files and folders created at the destination during this run.
        private static int _itemsSynced = 0;

        /// <summary>
        /// Validates both configured roots, asks for confirmation, runs the sync and
        /// prints a short summary.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args) {
            if (!Directory.Exists(rootSourcePath)) {
                Console.WriteLine("Folder not found: " + rootSourcePath);
                Console.ReadLine();
                return;
            }

            if (!Directory.Exists(rootDestinationPath)) {
                Console.WriteLine("Folder not found: " + rootDestinationPath);
                Console.ReadLine();
                return;
            }

            Console.WriteLine("Source: " + rootSourcePath);
            Console.WriteLine("Destination: " + rootDestinationPath);
            Console.WriteLine("You are about to copy any DIFFERENT folders/files from source to destination. Continue?");

            // Pressing Enter (or any answer not starting with "n") continues, as before;
            // an explicit "n"/"no" now aborts instead of being silently ignored.
            string answer = Console.ReadLine();
            if (answer != null && answer.Trim().StartsWith("n", StringComparison.OrdinalIgnoreCase)) {
                Console.WriteLine("Aborted. Nothing was copied.");
                return;
            }

            // Taken after the confirmation so the reported start time does not
            // include the time spent waiting for the user.
            var startTime = DateTime.Now;

            DirectoryInfo source = new DirectoryInfo(rootSourcePath);
            WalkDirectoryTree(source, rootDestinationPath);

            Console.WriteLine("");
            Console.WriteLine("Started at " + startTime);
            Console.WriteLine("Finished at " + DateTime.Now);
            Console.WriteLine("");

            if (_itemsSynced > 0) {
                Console.WriteLine("Folders/files added: " + _itemsSynced);
            } else {
                Console.WriteLine("Everything was already in sync!");
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Recursively mirrors missing items from <paramref name="source"/> into
        /// <paramref name="destination"/>. Existing destination items are left untouched.
        /// </summary>
        /// <param name="source">Source directory to enumerate.</param>
        /// <param name="destination">
        /// Path of the destination directory that corresponds to <paramref name="source"/>.
        /// </param>
        private static void WalkDirectoryTree(DirectoryInfo source, string destination) {
            foreach (FileInfo fi in source.GetFiles()) {
                // Path.Combine handles a trailing separator on either side; the previous
                // string Replace on FullName glued names together ("D:\dstfile.txt")
                // when the configured source root ended with a separator.
                string fullDestinationPath = Path.Combine(destination, fi.Name);

                if (!File.Exists(fullDestinationPath)) {
                    File.Copy(fi.FullName, fullDestinationPath);
                    Console.WriteLine(fi.FullName);
                    _itemsSynced++;
                }
            }

            // Now find all the subdirectories under this directory.
            foreach (DirectoryInfo dirInfo in source.GetDirectories()) {
                string fullDestinationPath = Path.Combine(destination, dirInfo.Name);

                if (!Directory.Exists(fullDestinationPath)) {
                    Directory.CreateDirectory(fullDestinationPath);
                    Console.WriteLine(dirInfo.FullName);
                    _itemsSynced++;
                }

                // Recursive call for each subdirectory.
                WalkDirectoryTree(dirInfo, fullDestinationPath);
            }
        }
    }
}
我的服务器运行的是 Windows 2008。
我一直使用 Araxis 做差异比较,并且很喜欢它。但现在我有一个比较特殊的用例:我需要对服务器上的两个文件夹进行递归的文件比较,这两个文件夹中各有数十万个文件。我已经知道其中 99% 的文件是相同的;只有少数文件存在于其中一棵文件夹树中,而不存在于另一棵中。我需要一种快速找出这些“差异文件”(difference files)的方法。我已经试过 Araxis,但由于文件数量巨大,速度太慢了。我猜 WinMerge 等其他工具也会同样慢,因为它们会尝试对每个文件的内容做差异比较,而我并不需要这样做。
重要提示:不需要对文件做文本或二进制比较,我要找的不是内容差异。我需要的是逐个文件夹地快速比较,找出哪些文件存在于一棵树中而不存在于另一棵树中。差异工具都太慢了,因为它们会去比较文件内容,而不是简单地检查哪些文件只存在于其中一处。我要找的工具只需比较每个文件夹中的文件名,不需要更深入。
速度是最重要的,"internal contents diffing"是不必要的。
有这样的工具吗?
REM List, without copying anything, the files XCOPY would copy from directory1 to directory2.
REM   /L  only displays the files that would be copied (no copy is performed).
REM   /S  recurses into subdirectories (empty ones are skipped).
REM "echo.|" pipes a blank line into XCOPY's standard input - presumably so that any
REM interactive prompt is answered automatically; verify on the target system.
echo.|xcopy /L /s "directory1\*" "directory2"
这应该会给出 dir1 中与 dir2 不同的文件列表。
我也需要类似的东西,这个批处理文件就是我想出来的。我先在一棵小树上试一下,看看效果如何。
@ECHO OFF
REM compare-folder: very basic "exists in one tree but not in the other" comparison
REM of two folder trees. Only names are compared; file contents are never read.
REM
REM Usage:   compare-folder [Folder 1] [Folder 2]
REM Returns: 0 on success or when the usage text is shown, 1 when a SUBST
REM          mapping or unmapping fails.
REM
REM How it works: both folders are mapped to temporary drive letters with SUBST,
REM so that a path relative to one root can be tested against the other root by
REM simply swapping the drive letter.
SETLOCAL

REM Change these drive letters to ones that aren't already in use!
SET subst_letter_one=Y
SET subst_letter_two=Z

REM If exactly two arguments not specified then print help
IF [%1] NEQ [] IF [%2] NEQ [] IF [%3] EQU [] GOTO BEGIN
ECHO Usage: compare-folder [Folder 1] [Folder 2]
ECHO.
ECHO Very basic "exists in one but not in the other" type comparison of folder trees
ECHO Example: compare-folder "D:\somepath" "D:\anotherpath"
ECHO.
ECHO Before first-run, edit the two variables in the head of this batch file
ECHO with spare drive letters that SUBST can use.
EXIT /B 0

:BEGIN
REM Use the SUBST command to map the two folders to temporary drive letters.
REM Failures are handled at labels (not inside parenthesised blocks) so that a
REM ")" in a folder argument cannot terminate the block early.
SUBST %subst_letter_one%: %1
IF %ERRORLEVEL% NEQ 0 GOTO MAP_ONE_FAILED
SUBST %subst_letter_two%: %2
IF %ERRORLEVEL% NEQ 0 GOTO MAP_TWO_FAILED

REM Main
ECHO.
ECHO ==================== FOLDER 1 ======================
ECHO.
ECHO Files and/or folders absent from %1 are listed below:
ECHO.
REM DIR /B /S prints full paths such as "Z:\sub\file.txt"; splitting on ":" leaves
REM "\sub\file.txt" in %%i, which is then tested under the other drive letter.
REM The path is quoted so that names containing spaces are tested correctly.
FOR /F "tokens=2* delims=:" %%i IN ('DIR %subst_letter_two%:\ /A /B /S /O:N') DO (
    IF NOT EXIST "%subst_letter_one%:%%i" ECHO %%i
)
ECHO.
ECHO ==================== FOLDER 2 ======================
ECHO.
ECHO Files and/or folders absent from %2 are listed below:
ECHO.
FOR /F "tokens=2* delims=:" %%i IN ('DIR %subst_letter_one%:\ /A /B /S /O:N') DO (
    IF NOT EXIST "%subst_letter_two%:%%i" ECHO %%i
)

REM Unmap temporary drive letters. Both are always attempted so that a failure
REM on the first letter does not leave the second mapping behind.
SET unmap_failed=0
SUBST %subst_letter_one%: /D
IF %ERRORLEVEL% NEQ 0 (
    ECHO Problem unmapping letter %subst_letter_one%.
    SET unmap_failed=1
)
SUBST %subst_letter_two%: /D
IF %ERRORLEVEL% NEQ 0 (
    ECHO Problem unmapping letter %subst_letter_two%.
    SET unmap_failed=1
)
IF %unmap_failed% NEQ 0 (
    ECHO Exiting
    EXIT /B 1
)
ENDLOCAL
EXIT /B 0

:MAP_ONE_FAILED
ECHO Problem mapping %1 to drive letter %subst_letter_one%.
ECHO Did you try to map an already-used drive letter?
ECHO Exiting
EXIT /B 1

:MAP_TWO_FAILED
REM The first mapping succeeded, so release it before bailing out.
SUBST %subst_letter_one%: /D
ECHO Problem mapping %2 to drive letter %subst_letter_two%.
ECHO Did you try to map an already-used drive letter?
ECHO Exiting
EXIT /B 1
我最终用 C# 为此编写了一个自定义工具,完整的源代码如下。它运行得很好,我已经在生产服务器上多次使用它了。它唯一的目标是比较两棵文件夹树,并把仅存在于源中的文件/文件夹添加到目标中。它不会去比较两边都存在的文件:如果同名文件在两棵树中的相同相对位置都存在,就会被忽略。
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using System.Configuration;
using System.IO;
namespace SyncFolders {
    /// <summary>
    /// Console tool that performs a one-way, additive sync of two folder trees:
    /// every file or folder that exists under the source root but not at the same
    /// relative location under the destination root is created/copied there.
    /// Items that already exist at the destination are never compared or overwritten.
    /// </summary>
    class Program {
        // Both roots are read from the application's appSettings section.
        private static string rootSourcePath = ConfigurationManager.AppSettings["RootSourcePath"];
        private static string rootDestinationPath = ConfigurationManager.AppSettings["RootDestinationPath"];

        // Number of files and folders created at the destination during this run.
        private static int _itemsSynced = 0;

        /// <summary>
        /// Validates both configured roots, asks for confirmation, runs the sync and
        /// prints a short summary.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args) {
            if (!Directory.Exists(rootSourcePath)) {
                Console.WriteLine("Folder not found: " + rootSourcePath);
                Console.ReadLine();
                return;
            }

            if (!Directory.Exists(rootDestinationPath)) {
                Console.WriteLine("Folder not found: " + rootDestinationPath);
                Console.ReadLine();
                return;
            }

            Console.WriteLine("Source: " + rootSourcePath);
            Console.WriteLine("Destination: " + rootDestinationPath);
            Console.WriteLine("You are about to copy any DIFFERENT folders/files from source to destination. Continue?");

            // Pressing Enter (or any answer not starting with "n") continues, as before;
            // an explicit "n"/"no" now aborts instead of being silently ignored.
            string answer = Console.ReadLine();
            if (answer != null && answer.Trim().StartsWith("n", StringComparison.OrdinalIgnoreCase)) {
                Console.WriteLine("Aborted. Nothing was copied.");
                return;
            }

            // Taken after the confirmation so the reported start time does not
            // include the time spent waiting for the user.
            var startTime = DateTime.Now;

            DirectoryInfo source = new DirectoryInfo(rootSourcePath);
            WalkDirectoryTree(source, rootDestinationPath);

            Console.WriteLine("");
            Console.WriteLine("Started at " + startTime);
            Console.WriteLine("Finished at " + DateTime.Now);
            Console.WriteLine("");

            if (_itemsSynced > 0) {
                Console.WriteLine("Folders/files added: " + _itemsSynced);
            } else {
                Console.WriteLine("Everything was already in sync!");
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Recursively mirrors missing items from <paramref name="source"/> into
        /// <paramref name="destination"/>. Existing destination items are left untouched.
        /// </summary>
        /// <param name="source">Source directory to enumerate.</param>
        /// <param name="destination">
        /// Path of the destination directory that corresponds to <paramref name="source"/>.
        /// </param>
        private static void WalkDirectoryTree(DirectoryInfo source, string destination) {
            foreach (FileInfo fi in source.GetFiles()) {
                // Path.Combine handles a trailing separator on either side; the previous
                // string Replace on FullName glued names together ("D:\dstfile.txt")
                // when the configured source root ended with a separator.
                string fullDestinationPath = Path.Combine(destination, fi.Name);

                if (!File.Exists(fullDestinationPath)) {
                    File.Copy(fi.FullName, fullDestinationPath);
                    Console.WriteLine(fi.FullName);
                    _itemsSynced++;
                }
            }

            // Now find all the subdirectories under this directory.
            foreach (DirectoryInfo dirInfo in source.GetDirectories()) {
                string fullDestinationPath = Path.Combine(destination, dirInfo.Name);

                if (!Directory.Exists(fullDestinationPath)) {
                    Directory.CreateDirectory(fullDestinationPath);
                    Console.WriteLine(dirInfo.FullName);
                    _itemsSynced++;
                }

                // Recursive call for each subdirectory.
                WalkDirectoryTree(dirInfo, fullDestinationPath);
            }
        }
    }
}