优化的 EmEditor 宏:拆分元素、排序、找出最早和最晚日期并提取结果
Optimised EmEditor Macro to Element Split, Sort, Oldest to Longest Date, & Extract
我目前有一个如下格式的分隔文件(共 2 列,以制表符“\t”分隔;每列内的多个元素以“;”分隔)。
User\tDate
Alice Cooper;John Smith\t07/11/2019
Alice Cooper\t23/11/2018
Alice Cooper\t21/11/2018
Alice Cooper\t26/11/2018
Alice Cooper\t26/11/2018
Alice Cooper;John Smith\t09/12/2018
Alice Cooper;John Smith\t09/12/2018
Alice Cooper;John Smith\t04/12/2018
Alice Cooper\t07/12/2018
Alice Cooper\t07/12/2018
我希望有任何关于优化宏(最好是 javascript)的想法,以创建以下输出文件:
User\tEarliest\tLatest\tDates_with_Most_Occurences\tMost_Occurence_Number
Alice Cooper\t21/11/2018\t07/11/2019\t26/11/2018;07/12/2018\t2
John Smith\t04/12/2018\t07/11/2019\t09/12/2018\t1
所以中间步骤(我目前正在手动执行,但想转入宏):
第 1 步:分离出第 1 列中的名称元素
(给出这样的东西):
User\tDate
Alice Cooper\t07/11/2019
John Smith\t07/11/2019
Alice Cooper\t23/11/2018
Alice Cooper\t21/11/2018
Alice Cooper\t26/11/2018
Alice Cooper\t26/11/2018
Alice Cooper\t09/12/2018
John Smith\t09/12/2018
Alice Cooper\t09/12/2018
John Smith\t09/12/2018
Alice Cooper\t04/12/2018
John Smith\t04/12/2018
Alice Cooper\t07/12/2018
Alice Cooper\t07/12/2018
第 2 步:将第 1 列按 A-Z 排序,第 2 列按日期从旧到新排序。然后按第 1 列合并第 2 列的元素(得到类似这样的内容):
User\tDate
Alice Cooper\t21/11/2018;23/11/2018;26/11/2018;26/11/2018;04/12/2018;07/12/2018;07/12/2018;09/12/2018;09/12/2018;07/11/2019;
John Smith\t04/12/2018;09/12/2018;09/12/2018;07/11/2019;
第 3 步:现在获取 Col2 中每一行的日期信息并创建这 4 个新列:EarliestDate、LatestDate、Dates_with_Most_Occurences、Most_Occurence_Number(给出类似这样的内容):
User\tDate
Alice Cooper\t21/11/2018;23/11/2018;26/11/2018;26/11/2018;04/12/2018;07/12/2018;07/12/2018;09/12/2018;09/12/2018;07/11/2019;
John Smith\t04/12/2018;09/12/2018;09/12/2018;07/11/2019;
第 4 步:删除 Col2(日期):给出最终输出:
User\tEarliestDate\tLatestDate\tDates_with_Most_Occurences\tMost_Occurence_Number
Alice Cooper\t21/11/2018\t07/11/2019\t26/11/2018;07/12/2018\t2
John Smith\t04/12/2018\t07/11/2019\t09/12/2018\t1
我只需要宏来创建最终输出,中间的步骤(上面的步骤 1、2、3)只是显示我正在尝试做的事情的逻辑。真正的源文件将有数千行,所以如果 EmEditor 能以任何方式对其进行优化,那就太棒了。
假设您的数据文件不包含空行,这里是脚本。
// Convert the active document using CSV format index 2 so that the later
// GetCell calls can address it by row and column.
// NOTE(review): presumably required before GetCell works - confirm in the EmEditor docs.
// This assumes your Tab format is the second one on the CSV/Sort bar
document.ConvertCsv(2);
/**
 * Build a local-time Date from a "dd/mm/yyyy" string.
 *
 * @param {string} s - Date text with day, month and year separated by '/'.
 * @returns {Date} The corresponding Date (an Invalid Date if parts are missing).
 */
function parseDate(s) {
  var parts = s.split('/');
  var day = parts[0];
  var monthIndex = parts[1] - 1; // Date months are zero-based
  var year = parts[2];
  return new Date(year, monthIndex, day);
}
// Read the file: collect one [userCell, Date] pair per data row.
// Document line 1 is the header, so data rows start at document line 2.
var data = [];
var numberOfLines = document.GetLines();
if (numberOfLines >= 2 && document.GetLine(2) === '') {
  numberOfLines = 1; // CSV document only has header without data
}
for (var line = 1; line < numberOfLines; line++) {
  // Skip blank lines (typically the empty last line left by a trailing
  // newline). Parsing one would add an empty user with an Invalid Date,
  // which ends up as a "NaN/NaN/NaN" row in the output.
  if (document.GetLine(line + 1) === '') {
    continue;
  }
  var rowData = [
    document.GetCell(line + 1, 1, eeCellIncludeNone),
    parseDate(document.GetCell(line + 1, 2, eeCellIncludeNone))
  ];
  data.push(rowData);
}
// Separate combined users: a cell such as "A;B" becomes one [user, date]
// pair per name, each pair sharing the Date object of its source row.
var separated = [];
for (var r = 0; r < data.length; r++) {
  var names = data[r][0].split(';');
  var when = data[r][1];
  for (var n = 0; n < names.length; n++) {
    separated.push([names[n], when]);
  }
}
// Group by user
// {[key: string]: {data: [[]], earliest: Date, latest: Date, mostOccurrence: Date[], occurrence: number}}
// Only `data` is filled in here; the other fields are added by the analysis loop.
var users = {};
// Use an own-property check instead of `in`: `in` also matches keys inherited
// from Object.prototype, so a user called e.g. "constructor" or "toString"
// would skip initialisation and crash on `.data.push`.
var ownsKey = Object.prototype.hasOwnProperty;
for (var row = 0; row < separated.length; row++) {
  var userName = separated[row][0];
  if (!ownsKey.call(users, userName)) {
    users[userName] = {data: []};
  }
  users[userName].data.push(separated[row]);
}
// At this point, we have parsed the file into useful data.
// alert(JSON.stringify(users, null, ' ')); // To check
// Data analysis: for every user, determine the earliest and latest date, the
// date(s) that occur most often, and how often they occur.
for (var userKey in users) {
  var entry = users[userKey];

  // Sort this user's rows chronologically (in place).
  var ordered = entry.data.sort(function (left, right) {
    return left[1].getTime() - right[1].getTime();
  });
  entry.earliest = ordered[0][1];
  entry.latest = ordered[ordered.length - 1][1];

  // Tally the rows per timestamp. {[timestamp: number]: count}
  var tally = {};
  for (var idx = 0; idx < ordered.length; ++idx) {
    var stamp = ordered[idx][1].getTime();
    if (stamp in tally) {
      tally[stamp] += 1;
    } else {
      tally[stamp] = 1;
    }
  }

  // Keep every timestamp that ties for the highest count.
  var best = -1;
  var bestStamps = [];
  for (var key in tally) {
    if (tally[key] > best) {
      best = tally[key];
      bestStamps = [key];
    } else if (tally[key] === best) {
      bestStamps.push(key);
    }
  }

  // Turn the winning timestamps (object keys, hence strings) back into Dates.
  entry.mostOccurrence = [];
  for (var j = 0; j < bestStamps.length; j++) {
    var winner = new Date();
    winner.setTime(bestStamps[j]);
    entry.mostOccurrence.push(winner);
  }
  entry.occurrence = best;
}
// Format the numbers and output to document
// Open a new file to receive the results.
// NOTE(review): assumes editor.NewFile() makes the new file the active `document` - confirm.
editor.NewFile();
// Header row of the result table (tab-separated).
document.selection.Text = 'User\tEarliestDate\tLatestDate\tDates_with_Most_Occurences\tMost_Occurence_Number';
// Add one line break per user so that each user has a row for the SetCell
// calls that follow.
for (var _ in users) {
document.selection.Text += '\r\n';
}
// Same CSV format index as used for the input document.
document.ConvertCsv(2);
/**
 * Format a Date as "dd/mm/yyyy" using its local-time fields.
 *
 * Day and month are zero-padded to two digits so the output matches the
 * input format (e.g. "07/11/2019" rather than "7/11/2019").
 *
 * @param {Date} d - The date to format.
 * @returns {string} The formatted date.
 */
function formatDate(d) {
  function twoDigits(n) {
    return (n < 10 ? '0' : '') + n;
  }
  return twoDigits(d.getDate()) + '/' + twoDigits(d.getMonth() + 1) + '/' + d.getFullYear();
}
// Fill one row per user, starting on the line right below the header.
var line = 2;
for (var userKey in users) {
  var info = users[userKey];

  // Most frequent dates, formatted and separated by ';'.
  var formatted = [];
  for (var i = 0; i < info.mostOccurrence.length; i++) {
    formatted.push(formatDate(info.mostOccurrence[i]));
  }

  document.SetCell(line, 1, userKey, eeAutoQuote);
  document.SetCell(line, 2, formatDate(info.earliest), eeAutoQuote);
  document.SetCell(line, 3, formatDate(info.latest), eeAutoQuote);
  document.SetCell(line, 4, formatted.join(';'), eeAutoQuote);
  document.SetCell(line, 5, info.occurrence, eeAutoQuote);
  line++;
}
希望它有效,但如果无效,请告诉我。
我目前有一个如下格式的分隔文件(共 2 列,以制表符“\t”分隔;每列内的多个元素以“;”分隔)。
User\tDate
Alice Cooper;John Smith\t07/11/2019
Alice Cooper\t23/11/2018
Alice Cooper\t21/11/2018
Alice Cooper\t26/11/2018
Alice Cooper\t26/11/2018
Alice Cooper;John Smith\t09/12/2018
Alice Cooper;John Smith\t09/12/2018
Alice Cooper;John Smith\t04/12/2018
Alice Cooper\t07/12/2018
Alice Cooper\t07/12/2018
我希望有任何关于优化宏(最好是 javascript)的想法,以创建以下输出文件:
User\tEarliest\tLatest\tDates_with_Most_Occurences\tMost_Occurence_Number
Alice Cooper\t21/11/2018\t07/11/2019\t26/11/2018;07/12/2018\t2
John Smith\t04/12/2018\t07/11/2019\t09/12/2018\t1
所以中间步骤(我目前正在手动执行,但想转入宏):
第 1 步:分离出第 1 列中的名称元素
(给出这样的东西):
User\tDate
Alice Cooper\t07/11/2019
John Smith\t07/11/2019
Alice Cooper\t23/11/2018
Alice Cooper\t21/11/2018
Alice Cooper\t26/11/2018
Alice Cooper\t26/11/2018
Alice Cooper\t09/12/2018
John Smith\t09/12/2018
Alice Cooper\t09/12/2018
John Smith\t09/12/2018
Alice Cooper\t04/12/2018
John Smith\t04/12/2018
Alice Cooper\t07/12/2018
Alice Cooper\t07/12/2018
第 2 步:将第 1 列按 A-Z 排序,第 2 列按日期从旧到新排序。然后按第 1 列合并第 2 列的元素(得到类似这样的内容):
User\tDate
Alice Cooper\t21/11/2018;23/11/2018;26/11/2018;26/11/2018;04/12/2018;07/12/2018;07/12/2018;09/12/2018;09/12/2018;07/11/2019;
John Smith\t04/12/2018;09/12/2018;09/12/2018;07/11/2019;
第 3 步:现在获取 Col2 中每一行的日期信息并创建这 4 个新列:EarliestDate、LatestDate、Dates_with_Most_Occurences、Most_Occurence_Number(给出类似这样的内容):
User\tDate
Alice Cooper\t21/11/2018;23/11/2018;26/11/2018;26/11/2018;04/12/2018;07/12/2018;07/12/2018;09/12/2018;09/12/2018;07/11/2019;
John Smith\t04/12/2018;09/12/2018;09/12/2018;07/11/2019;
第 4 步:删除 Col2(日期):给出最终输出:
User\tEarliestDate\tLatestDate\tDates_with_Most_Occurences\tMost_Occurence_Number
Alice Cooper\t21/11/2018\t07/11/2019\t26/11/2018;07/12/2018\t2
John Smith\t04/12/2018\t07/11/2019\t09/12/2018\t1
我只需要宏来创建最终输出,中间的步骤(上面的步骤 1、2、3)只是显示我正在尝试做的事情的逻辑。真正的源文件将有数千行,所以如果 EmEditor 能以任何方式对其进行优化,那就太棒了。
假设您的数据文件不包含空行,这里是脚本。
// Convert the active document using CSV format index 2 so that the later
// GetCell calls can address it by row and column.
// NOTE(review): presumably required before GetCell works - confirm in the EmEditor docs.
// This assumes your Tab format is the second one on the CSV/Sort bar
document.ConvertCsv(2);
/**
 * Build a local-time Date from a "dd/mm/yyyy" string.
 *
 * @param {string} s - Date text with day, month and year separated by '/'.
 * @returns {Date} The corresponding Date (an Invalid Date if parts are missing).
 */
function parseDate(s) {
  var parts = s.split('/');
  var day = parts[0];
  var monthIndex = parts[1] - 1; // Date months are zero-based
  var year = parts[2];
  return new Date(year, monthIndex, day);
}
// Read the file: collect one [userCell, Date] pair per data row.
// Document line 1 is the header, so data rows start at document line 2.
var data = [];
var numberOfLines = document.GetLines();
if (numberOfLines >= 2 && document.GetLine(2) === '') {
  numberOfLines = 1; // CSV document only has header without data
}
for (var line = 1; line < numberOfLines; line++) {
  // Skip blank lines (typically the empty last line left by a trailing
  // newline). Parsing one would add an empty user with an Invalid Date,
  // which ends up as a "NaN/NaN/NaN" row in the output.
  if (document.GetLine(line + 1) === '') {
    continue;
  }
  var rowData = [
    document.GetCell(line + 1, 1, eeCellIncludeNone),
    parseDate(document.GetCell(line + 1, 2, eeCellIncludeNone))
  ];
  data.push(rowData);
}
// Separate combined users: a cell such as "A;B" becomes one [user, date]
// pair per name, each pair sharing the Date object of its source row.
var separated = [];
for (var r = 0; r < data.length; r++) {
  var names = data[r][0].split(';');
  var when = data[r][1];
  for (var n = 0; n < names.length; n++) {
    separated.push([names[n], when]);
  }
}
// Group by user
// {[key: string]: {data: [[]], earliest: Date, latest: Date, mostOccurrence: Date[], occurrence: number}}
// Only `data` is filled in here; the other fields are added by the analysis loop.
var users = {};
// Use an own-property check instead of `in`: `in` also matches keys inherited
// from Object.prototype, so a user called e.g. "constructor" or "toString"
// would skip initialisation and crash on `.data.push`.
var ownsKey = Object.prototype.hasOwnProperty;
for (var row = 0; row < separated.length; row++) {
  var userName = separated[row][0];
  if (!ownsKey.call(users, userName)) {
    users[userName] = {data: []};
  }
  users[userName].data.push(separated[row]);
}
// At this point, we have parsed the file into useful data.
// alert(JSON.stringify(users, null, ' ')); // To check
// Data analysis: for every user, determine the earliest and latest date, the
// date(s) that occur most often, and how often they occur.
for (var userKey in users) {
  var entry = users[userKey];

  // Sort this user's rows chronologically (in place).
  var ordered = entry.data.sort(function (left, right) {
    return left[1].getTime() - right[1].getTime();
  });
  entry.earliest = ordered[0][1];
  entry.latest = ordered[ordered.length - 1][1];

  // Tally the rows per timestamp. {[timestamp: number]: count}
  var tally = {};
  for (var idx = 0; idx < ordered.length; ++idx) {
    var stamp = ordered[idx][1].getTime();
    if (stamp in tally) {
      tally[stamp] += 1;
    } else {
      tally[stamp] = 1;
    }
  }

  // Keep every timestamp that ties for the highest count.
  var best = -1;
  var bestStamps = [];
  for (var key in tally) {
    if (tally[key] > best) {
      best = tally[key];
      bestStamps = [key];
    } else if (tally[key] === best) {
      bestStamps.push(key);
    }
  }

  // Turn the winning timestamps (object keys, hence strings) back into Dates.
  entry.mostOccurrence = [];
  for (var j = 0; j < bestStamps.length; j++) {
    var winner = new Date();
    winner.setTime(bestStamps[j]);
    entry.mostOccurrence.push(winner);
  }
  entry.occurrence = best;
}
// Format the numbers and output to document
// Open a new file to receive the results.
// NOTE(review): assumes editor.NewFile() makes the new file the active `document` - confirm.
editor.NewFile();
// Header row of the result table (tab-separated).
document.selection.Text = 'User\tEarliestDate\tLatestDate\tDates_with_Most_Occurences\tMost_Occurence_Number';
// Add one line break per user so that each user has a row for the SetCell
// calls that follow.
for (var _ in users) {
document.selection.Text += '\r\n';
}
// Same CSV format index as used for the input document.
document.ConvertCsv(2);
/**
 * Format a Date as "dd/mm/yyyy" using its local-time fields.
 *
 * Day and month are zero-padded to two digits so the output matches the
 * input format (e.g. "07/11/2019" rather than "7/11/2019").
 *
 * @param {Date} d - The date to format.
 * @returns {string} The formatted date.
 */
function formatDate(d) {
  function twoDigits(n) {
    return (n < 10 ? '0' : '') + n;
  }
  return twoDigits(d.getDate()) + '/' + twoDigits(d.getMonth() + 1) + '/' + d.getFullYear();
}
// Fill one row per user, starting on the line right below the header.
var line = 2;
for (var userKey in users) {
  var info = users[userKey];

  // Most frequent dates, formatted and separated by ';'.
  var formatted = [];
  for (var i = 0; i < info.mostOccurrence.length; i++) {
    formatted.push(formatDate(info.mostOccurrence[i]));
  }

  document.SetCell(line, 1, userKey, eeAutoQuote);
  document.SetCell(line, 2, formatDate(info.earliest), eeAutoQuote);
  document.SetCell(line, 3, formatDate(info.latest), eeAutoQuote);
  document.SetCell(line, 4, formatted.join(';'), eeAutoQuote);
  document.SetCell(line, 5, info.occurrence, eeAutoQuote);
  line++;
}
希望它有效,但如果无效,请告诉我。