在 Java 中将数据插入地图列表花费了太多时间
Inserting data into list of map taking too much time in Java
我有一项任务是每天发送自动报告。所以,我基本上是将查询结果 set/collection 写入地图列表,以便我可以将该数据写入 excel。在这里,我有以下将集合数据插入地图列表的方法。问题是此方法需要 1 小时 20 分钟才能从具有 3000-3500 行和 14 列的集合中插入数据。在我的代码中,我有 5 个与 运行 类似的查询,并且每个查询花费的时间相同。你能帮我优化代码以减少花费的时间吗?
// avoided following method
public static List<Map<String, Object>> insertAttrValues(IDfCollection dfCollection, List<String> attributes) throws DfException {
if (dfCollection == null || attributes == null) {
throw new MissingParameterException("collection and attributes");
}
List<Map<String, Object>> dataList = new ArrayList<>();
while (dfCollection.next()) {
Map<String, Object> map = new LinkedHashMap<>(attributes.size());
for (String attribute: attributes) {
map.put(attribute, dfCollection.getString(attribute));
}
dataList.add(map);
}
return dataList;
}
编辑:
抱歉,放置重要部分代码并直接使用集合,而不是稍后在映射和处理中插入值。
起点:
@SpringBootApplication
public class ImmsClinicalReportApplication {
public static void main(String[] args) {
ApplicationContext applicationContext = SpringApplication.run(ImmsClinicalReportApplication.class, args);
init(applicationContext);
}
private static void init(@NotNull ApplicationContext applicationContext) {
ClinicalReportController clinicalReportController = (ClinicalReportController) applicationContext.getBean("clinicalReportController");
IDfSession dfSession = null;
try {
dfSession = clinicalReportController.getSession();
clinicalReportController.execute(dfSession);
sendEmail(applicationContext, clinicalReportController);
} catch (DfException | IOException e) {
e.printStackTrace();
} finally {
try {
clinicalReportController.cleanSession(dfSession);
} catch (DfException e) {
e.printStackTrace();
}
}
}
}
@Controller("clinicalReportController")
@PropertySource("classpath:application.properties")
public class ClinicalReportController {
private static final Logger logger = Logger.getLogger(ClinicalReportController.class);
private final SessionHelper sessionHelper;
private final DqlHelper dqlHelper;
private final AppProperties appProperties;
@Value("${report_path}")
private String XLSX_FILE_PATH;
private static final String[] moduleTypes = {
"Clin Protocol", "Clin Investigator Brochure", "Clin Core Text",
"Clin Process Documentation", "Clin Supporting Information"
};
@Autowired
public ClinicalReportController(DqlHelper dqlHelper, SessionHelper sessionHelper, AppProperties appProperties) {
this.dqlHelper = dqlHelper;
this.sessionHelper = sessionHelper;
this.appProperties = appProperties;
}
/**
* Method that processes the report
* @param dfSession dfSession
* @throws DfException DfException
* @throws IOException IOException
*/
public void execute(IDfSession dfSession) throws DfException, IOException {
StopWatch timer = new StopWatch();
for (int i = 0; i < moduleTypes.length; i++) {
// start timer
timer.start();
IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);
List<String> attributes = new ArrayList<>(dfCollection.getAttrCount());
for (int j = 0; j < dfCollection.getAttrCount(); j++) {
attributes.add(dfCollection.getAttr(j).getName());
}
// stop timer
timer.stop();
// Each query takes 20 mins of time
/* Sample query: select d.r_object_id, d.object_name, d.title,
d.imms_extreleased_date, d.imms_extreleased_reason, d.imms_extreleaser,
d.imms_protocol_number, d.imms_protocol_number_rep, d.keywords,
d.imms_compound_number, d.imms_module_type, d.imms_prereleaser,
d.imms_prereleased_date, f.r_folder_path from imms_document d,
dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700'
and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date >
date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)*/
logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
timer.getTotalTimeSeconds()/60 + " minutes");
// List<Map<String, Object>> resultSet = ImmsUtils.insertAttrValues(dfCollection, attributes);
if (i == 0) {
processReport(dfCollection, moduleTypes[i], attributes);
} else {
updateReport(dfCollection, moduleTypes[i], attributes);
}
cleanCollection(dfCollection);
}
}
/**
* Method process for remaining queries/sheets
* @param resultSet resultSet
* @param objectType objectType
* @param attributes attributes
* @throws IOException IOException
*/
private void updateReport(IDfCollection resultSet, String objectType, List<String> attributes) throws IOException, DfException {
Workbook workbook = new XSSFWorkbook(new FileInputStream(XLSX_FILE_PATH));
excelWriterAndOperateOutputStream(resultSet, objectType, workbook, attributes);
}
/**
* Method that writes data to excel sheets
* @param dfCollection dfCollection
* @param sheet2 sheet2
* @param workbook workbook
* @param attributes
* Using collection directly. Not sure where is the issue in following method, writing data to sheet is also taking 50 minutes of time
*/
private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2, Workbook workbook, List<String> attributes) throws DfException {
Sheet sheet;
Row row;
sheet = sheet2;
Object[] values = new Object[attributes.size()];
StopWatch timer = new StopWatch();
// moved outside of loop
// TODO: avoid regex, use other logic
String dateRegex = "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$";
Pattern datePattern = Pattern.compile(dateRegex);
// avoid SDF and Date and
// TODO: use java.time - maybe LocalDate
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
Date date = null;
CellStyle dateCellStyle = workbook.createCellStyle();
dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat("yyyy/MM/dd HH:mm:ss"));
timer.start();
while (dfCollection.next()) {
for (int i = 0; i < attributes.size(); i++) {
values[i] = dfCollection.getString(attributes.get(i));
}
int lastRow = sheet.getLastRowNum();
row = sheet.createRow(++lastRow);
int cellNum = 0;
for (Object value: values) {
Cell cell = row.createCell(cellNum++);
if (datePattern.matcher(value.toString()).matches()) {
try {
date = simpleDateFormat.parse(value.toString());
} catch (ParseException e) {
e.printStackTrace();
}
cell.setCellValue(date);
cell.setCellStyle(dateCellStyle);
} else {
cell.setCellValue(value.toString());
}
}
}
timer.stop();
// Taking 50 mins of time to write collection data
// Log: Time taken for writing data 54.567404175 minutes
logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");
// Resize all columns to fit the content size
for (int i = 0; i < attributes.size(); i++) {
sheet.autoSizeColumn(i);
}
}
/**
* Method to create sheet, set fonts and colors
* @param moduleType moduleType
* @param workbook workbook
* @return Sheet
*/
private Sheet createSheet(String moduleType, Workbook workbook) {
return workbook.createSheet(moduleType);
}
/**
* Method to process first query/sheet
* @param dfCollection dfCollection
* @param moduleType moduleType
* @param attributes attributes
* @throws IOException IOException
*/
private void processReport(IDfCollection dfCollection, String moduleType, List<String> attributes) throws IOException, DfException {
// Create a Workbook - for xlsx
Workbook workbook = new XSSFWorkbook();
/*CreationHelper helps us create instances of various things like DataFormat,
Hyperlink, RichTextString etc, in a format (HSSF, XSSF) independent way*/
workbook.getCreationHelper();
excelWriterAndOperateOutputStream(dfCollection, moduleType, workbook, attributes);
}
/**
* Method that writes and saves data to file
* @param resultSet resultSet
* @param moduleType moduleType
* @param workbook workbook
* @param attributes attributes
* @throws IOException IOException
*/
private void excelWriterAndOperateOutputStream(IDfCollection resultSet, String moduleType, Workbook workbook, List<String> attributes) throws IOException, DfException {
Sheet sheet = createSheet(moduleType, workbook);
CellStyle cellStyle = setFontsAndColors(workbook);
// Create a Row
Row headerRow = sheet.createRow(0);
// Create cells
for (int i = 0; i < attributes.size(); i++) {
Cell cell = headerRow.createCell(i);
cell.setCellValue(attributes.get(i));
cell.setCellStyle(cellStyle);
}
writeToSheet(resultSet, workbook.getSheet(moduleType), workbook, attributes);
// Write the output to the file
FileOutputStream fileOutputStream = new FileOutputStream(XLSX_FILE_PATH);
workbook.write(fileOutputStream);
// close the file
fileOutputStream.close();
// close the workbook
workbook.close();
}
@NotNull
private CellStyle setFontsAndColors(Workbook workbook) {
CellStyle cellStyle = workbook.createCellStyle();
// Create a Font for styling header cells
Font headerFont = workbook.createFont();
headerFont.setBold(false);
headerFont.setFontHeightInPoints((short) 12);
headerFont.setColor(IndexedColors.GREEN.getIndex());
cellStyle.setFont(headerFont);
return cellStyle;
}
/**
* Get IDfSession object
* @return IDfSession
* @throws DfException DfException
*/
public IDfSession getSession() throws DfException {
IDfSession dfSession;
IDfSessionManager sessionManager = sessionHelper.getDfSessionManager(appProperties.getRepository(), appProperties.getUsername(), appProperties.getPassword());
dfSession = sessionManager.getSession(appProperties.getRepository());
return dfSession;
}
/**
* Clean IDfCollection
* @param dfCollection dfCollection
*/
public void cleanCollection(IDfCollection dfCollection) {
dqlHelper.cleanup(dfCollection);
}
/**
* Clean IDfSession
* @param dfSession dfSession
*/
public void cleanSession(IDfSession dfSession) throws DfException {
sessionHelper.cleanSession(dfSession);
}
}
您可以尝试 forkjoinPoll 或使用 jdk 的并行流:),使用 cpu 的多核处理器。 forkjoinpool的例子请参考https://www.baeldung.com/java-fork-join
public static List<Map<String, Object>> insertAttrValues(Stream<Object> stream, List<String> attributes) throws RuntimeException {
if (stream == null || attributes == null) {
throw new RuntimeException("collection and attributes");
}
final int size = attributes.size();
return stream.parallel().map(item -> {
Map<String, Object> map = new LinkedHashMap<>(size);
for (String attribute : attributes) {
//map.put(attribute, item.getString(attribute));
}
return map;
}).collect(Collectors.toList());
}
您可以进行以下改进:
- 直接从
IDfCollection
填写POI结构,不要将采集数据复制到List<Map<String, Object>>
。
- 使用
collection.getTime(attribute)
获取时间值,而不是对每条记录进行正则表达式解析。您可以使用 collection.getAttrDataType(attribute) == IDfAttr.DF_TIME
条件来判断该值是否为时间。
- 然后你可以直接使用日期而不用像这样解析:
cell.setCellValue(collection.getTime(attribute).getDate())
- 但数字也是如此,然后你可以在 excel sheet 中获得更好的结果。这意味着使用
collection.getInt(attribute)
和 collection.getDouble(attribute)
而不是 collection.getString(attribute)
。 IDfAttr.DM_INTEGER
和 IDfAttr.DM_DOUBLE
等常量在这里也有帮助。
- 将
int last_row
移到 for 循环外,并在循环内执行 last_row++
。调用 sheet.getLastRowNum()
是没有必要的。顺便说一句:驼峰命名 lastRow
在 Java 世界中会更好 ;-)
另一件事是,您正在另一个循环中调用整个过程以进行 5 个类似查询,因此可能还有另一个 space 用于改进,例如所有的转换使用例如更好的条件查询到一个,UNION
如果可能,更广泛的条件 + 应用程序逻辑中的过滤器,...)。
我认为主要问题在于查询。请尝试以下步骤:
- 不要在
select
查询中给出单独的属性,而是使用 *
。查看查询执行时间。如果执行速度很快而不需要几分钟的时间,请尝试接下来的步骤。
select * from imms_document d, dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700' and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date > date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)
- 当您使用 Spring 启动时,请在
application.properties
中包含所需的属性,如下所示。你可能不想要全部。
included_attributes=r_object_id,object_name,title,imms_extreleased_date,imms_extreleased_reason,imms_extreleaser,imms_protocol_number,imms_protocol_number_rep,keywords,imms_compound_number,imms_module_type,imms_prereleaser,imms_prereleased_date,r_folder_path
在您的 AppProperties
class 文件中执行以下操作:
@Component
public class AppProperties {
/**
*other fields
*/
@Getter
@Value("${included_attributes}")
private String[] includedAttributes;
}
现在,在您的 execute()
方法中,修改代码以仅使用您需要为其获取数据的属性。
public void execute(IDfSession dfSession) throws DfException, IOException {
StopWatch timer = new StopWatch();
for (int i = 0; i < moduleTypes.length; i++) {
// start timer
timer.start();
IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);
// stop timer
timer.stop();
logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
timer.getTotalTimeSeconds() + " seconds");
// attributes to be added
List<String> attributes = new ArrayList<>();
// Get included attributes as list
List<String> includedAttributes = Arrays.asList(appProperties.getIncludedAttributes());
for (int j = 0; j < dfCollection.getAttrCount(); j++) {
// check for the attribute in included attributes and add if exists
if (hasAttribute(includedAttributes, dfCollection.getAttr(j).getName())) {
attributes.add(dfCollection.getAttr(j).getName());
}
}
if (i == 0) {
processReport(dfCollection, moduleTypes[i], attributes);
} else {
updateReport(dfCollection, moduleTypes[i], attributes);
}
cleanCollection(dfCollection);
}
}
public static boolean hasAttribute(@NotNull List<String> attributes, String attribute) {
for(String attr : attributes){
if(attribute.contains(attr)){
return true;
}
}
return false;
}
POI结构直接使用collection,无需在array中插入数据再遍历
private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2,
@NotNull Workbook workbook, List<String> attributes) throws DfException {
Sheet sheet;
Row row;
sheet = sheet2;
StopWatch timer = new StopWatch();
String dateRegex = "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$";
Pattern datePattern = Pattern.compile(dateRegex);
DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("yyyy/MM/dd HH:mm:ss");
CellStyle dateCellStyle = workbook.createCellStyle();
dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat("yyyy/MM/dd HH:mm:ss"));
int lastRow = 0;
timer.start();
while (dfCollection.next()) {
row = sheet.createRow(++lastRow);
int cellNum = 0;
for (String attribute : attributes) {
Object value = dfCollection.getString(attribute);
Cell cell = row.createCell(cellNum++);
if (datePattern.matcher(value.toString()).matches()) {
cell.setCellValue(LocalDateTime.parse(value.toString(), timeFormatter));
cell.setCellStyle(dateCellStyle);
} else {
cell.setCellValue(value.toString());
}
}
}
timer.stop();
logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");
// Resize all columns to fit the content size
for (int i = 0; i < attributes.size(); i++) {
sheet.autoSizeColumn(i);
}
}
我有一项任务是每天发送自动报告。所以,我基本上是将查询结果 set/collection 写入地图列表,以便我可以将该数据写入 excel。在这里,我有以下将集合数据插入地图列表的方法。问题是此方法需要 1 小时 20 分钟才能从具有 3000-3500 行和 14 列的集合中插入数据。在我的代码中,我有 5 个与 运行 类似的查询,并且每个查询花费的时间相同。你能帮我优化代码以减少花费的时间吗?
// avoided following method
public static List<Map<String, Object>> insertAttrValues(IDfCollection dfCollection, List<String> attributes) throws DfException {
if (dfCollection == null || attributes == null) {
throw new MissingParameterException("collection and attributes");
}
List<Map<String, Object>> dataList = new ArrayList<>();
while (dfCollection.next()) {
Map<String, Object> map = new LinkedHashMap<>(attributes.size());
for (String attribute: attributes) {
map.put(attribute, dfCollection.getString(attribute));
}
dataList.add(map);
}
return dataList;
}
编辑: 抱歉,放置重要部分代码并直接使用集合,而不是稍后在映射和处理中插入值。
起点:
@SpringBootApplication
public class ImmsClinicalReportApplication {
public static void main(String[] args) {
ApplicationContext applicationContext = SpringApplication.run(ImmsClinicalReportApplication.class, args);
init(applicationContext);
}
private static void init(@NotNull ApplicationContext applicationContext) {
ClinicalReportController clinicalReportController = (ClinicalReportController) applicationContext.getBean("clinicalReportController");
IDfSession dfSession = null;
try {
dfSession = clinicalReportController.getSession();
clinicalReportController.execute(dfSession);
sendEmail(applicationContext, clinicalReportController);
} catch (DfException | IOException e) {
e.printStackTrace();
} finally {
try {
clinicalReportController.cleanSession(dfSession);
} catch (DfException e) {
e.printStackTrace();
}
}
}
}
@Controller("clinicalReportController")
@PropertySource("classpath:application.properties")
public class ClinicalReportController {
private static final Logger logger = Logger.getLogger(ClinicalReportController.class);
private final SessionHelper sessionHelper;
private final DqlHelper dqlHelper;
private final AppProperties appProperties;
@Value("${report_path}")
private String XLSX_FILE_PATH;
private static final String[] moduleTypes = {
"Clin Protocol", "Clin Investigator Brochure", "Clin Core Text",
"Clin Process Documentation", "Clin Supporting Information"
};
@Autowired
public ClinicalReportController(DqlHelper dqlHelper, SessionHelper sessionHelper, AppProperties appProperties) {
this.dqlHelper = dqlHelper;
this.sessionHelper = sessionHelper;
this.appProperties = appProperties;
}
/**
* Method that processes the report
* @param dfSession dfSession
* @throws DfException DfException
* @throws IOException IOException
*/
public void execute(IDfSession dfSession) throws DfException, IOException {
StopWatch timer = new StopWatch();
for (int i = 0; i < moduleTypes.length; i++) {
// start timer
timer.start();
IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);
List<String> attributes = new ArrayList<>(dfCollection.getAttrCount());
for (int j = 0; j < dfCollection.getAttrCount(); j++) {
attributes.add(dfCollection.getAttr(j).getName());
}
// stop timer
timer.stop();
// Each query takes 20 mins of time
/* Sample query: select d.r_object_id, d.object_name, d.title,
d.imms_extreleased_date, d.imms_extreleased_reason, d.imms_extreleaser,
d.imms_protocol_number, d.imms_protocol_number_rep, d.keywords,
d.imms_compound_number, d.imms_module_type, d.imms_prereleaser,
d.imms_prereleased_date, f.r_folder_path from imms_document d,
dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700'
and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date >
date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)*/
logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
timer.getTotalTimeSeconds()/60 + " minutes");
// List<Map<String, Object>> resultSet = ImmsUtils.insertAttrValues(dfCollection, attributes);
if (i == 0) {
processReport(dfCollection, moduleTypes[i], attributes);
} else {
updateReport(dfCollection, moduleTypes[i], attributes);
}
cleanCollection(dfCollection);
}
}
/**
* Method process for remaining queries/sheets
* @param resultSet resultSet
* @param objectType objectType
* @param attributes attributes
* @throws IOException IOException
*/
private void updateReport(IDfCollection resultSet, String objectType, List<String> attributes) throws IOException, DfException {
Workbook workbook = new XSSFWorkbook(new FileInputStream(XLSX_FILE_PATH));
excelWriterAndOperateOutputStream(resultSet, objectType, workbook, attributes);
}
/**
* Method that writes data to excel sheets
* @param dfCollection dfCollection
* @param sheet2 sheet2
* @param workbook workbook
* @param attributes
* Using collection directly. Not sure where is the issue in following method, writing data to sheet is also taking 50 minutes of time
*/
private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2, Workbook workbook, List<String> attributes) throws DfException {
Sheet sheet;
Row row;
sheet = sheet2;
Object[] values = new Object[attributes.size()];
StopWatch timer = new StopWatch();
// moved outside of loop
// TODO: avoid regex, use other logic
String dateRegex = "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$";
Pattern datePattern = Pattern.compile(dateRegex);
// avoid SDF and Date and
// TODO: use java.time - maybe LocalDate
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
Date date = null;
CellStyle dateCellStyle = workbook.createCellStyle();
dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat("yyyy/MM/dd HH:mm:ss"));
timer.start();
while (dfCollection.next()) {
for (int i = 0; i < attributes.size(); i++) {
values[i] = dfCollection.getString(attributes.get(i));
}
int lastRow = sheet.getLastRowNum();
row = sheet.createRow(++lastRow);
int cellNum = 0;
for (Object value: values) {
Cell cell = row.createCell(cellNum++);
if (datePattern.matcher(value.toString()).matches()) {
try {
date = simpleDateFormat.parse(value.toString());
} catch (ParseException e) {
e.printStackTrace();
}
cell.setCellValue(date);
cell.setCellStyle(dateCellStyle);
} else {
cell.setCellValue(value.toString());
}
}
}
timer.stop();
// Taking 50 mins of time to write collection data
// Log: Time taken for writing data 54.567404175 minutes
logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");
// Resize all columns to fit the content size
for (int i = 0; i < attributes.size(); i++) {
sheet.autoSizeColumn(i);
}
}
/**
* Method to create sheet, set fonts and colors
* @param moduleType moduleType
* @param workbook workbook
* @return Sheet
*/
private Sheet createSheet(String moduleType, Workbook workbook) {
return workbook.createSheet(moduleType);
}
/**
* Method to process first query/sheet
* @param dfCollection dfCollection
* @param moduleType moduleType
* @param attributes attributes
* @throws IOException IOException
*/
private void processReport(IDfCollection dfCollection, String moduleType, List<String> attributes) throws IOException, DfException {
// Create a Workbook - for xlsx
Workbook workbook = new XSSFWorkbook();
/*CreationHelper helps us create instances of various things like DataFormat,
Hyperlink, RichTextString etc, in a format (HSSF, XSSF) independent way*/
workbook.getCreationHelper();
excelWriterAndOperateOutputStream(dfCollection, moduleType, workbook, attributes);
}
/**
* Method that writes and saves data to file
* @param resultSet resultSet
* @param moduleType moduleType
* @param workbook workbook
* @param attributes attributes
* @throws IOException IOException
*/
private void excelWriterAndOperateOutputStream(IDfCollection resultSet, String moduleType, Workbook workbook, List<String> attributes) throws IOException, DfException {
Sheet sheet = createSheet(moduleType, workbook);
CellStyle cellStyle = setFontsAndColors(workbook);
// Create a Row
Row headerRow = sheet.createRow(0);
// Create cells
for (int i = 0; i < attributes.size(); i++) {
Cell cell = headerRow.createCell(i);
cell.setCellValue(attributes.get(i));
cell.setCellStyle(cellStyle);
}
writeToSheet(resultSet, workbook.getSheet(moduleType), workbook, attributes);
// Write the output to the file
FileOutputStream fileOutputStream = new FileOutputStream(XLSX_FILE_PATH);
workbook.write(fileOutputStream);
// close the file
fileOutputStream.close();
// close the workbook
workbook.close();
}
@NotNull
private CellStyle setFontsAndColors(Workbook workbook) {
CellStyle cellStyle = workbook.createCellStyle();
// Create a Font for styling header cells
Font headerFont = workbook.createFont();
headerFont.setBold(false);
headerFont.setFontHeightInPoints((short) 12);
headerFont.setColor(IndexedColors.GREEN.getIndex());
cellStyle.setFont(headerFont);
return cellStyle;
}
/**
* Get IDfSession object
* @return IDfSession
* @throws DfException DfException
*/
public IDfSession getSession() throws DfException {
IDfSession dfSession;
IDfSessionManager sessionManager = sessionHelper.getDfSessionManager(appProperties.getRepository(), appProperties.getUsername(), appProperties.getPassword());
dfSession = sessionManager.getSession(appProperties.getRepository());
return dfSession;
}
/**
* Clean IDfCollection
* @param dfCollection dfCollection
*/
public void cleanCollection(IDfCollection dfCollection) {
dqlHelper.cleanup(dfCollection);
}
/**
* Clean IDfSession
* @param dfSession dfSession
*/
public void cleanSession(IDfSession dfSession) throws DfException {
sessionHelper.cleanSession(dfSession);
}
}
您可以尝试 forkjoinPoll 或使用 jdk 的并行流:),使用 cpu 的多核处理器。 forkjoinpool的例子请参考https://www.baeldung.com/java-fork-join
public static List<Map<String, Object>> insertAttrValues(Stream<Object> stream, List<String> attributes) throws RuntimeException {
if (stream == null || attributes == null) {
throw new RuntimeException("collection and attributes");
}
final int size = attributes.size();
return stream.parallel().map(item -> {
Map<String, Object> map = new LinkedHashMap<>(size);
for (String attribute : attributes) {
//map.put(attribute, item.getString(attribute));
}
return map;
}).collect(Collectors.toList());
}
您可以进行以下改进:
- 直接从
IDfCollection
填写POI结构,不要将采集数据复制到List<Map<String, Object>>
。 - 使用
collection.getTime(attribute)
获取时间值,而不是对每条记录进行正则表达式解析。您可以使用collection.getAttrDataType(attribute) == IDfAttr.DF_TIME
条件来判断该值是否为时间。 - 然后你可以直接使用日期而不用像这样解析:
cell.setCellValue(collection.getTime(attribute).getDate())
- 但数字也是如此,然后你可以在 excel sheet 中获得更好的结果。这意味着使用
collection.getInt(attribute)
和collection.getDouble(attribute)
而不是collection.getString(attribute)
。IDfAttr.DM_INTEGER
和IDfAttr.DM_DOUBLE
等常量在这里也有帮助。 - 将
int last_row
移到 for 循环外,并在循环内执行last_row++
。调用sheet.getLastRowNum()
是没有必要的。顺便说一句:驼峰命名lastRow
在 Java 世界中会更好 ;-)
另一件事是,您正在另一个循环中调用整个过程以进行 5 个类似查询,因此可能还有另一个 space 用于改进,例如所有的转换使用例如更好的条件查询到一个,UNION
如果可能,更广泛的条件 + 应用程序逻辑中的过滤器,...)。
我认为主要问题在于查询。请尝试以下步骤:
- 不要在
select
查询中给出单独的属性,而是使用*
。查看查询执行时间。如果执行速度很快而不需要几分钟的时间,请尝试接下来的步骤。
select * from imms_document d, dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700' and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date > date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)
- 当您使用 Spring 启动时,请在
application.properties
中包含所需的属性,如下所示。你可能不想要全部。
included_attributes=r_object_id,object_name,title,imms_extreleased_date,imms_extreleased_reason,imms_extreleaser,imms_protocol_number,imms_protocol_number_rep,keywords,imms_compound_number,imms_module_type,imms_prereleaser,imms_prereleased_date,r_folder_path
在您的 AppProperties
class 文件中执行以下操作:
@Component
public class AppProperties {
/**
*other fields
*/
@Getter
@Value("${included_attributes}")
private String[] includedAttributes;
}
现在,在您的 execute()
方法中,修改代码以仅使用您需要为其获取数据的属性。
public void execute(IDfSession dfSession) throws DfException, IOException {
StopWatch timer = new StopWatch();
for (int i = 0; i < moduleTypes.length; i++) {
// start timer
timer.start();
IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);
// stop timer
timer.stop();
logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
timer.getTotalTimeSeconds() + " seconds");
// attributes to be added
List<String> attributes = new ArrayList<>();
// Get included attributes as list
List<String> includedAttributes = Arrays.asList(appProperties.getIncludedAttributes());
for (int j = 0; j < dfCollection.getAttrCount(); j++) {
// check for the attribute in included attributes and add if exists
if (hasAttribute(includedAttributes, dfCollection.getAttr(j).getName())) {
attributes.add(dfCollection.getAttr(j).getName());
}
}
if (i == 0) {
processReport(dfCollection, moduleTypes[i], attributes);
} else {
updateReport(dfCollection, moduleTypes[i], attributes);
}
cleanCollection(dfCollection);
}
}
public static boolean hasAttribute(@NotNull List<String> attributes, String attribute) {
for(String attr : attributes){
if(attribute.contains(attr)){
return true;
}
}
return false;
}
POI结构直接使用collection,无需在array中插入数据再遍历
private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2,
@NotNull Workbook workbook, List<String> attributes) throws DfException {
Sheet sheet;
Row row;
sheet = sheet2;
StopWatch timer = new StopWatch();
String dateRegex = "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$";
Pattern datePattern = Pattern.compile(dateRegex);
DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("yyyy/MM/dd HH:mm:ss");
CellStyle dateCellStyle = workbook.createCellStyle();
dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat("yyyy/MM/dd HH:mm:ss"));
int lastRow = 0;
timer.start();
while (dfCollection.next()) {
row = sheet.createRow(++lastRow);
int cellNum = 0;
for (String attribute : attributes) {
Object value = dfCollection.getString(attribute);
Cell cell = row.createCell(cellNum++);
if (datePattern.matcher(value.toString()).matches()) {
cell.setCellValue(LocalDateTime.parse(value.toString(), timeFormatter));
cell.setCellStyle(dateCellStyle);
} else {
cell.setCellValue(value.toString());
}
}
}
timer.stop();
logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");
// Resize all columns to fit the content size
for (int i = 0; i < attributes.size(); i++) {
sheet.autoSizeColumn(i);
}
}