在 Java 中将数据插入 Map 列表（List<Map>）耗时过长

Question

我有一项任务是每天发送自动报告。为此，我把查询结果集（IDfCollection）写入一个 Map 列表（List<Map<String, Object>>），以便随后把这些数据写入 Excel。下面是我用来把集合数据插入 Map 列表的方法。问题是：对于一个 3000-3500 行、14 列的集合，这个方法需要 1 小时 20 分钟才能完成数据插入。我的代码中要运行 5 个类似的查询，每个查询花费的时间都差不多。您能帮我优化代码以减少耗时吗？

// avoided following method

/**
 * Copies every row of a collection into a list of ordered maps, one map per row,
 * keyed by attribute name and holding the value read with {@code getString}.
 *
 * @param dfCollection collection to read; it is advanced with {@code next()} until exhausted
 * @param attributes names of the attributes to copy, in the order they are put into each map
 * @return one insertion-ordered map per row, in iteration order
 * @throws MissingParameterException if either argument is {@code null}
 * @throws DfException if advancing the collection or reading a value fails
 */
public static List<Map<String, Object>> insertAttrValues(IDfCollection dfCollection, List<String> attributes) throws DfException {

    boolean missingArgument = dfCollection == null || attributes == null;
    if (missingArgument) {
        throw new MissingParameterException("collection and attributes");
    }

    final int columnCount = attributes.size();
    final List<Map<String, Object>> rows = new ArrayList<>();

    for (boolean hasRow = dfCollection.next(); hasRow; hasRow = dfCollection.next()) {
        Map<String, Object> row = new LinkedHashMap<>(columnCount);
        for (String name : attributes) {
            String value = dfCollection.getString(name);
            row.put(name, value);
        }
        rows.add(row);
    }

    return rows;
}

编辑：抱歉，现在补充了代码的重要部分；目前的代码直接使用集合，而不是先把值插入 Map 再做后续处理。

起点：

/**
 * Application entry point: starts the Spring context, runs the clinical report through
 * {@link ClinicalReportController} and then sends the report by e-mail.
 */
@SpringBootApplication
public class ImmsClinicalReportApplication {

    public static void main(String[] args) {
        ApplicationContext applicationContext = SpringApplication.run(ImmsClinicalReportApplication.class, args);
        init(applicationContext);
    }

    /**
     * Obtains a session, executes the report, sends the e-mail and always releases the session.
     *
     * @param applicationContext started Spring context holding the {@code clinicalReportController} bean
     */
    private static void init(@NotNull ApplicationContext applicationContext) {
        // Typed lookup: no unchecked cast, and a wrongly typed bean fails with a descriptive error.
        ClinicalReportController clinicalReportController =
                applicationContext.getBean("clinicalReportController", ClinicalReportController.class);

        IDfSession dfSession = null;

        try {
            dfSession = clinicalReportController.getSession();
            clinicalReportController.execute(dfSession);
            sendEmail(applicationContext, clinicalReportController);
        } catch (DfException | IOException e) {
            e.printStackTrace();
        } finally {
            // getSession() may have thrown before a session existed; there is nothing to release then.
            if (dfSession != null) {
                try {
                    clinicalReportController.cleanSession(dfSession);
                } catch (DfException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

/**
 * Builds the clinical Excel report: runs one query per module type and writes each result
 * set to its own sheet of the workbook stored at the configured report path.
 */
@Controller("clinicalReportController")
@PropertySource("classpath:application.properties")
public class ClinicalReportController {

    private static final Logger logger = Logger.getLogger(ClinicalReportController.class);

    /** Layout a string value must have to be written as a date cell; also the Excel cell format. */
    private static final String DATE_FORMAT = "yyyy/MM/dd HH:mm:ss";

    /** Cheap pre-check so that only date-looking values are handed to the date parser. Compiled once. */
    private static final Pattern DATE_PATTERN = Pattern.compile(
            "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$");

    private final SessionHelper sessionHelper;
    private final DqlHelper dqlHelper;
    private final AppProperties appProperties;

    @Value("${report_path}")
    private String XLSX_FILE_PATH;

    // Sheet names; entry i is paired by index with QueryConstant.immsQueries[i] in execute().
    private static final String[] moduleTypes = {
        "Clin Protocol", "Clin Investigator Brochure", "Clin Core Text",
        "Clin Process Documentation", "Clin Supporting Information"
    };

    @Autowired
    public ClinicalReportController(DqlHelper dqlHelper, SessionHelper sessionHelper, AppProperties appProperties) {
        this.dqlHelper = dqlHelper;
        this.sessionHelper = sessionHelper;
        this.appProperties = appProperties;
    }

    /**
     * Runs every report query and writes each result set to its own sheet.
     * The first query creates the workbook; the following ones re-open it and add a sheet.
     *
     * @param dfSession session used to run the queries
     * @throws DfException if querying or reading the collection fails
     * @throws IOException if the report file cannot be read or written
     */
    public void execute(IDfSession dfSession) throws DfException, IOException {

        for (int i = 0; i < moduleTypes.length; i++) {
            // A fresh stopwatch per query: the total of a reused instance accumulates over
            // all start/stop cycles, so the logged value would grow with every iteration.
            StopWatch timer = new StopWatch();
            timer.start();
            IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);

            try {
                int attrCount = dfCollection.getAttrCount();
                List<String> attributes = new ArrayList<>(attrCount);
                for (int j = 0; j < attrCount; j++) {
                    attributes.add(dfCollection.getAttr(j).getName());
                }
                timer.stop();

                // Author's observation: each query takes about 20 minutes. Sample query:
                //   select d.r_object_id, d.object_name, d.title,
                //   d.imms_extreleased_date, d.imms_extreleased_reason, d.imms_extreleaser,
                //   d.imms_protocol_number, d.imms_protocol_number_rep, d.keywords,
                //   d.imms_compound_number, d.imms_module_type, d.imms_prereleaser,
                //   d.imms_prereleased_date, f.r_folder_path from imms_document d,
                //   dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700'
                //   and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date >
                //   date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)
                logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
                        timer.getTotalTimeSeconds()/60 + " minutes");

                if (i == 0) {
                    processReport(dfCollection, moduleTypes[i], attributes);
                } else {
                    updateReport(dfCollection, moduleTypes[i], attributes);
                }
            } finally {
                // Release the collection even when building the sheet fails.
                cleanCollection(dfCollection);
            }
        }
    }

    /**
     * Handles every query after the first: re-opens the existing report and adds a sheet to it.
     *
     * @param resultSet collection holding the query result
     * @param objectType name of the sheet to create
     * @param attributes column names, in output order
     * @throws IOException if the report file cannot be read or written
     * @throws DfException if reading the collection fails
     */
    private void updateReport(IDfCollection resultSet, String objectType, List<String> attributes) throws IOException, DfException {
        Workbook workbook;
        // The workbook is fully loaded by the constructor, so the input stream is closed right
        // away; the same path is opened for writing further down.
        try (FileInputStream existingReport = new FileInputStream(XLSX_FILE_PATH)) {
            workbook = new XSSFWorkbook(existingReport);
        }
        excelWriterAndOperateOutputStream(resultSet, objectType, workbook, attributes);
    }

    /**
     * Appends one row per collection entry below the sheet's current last row. Values that
     * match the date layout are written as date cells, everything else as text.
     *
     * @param dfCollection collection to read; it is advanced until exhausted
     * @param sheet2 sheet that receives the rows
     * @param workbook workbook owning the sheet, used to create the date cell style
     * @param attributes column names, in output order
     * @throws DfException if advancing the collection or reading a value fails
     */
    private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2, Workbook workbook, List<String> attributes) throws DfException {
        // SimpleDateFormat is not thread-safe, so it is kept local to this call.
        SimpleDateFormat dateParser = new SimpleDateFormat(DATE_FORMAT);

        CellStyle dateCellStyle = workbook.createCellStyle();
        dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat(DATE_FORMAT));

        // Read the last row index once; afterwards it is simply incremented per row.
        int rowIndex = sheet2.getLastRowNum();

        StopWatch timer = new StopWatch();
        timer.start();
        while (dfCollection.next()) {
            Row row = sheet2.createRow(++rowIndex);
            int cellNum = 0;

            for (String attribute : attributes) {
                String text = dfCollection.getString(attribute);
                if (text == null) {
                    text = "";
                }

                Cell cell = row.createCell(cellNum++);
                Date parsed = DATE_PATTERN.matcher(text).matches() ? parseDate(dateParser, text) : null;
                if (parsed != null) {
                    cell.setCellValue(parsed);
                    cell.setCellStyle(dateCellStyle);
                } else {
                    cell.setCellValue(text);
                }
            }
        }
        timer.stop();
        logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");

        // Resize all columns to fit the content size
        for (int i = 0; i < attributes.size(); i++) {
            sheet2.autoSizeColumn(i);
        }
    }

    /**
     * Parses a date-looking value.
     *
     * @return the parsed date, or {@code null} when the text cannot be parsed, so the caller
     *         writes the raw text instead of a date left over from an earlier cell
     */
    private static Date parseDate(SimpleDateFormat parser, String text) {
        try {
            return parser.parse(text);
        } catch (ParseException e) {
            logger.warn("Value '" + text + "' looks like a date but could not be parsed; writing it as text", e);
            return null;
        }
    }

    /**
     * Creates a sheet named after the module type.
     *
     * @param moduleType sheet name
     * @param workbook workbook that receives the sheet
     * @return the new sheet
     */
    private Sheet createSheet(String moduleType, Workbook workbook) {
        return workbook.createSheet(moduleType);
    }

    /**
     * Handles the first query: starts a new, empty xlsx workbook and writes the first sheet.
     *
     * @param dfCollection collection holding the query result
     * @param moduleType name of the sheet to create
     * @param attributes column names, in output order
     * @throws IOException if the report file cannot be written
     * @throws DfException if reading the collection fails
     */
    private void processReport(IDfCollection dfCollection, String moduleType, List<String> attributes) throws IOException, DfException {
        excelWriterAndOperateOutputStream(dfCollection, moduleType, new XSSFWorkbook(), attributes);
    }

    /**
     * Adds a sheet with a styled header row and the collection data, then saves the workbook
     * to the report path. The workbook is closed on return, whether or not writing succeeded.
     *
     * @param resultSet collection holding the query result
     * @param moduleType name of the sheet to create
     * @param workbook workbook to extend and save; closed by this method
     * @param attributes column names, in output order
     * @throws IOException if the report file cannot be written
     * @throws DfException if reading the collection fails
     */
    private void excelWriterAndOperateOutputStream(IDfCollection resultSet, String moduleType, Workbook workbook, List<String> attributes) throws IOException, DfException {
        try (Workbook openWorkbook = workbook) {
            Sheet sheet = createSheet(moduleType, openWorkbook);
            CellStyle headerStyle = setFontsAndColors(openWorkbook);

            Row headerRow = sheet.createRow(0);
            for (int i = 0; i < attributes.size(); i++) {
                Cell cell = headerRow.createCell(i);
                cell.setCellValue(attributes.get(i));
                cell.setCellStyle(headerStyle);
            }

            writeToSheet(resultSet, sheet, openWorkbook, attributes);

            // The output stream is closed even when writing fails.
            try (FileOutputStream fileOutputStream = new FileOutputStream(XLSX_FILE_PATH)) {
                openWorkbook.write(fileOutputStream);
            }
        }
    }

    /**
     * Creates the header cell style: green, non-bold, 12 pt font.
     *
     * @param workbook workbook that owns the style and the font
     * @return the new cell style
     */
    @NotNull
    private CellStyle setFontsAndColors(Workbook workbook) {
        Font headerFont = workbook.createFont();
        headerFont.setBold(false);
        headerFont.setFontHeightInPoints((short) 12);
        headerFont.setColor(IndexedColors.GREEN.getIndex());

        CellStyle cellStyle = workbook.createCellStyle();
        cellStyle.setFont(headerFont);
        return cellStyle;
    }

    /**
     * Get IDfSession object for the repository and credentials configured in the application properties.
     *
     * @return IDfSession
     * @throws DfException if the session cannot be obtained
     */
    public IDfSession getSession() throws DfException {
        IDfSessionManager sessionManager = sessionHelper.getDfSessionManager(
                appProperties.getRepository(), appProperties.getUsername(), appProperties.getPassword());
        return sessionManager.getSession(appProperties.getRepository());
    }

    /**
     * Clean IDfCollection
     *
     * @param dfCollection collection to release
     */
    public void cleanCollection(IDfCollection dfCollection) {
        dqlHelper.cleanup(dfCollection);
    }

    /**
     * Clean IDfSession
     *
     * @param dfSession session to release
     * @throws DfException if releasing the session fails
     */
    public void cleanSession(IDfSession dfSession) throws DfException {
        sessionHelper.cleanSession(dfSession);
    }
}

Answer 1

您可以尝试使用 ForkJoinPool，或者使用 JDK 的并行流 :)，以利用 CPU 的多个核心。ForkJoinPool 的示例请参考 https://www.baeldung.com/java-fork-join

  /**
   * Maps every element of the stream, in parallel, to a new insertion-ordered map.
   * Filling the map is left as a placeholder, so each returned map is currently empty.
   *
   * @param stream elements to convert; processed as a parallel stream
   * @param attributes attribute names; its size is used as the initial capacity of each map
   * @return one map per stream element, collected into a list
   * @throws RuntimeException if either argument is {@code null}
   */
  public static List<Map<String, Object>> insertAttrValues(Stream<Object> stream, List<String> attributes) throws RuntimeException {
    boolean missingArgument = stream == null || attributes == null;
    if (missingArgument) {
        throw new RuntimeException("collection and attributes");
    }

    final int capacity = attributes.size();
    Stream<Map<String, Object>> rows = stream.parallel().map(item -> {
        Map<String, Object> row = new LinkedHashMap<>(capacity);
        for (String attribute : attributes) {
            // Placeholder: copy the attribute value of the item here, e.g.
            // row.put(attribute, item.getString(attribute));
        }
        return row;
    });
    return rows.collect(Collectors.toList());
}

Answer 2

您可以进行以下改进：

直接从IDfCollection填写POI结构，不要将采集数据复制到List<Map<String, Object>>。
使用 collection.getTime(attribute) 获取时间值，而不是对每条记录进行正则表达式解析。您可以用 collection.getAttrDataType(attribute) == IDfAttr.DM_TIME 这一条件来判断该值是否为时间类型。
然后你可以直接使用日期而不用像这样解析：cell.setCellValue(collection.getTime(attribute).getDate())
数字也是同样的道理，这样在 Excel 表格中可以得到更好的结果：即使用 collection.getInt(attribute) 和 collection.getDouble(attribute)，而不是 collection.getString(attribute)。IDfAttr.DM_INTEGER 和 IDfAttr.DM_DOUBLE 等常量在这里也能派上用场。
将 int last_row 移到 for 循环外，并在循环内执行 last_row++。调用 sheet.getLastRowNum() 是没有必要的。顺便说一句：驼峰命名 lastRow 在 Java 世界中会更好 ;-)

另外，您是在一个循环中为 5 个类似的查询重复执行整个过程，因此这里可能还有改进空间，例如：使用更合适的条件把所有查询合并为一个查询、在可行时使用 UNION、使用更宽泛的查询条件并在应用程序逻辑中进行过滤，等等。

Answer 3

我认为主要问题在于查询。请尝试以下步骤：

不要在 select 查询中给出单独的属性，而是使用 *。查看查询执行时间。如果执行速度很快而不需要几分钟的时间，请尝试接下来的步骤。

select * from imms_document d, dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700' and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date > date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)

由于您使用的是 Spring Boot，请在 application.properties 中列出所需的属性，如下所示。您可能并不需要全部属性。

included_attributes=r_object_id,object_name,title,imms_extreleased_date,imms_extreleased_reason,imms_extreleaser,imms_protocol_number,imms_protocol_number_rep,keywords,imms_compound_number,imms_module_type,imms_prereleaser,imms_prereleased_date,r_folder_path

在您的 AppProperties class 文件中执行以下操作：

/**
 * Application settings bound from the property source.
 */
@Component
public class AppProperties {

    // ... other fields ...

    /**
     * Attribute names taken from the {@code included_attributes} property.
     * The getter {@code getIncludedAttributes()} is generated by the {@code @Getter} annotation.
     */
    @Value("${included_attributes}")
    @Getter
    private String[] includedAttributes;

}

现在，在您的 execute() 方法中，修改代码以仅使用您需要为其获取数据的属性。

/**
 * Runs every report query and writes each result set to its own sheet, keeping only the
 * attributes listed in the configured included attributes.
 *
 * @param dfSession session used to run the queries
 * @throws DfException if querying or reading the collection fails
 * @throws IOException if the report file cannot be read or written
 */
public void execute(IDfSession dfSession) throws DfException, IOException {

    // Loop-invariant: the configured attribute list is the same for every query.
    List<String> includedAttributes = Arrays.asList(appProperties.getIncludedAttributes());

    for (int i = 0; i < moduleTypes.length; i++) {
        // A fresh stopwatch per query: the total of a reused instance accumulates over
        // all start/stop cycles, so the logged value would grow with every iteration.
        StopWatch timer = new StopWatch();
        timer.start();
        IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);
        timer.stop();
        logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
                timer.getTotalTimeSeconds() + " seconds");

        try {
            // Keep only the attributes that are configured for inclusion.
            List<String> attributes = new ArrayList<>();
            for (int j = 0; j < dfCollection.getAttrCount(); j++) {
                String attributeName = dfCollection.getAttr(j).getName();
                if (hasAttribute(includedAttributes, attributeName)) {
                    attributes.add(attributeName);
                }
            }

            if (i == 0) {
                processReport(dfCollection, moduleTypes[i], attributes);
            } else {
                updateReport(dfCollection, moduleTypes[i], attributes);
            }
        } finally {
            // Release the collection even when building the sheet fails.
            cleanCollection(dfCollection);
        }
    }
}

/**
 * Tells whether an attribute name is one of the allowed names.
 *
 * <p>A name matches when it equals an allowed name, or when it is an allowed name behind a
 * dot-separated qualifier (for example {@code d.title} for {@code title}). Plain substring
 * matching is deliberately not used: it would accept {@code subtitle} for {@code title}, and
 * an empty allowed entry would accept every attribute.
 *
 * @param attributes allowed attribute names; {@code null} and empty entries are ignored
 * @param attribute name to look up; {@code null} never matches
 * @return {@code true} if {@code attribute} matches one of the allowed names
 */
public static boolean hasAttribute(@NotNull List<String> attributes, String attribute) {
    if (attribute == null) {
        return false;
    }
    for (String attr : attributes) {
        if (attr == null || attr.isEmpty()) {
            continue;
        }
        if (attribute.equals(attr) || attribute.endsWith("." + attr)) {
            return true;
        }
    }
    return false;
}

直接用集合（collection）填充 POI 结构，无需先把数据放入数组（array）再遍历：

/**
 * Writes one row per collection entry, starting at row index 1. Values that match the date
 * layout and parse successfully are written as date cells, everything else as text.
 *
 * @param dfCollection collection to read; it is advanced until exhausted
 * @param sheet2 sheet that receives the rows
 * @param workbook workbook owning the sheet, used to create the date cell style
 * @param attributes column names, in output order
 * @throws DfException if advancing the collection or reading a value fails
 */
private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2,
                              @NotNull Workbook workbook, List<String> attributes) throws DfException {
    final String dateFormat = "yyyy/MM/dd HH:mm:ss";

    // "\\s" (regex whitespace); a bare "\s" is not a valid string escape before Java 15.
    Pattern datePattern = Pattern.compile(
            "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$");
    DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern(dateFormat);

    CellStyle dateCellStyle = workbook.createCellStyle();
    dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat(dateFormat));

    int lastRow = 0;

    StopWatch timer = new StopWatch();
    timer.start();
    while (dfCollection.next()) {
        Row row = sheet2.createRow(++lastRow);
        int cellNum = 0;

        for (String attribute : attributes) {
            String text = dfCollection.getString(attribute);
            if (text == null) {
                text = "";
            }

            Cell cell = row.createCell(cellNum++);

            LocalDateTime timestamp = null;
            if (datePattern.matcher(text).matches()) {
                try {
                    timestamp = LocalDateTime.parse(text, timeFormatter);
                } catch (java.time.format.DateTimeParseException e) {
                    // The pattern also accepts impossible dates (e.g. month 13, day 39);
                    // such a value is written as text instead of aborting the report.
                    logger.warn("Value '" + text + "' looks like a date but could not be parsed; writing it as text", e);
                }
            }

            if (timestamp != null) {
                cell.setCellValue(timestamp);
                cell.setCellStyle(dateCellStyle);
            } else {
                cell.setCellValue(text);
            }
        }
    }
    timer.stop();
    logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");

    // Resize all columns to fit the content size
    for (int i = 0; i < attributes.size(); i++) {
        sheet2.autoSizeColumn(i);
    }
}

在 Java 中将数据插入 Map 列表（List<Map>）耗时过长

Inserting data into list of map taking too much time in Java

java

documentum

apache-poi

documentum-dfc