From 92dbe603af2c226ee17d26783a28e27424f5adfb Mon Sep 17 00:00:00 2001 From: huebsch <huebsch@LDV160.slub-dresden.de> Date: Thu, 6 Feb 2025 17:24:15 +0100 Subject: [PATCH] concurrently run and consume mediaconch output to prevent deadlocks Certain combinations of validation profiles and files can lead to large amounts of validation output being produced by mediaconch. Outputs > 100 kB have been observed for some MKA files. The validation output of the mediaconch Linux process started by the plugin is fed back to the JVM through a buffered reader, where it is analyzed and postprocessed by the plugin. While mediaconch executes, the JVM needs to wait for this process to complete ("p.waitFor()"). If the buffer allocated by the buffered reader is too small to hold the full output of the process, the mediaconch process is sent to sleep by Linux (interruptible sleep), as it needs to wait for the buffer to be read by the JVM. On the other end, the Java thread that started the mediaconch process waits for it to finish and is therefore unable to read the buffer if the implementation is purely sequential. This commit introduces a second thread that reads the mediaconch process output while the mediaconch process is running to avoid the deadlock situation.
--- ...LUBMatroskaFFV1FormatValidationPlugin.java | 89 ++++++++++++++++--- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java index c44522a..e1e3cdb 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; /** * SLUBMatroskaFFV1FormatValidationPlugin @@ -50,6 +51,8 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP private String md5UpcomingProfile; //static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorMediaConchPlugin.class, ExLogger.VALIDATIONSTACK); private boolean isDifferentProfile = true; + + private boolean dumpValidationResultToStdOut = false; /** constructor */ public SLUBMatroskaFFV1FormatValidationPlugin() { @@ -88,7 +91,7 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP } @Override - public final String getProfile () { + public final String getProfile() { String modified_current = modificationDateOfFile( this.mediaconch_current_profile_path); String modified_upcoming = modificationDateOfFile( this.mediaconch_upcoming_profile_path); /* there is no documentation in ExL API, therefore we use it to document the profile versions in a light way */ @@ -107,11 +110,55 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP private void callMediaconch(String filePath, String profilePath) { String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + profilePath; log.info("executing: " + execstring); - InputStreamReader process_out; - try { 
+ + final AtomicBoolean fuse = new AtomicBoolean(true); + + try { Process p = Runtime.getRuntime().exec(execstring); + BufferedReader procOutputReader = new BufferedReader(new InputStreamReader(p.getInputStream())); + + final ArrayList<String> procOutputLineList = new ArrayList<String>(); + + // prepare thread that reads the bulky output (>> 100kByte in some cases) to prevent the buffered reader from reaching its capacity limit + Thread procOutputReaderThread = new Thread() { + public void run() { + try { + String nextLine = ""; + while (fuse.get() && (nextLine = procOutputReader.readLine()) != null) { + procOutputLineList.add(nextLine); + } + } catch (IOException e) { + log.error("ERROR: reading the output of mediaconch failed, " + e.getMessage()); + return; + } finally { + // ensure this thread terminates under all circumstances + fuse.set(false); + } + + if (isDumpValidationResultToStdOut()) { + dumpResultToStdOut(); + } + } + + private void dumpResultToStdOut() { + System.out.println("*** begin full validation result for file " + filePath + " using profile " + profilePath + " ****"); + + for (String valResLine : procOutputLineList) { + System.out.println(valResLine); + } + + System.out.println("*** end full valdation result for file " + filePath + " using profile " + profilePath + " ****"); + } + }; + + procOutputReaderThread.start(); + + // wait for mediaconch to terminate p.waitFor(); - process_out = new InputStreamReader(p.getInputStream()); + + // wait for the output reader thread to terminate + procOutputReaderThread.join(); + if (p.exitValue() == 0) { isvalid = true; iswellformed = true; @@ -119,24 +166,26 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP isvalid = false; iswellformed = false; } - BufferedReader reader = new BufferedReader(process_out); - String line = reader.readLine(); - while (line != null) { + + for (String line : procOutputLineList) { if (line.contains("pass!")) { break; } 
System.out.println("MEDIACONCH line: " + line); validationLog.add(line); - line = reader.readLine(); log.info( line ); } - reader.close(); + + procOutputReader.close(); } catch (IOException e) { log.error("(actual) mediaconch not available, path=" + this.mediaconch_binary_path + ", " , e.getMessage()); System.out.println("ERROR: (actual) mediaconch not available, path=" + this.mediaconch_binary_path + ", " + e.getMessage()); } catch (InterruptedException e) { log.error("ERROR: call of mediaconch interrupted, path=" + this.mediaconch_binary_path + ", " + e.getMessage()); - } + } finally { + // ensure the while loop of procOutputReaderThread terminates under all circumstances + fuse.set(false); + } } @Override @@ -237,6 +286,18 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP return ""; } + /** + * Configure the plugin to dump the output of the mediaconch process to standard out directly after mediaconch terminates. + * @param dumpOutput <code>true</code> enables output dumping, <code>false</code> disables it + */ + private void setDumpValidationResultToStdOut(boolean dumpOutput) { + this.dumpValidationResultToStdOut = dumpOutput; + } + + private boolean isDumpValidationResultToStdOut() { + return this.dumpValidationResultToStdOut; + } + /** stand-alone check, main file to call local installed clamd * @param args list of files which should be scanned */ @@ -247,12 +308,12 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP initp.put( "mediaconch_current_profile_path", "/etc/mediaconch/profile.xml"); initp.put( "mediaconch_upcoming_profile_path", "/etc/mediaconch/profile.xml"); plugin.initParams( initp ); + plugin.setDumpValidationResultToStdOut(true); + plugin.validateFormat(args[0]); System.out.println("----------------------------------"); System.out.println("Agent: '" + plugin.getAgent() + "'"); System.out.println(); - for (String file : args) { - System.out.println("Validation RESULT: " + 
plugin.isValid()); - } + System.out.println("Validation RESULT: " + plugin.isValid()); System.out.println("----------------------------------"); System.out.println("getAgent:"); System.out.println( plugin.getAgent()); @@ -261,4 +322,4 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP System.out.println( plugin.getProfile()); System.out.println("----------------------------------"); } -} +} \ No newline at end of file -- GitLab