From 92dbe603af2c226ee17d26783a28e27424f5adfb Mon Sep 17 00:00:00 2001
From: huebsch <huebsch@LDV160.slub-dresden.de>
Date: Thu, 6 Feb 2025 17:24:15 +0100
Subject: [PATCH] concurrently run and consume mediaconch output to prevent
 deadlocks

Certain combinations of validation profiles and files lead
to large amounts of validation output being produced by mediaconch.
Outputs > 100 kB have been observed for some MKA files.

The validation output of the mediaconch Linux process started by the
plugin is fed back to the JVM through a buffered reader, where it is
analyzed and postprocessed by the plugin. While mediaconch executes, the
JVM needs to wait for this process to complete ("p.waitFor()"). In case
the buffer allocated by the buffered reader is too small to hold the
full output of the process, the mediaconch process is sent to sleep by
Linux (interruptible sleep), as it needs to wait for the buffer to be
read by the JVM. On the other end, the Java thread that started the
mediaconch process waits for it to finish, therefore being unable to
read the buffer if the implementation is purely sequential.

This commit introduces a second thread that reads the mediaconch process
output while the mediaconch process is running to avoid the deadlock
situation.
---
 ...LUBMatroskaFFV1FormatValidationPlugin.java | 89 ++++++++++++++++---
 1 file changed, 75 insertions(+), 14 deletions(-)

diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java
index c44522a..e1e3cdb 100644
--- a/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java
+++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBMatroskaFFV1FormatValidationPlugin.java
@@ -31,6 +31,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 /**
  * SLUBMatroskaFFV1FormatValidationPlugin
@@ -50,6 +51,8 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
     private String md5UpcomingProfile;
     //static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorMediaConchPlugin.class, ExLogger.VALIDATIONSTACK);
     private boolean isDifferentProfile = true;
+    
+    private boolean dumpValidationResultToStdOut = false;
 
     /** constructor */
     public SLUBMatroskaFFV1FormatValidationPlugin() {
@@ -88,7 +91,7 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
     }
 
     @Override
-    public final String getProfile () {
+    public final String getProfile() {
         String modified_current = modificationDateOfFile( this.mediaconch_current_profile_path);
         String modified_upcoming = modificationDateOfFile( this.mediaconch_upcoming_profile_path);
         /* there is no documentation in ExL API, therefore we use it to document the profile versions in a light way */
@@ -107,11 +110,55 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
     private void callMediaconch(String filePath, String profilePath) {
         String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + profilePath;
         log.info("executing: " + execstring);
-        InputStreamReader process_out;
-        try {
+
+       	final AtomicBoolean fuse = new AtomicBoolean(true);        	
+                
+        try {       	
             Process p = Runtime.getRuntime().exec(execstring);
+            BufferedReader procOutputReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
+            
+            final ArrayList<String> procOutputLineList = new ArrayList<String>();
+            
+        	// prepare thread that reads the bulky output (>> 100kByte in some cases) to prevent the buffered reader from reaching its capacity limit            
+            Thread procOutputReaderThread = new Thread() {
+            	public void run() {
+            		try {
+            			String nextLine = "";
+            			while (fuse.get() && (nextLine = procOutputReader.readLine()) != null) {				
+            				procOutputLineList.add(nextLine);
+            			}
+            		} catch (IOException e) {
+            			log.error("ERROR: reading the output of mediaconch failed, " + e.getMessage());
+            			return;
+            		} finally {
+            			// ensure this thread terminates under all circumstances
+            			fuse.set(false);            			
+            		}
+            		
+            		if (isDumpValidationResultToStdOut()) {
+            			dumpResultToStdOut();
+            		}
+            	}
+            	
+            	private void dumpResultToStdOut() {
+        			System.out.println("*** begin full validation result for file " + filePath + " using profile " + profilePath + " ****");
+        			
+        			for (String valResLine : procOutputLineList) {
+        				System.out.println(valResLine);
+        			}
+        				
+        			System.out.println("*** end full valdation result for file " + filePath + " using profile " + profilePath + " ****");            		
+            	}
+            };
+            
+            procOutputReaderThread.start();
+            
+            // wait for mediaconch to terminate
             p.waitFor();
-            process_out = new InputStreamReader(p.getInputStream());
+            
+            // wait for the output reader thread to terminate
+            procOutputReaderThread.join();
+
             if (p.exitValue() == 0) {
                 isvalid = true;
                 iswellformed = true;
@@ -119,24 +166,26 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
                 isvalid = false;
                 iswellformed = false;
             }
-            BufferedReader reader = new BufferedReader(process_out);
-            String line = reader.readLine();
-            while (line != null) {
+
+            for (String line : procOutputLineList) {
                 if (line.contains("pass!")) {
                     break;
                 }
                 System.out.println("MEDIACONCH line: " + line);
                 validationLog.add(line);
-                line = reader.readLine();
                 log.info( line );
             }
-            reader.close();
+            
+            procOutputReader.close();
         } catch (IOException e) {
             log.error("(actual) mediaconch not available, path=" + this.mediaconch_binary_path + ", " , e.getMessage());
             System.out.println("ERROR: (actual) mediaconch not available, path=" + this.mediaconch_binary_path + ", " + e.getMessage());
         } catch (InterruptedException e) {
             log.error("ERROR: call of mediaconch interrupted, path=" + this.mediaconch_binary_path + ", " + e.getMessage());
-        }
+        } finally {
+			// ensure the while loop of procOutputReaderThread terminates under all circumstances
+			fuse.set(false);
+		}
     }
 
     @Override
@@ -237,6 +286,18 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
         return "";
     }
 
+    /**
+     * Configure the plugin to dump the output of the mediaconch process to standard out directly after mediaconch terminates.
+     * @param dumpOutput <code>true</code> enables output dumping, <code>false</code> disables it
+     */
+    private void setDumpValidationResultToStdOut(boolean dumpOutput) {
+    	this.dumpValidationResultToStdOut = dumpOutput;
+    }
+
+    private boolean isDumpValidationResultToStdOut() {
+    	return this.dumpValidationResultToStdOut;
+    }
+
     /** stand-alone check, main file to call local installed clamd
      * @param args list of files which should be scanned
      */
@@ -247,12 +308,12 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
         initp.put( "mediaconch_current_profile_path", "/etc/mediaconch/profile.xml");
         initp.put( "mediaconch_upcoming_profile_path", "/etc/mediaconch/profile.xml");
         plugin.initParams( initp );
+        plugin.setDumpValidationResultToStdOut(true);
+        plugin.validateFormat(args[0]);
         System.out.println("----------------------------------");
         System.out.println("Agent: '" + plugin.getAgent() + "'");
         System.out.println();
-        for (String file : args) {
-            System.out.println("Validation RESULT: " + plugin.isValid());
-        }
+        System.out.println("Validation RESULT: " + plugin.isValid());
         System.out.println("----------------------------------");
         System.out.println("getAgent:");
         System.out.println( plugin.getAgent());
@@ -261,4 +322,4 @@ public class SLUBMatroskaFFV1FormatValidationPlugin implements FormatValidationP
         System.out.println( plugin.getProfile());
         System.out.println("----------------------------------");
     }
-}
+}
\ No newline at end of file
-- 
GitLab