/*****************************************************************************/
/* Framework for exercises of the lecture "IT applications in science"       */
/* held at the BA-University of Cooperative Education, Stuttgart, Germany    */
/*                                                                           */
/* This source code is free for any kind of personal or educational use,     */
/* which is absolutely non-profitable.                                       */
/*                                                                           */
/* This source code as all derivatives of it may not be used for any         */
/* commercial purpose. This especially excludes any rights for claiming any  */
/* kind of patents, warranty, applicability for a certain purpose, as        */
/* finally for earning money on base of the before described inclusion range.*/
/*                                                                           */
/* (c) 2003, Hans Weghorn, weghorn@ba-stuttgart.de                           */
/*---------------------------------------------------------------------------*/
/* History                                                                   */
/* Date             Author       Description                                 */
/* Oct. 19th, 2003  H. Weghorn   Developed from scratch                      */
/*                                                                           */
/*                                                                           */
/*                                                                           */
/*****************************************************************************/

// Importing required class libraries
import java.awt.*;
import java.awt.event.*;
import java.lang.*;
import javax.sound.sampled.*;

// Main application: an AWT frame that is also the Runnable of the audio
// worker thread. The worker thread (see run()) reads from the capture line and
// draws into the frame, while the AWT event thread changes the settings below
// through menu and button listeners.
public class ft_audio extends Frame implements Runnable {

  // general constants for audio settings
  final static float AUDIO_SAMPLING_RATE  = 8000.0F;  // Hz
  final static int   AUDIO_SAMPLE_DEPTH   = 8;        // bits per sample
  final static int   AUDIO_NO_OF_CHANNELS = 1;        // 1 stands for mono
  final static int   RECOGNITION_LENGTH   = 8192;     // buffer length for recog. ~ 1 sec

  // definition of frame size (audio signal frame as also display window);
  // size_x is written by the size menu listeners on the AWT event thread and
  // polled by the worker thread, hence volatile
  static volatile int size_x = 512;
  int size_y = 256 + 2 + 100 + 1;

  // arrays for audio processing
  byte[] audioInput   = null;  // audio data buffer consists of byte data (8 bits, mono)
  int[]  plotCoorsX   = null;  // convenient plotting requires array with x-coordinates
  int[]  plotAudioY   = null;  // and y coordinates in integer format
  int[]  plotPSpecY   = null;  // same for power spectrum
  int[]  patternPSpec = null;  // for trained audio pattern
  private Graphics g  = null;  // drawing context of the frame, used by the worker thread

  // further arrays for fourier analysis of input data to be defined here
  // ..?..?..
  //

  // general settings entered through GUI; both are set on the AWT event thread
  // and read by the worker thread, hence volatile
  volatile boolean recognitionMode = false;  // online or recognition mode
  volatile boolean terminate       = false;  // end this nice application

  // required audio classes
  private static TargetDataLine m_targetLine;
  private static SourceDataLine m_sourceLine;

  //--------------------------------------------------------------------------
  // Main method for the first excercise task (2nd task below): "Online FT"
  //  - Implement the Fourier transform for the audio stream
  //  - Calculate the power spectrum
  //  - Display the power spectrum
  // Advice: Implement integer Fourier transform on base of sine tables;
  //   consider the input number range is signed 8 bit, while for the power
  //   spectrum 32 bit integers can be used.
  //--------------------------------------------------------------------------
  // Worker loop: runs until 'terminate' is set (or the thread is interrupted),
  // then disposes the frame and exits the JVM with status 0.
  public void run () {
    int     length  = -1;     // actual processing frame size
    boolean mode    = false;  // actual processing mode
    int     counter = 0;      // for diagnostic purpose

    // prepare audio operation
    prepareRTgeneral ();

    // endless loop terminated by GUI input
    while ( !terminate ) {
      // read the requested mode once so that the check and the switch below
      // act on the same value
      boolean requestedMode = recognitionMode;

      // check for mode or size change events -> if reqd apply modifications
      if ( length != size_x || mode != requestedMode ) {
        mode = requestedMode;
        if ( mode ) {
          prepareRTrecognition ();
          // 'length' is not used while in recognition mode; syncing it here
          // keeps this branch from being re-entered on every iteration
          length = size_x;
        }
        else {
          prepareRTonline ();
          // take the size from the arrays that were really allocated: size_x
          // may have been changed by the GUI in the meantime, and a larger
          // value would overrun the buffers below
          length  = plotCoorsX.length;
          counter = 0;
        }
      }
      // perform audio loop processing
      else if ( mode ) {
        // recognition mode of audio signal processing
        // do nothing during recognition mode (everything under button control!!)
        if ( !pause ( 100 ) ) {
          terminate = true;  // an interrupt is treated as a shutdown request
        }
      }
      else {
        // online display mode for audio signal and FT
        m_targetLine.read ( audioInput, 0, length );
        for ( int i=0; i<length; i++ ) {
          plotAudioY[i] = audioInput[i];
        }

        g.setColor ( Color.black );
        g.fillRect ( 0, -128, length, 256 );
        g.setColor ( Color.green );
        g.drawPolyline ( plotCoorsX, plotAudioY, length );

        // ... perform Fourier analysis right here
        // ...?....?....
        // ...?....?....
        // ...?....?....
        // ...?....?....
        // ...?....?....

        // placeholder until the power spectrum is computed: one more sample
        // of the spectrum plot is set to 99 per processed frame
        plotPSpecY[counter] = 99;
        if ( ++counter >= length ) {
          counter = 0;
        }

        g.setColor ( Color.lightGray );
        g.fillRect ( 0, 129, length, 256 );
        g.translate ( 0, 130 );
        g.setColor ( Color.magenta );
        g.drawPolyline ( plotCoorsX, plotPSpecY, length );
        g.translate ( 0, -130 );
      }
    }

    myFrame.dispose ();
    System.exit (0);
  }

  // Sleeps for the given number of milliseconds. Returns false when the sleep
  // was interrupted; in that case the interrupt flag of the current thread is
  // set again so that the caller can still see the request.
  private static boolean pause ( long millis ) {
    try {
      Thread.sleep ( millis );
      return true;
    }
    catch ( InterruptedException ie ) {
      Thread.currentThread().interrupt();
      return false;
    }
  }

  //--------------------------------------------------------------------------
  // Two main methods for the second excercise task: Speech recognition
  //  - Implement training method
  //  - Implement evaluation of audio input
  // Hint: use faster FT developed for the 1st task
  //--------------------------------------------------------------------------

  // method for learning audio signal patterns
  //   flag == true  : discard the stored pattern (all entries set to 0)
  //   flag == false : record RECOGNITION_LENGTH bytes and ask the user
  //                   whether the recording shall be used for training
  private void learnPattern ( boolean flag ) {
    disableGUI();

    if ( flag ) {
      // in this case discard existing pattern
      if ( patternPSpec == null ) System.exit (2);   // Internal error ?!?!?
      for ( int i=0; i<RECOGNITION_LENGTH; i++ ) {
        patternPSpec[i] = 0;
      }
      enableGUI();
      return;
    }

    pause ( 250 );
    m_targetLine.read ( audioInput, 0, RECOGNITION_LENGTH );

    // ask the user, whether s/he accepts the recorded pattern
    recordingAccepted = false;
    launchModalDialog ();
    if ( recordingAccepted ) {
      // add the code here for evaluating the audio input pattern for learning
      // ...?....?....
      // ...?....?....
      // ...?....?....
      // ...?....?....
      // ...?....?....
      System.out.println ( "just learned another audio pattern!" );
    }
    else {
      // else audio input ignored
      System.out.println ( "this pattern was discarded!" );
    }

    enableGUI();
  }

  // method for qualifying an audio input to the stored pattern;
  // the similarity level is shown in tfCorrelation, and cbResult is checked
  // when the level reaches 'threshold'
  private void compareAudioSignal ( double threshold ) {
    if ( audioInput == null ) System.exit (2);   // Internal error ?!?!?

    disableGUI();
    pause ( 250 );

    double similarityLevel = 0;
    m_targetLine.read ( audioInput, 0, RECOGNITION_LENGTH );

    // add the code here for comparing the recent input with the trained pattern
    // ...?....?....
    // ...?....?....
    // ...?....?....
    // ...?....?....
    // ...?....?....

    // nicely display result on GUI screen
    tfCorrelation.setText ( "" + similarityLevel );
    if ( similarityLevel >= threshold ) {
      System.out.println ( "Bingo! This pattern was accepted!" );
      cbResult.setState ( true );
    }
    else {
      cbResult.setState ( false );
    }

    enableGUI();
  }

  //--------------------------------------------------------------------------
  // End of section relevant for the exercise tasks
  //--------------------------------------------------------------------------

  // starts both audio lines that were opened in main()
  private void prepareRTgeneral () {
    m_targetLine.start();
    m_sourceLine.start();
  }

  // switches the frame to online display: removes the recognition panel,
  // resizes the window, allocates the buffers for the current frame size and
  // re-attaches the frame size menu
  private void prepareRTonline () {
    // one single read of size_x, so that the window and all arrays agree even
    // if the size menu is operated while this method runs
    final int frameLength = size_x;

    myFrame.remove ( pUI );
    myFrame.setSize ( frameLength, size_y );

    audioInput = new byte[2*frameLength];
    plotCoorsX = new int[frameLength];
    plotAudioY = new int[frameLength];
    plotPSpecY = new int[frameLength];
    for ( int i=0; i<frameLength; i++ ) {
      plotCoorsX[i] = i;
    }

    // release the drawing context of a previous configuration before
    // requesting a new one
    if ( g != null ) {
      g.dispose ();
    }
    g = myFrame.getGraphics ();
    g.translate ( 0, 129 );

    menubar.add ( menuSize );
  }

  // switches the frame to recognition mode: shows the control panel instead
  // of the plots and allocates the recording buffer
  private void prepareRTrecognition () {
    // prepare GUI
    myFrame.setSize ( 256, 256 );
    menubar.remove ( menuSize );
    myFrame.setLayout ( new BorderLayout () );
    myFrame.add ( pUI, BorderLayout.CENTER );
    myFrame.setVisible ( true );

    // generate required arrays
    audioInput = new byte[(AUDIO_SAMPLE_DEPTH/8)*RECOGNITION_LENGTH];
    if ( patternPSpec == null ) {   // preserve existing pattern
      patternPSpec = new int[(AUDIO_SAMPLE_DEPTH/8)*RECOGNITION_LENGTH];
    }
  }

  // supplementary function for preventing interference from additional GUI events
  // during time-consuming audio signal processing
  private void disableGUI () {
    setControlsEnabled ( false );
  }

  // counterpart of disableGUI(): makes the controls operable again
  private void enableGUI () {
    setControlsEnabled ( true );
  }

  // enables or disables the three recognition buttons and the function menu
  private void setControlsEnabled ( boolean enabled ) {
    bLearn.setEnabled ( enabled );
    bClear.setEnabled ( enabled );
    bTest.setEnabled ( enabled );
    menuFunction.setEnabled ( enabled );
  }

  // supplementary method for modal dialog asking the user for accepting an audio
  // input as training pattern
  Dialog dP = null;
  boolean recordingAccepted = false;

  // Shows the recorded samples together with a yes/no choice;
  // 'recordingAccepted' is set to true by the "yes" button only.
  private void launchModalDialog () {
    // construct a modal confirmation window
    dP = new Dialog ( myFrame, "Accept this pattern?", true );
    Button bOk = new Button ( "yes" ), bNo = new Button ( "no" );
    Panel pButtons = new Panel ();
    dP.setLayout ( new BorderLayout() );
    pButtons.add ( bOk );
    pButtons.add ( bNo );
    dP.add ( pButtons, BorderLayout.SOUTH );

    bOk.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        // store the answer before the dialog is closed, so that it is in
        // place as soon as the dialog stops blocking its caller
        recordingAccepted = true;
        dP.dispose();
      }
    });
    bNo.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        dP.dispose();
      }
    });

    // the preview always paints the buffer that was recorded for this dialog
    final byte[] samples = audioInput;
    Canvas preview = new Canvas () {
      public void paint (Graphics g) {
        g.setColor ( Color.black );
        g.fillRect ( 0, 0, 256, 130 );
        g.translate ( 0, 65 );
        int di = RECOGNITION_LENGTH / 256;   // samples per pixel column
        g.setColor ( Color.green );
        for ( int i=0; i< RECOGNITION_LENGTH; i++ ) {
          int x = i / di;
          int y = samples[i]>>1;             // half amplitude
          g.drawLine ( x, 0, x, y );
        }
      }
    };
    preview.setSize ( 256, 130 );
    dP.add ( preview, BorderLayout.CENTER );
    dP.pack ();
    dP.setVisible ( true );
  }

  // top-level menu bar
  MenuBar menubar = new MenuBar ();

  // File menu
  Menu menuFunction = new Menu ( "Function" );
  MenuItem miOnline = new MenuItem ( "Online FT" );
  MenuItem miProcessing = new MenuItem ( "Recognition" );
  MenuItem miExit = new MenuItem ( "Exit" );

  // Size menu
  Menu menuSize = new Menu ( "Frame size" );
  CheckboxMenuItem cbmi256 = new CheckboxMenuItem ( "256", false );
  CheckboxMenuItem cbmi512 = new CheckboxMenuItem ( "512", true );
  CheckboxMenuItem cbmi1k = new CheckboxMenuItem ( "1024", false );

  // controls of the recognition mode
  Panel pUI = new Panel ();
  Button bLearn = new Button ( "Learn Pattern" );
  Button bClear = new Button ( "Clear Pattern" );
  Button bTest = new Button ( "Listen & Recognize" );
  TextField tfThreshold = new TextField ( "0.5" );
  TextField tfCorrelation = new TextField ( "     " );
  Checkbox cbResult = new Checkbox ( "recognized", false );

  Frame myFrame = null;

  // constructor sets up initial appearance of GUI frame
  public ft_audio () {
    myFrame = this;

    // build up function menu
    menuFunction.add ( miOnline );
    menuFunction.add ( miProcessing );
    menuFunction.addSeparator ();
    menuFunction.add ( miExit );
    miOnline.setShortcut ( new MenuShortcut('O') );
    miProcessing.setShortcut ( new MenuShortcut('P') );

    // build up frame size menu
    menuSize.add ( cbmi256 );
    menuSize.add ( cbmi512 );
    menuSize.add ( cbmi1k );

    // assemble menu bar, set short cuts, and attach it to the frame
    // (the size menu is attached later by prepareRTonline())
    menubar.add ( menuFunction );
    menuFunction.setShortcut ( new MenuShortcut('F') );
    setMenuBar ( menubar );

    // window closing events from window manager (WM); the worker thread
    // performs the actual shutdown once it sees the flag
    this.addWindowListener ( new WindowAdapter () {
      public void windowClosing ( WindowEvent we ) {
        terminate = true;
      }
    } );

    // exit menu activation
    miExit.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        terminate = true;
      }
    });

    // online menu activation
    miOnline.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        recognitionMode = false;
      }
    });

    // recognition menu activation
    miProcessing.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        recognitionMode = true;
      }
    });

    // size event activation: the three items behave like radio buttons
    cbmi256.addItemListener ( new ItemListener () {
      public void itemStateChanged ( ItemEvent ie ) {
        if ( cbmi256.getState() ) {
          cbmi512.setState ( false );
          cbmi1k.setState ( false );
          size_x = 256;
        }
      }
    });
    cbmi512.addItemListener ( new ItemListener () {
      public void itemStateChanged ( ItemEvent ie ) {
        if ( cbmi512.getState() ) {
          cbmi256.setState ( false );
          cbmi1k.setState ( false );
          size_x = 512;
        }
      }
    });
    cbmi1k.addItemListener ( new ItemListener () {
      public void itemStateChanged ( ItemEvent ie ) {
        if ( cbmi1k.getState() ) {
          cbmi256.setState ( false );
          cbmi512.setState ( false );
          size_x = 1024;
        }
      }
    });

    // assemble panel for recognition mode
    pUI.setBackground ( new Color ( 176, 196, 222 ) );
    pUI.setLayout ( new FlowLayout () );
    pUI.add ( bLearn );
    pUI.add ( bClear );
    pUI.add ( tfThreshold );
    pUI.add ( bTest );
    pUI.add ( tfCorrelation );
    tfCorrelation.setEditable ( false );
    pUI.add ( cbResult );
    cbResult.setEnabled(false);

    // fill control buttons with life
    bLearn.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        learnPattern ( false );
      }
    });
    bClear.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        learnPattern ( true );
      }
    });
    bTest.addActionListener ( new ActionListener () {
      public void actionPerformed ( ActionEvent ae ) {
        double threshold = parseThreshold ( tfThreshold.getText() );
        if ( Double.isNaN ( threshold ) ) {
          // unusable entry: fall back to the default and show it to the user
          threshold = 0.5;
          tfThreshold.setText ( "0.5" );
        }
        compareAudioSignal ( threshold );
      }
    });

    this.setTitle ( "Audio tool" );
    this.setSize ( size_x, size_y );
    this.setResizable ( false );
    this.setVisible ( true );
  }

  // Converts the text of the threshold field into a number. Returns NaN when
  // the text is not a number; an entered "NaN" also ends up as NaN, so the
  // caller treats both cases as invalid (a NaN threshold could never be
  // reached by any similarity level).
  private static double parseThreshold ( String text ) {
    try {
      return Double.parseDouble ( text );
    }
    catch ( NumberFormatException nfe ) {
      return Double.NaN;
    }
  }

  // main entry for preparing audio actions, and launching GUI window;
  // exits with status 1 when one of the audio lines is unavailable
  public static void main ( String[] args ) {
    AudioFormat audioFormat = new AudioFormat (
        AudioFormat.Encoding.PCM_SIGNED,
        AUDIO_SAMPLING_RATE,
        AUDIO_SAMPLE_DEPTH,
        AUDIO_NO_OF_CHANNELS,
        (AUDIO_NO_OF_CHANNELS*AUDIO_SAMPLE_DEPTH/8),   // frame size in bytes
        AUDIO_SAMPLING_RATE,                           // frame rate
        false );                                       // little endian

    try {
      // the initial frame size is used as line buffer size
      final int bufferSize = size_x;
      DataLine.Info targetInfo =
          new DataLine.Info ( TargetDataLine.class, audioFormat, bufferSize );
      DataLine.Info sourceInfo =
          new DataLine.Info ( SourceDataLine.class, audioFormat, bufferSize );
      m_targetLine = (TargetDataLine) AudioSystem.getLine ( targetInfo );
      m_sourceLine = (SourceDataLine) AudioSystem.getLine ( sourceInfo );
      m_targetLine.open ( audioFormat, bufferSize );
      m_sourceLine.open ( audioFormat, bufferSize );
    }
    catch (LineUnavailableException e) {
      e.printStackTrace();
      System.exit(1);
    }

    (new Thread ( new ft_audio () )).start();
  }
}