diff --git a/README.rst b/README.rst
index da1477f..f8fc341 100644
--- a/README.rst
+++ b/README.rst
@@ -48,6 +48,15 @@ Folder monitoring:
 
     --> Every time a pdf file is added to `watch_directory` it will be OCR'ed
 
+    pypdfocr -w watch_directory --archive --archive_suffix _orig.pdf
+
+    --> Every time a pdf file is added to `watch_directory` it will be OCR'ed.  The original is renamed with the
+    `_orig.pdf` suffix and the OCR'ed version takes over the original file name.
+
+    pypdfocr -w watch_directory --archive --archive_suffix _orig.pdf --initial_scan
+    --> Every time a pdf file is added to `watch_directory` it will be OCR'ed.  The original is renamed with the `_orig.pdf` suffix and the
+    OCR'ed version takes over its name.  PDFs already in the folder are also scanned and OCR'ed if they have not been already.
+
 Automatic filing:
 ~~~~~~~~~~~~~~~~~
 
diff --git a/pypdfocr/pypdfocr.py b/pypdfocr/pypdfocr.py
index 4ef136b..3ccd0bf 100644
--- a/pypdfocr/pypdfocr.py
+++ b/pypdfocr/pypdfocr.py
@@ -160,6 +160,17 @@ def get_options(self, argv):
             default=False, dest='match_using_filename', help='Use filename to match if contents did not match anything, before filing to default folder')
 
 
+        #--------------
+        # Watch Options
+        #--------------
+        p.add_argument('--archive', action='store_true',
+             dest='archive', help='Move the source document to an archive')
+        p.add_argument('--initial_scan', action='store_true',
+             dest='initial_scan', help='Include PDF documents already in folder if not processed')
+        p.add_argument('--archive_suffix',
+             dest='archive_suffix', help='Include PDF documents already in folder if not processed', default='_orig.pdf')
+
+
         # Add flow option to single mode extract_images,preprocess,ocr,write
 
         args = p.parse_args(argv)
@@ -173,6 +184,10 @@ def get_options(self, argv):
         self.match_using_filename = args.match_using_filename
         self.skip_preprocess = args.skip_preprocess
 
+        self.archive = args.archive
+        self.archive_suffix = args.archive_suffix
+        self.initial_scan = args.initial_scan
+
         if self.debug:
             logging.basicConfig(level=logging.DEBUG, format='%(message)s')
 
@@ -320,7 +335,11 @@ def run_conversion(self, pdf_filename):
         """
         print ("Starting conversion of %s" % pdf_filename)
         # Make the images for Tesseract
-        img_dpi, glob_img_filename = self.gs.make_img_from_pdf(pdf_filename)
+        try:
+            img_dpi, glob_img_filename = self.gs.make_img_from_pdf(pdf_filename)
+        except Exception, e:
+            print "Exception occurred in processing %s: %s" % (pdf_filename, e)
+            return
 
         fns = glob.glob(glob_img_filename)
 
@@ -337,7 +356,8 @@ def run_conversion(self, pdf_filename):
         
         # Generate new pdf with overlayed text
         #ocr_pdf_filename = self.pdf.overlay_hocr(tiff_dpi, hocr_filename, pdf_filename)
-        ocr_pdf_filename = self.pdf.overlay_hocr_pages(img_dpi, hocr_filenames, pdf_filename)
+        ocr_pdf_filename = self.pdf.overlay_hocr_pages(img_dpi, hocr_filenames, pdf_filename,
+                                                       archive=self.archive, archive_suffix=self.archive_suffix)
 
         # Clean up the files
         if not self.debug:
@@ -426,13 +446,15 @@ def go(self, argv):
         if self.watch:
             while True:  # Make sure the watcher doesn't terminate
                 try:
-                    py_watcher = PyPdfWatcher(self.watch_dir, self.config.get('watch'))
+                    py_watcher = PyPdfWatcher(self.watch_dir, self.config.get('watch'),
+                                              archive=self.archive, initial_scan=self.initial_scan,
+                                              archive_suffix=self.archive_suffix)
                     for pdf_filename in py_watcher.start():
                         self._convert_and_file_email(pdf_filename)
                 except KeyboardInterrupt:
                     break
                 except Exception as e:
-                    print traceback.print_exc(e)
+                    traceback.print_exc(e)
                     py_watcher.stop()
                     
         else:
@@ -442,14 +464,17 @@ def _convert_and_file_email(self, pdf_filename):
         """
             Helper function to run the conversion, then do the optional filing, and optional emailing.
         """
-        ocr_pdffilename = self.run_conversion(pdf_filename)
-        if self.enable_filing:
-            filing = self.file_converted_file(ocr_pdffilename, pdf_filename)
-        else:
-            filing = "None"
+        try:
+            ocr_pdffilename = self.run_conversion(pdf_filename)
+            if self.enable_filing:
+                filing = self.file_converted_file(ocr_pdffilename, pdf_filename)
+            else:
+                filing = "None"
 
-        if self.enable_email:
-            self._send_email(pdf_filename, ocr_pdffilename, filing)
+            if self.enable_email:
+                self._send_email(pdf_filename, ocr_pdffilename, filing)
+        except Exception, e:
+            print traceback.print_exc(e)
 
 def main(): # pragma: no cover 
     script = PyPDFOCR()
diff --git a/pypdfocr/pypdfocr_gs.py b/pypdfocr/pypdfocr_gs.py
index 5599082..af289dc 100644
--- a/pypdfocr/pypdfocr_gs.py
+++ b/pypdfocr/pypdfocr_gs.py
@@ -174,12 +174,12 @@ def _run_gs(self, options, output_filename, pdf_filename):
             out = subprocess.check_output(cmd, shell=True)
 
         except subprocess.CalledProcessError as e:
-            print e.output
+            print "Exception running Ghostscript:\n\n", e.output
+
             if "undefined in .getdeviceparams" in e.output:
-                error(self.msgs['GS_OUTDATED'])
+                raise(self.msgs['GS_OUTDATED'])
             else:
-                error (self.msgs['GS_FAILED'])
-
+                raise(self.msgs['GS_FAILED'])
 
     def make_img_from_pdf(self, pdf_filename):
         self._get_dpi(pdf_filename) # No need to bother anymore
@@ -189,7 +189,6 @@ def make_img_from_pdf(self, pdf_filename):
 
         filename, filext = os.path.splitext(pdf_filename)
 
-
         # Create ancillary jpeg files to use later to calculate image dpi etc
         #   We no longer use these for the final image. Instead the text is merged
         #   directly with the original PDF.  Yay!
@@ -213,6 +212,7 @@ def make_img_from_pdf(self, pdf_filename):
         options = ' '.join(self.gs_options[self.img_format][1]) % {'dpi':self.output_dpi}
         output_filename = '%s_%%d.%s' % (filename, self.img_file_ext)
         self._run_gs(options, output_filename, pdf_filename)
+
         for fn in glob.glob(globable_filename):
             logging.info("Created image %s" % fn)
         return (self.output_dpi, globable_filename)
diff --git a/pypdfocr/pypdfocr_pdf.py b/pypdfocr/pypdfocr_pdf.py
index b4e31e0..e7c8a39 100644
--- a/pypdfocr/pypdfocr_pdf.py
+++ b/pypdfocr/pypdfocr_pdf.py
@@ -76,7 +76,7 @@ def mergeRotateAroundPointPage(self,page, page2, rotation, tx, ty):
                                                  ctm[1][0], ctm[1][1],
                                                  ctm[2][0], ctm[2][1]])
 
-    def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
+    def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename, archive=False, archive_suffix="_orig.pdf"):
         
         logging.debug("Going to overlay following files onto %s" % orig_pdf_filename)
         # Sort the hocr_filenames into natural keys!
@@ -87,6 +87,7 @@ def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
         basename = os.path.splitext(pdf_basename)[0]
         pdf_filename = os.path.join(pdf_dir, "%s_ocr.pdf" % (basename))
 
+
         text_pdf_filenames = []
         for img_filename, hocr_filename in hocr_filenames:
             text_pdf_filename = self.overlay_hocr_page(dpi, hocr_filename, img_filename)
@@ -96,6 +97,16 @@ def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
 
         writer = PdfFileWriter()
         orig = open(orig_pdf_filename, 'rb')
+        orig_reader = PdfFileReader(orig)
+
+        # Copy the original document's info (metadata) into the output
+        pdf_info = orig_reader.getDocumentInfo()
+        if pdf_info is not None:
+            writer.addMetadata(pdf_info)
+
+        writer.addMetadata({ '/PyPDFOCR': 'True' })
+
+        # Loop through the pages
         for orig_pg, text_pg_filename in zip(self.iter_pdf_page(orig), text_pdf_filenames):
             text_file = open(text_pg_filename, 'rb')
             text_pg = self.iter_pdf_page(text_file).next()
@@ -123,6 +134,15 @@ def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
         for fn in text_pdf_filenames:
             os.remove(fn)
 
+        print "Done on conversion: ", orig_pdf_filename
+        if archive:
+            original_filename = os.path.join(pdf_dir, "%s%s" % (basename, archive_suffix))
+            ocr_filename = orig_pdf_filename
+            print "Archiving PDF %s -> %s, %s -> %s" % (orig_pdf_filename, original_filename, pdf_filename, ocr_filename)
+            os.rename(orig_pdf_filename, original_filename)
+            os.rename(pdf_filename, ocr_filename)
+
+
         logging.info("Created OCR'ed pdf as %s" % (pdf_filename))
         return pdf_filename
 
diff --git a/pypdfocr/pypdfocr_preprocess.py b/pypdfocr/pypdfocr_preprocess.py
index 39abc37..43e11a7 100644
--- a/pypdfocr/pypdfocr_preprocess.py
+++ b/pypdfocr/pypdfocr_preprocess.py
@@ -25,15 +25,42 @@
 import logging
 import glob
 import functools
+import signal
 
 from multiprocessing import Pool
 
+TIMEOUT = 500
+
 # Ugly hack to pass in object method to the multiprocessing library
 # From http://www.rueckstiess.net/research/snippets/show/ca1d7d90
 # Basically gets passed in a pair of (self, arg), and calls the method
 def unwrap_self(arg, **kwarg):
     return PyPreprocess._run_preprocess(*arg, **kwarg)
 
+class TimeoutError(Exception):
+    pass
+
+
+def handler(signum, frame):
+    raise TimeoutError()
+
+def which(program):
+    import os
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+    fpath, fname = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        for path in os.environ["PATH"].split(os.pathsep):
+            path = path.strip('"')
+            exe_file = os.path.join(path, program)
+            if is_exe(exe_file):
+                return exe_file
+
+    return None
 
 class PyPreprocess(object):
     """Class to wrap all the ImageMagick convert calls"""
@@ -51,12 +78,31 @@ def cmd(self, cmd_list):
             cmd_list = ' '.join(cmd_list)
         logging.debug("Running cmd: %s" % cmd_list)
         try:
-            out = subprocess.check_output(cmd_list, stderr=subprocess.STDOUT, shell=True)
+            signal.signal(signal.SIGALRM, handler)
+            signal.alarm(TIMEOUT)
+            proc = subprocess.Popen(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, preexec_fn=os.setsid)
+            pid = proc.pid
+            (out, error) = proc.communicate()
+            signal.alarm(0)
             logging.debug(out)
             return out
         except subprocess.CalledProcessError as e:
             print e.output
             self._warn("Could not run command %s" % cmd_list)
+        except TimeoutError, te:
+            print "Timeout exceeded PID", pid, cmd_list
+            os.killpg(pid, signal.SIGTERM)
+            # os.kill(pid, signal.SIGTERM)
+        finally:
+            signal.alarm(0)
+
+        if proc:
+            proc.terminate()
+            proc.kill()
+            print "Killing processes"
+
+        return None
+
             
 
     def _run_preprocess(self,  in_filename):
@@ -69,7 +115,8 @@ def _run_preprocess(self,  in_filename):
         else:
             backslash = '\\'
 
-        c = ['convert',
+        convert = which('convert');
+        c = [convert,
                 '"%s"' % in_filename,
                 '-respect-parenthesis',
                 #'\\( $setcspace -colorspace gray -type grayscale \\)',
@@ -86,17 +133,23 @@ def _run_preprocess(self,  in_filename):
                 ]
         logging.info("Preprocessing image %s for better OCR" % in_filename)
         res = self.cmd(c)
+
         if res is None:
             return in_filename
         else:
-            return out_filename
+            # Make sure the convert process did not die on us
+            if os.path.isfile(out_filename):
+                print "Filename does not exist: ", out_filename, " using ", in_filename
+                return out_filename
+
+            return in_filename
 
     def preprocess(self, in_filenames):
         fns = in_filenames
 
         pool = Pool(processes=self.threads)
         logging.info("Starting preprocessing parallel execution")
-        preprocessed_filenames = pool.map(unwrap_self,zip([self]*len(fns),fns))
+        preprocessed_filenames = pool.map(unwrap_self, zip([self]*len(fns),fns))
         pool.close()
         pool.join()
         logging.info ("Completed preprocessing")
diff --git a/pypdfocr/pypdfocr_tesseract.py b/pypdfocr/pypdfocr_tesseract.py
index 476d5cd..392a980 100644
--- a/pypdfocr/pypdfocr_tesseract.py
+++ b/pypdfocr/pypdfocr_tesseract.py
@@ -22,6 +22,7 @@
 import os, sys
 import logging
 import subprocess
+import signal
 import glob
 from subprocess import CalledProcessError
 from multiprocessing import Pool
@@ -36,6 +37,9 @@ def error(text):
 def unwrap_self(arg, **kwarg):
     return PyTesseract.make_hocr_from_pnm(*arg, **kwarg)
 
+def init_worker():
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
 class PyTesseract(object):
     """Class to wrap all the tesseract calls"""
     def __init__(self, config):
@@ -44,7 +48,7 @@ def __init__(self, config):
         """
         self.lang = 'eng'
         self.required = "3.02.02"
-        self.threads = config.get('threads',4)
+        self.threads = config.get('threads', 4)
 
         if "binary" in config:  # Override location of binary
             binary = config['binary']
@@ -129,12 +133,11 @@ def make_hocr_from_pnms(self, fns):
 
         # Glob it
         #fns = glob.glob(img_filename)
-        pool = Pool(processes=self.threads)
-        print("Making pool")
+        pool = Pool(processes=self.threads, initializer=init_worker)
         hocr_filenames = pool.map(unwrap_self, zip([self]*len(fns), fns))
         pool.close()
         pool.join()
-        return zip(fns,hocr_filenames)
+        return zip(fns, hocr_filenames)
 
 
     def make_hocr_from_pnm(self, img_filename):
diff --git a/pypdfocr/pypdfocr_watcher.py b/pypdfocr/pypdfocr_watcher.py
index 73581e0..a495baf 100755
--- a/pypdfocr/pypdfocr_watcher.py
+++ b/pypdfocr/pypdfocr_watcher.py
@@ -15,7 +15,9 @@
 from watchdog.events import LoggingEventHandler
 from watchdog.events import FileSystemEventHandler
 
-        
+from PyPDF2 import PdfFileReader
+from PyPDF2.utils import PdfReadError
+
 class PyPdfWatcher(FileSystemEventHandler):
     """
         Watch a folder for new pdf files.
@@ -28,12 +30,20 @@ class PyPdfWatcher(FileSystemEventHandler):
     events = {}
     events_lock = Lock()
 
-    def __init__(self, monitor_dir, config):
+    def __init__(self, monitor_dir, config, archive=False, initial_scan=False,
+                 archive_suffix="_orig.pdf"):
         FileSystemEventHandler.__init__(self)
 
         self.monitor_dir = monitor_dir
+        self.archive_suffix = archive_suffix
+        self.archive = archive
+
         if not config: config = {}
 
+        # Scan initial folder
+        if initial_scan:
+            self.scan_folder()
+
         self.scan_interval = config.get('scan_interval', 3) # If no updates in 3 seconds (or user specified option in config file) process file
 
     def start(self):
@@ -48,11 +58,11 @@ def start(self):
             if newFile:
                 yield newFile
         self.observer.join()
-            
+
 
     def stop(self):
         self.observer.stop()
-        
+
     def rename_file_with_spaces(self, pdf_filename):
         """
             Rename any portion of a filename that has spaces in the basename with underscores.
@@ -68,11 +78,43 @@ def rename_file_with_spaces(self, pdf_filename):
             newFilename = os.path.join(filepath, filename.replace(' ','_'))
             logging.debug("Renaming spaces")
             logging.debug("---> %s \n ------> %s" % (pdf_filename, newFilename))
-            shutil.move(pdf_filename, newFilename) 
+            shutil.move(pdf_filename, newFilename)
             return newFilename
         else:
             return pdf_filename
 
+    def check_file_for_processing(self, ev_path):
+        """
+        Check a path to see if we should process it.
+
+        :param ev_path: Fully qualified path to file to check
+        :return: True if it should be converted. False if not
+        """
+        if not ev_path.endswith(".pdf"):
+            return False
+
+        if ev_path.endswith("_ocr.pdf"):
+            return False
+
+        if self.archive_suffix and ev_path.endswith(self.archive_suffix):
+            return False
+
+        try:
+            with open(ev_path, "rb") as f:
+                pdf = PdfFileReader(f)
+                pdf_info = pdf.getDocumentInfo()
+
+                # It has already been OCR'ed (the /PyPDFOCR key is present)
+                if pdf_info is not None and '/PyPDFOCR' in pdf_info:
+                    return False
+        except IOError:
+            return False
+        except PdfReadError:
+            return False
+
+        return True
+
+
     def check_for_new_pdf(self,ev_path):
         """
             Called by the file watching api on any file creations/modifications.
@@ -87,29 +129,29 @@ def check_for_new_pdf(self,ev_path):
                 - Add it with the current time
 
             Otherwise:
-                
+
                 - If the file time is marked as -1, delete it from the dict
                 - Else, update the time in the dict to the current time
 
         """
-        if ev_path.endswith(".pdf"):
-            if not ev_path.endswith("_ocr.pdf"):
-                PyPdfWatcher.events_lock.acquire()
-                if not ev_path in PyPdfWatcher.events:
-                    PyPdfWatcher.events[ev_path] = time.time()
-                    logging.info ("Adding %s to event queue" % ev_path)
-                else:
-                    if PyPdfWatcher.events[ev_path] == -1:
-                        logging.info ( "%s removing from event queue" % (ev_path))
-                        del PyPdfWatcher.events[ev_path]
-                    else: 
-                        newTime = time.time()
-                        logging.debug ( "%s already in event queue, updating timestamp to %d" % (ev_path, newTime))
-                        PyPdfWatcher.events[ev_path]  = newTime
-                PyPdfWatcher.events_lock.release()
+        result = self.check_file_for_processing(ev_path)
+        if not result:
+            return
+
+        PyPdfWatcher.events_lock.acquire()
+        if not ev_path in PyPdfWatcher.events:
+            PyPdfWatcher.events[ev_path] = time.time()
+            logging.info ("Adding %s to event queue" % ev_path)
+        else:
+            if PyPdfWatcher.events[ev_path] == -1:
+                logging.info ( "%s removing from event queue" % (ev_path))
+                del PyPdfWatcher.events[ev_path]
+            else:
+                newTime = time.time()
+                logging.debug ( "%s already in event queue, updating timestamp to %d" % (ev_path, newTime))
+                PyPdfWatcher.events[ev_path]  = newTime
+        PyPdfWatcher.events_lock.release()
 
-                      
-              
     def on_created(self, event):
         logging.debug ("on_created: %s at time %d" % (event.src_path, time.time()))
         self.check_for_new_pdf(event.src_path)
@@ -125,7 +167,7 @@ def on_modified(self, event):
     def check_queue(self):
         """
             This function is called at regular intervals by :func:`start`.
-            
+
             Iterate through the events, and if there is any with a timestamp
             greater than the scan_interval, return it and set its timestamp to -1
             for purging later.
@@ -148,5 +190,46 @@ def check_queue(self):
         PyPdfWatcher.events_lock.release()
         return None
 
-
-
+    def scan_folder(self):
+        path = os.path.abspath(self.monitor_dir)
+        dirs, files = self.separate_folder_contents(path)[:2]
+        self.scan_folder_internal(path, dirs, files)
+
+
+    def scan_folder_internal(self, root, dirs, files):
+        if files:
+            for name in files:
+                path = os.path.join(root, name)
+
+                result = self.check_file_for_processing(path)
+                if not result:
+                    continue
+
+                PyPdfWatcher.events[path] = time.time()
+
+        for pos, neg, name in self.enumerate2(dirs):
+            path = os.path.join(root, name)
+
+            try:
+                dirs, files = self.separate_folder_contents(path)[:2]
+            except:
+                pass
+            else:
+                self.scan_folder_internal(path, dirs, files)
+
+    def separate_folder_contents(self, path):
+        dirs, files, links = [], [], []
+        for name in os.listdir(path):
+            path_name = os.path.join(path, name)
+            if os.path.isdir(path_name):
+                dirs.append(name)
+            elif os.path.isfile(path_name):
+                files.append(name)
+            elif os.path.islink(path_name):
+                links.append(name)
+        return dirs, files, links
+
+    def enumerate2(self, sequence):
+        length = len(sequence)
+        for count, value in enumerate(sequence):
+            yield count, count - length, value