diff --git a/photorec_stage_2.py b/photorec_stage_2.py old mode 100644 new mode 100755 index 99e9d23..b97533b --- a/photorec_stage_2.py +++ b/photorec_stage_2.py @@ -25,6 +25,7 @@ from zipfile import ZipFile import argparse import shutil from datetime import datetime +import re OOO_CREATION_DATE_START = '' OOO_CREATION_DATE_END = '' @@ -54,7 +55,7 @@ def remove_extension(filename): def copy_file(orig_filepath, filename, outdir, verbose, extension=None): if not os.path.exists(outdir): if verbose: - print('mkdirs %s' % (outdir)) + print(f'mkdirs {outdir}') os.makedirs(outdir) if not extension: @@ -69,7 +70,7 @@ def copy_file(orig_filepath, filename, outdir, verbose, extension=None): filename = os.path.join(outdir, '%s.%s' % (filename, extension)) if verbose: - print('\tCopy %s => %s' % (orig_filepath, filename)) + print(f'\tCopy {orig_filepath} => {filename}') statistics[extension] = statistics.get(extension, 0) + 1 @@ -88,7 +89,7 @@ def _try_open_office(orig_filepath, zipfile, zipname, filename, outdir, verbose) ext = OPEN_OFFICE_MIME_TYPES[mime] if verbose: - print('Found %s file' % (ext)) + print(f'Found {ext} file') try: meta = zipfile.open('meta.xml') @@ -218,10 +219,10 @@ def manage_picture(orig_filepath, filename, extension, outdir, verbose=False): dt = None try: dt = datetime.strptime(tags['EXIF DateTimeOriginal'].values, '%Y:%m:%d %H:%M:%S') - except KeyError, e: # No 'EXIF DateTimeOriginal' + except KeyError as e: # No 'EXIF DateTimeOriginal' return False except: - print 'Invalid date format \'%s\'' % tags['EXIF DateTimeOriginal'] + print(f'Invalid date format \'{tags['EXIF DateTimeOriginal']}\'') return False copy_datetime_file(dt, orig_filepath, outdir, verbose, extension) @@ -270,8 +271,8 @@ parser = argparse.ArgumentParser(description='Photorec post script analysis: try parser.add_argument('--in', dest='in_dir', help='Directory in (with photorec results)', required=True) parser.add_argument('--out', dest='out_dir', help='Directory out (script results)', required=True) parser.add_argument('--max-files-per-temp', dest='max_files_per_temp', - help='Maximum unknown files in temprorary directory, -1 for no temp dir', - type=int, default=50) + help='Maximum unknown files in temprorary directory, -1 for no temp dir (default)', + type=int, default=-1) parser.add_argument('--skip-ext', dest='skip_ext', help='Don\'t copy some extensions (comma separated eg : mp3,txt,doc)', default='') parser.add_argument('--only-ext', dest='only_ext', help='Copy some extensions (comma separated eg : mp3,txt,doc)', default='') parser.add_argument('--min-size', dest='min_size', help='Minimum size for an extension (comma separated eg : mp3:1M,txt:4k,doc:8)', default='') @@ -282,19 +283,28 @@ args = parser.parse_args() file_ops = { 'zip': manage_zip, + 'odt': manage_zip, + 'ods': manage_zip, + 'odp': manage_zip, + 'docx': manage_zip, + 'xlsx': manage_zip, + 'pptx': manage_zip, } try: import eyed3 file_ops['mp3'] = manage_audio except: - print('Package eyed3 not installed, mp3 format not supported. Use pip install eyed3') + print('Package eyed3 not installed, mp3 format not supported. Use pip install eyed3\n') try: import exifread - file_ops['jpg'] = manage_picture + file_ops['jpg'] = manage_picture + file_ops['jpeg'] = manage_picture + file_ops['JPG'] = manage_picture + file_ops['JPEG'] = manage_picture except: - print('Package exifread not installed, jpg format not supported. Use pip install exifread') + print('Package exifread not installed, jpg format not supported. Use pip install exifread\n') file_ops_keys = file_ops.keys() @@ -306,6 +316,8 @@ only_exts = args.only_ext and args.only_ext.split(',') or None min_sizes = parse_min_sizes(args.min_size) min_sizes_keys = min_sizes.keys() +filename_re = re.compile(r"f\d+_(.+)") + # Disable (force) verbose on quiet if args.quiet: args.verbose = False @@ -321,13 +333,13 @@ for root, dirs, files in os.walk(args.in_dir): # Only some extensions if only_exts and cur_extension not in only_exts: if args.verbose: - print('Skipping %s (only extension)' % (full_path)) + print(f'Skipping {full_path} (only extension)') continue # Skipping some extensions if skip_exts and cur_extension in skip_exts: if args.verbose: - print('Skipping %s (skip extension)' % (full_path)) + print(f'Skipping {full_path} (skip extension)') continue # Min sizes @@ -335,13 +347,19 @@ for root, dirs, files in os.walk(args.in_dir): statinfo = os.stat(full_path) if statinfo.st_size < min_sizes[cur_extension]: if args.verbose: - print('Skipping %s (min size)' % (full_path)) + print(f'Skipping {full_path} (min size)') continue + # Filename already found + m = filename_re.match(filename) + if m: + copy_file(full_path, m.group(1), outdir, args.verbose) + continue + # Filtered files if cur_extension in file_ops_keys: if args.verbose: - print('Filter \'%s\'' % (full_path)) + print(f'Filter \'{full_path}\'') ret = file_ops[cur_extension](full_path, filename, cur_extension, args.out_dir, args.verbose) @@ -356,11 +374,11 @@ for root, dirs, files in os.walk(args.in_dir): cur_files_in_out_dir = cur_files_in_out_dir + 1 if args.verbose: - print('Std copy %s => %s' % (full_path, outdir)) + print(f'Std copy {full_path} => {outdir}/{filename}') copy_file(full_path, filename, outdir, verbose=False) if not args.quiet: print('Statistics :\n') for key in sorted(statistics.keys()): - print('\t.%s\t=> %d' % (key, statistics[key])) + print(f'\t.{key}\t=> {statistics[key]}')