Add support for files whose original name was already found

This commit is contained in:
Grégory Soutadé 2025-04-02 17:01:18 +02:00
parent 26c784e59f
commit 7cdad12930

50
photorec_stage_2.py Normal file → Executable file
View File

@ -25,6 +25,7 @@ from zipfile import ZipFile
import argparse
import shutil
from datetime import datetime
import re
OOO_CREATION_DATE_START = '<meta:creation-date>'
OOO_CREATION_DATE_END = '</meta:creation-date>'
@ -54,7 +55,7 @@ def remove_extension(filename):
def copy_file(orig_filepath, filename, outdir, verbose, extension=None):
if not os.path.exists(outdir):
if verbose:
print('mkdirs %s' % (outdir))
print(f'mkdirs {outdir}')
os.makedirs(outdir)
if not extension:
@ -69,7 +70,7 @@ def copy_file(orig_filepath, filename, outdir, verbose, extension=None):
filename = os.path.join(outdir, '%s.%s' % (filename, extension))
if verbose:
print('\tCopy %s => %s' % (orig_filepath, filename))
print(f'\tCopy {orig_filepath} => {filename}')
statistics[extension] = statistics.get(extension, 0) + 1
@ -88,7 +89,7 @@ def _try_open_office(orig_filepath, zipfile, zipname, filename, outdir, verbose)
ext = OPEN_OFFICE_MIME_TYPES[mime]
if verbose:
print('Found %s file' % (ext))
print(f'Found {ext} file')
try:
meta = zipfile.open('meta.xml')
@ -218,10 +219,10 @@ def manage_picture(orig_filepath, filename, extension, outdir, verbose=False):
dt = None
try:
dt = datetime.strptime(tags['EXIF DateTimeOriginal'].values, '%Y:%m:%d %H:%M:%S')
except KeyError, e: # No 'EXIF DateTimeOriginal'
except KeyError as e: # No 'EXIF DateTimeOriginal'
return False
except:
print 'Invalid date format \'%s\'' % tags['EXIF DateTimeOriginal']
print(f'Invalid date format \'{tags['EXIF DateTimeOriginal']}\'')
return False
copy_datetime_file(dt, orig_filepath, outdir, verbose, extension)
@ -270,8 +271,8 @@ parser = argparse.ArgumentParser(description='Photorec post script analysis: try
parser.add_argument('--in', dest='in_dir', help='Directory in (with photorec results)', required=True)
parser.add_argument('--out', dest='out_dir', help='Directory out (script results)', required=True)
parser.add_argument('--max-files-per-temp', dest='max_files_per_temp',
help='Maximum unknown files in temprorary directory, -1 for no temp dir',
type=int, default=50)
help='Maximum unknown files in temprorary directory, -1 for no temp dir (default)',
type=int, default=-1)
parser.add_argument('--skip-ext', dest='skip_ext', help='Don\'t copy some extensions (comma separated eg : mp3,txt,doc)', default='')
parser.add_argument('--only-ext', dest='only_ext', help='Copy some extensions (comma separated eg : mp3,txt,doc)', default='')
parser.add_argument('--min-size', dest='min_size', help='Minimum size for an extension (comma separated eg : mp3:1M,txt:4k,doc:8)', default='')
@ -282,19 +283,28 @@ args = parser.parse_args()
file_ops = {
'zip': manage_zip,
'odt': manage_zip,
'ods': manage_zip,
'odp': manage_zip,
'docx': manage_zip,
'xlsx': manage_zip,
'pptx': manage_zip,
}
try:
import eyed3
file_ops['mp3'] = manage_audio
except:
print('Package eyed3 not installed, mp3 format not supported. Use pip install eyed3')
print('Package eyed3 not installed, mp3 format not supported. Use pip install eyed3\n')
try:
import exifread
file_ops['jpg'] = manage_picture
file_ops['jpg'] = manage_picture
file_ops['jpeg'] = manage_picture
file_ops['JPG'] = manage_picture
file_ops['JPEG'] = manage_picture
except:
print('Package exifread not installed, jpg format not supported. Use pip install exifread')
print('Package exifread not installed, jpg format not supported. Use pip install exifread\n')
file_ops_keys = file_ops.keys()
@ -306,6 +316,8 @@ only_exts = args.only_ext and args.only_ext.split(',') or None
min_sizes = parse_min_sizes(args.min_size)
min_sizes_keys = min_sizes.keys()
filename_re = re.compile(r"f\d+_(.+)")
# Quiet mode takes precedence: force verbose off
if args.quiet: args.verbose = False
@ -321,13 +333,13 @@ for root, dirs, files in os.walk(args.in_dir):
# Only some extensions
if only_exts and cur_extension not in only_exts:
if args.verbose:
print('Skipping %s (only extension)' % (full_path))
print(f'Skipping {full_path} (only extension)')
continue
# Skipping some extensions
if skip_exts and cur_extension in skip_exts:
if args.verbose:
print('Skipping %s (skip extension)' % (full_path))
print(f'Skipping {full_path} (skip extension)')
continue
# Min sizes
@ -335,13 +347,19 @@ for root, dirs, files in os.walk(args.in_dir):
statinfo = os.stat(full_path)
if statinfo.st_size < min_sizes[cur_extension]:
if args.verbose:
print('Skipping %s (min size)' % (full_path))
print(f'Skipping {full_path} (min size)')
continue
# Original filename already found (name matches f<digits>_<name>): copy it under <name>
m = filename_re.match(filename)
if m:
copy_file(full_path, m.group(1), outdir, args.verbose)
continue
# Filtered files
if cur_extension in file_ops_keys:
if args.verbose:
print('Filter \'%s\'' % (full_path))
print(f'Filter \'{full_path}\'')
ret = file_ops[cur_extension](full_path, filename, cur_extension,
args.out_dir, args.verbose)
@ -356,11 +374,11 @@ for root, dirs, files in os.walk(args.in_dir):
cur_files_in_out_dir = cur_files_in_out_dir + 1
if args.verbose:
print('Std copy %s => %s' % (full_path, outdir))
print(f'Std copy {full_path} => {outdir}/{filename}')
copy_file(full_path, filename, outdir, verbose=False)
if not args.quiet:
print('Statistics :\n')
for key in sorted(statistics.keys()):
print('\t.%s\t=> %d' % (key, statistics[key]))
print(f'\t.{key}\t=> {statistics[key]}')