# HG changeset patch # User mdd # Date 1583752769 -3600 # Node ID 5be334b71b089c0927d56a1b39ba9f630bf2a790 # Parent a1ad6f4728be2c30a41e6f6896930c5fa9d977bc optimized fixnames for kodi diff -r a1ad6f4728be -r 5be334b71b08 .hgignore --- a/.hgignore Wed Feb 13 14:10:55 2019 +0100 +++ b/.hgignore Mon Mar 09 12:19:29 2020 +0100 @@ -5,3 +5,4 @@ *.pyc eit.old/* testfiles/* +tmp/* \ No newline at end of file diff -r a1ad6f4728be -r 5be334b71b08 dupecheck.py --- a/dupecheck.py Wed Feb 13 14:10:55 2019 +0100 +++ b/dupecheck.py Mon Mar 09 12:19:29 2020 +0100 @@ -2,8 +2,11 @@ # -*- coding: utf-8 -*- """ Toolkit / executable to scan for duplicate filenames in movie database +More functions: + * sanitize filenames + * statistics -2017-2019 by mdd +2017-2020 by mdd """ #pylint: disable=line-too-long @@ -71,7 +74,7 @@ title = RE_PARENTHESES.sub("", title) self.filelist.append([title, filename, root, ext]) - elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']: + elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4', '.ogg', '.mp3', '.iso']: title = filename[:-4].lower() title = RE_PARENTHESES.sub("", title) self.filelist.append([title, filename, root, ext]) @@ -168,19 +171,45 @@ """ import re for item in self.filelist: - if not item[3] in ['.mkv', '.txt']: + if not item[3] in ['.mkv', '.txt', '.nfo']: continue # any non-alphanumeric characters in filename? cleanfn = replace_all(item[1], { #'ä':'ae', 'Ä':'Ae', #'ö':'oe', 'Ö':'Oe', #'ü':'ue', 'Ü':'Ue', - 'ß':'ss', + 'ß': 'ss', + ':': ' -', }) - cleanfn = re.sub(r'[^A-Za-z0-9\.\_\-\(\)\&öäüÖÄÜ\' ]', '-', cleanfn) + cleanfn = re.sub(r'[^A-Za-z0-9\.\_\-\(\)\[\]\{\}\&öäüÖÄÜ\' ]', '-', cleanfn) + + # if cleanfn.endswith(".nfo"): + # # force .txt ending of nfo files + # # TODO: later there should nfo files for kodi + # cleanfn = cleanfn[:-4] + ".txt" + + checklist = re.findall(r'\([^\(\)]+\)', cleanfn) + for nonyear in checklist: + if re.match(r'\(\d{4}\)', nonyear): + continue + cleanfn = replace_all(cleanfn, { + nonyear: replace_all(nonyear, {'(':'[', ')':']'}) + }) + #print ("NONYEAR: ", nonyear) + + checklist = re.findall(r'\[\d{4}[^\]]+\]', cleanfn) + for year in checklist: + cleanfn = replace_all(cleanfn, { + year: replace_all(year, { + year[:5]: '(' + year[1:5] + ') [' + }) + }) + # print ("YEAR: ", year) + + if item[1] == cleanfn: continue - print (item[1]) + print (item[1], " -> ", cleanfn) os.rename( os.path.join(item[2], item[1]), os.path.join(item[2], cleanfn) @@ -311,7 +340,7 @@ if args.fixnames: for srcstr in args.basedir: - dupe.scandir(srcstr, ['.txt']) + dupe.scandir(srcstr, ['.txt', '.nfo']) if len(dupe.filelist) > 0: print ("Checking %i file names..." % len(dupe.filelist)) dupe.fixnames()