dupecheck.py

changeset 37
5be334b71b08
parent 36
a1ad6f4728be
equal deleted inserted replaced
36:a1ad6f4728be 37:5be334b71b08
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 """ 3 """
4 Toolkit / executable to scan for duplicate filenames in movie database 4 Toolkit / executable to scan for duplicate filenames in movie database
5 5 More functions:
6 2017-2019 by mdd 6 * sanitize filenames
7 * statistics
8
9 2017-2020 by mdd
7 """ 10 """
8 11
9 #pylint: disable=line-too-long 12 #pylint: disable=line-too-long
10 #pylint: disable=invalid-name 13 #pylint: disable=invalid-name
11 14
69 72
70 # remove parentheses with contents in title 73 # remove parentheses with contents in title
71 title = RE_PARENTHESES.sub("", title) 74 title = RE_PARENTHESES.sub("", title)
72 75
73 self.filelist.append([title, filename, root, ext]) 76 self.filelist.append([title, filename, root, ext])
74 elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']: 77 elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4', '.ogg', '.mp3', '.iso']:
75 title = filename[:-4].lower() 78 title = filename[:-4].lower()
76 title = RE_PARENTHESES.sub("", title) 79 title = RE_PARENTHESES.sub("", title)
77 self.filelist.append([title, filename, root, ext]) 80 self.filelist.append([title, filename, root, ext])
78 elif ext in extra: 81 elif ext in extra:
79 title = filename[:-4].lower() 82 title = filename[:-4].lower()
166 """ 169 """
167 Search for defective filenames and remove illegal characters 170 Search for defective filenames and remove illegal characters
168 """ 171 """
169 import re 172 import re
170 for item in self.filelist: 173 for item in self.filelist:
171 if not item[3] in ['.mkv', '.txt']: 174 if not item[3] in ['.mkv', '.txt', '.nfo']:
172 continue 175 continue
173 # any non-alphanumeric characters in filename? 176 # any non-alphanumeric characters in filename?
174 cleanfn = replace_all(item[1], { 177 cleanfn = replace_all(item[1], {
175 #'ä':'ae', 'Ä':'Ae', 178 #'ä':'ae', 'Ä':'Ae',
176 #'ö':'oe', 'Ö':'Oe', 179 #'ö':'oe', 'Ö':'Oe',
177 #'ü':'ue', 'Ü':'Ue', 180 #'ü':'ue', 'Ü':'Ue',
178 'ß':'ss', 181 'ß': 'ss',
182 ':': ' -',
179 }) 183 })
180 cleanfn = re.sub(r'[^A-Za-z0-9\.\_\-\(\)\&öäüÖÄÜ\' ]', '-', cleanfn) 184 cleanfn = re.sub(r'[^A-Za-z0-9\.\_\-\(\)\[\]\{\}\&öäüÖÄÜ\' ]', '-', cleanfn)
185
186 # if cleanfn.endswith(".nfo"):
187 # # force .txt ending of nfo files
188 # # TODO: later there should be nfo files for kodi
189 # cleanfn = cleanfn[:-4] + ".txt"
190
191 checklist = re.findall(r'\([^\(\)]+\)', cleanfn)
192 for nonyear in checklist:
193 if re.match(r'\(\d{4}\)', nonyear):
194 continue
195 cleanfn = replace_all(cleanfn, {
196 nonyear: replace_all(nonyear, {'(':'[', ')':']'})
197 })
198 #print ("NONYEAR: ", nonyear)
199
200 checklist = re.findall(r'\[\d{4}[^\]]+\]', cleanfn)
201 for year in checklist:
202 cleanfn = replace_all(cleanfn, {
203 year: replace_all(year, {
204 year[:5]: '(' + year[1:5] + ') ['
205 })
206 })
207 # print ("YEAR: ", year)
208
209
181 if item[1] == cleanfn: 210 if item[1] == cleanfn:
182 continue 211 continue
183 print (item[1]) 212 print (item[1], " -> ", cleanfn)
184 os.rename( 213 os.rename(
185 os.path.join(item[2], item[1]), 214 os.path.join(item[2], item[1]),
186 os.path.join(item[2], cleanfn) 215 os.path.join(item[2], cleanfn)
187 ) 216 )
188 217
309 print("Loading ignore filename prefixes file for dupe checking...") 338 print("Loading ignore filename prefixes file for dupe checking...")
310 dupe.ignore_fileprefix = [line.rstrip('\n').rstrip('\r') for line in open("dupecheck-ignore.txt", "rb")] 339 dupe.ignore_fileprefix = [line.rstrip('\n').rstrip('\r') for line in open("dupecheck-ignore.txt", "rb")]
311 340
312 if args.fixnames: 341 if args.fixnames:
313 for srcstr in args.basedir: 342 for srcstr in args.basedir:
314 dupe.scandir(srcstr, ['.txt']) 343 dupe.scandir(srcstr, ['.txt', '.nfo'])
315 if len(dupe.filelist) > 0: 344 if len(dupe.filelist) > 0:
316 print ("Checking %i file names..." % len(dupe.filelist)) 345 print ("Checking %i file names..." % len(dupe.filelist))
317 dupe.fixnames() 346 dupe.fixnames()
318 dupe.filelist = [] 347 dupe.filelist = []
319 sys.exit(0) 348 sys.exit(0)

mercurial