@@ -70,6 +70,9 @@ def decorator(func: Callable[[], list[dict]]):
7070 return decorator
7171
# Register the built-in backup levels, lowest (least eager) first.
add_backup_level(0, "Cache Only", "Don't proactively fill the cache at all")

# @backup_level(1, "lone published", "Non-video files included but without a website source")

add_backup_level(10, "High", "All valuable items in need of backing up")
add_backup_level(30, "Medium", "All items in active need of backing up")
7578
@@ -292,35 +295,50 @@ def run_backup_level(level: BackupLevel, parallelism=14):
292295 graceful_threadmap (download_file_to_cache , files , unit = 'f' , max_workers = parallelism )
293296 print (f"Done backing up to level { level .level } !" )
294297
def sideload_file(file: Path, cache_dir: Path, parent_folder: str | None, move: bool, check: bool):
    """Move (or copy, if not `move`) `file` into the local cache at `cache_dir`.

    The destination path inside the cache is derived from the file's md5.
    If the file isn't known to `gdrive.gcache` yet, it is first uploaded to
    `parent_folder` (or skipped with a warning when no folder is given).
    When `check` is true, an existing file at the target path is re-hashed
    and replaced if corrupted; otherwise it is trusted as-is.
    """
    assert cache_dir.is_dir()
    hashval = md5(file)
    target_path = gdrive.gcache.get_cache_path_for_md5(hashval)
    if not target_path:
        if not parent_folder:
            print(f"WARNING: Skipping untracked file {file}")
            return
        # Uploading registers the file with the cache, so the lookup below
        # succeeds afterwards; the returned file id itself is not needed.
        gdrive.gcache.upload_file(file, folder_id=parent_folder)
        target_path = gdrive.gcache.get_cache_path_for_md5(hashval)
    assert target_path is not None
    assert target_path.suffix == file.suffix.lower(), f"How did we get a different extension {target_path.suffix} for {file}?"
    # Trashed items sit one directory level deeper (a 'trash' ancestor three
    # levels up) — TODO confirm exact trash layout against gcache.
    is_in_trash = target_path.parent.parent.parent.name == 'trash'
    if is_in_trash:
        if target_path.exists() and md5(target_path) != hashval:
            # A different file already occupies the trash slot: fall back to
            # a path keyed by the original filename stem instead.
            new_path = target_path.with_stem(file.stem)
            if new_path.exists() and md5(new_path) != hashval:
                raise FileExistsError(f"{new_path} also exists with a different file. Idk what to do now")
            target_path = new_path
        print(f"WARNING: File was trashed. Placing in {target_path}")
    if target_path.exists():
        if not check or md5(target_path) == hashval:
            # Already cached (and verified, when asked to): nothing to write.
            if move:
                file.unlink()
            return
        print(f"Found corrupted: {target_path}")
        target_path.unlink()
    # Trash targets may need more than one missing directory level created.
    target_path.parent.mkdir(exist_ok=True, parents=is_in_trash)
    if move:
        file.rename(target_path)
    else:
        shutil.copy2(file, target_path)
321333
322334
323- def sideload_main (files : Collection [Path ], parent_folder : str | None = None , move : bool = True ):
335+ def sideload_main (
336+ files : Collection [Path ],
337+ parent_folder : str | None = None ,
338+ move : bool = True ,
339+ recurse : bool = False ,
340+ check : bool = False ,
341+ ):
324342 if parent_folder :
325343 if parent_folder .startswith (gdrive .FOLDER_LINK_PREFIX ):
326344 parent_folder = gdrive .folderlink_to_id (parent_folder )
@@ -329,17 +347,23 @@ def sideload_main(files: Collection[Path], parent_folder: str | None = None, mov
329347 raise ValueError (f"Folder with ID { parent_folder } not found" )
330348 if folder ['mimeType' ] != 'application/vnd.google-apps.folder' :
331349 raise ValueError (f"{ parent_folder } is not a Google Drive Folder, but a { folder ['mimeType' ]} " )
350+ to_remove = set ()
351+ for file in files :
352+ if not file .exists ():
353+ raise FileNotFoundError (file )
354+ if file .is_dir ():
355+ if not recurse :
356+ raise ValueError (f"{ file } is a directory! Please specify files or use -r" )
357+ to_remove .add (file )
358+ for child in file .iterdir ():
359+ files .append (child )
360+ files = [f for f in files if f not in to_remove ]
332361 if len (files ) > 100 :
333362 file_iter = tqdm (files )
334363 else :
335364 file_iter = iter (files )
336365 for file in file_iter :
337- if not file .exists ():
338- print (f"WARNING: { file } does not exist!" )
339- continue
340- if file .is_dir ():
341- raise ValueError (f"{ file } is a directory! Please only specify specific files" )
342- sideload_file (file , gdrive .gcache .file_cache_dir , parent_folder , move )
366+ sideload_file (file , gdrive .gcache .file_cache_dir , parent_folder , move , check )
343367
344368def get_saved_backup_level () -> int | None :
345369 with gdrive .gcache ._lock :
@@ -358,6 +382,29 @@ def save_backup_level(level: int):
358382 )
359383 gdrive .gcache .conn .commit ()
360384
def backup_main(new_max_level: int | None = None, parallelism: int = 0):
    """Run every registered backup level up to the configured maximum.

    `new_max_level`, when given, is persisted as the new saved level;
    otherwise the previously saved level is loaded (exiting with an error
    if none exists). `parallelism` values below 1 fall back to 14 workers.
    Exits the process directly on missing level (1) or level 0 (0).
    """
    import sys
    if new_max_level is not None:
        save_backup_level(new_max_level)
        max_level = new_max_level
    else:
        max_level = get_saved_backup_level()
        if max_level is None:
            print("ERROR: No backup level supplied and no previous level found in the database. Please provide a --level.", file=sys.stderr)
            sys.exit(1)
    if max_level == 0:
        # fixed typo: "cach only" -> "cache only"
        print('The cache is set to "cache only" mode. Nothing further to do.')
        sys.exit(0)
    print(f"Will now back up GDrive to a level {max_level}")
    # Hoisted out of the loop: the worker-count default is loop-invariant.
    if parallelism < 1:
        parallelism = 14
    # Assumes BACKUP_LEVELS iterates in ascending level order, so we can
    # stop at the first level above the maximum — TODO confirm ordering.
    for level in BACKUP_LEVELS.values():
        if not level.finder:
            continue
        if level.level > max_level:
            break
        run_backup_level(level, parallelism=parallelism)
    print(f"All files with priority <= {max_level} are now saved locally!")
361408
362409if __name__ == "__main__" :
363410 import argparse
@@ -412,14 +459,26 @@ def backup_level(value):
412459 help = "Copy files in (default: move)" ,
413460 default = False ,
414461 )
462+ sideload .add_argument (
463+ "--recursive" , "-r" ,
464+ action = "store_true" ,
465+ help = "Allow sideload to crawl directories" ,
466+ default = False ,
467+ )
468+ sideload .add_argument (
469+ '--replace' , '-f' ,
470+ action = "store_true" ,
471+ default = False ,
472+ help = "Don't assume the existing cache files are good" ,
473+ )
415474
416475 args = parser .parse_args ()
417476
418477 if not gdrive .gcache .file_cache_dir :
419478 gdrive .gcache .set_file_cache_dir ()
420479
421480 if args .command == "sideload" :
422- sideload_main (args .files , args .parent_folder , move = (not args .copy ))
481+ sideload_main (args .files , args .parent_folder , move = (not args .copy ), recurse = args . recursive , check = args . replace )
423482 elif args .command == "backup" :
424483 if args .list_levels :
425484 print ("Available Backup Levels:" )
@@ -429,27 +488,6 @@ def backup_level(value):
429488 else :
430489 print (f" { lvl :3d} : { bl .name :<15} - { bl .description } " )
431490 else :
432- import sys
433- if args .level is not None :
434- save_backup_level (args .level )
435- else :
436- args .level = get_saved_backup_level ()
437- if args .level is None :
438- print ("ERROR: No backup level supplied and no previous level found in the database. Please provide a --level." , file = sys .stderr )
439- sys .exit (1 )
440- if args .level == 0 :
441- print ('The cache is set to "cach only" mode. Nothing further to do.' )
442- sys .exit (0 )
443- print (f"Will now back up GDrive to a level { args .level } " )
444- for level in BACKUP_LEVELS .values ():
445- if not level .finder :
446- continue
447- if level .level > args .level :
448- break
449- parallelism = args .threads
450- if parallelism < 1 :
451- parallelism = 14
452- run_backup_level (level , parallelism = parallelism )
453- print (f"All files with priority <= { args .level } are now saved locally!" )
491+ backup_main (new_max_level = args .level , parallelism = args .threads )
454492 else :
455493 parser .print_help ()