99from pathlib import Path
1010from typing import Callable , Optional
1111from tqdm import tqdm
12- import gdrive_base
12+ import sys
1313import gdrive
1414import shutil
1515from strutils import (
@@ -168,7 +168,7 @@ def find_unlinked_tag_content(
168168) -> list [dict ]:
169169 ret = []
170170 for tag in website .tags :
171- all_files = find_files_for_tag (tag .slug , include_folders )
171+ all_files = find_files_for_tag (tag .slug , include_av = include_av , include_folders = include_folders )
172172 all_files = [
173173 file for file in all_files
174174 if file ['id' ] not in website .data .linked_ids
@@ -426,68 +426,7 @@ def download_file_to_cache(file: dict, verbose=False) -> str | None:
426426 if file ['id' ] in SEEN_IDS :
427427 return None
428428 SEEN_IDS .add (file ['id' ])
429- if file ['mimeType' ] == 'application/vnd.google-apps.shortcut' :
430- tfile = gdrive .gcache .get_item (file ['shortcutDetails' ]['targetId' ])
431- if not tfile :
432- print (f"WARNING: Skipping dangling shortcut \" { file ['name' ]} \" in { file ['parent_id' ]} " )
433- return None
434- file = tfile
435-
436- is_gdoc = file ['mimeType' ] == 'application/vnd.google-apps.document'
437- is_gsheet = file ['mimeType' ] == 'application/vnd.google-apps.spreadsheet'
438- if is_gdoc or is_gsheet :
439- hashval = md5 (file ['id' ] + str (file ['version' ]))
440- else :
441- hashval = file .get ('md5Checksum' )
442- if not isinstance (hashval , str ):
443- return None
444- assert len (hashval ) == 32
445-
446- if is_gdoc :
447- extension = '.docx'
448- elif is_gsheet :
449- extension = '.xlsx'
450- elif '.' in file .get ('name' , '' ):
451- extension = '.' + str (file ['name' ]).split ('.' )[- 1 ].lower ()
452- else :
453- extension = guess_extension (file ['mimeType' ]) or ''
454-
455- cache_dir = gdrive .gcache .file_cache_dir
456- assert isinstance (cache_dir , Path )
457- target_path = cache_dir / hashval [:2 ] / f"{ hashval [2 :]} { extension } "
458- if target_path .exists ():
459- if verbose :
460- print (f" Skipping already downloaded { file ['name' ]} " )
461- return str (target_path )
462- target_path .parent .mkdir (exist_ok = True )
463- if verbose :
464- print (f" Downloading { file ['name' ]} " )
465- if not (is_gdoc or is_gsheet ):
466- try :
467- gdrive_base .download_file (file ['id' ], target_path , verbose = False )
468- except FileNotFoundError as e :
469- if target_path .exists ():
470- # Another thread got this file before us 😅
471- pass
472- else :
473- raise e
474- else :
475- try :
476- if is_gdoc :
477- gdrive_base .download_gdoc_as_docx (file ['id' ], target_path )
478- elif is_gsheet :
479- gdrive_base .download_gsheet_as_xlsx (file ['id' ], target_path )
480- except gerrors .HttpError as e :
481- if "exportSizeLimitExceeded" in str (e ):
482- if verbose :
483- print (f" Skipping { file ['name' ]} : it's too large to be exported :(" )
484- return None
485- if "cannot be exported" in str (e ):
486- return None
487- raise e
488- if verbose :
489- print (f" Saved to { target_path .parent .name } /{ target_path .name } " )
490- return str (target_path )
429+ return gdrive .gcache .download_file_to_cache (file )
491430
492431def run_backup_level (level : BackupLevel , parallelism = 14 ):
493432 print (f"Starting backup level { level .level } ({ level .name } )..." )
@@ -610,7 +549,11 @@ def backup_main(from_level: int=0, new_max_level: int | None=None, parallelism:
610549 run_backup_level (level , parallelism = parallelism )
611550 print (f"All files with priority <= { max_level } are now saved locally!" )
612551
613- def print_backup_levels_list ():
def format_cache_percentage(dl_size: int, total_size: int) -> str:
    """Describe how much of a set of files is already in the local cache.

    Args:
        dl_size: Combined size of the files that are already downloaded.
        total_size: Combined size of all the files being considered.

    Returns:
        A string shaped like ``"<percent> (<downloaded>/<total>)"``: the
        ratio rendered as a percentage with one decimal place, followed by
        both sizes rendered by ``format_size``.
    """
    # An empty total has no meaningful ratio; report 0.0% rather than
    # raising ZeroDivisionError so callers can format empty sets safely.
    ratio = dl_size / total_size if total_size else 0.0
    return f"{ratio:.1%} ({format_size(dl_size)}/{format_size(total_size)})"
554+
555+ def print_backup_levels_list (statistics : bool = False ):
556+ """`statistics` replaces the generic description with current fill level stats"""
614557 print ("\033 [1mGoogle Drive Backup Levels\033 [0m" )
615558 print (
616559"""
@@ -623,24 +566,45 @@ def print_backup_levels_list():
623566 )
624567 seen_file_ids = set ()
625568 cum_sum_size = 0
569+ cum_sum_dl_size = 0
626570
627- print (f"\033 [4m Lvl: { 'Level Name' :<16} { 'Est. Size' :>9} - { 'Description' } \033 [0m" )
571+ if statistics :
572+ print (f"\033 [4m Lvl: { 'Level Name' :<16} { 'This Level' :^25} { 'Cummulative' :^24} \033 [0m" )
573+ else :
574+ print (f"\033 [4m Lvl: { 'Level Name' :<16} { 'Est. Size' :>9} - { 'Description' } \033 [0m" )
628575 for lvl , bl in BACKUP_LEVELS .items ():
629576 if bl .finder is None :
630- print (f"\033 [1m { lvl :3d} : { '^' + bl .name + '^' :<16} { "\" " if bl .level else "0 B" :^9} - { bl .description } \033 [0m" )
577+ print (f"\033 [1m { lvl :3d} : { '^' + bl .name + '^' :<16} { "\" " if ( bl .level or statistics ) else "0 B" :^9} - { bl .description if not statistics else '' } \033 [0m" )
631578 else :
632579 files = bl .finder ()
633580 this_level_inc_size = 0
634581 this_level_overlap_size = 0
582+ this_level_overlap_dl_size = 0
583+ this_level_inc_dl_size = 0
635584 while files :
636585 file = files .pop ()
586+ target_path = gdrive .gcache .get_cache_path_for_file (file )
587+ if not target_path :
588+ continue
637589 if file ['id' ] in seen_file_ids :
638590 this_level_overlap_size += file ['size' ]
591+ if statistics and target_path .exists ():
592+ this_level_overlap_dl_size += file ['size' ]
639593 else :
640594 this_level_inc_size += file ['size' ]
641595 seen_file_ids .add (file ['id' ])
596+ if statistics and target_path .exists ():
597+ this_level_inc_dl_size += file ['size' ]
642598 cum_sum_size += this_level_inc_size
643- print (f" { lvl :3d} : { bl .name :<16} { format_size (cum_sum_size ):>9} - { bl .description } " )
599+ cum_sum_dl_size += this_level_inc_dl_size
600+ if statistics :
601+ if this_level_inc_size + this_level_overlap_size > 0 :
602+ description = f"{ format_cache_percentage (this_level_inc_dl_size + this_level_overlap_dl_size , this_level_inc_size + this_level_overlap_size ):<25} { format_cache_percentage (cum_sum_dl_size , cum_sum_size ):^24} "
603+ else :
604+ description = f" [N/A]"
605+ else :
606+ description = f"{ format_size (cum_sum_size ):>9} - { bl .description } "
607+ print (f" { lvl :3d} : { bl .name :<16} { description } " )
644608
645609if __name__ == "__main__" :
646610 import argparse
@@ -657,6 +621,14 @@ def backup_level(value):
657621 raise argparse .ArgumentTypeError (f"{ ivalue } is too large to be a valid backup level. Use --list-levels to see valid levels and their meaning." )
658622 return ivalue
659623
624+ levels = subparsers .add_parser ("levels" , help = "Print information about the backup levels" )
625+ levels .add_argument (
626+ '--stats' ,
627+ action = "store_true" ,
628+ default = False ,
629+ help = "Instead of the description, print stats about the cache at each level"
630+ )
631+
660632 backup = subparsers .add_parser ("backup" , help = "Download files from Drive to the cache" )
661633 backup .add_argument (
662634 "--to-level" , '-l' ,
@@ -674,11 +646,6 @@ def backup_level(value):
674646 type = int ,
675647 help = "Skip levels less than this (this run only)" ,
676648 )
677- backup .add_argument (
678- "--list-levels" ,
679- action = "store_true" ,
680- help = "List all available backup levels and exit" ,
681- )
682649 backup .add_argument (
683650 "--threads" , "-t" ,
684651 required = False ,
@@ -723,20 +690,22 @@ def backup_level(value):
723690
724691 if args .command == "sideload" :
725692 sideload_main (args .files , args .parent_folder , move = (not args .copy ), recurse = args .recursive , check = args .replace )
693+ sys .exit (0 )
694+
695+ with yaspin (text = "Loading website data..." ):
696+ website .load ()
697+ website .data .linked_ids = set ()
698+ for item in website .content :
699+ if not item .get ('external_url' ) and not item .get ('file_links' ):
700+ continue
701+ for drive_link in item .get ('drive_links' , []):
702+ website .data .linked_ids .add (
703+ gdrive .link_to_id (drive_link )
704+ )
705+
706+ if args .command == "levels" :
707+ print_backup_levels_list (statistics = args .stats )
726708 elif args .command == "backup" :
727- with yaspin (text = "Loading website data..." ):
728- website .load ()
729- website .data .linked_ids = set ()
730- for item in website .content :
731- if not item .get ('external_url' ) and not item .get ('file_links' ):
732- continue
733- for drive_link in item .get ('drive_links' , []):
734- website .data .linked_ids .add (
735- gdrive .link_to_id (drive_link )
736- )
737- if args .list_levels :
738- print_backup_levels_list ()
739- else :
740- backup_main (from_level = args .from_level , new_max_level = args .level , parallelism = args .threads )
709+ backup_main (from_level = args .from_level , new_max_level = args .level , parallelism = args .threads )
741710 else :
742711 parser .print_help ()
0 commit comments