|
| 1 | +#!/bin/python |
| 2 | + |
| 3 | +import gdrive |
| 4 | +import datetime |
| 5 | +from collections import defaultdict |
| 6 | +from functools import cache |
| 7 | +import hashlib |
| 8 | + |
# Drive id of the folder the catalog is built from (passed to the root
# DriveFolder in the script section).
ROOT_FOLDER = "1RJi6bEXa25zizGdsm5evCycYuY6a2D8r"
# Comma-separated file attributes requested from gdrive for every item.
FIELDS = "id,name,mimeType,size,shortcutDetails,createdTime,webViewLink"
# MD5 hex digests that owner e-mail addresses are compared against: a shortcut
# whose target's first owner hashes to one of these is skipped as "owned by
# me". Storing digests keeps the plain addresses out of the source.
MY_EMAILS = {
    'aee5188bd988b0ab263a6b3003831c6e',
    'e55371a7e1b97300ea623338dbcc0694',
    '3945098d73ac3a594febd2c87d357971',
    '3b654b6ccfb53f233fbd798415b62624',
    'b9083baac482b28ac374ebe1856bfefc',
    '7f519cc091d7690b440aa4db74141a94',
    'd97d9501979b0a1442b0482418509a84',
}

# Tag name plus inline style for a padded table cell; interpolated between
# "<" and ">" when the MIME-type table is rendered.
TD = 'td style="padding:5pt;"'
| 22 | + |
def md5(text):
    """Return the hexadecimal MD5 digest of ``text``.

    ``text`` is encoded with ``str.encode()``'s default encoding before
    hashing.
    """
    hasher = hashlib.md5()
    hasher.update(text.encode())
    return hasher.hexdigest()
| 25 | + |
def human_readable_size(bytes_size):
    """Format a byte count as a two-decimal string with a binary unit suffix.

    The value is divided by 1024 and moved to the next unit for as long as it
    is at least 922, so sizes just under a full unit are shown as a fraction
    of the larger unit (922 bytes -> "0.90 KB"). "PB" is the largest unit
    used. A size of exactly zero is rendered as "0 B".
    """
    if bytes_size == 0:
        return "0 B"
    unit = "B"
    value = float(bytes_size)
    for larger_unit in ("KB", "MB", "GB", "TB", "PB"):
        keep_scaling = value >= 922
        if not keep_scaling:
            break
        value /= 1024
        unit = larger_unit
    return f"{value:.2f} {unit}"
| 36 | + |
def headerize(text, depth):
    """Wrap ``text`` in the HTML element used for a title at nesting ``depth``.

    Args:
        text: HTML fragment to wrap; it is inserted as-is.
        depth: Nesting level of the item being titled.

    Returns:
        * ``depth < 1``: an empty string (no heading is emitted).
        * ``depth <= 4``: a plain ``<hN>`` heading.
        * ``depth <= 6``: an ``<hN>`` heading with an explicit 11pt font size.
        * deeper: HTML has no heading level past ``<h6>``, so a ``<p>``
          prefixed with ``depth - 1`` spaces and a ``+`` marker.
    """
    if depth < 1:
        return ''
    if depth <= 4:
        return f"<h{depth}>{text}</h{depth}>"
    if depth <= 6:
        # CSS declarations are "property:value"; the previous "font-size=11pt"
        # was not a valid declaration and was therefore ignored.
        return f'<h{depth} style="font-size:11pt;">{text}</h{depth}>'
    # NOTE(review): runs of plain spaces collapse when HTML is rendered, so
    # this indent may not be visible in the output - confirm the intended
    # character.
    space = " " * (depth - 1)
    return f"<p>{space}+ {text}</p>"
| 46 | + |
# Ids of every folder loaded so far. DriveFolder.__init__ refuses to load a
# folder whose id is already in this set (for example one reached a second
# time through a shortcut).
seen_folders = set()


class DriveFolder:
    """A Drive folder loaded together with everything beneath it.

    Constructing an instance queries ``gdrive`` for the folder's children,
    replaces shortcuts by their targets and recursively builds a
    ``DriveFolder`` for every subfolder, so constructing the root loads the
    whole tree.

    Attributes:
        name: Display name of the folder.
        id: Drive id of the folder.
        createdTime: Creation timestamp string; siblings are sorted by it.
        depth: Nesting level; children get the parent's depth plus one.
        files: Metadata dicts of the files directly inside this folder,
            sorted by ``createdTime``; each has an integer ``size``.
        subfolders: Child ``DriveFolder`` objects, sorted by ``createdTime``.
    """

    _FOLDER_MIMETYPE = 'application/vnd.google-apps.folder'
    _SHORTCUT_MIMETYPE = 'application/vnd.google-apps.shortcut'

    # Per-instance memo slots for total_size() / total_count(). Class-level
    # defaults mean "not computed yet"; the methods store results on the
    # instance instead of in a class-wide functools cache that would keep
    # every instance alive.
    _total_size = None
    _total_count = None

    def __init__(self, name: str, folderid: str, createdTime: str, depth: int) -> None:
        """Load folder ``folderid`` and, recursively, all of its subfolders.

        Args:
            name: Display name to use for the folder.
            folderid: Drive id of the folder to load.
            createdTime: Creation timestamp used to order this folder
                among its siblings.
            depth: Nesting level of this folder.

        Raises:
            ValueError: If ``folderid`` has already been loaded.
        """
        if folderid in seen_folders:
            raise ValueError(f"Folder already seen: {folderid}")
        seen_folders.add(folderid)
        print(f"Loading folder \"{name}\"...")
        self.name = name
        # Store the Drive id itself (not the ``id`` builtin) so that
        # list_files() can build the link to this folder.
        self.id = folderid
        self.createdTime = createdTime
        self.depth = depth
        self.files = []
        self.subfolders = []
        pending_folders = []
        shortcuts = []
        query = f"trashed=false AND '{folderid}' in parents"
        for child in gdrive.all_files_matching(query, FIELDS):
            mimetype = child['mimeType']
            if mimetype == self._FOLDER_MIMETYPE:
                pending_folders.append(child)
            elif mimetype == self._SHORTCUT_MIMETYPE:
                shortcuts.append(child)
            else:
                self._add_file(child)
        if shortcuts:
            print(f" Resolving {len(shortcuts)} shortcut(s)...")
            for target in self._resolve_shortcuts(shortcuts):
                if target['mimeType'] == self._FOLDER_MIMETYPE:
                    pending_folders.append(target)
                else:
                    self._add_file(target)
        print(f" Got {len(self.files)} files and {len(pending_folders)} subfolders")
        for child in pending_folders:
            self.subfolders.append(DriveFolder(
                child['name'],
                child['id'],
                child['createdTime'],
                self.depth + 1,
            ))
        self.files.sort(key=lambda f: f['createdTime'])
        self.subfolders.sort(key=lambda f: f.createdTime)

    def _add_file(self, meta):
        """Normalize ``meta['size']`` to an int (0 if absent) and record the file."""
        meta['size'] = int(meta.get('size', 0))
        self.files.append(meta)

    def _resolve_shortcuts(self, shortcuts):
        """Yield the target of every shortcut whose target is not owned by me.

        Each yielded target keeps its own values under ``originalName`` /
        ``originalCreatedTime`` and takes over the shortcut's ``name`` and
        ``createdTime``, so it is listed the way it appears in this folder.
        """
        target_ids = [s['shortcutDetails']['targetId'] for s in shortcuts]
        # First shortcut wins when several point at the same target.
        shortcut_by_target = {}
        for shortcut in shortcuts:
            shortcut_by_target.setdefault(shortcut['shortcutDetails']['targetId'], shortcut)
        for target in gdrive.batch_get_files_by_id(target_ids, FIELDS+',owners'):
            shortcut = shortcut_by_target[target['id']]
            # ``owners`` can be absent or empty (Drive does not populate it
            # for items in shared drives); such targets are never "mine".
            owners = target.get('owners') or []
            owner_email = owners[0].get('emailAddress') if owners else None
            if owner_email is not None and md5(owner_email) in MY_EMAILS:
                print(f" Skipping {shortcut['name']}->{target['name']} because it's owned by me")
                continue
            target['originalName'] = target['name']
            target['originalCreatedTime'] = target['createdTime']
            target['name'] = shortcut['name']
            target['createdTime'] = shortcut['createdTime']
            yield target

    def total_size(self):
        """Return the summed ``size`` of all files in this folder and below.

        The result is computed once per instance and then reused.
        """
        if self._total_size is None:
            own = sum(f['size'] for f in self.files)
            nested = sum(f.total_size() for f in self.subfolders)
            self._total_size = own + nested
        return self._total_size

    def total_count(self):
        """Return the number of files in this folder and below.

        The result is computed once per instance and then reused.
        """
        if self._total_count is None:
            nested = sum(f.total_count() for f in self.subfolders)
            self._total_count = nested + len(self.files)
        return self._total_count

    def file_count_by_mimetype(self):
        """Return ``{mimeType: {'size': ..., 'count': ...}}`` for the subtree.

        The mapping is a ``defaultdict`` whose missing entries start at zero
        size and zero count.
        """
        ret = defaultdict(lambda: {'size': 0, 'count': 0})
        for f in self.files:
            entry = ret[f['mimeType']]
            entry['count'] += 1
            entry['size'] += f['size']
        for child in self.subfolders:
            for mimetype, sub in child.file_count_by_mimetype().items():
                ret[mimetype]['count'] += sub['count']
                ret[mimetype]['size'] += sub['size']
        return ret

    def list_files(self):
        """Render this folder and everything beneath it as an HTML fragment.

        The fragment is the folder's title (a link to the folder plus its
        total size, wrapped by ``headerize``), one ``<p>`` link per file and
        then the rendering of each subfolder, joined with newlines. Names
        and URLs are HTML-escaped before being inserted.
        """
        # Imported here rather than at module level: the script section of
        # this file binds a global named ``html``, which would shadow the
        # module.
        from html import escape

        space = ' '*self.depth
        folder_url = escape(f"{gdrive.FOLDER_LINK_PREFIX}{self.id}")
        ret = [headerize(
            f'<a href="{folder_url}">{escape(self.name)}</a> <span style="color:#666666;">({human_readable_size(self.total_size())})</span>',
            self.depth,
        )]
        ret.extend(
            f"""<p>{space}- <a href="{escape(child['webViewLink'])}">{escape(child['name'])}</a></p>"""
            for child in self.files
        )
        ret.extend(child.list_files() for child in self.subfolders)
        return '\n'.join(ret)
| 135 | + |
def main():
    """Load the whole library, render the catalog as HTML and publish it.

    The page contains a generation timestamp, a per-MIME-type summary table
    and the nested file listing; it replaces the existing public document
    through ``gdrive.create_doc``.
    """
    root = DriveFolder("A Curated Buddhist G-Library", ROOT_FOLDER, "2019-01-01T00:00:00Z", 0)
    total_size = human_readable_size(root.total_size())
    total_count = root.total_count()
    print("\n==================\nFinished fetching data!\n==================\n")

    generated_on = datetime.datetime.now(datetime.timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")
    mime_rows = "".join(
        f"<tr><{TD}>{t}</td><{TD}>{c['count']}</td><{TD}>{human_readable_size(c['size'])}</td></tr>"
        for t, c in root.file_count_by_mimetype().items()
    )
    # The font name is single-quoted: a double quote inside the double-quoted
    # style attribute would end the attribute early.
    page = f"""<html>
 <head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head>
 <body class="doc-content">
 <p class="title" style="font-size:26pt;padding-bottom:3pt;line-height:1.15;page-break-after:avoid;font-family:'Arial';orphans:2;widows:2;text-align:left;"><span style="font-weight:400;text-decoration:none;vertical-align:baseline;font-size:26pt;font-family:'Arial';font-style:normal">Buddhist G-Library Catalog</span></p>
 <p>An automatically generated list of all the files in the Library.</p>
 <p>Generated on {generated_on}</p><p></p>
 <p>In total, the library is {total_size} large and contains {total_count} files. They break down by MIME type as follows:</p>
 <table><tr style="text-decoration:underline;"><{TD}>MIME Type</td><{TD}>Count</td><{TD}>Size</td></tr>
 {mime_rows}
 <tr style="font-weight:700;"><{TD}>Total</td><{TD}>{total_count}</td><{TD}>{total_size}</td></tr>
 </table><p></p><h1>Files</h1><p></p>{root.list_files()}
 </body>
 </html>
 """

    print("Replacing public doc with new version...")
    docid = gdrive.create_doc(
        html=page,
        creator="CatalogBuilder",
        replace_doc="1rGLm9Xh5de0e3hsMY2yyt97MWBuZJ1V1_q0jhGe7vpw",
    )
    print(f"Done! See https://docs.google.com/document/d/{docid}/edit")


if __name__ == "__main__":
    main()
0 commit comments