Skip to content

Commit edccf9b

Browse files
committed
PDF Thumbnail utils
[skip ci]
1 parent 0680e79 commit edccf9b

2 files changed

Lines changed: 50 additions & 0 deletions

File tree

‎scripts/pdfutils.py‎

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/bin/python
22
from strutils import (
33
whitespace,
4+
thumbnail_path_for_file,
5+
THUMBNAIL_SIZES,
46
)
57
import subprocess
68
from pathlib import Path
@@ -20,6 +22,35 @@ def get_page_count(pdf_path) -> int | None:
2022
except:
2123
return None
2224

25+
def render_pdf_thumbnail(path, min_d_size=256, max_d_size=512, type='png') -> bytes:
    """Render the first page of a PDF as an encoded thumbnail image.

    The page is scaled uniformly by one zoom factor, the smaller of:

    * the factor that brings the page's shorter side to ``min_d_size``;
    * the factor that keeps the page's longer side within ``max_d_size``
      (this bound is never above 1.0, so it does not enlarge the page).

    Args:
        path: Location of the PDF; passed straight to ``fitz.open``.
        min_d_size: Target length for the shorter side of the page.
        max_d_size: Upper bound for the longer side of the page.
        type: Output format name handed to ``Pixmap.tobytes``. The name
            shadows the builtin but is kept so keyword callers keep working.

    Returns:
        The encoded image as bytes.

    Raises:
        SystemExit: With status 1, after printing an install hint, when
            PyMuPDF (``fitz``) cannot be imported.
    """
    try:
        import fitz  # PyMuPDF; imported here so it is only needed when rendering
    except ImportError:
        # Only a missing/broken dependency should trigger the install hint;
        # a bare ``except`` would also swallow KeyboardInterrupt and friends.
        print("pip install pymupdf")
        # ``raise SystemExit`` instead of ``exit()``: the latter is injected
        # by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1) from None

    # The context manager closes the document even if rendering fails.
    with fitz.open(path) as doc:
        page = doc[0]
        width, height = page.rect.width, page.rect.height
        maxzoom = max_d_size / max(width, height, max_d_size)
        minzoom = min_d_size / min(width, height)
        zoom = min(minzoom, maxzoom)
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        return pix.tobytes(type)
38+
39+
def get_shared_cached_pdf_thumbnail(pdf_path: Path, size='large') -> bytes:
    """Return PNG thumbnail bytes for ``pdf_path``, using the shared cache.

    The cache location comes from ``thumbnail_path_for_file`` with
    ``shared=True``. When a file already exists there its bytes are returned
    unchanged; otherwise the thumbnail is rendered, written to that location
    (creating parent directories as needed) and returned.

    Args:
        pdf_path: The PDF to produce a thumbnail for.
        size: Key into ``THUMBNAIL_SIZES``; the looked-up value is used as
            the minimum side length and twice that value as the maximum.

    Returns:
        The thumbnail image bytes (cached or freshly rendered).
    """
    cache_file = thumbnail_path_for_file(pdf_path, shared=True, size=size)

    if not cache_file.is_file():
        # Cache miss: render, persist, and hand back the fresh bytes.
        edge = THUMBNAIL_SIZES[size]
        rendered = render_pdf_thumbnail(
            pdf_path, min_d_size=edge, max_d_size=2 * edge, type='png'
        )
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        cache_file.write_bytes(rendered)
        return rendered

    # Cache hit.
    return cache_file.read_bytes()
53+
2354
def readpdf(pdf_file: str | Path, max_len=None, normalize=1) -> str:
2455
"""Returns a pdf's text.
2556

‎scripts/strutils.py‎

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,25 @@ def md5(text: bytes | str | Path):
8989
raise ValueError(f"Expected bytes or str or Path, got {type(text)}")
9090
return hashlib.md5(bts).hexdigest()
9191

92+
# Maps a thumbnail size name to an integer size value.
# NOTE(review): the names and values look like the size classes of the
# freedesktop thumbnail spec (pixel edge lengths) -- confirm against the spec.
# NOTE(review): 'fail' maps to 0; presumably it is a marker for failed
# thumbnails, not a renderable size -- verify how callers treat it.
THUMBNAIL_SIZES = {
    'normal': 128,
    'large': 256,
    'x-large': 512,
    'xx-large': 1024,
    'fail': 0,
}
99+
100+
def thumbnail_path_for_file(filepath: Path | str, shared=False, size='large'):
    """Return the path under which the thumbnail of ``filepath`` is cached.

    The file name is the MD5 hex digest of ``"file://" + <resolved path>``
    with a ``.png`` suffix. The directory depends on ``shared``:

    * ``shared=True``: ``<directory of the file>/.sh_thumbnails/<size>``
    * ``shared=False``: ``$XDG_CACHE_HOME/thumbnails/<size>``, falling back
      to ``~/.cache/thumbnails/<size>`` when ``XDG_CACHE_HOME`` is unset,
      empty, or not an absolute path.

    Nothing is created on disk; only the path is computed.

    Args:
        filepath: The file the thumbnail belongs to. ``~`` is expanded and
            the path is resolved before hashing.
        shared: Select the shared (next to the file) cache instead of the
            per-user cache.
        size: Name of the size sub-directory.

    Returns:
        A ``Path`` pointing at the thumbnail file (which may not exist).
    """
    # Following the spec at https://specifications.freedesktop.org/thumbnail/latest/thumbsave.html
    import os  # local import: the module's top-level imports are not visible here

    filepath = Path(filepath).expanduser().resolve()
    if shared:
        cache_dir = filepath.parent / '.sh_thumbnails' / size
    else:
        # The spec places the personal cache below $XDG_CACHE_HOME; the XDG
        # base-directory default (~/.cache) applies when it is not usable.
        xdg_cache_home = os.environ.get('XDG_CACHE_HOME', '')
        if os.path.isabs(xdg_cache_home):
            cache_root = Path(xdg_cache_home)
        else:
            cache_root = Path('~/.cache').expanduser()
        cache_dir = cache_root / 'thumbnails' / size

    # Built outside the f-string: reusing the enclosing quote character
    # inside an f-string expression is a SyntaxError before Python 3.12.
    # NOTE(review): the URI is not percent-encoded, so paths with spaces or
    # non-ASCII characters may hash differently than in other thumbnailers
    # -- confirm against the spec before changing (it would move cache keys).
    file_uri = "file://" + str(filepath)
    return cache_dir / f"{md5(file_uri)}.png"
110+
92111
def cumsum(vec):
    """Return the running (cumulative) sums of ``vec`` as a list.

    Element ``i`` of the result is ``vec[0] + vec[1] + ... + vec[i]``,
    combined left to right with ``+``. An empty input yields ``[]``.

    Args:
        vec: Any iterable whose items support ``+`` with each other.

    Returns:
        A new list with one running total per input item.
    """
    # Local import: the file's top-level import block is not visible here.
    from itertools import accumulate

    # accumulate() is linear; the previous reduce() rebuilt the whole list
    # on every step (``a + [...]``), which is quadratic in len(vec).
    return list(accumulate(vec))
94113

0 commit comments

Comments
 (0)