Skip to content

Commit dee7f08

Browse files
Dylan search function (#648)
* Add: oneword token search based on the search pattern we use * removed: line reference comments * add: sutta finder variable at the top for one word query * create: onewordtoken function and TDD - first failed now passes * trying to get item titles from results array of objects * add: fallback for oneword search results back to results on no results * add: findoneword function in completed form with TDD * add: new normalizetitle function and better results handling * update: findoneword...() with matching against joinedTitles made from store items with new normalized titles * add: new test cases to normalizeSuttaTitles * add: remove "the" from beginning, better nikaya index handling and update: remove all text after : for scale * add: several test cases to match cases suggested on PR review, to show my feature works * Add: extra test cases and new normalizeSuttaTitles() handling * remove: unused tokenResults variable * update: test case names to match test cases properly * update: test case description for testing parse lal sutra
1 parent ac6a442 commit dee7f08

3 files changed

Lines changed: 168 additions & 3 deletions

File tree

‎assets/js/search_functions.js‎

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,27 @@
11
// Parameters
22
var BMAX = 250; // Max blurb size in characters
33
var RMAX = 100; // Max number of results to display
4+
// Normalized sutta titles used as a fallback when the regular search returns
// nothing. Starts empty; assigned elsewhere once the content store is loaded.
var joinedTitles = [];
5+
6+
/**
 * Builds a list of normalized, space-less titles for canonical content items
 * so that a query can be compared against a title with plain string equality.
 *
 * Normalization steps, in order:
 *   1. strip diacritics (NFD decomposition, then drop combining marks),
 *   2. strip a leading collection reference such as "MN 35" or "Thig 3.8",
 *   3. drop everything from the first ":" or "-" onwards,
 *   4. lowercase,
 *   5. drop a leading article "the" (whole word only),
 *   6. remove every character that is not a-z or 0-9.
 *
 * Only items with type "content" and category "canon" whose normalized title
 * contains "sutta", "sutra" or "gatha" are kept.
 *
 * @param {Object} obj - Map of reference id to item ({ title, type, category }).
 * @returns {Array<{ref: string, title: string, matchData: Object}>}
 *   One entry per kept item; empty when obj is null/undefined.
 */
function normalizeSuttaTitles (obj) {
  var joinedTitleDatabase = [];
  if (!obj) return joinedTitleDatabase;

  // Object.keys() visits own properties only, so enumerable properties
  // inherited through the prototype chain are never treated as items.
  const refs = Object.keys(obj);
  for (let n = 0; n < refs.length; n++) {
    const ref = refs[n];
    const item = obj[ref];
    if (!item || item.type !== "content" || item.category !== "canon") continue;

    // A missing or non-string title is treated as empty instead of throwing.
    const title = typeof item.title === "string" ? item.title : "";
    const normalizedTitle = title
      .normalize("NFD")
      .replace(/[\u0300-\u036f]/g, "")
      .replace(/^\s*(?:DN|MN|SN|AN|KN|LAL|DA|MA|SA|EA|SNP|DHP|ITI|THAG|THIG|UD|NIDD|CV|BV|AP|JA|PV|VV|KP|PTS)\s*\d+(?:\.\d+)?\s*[:.-]?\s*/i, "")
      .replace(/\s*[:\-]\s*.*$/, "")
      .toLowerCase()
      // The article must be removed while word boundaries still exist;
      // doing it after the words are joined would also eat the first three
      // letters of titles such as "Therika ..." or "Theragatha ...".
      .replace(/^\s*the\b\s*/, "")
      .replace(/[^a-z0-9]/g, "");

    if (
      normalizedTitle.includes("sutta") ||
      normalizedTitle.includes("sutra") ||
      normalizedTitle.includes("gatha")
    ) {
      joinedTitleDatabase.push({
        ref: ref,
        title: normalizedTitle,
        matchData: { metadata: {} }
      });
    }
  }
  return joinedTitleDatabase;
}
425

526
function getPositions(result, field) {
627
var positions = [];
@@ -169,10 +190,27 @@ function displaySearchResults(results) {
169190
}
170191
}
171192

193+
/**
 * Finds entries whose normalized title is exactly equal to the normalized
 * query. Used as a fallback when the regular search yields no results.
 *
 * The query is normalized like the stored titles: diacritics stripped,
 * lowercased, a leading whole-word "the" dropped, and every character that
 * is not a-z or 0-9 removed. "Cūḷa Saccaka Sutta" and "culasaccakasutta"
 * therefore both match the stored title "culasaccakasutta".
 *
 * @param {string} query - Raw user query.
 * @param {Array|Object} joinedTitles - Entries of the form { ref, title };
 *   either an array or an object whose own values are such entries.
 * @returns {Array<{ref: *, score: number, matchData: Object}>}
 *   One result per matching entry (score is always 1); empty when nothing
 *   matches or when the inputs are unusable.
 */
function findOneWordSuttaTitleMatches(query, joinedTitles) {
  const tokenResults = [];
  if (typeof query !== "string" || !joinedTitles) return tokenResults;

  const normalizedQuery = query
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    // Drop the article before the words are joined so only a whole-word
    // "the" is removed, matching how stored titles are meant to be built.
    .replace(/^\s*the\b\s*/, "")
    .replace(/[^a-z0-9]/g, "");
  // A query with no letters or digits cannot identify a title.
  if (normalizedQuery === "") return tokenResults;

  // Walk arrays by index; for plain objects walk their own values only.
  const entries = Array.isArray(joinedTitles)
    ? joinedTitles
    : Object.keys(joinedTitles).map(function (key) { return joinedTitles[key]; });

  for (let n = 0; n < entries.length; n++) {
    const entry = entries[n];
    if (entry && entry.title === normalizedQuery) {
      tokenResults.push({
        ref: entry.ref,
        score: 1,
        matchData: { metadata: {} }
      });
    }
  }
  return tokenResults;
}
208+
172209
function handleSearchMessage(data, searchFn) {
173210
var results = [];
174211
var warning = "";
175212
var words = data.q.trim().split(" ");
213+
176214
for (var i = 0; i < words.length; i++) {
177215
const s = words[i].trim();
178216
if (!s.startsWith("+") && !s.startsWith("-") && s.length > 1 && lunr.stopWordFilter(s)) {
@@ -213,10 +251,11 @@ function handleSearchMessage(data, searchFn) {
213251
});
214252
});
215253
}
254+
finalResults = results.length ? results : findOneWordSuttaTitleMatches(data.q.trim(), joinedTitles);
216255
return {
217256
"warninghtml": warning,
218-
"html": displaySearchResults(results),
219-
"count": results ? results.length : 0,
257+
"html": displaySearchResults(finalResults),
258+
"count": finalResults ? finalResults.length : 0,
220259
"q": data.q,
221260
"filterquery": data.filterquery,
222261
"qt": data.qt

‎assets/js/search_index.js‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ var idx = lunr(function () {
6969
}
7070
});
7171

72+
joinedTitles = normalizeSuttaTitles(store);
73+
7274
self.onmessage = function(e) {
7375
self.postMessage(handleSearchMessage(e.data, idx.search.bind(idx)));
7476
}

‎assets/js/tests/search-worker.test.js‎

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,16 @@ vm.runInContext(
6969
'this.resultMatched = resultMatched;\n' +
7070
'this.addMatchHighlights = addMatchHighlights;\n' +
7171
'this.getBlurbForResult = getBlurbForResult;\n' +
72+
'this.normalizeSuttaTitles = normalizeSuttaTitles;\n' +
73+
'this.findOneWordSuttaTitleMatches = findOneWordSuttaTitleMatches;\n' +
7274
'this.handleSearchMessage = handleSearchMessage;\n' +
7375
'this.displaySearchResults = displaySearchResults;\n',
7476
sandbox
7577
);
7678

7779
// Pull the functions under test out of the vm sandbox. Only names that the
// runInContext snippet assigns onto the sandbox are listed here.
const {
  categoryName, getPositions, resultMatched,
  addMatchHighlights, getBlurbForResult,
  normalizeSuttaTitles, findOneWordSuttaTitleMatches, handleSearchMessage
} = sandbox;
8183

8284
// ── categoryName ────────────────────────────────────────────────────
@@ -296,6 +298,128 @@ describe('getBlurbForResult', () => {
296298
});
297299
});
298300

301+
// ── normalizeSuttaTitles ─────────────────────────────────────────────
302+
describe('normalizeSuttaTitles', () => {
  // Builds a one-item store; type and category default to the values that
  // normalizeSuttaTitles() accepts.
  const storeWith = (title, overrides = {}) => ({
    id1: Object.assign({ title, type: 'content', category: 'canon' }, overrides)
  });

  // Asserts that the store yields exactly one entry for id1 with the given
  // normalized title.
  const assertSingleTitle = (store, expectedTitle) => {
    const result = normalizeSuttaTitles(store);
    assert.equal(result.length, 1);
    assert.equal(result[0].ref, 'id1');
    assert.equal(result[0].title, expectedTitle);
  };

  it('returns an array of database objects with a new normalized title', () => {
    assertSingleTitle(
      storeWith('MN 35 Cūḷa Saccaka Sutta: The Shorter Discourse With Saccaka'),
      'culasaccakasutta'
    );
  });

  it('remove words after sutta. Also handles sutra', () => {
    assertSingleTitle(
      storeWith('MA 128 Upasaka Sutra: Discourse on the White-Clad Disciple'),
      'upasakasutra'
    );
  });

  it('can parse a lal sutra', () => {
    assertSingleTitle(
      storeWith('Lal 26 Dharmacakrapravartana Sūtra: The Discourse that Set the Dharma-Wheel Rolling'),
      'dharmacakrapravartanasutra'
    );
  });

  it('can parse Therigathas', () => {
    assertSingleTitle(
      storeWith("Thig 3.8 Somā Therīgāthā: Somā's Verses"),
      'somatherigatha'
    );
  });

  it('can parse Theragathas', () => {
    assertSingleTitle(
      storeWith("Thag 1.7 Bhalliya Theragāthā: Bhalliya's Verse"),
      'bhalliyatheragatha'
    );
  });

  it('handles "the" and removes it from a string if it appears at the beginning', () => {
    assertSingleTitle(
      storeWith('DN 22 The Mahāsatipaṭṭhāna Sutta: The Long Discourse about the Ways of Attending to Mindfulness'),
      'mahasatipatthanasutta'
    );
  });

  it('filters out objects whose category is not canon', () => {
    const result = normalizeSuttaTitles(
      storeWith('MN 3 Dhammadāyāda Sutta: Heirs in the Teaching', { category: 'av' })
    );
    assert.equal(result.length, 0);
  });

  it('filters out objects with types that are not equal to content', () => {
    const result = normalizeSuttaTitles(
      storeWith('MN 3 Dhammadāyāda Sutta: Heirs in the Teaching', { type: 'page' })
    );
    assert.equal(result.length, 0);
  });

  it('filters out objects where titles do not include sutta, sutra, or gatha', () => {
    const result = normalizeSuttaTitles(
      storeWith('The Hairy Spider Climbed Up The Waterspout: A day in the life Part 3 ')
    );
    assert.equal(result.length, 0);
  });
});
411+
412+
// ── findOneWordSuttaTitleMatches ─────────────────────────────────────────────
413+
describe('findOneWordSuttaTitleMatches', () => {
  // Entries use the { ref, title, matchData } shape that
  // normalizeSuttaTitles() returns, so a match can be traced back to its item.
  const joined = [
    { ref: 'id1', title: 'culasaccakasutta', matchData: { metadata: {} } },
    { ref: 'id2', title: 'upasakasutra', matchData: { metadata: {} } }
  ];

  it('returns matched item when query matches title exactly', () => {
    const result = findOneWordSuttaTitleMatches('culasaccakasutta', joined);
    assert.equal(toLocal(result).length, 1);
    assert.equal(result[0].ref, 'id1');
    assert.equal(result[0].score, 1);
  });

  it('returns an empty array when no title matches the query', () => {
    const result = findOneWordSuttaTitleMatches('notasuttatitle', joined);
    assert.equal(toLocal(result).length, 0);
  });
});
422+
299423
// ── handleSearchMessage ─────────────────────────────────────────────
300424

301425
describe('handleSearchMessage', () => {

0 commit comments

Comments
 (0)