CMU Coding Bootcamp

feat: search engine


Co-authored-by: Max Mahn <mahn.maxwell@proton.me>

thecoded.prof 0f13456e c69c82eb

verified
Changed files
+85
ts
searchEngine
+65
ts/searchEngine/index.test.ts
// Exercises SearchIndex.search against a small fixed corpus: single-keyword
// lookup, multi-keyword lookup, and ordering of results by score.
describe("Search Algorithm", () => {
  // [url, page text] pairs, registered in this exact order before every test.
  const corpus: [string, string][] = [
    [
      "https://www.beans.com",
      "beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans beans",
    ],
    ["https://www.beans-are-ok.com", "beans are ok I guess"],
    ["https://www.example.com/cats", "This is a sample web page about cats"],
    [
      "https://www.example.com/dogs",
      "This is a sample web page about dogs and training",
    ],
    ["https://www.training.com", "This is a general training website"],
    [
      "https://www.pineapple-world.com",
      "We have lots of pineapples. You've never seen this many pineapples before.",
    ],
    [
      "https://www.pineapple-is-my-favorite-fruit.com",
      "I love pineapples, it's all I eat. I mean I REALLY LOVE PINEAPPLES",
    ],
    ["https://www.example.com/ml", "This is a page about machine learning"],
  ];

  let index: SearchIndex;

  // Every test starts from a freshly built index so no state leaks between them.
  beforeEach(() => {
    index = new SearchIndex();
    for (const [url, text] of corpus) {
      index.addPage(url, text);
    }
  });

  it("should return relevant pages for a single keyword search", () => {
    const hits = index.search("cats");

    expect(hits).toContain("https://www.example.com/cats");
  });

  it("should return relevant pages for a multi keyword search", () => {
    const dogsUrl = "https://www.example.com/dogs";
    const trainingUrl = "https://www.training.com";

    const hits = index.search("dogs training");

    expect(hits).toContainAllValues([dogsUrl, trainingUrl]);
    // The dogs page mentions both query words, so it must come first.
    expect(hits.indexOf(dogsUrl)).toBe(0);
    expect(hits.indexOf(trainingUrl)).toBe(1);
  });

  it("should return relevant pages for a phrase search", () => {
    const hits = index.search("machine learning");

    expect(hits).toContain("https://www.example.com/ml");
  });

  it("should rank results properly by relevance", () => {
    const heavyUrl = "https://www.beans.com";
    const lightUrl = "https://www.beans-are-ok.com";

    const single = index.search("beans");
    expect(single.indexOf(heavyUrl)).toBe(0);
    expect(single.indexOf(lightUrl)).toBe(1);

    // Repeating the keyword in the query must leave the relative order intact.
    const repeated = index.search("beans beans");
    expect(repeated.indexOf(heavyUrl)).toBe(0);
    expect(repeated.indexOf(lightUrl)).toBe(1);
  });
});
+20
ts/searchEngine/index.ts
/**
 * Ranks indexed pages against a free-text query.
 *
 * The query is run through `extractKeywords` and the result is iterated as
 * `[keyword, count]` pairs (presumably `count` is how often the keyword occurs
 * in the query — confirm against `extractKeywords`). For every keyword that
 * has an entry in `this.index`, each page listed under it gains
 * `weight * count` points, where `weight` is the number stored for that page
 * under the keyword.
 *
 * @param query - Raw search text.
 * @returns Page URLs ordered from highest to lowest accumulated score.
 *   Keywords without an index entry are skipped, so a query that matches
 *   nothing yields an empty array.
 */
search(query: string): string[] {
  const scores = new Map<string, number>();
  const terms = this.extractKeywords(query);

  for (const [term, occurrences] of terms.entries()) {
    const postings = this.index.get(term);
    if (postings) {
      for (const [pageUrl, weight] of postings) {
        // A page that has not been scored yet starts from 0.
        const runningTotal = scores.get(pageUrl) || 0;
        scores.set(pageUrl, runningTotal + weight * occurrences);
      }
    }
  }

  // Highest score first.
  const ranked = Array.from(scores.entries());
  ranked.sort((left, right) => right[1] - left[1]);
  return ranked.map((entry) => entry[0]);
}