-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.ts
39 lines (33 loc) · 1.11 KB
/
scrape.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import { load } from 'cheerio';
import { createObjectCsvWriter } from 'csv-writer';
// Letter suffixes substituted into the `haw-<letter>.htm` page URLs that get scraped.
const letters = Array.from('aehiklmnopuw');
/**
 * Downloads one page and extracts its headword/definition pairs.
 *
 * Every `p.hw` paragraph matched by the table selector is treated as one
 * entry: the word is the text of `span.HwNew > a.hw` and the definition is the
 * text of `span.def` inside that paragraph. Paragraphs whose word text is
 * empty are skipped.
 *
 * @param url - Address of the page to download and parse.
 * @returns The extracted entries, in the order the paragraphs were matched.
 * @throws Error when the server responds with a non-success HTTP status.
 */
const getDefinitions = async (
  url: string,
): Promise<{ word: string; definition: string }[]> => {
  console.log("Loading URL: ", url);
  const response = await fetch(url);
  // fetch() only rejects on network failure; without this check an error page
  // would be parsed as HTML and silently contribute zero entries.
  if (!response.ok) {
    throw new Error(
      `Failed to load ${url}: HTTP ${response.status} ${response.statusText}`,
    );
  }
  console.log("Loaded");
  const data = await response.text();
  console.log("Parsing");
  const $ = load(data);
  const hwTags = $('html > body > table > tbody > tr > td > p.hw');
  console.log("Parsed: ", hwTags.length);

  const definitions: { word: string; definition: string }[] = [];
  for (const el of hwTags.toArray()) {
    const entry = $(el);
    const word = entry.find('span.HwNew > a.hw').text();
    // Paragraphs without headword text are not dictionary entries; skip them.
    if (!word) continue;
    const definition = entry.find('span.def').text();
    definitions.push({ word, definition });
  }
  return definitions;
};
// Build one page URL per letter, then scrape all pages concurrently.
const pageUrls = letters.map(
  (letter) => `https://www.trussel2.com/HAW/haw-${letter}.htm`,
);
const definitionsPerPage = await Promise.all(
  pageUrls.map((pageUrl) => getDefinitions(pageUrl)),
);

// Collapse the per-page arrays into a single list, preserving letter order.
const definitions = definitionsPerPage.flat();

// Write every entry to a two-column CSV file (Word, Definition).
const csvHeader = [
  { id: 'word', title: 'Word' },
  { id: 'definition', title: 'Definition' },
];
const csvWriter = createObjectCsvWriter({
  path: 'hawaiian-words.csv',
  header: csvHeader,
});
await csvWriter.writeRecords(definitions);