diff --git a/.gitignore b/.gitignore index b512c09..bf08dc9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,8 @@ -node_modules \ No newline at end of file +node_modules +*.yml +*.sh +*.json +DOCKER.md +Dockerfile +.dockerignore +PR_MESSAGE.md diff --git a/README.md b/README.md index 04e2dad..2010fe7 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,29 @@ There was no free Dictionary API on the web when I wanted one for my friend, so I created one. +## Data Source & License + +This API uses [Wiktionary](https://en.wiktionary.org/) as its data source via the Wiktionary REST API. Wiktionary is a free, collaboratively edited multilingual dictionary. + +### Attribution + +Dictionary definitions provided by this API are sourced from [Wiktionary](https://en.wiktionary.org/), a project of the [Wikimedia Foundation](https://wikimediafoundation.org/). + +The content from Wiktionary is available under the [Creative Commons Attribution-ShareAlike 4.0 International License (CC-BY-SA 4.0)](https://creativecommons.org/licenses/by-sa/4.0/). + +**If you use this API, you must:** +- Provide attribution to Wiktionary as the source of the definitions +- Include a link to the CC-BY-SA 4.0 license +- If you modify the content, you must distribute your contributions under the same license + +For individual word entries, the original contributors can be found in the page history at `https://en.wiktionary.org/wiki/{word}`. + ## Important Note The API usage has been ramping up rapidly, making it difficult for me to keep the server running due to increased AWS costs. Your support directly helps the development of Dictionary API and keeps the server running. 
- + ## Getting Started @@ -25,26 +42,18 @@ As an example, to get definition of English word **hello** using _v2_, you can s [ { "word": "hello", - "phonetic": "həˈləʊ", - "phonetics": [ - { - "text": "həˈləʊ", - "audio": "//ssl.gstatic.com/dictionary/static/sounds/20200429/hello--_gb_1.mp3" - }, - { - "text": "hɛˈləʊ" - } - ], - "origin": "early 19th century: variant of earlier hollo ; related to holla.", + "phonetics": [], "meanings": [ { - "partOfSpeech": "exclamation", + "partOfSpeech": "interjection", "definitions": [ { - "definition": "used as a greeting or to begin a phone conversation.", - "example": "hello there, Katie!", - "synonyms": [], - "antonyms": [] + "definition": "A greeting (salutation) said when meeting someone or acknowledging someone's arrival or presence.", + "example": "Hello, everyone." + }, + { + "definition": "A greeting used when answering the telephone.", + "example": "Hello? How may I help you?" } ] }, @@ -52,10 +61,8 @@ As an example, to get definition of English word **hello** using _v2_, you can s "partOfSpeech": "noun", "definitions": [ { - "definition": "an utterance of ‘hello’; a greeting.", - "example": "she was getting polite nods and hellos from people", - "synonyms": [], - "antonyms": [] + "definition": "\"Hello!\" or an equivalent greeting.", + "example": "They gave each other a quick hello when they met, and went back on their merry ways." } ] }, @@ -63,10 +70,7 @@ As an example, to get definition of English word **hello** using _v2_, you can s "partOfSpeech": "verb", "definitions": [ { - "definition": "say or shout ‘hello’.", - "example": "I pressed the phone button and helloed", - "synonyms": [], - "antonyms": [] + "definition": "To greet with \"hello\"." } ] } @@ -75,6 +79,8 @@ As an example, to get definition of English word **hello** using _v2_, you can s ] ``` +> **Note:** The API now uses Wiktionary as its data source. 
Response format remains compatible but some fields like `phonetic`, `origin`, `synonyms`, and `antonyms` may not always be present. + ### Regarding V1 Version The API earlier used to send response as shown below, but this structure of response was found out to be difficult to work with (you can take a look at these tickets [#32](https://github.com/meetDeveloper/freeDictionaryAPI/issues/32) and [#4](https://github.com/meetDeveloper/freeDictionaryAPI/issues/4)), based on feedback in these tickets I have updated the API to _v2_ version. But _v1_ version will always be supported for backward compatibility. @@ -82,40 +88,23 @@ The API earlier used to send response as shown below, but this structure of resp [ { "word": "hello", - "phonetic": "həˈləʊ", - "phonetics": [ - { - "text": "həˈləʊ", - "audio": "//ssl.gstatic.com/dictionary/static/sounds/20200429/hello--_gb_1.mp3" - }, - { - "text": "hɛˈləʊ" - } - ], - "origin": "early 19th century: variant of earlier hollo ; related to holla.", + "phonetics": [], "meaning": { - "exclamation": [ + "interjection": [ { - "definition": "used as a greeting or to begin a phone conversation.", - "example": "hello there, Katie!", - "synonyms": [], - "antonyms": [] + "definition": "A greeting (salutation) said when meeting someone.", + "example": "Hello, everyone." } ], "noun": [ { - "definition": "an utterance of ‘hello’; a greeting.", - "example": "she was getting polite nods and hellos from people", - "synonyms": [], - "antonyms": [] + "definition": "\"Hello!\" or an equivalent greeting.", + "example": "They gave each other a quick hello." } ], "verb": [ { - "definition": "say or shout ‘hello’.", - "example": "I pressed the phone button and helloed", - "synonyms": [], - "antonyms": [] + "definition": "To greet with \"hello\"." } ] } @@ -147,7 +136,7 @@ This Dictionary API was initially created as an API that could be used by my fri Kindly help me keep running and developing this API. 
Thanks a lot for using my API, it feels good when your creation help other create their own projects. - + ## Related Projects diff --git a/app.js b/app.js index 0865cd0..347ea8b 100644 --- a/app.js +++ b/app.js @@ -34,22 +34,23 @@ const { JSDOM } = require('jsdom'), // GLOBALS global._ = require('lodash'); -function cleanText (text) { +function cleanText(text) { if (!text) { return text; } - return parser - .parseFromString(text, "text/html") - .body.textContent; + const doc = parser.parseFromString(text, "text/html"); + const elementsToRemove = doc.querySelectorAll("style, script"); + elementsToRemove.forEach(el => el.remove()); + return doc.body.textContent; } -function handleError (error = {}) { +function handleError(error = {}) { // Using duck typing to know if we explicitly threw this error // If not then wrapping original error into UnexpectedError if (!error.requestType) { error = new errors.UnexpectedError({ original_error: error }); } const { requestType, title, message, resolution } = error; - status = REQUEST_TYPE_STATUS_CODE[requestType], + status = REQUEST_TYPE_STATUS_CODE[requestType], body = JSON.stringify({ title, message, @@ -77,7 +78,7 @@ app.get('/api/:version/entries/:language/:word', async (req, res) => { word = decodeURIComponent(word); if (!word || !language || !version) { - return handleError.call(res, new errors.NoDefinitionsFound()); + return handleError.call(res, new errors.NoDefinitionsFound()); } // @todo: Find better error. @@ -93,7 +94,7 @@ app.get('/api/:version/entries/:language/:word', async (req, res) => { // @todo: Find better error. 
if (!utils.isLanguageSupported(language)) { return handleError.call(res, new errors.NoDefinitionsFound()); } - word = word.trim().toLocaleLowerCase(language); + word = word.trim(); try { let definitions = await dictionary.findDefinitions(word, language, { include }), diff --git a/modules/dictionary.js b/modules/dictionary.js index afc9ac6..ffd2b35 100644 --- a/modules/dictionary.js +++ b/modules/dictionary.js @@ -8,133 +8,132 @@ const fs = require('fs'), httpsAgent = new https.Agent({ keepAlive: true }); -function transformV2toV1 (data) { +function transformV2toV1(data) { return data.map((entry) => { - let { - meanings, - ...otherProps - } = entry; - - meanings = meanings.reduce((meanings, meaning) => { - let partOfSpeech, definitions; - - ({ - partOfSpeech, - definitions - } = meaning); - meanings[partOfSpeech] = definitions; - - return meanings; - }, {}); - - return { - ...otherProps, - meaning: meanings - }; - }); + let { + meanings, + ...otherProps + } = entry; + + meanings = meanings.reduce((meanings, meaning) => { + let partOfSpeech, definitions; + + ({ + partOfSpeech, + definitions + } = meaning); + meanings[partOfSpeech] = definitions; + + return meanings; + }, {}); + + return { + ...otherProps, + meaning: meanings + }; + }); } -function transform (word, language, data, { include }) { +// Original Google transform function +function transformGoogle(word, language, data, { include }) { return data - .map(e => e.entry) - .filter(e => e) - .reduce((accumulator, entry) => { - if (!entry.subentries) { return accumulator.push(entry) && accumulator; } - - let { subentries } = entry, - mappedSubentries; - - if (subentries.length > 1) { - utils.logEvent(word, language, 'subentries length is greater than 1', { data }); - } - - if (entry.sense_families) { - utils.logEvent(word, language, 'entry has subentries and sense families', { data }); - } - - if (entry.etymology) { - utils.logEvent(word, language, 'entry has subentries and etymology', { data }); - } - - 
mappedSubentries = subentries - .map((subentry) => { - if (subentry.sense_families) { - utils.logEvent(word, language, 'subentry has sense families', { data }); - } - - if (subentry.sense_family) { - subentry.sense_families = []; - subentry.sense_families.push(subentry.sense_family); + .map(e => e.entry) + .filter(e => e) + .reduce((accumulator, entry) => { + if (!entry.subentries) { return accumulator.push(entry) && accumulator; } + + let { subentries } = entry, + mappedSubentries; + + if (subentries.length > 1) { + utils.logEvent(word, language, 'subentries length is greater than 1', { data }); + } + + if (entry.sense_families) { + utils.logEvent(word, language, 'entry has subentries and sense families', { data }); + } + + if (entry.etymology) { + utils.logEvent(word, language, 'entry has subentries and etymology', { data }); + } + + mappedSubentries = subentries + .map((subentry) => { + if (subentry.sense_families) { + utils.logEvent(word, language, 'subentry has sense families', { data }); + } + + if (subentry.sense_family) { + subentry.sense_families = []; + subentry.sense_families.push(subentry.sense_family); + } + + return _.defaults(subentry, _.pick(entry, ['phonetics', 'etymology'])) + }) + + return accumulator.concat(mappedSubentries); + }, []) + .map((entry) => { + let { headword, lemma, phonetics = [], etymology = {}, sense_families = [] } = entry; + + return { + word: lemma || headword, + phonetic: _.get(phonetics, '0.text'), + phonetics: phonetics.map((e) => { + return { + text: e.text, + audio: e.oxford_audio + }; + }), + origin: _.get(etymology, 'etymology.text'), + meanings: sense_families.map((sense_family) => { + let { parts_of_speech, senses = [] } = sense_family; + + if (!parts_of_speech) { + parts_of_speech = _.get(senses[0], 'parts_of_speech', []); + + if (senses.length > 1) { + utils.logEvent(word, language, 'part of speech missing but more than one sense present', { data }); + } + } + + if (parts_of_speech.length > 1) { + 
utils.logEvent(word, language, 'more than one part of speech present', { data }); + } + + return { + partOfSpeech: _.get(parts_of_speech[0], 'value'), + definitions: senses.map((sense) => { + let { definition = {}, example_groups = [], thesaurus_entries = [] } = sense, + result = { + definition: definition.text, + example: _.get(example_groups[0], 'examples.0'), + synonyms: _.get(thesaurus_entries[0], 'synonyms.0.nyms', []) + .map(e => e.nym), + antonyms: _.get(thesaurus_entries[0], 'antonyms.0.nyms', []) + .map(e => e.nym) + }; + + if (include.example) { + result.examples = _.reduce(example_groups, (accumulator, example_group) => { + let example = _.get(example_group, 'examples', []); + + accumulator = accumulator.concat(example); + + return accumulator; + }, []); } - return _.defaults(subentry, _.pick(entry, ['phonetics', 'etymology'])) + return result; }) - - return accumulator.concat(mappedSubentries); - }, []) - .map((entry) => { - let { headword, lemma, phonetics = [], etymology = {}, sense_families = [] } = entry; - - return { - word: lemma || headword, - phonetic: _.get(phonetics, '0.text'), - phonetics: phonetics.map((e) => { - return { - text: e.text, - audio: e.oxford_audio - }; - }), - origin: _.get(etymology, 'etymology.text'), - meanings: sense_families.map((sense_family) => { - let { parts_of_speech, senses = []} = sense_family; - - // if parts of speech is empty at this level. - // Current hypothesis tells that it means only one sense is present - // We need to take out parts_of_speech from it and use it. 
- if (!parts_of_speech) { - parts_of_speech = _.get(senses[0], 'parts_of_speech', []); - - if (senses.length > 1) { - utils.logEvent(word, language, 'part of speech missing but more than one sense present', { data }); - } - } - - if (parts_of_speech.length > 1) { - utils.logEvent(word, language, 'more than one part of speech present', { data }); - } - - return { - partOfSpeech: _.get(parts_of_speech[0], 'value'), - definitions: senses.map((sense) => { - let { definition = {}, example_groups = [], thesaurus_entries = [] } = sense, - result = { - definition: definition.text, - example: _.get(example_groups[0], 'examples.0'), - synonyms: _.get(thesaurus_entries[0], 'synonyms.0.nyms', []) - .map(e => e.nym), - antonyms: _.get(thesaurus_entries[0], 'antonyms.0.nyms', []) - .map(e => e.nym) - }; - - if (include.example) { - result.examples = _.reduce(example_groups, (accumulator, example_group) => { - let example = _.get(example_group, 'examples', []); - - accumulator = accumulator.concat(example); - - return accumulator; - }, []); - } - - return result; - }) - }; - }) - }; - }); + }; + }) + }; + }); } -async function queryInternet (word, language) { +// Original Google API query +async function queryGoogle(word, language) { let url = new URL('https://www.google.com/async/callback:5493'); url.searchParams.set('fc', 'ErUBCndBTlVfTnFUN29LdXdNSlQ2VlZoWUIwWE1HaElOclFNU29TOFF4ZGxGbV9zbzA3YmQ2NnJyQXlHNVlrb3l3OXgtREpRbXpNZ0M1NWZPeFo4NjQyVlA3S2ZQOHpYa292MFBMaDQweGRNQjR4eTlld1E4bDlCbXFJMBIWU2JzSllkLVpHc3J5OVFPb3Q2aVlDZxoiQU9NWVJ3QmU2cHRlbjZEZmw5U0lXT1lOR3hsM2xBWGFldw'); @@ -153,41 +152,102 @@ async function queryInternet (word, language) { }) }); - if (response.status === 404) { throw new errors.NoDefinitionsFound({ reason: 'Website returned 404.'}); } + if (response.status === 404) { throw new errors.NoDefinitionsFound({ reason: 'Website returned 404.' 
}); } if (response.status === 429) { throw new errors.RateLimitError(); } - if (response.status !== 200) { throw new errors.NoDefinitionsFound({ reason: 'Threw non 200 status code.'}); } + if (response.status !== 200) { throw new Error(`Google returned status ${response.status}`); } let body = await response.text(), data = JSON.parse(body.substring(4)), single_results = _.get(data, 'feature-callback.payload.single_results', []), - error = _.chain(single_results) - .find('widget') - .get('widget.error') - .value() + error = _.chain(single_results) + .find('widget') + .get('widget.error') + .value() if (single_results.length === 0) { throw new errors.NoDefinitionsFound({ word, language }); } if (error === 'TERM_NOT_FOUND_ERROR') { throw new errors.NoDefinitionsFound({ word, language }); } - if (error) { throw new errors.UnexpectedError({ error }); } + if (error) { throw new Error(`Google returned error: ${error}`); } return single_results; } -async function fetchFromSource (word, language) { - let dictionaryData = await queryInternet(word, language); - - return dictionaryData; +// Wiktionary transform function +function transformWiktionary(word, data) { + return [{ + word: word, + phonetic: '', + phonetics: [], + origin: '', + meanings: data.map(entry => ({ + partOfSpeech: entry.partOfSpeech.toLowerCase(), + definitions: entry.definitions.map(def => ({ + definition: def.definition, + example: def.examples && def.examples.length > 0 ? 
def.examples[0] : undefined, + synonyms: [], + antonyms: [] + })) + })) + }]; } -async function findDefinitions (word, language, { include }) { - let dictionaryData = await fetchFromSource(word, language); - - if (_.isEmpty(dictionaryData)) { throw new errors.UnexpectedError(); } +// Wiktionary API query +async function queryWiktionary(word, language) { + const candidates = _.uniq([ + word, + word.toLowerCase(), + word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(), + word.toUpperCase() + ]); + + for (const candidate of candidates) { + const url = `https://en.wiktionary.org/api/rest_v1/page/definition/${encodeURIComponent(candidate)}`; + + try { + const response = await fetch(url); + + if (response.status === 200) { + const json = await response.json(); + if (json[language]) { + return { data: json[language], word: candidate }; + } + } + } catch (err) { + // Ignore errors and try next candidate + console.error(`Wiktionary: Failed to fetch for candidate: ${candidate}`, err.message); + } + } + + return null; +} - return transform(word, language, dictionaryData, { include }); +async function findDefinitions(word, language, { include }) { + // Strategy: Try Google first (has richer data), fallback to Wiktionary if Google fails + + // Try Google first + try { + const googleData = await queryGoogle(word, language); + if (!_.isEmpty(googleData)) { + console.log(`Using Google data for: ${word}`); + return transformGoogle(word, language, googleData, { include }); + } + } catch (googleError) { + console.log(`Google failed for "${word}": ${googleError.message}, trying Wiktionary...`); + } + + // Fallback to Wiktionary + const wiktionaryResult = await queryWiktionary(word, language); + + if (wiktionaryResult) { + console.log(`Using Wiktionary data for: ${word}`); + return transformWiktionary(wiktionaryResult.word, wiktionaryResult.data); + } + + // Both sources failed + throw new errors.NoDefinitionsFound({ word, language }); } module.exports = {