Это не официальный сайт wikipedia.org 01.01.2023

Модуль:WDSource — Википедия

Модуль:WDSource

Документация

Данный модуль предназначен для получения данных об источнике информации по элементам Викиданных. Работает на основе модуля Модуль:WDBackend.

Модуль используется модулем Модуль:CiteGost для оформления библиографических записей и по своей сути является его обособленной частью.

Схема источника

Общая схема источника в виде иерархии таблиц:

Тесты

В качестве тестов используются тесты вышележащего модуля Модуль:CiteGost.

См. также

  • CiteGost — модуль для форматирования информации об источнике, получающий информацию посредством модуля WDSource.
local p = {}
local wikidata = require('Модуль:WDCommon')
local wdLang = require('Модуль:WDLang')
local backend = require('Модуль:WDBackend')

-- Entity ids accepted wherever this module fills a `contentType` field
-- (used as the `allowedEntities` list of those map entries).
local contentTypeEntities = {
	'Q30070675',
	'Q108676767',
	'Q478798',
	'Q60533375',
	'Q11424',
	'Q187947',
	'Q2376293',
}

-- Name components fetched for each contributor item; this list is reused as
-- the `get` sub-map of the authors / illustrators / translators / editors
-- entries further down in the file.
local contributorComponentsMap = {
	{ name = 'familyName', property = 'P734', max = 1, isLocal = true },
	{ name = 'givenName', property = 'P735', isLocal = true },
	{ name = 'ancestorName', property = 'P5056', max = 1, isLocal = true },
}

-- Map item that fills `publishedInOriginLang` (P407) and, through nested
-- lookups, `publishedInOriginLangCode` (P218). It is plugged into the
-- `publishedInOrigin` entries of publishedInMap.
local publishedInOriginLang = {
	name = 'publishedInOriginLang',
	property = 'P407',
	get = {
		{
			name = 'publishedInOriginLangCode',
			property = 'P218',
			max = 1,
			-- for ethnolects determine parent language: follow P279 and read
			-- the code from there, replacing both fields
			elseGet = {
				{
					name = 'publishedInOriginLang',
					property = 'P279',
					max = 1,
					overwrite = true,
					get = {
						{ name = 'publishedInOriginLangCode', property = 'P218', max = 1, overwrite = true },
					},
				},
			},
		},
	},
}

-- Field map applied to the `publishedIn` item (passed to f:fetch in p.fetch).
-- Each entry names the target field in `source` and the Wikidata property it
-- is read from; nested `get` / `qualifiers` / `elseGet` lists describe further
-- lookups. The exact semantics of the keys (max, isArray, overwrite, match,
-- allowedEntities, ...) are defined by Module:WDBackend, not in this file.
-- Several names appear more than once with different properties
-- (publishedInWorkType, contentType, publishedInOrigin, urlMask) — presumably
-- later entries act as fallbacks; confirm against WDBackend before reordering.
local publishedInMap = {
	{
		name = 'publishedIn',
		getValue = wikidata.name,
	},
	{
		name = 'publishedInSubtitle',
		property = 'P1680',
		filter = wikidata.base.tryFilterStatementsByLang,
	},
	{
		name = 'edition',
		property = 'P9767',
		max = 1,
	},
	{
		name = 'publishedInWorkType',
		property = 'P31',
		allowedEntities = { 'Q1404878', 'Q83790', 'Q2250844', 'Q13136', 'Q23622', 'Q5292', 'Q615699', 'Q5633421' },
	},
	{
		name = 'publishedInEditionType',
		property = 'P31',
		allowedEntities = { 'Q3331189', 'Q1238720', 'Q571' },
	},
	{
		name = 'publishedInWorkType',
		property = 'P7937',
	},
	{
		name = 'isScholarlyArticle',
		property = 'P31',
		mapEntity = { Q5633421 = 'Q13442814' },
	},
	{
		name = 'publishedInAuthors',
		properties = { 'P50', 'P2093' },
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'publishedInIllustrators',
		property = 'P110',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'isLikeBook',
		property = 'P31',
		allowedEntities = { 'Q571', 'Q128093', 'Q5292' },
	},
	-- Unnamed entry: only its nested lookup stores a field (`contentType`
	-- read through the P1056 target).
	{
		property = 'P1056',
		get = {
			{
				name = 'contentType',
				property = 'P4330',
				allowedEntities = contentTypeEntities,
			},
		},
	},
	{
		name = 'contentType',
		property = 'P4330',
		allowedEntities = contentTypeEntities,
	},
	-- `publishedInOrigin` is looked up through P9745 first and P629 second;
	-- both also resolve the origin language via publishedInOriginLang.
	{
		name = 'publishedInOrigin',
		property = 'P9745',
		getValue = wikidata.name,
		get = {
			publishedInOriginLang
		},
	},
	{
		name = 'publishedInOrigin',
		property = 'P629',
		getValue = wikidata.name,
		get = {
			publishedInOriginLang
		},
	},
	-- `idType` and `urlMask` are consumed later: topicMap reads the id via
	-- propertyPath { 'idType', 'entity' } and fetchUrl fills `$1` in urlMask.
	{
		name = 'idType',
		property = 'P1687',
		max = 1,
		get = {
			{
				name = 'urlMask',
				property = 'P1630',
				max = 1,
			},
		},
	},
	{
		name = 'urlMask',
		property = 'P4354',
		max = 1,
	},
	{
		name = 'publishedInTranslators',
		property = 'P655',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'publishedInEditorInChief',
		property = 'P5769',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'publishedInEditors',
		property = 'P98',
		isArray = true,
		get = contributorComponentsMap,
	},
	-- `isbn` is read from P212 together with its publication qualifiers;
	-- the `elseGet` branch reads P957 with the same qualifier set.
	{
		name = 'isbn',
		property = 'P212',
		match = true,
		qualifiers = {
			{
				name = 'date',
				property = 'P577',
				max = 1,
			},
			{
				name = 'location',
				property = 'P291',
			},
			{
				name = 'publisher',
				property = 'P123',
				max = 1,
			},
			{
				name = 'pagesCount',
				property = 'P1104',
				defaultUnit = 'Q1069725',
				max = 1,
			},
		},
		elseGet = {
			{
				name = 'isbn',
				property = 'P957',
				qualifiers = {
					{
						name = 'date',
						property = 'P577',
						max = 1,
					},
					{
						name = 'location',
						property = 'P291',
					},
					{
						name = 'publisher',
						property = 'P123',
						max = 1,
					},
					{
						name = 'pagesCount',
						property = 'P1104',
						defaultUnit = 'Q1069725',
						max = 1,
					},
				},
			},
		},
	},
	{
		name = 'oclc',
		property = 'P243',
	},
	{
		name = 'date',
		property = 'P577',
	},
	{
		name = 'location',
		property = 'P291',
	},
	-- The P1932 qualifier replaces the publisher value (overwrite = true).
	{
		name = 'publisher',
		property = 'P123',
		max = 1,
		qualifiers = {
			{
				name = 'publisher',
				property = 'P1932',
				overwrite = true,
				max = 1,
			},
		},
	},
	{
		name = 'pagesCount',
		property = 'P1104',
		defaultUnit = 'Q1069725',
		max = 1,
	},
	{
		name = 'issn',
		property = 'P236',
	},
	{
		name = 'publishedInPartsCount',
		property = 'P2635',
		allowedUnits = { 'Q1238720' },
		max = 1,
	},
}

-- Field map applied to the `work` item (see p.fetch). Both entries feed the
-- `detectedInfo` array: one from P31 and one from P136, each restricted to
-- the same set of entity ids.
local workMap = {
	{
		name = 'detectedInfo', property = 'P31', isArray = true,
		allowedEntities = {
			'Q1404878', 'Q83790', 'Q2250844', 'Q13136',
			'Q23622', 'Q5292', 'Q615699', 'Q5633421',
		},
	},
	{
		name = 'detectedInfo', property = 'P136', isArray = true,
		allowedEntities = {
			'Q1404878', 'Q83790', 'Q2250844', 'Q13136',
			'Q23622', 'Q5292', 'Q615699', 'Q5633421',
		},
	},
}

-- Field map applied to the `workVersion` item (passed to f:fetch in p.fetch).
-- Each entry names the target field in `source` and the Wikidata property it
-- is read from; nested `get` / `qualifiers` / `elseGet` lists describe further
-- lookups. The exact semantics of the keys (max, isArray, overwrite, match,
-- has, allowedEntities, ...) are defined by Module:WDBackend, not in this file.
-- Some names are listed more than once (workType, editors, publishedIn) —
-- presumably the first successful lookup wins; confirm against WDBackend
-- before reordering or removing entries.
local workVersionMap = {
	{
		name = 'isScholarlyArticle',
		property = 'P31',
		allowedEntities = { 'Q13442814' },
	},
	{
		name = 'isVolume',
		property = 'P31',
		allowedEntities = { 'Q1238720' },
	},
	{
		name = 'contentType',
		property = 'P31',
		allowedEntities = contentTypeEntities,
	},
	{
		name = 'workType',
		property = 'P31',
		allowedEntities = { 'Q1404878', 'Q83790', 'Q2250844', 'Q13136', 'Q23622', 'Q5292', 'Q615699', 'Q5633421' },
	},
	{
		name = 'workType',
		property = 'P7937',
		max = 1,
	},
	-- `isbn` is read from P212 together with its publication qualifiers;
	-- the `elseGet` branch reads P957 with a similar qualifier set (without
	-- the nested P1932 publisher override).
	{
		name = 'isbn',
		property = 'P212',
		match = true,
		qualifiers = {
			{
				name = 'date',
				property = 'P577',
				max = 1,
			},
			{
				name = 'location',
				property = 'P291',
			},
			{
				name = 'publisher',
				property = 'P123',
				max = 1,
				qualifiers = {
					{
						name = 'publisher',
						property = 'P1932',
						overwrite = true,
						max = 1,
					},
				},
			},
			{
				name = 'pagesCount',
				property = 'P1104',
				defaultUnit = 'Q1069725',
				max = 1,
			},
		},
		elseGet = {
			{
				name = 'isbn',
				property = 'P957',
				qualifiers = {
					{
						name = 'date',
						property = 'P577',
						max = 1,
					},
					{
						name = 'location',
						property = 'P291',
					},
					{
						name = 'publisher',
						property = 'P123',
						max = 1,
					},
					{
						name = 'pagesCount',
						property = 'P1104',
						defaultUnit = 'Q1069725',
						max = 1,
					},
				},
			},
		},
	},
	{
		name = 'authors',
		properties = { 'P50', 'P2093' },
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'illustrators',
		property = 'P110',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'title',
		getValue = wikidata.name,
	},
	{
		name = 'subtitle',
		property = 'P1680',
		filter = wikidata.base.tryFilterStatementsByLang,
	},
	{
		name = 'edition',
		property = 'P9767',
		max = 1,
	},
	-- `info` items are later compared by `entity` with `detectedInfo`
	-- (see removeInfoDuplicates).
	{
		name = 'info',
		property = 'P1684',
		has = {
			{ property='P2868', value='Q116158574' },
		},
		isArray = true,
		qualifiers = {
			{
				property = 'P6568',
				overwriteEntity = true,
			},
		},
	},
	{
		name = 'editors',
		property = 'P98',
		isArray = true,
		get = contributorComponentsMap,
	},
	-- NOTE(review): the name 'P5769' looks like a property id pasted into
	-- `name`; the entry reads P98, the same property as 'editors'. Probably a
	-- copy-paste leftover — confirm that nothing reads `source.P5769`.
	{
		name = 'P5769',
		property = 'P98',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'editorInChief',
		property = 'P5769',
		isArray = true,
		get = contributorComponentsMap,
	},
	-- NOTE(review): repeats the 'editors' (P98) entry above verbatim —
	-- confirm whether the repetition is intentional.
	{
		name = 'editors',
		property = 'P98',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'translators',
		property = 'P655',
		isArray = true,
		get = contributorComponentsMap,
	},
	{
		name = 'volume',
		property = 'P478',
		qualifiers = {
			-- NOTE(review): partTitle and volumeTitle both read the same
			-- qualifier (P1476) — confirm this is intended.
			{
				name = 'partTitle',
				property = 'P1476',
				max = 1,
			},
			{
				name = 'volumeTitle',
				property = 'P1476',
				max = 1,
			},
		},
	},
	{
		name = 'issue',
		property = 'P433',
	},
	{
		name = 'date',
		property = 'P577',
		qualifiers = {
			{
				name = 'startDate',
				property = 'P580',
				max = 1,
			},
			{
				name = 'endDate',
				property = 'P582',
				max = 1,
			},
		},
	},
	{
		name = 'pages',
		property = 'P304',
	},
	{
		name = 'pagesCount',
		property = 'P1104',
		defaultUnit = 'Q1069725',
	},
	{
		name = 'articleId',
		property = 'P2322',
	},
	-- `url` and its `urlStatus` qualifier are inspected by fetchUrl, which
	-- may discard the url and rebuild it from an id and a url mask.
	{
		name = 'url',
		property = 'P953',
		max = 1,
		qualifiers = {
			{
				name = 'archiveUrl',
				property = 'P1065',
				max = 1,
			},
			{
				name = 'archiveDate',
				property = 'P2960',
				max = 1,
			},
			{
				name = 'urlStatus',
				property = 'P6954',
				max = 1,
			},
		},
	},
	{
		name = 'location',
		property = 'P291',
	},
	{
		name = 'publisher',
		property = 'P123',
		max = 1,
		qualifiers = {
			{
				-- for unknown value
				-- NOTE(review): publishedInMap sets overwrite = true on this
				-- same qualifier; it is absent here — confirm.
				name = 'publisher',
				property = 'P1932',
				max = 1,
			},
		}
	},
	{
		name = 'origin',
		property = 'P629',
		getValue = wikidata.name,
		get = {
			{
				name = 'originLang',
				property = 'P407',
				get = {
					{
						name = 'langCode',
						property = 'P218',
						max = 1,
					}
				},
			},
		},
	},
	{
		name = 'partsCount',
		property = 'P2635',
		max = 1,
	},
	{
		name = 'dedicatedTo',
		property = 'P825',
	},
	{
		name = 'doi',
		property = 'P356',
	},
	{
		name = 'oclc',
		property = 'P243',
	},
	{
		name = 'pmid',
		property = 'P698',
	},
	-- fetchUrl reads `pmc.components.urlMask` as a fallback url template, so
	-- the nested urlMask is presumably stored under the pmc field
	-- (isLocal = true) — confirm against WDBackend.
	{
		name = 'pmc',
		property = 'P932',
		get = {
			{
				name = 'urlMask',
				entity = 'P932',
				property = 'P1630',
				max = 1,
				isLocal = true,
			},
		},
	},
	{
		name = 's2sic',
		property = 'P8299',
	},
	{
		name = 'publishedIn',
		property = 'P1433',
		getValue = wikidata.name,
		max = 1,
		qualifiers = {
			-- probably, wrong way, different publications must have different items
			{
				name = 'volume',
				property = 'P478',
			},
			{
				name = 'issue',
				property = 'P433',
			},
			{
				name = 'date',
				property = 'P577',
			},
			{
				name = 'startDate',
				property = 'P580',
				max = 1,
			},
			{
				name = 'endDate',
				property = 'P582',
				max = 1,
			},
			{
				name = 'pages',
				property = 'P304',
			},
			{
				name = 'articleId',
				property = 'P2322',
			},
		},
	},
	-- `work` is read from P629, the same property as `origin` above; it is
	-- the item that p.fetch later processes with workMap.
	{
		name = 'work',
		property = 'P629',
		max = 1,
	},
	-- Second `publishedIn` source: P361, with the parts count of that item.
	{
		name = 'publishedIn',
		property = 'P361',
		getValue = wikidata.name,
		max = 1,
		get = {
			{
				name = 'partsCount',
				property = 'P2635',
				allowedUnits = { 'Q1238720' },
				max = 1,
			},
		},
	},
	{
		name = 'series',
		property = 'P179',
		max = 1,
		qualifiers = {
			{
				name = 'seriesIssue',
				property = 'P433',
				max = 1,
			},
		},
	},
}

-- Field map applied to the `topic` item (see p.fetch).
local topicMap = {
	-- The id statement is located through propertyPath { 'idType', 'entity' };
	-- its qualifiers supply the title, dates, archive data and author strings.
	{
		name = 'id',
		propertyPath = { 'idType', 'entity' },
		max = 1,
		qualifiers = {
			{ name = 'title', property = 'P1810', max = 1 },
			{ name = 'date', property = 'P577', max = 1 },
			{ name = 'archiveUrl', property = 'P1065', max = 1 },
			{ name = 'archiveDate', property = 'P2960', max = 1 },
			{ name = 'authors', property = 'P2093', isArray = true },
		},
	},
	-- if P1810 id qualifier is not specified
	{ name = 'title', getValue = wikidata.name },
}

-- Extra map run on `workVersion` after the main maps (last entry of the list
-- in p.fetch): reads an `id` from P675 and replaces `urlMask` with the P1630
-- value looked up on the entity 'P675' (overwrite = true).
local alternativeUrl = {
	{
		name = 'id',
		property = 'P675',
		max = 1,
		get = {
			{ name = 'urlMask', entity = 'P675', property = 'P1630', max = 1, overwrite = true },
		},
	},
}

--- Makes sure `source.url` is set when it can be derived from an identifier.
-- An existing url is kept unless `source.urlStatus.entity` is 'Q1193907'
-- (presumably the "dead link" status — confirm), in which case it is dropped
-- and rebuilt. The url is then built from the first available pair:
--   1. `source.id` + `source.urlMask`
--   2. `source.pmc` + `source.pmc.components.urlMask`
-- by substituting the id for every `$1` in the mask. If neither pair is
-- available the url is left unset.
-- @param f fetcher object; only `f:safeField(source, name)` is used
-- @param source source table, modified in place
local function fetchUrl(f, source)
	if source.url then
		if source.urlStatus and source.urlStatus.entity == 'Q1193907' then
			source.url = nil
		else
			return
		end
	end

	-- The id is inserted through a function replacement so that it is taken
	-- literally. Passed as a replacement *string*, a `%` inside the id would
	-- be interpreted by gsub (`%2F` raises "invalid capture index" in
	-- Lua 5.1, `%x` silently loses the `%`).
	-- The extra parentheses drop gsub's second result (the match count).
	local function fillMask(mask, id)
		return (mask:gsub('%$1', function()
			return id
		end))
	end

	local idTable = f:safeField(source, 'id')
	if idTable.value and source.urlMask then
		source.url = { value = fillMask(source.urlMask.value, idTable.value) }
		return
	end

	local pmcTable = f:safeField(source, 'pmc')
	if pmcTable.value and pmcTable.components and pmcTable.components.urlMask then
		source.url = {
			value = fillMask(pmcTable.components.urlMask.value, pmcTable.value)
		}
		return
	end
end

--- Returns the language code stored in `source.langCode`.
-- The field may hold the code directly or a table carrying it in `.value`.
-- @param source source table
-- @return the code, or nil when `source.langCode` is not set
local function getLangCode(source)
	local code = source.langCode
	if type(code) ~= 'table' then
		return code
	end
	return code.value
end

--- Determines the language of the source and synchronises it with the fetcher.
-- On return `source.langCode` and `source.lang` are set and `f.lang` holds the
-- code in use. Lookup order, as implemented below:
--   1. an already present `source.langCode`;
--   2. P407/P218 of the `workVersion` item, or of its P1433 target;
--   3. P407/P218 of the `publishedIn` item (with a P279 fallback);
--   4. the fetcher's own language after `f:ensureLang()`.
-- @param f fetcher object (uses f:fetch, f:ensureLang and the f.lang field)
-- @param source source table, modified in place
local function fetchLang(f, source)
	-- Remember whether the caller supplied `publishedIn`; the fetches below
	-- may fill it in and it is reset at the end if it was initially absent.
	local publishedInTable = source.publishedIn

	-- Shared map item: language (P407) plus its code (P218).
	local langMapItem = {
		name = 'lang',
		property = 'P407',
		max = 1,
		get = {
			{
				name = 'langCode',
				property = 'P218',
				max = 1,
			},
		},
	}
	-- Only query the work version when the caller gave no language code.
	if not source.langCode then
		f:fetch(source, {
			{
				name = 'workVersion',
				get = {
					langMapItem,
					{
						name = 'publishedIn',
						property = 'P1433',
						getValue = wikidata.name,
						max = 1,
						get = {
							langMapItem,
						},
					},
				},
			},
		})
	end

	-- Always fetch the language of the containing publication into the
	-- separate publishedInLang / publishedInLangCode fields.
	f:fetch(source, {
		{
			name = 'publishedIn',
			get = {
				{
					name = 'publishedInLang',
					property = 'P407',
					max = 1,
					get = {
						{
							name = 'publishedInLangCode',
							property = 'P218',
							max = 1,
							elseGet = {
								-- for ethnolects determine parent language
								{
									name = 'publishedInLang',
									property = 'P279',
									overwrite = true,
									max = 1,
									get = {
										{
											name = 'publishedInLangCode',
											property = 'P218',
											overwrite = true,
											max = 1,
										},
									},
								},
							},
						},
					},
				},
			},
		},
	})

	-- Still no code: fall back to the publication's language.
	if not source.langCode then
		if source.publishedInLang then
			source.lang = source.publishedInLang
		end
		if source.publishedInLangCode then
			source.langCode = source.publishedInLangCode
		end
	end
	
	-- Last resort: take the fetcher's language; otherwise push the detected
	-- code into the fetcher so that both sides agree.
	if not source.langCode then
		f:ensureLang()
		source.langCode = { value = f.lang }
	else
		f.lang = getLangCode(source)
	end
	-- Derive the language entity from the code when only the code is known.
	if not source.lang then
		source.lang = {
			entity = wdLang.langEntity(f.lang),
		}
	end
	
	-- publishedIn field need to be empty to get its qualifiers later
	if publishedInTable == nil then
		source.publishedIn = nil
	end
end

--- Remove all duplicates from inscriptions of a book.
-- Deletes from `source.detectedInfo` every item whose `entity` equals the
-- `entity` of some item in `source.info`. Does nothing when either list is
-- missing (previously a missing `detectedInfo` made `ipairs(nil)` raise).
-- NOTE(review): the function is a global, as in the original; it is kept
-- global here so that any outside caller keeps working — consider `local`.
-- @param source source table, modified in place
function removeInfoDuplicates(source)
	local info = source.info
	local detected = source.detectedInfo
	if not info or not detected then
		return
	end

	for _, infoItem in ipairs(info) do
		-- Walk backwards: table.remove shifts the following elements down, so
		-- a forward loop would skip the element right after each removal.
		for i = #detected, 1, -1 do
			if detected[i].entity == infoItem.entity then
				table.remove(detected, i)
			end
		end
	end
end

--- Fills `source` with bibliographic data and returns it.
-- Steps: create a fetcher for the source's language, resolve the language
-- (fetchLang), run the field maps for the `workVersion`, `publishedIn`,
-- `topic` and `work` items, then the alternative-url map on `workVersion`;
-- finally link the title to the work version entity, drop `detectedInfo`
-- items already present in `info`, and derive `source.url` if needed.
-- @param source source table; modified in place
-- @return the same `source` table
function p.fetch(source)
	-- `f` is local: the original assigned it without `local`, which leaked
	-- the fetcher into the global environment.
	local f = backend.new(getLangCode(source))
	fetchLang(f, source)
	f:assertLang()

	-- Order matters: `workVersion` is listed twice so that alternativeUrl
	-- runs after every other map.
	f:fetch(source, {
		{
			name = 'workVersion',
			get = workVersionMap,
		},
		{
			name = 'publishedIn',
			get = publishedInMap,
		},
		{
			name = 'topic',
			get = topicMap,
		},
		{
			name = 'work',
			get = workMap,
		},
		{
			name = 'workVersion',
			get = alternativeUrl,
		},
	})

	-- Attach the work version's entity to the title when a title was found.
	local titleTable = f:safeField(source, 'title')
	if titleTable.value then
		local workTable = f:safeField(source, 'workVersion')
		titleTable.entity = workTable.entity
	end

	removeInfoDuplicates(source)

	fetchUrl(f, source)
	return source
end

return p