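-- Scraped wiki page header, kept as a comment so that the file parses as Lua:
--   "K wobsahej skočić"   (navigation link text)
--   "Modul:string/nowiki" (page title)
--   "z Wikisłownika"      (site tagline)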

local table_list_to_set_module = "Module:table/listToSet"

local byte = string.byte
local char = string.char
local find = string.find
local format = string.format
local gsub = string.gsub
local lower = string.lower
local sub = string.sub

-- Lazy loader. The first call requires the real implementation, rebinds this
-- local to it so that later calls bypass the stub, and forwards the arguments.
local function list_to_set(...)
	local impl = require(table_list_to_set_module)
	list_to_set = impl
	return impl(...)
end

-- Value callback handed to list_to_set(): ignores its first argument and
-- renders the byte value `code` as a decimal character reference ("&#N;").
local function escape_byte(_, code)
	local entity = format("&#%d;", code)
	return entity
end

-- Lookup of the characters that nowiki() escapes wherever they occur, built on
-- first use. Presumably maps each listed character to its decimal character
-- reference (e.g. "&" to "&#38;"); the exact table shape is decided by the
-- listToSet module, which is not visible here.
local absolute
local function get_absolute()
	local function to_entity(_, ch)
		return format("&#%d;", byte(ch))
	end
	local always_escaped = {"\"", "&", "'", ";", "<", "=", ">", "[", "]", "{", "|", "}"}
	absolute = list_to_set(always_escaped, to_entity)
	-- The builder is single-use; callers guard with `absolute or get_absolute()`.
	get_absolute = nil
	return absolute
end

-- Lookup, built on first use, of the bytes that nowiki() escapes when they come
-- directly after a line break: tab, LF, CR, space, "!", "#", "*" and ":".
local after_newline
local function get_after_newline()
	local line_start_bytes = {0x9, 0xA, 0xD, 0x20, 0x21, 0x23, 0x2A, 0x3A}
	after_newline = list_to_set(line_start_bytes, escape_byte)
	-- The builder is single-use; callers guard with
	-- `after_newline or get_after_newline()`.
	get_after_newline = nil
	return after_newline
end

-- Lookups used by nowiki() when it meets a colon, all built on first use:
--   uri_schemes         : set of the listed URI scheme names.
--   scheme_chars        : byte values of the letters (both cases) that occur
--                         in at least one listed scheme.
--   unused_scheme_chars : byte values of the remaining letters (both cases),
--                         plus the underscore.
local scheme_chars, unused_scheme_chars, uri_schemes
local function get_scheme_chars()
	local schemes = {"bitcoin", "geo", "magnet", "mailto", "matrix", "news", "sip", "sips", "sms", "tel", "urn", "xmpp"}
	uri_schemes = list_to_set(schemes)
	get_scheme_chars = nil
	-- One long string of every scheme name, so that "does this letter occur in
	-- any scheme?" is a single plain-text find().
	local joined = table.concat(schemes)
	local used, unused = {}, {}
	for lower_byte = 0x61, 0x7A do -- "a" .. "z"
		local upper_byte = lower_byte - 0x20
		if find(joined, char(lower_byte), nil, true) then
			used[lower_byte] = true
			used[upper_byte] = true
		else
			unused[lower_byte] = true
			unused[upper_byte] = true
		end
	end
	unused[0x5F] = true -- _
	scheme_chars, unused_scheme_chars = used, unused
	return used
end

-- Lookup, built on first use, of the whitespace bytes (tab, LF, FF, CR, space)
-- that nowiki() escapes when they directly follow "ISBN", "PMID" or "RFC".
local after_magic_link
local function get_after_magic_link()
	local whitespace_bytes = {0x9, 0xA, 0xC, 0xD, 0x20}
	after_magic_link = list_to_set(whitespace_bytes, escape_byte)
	-- The builder is single-use; callers guard with
	-- `after_magic_link or get_after_magic_link()`.
	get_after_magic_link = nil
	return after_magic_link
end

-- Temporary variables. No risk of collisions across stack levels, since there's
-- no way for nowiki() to be called recursively.
--   _str      : the string being processed; nowiki() reads neighbouring bytes
--               from it.
--   esc       : replacement chosen for the current match.
--   esc_next  : decision queued for the next match (a string to substitute, or
--               false to leave that match untouched).
--   esc_next2 : decision queued for the match after that.
local _str, esc, esc_next, esc_next2

-- Replacement callback passed to gsub() by the exported function.
-- `loc` is the position capture of the matched character and `ch` is the
-- character itself. Returns the replacement string; returning nothing (or nil)
-- makes gsub() keep the character unchanged.
local function nowiki(loc, ch)
	-- An earlier call already decided what happens to this match: shift the
	-- queue along and apply that decision without looking at `ch`.
	if esc_next ~= nil then
		esc, esc_next, esc_next2 = esc_next, esc_next2, nil
		return esc or nil -- a queued `false` becomes nil, i.e. "keep as-is"
	end
	-- Characters found in `absolute` are escaped wherever they occur.
	esc = (absolute or get_absolute())[ch]
	if esc then
		return esc
	-- After a line break, queue an escape for the following byte if it is one
	-- of the `after_newline` bytes.
	elseif ch == "\n" or ch == "\r" then
		esc_next = (after_newline or get_after_newline())[byte(_str, loc + 1)]
	-- "!!": leave the first "!" alone and queue an escape for the second.
	elseif ch == "!" then
		if byte(_str, loc + 1) == 0x21 then -- !
			esc_next = "&#33;"
		end
	elseif ch == ":" then
		-- A colon directly followed by "//" is always escaped.
		if sub(_str, loc + 1, loc + 2) == "//" then
			return "&#58;"
		end
		-- Scan backwards from the colon over letters that occur in at least one
		-- listed scheme; `b` ends up as the first byte that is not such a letter
		-- (nil once the scan runs past the start of the string).
		local n, b = 0
		repeat
			n = n + 1
			b = byte(_str, loc - n)
		until not (scheme_chars or get_scheme_chars())[b]
		-- Abort on an unused scheme character, as it can't be a scheme.
		if unused_scheme_chars[b] then
			return
		-- Otherwise, check if the word before the colon matches a scheme.
		elseif uri_schemes[lower(sub(_str, loc - n + 1, loc - 1))] then
			return "&#58;"
		end
	-- Escape the first hyphen of "----" when the byte before it in the original
	-- string is a line break.
	elseif ch == "-" then
		local prev = byte(_str, loc - 1)
		if (prev == 0xA or prev == 0xD) and sub(_str, loc + 1, loc + 3) == "---" then
			return "&#45;"
		end
	-- "ISBN" followed by one of the `after_magic_link` bytes: queue an escape
	-- for that byte. "S", "B" and "N" are not matched by the caller's gsub()
	-- pattern, so the next match is the byte at loc + 4.
	elseif ch == "I" then
		local esc_sp = (after_magic_link or get_after_magic_link())[byte(_str, loc + 4)]
		if esc_sp and sub(_str, loc + 1, loc + 3) == "SBN" then
			esc_next = esc_sp
		end
	-- "PMID" followed by one of the `after_magic_link` bytes. The "I" inside
	-- "PMID" is itself matched by the caller's pattern, so a `false` is queued
	-- for it first and the escape for the trailing byte goes in the second slot.
	elseif ch == "P" then
		local esc_sp = (after_magic_link or get_after_magic_link())[byte(_str, loc + 4)]
		if esc_sp and sub(_str, loc + 1, loc + 3) == "MID" then
			esc_next, esc_next2 = false, esc_sp -- to avoid escaping "I" in "PMID"
		end
	-- "RFC" followed by one of the `after_magic_link` bytes: queue an escape for
	-- that byte (at loc + 3; "F" and "C" are not matched by the pattern).
	elseif ch == "R" then
		local esc_sp = (after_magic_link or get_after_magic_link())[byte(_str, loc + 3)]
		if esc_sp and sub(_str, loc + 1, loc + 2) == "FC" then
			esc_next = esc_sp
		end
	-- "__": leave the first underscore alone and queue an escape for the second.
	elseif ch == "_" then
		if byte(_str, loc + 1) == 0x5F then -- _
			esc_next = "&#95;"
		end
	-- "~~~": leave the first two tildes alone and queue an escape for the third.
	elseif ch == "~" then
		if sub(_str, loc + 1, loc + 2) == "~~" then
			esc_next, esc_next2 = false, "&#126;"
		end
	end
end

-- Lookup, built on first use, of the bytes that the exported function escapes
-- when one of them is the very first byte of the processed string: tab, LF, CR,
-- space, "!", "#", "*", "+", "-", ":", "_" and "~".
local first
local function get_first()
	local leading_bytes = {0x9, 0xA, 0xD, 0x20, 0x21, 0x23, 0x2A, 0x2B, 0x2D, 0x3A, 0x5F, 0x7E}
	first = list_to_set(leading_bytes, escape_byte)
	-- The builder is single-use; callers guard with `first or get_first()`.
	get_first = nil
	return first
end

-- Lookup, built on first use, of the bytes that the exported function escapes
-- when one of them is the very last byte of the processed string: tab, LF, FF,
-- CR, space, "!", ":", "_" and "~".
local last
local function get_last()
	local trailing_bytes = {0x9, 0xA, 0xC, 0xD, 0x20, 0x21, 0x3A, 0x5F, 0x7E}
	last = list_to_set(trailing_bytes, escape_byte)
	-- The builder is single-use; callers guard with `last or get_last()`.
	get_last = nil
	return last
end

-- Module export: returns `str` with selected characters replaced by decimal
-- character references ("&#N;"), presumably so that the text is displayed
-- literally instead of being parsed as wikitext.
-- An empty input is returned unchanged. Otherwise every candidate character is
-- run through nowiki(), and afterwards the first and last bytes of the result
-- are escaped as well if they appear in the `first` / `last` lookups.
return function(str)
	if #str == 0 then
		return str
	end
	-- Reset the state shared with nowiki() before starting a new pass.
	_str, esc, esc_next, esc_next2 = str, nil, nil, nil
	local escaped = gsub(str, "()([%s!\"#&'*%-:;<=>IPR[%]_{|}~])", nowiki)
	local head = byte(escaped, 1)
	local head_esc = (first or get_first())[head]
	-- A one-byte result is both the first and the last byte.
	if #escaped == 1 then
		return head_esc or (last or get_last())[head] or escaped
	end
	local tail_esc = (last or get_last())[byte(escaped, -1)]
	if head_esc and tail_esc then
		return head_esc .. sub(escaped, 2, -2) .. tail_esc
	elseif head_esc then
		return head_esc .. sub(escaped, 2)
	elseif tail_esc then
		return sub(escaped, 1, -2) .. tail_esc
	end
	return escaped
end