-- Module:Sandbox/trappist the monk/MSGJ regex
-- (from Wikipedia, the free encyclopedia)
require ('strict')

--[[--------------------------< M A I N >----------------------------------------------------------------------

lua implementation of this regex:
	[a-z]+(\-[a-z]+)*(_res\-[0-9a-f]{8}(\-[0-9a-f]{4}){3}\-[0-9a-f]{12})?

this function assumes that <target> is valid;

]]

local function main (target)
	-- fall back to a sample string that is believed to match the regex when no <target> is supplied
	target = target or 'xyz-uvw-rst_res-12345678-1111-2222-3333-012345543210';

	-- <prefix> is the text ahead of the constant '_res'; <suffix> is '_res' and everything after it.
	-- without a '_res' in <target>, the whole of <target> is the prefix and <suffix> stays nil
	local prefix, suffix = target, nil;
	if target:find ('_res', 1, true) then										-- plain-text search for the constant
		prefix, suffix = target:match ('([^_]+)(_res.+)');
	end

	local found_t = {};															-- captures, in left-to-right order

	if prefix:find ('-', 1, true) then											-- only look for groups when there is a hyphen
		for hyphen_group in prefix:gmatch ('(%-%l+)') do						-- each hyphen + lowercase-letters run
			found_t[#found_t + 1] = hyphen_group;
		end
	end

	if suffix then
		found_t[#found_t + 1] = suffix;											-- the whole '_res...' capture comes first
		-- then each hyphen + exactly four hex digits that is immediately followed by another hyphen
		for hex_group in suffix:gmatch ('%-%x%x%x%x%f[%-]') do
			found_t[#found_t + 1] = hex_group;
		end
	end

	return mw.dumpObject (found_t);												-- render the sequence as text
end


--[[--------------------------< I S _ V A L I D >--------------------------------------------------------------

returns true when the whole of <target> matches this regex; false else:
	[a-z]+(\-[a-z]+)*(_res\-[0-9a-f]{8}(\-[0-9a-f]{4}){3}\-[0-9a-f]{12})?

<target> is split at the first literal '_res':
	<part1> - everything ahead of '_res' (all of <target> when there is no '_res'); must be one or more
		runs of lowercase ascii letters separated by single hyphens
	<part2> - '_res' and everything after it; when present must be '_res' followed by hyphen-separated
		groups of 8, 4, 4, 4, and 12 lowercase hexadecimal digits

when <target> is nil, a test string that matches the regex is used

]]

local function is_valid (target)

	target = target or 'xyz-uvw-rst_res-12345678-1111-2222-3333-012345543210';	-- a test string that matches the regex

	local part1, part2;
	local res_start = target:find ('_res', 1, true);							-- plain-text search; nil when no constant '_res'
	if res_start then
		part1 = target:sub (1, res_start - 1);									-- split by position so that nothing ahead of '_res' is dropped
		part2 = target:sub (res_start);											-- and so that neither part can ever be nil here
	else
		part1 = target;															-- part2 not in target
	end

	if '' == part1 or part1:find ('[^%-a-z]') then								-- <part1> must not be empty; only lowercase letters and hyphens allowed
		return false;															-- invalid so return false
	end

	if part1:find ('^%-') or part1:find ('%-$') or part1:find ('--', 1, true) then	-- hyphens only between runs of letters; never leading, trailing, or doubled
		return false;															-- invalid so return false
	end

	if part2 then
		local hex = '[0-9a-f]';													-- explicit class because %x would also accept uppercase A-F
		local pattern = '^_res%-' .. hex:rep (8) .. ('%-' .. hex:rep (4)):rep (3) .. '%-' .. hex:rep (12) .. '$';
		if not part2:find (pattern) then										-- anchored at both ends so nothing may precede or follow the groups
			return false;														-- invalid so return false
		end
	end

	return true;																-- valid so return true
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

-- the module's public interface: the two entry points defined above, exported under their own names
local exports_t = {
	is_valid = is_valid,
	main = main,
	};

return exports_t;