-- Module:VolDiff
-- Jump to navigation
-- Jump to search
-- Documentation for this module may be created at Module:VolDiff/doc
-- Module table; functions attached to it (p.main) are exported by the final `return p`.
local p = {}
-- Pull the link target out of the first [[...]] wikilink in chapterLink.
-- For a piped link ("[[Target|label]]") only the text before the first pipe
-- is returned; when no wikilink is present the result is nil.
local function getChapterNumber(chapterLink)
    mw.log("getChapterNumber: " .. chapterLink)

    -- Text between the first "[[" and the nearest following "]]"
    local inner = mw.ustring.match(chapterLink, "%[%[(.-)%]%]")
    if inner == nil then
        return nil
    end

    -- Element 1 is the target whether or not a display label follows
    local pieces = mw.text.split(inner, "|")
    return pieces[1]
end
-- Return the text that follows `header` in `content`, up to (not including)
-- the next "===" marker, or up to the end of `content` when no further "==="
-- exists. Returns nil when `header` does not occur.
-- Note: `header` is spliced into a Lua pattern as-is (it is not escaped).
local function extractSection(content, header)
    mw.log("extractSection: " .. header)

    -- First try: section bounded by a following "==="; second try: section
    -- runs to the end of the text. `or` keeps only the first capture.
    local body = mw.ustring.match(content, header .. "(.-)(===)")
        or mw.ustring.match(content, header .. "(.-)$")

    mw.log("extractSection result: " .. (body or "nil"))
    return body
end
-- Helper function to validate and sanitize UTF-8 strings.
--
-- Returns `str` unchanged when it is valid UTF-8. Otherwise logs a warning and
-- returns a copy in which every byte that is not part of a well-formed UTF-8
-- sequence is replaced by U+FFFD (REPLACEMENT CHARACTER), so the result can be
-- handed to mw.ustring functions safely.
--
-- Validation uses mw.ustring.isutf8, which checks the whole string and is
-- documented not to raise on invalid input. A pattern search such as
-- mw.ustring.find cannot be used for this check: an unanchored
-- single-character pattern succeeds as soon as one valid character exists, and
-- the mw.ustring pattern functions are documented to raise on non-UTF-8 input.
local function validateUtf8(str)
    if mw.ustring.isutf8(str) then
        return str
    end

    mw.log("Warning: Invalid UTF-8 sequence found.")

    local REPLACEMENT = "\239\191\189" -- U+FFFD encoded as UTF-8
    local pieces = {}
    local i, n = 1, #str
    while i <= n do
        -- Expected sequence length, derived from the lead byte
        local lead = string.byte(str, i)
        local len
        if lead < 0x80 then
            len = 1
        elseif lead >= 0xC2 and lead <= 0xDF then
            len = 2
        elseif lead >= 0xE0 and lead <= 0xEF then
            len = 3
        elseif lead >= 0xF0 and lead <= 0xF4 then
            len = 4
        end

        local seq = len and str:sub(i, i + len - 1)
        if seq and #seq == len and mw.ustring.isutf8(seq) then
            pieces[#pieces + 1] = seq
            i = i + len
        else
            -- Stray continuation byte, bad lead byte, or truncated/ill-formed
            -- sequence: replace one byte and resynchronise on the next one.
            pieces[#pieces + 1] = REPLACEMENT
            i = i + 1
        end
    end
    return table.concat(pieces)
end
-- Helper function to capture the entire template, including nested braces.
--
-- Finds the first "{{<templateName>" that is followed by "|" or a newline and
-- returns the text from that "{{" through the template's closing brace, passed
-- through validateUtf8. Returns nil when no such template opening exists.
-- If the closing brace is never found, the capture runs to the end of content.
--
-- `templateName` is spliced into a Lua pattern as-is (it is not escaped).
--
-- All positions here are BYTE offsets: the search uses string.find rather than
-- mw.ustring.find because the results feed content:sub() and #content, which
-- are byte-based. Mixing in character offsets shifts the captured range as
-- soon as non-ASCII text precedes the template.
local function captureTemplate(content, templateName)
    mw.log("captureTemplate: start for " .. templateName)
    local templateStart, templateEnd = string.find(content, "{{%s*" .. templateName .. "%s*[|\n]")
    if not templateStart then
        mw.log("captureTemplate: templateStart not found for " .. templateName)
        return nil
    end

    local OPEN_BRACE, CLOSE_BRACE = 123, 125 -- byte values of "{" and "}"
    local contentLength = #content
    local lastIndex = contentLength -- default: unterminated template runs to the end
    local braceCount = 2 -- Account for the initial "{{"
    local i = templateEnd + 1
    while i <= contentLength do
        local byte = string.byte(content, i)
        if byte == OPEN_BRACE then
            braceCount = braceCount + 1
        elseif byte == CLOSE_BRACE then
            braceCount = braceCount - 1
            if braceCount == 0 then
                -- Only accept the balance point as the end of the template
                -- when it is followed by "}", a newline or "|"; otherwise the
                -- brace is treated as stray and scanning continues.
                local nextChar = content:sub(i + 1, i + 1)
                if nextChar == '}' or nextChar == '\n' or nextChar == '|' then
                    lastIndex = i
                    break
                end
                braceCount = braceCount + 1
            end
        end
        i = i + 1
    end

    -- One slice instead of per-character concatenation
    local finalTemplate = content:sub(templateStart, lastIndex)
    -- Validate the final template as a UTF-8 string
    finalTemplate = validateUtf8(finalTemplate)
    mw.log("captureTemplate result: " .. finalTemplate)
    return finalTemplate
end
-- Helper function to capture the entire Desc parameter including nested braces
-- and wikitext links.
--
-- `diff` is the text of one {{Diff ...}} template. The value starts right
-- after "Desc =" and ends before the first "|", "}" or "]" that is not inside
-- a brace/bracket group opened within the value, or at the template's closing
-- "}}" when nothing else ends it first. The terminating character is NOT part
-- of the value. Returns the trimmed value (after validateUtf8), or nil when
-- the template has no Desc parameter.
--
-- Positions are byte offsets (string.find) so that they agree with diff:sub()
-- and #diff, which are byte-based.
local function captureDescParameter(diff)
    mw.log("captureDescParameter: start")
    local descStart, descEnd = string.find(diff, "Desc%s*=%s*")
    if not descStart then
        mw.log("captureDescParameter: descStart not found")
        return nil
    end

    -- Leave the template's own closing "}}" out of the scanned range
    local limit = #diff
    if diff:sub(-2) == "}}" then
        limit = limit - 2
    end

    local valueStart = descEnd + 1
    local valueEnd = limit
    local depth = 0 -- braces and brackets share a single nesting counter
    for i = valueStart, limit do
        local char = diff:sub(i, i)
        if char == '{' or char == '[' then
            depth = depth + 1
        elseif char == '}' or char == ']' then
            if depth > 0 then
                depth = depth - 1
            else
                -- Unmatched closer: the value ends just before it
                valueEnd = i - 1
                break
            end
        elseif char == '|' and depth == 0 then
            -- Top-level pipe starts the next parameter
            valueEnd = i - 1
            break
        end
    end

    -- Validate the final description as a UTF-8 string
    local finalDesc = validateUtf8(diff:sub(valueStart, valueEnd))
    mw.log("captureDescParameter result: " .. finalDesc)
    return mw.text.trim(finalDesc)
end
-- Main function to process the DiffBox template.
--
-- Reads the wikitext of a source page, takes the DiffBox found under the
-- "Magazine Differences" or "Reprint Differences" level-3 header, and rebuilds
-- it with only those Diff entries whose Chapter link target occurs in the
-- title of the page currently being rendered.
--
-- Arguments (taken from the parent frame, or from `frame` itself when there is
-- no parent):
--   args[1]  title of the page holding the differences (required)
--   args[2]  "reprint" selects "===Reprint Differences==="; any other value,
--            or none, selects "===Magazine Differences==="
--
-- Returns the rebuilt wikitext expanded with frame:preprocess (or the expanded
-- text "No differences found for this chapter."), or a plain "Error: ..."
-- string when the source page, section or DiffBox cannot be found.
function p.main(frame)
    mw.log("main: start")
    -- getParent() may be nil when the frame has no parent
    local parent = frame:getParent()
    local args = (parent or frame).args
    local currentTitle = mw.title.getCurrentTitle().text
    local sourcePage = args[1]
    local sectionType = args[2] or "magazine"
    local sectionHeader = sectionType == "reprint" and "===Reprint Differences===" or "===Magazine Differences==="

    if not sourcePage then
        mw.log("main: No source page specified.")
        return "Error: No source page specified."
    end

    -- Fetch the content from the specified source page.
    -- mw.title.new returns nil for an invalid title, so guard before calling
    -- getContent on it.
    local sourceTitle = mw.title.new(sourcePage)
    local content = sourceTitle and sourceTitle:getContent()
    if not content then
        mw.log("main: Could not fetch content from the specified source page.")
        return "Error: Could not fetch content from the specified source page."
    end
    content = validateUtf8(content)
    mw.log("main: content fetched")

    -- Extract the section content
    local sectionContent = extractSection(content, sectionHeader)
    if not sectionContent then
        mw.log("main: No section found with header " .. sectionHeader)
        return "Error: No section found with header " .. sectionHeader
    end
    sectionContent = validateUtf8(sectionContent)
    mw.log("main: sectionContent extracted")

    -- Extract the entire DiffBox content using brace counting
    local diffBoxContent = captureTemplate(sectionContent, "DiffBox")
    if not diffBoxContent then
        mw.log("main: No DiffBox found in the specified section.")
        return "Error: No DiffBox found in the specified section."
    end

    -- Extract DiffBox parameters.
    -- NOTE(review): these searches cover the whole DiffBox text including the
    -- nested Diff entries, so a parameter missing from the DiffBox header is
    -- taken from the first Diff that has it - confirm this is intended.
    local part = mw.ustring.match(diffBoxContent, "Part%s*=%s*([^|]+)") or ""
    local title = mw.ustring.match(diffBoxContent, "Title%s*=%s*([^|]+)") or ""
    local label1 = mw.ustring.match(diffBoxContent, "Label1%s*=%s*([^|]*)") or ""
    local label2 = mw.ustring.match(diffBoxContent, "Label2%s*=%s*([^|]*)") or ""

    -- Start the DiffBox template; output pieces are collected and joined once
    local out = { string.format('{{DiffBox|Part=%s|Title=%s', part, title) }
    if label1 ~= "" then out[#out + 1] = string.format('|Label1=%s', label1) end
    if label2 ~= "" then out[#out + 1] = string.format('|Label2=%s', label2) end
    out[#out + 1] = '|\n'

    -- Count the number of Diff templates (logged only). The pattern requires
    -- "|" or a newline right after "Diff", so "{{DiffBox" is never counted and
    -- the counter starts at 0.
    local diffCount = 0
    for _ in mw.ustring.gmatch(diffBoxContent, "{{%s*Diff%s*[|\n]") do
        diffCount = diffCount + 1
    end
    mw.log("main: diffCount = " .. diffCount)

    -- Process the section content to extract Diff entries
    local hasDiffs = false
    local searchFrom = 1
    local currentDiff = 0
    while true do
        local diffContent = captureTemplate(diffBoxContent:sub(searchFrom), "Diff")
        if not diffContent then
            mw.log("main: no more diffContent found")
            break
        end

        -- Continue after the END of the captured template. captureTemplate
        -- returns only the template text, so locate it (plain search) to
        -- account for whatever precedes it; advancing by its length alone can
        -- land before the same template and process it twice.
        local _, foundEnd = string.find(diffBoxContent, diffContent, searchFrom, true)
        if foundEnd then
            searchFrom = foundEnd + 1
        else
            -- Text was altered by sanitising; fall back to a length-based step
            searchFrom = searchFrom + #diffContent
        end

        currentDiff = currentDiff + 1
        mw.log("main: processing diffContent #" .. currentDiff)

        local diffPart = mw.ustring.match(diffContent, "Part%s*=%s*([^|]+)")
        local diffChapter = mw.ustring.match(diffContent, "Chapter%s*=%s*(%[%[.-%]%])")
        local diffPage = mw.ustring.match(diffContent, "Page%s*=%s*([^|]+)")
        local diffImage1 = mw.ustring.match(diffContent, "Image1%s*=%s*([^|]+)")
        local diffImage2 = mw.ustring.match(diffContent, "Image2%s*=%s*([^|]+)")
        local diffDesc = captureDescParameter(diffContent)
        local diffLabel1 = mw.ustring.match(diffContent, "Label1%s*=%s*([^|]*)") or ""
        local diffLabel2 = mw.ustring.match(diffContent, "Label2%s*=%s*([^|]*)") or ""

        if diffPart and diffChapter and diffPage and diffImage1 and diffImage2 and diffDesc then
            local chapterNumber = getChapterNumber(diffChapter)
            -- Check if the current page title contains the chapter link
            -- target. Plain search: the target is literal text, and characters
            -- such as "(", "." or "-" must not be read as pattern syntax.
            if chapterNumber and mw.ustring.find(currentTitle, chapterNumber, 1, true) then
                hasDiffs = true
                out[#out + 1] = string.format('{{Diff|Part=%s|Chapter=%s|Page=%s|Image1=%s|Image2=%s|Desc=%s',
                    diffPart, diffChapter, diffPage, diffImage1, diffImage2, mw.text.trim(diffDesc))
                -- Add optional labels if they are present
                if diffLabel1 ~= "" then out[#out + 1] = string.format('|Label1=%s', diffLabel1) end
                if diffLabel2 ~= "" then out[#out + 1] = string.format('|Label2=%s', diffLabel2) end
                out[#out + 1] = '}}\n'
            end
        end
    end

    -- End the DiffBox template or provide a message if no diffs are found
    local result
    if hasDiffs then
        out[#out + 1] = '}}'
        result = table.concat(out)
    else
        result = "No differences found for this chapter."
    end
    mw.log("main: end")
    return frame:preprocess(result)
end
-- Hand the module table (with p.main) back to whatever loads this module.
return p