Modul:Benutzer:Herzi Pinki/kmlhack
Die Dokumentation für dieses Modul kann unter Modul:Benutzer:Herzi Pinki/kmlhack/Doku erstellt werden
-- Maps Unicode code points (decimal) to the ASCII text that stands in for them.
-- Originally copied from Modul:Sort/latin and modified; here the map is built
-- from contiguous code point ranges instead of one literal entry per code point.
-- Characters below 7838 that would need a replacement ("2-byte chars") are
-- deliberately not covered.
local codeReplacements = {}
do
    -- { first, last, replacement }: every code point in first..last (inclusive)
    -- is mapped to the same replacement text.
    local uniformRanges = {
        -- space-like characters
        {   5760,   5760, " " },  -- OGHAM SPACE MARK
        {   8192,   8202, " " },  -- EN QUAD, EM QUAD, N-SPACE, M-SPACE, THREE-/FOUR-/SIX-PER-EM SPACE,
                                  -- FIGURE SPACE, PUNCTUATION SPACE, thinsp, HAIR SPACE
        {   8239,   8239, " " },  -- NARROW NO-BREAK SPACE
        {   8287,   8287, " " },  -- MEDIUM MATHEMATICAL SPACE
        {  12288,  12288, " " },  -- IDEOGRAPHIC SPACE
        {  12351,  12351, " " },  -- IDEOGRAPHIC HALF FILL SPACE
        { 917536, 917536, " " },  -- TAG SPACE
        -- hyphens and dashes
        {   8208,   8213, "-" },  -- HYPHEN, NON-BREAKING HYPHEN, FIGURE DASH, ndash, mdash, HORIZONTAL BAR
        {   8259,   8259, "-" },  -- HYPHEN BULLET
        {   8722,   8722, "-" },  -- MINUS sign
        -- single quotation marks
        {   8216,   8218, "'" },  -- lsquo, rsquo, sbquo
        {   8249,   8250, "'" },  -- lsaquo, rsaquo
        -- double quotation marks
        {   8220,   8222, "\"" }, -- ldquo, rdquo, bdquo
        -- letters with a multi-character replacement
        {   7838,   7838, "SS" }, -- CAPITAL SHARP S
    }
    for _, range in ipairs( uniformRanges ) do
        for code = range[ 1 ], range[ 2 ] do
            codeReplacements[ code ] = range[ 3 ]
        end
    end

    -- { first, last, upper, lower }: accented letters come in pairs, the
    -- capital on the even code point and the small letter on the odd one.
    local letterRanges = {
        { 7840, 7863, "A", "a" }, -- dot below, hook above, circumflex/breve + acute, grave, hook above, tilde, dot below
        { 7864, 7879, "E", "e" }, -- dot below, hook above, tilde, circumflex + acute, grave, hook above, tilde, dot below
        { 7880, 7883, "I", "i" }, -- hook above, dot below
        { 7884, 7907, "O", "o" }, -- dot below, hook above, circumflex/horn + acute, grave, hook above, tilde, dot below
        { 7908, 7921, "U", "u" }, -- dot below, hook above, horn + acute, grave, hook above, tilde, dot below
        { 7922, 7929, "Y", "y" }, -- grave, dot below, hook above, tilde
        -- 7930 and 7931 intentionally have no entry
        { 7932, 7933, "V", "v" }, -- V (middle-welsh)
        { 7934, 7935, "Y", "y" }, -- Y with loop
    }
    for _, range in ipairs( letterRanges ) do
        local upper, lower = range[ 3 ], range[ 4 ]
        for code = range[ 1 ], range[ 2 ] do
            if code % 2 == 0 then
                codeReplacements[ code ] = upper
            else
                codeReplacements[ code ] = lower
            end
        end
    end
end
-- The same mapping, keyed by the encoded character instead of its code point
-- number: matching whole characters is simpler than matching byte sequences
-- of varying length.
local charReplacements = {}
for codepoint, replacement in pairs( codeReplacements ) do
    local character = mw.ustring.char( codepoint )
    charReplacements[ character ] = replacement
end
-- Table of functions exported by this module.
local p = {}
--- Substitute characters that have an ASCII stand-in in charReplacements.
-- HTML entities in the first invoke argument are decoded first; afterwards
-- every character found in charReplacements is swapped for its replacement.
-- Characters without a table entry are left unchanged.
-- @param frame  Scribunto frame object; frame.args[1] holds the text
-- @return the processed string ("" when no argument was supplied)
p.subHighChars = function ( frame )
    -- A missing argument previously raised a script error in mw.text.decode.
    local text = frame.args[ 1 ] or ""
    -- The former second argument was an undefined global (always nil), so
    -- omitting it keeps the behaviour: decoding of the full named-entity list
    -- stays switched off.
    -- NOTE(review): pass true here if entities such as &ndash; should be decoded too.
    local decoded = mw.text.decode( text )
    -- Builds the character class "[U+0800-U+10FFFF]" (91 = "[", 45 = "-", 93 = "]").
    -- The upper bound used to be U+1FFFF, which left the TAG SPACE entry
    -- (U+E0020) of the table unreachable. Widening is safe because gsub keeps
    -- any match that has no entry in the replacement table.
    local pattern = mw.ustring.char( 91, 0x0800, 45, 0x10FFFF, 93 )
    -- The parentheses discard gsub's second result (the match count).
    return ( mw.ustring.gsub( decoded, pattern, charReplacements ) )
end
--- Delete every character in the range U+0800..U+1FFFF.
-- HTML entities in the first invoke argument are decoded first, so numeric
-- entities that denote such characters are removed as well.
-- @param frame  Scribunto frame object; frame.args[1] holds the text
-- @return the processed string ("" when no argument was supplied)
p.removeHighChars = function ( frame )
    -- A missing argument previously raised a script error in mw.text.decode.
    local text = frame.args[ 1 ] or ""
    -- The former second argument was an undefined global (always nil), so
    -- omitting it keeps the behaviour: decoding of the full named-entity list
    -- stays switched off.
    local decoded = mw.text.decode( text )
    -- Builds the character class "[U+0800-U+1FFFF]" (91 = "[", 45 = "-", 93 = "]").
    -- NOTE(review): characters above U+1FFFF are not matched and therefore
    -- stay in the output - confirm whether that upper bound is intended.
    local pattern = mw.ustring.char( 91, 0x0800, 45, 0x1FFFF, 93 )
    -- The parentheses discard gsub's second result (the match count).
    return ( mw.ustring.gsub( decoded, pattern, "" ) )
end
--- Substitute known characters, then drop the remaining high characters.
-- HTML entities in the first invoke argument are decoded first. Characters
-- listed in charReplacements are then swapped for their ASCII replacement,
-- and whatever is still left in the range U+0800..U+1FFFF is deleted.
-- @param frame  Scribunto frame object; frame.args[1] holds the text
-- @return the processed string ("" when no argument was supplied)
p.kmlhack = function ( frame )
    -- A missing argument previously raised a script error in mw.text.decode.
    -- The former second argument of decode was an undefined global (always
    -- nil); omitting it keeps the behaviour unchanged.
    local decoded = mw.text.decode( frame.args[ 1 ] or "" )
    -- Character classes (91 = "[", 45 = "-", 93 = "]").
    -- Substitution looks at everything from U+0800 upwards so that the
    -- TAG SPACE entry (U+E0020) of the table is reachable; characters without
    -- a table entry are not changed by that pass.
    local substitutable = mw.ustring.char( 91, 0x0800, 45, 0x10FFFF, 93 )
    -- Removal keeps its original range.
    -- NOTE(review): characters above U+1FFFF without a table entry stay in
    -- the output - confirm whether that upper bound is intended.
    local removable = mw.ustring.char( 91, 0x0800, 45, 0x1FFFF, 93 )
    -- Assigning to a single local keeps only gsub's first result (the string).
    local substituted = mw.ustring.gsub( decoded, substitutable, charReplacements )
    local cleaned = mw.ustring.gsub( substituted, removable, "" )
    return cleaned
end
return p;