// Benutzer:Mps/MetadatenEinwohnerzahlJPUpdater.cs

/* Program to update the population numbers of Japanese municipalities in Wikidata and the German Wikipedia's population metadata templates
 * with the numbers from the Japanese Wikipedia.
 * The program throttles writing access to the Wikipedia/Wikidata servers by using the "maxlag=5" parameter and
 * limiting the edits to 30 per minute.
 */

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Reflection;
using System.Security.Cryptography;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Web;
using System.Xml;
using Microsoft.AspNetCore.WebUtilities;

namespace MetadatenEinwohnerzahlJPUpdater
{
  #region Exceptions

  /// <summary>
  /// Exception type this program throws for failures it detects itself
  /// (for example content that cannot be evaluated).
  /// </summary>
  internal class BusinessException : Exception
  {
    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="message">Text describing the failure.</param>
    public BusinessException(string message) : base(message) { }
  }

  /// <summary>
  /// Error reported by (or while talking to) the MediaWiki API.
  /// </summary>
  internal class MediawikiException : BusinessException
  {
    /// <summary>Creates the exception with a free-form message.</summary>
    /// <param name="message">Text describing the failure.</param>
    public MediawikiException(string message) : base(message) { }

    /// <summary>Creates the exception from an API error code and its info text.</summary>
    /// <param name="code">Value of the error's <c>code</c> field.</param>
    /// <param name="info">Value of the error's <c>info</c> field.</param>
    public MediawikiException(string code, string info)
      : base($"Mediawiki error \"{code}: {info}\"") { }
  }

  /// <summary>
  /// MediaWiki error raised for the <c>maxlag</c> error code; carries the lag extracted from the info text.
  /// </summary>
  internal class MediawikiLagException : MediawikiException
  {
    /// <summary>Creates the exception and extracts <see cref="LagTime"/> from <paramref name="info"/>.</summary>
    /// <param name="code">Value of the error's <c>code</c> field.</param>
    /// <param name="info">Value of the error's <c>info</c> field.</param>
    public MediawikiLagException(string code, string info)
      : base(code, info) // same message format as the (code, info) constructor of the base class
    {
      LagTime = GetLagTime(info);
    }

    /// <summary>Lag parsed from the info text, or -1 second when it could not be determined.</summary>
    public TimeSpan LagTime { get; }

    /// <summary>
    /// Extracts the number of seconds from a text of the form
    /// "Waiting for HOST: N seconds lagged".
    /// </summary>
    /// <param name="info">Info text of the error; <c>null</c> is treated like a text without lag value.</param>
    /// <returns>The lag as a <see cref="TimeSpan"/>, or -1 second when no number could be extracted.</returns>
    public static TimeSpan GetLagTime(string info)
    {
      var match = Regex.Match(info ?? string.Empty, "Waiting for [^ ]*: ([0-9.-]+) seconds lagged");

      // The value may carry a fractional part ("0.328"), which int.TryParse rejects - and a rejected
      // TryParse resets its out argument to 0, silently discarding the -1 "unknown" marker.
      // Parse as an invariant-culture double instead so "." is always the decimal separator.
      if (match.Success &&
          double.TryParse(match.Groups[1].Value, NumberStyles.Float, CultureInfo.InvariantCulture, out var seconds))
      {
        return TimeSpan.FromSeconds(seconds);
      }

      return TimeSpan.FromSeconds(-1);
    }
  }

  #endregion

  public class Program
  {
    // Upper bound for write requests per minute; the pause between Wikidata edits is derived from it.
    private const int MaxEditsPerMinute = 30;
    // Name of the JSON file in which the Settings object is persisted between runs.
    private const string TimestampFilename = "jaWP_timestamps.json";

    // Handler shared by every HttpClient created in CreateHttpClient; disposed at the end of Main.
    private static readonly HttpClientHandler httpClientHandler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
    // Guards console cursor positioning together with the write that follows it.
    private static readonly object consoleLock = new object();
    // Ensures only one SaveSettingsAsync call writes the settings file at a time.
    private static readonly SemaphoreSlim settingsLock = new SemaphoreSlim(1);

    /// <summary>
    /// Creates or overwrites the population statement (P1082) of a Wikidata item via <c>wbsetclaim</c>.
    /// The request is repeated for as long as the server answers with a <c>maxlag</c> error.
    /// </summary>
    /// <param name="entry">Item id, population figure and source data used to build the statement.</param>
    /// <param name="claimId">GUID of the statement to overwrite, or <c>null</c> to generate a new GUID.</param>
    /// <param name="editToken">Token sent in the <c>token</c> form field.</param>
    /// <returns>The statement GUID that was sent to the server.</returns>
    /// <exception cref="MediawikiException">The API answered with an error other than <c>maxlag</c>.</exception>
    private static async Task<string> SetWikidataClaimAsync(WikidataEntry entry, string claimId, string editToken)
    {
      // Pause used when the server reports lag but no usable (positive) duration can be extracted.
      var defaultLagDelay = TimeSpan.FromSeconds(5);

      // Wikibase time value with day precision (11) in the calendar model Q1985727.
      // Formatted with the invariant culture so the result never depends on the machine's
      // calendar or time separator.
      static object TimeValue(DateTime date) => new
      {
        time = date.ToString("+yyyy-MM-ddT00:00:00Z", CultureInfo.InvariantCulture),
        timezone = 0,
        before = 0,
        after = 0,
        precision = 11,
        calendarmodel = "http://www.wikidata.org/entity/Q1985727"
      };

      // Wikibase item reference. A dictionary is used because the keys contain hyphens, which
      // anonymous-type members cannot; this replaces the former string replacement on the
      // serialized JSON, which would also have rewritten matching text inside URLs or titles.
      static Dictionary<string, object> ItemValue(int numericId) => new Dictionary<string, object>
      {
        { "entity-type", "item" },
        { "numeric-id", numericId }
      };

      claimId ??= entry.Id.ToLowerInvariant() + "$" + Guid.NewGuid();

      #region Build the statement object

      // Snaks of the single reference attached to the statement.
      var referenceSnaks = new Dictionary<string, List<object>>
      {
        {
          "P143", new List<object> // property "Datenherkunft" (data origin)
          {
            new
            {
              snaktype = "value",
              property = "P143",
              // Q177837 = Japanese-language Wikipedia
              datavalue = new { value = ItemValue(177837), type = "wikibase-entityid" }
            }
          }
        },
        {
          "P854", new List<object> // property "URL"
          {
            new
            {
              snaktype = "value",
              property = "P854",
              datavalue = new { value = entry.Source.SourceUri, type = "string" }
            }
          }
        },
        {
          "P364", new List<object> // property "Originalsprache" (original language)
          {
            new
            {
              snaktype = "value",
              property = "P364",
              // Q5287 = Japanese
              datavalue = new { value = ItemValue(5287), type = "wikibase-entityid" }
            }
          }
        },
        {
          "P813", new List<object> // property "abgerufen am" (retrieved on)
          {
            new
            {
              snaktype = "value",
              property = "P813",
              datavalue = new { value = TimeValue(entry.Source.AccessDate), type = "time" }
            }
          }
        }
      };

      // The title is only part of the reference when the source page yielded one.
      if (!string.IsNullOrWhiteSpace(entry.Source.SourceTitle))
      {
        referenceSnaks.Add("P1476", new List<object> // property "Titel" (title)
        {
          new
          {
            snaktype = "value",
            property = "P1476",
            datavalue = new
            {
              value = new
              {
                language = "ja",
                text = entry.Source.SourceTitle
              },
              type = "monolingualtext"
            }
          }
        });
      }

      var claim = new
      {
        id = claimId,
        mainsnak = new
        {
          snaktype = "value",
          property = "P1082", // property "Einwohnerzahl" (population)
          datavalue = new
          {
            value = new
            {
              amount = "+" + entry.Population.ToString(CultureInfo.InvariantCulture),
              unit = "1"
            },
            type = "quantity"
          }
        },
        qualifiers = new Dictionary<string, List<object>>
        {
          {
            "P585", new List<object> // property "Zeitpunkt" (point in time)
            {
              new
              {
                snaktype = "value",
                property = "P585",
                datavalue = new { value = TimeValue(entry.Source.AsOf), type = "time" }
              }
            }
          }
        },
        type = "statement",
        rank = "preferred",
        references = new List<object> { new { snaks = referenceSnaks } }
      };

      #endregion

      var claimJson = JsonSerializer.Serialize(claim);

      bool retry;
      using var client = CreateHttpClient("wikidata");
      do
      {
        retry = false;
        using var response = await client.PostAsync(GetQuery(new Dictionary<string, string>
        {
          { "assert", "user" },
          { "action", "wbsetclaim" },
          { "format", "json" },
          { "summary", "using [[de:Benutzer:Mps/MetadatenEinwohnerzahlJPUpdater.cs]]" },
          { "maxlag", "5" }
        }), FormUrlEncoded(new Dictionary<string, string>
        {
          { "token", editToken },
          { "claim", claimJson }
        }));
        await using var stream = await response.Content.ReadAsStreamAsync();
        Dictionary<string, object> result;
        using (var reader = new StreamReader(stream))
        {
          result = JsonSerializer.Deserialize<Dictionary<string, object>>(await reader.ReadToEndAsync());
        }

        if (result!.TryGetValue("error", out var errorObj))
        {
          var error = ((JsonElement)errorObj).EnumerateObject().ToDictionary(x => x.Name, x => x.Value.ToString());
          var code = error["code"];
          var info = error["info"];
          if (code == "maxlag")
          {
            // A non-positive lag would either retry immediately in a tight loop (zero) or make
            // Task.Delay throw (anything below -1 ms), so fall back to a fixed pause.
            var lag = MediawikiLagException.GetLagTime(info);
            await Task.Delay(lag > TimeSpan.Zero ? lag : defaultLagDelay);
            retry = true;
          }
          else
          {
            throw new MediawikiException(code, info);
          }
        }
      } while (retry);

      return claimId;
    }

    /// <summary>
    /// Logs in to Wikidata and writes the population statement for every entry delivered by
    /// <paramref name="wikidataList"/>, at most <see cref="MaxEditsPerMinute"/> edits per minute.
    /// Entries whose stored access date is not older than the source's access date are skipped.
    /// </summary>
    /// <param name="credentials">Account used for the login.</param>
    /// <param name="settings">Persisted state; updated and saved after every successful edit.</param>
    /// <param name="wikidataList">Entries to write; enumerated until the sequence ends.</param>
    private static async Task UpdateWikidataAsync(NetworkCredential credentials, Settings settings, IEnumerable<WikidataEntry> wikidataList)
    {
      int wdCursorX, wdCursorY;
      lock (consoleLock)
      {
        Console.Write("Schreibe Wikidata-Elemente:  ");
        wdCursorX = Console.CursorLeft;
        wdCursorY = Console.CursorTop;
        Console.WriteLine();
      }

      await LoginAsync("wikidata", credentials);
      var editToken = await GetTokenAsync("wikidata", TokenType.Csrf);

      foreach (var item in wikidataList)
      {
        // Settings key: prefecture name (kanji) followed by the municipality name. The template
        // data already carries the kanji name, so no reverse lookup through PrefIsoCode is needed.
        var prefAndName = item.Source.Kanji + item.Name;
        if (settings.WikiData.AccessDates.TryGetValue(prefAndName, out var prevAccessDate) && prevAccessDate >= item.Source.AccessDate) { continue; }

        // Started before the request so the request duration counts towards the pause.
        var throttleTask = Task.Delay(TimeSpan.FromMinutes(1.0 / MaxEditsPerMinute));

        lock (consoleLock)
        {
          Console.SetCursorPosition(wdCursorX, wdCursorY);
          Console.Write($"{item.Id,11}");
        }

        settings.WikiData.Ids.TryGetValue(prefAndName, out var claimId);

        try
        {
          claimId = await SetWikidataClaimAsync(item, claimId, editToken);

          // Only a successful edit is recorded. Recording a failed one would mark the entry as
          // up to date (so it is never retried) and could store a null statement id.
          settings.WikiData.AccessDates.AddOrUpdate(prefAndName, item.Source.AccessDate, (_, _) => item.Source.AccessDate);
          settings.WikiData.Ids.AddOrUpdate(prefAndName, claimId, (_, _) => claimId);
          await SaveSettingsAsync(settings);
        }
        catch (MediawikiException e)
        {
          // Console output of the parallel tasks is coordinated through consoleLock.
          lock (consoleLock)
          {
            Console.WriteLine();
            Console.Error.WriteLine($"{claimId ?? item.Id}: {e.Message}");
          }
        }

        // A request was sent either way, so the pause applies to failures as well.
        await throttleTask;
      }

      lock (consoleLock)
      {
        Console.SetCursorPosition(wdCursorX, wdCursorY);
        Console.WriteLine(" fertig." + new string(' ', 16));
      }
    }

    /// <summary>
    /// Logs in to the German Wikipedia and replaces the page
    /// "Vorlage:Metadaten Einwohnerzahl &lt;ISO code&gt;" for every prefecture delivered by
    /// <paramref name="prefDataList"/>. Prefectures whose stored access date is not older than the
    /// new one are skipped. Edits are sent with <c>maxlag=5</c>, repeated while the server reports
    /// lag, and limited to <see cref="MaxEditsPerMinute"/> per minute.
    /// </summary>
    /// <param name="credentials">Account used for the login.</param>
    /// <param name="settings">Persisted state; updated and saved after every edit.</param>
    /// <param name="prefDataList">Prefecture data to write; enumerated until the sequence ends.</param>
    /// <exception cref="MediawikiException">The API answered with an error other than <c>maxlag</c>.</exception>
    private static async Task UpdateDeWikiAsync(NetworkCredential credentials, Settings settings, IEnumerable<PrefTemplateData> prefDataList)
    {
      // Pause used when the server reports lag but no usable (positive) duration is available.
      var defaultLagDelay = TimeSpan.FromSeconds(5);

      int deCursorX, deCursorY;
      lock (consoleLock)
      {
        Console.Write("Schreibe in deutsche Wikipedia: ");
        deCursorX = Console.CursorLeft;
        deCursorY = Console.CursorTop;
        Console.WriteLine();
      }

      await LoginAsync("de", credentials);
      var editToken = await GetTokenAsync("de", TokenType.Csrf);

      using var client = CreateHttpClient("de");

      foreach (var prefData in prefDataList)
      {
        if (settings.De.AccessDates.TryGetValue(prefData.Kanji, out var prevAccessDate) && prevAccessDate >= prefData.AccessDate) { continue; }

        // Started before the request so the request duration counts towards the pause.
        var throttleTask = Task.Delay(TimeSpan.FromMinutes(1.0 / MaxEditsPerMinute));

        lock (consoleLock)
        {
          Console.SetCursorPosition(deCursorX, deCursorY);
          Console.Write(prefData.Iso);
        }

        var pageTitleDe = "Vorlage:Metadaten Einwohnerzahl " + prefData.Iso;
        var md5Hash = CalcMd5Hash(new UTF8Encoding(false).GetBytes(prefData.ContentDe));

        bool retry;
        do
        {
          retry = false;
          try
          {
            using var response = await client.PostAsync(GetQuery(new Dictionary<string, string>
            {
              { "assert", "user" },
              { "action", "edit" },
              { "title", pageTitleDe },
              { "summary", "Metadaten-Aktualisierung per [[Benutzer:Mps/MetadatenEinwohnerzahlJPUpdater.cs]]" },
              { "nocreate", "" },
              { "maxlag", "5" }
            }), FormUrlEncoded(new Dictionary<string, string>
            {
              { "token", editToken },
              { "text", prefData.ContentDe },
              { "md5", md5Hash }
            }));
            await using var stream = await response.Content.ReadAsStreamAsync();
            using var reader = XmlReader.Create(stream, new XmlReaderSettings { Async = true });
            while (await reader.ReadAsync())
            {
              CheckForError(reader);
            }
          }
          catch (MediawikiLagException e)
          {
            // CheckForError raises this for the "maxlag" error code: wait, then send the edit again.
            await Task.Delay(e.LagTime > TimeSpan.Zero ? e.LagTime : defaultLagDelay);
            retry = true;
          }
        } while (retry);

        settings.De.AccessDates.AddOrUpdate(prefData.Kanji, prefData.AccessDate, (_, _) => prefData.AccessDate);
        await SaveSettingsAsync(settings);
        await throttleTask;
      }

      lock (consoleLock)
      {
        Console.SetCursorPosition(deCursorX, deCursorY);
        Console.WriteLine(" fertig.");
      }
    }

    /// <summary>
    /// Entry point: obtains credentials, source URLs and stored settings, then runs the reading
    /// task and the two writing tasks concurrently and reports entries that could not be matched.
    /// </summary>
    /// <param name="args">Optional user name and password (first and second argument).</param>
    /// <returns>0 on success, 1 when an exception was reported.</returns>
    private static async Task<int> Main(string[] args)
    {
      Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
      try
      {
        var credentials = GetCredentials(args);
        var sources = await GetJapaneseSourceDataAsync();
        var settings = await LoadSettingsAsync();

        var stopwatch = Stopwatch.StartNew();

        // Queues connecting the reading task with the two writing tasks.
        var templateQueue = new BlockingCollection<PrefTemplateData>();
        var entryQueue = new BlockingCollection<WikidataEntry>();
        var unmatched = new ConcurrentBag<string>();

        var pipeline = new[]
        {
          ConvertSourceDataAsync(sources, settings, templateQueue, entryQueue, unmatched),
          UpdateDeWikiAsync(credentials, settings, templateQueue.GetConsumingEnumerable()),
          UpdateWikidataAsync(credentials, settings, entryQueue.GetConsumingEnumerable())
        };
        await Task.WhenAll(pipeline);
        stopwatch.Stop();

        if (!unmatched.IsEmpty)
        {
          Console.WriteLine($"Folgende Einträge konnten nicht gematcht werden: {string.Join("\n", unmatched)}");
        }

        Console.WriteLine();
        Console.WriteLine($"Fertig ({stopwatch.Elapsed:m\\:ss}).");
        return 0;
      }
      catch (Exception ex)
      {
        // For an aggregate, report its base exception instead of the wrapper.
        Console.Error.WriteLine(ex is AggregateException aggregate ? aggregate.GetBaseException() : ex);
        return 1;
      }
      finally
      {
        httpClientHandler.Dispose();
      }
    }

    // Created once and reused: SaveSettingsAsync runs after every edit, and a fresh options
    // instance per call would make the serializer rebuild its cached type metadata each time.
    private static readonly JsonSerializerOptions settingsSerializerOptions = new JsonSerializerOptions
    {
      WriteIndented = true,
      Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping
    };

    /// <summary>
    /// Writes <paramref name="settings"/> as indented JSON to <see cref="TimestampFilename"/>,
    /// replacing the previous file content. Concurrent calls are serialized through
    /// <see cref="settingsLock"/>.
    /// </summary>
    /// <param name="settings">State to persist.</param>
    private static async Task SaveSettingsAsync(Settings settings)
    {
      await settingsLock.WaitAsync();
      try
      {
        await using var stream = File.Create(TimestampFilename);
        await JsonSerializer.SerializeAsync(stream, settings, settingsSerializerOptions);
      }
      finally
      {
        settingsLock.Release();
      }
    }

    /// <summary>
    /// Reads the Japanese template of every prefecture in <see cref="PrefIsoCode"/> (at most four
    /// at a time), queues the converted template data and the matched Wikidata entries, and marks
    /// both queues as complete when all prefectures are done - also when one of them failed.
    /// </summary>
    /// <param name="sources">Source URL per prefecture (kanji name).</param>
    /// <param name="settings">Persisted state, passed on to the template reader.</param>
    /// <param name="prefDataList">Receives the template data of each prefecture.</param>
    /// <param name="wikidataList">Receives the Wikidata entries matched for each prefecture.</param>
    /// <param name="unmatchedItems">Receives a message for every template key without a match.</param>
    [SuppressMessage("ReSharper", "AccessToDisposedClosure")]
    private static async Task ConvertSourceDataAsync(IDictionary<string, string> sources, Settings settings,
      BlockingCollection<PrefTemplateData> prefDataList, BlockingCollection<WikidataEntry> wikidataList,
      ConcurrentBag<string> unmatchedItems)
    {
      using var semaphore = new SemaphoreSlim(4);
      using var client = CreateHttpClient("ja");

      try
      {
        // Task.Run instead of StartNew(LongRunning): the delegate is asynchronous, so a dedicated
        // thread would only be used up to the first await.
        var tasks = PrefIsoCode.Keys.Select(prefKanji => Task.Run(async () =>
        {
          // Acquire outside the try block so the semaphore is only released after a successful wait.
          await semaphore.WaitAsync();
          try
          {
            var templateData = await ReadTemplateDataAsync(client, prefKanji, sources, settings);
            if (templateData != null)
            {
              prefDataList.Add(templateData);
              var wikidataIds = await ReadWikidataIdsAsync(client, prefKanji);
              // Match the template content against article names and Wikidata ids.
              templateData.Match(wikidataIds, wikidataList, unmatchedItems);
            }
          }
          finally
          {
            semaphore.Release();
          }
        })).ToList();

        await Task.WhenAll(tasks);
      }
      finally
      {
        // Must also run when a prefecture failed: the consumers enumerate these collections with
        // GetConsumingEnumerable and would otherwise wait forever for further items.
        prefDataList.CompleteAdding();
        wikidataList.CompleteAdding();
      }
    }


    /// <summary>
    /// Reads the Japanese population template "Template:自治体人口/&lt;prefecture&gt;" and converts it.
    /// </summary>
    /// <param name="client">Client used for the API request.</param>
    /// <param name="prefKanji">Kanji name of the prefecture; must be a key of <see cref="PrefIsoCode"/>.</param>
    /// <param name="sources">Source URL per prefecture, handed to the template data.</param>
    /// <param name="settings">Not used by this method.</param>
    /// <returns>The converted template data, or <c>null</c> when the response contained no revision.</returns>
    private static async Task<PrefTemplateData> ReadTemplateDataAsync(HttpClient client, string prefKanji, IDictionary<string, string> sources, Settings settings)
    {
      PrefTemplateData templateData = null;
      using var response = await client.GetAsync(GetQuery(new Dictionary<string, string>
      {
        { "action", "query" },
        { "prop", "revisions" },
        { "titles", "Template:自治体人口/" + prefKanji },
        { "rvprop", "content|timestamp" }
      }));
      var prefIso = PrefIsoCode[prefKanji];
      TraverseHttpResponse(response, reader =>
      {
        if (reader.NodeType == XmlNodeType.Element && reader.LocalName == "rev")
        {
          // The timestamp is machine-generated, so it is parsed with the invariant culture rather
          // than whatever culture (and calendar) the machine is configured for.
          var curTimestamp = DateTime.Parse(reader.GetAttribute("timestamp") ?? string.Empty,
            CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal);
          templateData = new PrefTemplateData(reader.ReadString(), sources, prefKanji, prefIso, curTimestamp);
        }
      });

      return templateData;
    }

    /// <summary>
    /// Determines the main-namespace pages that embed "Template:自治体人口/&lt;prefecture&gt;" together
    /// with their Wikidata item ids. Follows the API's continuation data, so the result is not
    /// limited to the first batch of 500 pages.
    /// </summary>
    /// <param name="client">Client used for the API requests.</param>
    /// <param name="prefKanji">Kanji name of the prefecture.</param>
    /// <returns>One entry (page title and item id) per page that has a Wikidata item id.</returns>
    private static async Task<IList<WikidataEntry>> ReadWikidataIdsAsync(HttpClient client, string prefKanji)
    {
      var wikidataIds = new List<WikidataEntry>();

      // Attributes of the response's <continue> element; sent back unchanged with the next request.
      var continuation = new Dictionary<string, string>();
      do
      {
        var query = new Dictionary<string, string>
        {
          { "action", "query" },
          { "generator", "embeddedin" },
          { "geititle", "Template:自治体人口/" + prefKanji },
          { "geinamespace", "0" },
          { "geilimit", "500" },
          { "prop", "pageprops" }
        };
        foreach (var (key, value) in continuation)
        {
          query[key] = value;
        }
        continuation.Clear();

        using var response = await client.GetAsync(GetQuery(query));
        TraverseHttpResponse(response, reader =>
        {
          if (reader.NodeType != XmlNodeType.Element) { return; }

          if (reader.LocalName == "continue")
          {
            while (reader.MoveToNextAttribute())
            {
              continuation[reader.LocalName] = reader.Value;
            }
            reader.MoveToElement();
            return;
          }

          if (reader.LocalName == "page")
          {
            var name = reader.GetAttribute("title");
            if (reader.ReadToDescendant("pageprops", ""))
            {
              // A page can have page props without being linked to a Wikidata item; such an
              // entry would carry a null id that cannot be used to address a statement.
              var id = reader.GetAttribute("wikibase_item");
              if (!string.IsNullOrEmpty(id))
              {
                wikidataIds.Add(new WikidataEntry { Name = name, Id = id });
              }
            }
          }
        });
      } while (continuation.Count > 0);

      return wikidataIds;
    }

    /// <summary>
    /// Loads the persisted state from <see cref="TimestampFilename"/>.
    /// </summary>
    /// <returns>
    /// The stored settings, or empty settings when the file does not exist. Parts that are
    /// missing (JSON <c>null</c>) are replaced by empty instances so callers never see null.
    /// </returns>
    private static async Task<Settings> LoadSettingsAsync()
    {
      if (!File.Exists(TimestampFilename))
      {
        return new Settings();
      }

      await using var stream = File.OpenRead(TimestampFilename);

      // DeserializeAsync yields null for a document that consists of the JSON literal null.
      var settings = await JsonSerializer.DeserializeAsync<Settings>(stream) ?? new Settings();
      settings.De ??= new();
      settings.De.AccessDates ??= new();
      settings.WikiData ??= new();
      settings.WikiData.AccessDates ??= new();
      settings.WikiData.Ids ??= new();
      return settings;
    }

    /// <summary>
    /// Reads "Template:自治体人口/doc" from the Japanese Wikipedia and extracts, for every known
    /// prefecture, the URL listed next to its template link.
    /// </summary>
    /// <returns>Source URL per prefecture (kanji name).</returns>
    /// <exception cref="BusinessException">
    /// The documentation page was not found, or it did not yield a URL for every entry of
    /// <see cref="PrefIsoCode"/>.
    /// </exception>
    private static async Task<IDictionary<string, string>> GetJapaneseSourceDataAsync()
    {
      Console.WriteLine("Quellen ermitteln.");

      using var client = CreateHttpClient("ja");
      var request = GetQuery(new Dictionary<string, string>
      {
        ["action"] = "query",
        ["prop"] = "revisions",
        ["titles"] = "Template:自治体人口/doc",
        ["rvprop"] = "content"
      });
      using var response = await client.GetAsync(request);

      string docText = null;
      TraverseHttpResponse(response, reader =>
      {
        var isRevision = reader.NodeType == XmlNodeType.Element && reader.LocalName == "rev";
        if (isRevision)
        {
          docText = reader.ReadString();
        }
      });
      if (docText == null)
      {
        throw new BusinessException("Japanische Dokumentation nicht gefunden");
      }

      // One table row per prefecture: template link, then the source URL in square brackets.
      var rows = Regex.Matches(docText, @"\|\[\[Template:自治体人口/(?<pref>.+?)\|\k<pref>\]\]\s*\|\|\s*\[(?<url>.+?)\]");
      var sources = new Dictionary<string, string>();
      foreach (Match row in rows)
      {
        var pref = row.Groups["pref"].Value;
        if (!PrefIsoCode.ContainsKey(pref))
        {
          Debug.WriteLine($"Unbekannter Eintrag \"{pref}\"");
          continue;
        }

        sources.Add(pref, row.Groups["url"].Value);
      }

      if (sources.Count == 0)
      {
        throw new BusinessException("Japanische Quellen nicht gefunden");
      }

      if (sources.Count != PrefIsoCode.Count)
      {
        throw new BusinessException("Für einige Präfekturen wurden keine Quellen gefunden");
      }

      return sources;
    }

    /// <summary>
    /// Returns the login credentials: taken from the first two command line arguments when
    /// present, otherwise prompted for on the console (the password is echoed as asterisks).
    /// </summary>
    /// <param name="args">Command line arguments; user name and password when at least two are given.</param>
    /// <returns>The user name and password to log in with.</returns>
    private static NetworkCredential GetCredentials(string[] args)
    {
      if (args.Length >= 2)
      {
        return new NetworkCredential(args[0], args[1]);
      }

      Console.Write("Benutzer: ");
      var user = Console.ReadLine();
      Console.Write("Passwort: ");
      var password = new StringBuilder();
      // Show asterisks instead of the typed password.
      while (true)
      {
        var keyInfo = Console.ReadKey(true);
        if (keyInfo.Key == ConsoleKey.Enter)
        {
          Console.WriteLine();
          break;
        }

        if (keyInfo.Key == ConsoleKey.Backspace)
        {
          if (password.Length > 0)
          {
            password.Length--;
            // Move the cursor back one position, overwrite with a blank, move back again.
            Console.Write("\b \b");
          }
          continue;
        }

        // Keys without a printable character (arrows, function keys, Escape, ...) deliver a
        // control character such as '\0'; it must not become part of the password.
        if (char.IsControl(keyInfo.KeyChar))
        {
          continue;
        }

        password.Append(keyInfo.KeyChar);
        Console.Write("*");
      }
      Console.WriteLine();
      return new NetworkCredential(user, password.ToString());
    }

    /// <summary>
    /// A name paired with a Wikidata item id and, once matched, the population figure and the
    /// template data it was taken from.
    /// </summary>
    public class WikidataEntry
    {
      /// <summary>Wikidata item id.</summary>
      public string Id { get; set; }

      /// <summary>Page title or template key this entry stands for.</summary>
      public string Name { get; set; }

      /// <summary>Population figure.</summary>
      public int Population { get; set; }

      /// <summary>Template data the population figure was taken from.</summary>
      public PrefTemplateData Source { get; set; }

      /// <summary>Returns <see cref="Name"/>.</summary>
      public override string ToString()
      {
        return Name;
      }
    }

    /// <summary>
    /// Population data of one prefecture: the converted German template text, the population per
    /// template key, and the source the figures are attributed to.
    /// </summary>
    public class PrefTemplateData
    {
      // Captures year, month and day of the "date" parameter and (group 4) the lines between the
      // "source" parameter and "#default".
      private static readonly Regex convertToGermanRegex =
        new Regex(@"\s*\|\s*date\s*=\s*(\d{4})年(\d{1,2})月(\d{1,2})日.*\s*\|\s*source\s*=.*?\n(.+\n)\s*\|\s*#default",
          RegexOptions.Singleline);

      // Captures "| key = digits" pairs.
      private static readonly Regex extractPopulationRegex = new Regex(@"\|\s*([^\|]+?)\s*=\s*(\d+)");

      /// <summary>
      /// Converts the Japanese template text. As part of the construction the page at the
      /// prefecture's source URL is downloaded to obtain its title; a failed download only leaves
      /// <see cref="SourceTitle"/> empty.
      /// </summary>
      /// <param name="contentJa">Wikitext of the Japanese template.</param>
      /// <param name="sources">Source URL per prefecture (kanji name).</param>
      /// <param name="prefKanji">Kanji name of the prefecture.</param>
      /// <param name="prefIso">ISO code of the prefecture.</param>
      /// <param name="accessDate">Date recorded as the retrieval date.</param>
      /// <exception cref="BusinessException">The template text does not have the expected structure.</exception>
      public PrefTemplateData(string contentJa, IDictionary<string, string> sources, string prefKanji, string prefIso,
        DateTime accessDate)
      {
        Iso = prefIso;
        Kanji = prefKanji;
        SourceUri = sources[prefKanji];
        AccessDate = accessDate;
        SourceTitle = DownloadSourceTitle(SourceUri);
        ConvertToGerman(contentJa, prefIso);
        ExtractPopulations(contentJa);
      }

      public string Iso { get; }
      public string Kanji { get; }
      public string ContentDe { get; private set; }
      public Dictionary<string, int> Populations { get; private set; }
      public DateTime AsOf { get; private set; }
      public string SourceUri { get; }
      public string SourceTitle { get; }
      public DateTime AccessDate { get; }

      // Downloads the source page and returns the content of its <title> element without control
      // characters, or null when the download fails or the page has no title.
      private static string DownloadSourceTitle(string sourceUri)
      {
        try
        {
          byte[] raw;
          using (var webClient = new WebClient())
          {
            raw = webClient.DownloadData(sourceUri);
          }

          // Decode as UTF-8 first; when the page declares another charset, decode again with it.
          var html = Encoding.UTF8.GetString(raw);
          var match = Regex.Match(html, "charset\\s*=\\s*\"?(.+?)[\">]", RegexOptions.IgnoreCase);
          if (match.Success && !match.Groups[1].Value.Equals("utf-8", StringComparison.OrdinalIgnoreCase))
          {
            try
            {
              html = Encoding.GetEncoding(match.Groups[1].Value).GetString(raw);
            }
            catch (ArgumentException)
            {
              // Unknown charset name: keep the UTF-8 decoding.
            }
          }

          match = Regex.Match(html, "<title>\\s*(.+?)\\s*</title>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase);
          return match.Success
            ? new string(HttpUtility.HtmlDecode(match.Groups[1].Value).Where(ch => ch >= ' ').ToArray())
            : null;
        }
        catch (WebException)
        {
          // The title is optional; the data is usable without it.
          return null;
        }
      }

      // Builds the German template text (ContentDe) and sets AsOf from the "date" parameter.
      private void ConvertToGerman(string contentJa, string prefIso)
      {
        var match = convertToGermanRegex.Match(contentJa);
        if (!match.Success)
        {
          throw new BusinessException("Japanischer Inhalt nicht auswertbar");
        }

        var year = int.Parse(match.Groups[1].Value, CultureInfo.InvariantCulture);
        var month = int.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture);
        var day = int.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture);
        AsOf = new DateTime(year, month, day);

        // Dates go into wikitext, so they are formatted independently of the machine's culture
        // (which could otherwise apply a different calendar).
        var asOfText = AsOf.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
        var accessDateText = AccessDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);

        string title;
        if (string.IsNullOrWhiteSpace(SourceTitle))
        {
          title = "ohne Titel";
        }
        else
        {
          // A pipe would end the template parameter, so it is replaced by {{!}}.
          title = "{{lang|ja|" + SourceTitle.Replace("|", "{{!}}") + "}}";
        }

        ContentDe =
          @"<includeonly>{{#if: {{{2|}}} | <!-- Die Metadaten-Parameter sind hier unabhängig vom Schlüssel -->
  {{#switch: {{{2}}}
  | STAND=" + asOfText + @"
  | QUELLE={{Internetquelle|url=" + SourceUri + "|titel=" + title + "| hrsg={{lang|ja|" + Kanji + "}}|datum=" +
          asOfText + "|sprache=ja|abruf=" + accessDateText + @"}}
  }} |
{{#switch: {{{1}}}
" + match.Groups[4].Value +
          '\t' +
          @"|#default= <span class=""error"">Ungültige Gemeinde <code>{{{1}}}</code></span>[[Kategorie:Wikipedia:Fehler in Vorlage Metadaten Einwohnerzahl]]
}} }}</includeonly><noinclude>
Teil der {{Kategorie:Vorlage:Metadaten Einwohnerzahl JP}}

[[Kategorie:Vorlage:Metadaten Einwohnerzahl JP|" + prefIso.Substring(3) + @"]]
</noinclude>";
      }

      // Collects the "key = number" pairs of the template, except the "date" parameter.
      private void ExtractPopulations(string contentJa)
      {
        var result = new Dictionary<string, int>();
        for (var match = extractPopulationRegex.Match(contentJa); match.Success; match = match.NextMatch())
        {
          var key = match.Groups[1].Value;
          if (key != "date")
          {
            // A repeated key keeps its first value instead of aborting the whole run.
            result.TryAdd(key, int.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture));
          }
        }
        Populations = result;
      }

      /// <summary>
      /// Looks up, for every key of <see cref="Populations"/>, the embedding page whose title
      /// belongs to it and queues a Wikidata entry for it.
      /// </summary>
      /// <param name="transcludedWikidataIds">Pages embedding the template, with their item ids.</param>
      /// <param name="wikidataList">Receives one entry per matched key.</param>
      /// <param name="missingItems">Receives a message for every key without a matching page.</param>
      public void Match(IList<WikidataEntry> transcludedWikidataIds, BlockingCollection<WikidataEntry> wikidataList,
        ConcurrentBag<string> missingItems)
      {
        foreach (var (key, population) in Populations)
        {
          // The total of the 23 wards is listed under a key that differs from the page title.
          var unit = key == "23区計" ? "東京都区部" : key;

          var lemmaAndWikidataId = FindTransclusion(transcludedWikidataIds, unit);
          if (lemmaAndWikidataId?.Name == null)
          {
            missingItems.Add($"{Iso}: {key} nicht in den Einbindungen gefunden!");
          }
          else
          {
            wikidataList.Add(new WikidataEntry
            {
              Id = lemmaAndWikidataId.Id,
              Name = key,
              Population = population,
              Source = this
            });
          }
        }
      }

      // Finds the embedding page for a template key. Keys of the form "<city>市<ward>区" are matched
      // on the ward name, preferring a page title that also contains the city name, so that
      // wards with the same name in different cities are not mixed up.
      private static WikidataEntry FindTransclusion(IList<WikidataEntry> transclusions, string unit)
      {
        static bool TitleStartsWith(WikidataEntry page, string prefix) =>
          page.Name != null && page.Name.StartsWith(prefix, StringComparison.Ordinal);

        var citySuffixPos = unit.EndsWith("区", StringComparison.Ordinal) ? unit.IndexOf('市') : -1;
        if (citySuffixPos < 0)
        {
          // Not a ward, or a ward key without city part.
          return transclusions.FirstOrDefault(page => TitleStartsWith(page, unit));
        }

        var city = unit.Substring(0, citySuffixPos + 1); // city name including the 市 suffix
        var district = unit.Substring(citySuffixPos + 1);
        return transclusions.FirstOrDefault(page =>
                 TitleStartsWith(page, district) && page.Name.Contains(city, StringComparison.Ordinal))
               ?? transclusions.FirstOrDefault(page => TitleStartsWith(page, district));
      }

      /// <summary>Returns <see cref="Iso"/>.</summary>
      public override string ToString()
      {
        return Iso;
      }
    }

    /// <summary>
    /// State persisted between runs (serialized to and from JSON).
    /// </summary>
    public class Settings
    {
      /// <summary>State for the German Wikipedia.</summary>
      public DeSettings De { get; set; } = new DeSettings();

      /// <summary>State for Wikidata.</summary>
      public WíkidataSettings WikiData { get; set; } = new WíkidataSettings();
    }

    /// <summary>
    /// Persisted state for the German Wikipedia.
    /// </summary>
    public class DeSettings
    {
      /// <summary>Access date of the last written data, keyed by prefecture (kanji name).</summary>
      public ConcurrentDictionary<string, DateTime> AccessDates { get; set; } =
        new ConcurrentDictionary<string, DateTime>();
    }

    /// <summary>
    /// Persisted state for Wikidata. Both dictionaries are keyed by prefecture (kanji name)
    /// followed by the municipality name.
    /// </summary>
    public class WíkidataSettings
    {
      /// <summary>Access date of the last written data per key.</summary>
      public ConcurrentDictionary<string, DateTime> AccessDates { get; set; } =
        new ConcurrentDictionary<string, DateTime>();

      /// <summary>Id of the statement written per key.</summary>
      public ConcurrentDictionary<string, string> Ids { get; set; } =
        new ConcurrentDictionary<string, string>();
    }

    #region ISO-Codes

    /// <summary>
    /// ISO code ("JP-01" to "JP-47") per prefecture, keyed by the prefecture's kanji name.
    /// </summary>
    public static readonly Dictionary<string, string> PrefIsoCode = new Dictionary<string, string>
    {
      ["北海道"] = "JP-01",
      ["青森県"] = "JP-02",
      ["岩手県"] = "JP-03",
      ["宮城県"] = "JP-04",
      ["秋田県"] = "JP-05",
      ["山形県"] = "JP-06",
      ["福島県"] = "JP-07",
      ["茨城県"] = "JP-08",
      ["栃木県"] = "JP-09",
      ["群馬県"] = "JP-10",
      ["埼玉県"] = "JP-11",
      ["千葉県"] = "JP-12",
      ["東京都"] = "JP-13",
      ["神奈川県"] = "JP-14",
      ["新潟県"] = "JP-15",
      ["富山県"] = "JP-16",
      ["石川県"] = "JP-17",
      ["福井県"] = "JP-18",
      ["山梨県"] = "JP-19",
      ["長野県"] = "JP-20",
      ["岐阜県"] = "JP-21",
      ["静岡県"] = "JP-22",
      ["愛知県"] = "JP-23",
      ["三重県"] = "JP-24",
      ["滋賀県"] = "JP-25",
      ["京都府"] = "JP-26",
      ["大阪府"] = "JP-27",
      ["兵庫県"] = "JP-28",
      ["奈良県"] = "JP-29",
      ["和歌山県"] = "JP-30",
      ["鳥取県"] = "JP-31",
      ["島根県"] = "JP-32",
      ["岡山県"] = "JP-33",
      ["広島県"] = "JP-34",
      ["山口県"] = "JP-35",
      ["徳島県"] = "JP-36",
      ["香川県"] = "JP-37",
      ["愛媛県"] = "JP-38",
      ["高知県"] = "JP-39",
      ["福岡県"] = "JP-40",
      ["佐賀県"] = "JP-41",
      ["長崎県"] = "JP-42",
      ["熊本県"] = "JP-43",
      ["大分県"] = "JP-44",
      ["宮崎県"] = "JP-45",
      ["鹿児島県"] = "JP-46",
      ["沖縄県"] = "JP-47"
    };

    #endregion

    #region Netzwerk- und Mediawiki-Funktionen

    /// <summary>
    /// Creates a client for the API endpoint of a wiki. All clients use the shared
    /// <see cref="httpClientHandler"/>, which is not disposed together with the client.
    /// </summary>
    /// <param name="language">
    /// "wikidata" for www.wikidata.org, otherwise the subdomain of wikipedia.org (e.g. "de", "ja").
    /// </param>
    /// <returns>A client whose base address is the wiki's <c>/w/api.php</c>.</returns>
    public static HttpClient CreateHttpClient(string language)
    {
      var host = language == "wikidata" ? "www.wikidata.org" : language + ".wikipedia.org";
      var assembly = Assembly.GetExecutingAssembly().GetName();

      var client = new HttpClient(httpClientHandler, false)
      {
        BaseAddress = new Uri($"https://{host}/w/api.php")
      };

      // User agent: "<assembly name>/<major.minor> (<OS version>; .NET CLR <runtime version>)"
      var userAgent = client.DefaultRequestHeaders.UserAgent;
      userAgent.Add(new ProductInfoHeaderValue(assembly.Name!, assembly.Version!.ToString(2)));
      userAgent.Add(new ProductInfoHeaderValue($"({Environment.OSVersion.VersionString}; .NET CLR {Environment.Version})"));
      return client;
    }

    /// <summary>
    /// Builds the query string ("?name=value&amp;...") of an API request. <c>format=xml</c> is used
    /// unless <paramref name="query"/> contains its own <c>format</c> entry, so the parameter is
    /// never sent twice.
    /// </summary>
    /// <param name="query">Request parameters.</param>
    /// <returns>The encoded query string including the leading question mark.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is <c>null</c>.</exception>
    public static string GetQuery(IDictionary<string, string> query)
    {
      if (query == null) { throw new ArgumentNullException(nameof(query)); }

      var parameters = new Dictionary<string, string> { { "format", "xml" } };
      foreach (var (name, value) in query)
      {
        // An explicit "format" replaces the default instead of being appended as a duplicate.
        parameters[name] = value;
      }

      return QueryHelpers.AddQueryString(string.Empty, parameters);
    }

    /// <summary>
    /// Encodes form fields as the body of an <c>application/x-www-form-urlencoded</c> request
    /// (UTF-8 without byte order mark).
    /// </summary>
    /// <param name="postData">Form fields, or <c>null</c>.</param>
    /// <returns>The request content, or <c>null</c> when <paramref name="postData"/> is <c>null</c>.</returns>
    public static HttpContent FormUrlEncoded(IDictionary<string, string> postData)
    {
      if (postData == null)
      {
        return null;
      }

      // The query string helper does the encoding; only its leading "?" is not part of a body.
      var body = QueryHelpers.AddQueryString(string.Empty, postData).TrimStart('?');
      return new StringContent(body, new UTF8Encoding(false), "application/x-www-form-urlencoded");
    }

    /// <summary>
    /// Reads an XML API response node by node. Every node after the <c>api</c> element is first
    /// checked for an API error and then handed to <paramref name="onXmlNode"/>.
    /// </summary>
    /// <param name="response">Response whose content is read.</param>
    /// <param name="onXmlNode">Callback invoked with the reader positioned on the current node.</param>
    /// <exception cref="MediawikiException">
    /// The response contains no <c>api</c> element, or it contains an error element.
    /// </exception>
    public static void TraverseHttpResponse(HttpResponseMessage response, Action<XmlReader> onXmlNode)
    {
      using var content = response.Content.ReadAsStream();
      using var xml = XmlReader.Create(content);

      var hasApiRoot = xml.ReadToFollowing("api");
      if (hasApiRoot)
      {
        while (xml.Read())
        {
          CheckForError(xml);
          onXmlNode(xml);
        }

        return;
      }

      throw new MediawikiException("Malformed response");
    }

    /// <summary>
    /// Computes the MD5 hash of <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Bytes to hash.</param>
    /// <returns>The hash as 32 lowercase hexadecimal digits.</returns>
    public static string CalcMd5Hash(byte[] data)
    {
      var digest = MD5.HashData(data);
      return Convert.ToHexString(digest).ToLowerInvariant();
    }

    /// <summary>
    /// Inspects the node the reader is positioned on. A <c>warnings</c> element is consumed and
    /// written to the debug output; an <c>error</c> element is turned into an exception.
    /// </summary>
    /// <param name="reader">
    /// Reader positioned on the node to inspect. After a <c>warnings</c> element it is left on
    /// the node that follows that element.
    /// </param>
    /// <exception cref="MediawikiLagException">The error code is <c>maxlag</c>.</exception>
    /// <exception cref="MediawikiException">Any other error element.</exception>
    private static void CheckForError(XmlReader reader)
    {
      if (reader.NodeType != XmlNodeType.Element) { return; }

      if (reader.LocalName == "warnings")
      {
        // Read outside of Debug.WriteLine: calls to that method - including the evaluation of
        // their arguments - are removed from builds without DEBUG, which made the reader
        // position depend on the build configuration.
        var warnings = reader.ReadInnerXml();
        Debug.WriteLine($"  warning: {warnings}");

        // ReadInnerXml leaves the reader on the node after the warnings element. The caller's
        // next Read() moves past that node, so it has to be checked here or an error element
        // directly following the warnings would go unnoticed.
        if (reader.NodeType != XmlNodeType.Element) { return; }
      }

      if (reader.LocalName != "error") { return; }

      var code = reader.GetAttribute("code");
      var info = reader.GetAttribute("info");
      Debug.WriteLine($" error \"{code}\": {info}");

      if (code == "maxlag")
      {
        throw new MediawikiLagException(code, info);
      }

      throw new MediawikiException(code, info);
    }

    /// <summary>
    /// Kinds of tokens that can be requested with <see cref="GetTokenAsync"/>. The lowercased
    /// member name is sent as the <c>type</c> parameter and, with the suffix "token", used as the
    /// name of the attribute the token is read from.
    /// </summary>
    public enum TokenType
    {
      Csrf,
      Watch,
      Patrol,
      Rollback,
      UserRights,
      Login,
      CreateAccount
    }

    /// <summary>
    /// Requests a token of the given type from a wiki (<c>action=query&amp;meta=tokens</c>).
    /// </summary>
    /// <param name="lang">Wiki to ask, as understood by <see cref="CreateHttpClient"/>.</param>
    /// <param name="tokenType">Kind of token to request.</param>
    /// <returns>The token, or <c>null</c> when the response did not contain one.</returns>
    public static async Task<string> GetTokenAsync(string lang, TokenType tokenType)
    {
      var type = tokenType.ToString().ToLowerInvariant();
      var attributeName = type + "token";
      var request = GetQuery(new Dictionary<string, string>
      {
        ["action"] = "query",
        ["meta"] = "tokens",
        ["type"] = type
      });

      using var client = CreateHttpClient(lang);
      using var response = await client.PostAsync(request, null!);

      string result = null;
      TraverseHttpResponse(response, reader =>
      {
        var isTokensElement = reader.NodeType == XmlNodeType.Element && reader.LocalName == "tokens";
        if (isTokensElement)
        {
          result = reader.GetAttribute(attributeName);
        }
      });
      return result;
    }

    /// <summary>
    /// Logs in to a wiki with <c>action=login</c>, using a freshly requested login token.
    /// </summary>
    /// <param name="lang">Wiki to log in to, as understood by <see cref="CreateHttpClient"/>.</param>
    /// <param name="credentials">User name and password.</param>
    /// <exception cref="MediawikiException">
    /// The login result is anything but "Success"; the message is the result, for "Throttled"
    /// extended by the wait time reported by the server.
    /// </exception>
    public static async Task LoginAsync(string lang, NetworkCredential credentials)
    {
      using var client = CreateHttpClient(lang);

      var request = GetQuery(new Dictionary<string, string>
      {
        ["action"] = "login",
        ["lgname"] = credentials.UserName
      });
      var form = FormUrlEncoded(new Dictionary<string, string>
      {
        ["lgpassword"] = credentials.Password,
        ["lgtoken"] = await GetTokenAsync(lang, TokenType.Login)
      });
      using var response = await client.PostAsync(request, form);

      TraverseHttpResponse(response, reader =>
      {
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "login")
        {
          return;
        }

        var outcome = reader.GetAttribute("result");
        if (outcome == "Success")
        {
          return;
        }

        if (outcome == "Throttled")
        {
          outcome += $" (Please wait {reader.GetAttribute("wait")}s)";
        }

        throw new MediawikiException(outcome);
      });
    }

    //public static async Task LogoutAsync(string lang)
    //{
    //  using var client = CreateHttpClient(lang);
    //  using var response = client.GetAsync(GetQuery(new Dictionary<string, string> { { "action", "logout" } }));
    //}

    #endregion
  }
}