Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 111 additions & 73 deletions Scripts/WikiUpdater.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
Expand Down Expand Up @@ -49,7 +51,7 @@ public class WikiUpdater
/// List of compound names, used to differentiate between using the thrive:compound and
/// thrive:icon bbcode tags
/// </summary>
private readonly Lazy<string[]> compoundNames = new(LoadCompoundNames);
private readonly Lazy<SearchValues<string>> compoundNames = new(LoadCompoundNames);

/// <summary>
/// List of existing translation keys used by the game. Used to check when a wiki translation key can be a lot
Expand Down Expand Up @@ -159,15 +161,22 @@ public async Task<bool> Run(CancellationToken cancellationToken)
return true;
}

/// <summary>
/// Loads every name a compound can be referred to by: each key of the compound definitions JSON plus each
/// compound's <c>Name</c> with underscores replaced by spaces.
/// </summary>
/// <returns>Search values matching all collected names, ignoring case</returns>
/// <exception cref="NullDecodedJsonException">If the definitions file deserializes to null</exception>
private static SearchValues<string> LoadCompoundNames()
{
    // This method owns the stream: the deserializer reads from it but does not close it, so dispose it here
    // instead of leaving the file handle open until finalization
    using var stream = File.OpenRead(COMPOUND_DEFINITIONS);

    var data = JsonSerializer.Deserialize<Dictionary<string, Compound>>(stream);

    if (data == null)
        throw new NullDecodedJsonException();

    // A case-insensitive set stores a key and a display name that only differ by case just once
    var compoundNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var (key, compound) in data)
    {
        compoundNames.Add(key);
        compoundNames.Add(compound.Name.Replace('_', ' '));
    }

    return SearchValues.Create(compoundNames.ToArray(), StringComparison.OrdinalIgnoreCase);
}

private static HashSet<string> LoadGameTranslationKeys()
Expand Down Expand Up @@ -222,6 +231,32 @@ private static HashSet<string> LoadGameTranslationKeys()
return result;
}

/// <summary>
/// Rewrites the supported inline HTML tags of a text fragment as BBCode and backslash-escapes double quotes.
/// </summary>
private static string ConvertTextToBbcode(string paragraph)
{
    // Applied strictly in this order: raw newlines are dropped before <br> is turned into a newline, and
    // quotes are escaped last
    (string Html, string Bbcode)[] substitutions =
    {
        ("\n", string.Empty),
        ("<b>", "[b]"),
        ("</b>", "[/b]"),
        ("<i>", "[i]"),
        ("</i>", "[/i]"),
        ("<u>", "[u]"),
        ("</u>", "[/u]"),
        ("<code>", "[code]"),
        ("</code>", "[/code]"),
        ("<pre>", "[code]"),
        ("</pre>", "[/code]"),
        ("<br>", "\n"),
        ("\"", "\\\""),
    };

    var converted = paragraph;

    foreach (var (html, bbcode) in substitutions)
    {
        converted = converted.Replace(html, bbcode);
    }

    return converted;
}

/// <summary>
/// Checks whether a node is a text node whose text is null, empty or made up only of whitespace.
/// </summary>
private static bool HasCompoundDivider([NotNullWhen(true)] INode? node)
{
    // Anything that is not a text node (including null) cannot be a divider
    if (node is not IText candidate)
        return false;

    return string.IsNullOrWhiteSpace(candidate.Text);
}

/// <summary>
/// Fetches a page from the online wiki
/// </summary>
Expand Down Expand Up @@ -528,49 +563,46 @@ private Stage[] StageStringToEnumValues(string rawStageStrings)
/// <summary>
/// Splits the main body of a wiki page into sections. The list always starts with a section that has a null
/// heading; each H2 child starts a new section titled with the H2's text. P, UL / OL and H3 children are
/// converted to BBCode and collected as the body of the current section, every other tag is skipped.
/// </summary>
// NOTE(review): this span comes from a diff view, so lines of the previous and of the new implementation
// appear next to each other below (e.g. both a "string text;" and a StringBuilder "text").
private List<GameWiki.Page.Section> GetMainBodySections(IHtmlElement body)
{
var sections = new List<GameWiki.Page.Section> { new(null, string.Empty) };

// Only the direct children of the element with the mw-parser-output class are inspected
var children = body.QuerySelector(".mw-parser-output")!.Children;
var text = new StringBuilder();
foreach (var child in children)
{
if (child.TagName == "H2")
{
// Complete the previous section and start a new one with this heading
sections.Add(new GameWiki.Page.Section(child.TextContent, string.Empty));
continue;
}

string text;
switch (child.TagName)
{
case "H2":
{
// Complete the previous section
sections[^1].SectionBody = text.ToString().Trim();

// and start a new one with this heading
sections.Add(new GameWiki.Page.Section(child.TextContent, string.Empty));
text = new StringBuilder();
continue;
}
case "P":
text = ConvertParagraphToBbcode(child) + "\n\n";
text = ConvertParagraphToBbcode(child, text);
break;
case "UL":

// TODO: switch to the Godot 4 way to handle this:
// https://github.com/Revolutionary-Games/Thrive/issues/5511
// Godot 3 does not support lists in BBCode, so use custom formatting
text = child.Children
.Where(c => c.TagName == "LI")
.Select(l => $"[indent]— {ConvertParagraphToBbcode(l)}[/indent]")
.Aggregate((a, b) => a + "\n" + b) + "\n\n";
case "UL" or "OL":
text = ConvertListToBbcode(child, text);
break;
case "H3":
// The sub-heading text is read from the child carrying the mw-headline class; First throws if there is none
var headline = child.Children
.First(c => c.ClassList.Contains("mw-headline"));

text = $"[b][u]{headline.TextContent}[/u][/b]\n\n";
text = text.Append($"[b][u]{headline.TextContent}[/u][/b]");
break;
default:
// Ignore all other tag types
continue;
}

// Concatenate this tag with the rest of the section so far
sections[^1] = new GameWiki.Page.Section(sections[^1].SectionHeading, sections[^1].SectionBody + text);
// Put a blank line between this converted element and whatever follows it
text.Append("\n\n");
}

return sections.Select(s => new GameWiki.Page.Section(s.SectionHeading, s.SectionBody.Trim())).ToList();
// No H2 follows the last section, so its collected text is stored here after the loop
sections[^1].SectionBody = text.ToString().Trim();

return sections;
}

/// <summary>
Expand All @@ -586,18 +618,36 @@ private GameWiki.Page.Section UntranslateSection(GameWiki.Page.Section section,
return new GameWiki.Page.Section(heading, body);
}

/// <summary>
/// Converts HTML for a single paragraph into BBCode. Paragraph must not contain lists, headings, etc.
/// </summary>
private string ConvertParagraphToBbcode(IElement paragraph)
/// <summary>
/// Appends the BBCode form of an HTML list (or of a single list item) to <paramref name="builder"/> and
/// returns that same builder. Ordered lists are wrapped in [ol]...[/ol], unordered ones in [ul]...[/ul].
/// </summary>
// NOTE(review): this span comes from a diff view; the lines using "bbcode" and "paragraph" below belong to the
// removed ConvertParagraphToBbcode(IElement) overload, not to this method.
private StringBuilder ConvertListToBbcode(IElement list, StringBuilder builder)
{
var bbcode = new StringBuilder();
switch (list)
{
case IHtmlUnorderedListElement or IHtmlOrderedListElement:
{
var tag = list is IHtmlOrderedListElement ? "ol" : "ul";

builder.Append($"[{tag}]");
foreach (var item in list.Children)
{
// Recurse so that each child is handled by the matching case of this switch. A child that is neither a
// list nor a list item adds nothing itself, but the newline below is still appended for it
ConvertListToBbcode(item, builder);
builder.Append('\n');
}

ConvertParagraphToBbcode(paragraph, bbcode);
return bbcode.ToString();
builder.Append($"[/{tag}]");
break;
}
case IHtmlListItemElement:
// The content of a list item is converted the same way as a paragraph
ConvertParagraphToBbcode(list, builder);
break;
}

return builder;
}

private void ConvertParagraphToBbcode(INode paragraph, StringBuilder result)
/// <summary>
/// Converts HTML for a single paragraph into BBCode. Paragraph must not contain lists, headings, etc.
/// </summary>
private StringBuilder ConvertParagraphToBbcode(INode paragraph, StringBuilder result)
{
var children = paragraph.ChildNodes;
foreach (var child in children)
Expand All @@ -624,12 +674,15 @@ private void ConvertParagraphToBbcode(INode paragraph, StringBuilder result)
result.Append(ConvertLinkToBbcode(link));
break;
case IHtmlImageElement image:
result.Append(ConvertImageToBbcode(image, result));
result.Append(ConvertImageToBbcode(image));
break;
case IText text when string.IsNullOrWhiteSpace(text.Text):
case IElement { TagName: "B" } element when IsPartOfCompound(element):
break;
case IElement { TagName: "B", Children.Length: > 0 } element:
// Deal with items inside bold tags, e.g. links
result.Append("[b]");
result.Append(ConvertParagraphToBbcode(element));
ConvertParagraphToBbcode(element, result);
result.Append("[/b]");
continue;
case IElement element:
Expand All @@ -640,19 +693,25 @@ private void ConvertParagraphToBbcode(INode paragraph, StringBuilder result)
break;
}
}

return result;
}

/// <summary>
/// Removes the last bold text label and all subsequent text from this string.
/// </summary>
private void RemoveLastBoldText(StringBuilder bbcode)
// Returns true when the element's text contains any known compound name AND a compound image sits right next
// to the element, separated from it by a whitespace-only text node, either before or after it.
// NOTE(review): this span comes from a diff view; the "boldTextIndex" lines below belong to the removed
// RemoveLastBoldText method, not to this one.
private bool IsPartOfCompound(IElement element)
{
var boldTextIndex = bbcode.ToString().LastIndexOf("[b]", StringComparison.Ordinal);

if (boldTextIndex < 0)
return;
// HasCompoundDivider is annotated with NotNullWhen(true), so the sibling can be dereferenced once it returned true
// NOTE(review): the "before" side looks exactly two nodes back (PreviousSibling.PreviousSibling), while the
// "after" side uses NextElementSibling, which skips any number of non-element nodes - confirm this is intended
return element.TextContent.ContainsAny(compoundNames.Value) && (
HasCompoundDivider(element.PreviousSibling) && HasCompoundImage(element.PreviousSibling.PreviousSibling) ||
HasCompoundDivider(element.NextSibling) && HasCompoundImage(element.NextElementSibling)
);
}

bbcode.Remove(boldTextIndex, bbcode.Length - boldTextIndex);
// Returns true when the node has the markup shape matched below for an embedded compound icon and the icon's
// alt text is one of the known compound names.
private bool HasCompoundImage(INode? element)
{
// The nested pattern below matches this exact structure (null or any other shape gives false):
//   <span typeof="...">                 a span whose ONLY attribute is named "typeof"
//     <span>                            with exactly one child element, another span
//       <img alt="...">                 which in turn has exactly one child, an image with non-null alt text
// The alt text is captured as alternativeText and then looked up in the compound names, which were created
// with case-insensitive comparison
return element is IHtmlSpanElement
{
// List pattern with a single item: exactly one attribute, and its name is "typeof" (the value is not checked)
Attributes: [{ Name: "typeof" }],
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is especially unfamiliar looking to me. Could you maybe add a comment inside this method to explain what it does?

// Exactly one child span holding exactly one image; "{ }" matches any non-null alt text
Children: [IHtmlSpanElement { Children: [IHtmlImageElement { AlternativeText: { } alternativeText }] }],
} && compoundNames.Value.Contains(alternativeText);
}

/// <summary>
Expand Down Expand Up @@ -690,13 +749,11 @@ private string ConvertLinkToBbcode(IHtmlAnchorElement link)
/// <summary>
/// Converts an HTML image into BBCode. Currently only works for compound and other icons embedded in paragraphs.
/// </summary>
private string ConvertImageToBbcode(IHtmlImageElement image, StringBuilder bbcode)
private string ConvertImageToBbcode(IHtmlImageElement image)
{
if (compoundNames.Value.Contains(image.AlternativeText))
if (image is { AlternativeText: { } alternativeText } && compoundNames.Value.Contains(alternativeText))
{
// In-game compound BBCode already has bold text label, so remove the extra one
RemoveLastBoldText(bbcode);
return $"[thrive:compound type=\\\"{image.AlternativeText}\\\"][/thrive:compound]";
return $"[thrive:compound type=\\\"{alternativeText}\\\"][/thrive:compound]";
}

if (IsThriveIcon(image.AlternativeText))
Expand All @@ -718,27 +775,6 @@ private bool IsThriveIcon(string? iconName)
return EmbeddedThriveIconExtensions.TryGetIcon(iconName, out _);
}

/// <summary>
/// Translates the inline HTML formatting tags of a text fragment into their BBCode equivalents.
/// </summary>
private string ConvertTextToBbcode(string paragraph)
{
    // Line breaks that are part of the markup itself are discarded before anything else
    var bbcode = paragraph.Replace("\n", string.Empty);

    // Bold, italic and underline map one to one
    bbcode = bbcode.Replace("<b>", "[b]").Replace("</b>", "[/b]");
    bbcode = bbcode.Replace("<i>", "[i]").Replace("</i>", "[/i]");
    bbcode = bbcode.Replace("<u>", "[u]").Replace("</u>", "[/u]");

    // Both <code> and <pre> end up as [code]
    bbcode = bbcode.Replace("<code>", "[code]").Replace("</code>", "[/code]");
    bbcode = bbcode.Replace("<pre>", "[code]").Replace("</pre>", "[/code]");

    // Explicit breaks become real newlines
    bbcode = bbcode.Replace("<br>", "\n");

    // Finally, double quotes get a backslash in front of them
    return bbcode.Replace("\"", "\\\"");
}

/// <summary>
/// Inserts into en.po the English translations for all the translation keys in a list of wiki pages.
/// </summary>
Expand Down Expand Up @@ -886,3 +922,5 @@ public TranslationPair(GameWiki.Page untranslatedPage, GameWiki.Page translatedP
public GameWiki.Page TranslatedPage { get; }
}
}

/// <summary>
/// File-local deserialization target for the values of the compound definitions JSON. LoadCompoundNames reads
/// only <c>Name</c> from it.
/// </summary>
file sealed record Compound(string Name);