summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore13
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/.name1
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml4
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml8
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml6
-rw-r--r--src/I2R.LightNews.csproj14
-rw-r--r--src/Models/AppPath.cs24
-rw-r--r--src/Models/NewsArticle.cs20
-rw-r--r--src/Models/NewsSource.cs10
-rw-r--r--src/Pages/Index.cshtml19
-rw-r--r--src/Pages/Index.cshtml.cs30
-rw-r--r--src/Pages/Read.cshtml26
-rw-r--r--src/Pages/Read.cshtml.cs24
-rw-r--r--src/Pages/Shared/_Layout.cshtml20
-rw-r--r--src/Pages/_ViewImports.cshtml3
-rw-r--r--src/Pages/_ViewStart.cshtml3
-rw-r--r--src/Program.cs20
-rw-r--r--src/Properties/launchSettings.json22
-rw-r--r--src/Services/GrabberService.cs147
-rw-r--r--src/Utilities/HtmlSanitiser.cs83
-rw-r--r--src/wwwroot/index.css0
-rw-r--r--src/wwwroot/reset.css74
22 files changed, 571 insertions, 0 deletions
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore b/src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore
new file mode 100644
index 0000000..1c6e926
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore
@@ -0,0 +1,13 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Rider ignored files
+/contentModel.xml
+/modules.xml
+/.idea.I2R.LightNews.iml
+/projectSettingsUpdater.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/.name b/src/.idea/.idea.I2R.LightNews.dir/.idea/.name
new file mode 100644
index 0000000..a1f668e
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/.name
@@ -0,0 +1 @@
+I2R.LightNews \ No newline at end of file
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml b/src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml
new file mode 100644
index 0000000..df87cf9
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="Encoding" addBOMForNewFiles="with BOM under Windows, with no BOM otherwise" />
+</project> \ No newline at end of file
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml b/src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml
new file mode 100644
index 0000000..7b08163
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="UserContentModel">
+ <attachedFolders />
+ <explicitIncludes />
+ <explicitExcludes />
+ </component>
+</project> \ No newline at end of file
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml b/src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml
new file mode 100644
index 0000000..1d8c84d
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="com.jetbrains.rider.android.RiderAndroidMiscFileCreationComponent">
+ <option name="ENSURE_MISC_FILE_EXISTS" value="true" />
+ </component>
+</project> \ No newline at end of file
diff --git a/src/I2R.LightNews.csproj b/src/I2R.LightNews.csproj
new file mode 100644
index 0000000..fd635d0
--- /dev/null
+++ b/src/I2R.LightNews.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk.Web">
+
+ <PropertyGroup>
+ <TargetFramework>net7.0</TargetFramework>
+ <ImplicitUsings>enable</ImplicitUsings>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <PackageReference Include="AngleSharp" Version="0.17.1" />
+ <PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
+ <PackageReference Include="IOL.Helpers" Version="3.1.0" />
+ </ItemGroup>
+
+</Project>
diff --git a/src/Models/AppPath.cs b/src/Models/AppPath.cs
new file mode 100644
index 0000000..2133f20
--- /dev/null
+++ b/src/Models/AppPath.cs
@@ -0,0 +1,24 @@
+namespace I2R.LightNews.Models;
+
+/// <summary>
+/// Pairs a directory on the host file system with a web-facing base path and builds
+/// file locations underneath either of them.
+/// </summary>
+public sealed record AppPath
+{
+    /// <summary>Directory on the host file system.</summary>
+    public string HostPath { get; init; }
+
+    /// <summary>Base path used when building URLs.</summary>
+    public string WebPath { get; init; }
+
+    /// <summary>
+    /// Builds the file-system path of <paramref name="filename"/> inside <see cref="HostPath"/>.
+    /// </summary>
+    /// <param name="filename">File name to place under <see cref="HostPath"/>.</param>
+    /// <param name="fallback">Value returned when <paramref name="filename"/> is null or blank.</param>
+    /// <returns>The combined path, or <paramref name="fallback"/>.</returns>
+    public string GetHostPathForFilename(string filename, string fallback = "") {
+        if (filename.IsNullOrWhiteSpace()) {
+            return fallback;
+        }
+
+        return Path.Combine(HostPath, filename);
+    }
+
+    /// <summary>
+    /// Builds the URL path of <paramref name="filename"/> underneath <see cref="WebPath"/>.
+    /// </summary>
+    /// <param name="filename">File name to append to <see cref="WebPath"/>.</param>
+    /// <param name="fallback">Value returned when <paramref name="filename"/> is null or blank.</param>
+    /// <returns>The joined URL path, or <paramref name="fallback"/>.</returns>
+    public string GetWebPathForFilename(string filename, string fallback = "") {
+        if (filename.IsNullOrWhiteSpace()) {
+            return fallback;
+        }
+
+        // Without a base there is nothing to join onto.
+        if (string.IsNullOrEmpty(WebPath)) {
+            return filename;
+        }
+
+        // URLs always use '/'. Path.Combine would insert the OS separator ('\' on Windows) and
+        // would throw the base away whenever filename itself starts with a separator.
+        return WebPath.TrimEnd('/') + "/" + filename.TrimStart('/');
+    }
+} \ No newline at end of file
diff --git a/src/Models/NewsArticle.cs b/src/Models/NewsArticle.cs
new file mode 100644
index 0000000..957dee4
--- /dev/null
+++ b/src/Models/NewsArticle.cs
@@ -0,0 +1,20 @@
+namespace I2R.LightNews.Models;
+
+/// <summary>
+/// A news article as produced by the grabber: either a front-page teaser (only
+/// <see cref="Href"/> and <see cref="Title"/> are filled in by the front-page scrape)
+/// or a fully grabbed article.
+/// </summary>
+public class NewsArticle
+{
+ /// <summary>Headline text.</summary>
+ public string Title { get; set; }
+ /// <summary>Lead paragraph text; only set when a full article is grabbed.</summary>
+ public string Subtitle { get; set; }
+ /// <summary>URL of the article at the original source.</summary>
+ public string Href { get; set; }
+ /// <summary>Article body as an HTML fragment; the grabber passes it through the HTML sanitiser before assigning it.</summary>
+ public string Content { get; set; }
+ /// <summary>Authors found on the article page.</summary>
+ public List<Author> Authors { get; set; }
+ /// <summary>Timestamp of the downloaded copy the article was parsed from.</summary>
+ public DateTimeOffset CachedAt { get; set; }
+ /// <summary>Publication time parsed from the page; stays at <c>default(DateTime)</c> when parsing fails.</summary>
+ public DateTime PublishedAt { get; set; }
+ /// <summary>Last-modified time parsed from the page; stays at <c>default(DateTime)</c> when parsing fails.</summary>
+ public DateTime UpdatedAt { get; set; }
+
+ /// <summary>A person credited on an article.</summary>
+ public class Author
+ {
+ /// <summary>Display name of the author.</summary>
+ public string Name { get; set; }
+ /// <summary>Contact information; not populated by the grabber in this change.</summary>
+ public string Contact { get; set; }
+ /// <summary>Role or job title of the author.</summary>
+ public string Title { get; set; }
+ }
+} \ No newline at end of file
diff --git a/src/Models/NewsSource.cs b/src/Models/NewsSource.cs
new file mode 100644
index 0000000..5eadad0
--- /dev/null
+++ b/src/Models/NewsSource.cs
@@ -0,0 +1,10 @@
+namespace I2R.LightNews.Models;
+
+/// <summary>
+/// A scraped front page of a news site together with the article teasers found on it.
+/// </summary>
+public class NewsSource
+{
+ /// <summary>Short site key (the grabber uses "nrk"); also used as page title and in reader links.</summary>
+ public string Name { get; set; }
+ /// <summary>Value of the page's canonical link element, or a placeholder when it has none.</summary>
+ public string CanonicalUrl { get; set; }
+ /// <summary>Attribution text shown in the page footer.</summary>
+ public string Attribution { get; set; }
+ /// <summary>Time the underlying front-page copy was downloaded.</summary>
+ public DateTime Created { get; set; }
+ /// <summary>Article teasers extracted from the front page.</summary>
+ public List<NewsArticle> Articles { get; set; }
+} \ No newline at end of file
diff --git a/src/Pages/Index.cshtml b/src/Pages/Index.cshtml
new file mode 100644
index 0000000..d7599bc
--- /dev/null
+++ b/src/Pages/Index.cshtml
@@ -0,0 +1,19 @@
+@page "{site?}"
+@model IndexModel
+@{
+    ViewData["Title"] = Model.Source.Name;
+    // Whole minutes elapsed since the front page was fetched. TotalMinutes is required here:
+    // TimeSpan.Minutes is only the 0-59 minute component and wraps around every hour.
+    var minutesSinceGrab = (long) DateTime.UtcNow.Subtract(Model.Source.Created).TotalMinutes;
+}
+
+@* One section per teaser: the headline opens the light reader page, "Kilde" links to the original article. *@
+@foreach (var article in Model.Source.Articles) {
+    <section>
+        @* The article URL is percent-encoded so its own "?", "&" and "#" stay inside the url parameter. *@
+        <a href="/les/@Model.Source.Name?url=@Uri.EscapeDataString(article.Href)">
+            @* Titles are scraped plain text, so Razor HTML-encodes them instead of emitting them raw. *@
+            <h2>@article.Title</h2>
+        </a>
+        @* Kept outside the headline link: an anchor must not be nested inside another anchor. *@
+        <a href="@article.Href" rel="noreferrer">Kilde</a>
+    </section>
+}
+<footer>
+    <small>
+        @Model.Source.Attribution &copy; @Model.Source.Name, @minutesSinceGrab minutter siden
+    </small>
+</footer> \ No newline at end of file
diff --git a/src/Pages/Index.cshtml.cs b/src/Pages/Index.cshtml.cs
new file mode 100644
index 0000000..9305766
--- /dev/null
+++ b/src/Pages/Index.cshtml.cs
@@ -0,0 +1,30 @@
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+
+namespace I2R.LightNews.Pages;
+
+/// <summary>
+/// Page model for the front page listing. A request without a site key is redirected to
+/// "/nrk"; every other site key currently renders the NRK front page.
+/// </summary>
+public class IndexModel : PageModel
+{
+    private readonly GrabberService _grabber;
+    private readonly ILogger<IndexModel> _logger;
+
+    /// <summary>Front page data rendered by the view.</summary>
+    public NewsSource Source { get; set; }
+
+    public IndexModel(ILogger<IndexModel> logger, GrabberService grabber) {
+        _grabber = grabber;
+        _logger = logger;
+    }
+
+    /// <summary>Loads the front page for <paramref name="site"/>.</summary>
+    /// <param name="site">Optional site key taken from the route.</param>
+    /// <returns>The rendered page, or a redirect to "/nrk" when no site key was given.</returns>
+    public async Task<ActionResult> OnGet(string site) {
+        if (!site.IsNullOrWhiteSpace()) {
+            // NRK is the only source so far, so every site key resolves to it.
+            Source = await _grabber.GrabNrkAsync();
+            return Page();
+        }
+
+        return Redirect("/nrk");
+    }
+} \ No newline at end of file
diff --git a/src/Pages/Read.cshtml b/src/Pages/Read.cshtml
new file mode 100644
index 0000000..2aecc75
--- /dev/null
+++ b/src/Pages/Read.cshtml
@@ -0,0 +1,26 @@
+@page "/les/{site}"
+@model ReadModel
+@{
+ ViewData["Title"] = Model.Source.Title;
+}
+
+@* Header row: headline and lead on the left, publication dates and the author list on the right. *@
+<div style="display: flex; justify-content: space-between">
+ <div>
+ <h1>@Model.Source.Title</h1>
+ <p>@Model.Source.Subtitle</p>
+ </div>
+ <div style="display: flex; flex-direction: row">
+ <div style="flex-direction:column">
+ <small>Publisert: @Model.Source.PublishedAt.ToString("F")</small><br>
+ <small>Oppdatert: @Model.Source.UpdatedAt.ToString("F")</small>
+ </div>
+ @* Vertical divider between the dates and the authors. *@
+ <div style="margin: 0 5px; height: 100%; border: 1px solid black"></div>
+ <div style="flex-direction:column">
+ @foreach (var author in Model.Source.Authors) {
+ <small><em>@author.Name</em>: @author.Title</small> <br/>
+ }
+ </div>
+ </div>
+</div>
+<hr>
+@* Article body is emitted unencoded; the grabber runs it through HtmlSanitiser before storing it in Content. *@
+@Html.Raw(Model.Source.Content) \ No newline at end of file
diff --git a/src/Pages/Read.cshtml.cs b/src/Pages/Read.cshtml.cs
new file mode 100644
index 0000000..16a1055
--- /dev/null
+++ b/src/Pages/Read.cshtml.cs
@@ -0,0 +1,24 @@
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+
+namespace I2R.LightNews.Pages;
+
+/// <summary>
+/// Page model for the reader page ("/les/{site}?url=..."), which shows a single grabbed article.
+/// </summary>
+public class ReadModel : PageModel
+{
+    private readonly GrabberService _grabber;
+
+    /// <summary>The article rendered by the view.</summary>
+    public NewsArticle Source { get; set; }
+
+    public ReadModel(GrabberService grabber) {
+        _grabber = grabber;
+    }
+
+    /// <summary>Grabs the article at <paramref name="url"/> from the given site.</summary>
+    /// <param name="site">Site key taken from the route; only "nrk" is handled.</param>
+    /// <param name="url">Article URL taken from the query string.</param>
+    /// <returns>
+    /// The rendered page, or a redirect to "/" when the url is missing, the site is unknown
+    /// or the grabber returns no article.
+    /// </returns>
+    public async Task<ActionResult> OnGet([FromRoute] string site, [FromQuery] string url) {
+        // A request without ?url= would otherwise hand null to the grabber, which calls
+        // string methods on it and fails with a NullReferenceException.
+        if (string.IsNullOrWhiteSpace(url)) return Redirect("/");
+
+        Source = site switch {
+            "nrk" => await _grabber.GrabNrkArticleAsync(url),
+            _ => default
+        };
+        if (Source is null) return Redirect("/");
+        return Page();
+    }
+} \ No newline at end of file
diff --git a/src/Pages/Shared/_Layout.cshtml b/src/Pages/Shared/_Layout.cshtml
new file mode 100644
index 0000000..bcdd658
--- /dev/null
+++ b/src/Pages/Shared/_Layout.cshtml
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html lang="nb">
+<head>
+ <meta charset="utf-8"/>
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+ @* reset.css is loaded first so that index.css can override it. *@
+ <link rel="stylesheet" href="/reset.css" >
+ <link rel="stylesheet" href="/index.css" >
+ @* Each page supplies its own title through ViewData["Title"]. *@
+ <title>@ViewData["Title"] - Lettnytt</title>
+</head>
+<body>
+<header>
+ @* Site navigation: one link per news source. *@
+ <nav>
+ <a href="/nrk">NRK</a>
+ </nav>
+</header>
+<main>
+ @RenderBody()
+</main>
+</body>
+</html> \ No newline at end of file
diff --git a/src/Pages/_ViewImports.cshtml b/src/Pages/_ViewImports.cshtml
new file mode 100644
index 0000000..bc2d314
--- /dev/null
+++ b/src/Pages/_ViewImports.cshtml
@@ -0,0 +1,3 @@
+@using I2R.LightNews
+@namespace I2R.LightNews.Pages
+@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers
diff --git a/src/Pages/_ViewStart.cshtml b/src/Pages/_ViewStart.cshtml
new file mode 100644
index 0000000..a5f1004
--- /dev/null
+++ b/src/Pages/_ViewStart.cshtml
@@ -0,0 +1,3 @@
+@{
+ Layout = "_Layout";
+}
diff --git a/src/Program.cs b/src/Program.cs
new file mode 100644
index 0000000..1716be9
--- /dev/null
+++ b/src/Program.cs
@@ -0,0 +1,20 @@
+global using I2R.LightNews.Services;
+global using I2R.LightNews.Models;
+global using IOL.Helpers;
+using System.Globalization;
+
+var builder = WebApplication.CreateBuilder(args);
+
+// Service registrations: HTTP client infrastructure for the grabber, the grabber itself
+// (one instance per request) and Razor Pages.
+builder.Services.AddHttpClient();
+builder.Services.AddScoped<GrabberService>();
+builder.Services.AddRazorPages();
+
+var app = builder.Build();
+
+// The pages format dates for Norwegian readers. Assigning CurrentCulture alone only affects
+// the startup thread and whatever inherits its execution context; the DefaultThread*
+// properties make nb-NO the default for every thread, including those serving requests.
+var culture = new CultureInfo("nb-no");
+CultureInfo.DefaultThreadCurrentCulture = culture;
+CultureInfo.DefaultThreadCurrentUICulture = culture;
+CultureInfo.CurrentCulture = culture;
+CultureInfo.CurrentUICulture = culture;
+
+app.UseStaticFiles();
+app.UseStatusCodePages();
+app.UseRouting();
+app.MapRazorPages();
+app.Run(); \ No newline at end of file
diff --git a/src/Properties/launchSettings.json b/src/Properties/launchSettings.json
new file mode 100644
index 0000000..88dc0d3
--- /dev/null
+++ b/src/Properties/launchSettings.json
@@ -0,0 +1,22 @@
+{
+ "profiles": {
+ "http": {
+ "commandName": "Project",
+ "dotnetRunMessages": true,
+ "launchBrowser": false,
+ "applicationUrl": "http://localhost:5088",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development"
+ }
+ },
+ "https": {
+ "commandName": "Project",
+ "dotnetRunMessages": true,
+ "launchBrowser": false,
+ "applicationUrl": "https://localhost:7037;http://localhost:5088",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development"
+ }
+ }
+ }
+}
diff --git a/src/Services/GrabberService.cs b/src/Services/GrabberService.cs
new file mode 100644
index 0000000..814f0d3
--- /dev/null
+++ b/src/Services/GrabberService.cs
@@ -0,0 +1,147 @@
+using System.Security.Cryptography;
+using System.Text;
+using AngleSharp.Html.Parser;
+using I2R.LightNews.Utilities;
+
+namespace I2R.LightNews.Services;
+
+/// <summary>
+/// Downloads pages from nrk.no, keeps the raw HTML in an on-disk cache and turns it into
+/// <see cref="NewsSource"/> and <see cref="NewsArticle"/> models.
+/// </summary>
+public class GrabberService
+{
+    private readonly ILogger<GrabberService> _logger;
+    private readonly HttpClient _http;
+    private const string NrkPrefix = "nrkno";
+
+    // Maximum age, in seconds, of a cache file before the page is downloaded again
+    // (100800 s = 28 h).
+    // NOTE(review): the comment that used to sit next to the staleness check said
+    // "30 minutes" (= 1800 s); confirm which of the two is intended.
+    private const int StaleTime = 100800;
+
+    // The only hosts pages are ever downloaded from.
+    private static readonly string[] NrkHosts = { "nrk.no", "www.nrk.no" };
+
+    // Sections of nrk.no whose pages are not handled by the article parser.
+    private static readonly string[] UnsupportedPathPrefixes = { "/mat", "/tv", "/radio", "/xl" };
+
+    private static readonly AppPath _cachePath = new() {
+        HostPath = "AppData/__sitecache"
+    };
+
+    public GrabberService(ILogger<GrabberService> logger, HttpClient http) {
+        _logger = logger;
+        _http = http;
+    }
+
+    /// <summary>Downloads and parses a single nrk.no article.</summary>
+    /// <param name="url">Absolute http(s) URL of the article.</param>
+    /// <returns>
+    /// The parsed article, or <c>default</c> when <paramref name="url"/> is not a supported
+    /// nrk.no article URL.
+    /// </returns>
+    public async Task<NewsArticle> GrabNrkArticleAsync(string url) {
+        if (!IsSupportedNrkArticleUrl(url)) return default;
+
+        // The hash only serves to derive a file-system-safe cache file name from the URL.
+        var urlHash = Convert.ToHexString(MD5.HashData(Encoding.UTF8.GetBytes(url)));
+        var articleFilePrefix = "art-" + NrkPrefix + "-" + urlHash;
+        var source = await GrabSourceAsync(url, articleFilePrefix, true);
+        var parser = new HtmlParser();
+        var doc = await parser.ParseDocumentAsync(source.Content);
+        var result = new NewsArticle() {
+            CachedAt = source.CacheFileCreatedAt,
+            Href = url,
+            Title = doc.QuerySelector("h1.title")?.TextContent,
+            Subtitle = doc.QuerySelector(".article-lead p")?.TextContent,
+            Authors = new List<NewsArticle.Author>()
+        };
+
+        foreach (var authorNode in doc.QuerySelectorAll(".authors .author")) {
+            result.Authors.Add(new NewsArticle.Author() {
+                Name = authorNode.QuerySelector(".author__name")?.TextContent,
+                Title = authorNode.QuerySelector(".author__role")?.TextContent
+            });
+        }
+
+        // A failed parse leaves the value at default(DateTime).
+        DateTime.TryParse(doc.QuerySelector("time.datePublished")?.Attributes["datetime"]?.Value, out var published);
+        DateTime.TryParse(doc.QuerySelector("time.dateModified")?.Attributes["datetime"]?.Value, out var modified);
+        result.UpdatedAt = modified;
+        result.PublishedAt = published;
+
+        // Pages without an article body (other page types, error pages) yield empty content
+        // instead of a NullReferenceException.
+        var bodyHtml = doc.QuerySelector(".article-body")?.InnerHtml;
+        if (bodyHtml is null) {
+            _logger.LogWarning("No .article-body element found in {Url}", url);
+        }
+
+        result.Content = HtmlSanitiser.SanitizeHtmlFragment(bodyHtml ?? string.Empty, "img,a,.video-reference,.image-reference,.reference");
+        return result;
+    }
+
+    /// <summary>Downloads (or reads from cache) the nrk.no front page and extracts its article teasers.</summary>
+    /// <returns>The front page with every teaser that links to a supported article.</returns>
+    public async Task<NewsSource> GrabNrkAsync() {
+        var source = await GrabSourceAsync("https://nrk.no", NrkPrefix);
+        var parser = new HtmlParser();
+        var doc = await parser.ParseDocumentAsync(source.Content);
+        var result = new NewsSource() {
+            Name = "nrk",
+            Attribution = "Fra https://nrk.no",
+            Created = source.CacheFileCreatedAt.DateTime,
+            CanonicalUrl = doc.QuerySelector("link[rel='canonical']")?.Attributes["href"]?.Value ?? "uvisst",
+            Articles = new List<NewsArticle>()
+        };
+
+        foreach (var articleAnchorNode in doc.QuerySelectorAll("main section a")) {
+            var href = articleAnchorNode.Attributes["href"]?.Value.Trim();
+            var title = articleAnchorNode.QuerySelector(".kur-room__title span")?.TextContent.Trim();
+
+            // Same rule as GrabNrkArticleAsync, so the list never offers a link that the
+            // reader page would refuse to open.
+            if (string.IsNullOrWhiteSpace(title) || !IsSupportedNrkArticleUrl(href)) {
+                continue;
+            }
+
+            result.Articles.Add(new NewsArticle {
+                Href = href,
+                Title = title
+            });
+        }
+
+        return result;
+    }
+
+    /// <summary>
+    /// Decides whether <paramref name="url"/> is an absolute http(s) URL on nrk.no outside the
+    /// unsupported sections. The host is compared exactly: a plain prefix test on the string
+    /// would also accept look-alike hosts such as "nrk.no.example.com".
+    /// </summary>
+    private static bool IsSupportedNrkArticleUrl(string url) {
+        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) return false;
+        if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps) return false;
+        if (!NrkHosts.Contains(uri.Host, StringComparer.OrdinalIgnoreCase)) return false;
+        return !UnsupportedPathPrefixes.Any(prefix => uri.AbsolutePath.StartsWith(prefix, StringComparison.Ordinal));
+    }
+
+    /// <summary>Raw HTML of a page together with information about its cache file.</summary>
+    private class SourceResult
+    {
+        public string CacheFileName { get; set; }
+        public string Content { get; set; }
+        public DateTimeOffset CacheFileCreatedAt { get; set; }
+    }
+
+    /// <summary>
+    /// Returns the HTML of <paramref name="url"/>, from the newest non-stale cache file when
+    /// one exists and <paramref name="forceRefresh"/> is false, otherwise from the network.
+    /// </summary>
+    /// <param name="url">Page to download.</param>
+    /// <param name="prefix">Cache file name prefix identifying the page.</param>
+    /// <param name="forceRefresh">When true the cache is not consulted.</param>
+    private async Task<SourceResult> GrabSourceAsync(string url, string prefix, bool forceRefresh = false) {
+        var cached = forceRefresh ? default : GetLatestCacheFile(prefix);
+        if (cached != default) {
+            _logger.LogInformation("Returned cached {0} file, filename: {1}", url, cached.CacheFileName);
+            cached.Content = await File.ReadAllTextAsync(_cachePath.GetHostPathForFilename(cached.CacheFileName));
+            return cached;
+        }
+
+        using var sourceResponse = await _http.GetAsync(url);
+        var sourceContent = await sourceResponse.Content.ReadAsStringAsync();
+        var utcNow = DateTimeOffset.UtcNow;
+        var result = new SourceResult() {
+            CacheFileCreatedAt = utcNow,
+            Content = sourceContent
+        };
+
+        // Never cache an error page: it would be served again until the cache turns stale.
+        if (!sourceResponse.IsSuccessStatusCode) {
+            _logger.LogWarning("Got status {StatusCode} for {Url}, response was not cached", (int) sourceResponse.StatusCode, url);
+            return result;
+        }
+
+        var newCacheFileName = prefix + "-" + utcNow.ToUnixTimeSeconds() + ".html";
+        // With forceRefresh the cache lookup (which also creates the directory) is skipped,
+        // so make sure the directory exists before writing. No-op when it already does.
+        Directory.CreateDirectory(_cachePath.HostPath);
+        await File.WriteAllTextAsync(_cachePath.GetHostPathForFilename(newCacheFileName), sourceContent);
+        _logger.LogInformation("Wrote new cache file for {0}, filename: {1}", url, newCacheFileName);
+        result.CacheFileName = newCacheFileName;
+        return result;
+    }
+
+    /// <summary>
+    /// Finds the newest cache file whose name starts with <paramref name="prefix"/>.
+    /// Creates the cache directory when it does not exist yet.
+    /// </summary>
+    /// <returns>
+    /// Name and timestamp of the file (content not loaded), or <c>default</c> when there is
+    /// no such file or it is older than <see cref="StaleTime"/> seconds.
+    /// </returns>
+    private SourceResult GetLatestCacheFile(string prefix) {
+        var cacheDirectoryInfo = new DirectoryInfo(_cachePath.HostPath);
+        if (!cacheDirectoryInfo.Exists) {
+            cacheDirectoryInfo.Create();
+            return default;
+        }
+
+        // File names end in "-<unix seconds>.html", so the name that sorts last is the newest.
+        var mostRecentFile = cacheDirectoryInfo.GetFiles()
+            .Where(file => file.Name.StartsWith(prefix, StringComparison.Ordinal))
+            .OrderBy(file => file.Name, StringComparer.Ordinal)
+            .LastOrDefault();
+        if (mostRecentFile is null) return default;
+
+        var mostRecentFileName = mostRecentFile.Name;
+        // Digits after the last '-' form the timestamp; an unparsable name counts as epoch 0, i.e. stale.
+        var mostRecentEpochString = new string(mostRecentFileName.Skip(mostRecentFileName.LastIndexOf('-')).Where(char.IsDigit).ToArray());
+        long.TryParse(mostRecentEpochString, out var mostRecentEpochLong);
+        // Older than StaleTime seconds: treat as missing so the page is downloaded again.
+        if (mostRecentEpochLong + StaleTime < DateTimeOffset.UtcNow.ToUnixTimeSeconds()) return default;
+        return new SourceResult {
+            CacheFileName = mostRecentFileName,
+            CacheFileCreatedAt = DateTimeOffset.FromUnixTimeSeconds(mostRecentEpochLong)
+        };
+    }
+} \ No newline at end of file
diff --git a/src/Utilities/HtmlSanitiser.cs b/src/Utilities/HtmlSanitiser.cs
new file mode 100644
index 0000000..68f5d23
--- /dev/null
+++ b/src/Utilities/HtmlSanitiser.cs
@@ -0,0 +1,83 @@
+using AngleSharp.Dom;
+using AngleSharp.Html.Parser;
+
+namespace I2R.LightNews.Utilities;
+
+/// <summary>
+/// Allow-list based HTML sanitiser: elements and attributes that are not explicitly allowed
+/// are removed, and callers can additionally exclude elements by tag name, class or id.
+/// </summary>
+public static class HtmlSanitiser
+{
+    private const string VoidElements = "area,br,col,hr,img,wbr";
+    private const string OptionalEndTagBlockElements = "colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr";
+    private const string OptionalEndTagInlineElements = "rp,rt";
+    private const string OptionalEndTagElements = OptionalEndTagInlineElements + "," + OptionalEndTagBlockElements;
+    private const string BlockElements = OptionalEndTagBlockElements + ",address,article,aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,section,table,ul";
+    private const string InlineElements = OptionalEndTagInlineElements + ",a,abbr,acronym,b,bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small,span,strike,strong,sub,sup,time,tt,u,var";
+    private const string DefaultValidElements = VoidElements + "," + BlockElements + "," + InlineElements + "," + OptionalEndTagElements;
+    private const string DefaultUriAttrs = "background,cite,href,longdesc,src,xlink:href";
+    private const string DefaultSrcsetAttrs = "srcset";
+    private const string DefaultHtmlAttrs = "abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,valign,value,vspace,width";
+    private const string DefaultValidAttrs = DefaultUriAttrs + "," + DefaultSrcsetAttrs + "," + DefaultHtmlAttrs;
+    private static readonly ISet<string> ValidElements = DefaultValidElements.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase);
+    private static readonly ISet<string> ValidAttributes = DefaultValidAttrs.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase);
+    private static readonly ISet<string> UriAttributes = DefaultUriAttrs.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase);
+
+    // Schemes a URI-valued attribute may use; relative URIs are always allowed.
+    private static readonly ISet<string> SafeUriSchemes = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "http", "https", "mailto" };
+
+    /// <summary>Sanitises an HTML fragment.</summary>
+    /// <param name="html">The fragment to clean.</param>
+    /// <param name="excludeSelectors">
+    /// Optional comma-separated list of elements to remove in addition to the allow-list.
+    /// Each entry is a tag name ("img"), a class (".reference") or an id ("#banner");
+    /// compound or nested CSS selectors are not supported.
+    /// </param>
+    /// <returns>The sanitised fragment as HTML.</returns>
+    public static string SanitizeHtmlFragment(string html, string excludeSelectors = default) {
+        var selectors = string.IsNullOrWhiteSpace(excludeSelectors)
+            ? Array.Empty<string>()
+            : excludeSelectors.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+
+        var element = ParseHtmlFragment(html);
+        // Walk backwards: nodes may be removed while iterating.
+        for (var i = element.ChildNodes.Length - 1; i >= 0; i--) {
+            Sanitize(element.ChildNodes[i], selectors);
+        }
+
+        return element.InnerHtml;
+    }
+
+    /// <summary>Parses <paramref name="content"/> inside a wrapper div and returns that wrapper.</summary>
+    private static IElement ParseHtmlFragment(string content) {
+        // The random id lets the wrapper be found again regardless of what the content contains.
+        var uniqueId = Guid.NewGuid().ToString("D");
+        var parser = new HtmlParser();
+        var document = parser.ParseDocument($"<div id='{uniqueId}'>{content}</div>");
+        return document.GetElementById(uniqueId);
+    }
+
+    /// <summary>
+    /// Removes <paramref name="node"/> when it is an excluded or disallowed element; otherwise
+    /// strips its disallowed attributes and sanitises its children with the same exclusions.
+    /// </summary>
+    private static void Sanitize(INode node, string[] selectors) {
+        if (node is IElement htmlElement) {
+            var excluded = selectors.Any(selector => MatchesSelector(htmlElement, selector));
+            if (excluded || !ValidElements.Contains(htmlElement.TagName)) {
+                // The element goes away together with its subtree, so nothing is left to inspect.
+                htmlElement.Remove();
+                return;
+            }
+
+            for (var i = htmlElement.Attributes.Length - 1; i >= 0; i--) {
+                var attribute = htmlElement.Attributes[i];
+                var allowed = ValidAttributes.Contains(attribute.Name)
+                              && (!UriAttributes.Contains(attribute.Name) || HasSafeScheme(attribute.Value));
+                if (!allowed) {
+                    htmlElement.RemoveAttribute(attribute.NamespaceUri, attribute.Name);
+                }
+            }
+        }
+
+        for (var i = node.ChildNodes.Length - 1; i >= 0; i--) {
+            Sanitize(node.ChildNodes[i], selectors);
+        }
+    }
+
+    /// <summary>Tests an element against one exclusion entry (".class", "#id" or a tag name).</summary>
+    private static bool MatchesSelector(IElement element, string selector) {
+        if (selector.StartsWith('.')) return element.ClassList.Contains(selector[1..]);
+        if (selector.StartsWith('#')) return element.Id == selector[1..];
+        // TagName is upper-case for HTML elements while selectors are written in lower case.
+        return string.Equals(element.TagName, selector, StringComparison.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Returns true for relative URIs and for URIs with an allowed scheme. Script-capable
+    /// schemes such as "javascript:" are rejected because callers render the sanitised
+    /// markup unencoded.
+    /// </summary>
+    private static bool HasSafeScheme(string uri) {
+        // Browsers ignore whitespace and control characters inside the scheme
+        // ("java\tscript:"), so drop them before looking at it.
+        var compact = new string((uri ?? string.Empty).Where(c => c > ' ').ToArray());
+        var colon = compact.IndexOf(':');
+        if (colon < 0) return true;
+
+        // A colon that appears after the path, query or fragment has started is not a scheme delimiter.
+        var delimiter = compact.IndexOfAny(new[] { '/', '?', '#' });
+        if (delimiter >= 0 && delimiter < colon) return true;
+
+        return SafeUriSchemes.Contains(compact[..colon]);
+    }
+} \ No newline at end of file
diff --git a/src/wwwroot/index.css b/src/wwwroot/index.css
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/wwwroot/index.css
diff --git a/src/wwwroot/reset.css b/src/wwwroot/reset.css
new file mode 100644
index 0000000..ae5fad7
--- /dev/null
+++ b/src/wwwroot/reset.css
@@ -0,0 +1,74 @@
+/* Box sizing rules */
+*,
+*::before,
+*::after {
+ box-sizing: border-box;
+}
+
+/* Remove default margin */
+body,
+h1,
+h2,
+h3,
+h4,
+p,
+figure,
+blockquote,
+dl,
+dd {
+ margin: 0;
+}
+
+/* Remove list styles on ul, ol elements with a list role, which suggests default styling will be removed */
+ul[role="list"],
+ol[role="list"] {
+ list-style: none;
+}
+
+/* Set core root defaults */
+html:focus-within {
+ scroll-behavior: smooth;
+}
+
+/* Set core body defaults */
+body {
+ min-height: 100vh;
+ text-rendering: optimizeSpeed;
+ line-height: 1.5;
+}
+
+/* A elements that don't have a class get default styles */
+a:not([class]) {
+ text-decoration-skip-ink: auto;
+}
+
+/* Make images easier to work with */
+img,
+picture {
+ max-width: 100%;
+ display: block;
+}
+
+/* Inherit fonts for inputs and buttons */
+input,
+button,
+textarea,
+select {
+ font: inherit;
+}
+
+/* Remove all animations and transitions for people that prefer not to see them */
+@media (prefers-reduced-motion: reduce) {
+ html:focus-within {
+ scroll-behavior: auto;
+ }
+
+ *,
+ *::before,
+ *::after {
+ animation-duration: 0.01ms !important;
+ animation-iteration-count: 1 !important;
+ transition-duration: 0.01ms !important;
+ scroll-behavior: auto !important;
+ }
+}