diff options
| -rw-r--r-- | src/I2R.LightNews.csproj | 2 | ||||
| -rw-r--r-- | src/Pages/Index.cshtml | 14 | ||||
| -rw-r--r-- | src/Pages/Index.cshtml.cs | 10 | ||||
| -rw-r--r-- | src/Pages/Read.cshtml | 24 | ||||
| -rw-r--r-- | src/Pages/Shared/_Layout.cshtml | 9 | ||||
| -rw-r--r-- | src/Program.cs | 2 | ||||
| -rw-r--r-- | src/Services/GrabberService.cs | 10 | ||||
| -rw-r--r-- | src/Utilities/HtmlSanitiser.cs | 18 | ||||
| -rw-r--r-- | src/wwwroot/index.css | 56 |
9 files changed, 112 insertions, 33 deletions
diff --git a/src/I2R.LightNews.csproj b/src/I2R.LightNews.csproj index fd635d0..a84c852 100644 --- a/src/I2R.LightNews.csproj +++ b/src/I2R.LightNews.csproj @@ -7,8 +7,8 @@ <ItemGroup> <PackageReference Include="AngleSharp" Version="0.17.1" /> - <PackageReference Include="HtmlAgilityPack" Version="1.11.46" /> <PackageReference Include="IOL.Helpers" Version="3.1.0" /> + <PackageReference Include="Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation" Version="7.0.0" /> </ItemGroup> </Project> diff --git a/src/Pages/Index.cshtml b/src/Pages/Index.cshtml index d7599bc..d809043 100644 --- a/src/Pages/Index.cshtml +++ b/src/Pages/Index.cshtml @@ -5,15 +5,17 @@ } @foreach (var article in Model.Source.Articles) { - <section> + <section style="margin-bottom: 8px;display: flex; flex-direction: column"> <a href="/les/@Model.Source.Name?url=@article.Href"> - <h2>@Html.Raw(article.Title)</h2> - <a href="@article.Href" rel="noreferrer">Kilde</a> + <h2 style="font-size: 18px">@Html.Raw(article.Title)</h2> </a> + <a href="@article.Href" style="font-size: 14px;display:flex;justify-content: end" rel="noreferrer">Les på nrk.no</a> </section> } <footer> - <small> - @Model.Source.Attribution © @Model.Source.Name, @(DateTime.UtcNow.Subtract(Model.Source.Created).Minutes) minutter siden - </small> + <p> + <small> + @Model.Source.Attribution © @Model.Source.Name, @(DateTime.UtcNow.Subtract(Model.Source.Created).Minutes) minutter siden + </small> + </p> </footer>
\ No newline at end of file diff --git a/src/Pages/Index.cshtml.cs b/src/Pages/Index.cshtml.cs index 9305766..80b0ed0 100644 --- a/src/Pages/Index.cshtml.cs +++ b/src/Pages/Index.cshtml.cs @@ -16,15 +16,15 @@ public class IndexModel : PageModel public NewsSource Source { get; set; } public async Task<ActionResult> OnGet(string site) { - if (site.IsNullOrWhiteSpace()) { - return Redirect("/nrk"); - } - Source = site switch { "nrk" => await _grabber.GrabNrkAsync(), - _ => await _grabber.GrabNrkAsync() + _ => default }; + if (Source == default) { + return Redirect("/nrk"); + } + return Page(); } }
\ No newline at end of file diff --git a/src/Pages/Read.cshtml b/src/Pages/Read.cshtml index 2aecc75..cbbd69a 100644 --- a/src/Pages/Read.cshtml +++ b/src/Pages/Read.cshtml @@ -4,23 +4,31 @@ ViewData["Title"] = Model.Source.Title; } -<div style="display: flex; justify-content: space-between"> +<div id="art-header" style="display: flex; justify-content: space-between"> <div> <h1>@Model.Source.Title</h1> <p>@Model.Source.Subtitle</p> </div> - <div style="display: flex; flex-direction: row"> + <div style="display: flex; flex-direction: column; flex-wrap: nowrap"> <div style="flex-direction:column"> - <small>Publisert: @Model.Source.PublishedAt.ToString("F")</small><br> - <small>Oppdatert: @Model.Source.UpdatedAt.ToString("F")</small> + @if (Model.Source.PublishedAt != default) { + <small style="white-space: nowrap">Publisert: @Model.Source.PublishedAt.ToString("dd-MM-yyyy hh:mm:ss")</small> + } + @if (Model.Source.UpdatedAt != default) { + <br/> + <small style="white-space: nowrap">Oppdatert: @Model.Source.UpdatedAt.ToString("dd-MM-yyyy hh:mm:ss")</small> + } </div> - <div style="margin: 0 5px; height: 100%; border: 1px solid black"></div> + <div style="margin: 0 5px; border: 0.5px solid black"></div> <div style="flex-direction:column"> @foreach (var author in Model.Source.Authors) { - <small><em>@author.Name</em>: @author.Title</small> <br/> + <small style="white-space: nowrap"><b>@author.Name</b>: @author.Title</small> + <br/> } </div> </div> </div> -<hr> -@Html.Raw(Model.Source.Content)
\ No newline at end of file + +<div id="art-body"> + @Html.Raw(Model.Source.Content) +</div>
\ No newline at end of file diff --git a/src/Pages/Shared/_Layout.cshtml b/src/Pages/Shared/_Layout.cshtml index bcdd658..7831774 100644 --- a/src/Pages/Shared/_Layout.cshtml +++ b/src/Pages/Shared/_Layout.cshtml @@ -3,14 +3,15 @@ <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1.0"/> - <link rel="stylesheet" href="/reset.css" > - <link rel="stylesheet" href="/index.css" > + <link rel="stylesheet" href="/reset.css"> + <link rel="stylesheet" href="/index.css"> <title>@ViewData["Title"] - Lettnytt</title> </head> <body> <header> - <nav> - <a href="/nrk">NRK</a> + <nav style="display: flex; flex-direction: row; gap: 0 15px"> + <a href="/nrk" style="color: @(Path.StartsWith("/nrk") ? "black" : "blue")">NRK</a> + <a href="/dagbladet" style="color: @(Path.StartsWith("/dagbladet") ? "black" : "blue")">Dagbladet</a> </nav> </header> <main> diff --git a/src/Program.cs b/src/Program.cs index 1716be9..d751102 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -7,7 +7,7 @@ var builder = WebApplication.CreateBuilder(args); builder.Services.AddHttpClient(); builder.Services.AddScoped<GrabberService>(); -builder.Services.AddRazorPages(); +builder.Services.AddRazorPages().AddRazorRuntimeCompilation(); var app = builder.Build(); diff --git a/src/Services/GrabberService.cs b/src/Services/GrabberService.cs index 814f0d3..5331126 100644 --- a/src/Services/GrabberService.cs +++ b/src/Services/GrabberService.cs @@ -1,5 +1,6 @@ using System.Security.Cryptography; using System.Text; +using AngleSharp.Dom; using AngleSharp.Html.Parser; using I2R.LightNews.Utilities; @@ -35,7 +36,7 @@ public class GrabberService using var md5 = MD5.Create(); var articleFilePrefix = "art-" + NrkPrefix + "-" + Convert.ToHexString(md5.ComputeHash(Encoding.UTF8.GetBytes(url))); - var source = await GrabSourceAsync(url, articleFilePrefix, true); + var source = await GrabSourceAsync(url, articleFilePrefix); var parser = new HtmlParser(); var doc = await parser.ParseDocumentAsync(source.Content); var result = new NewsArticle() { @@ -58,7 +59,12 @@ public class GrabberService DateTime.TryParse(doc.QuerySelector("time.dateModified")?.Attributes["datetime"]?.Value, out var modified); result.UpdatedAt = modified; result.PublishedAt = published; - result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".article-body").InnerHtml, "img,a,.video-reference,.image-reference,.reference"); + if (doc.QuerySelector("kortstokk-app") != default) { + result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".dhks-cardSection").InnerHtml, ".dhks-background,.dhks-actions,.dhks-credits,.dhks-sticky-reset,.dhks-byline"); + } else { + result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".article-body").InnerHtml, "a,.section-reference,.video-reference,.image-reference,.reference"); + } + return result; } diff --git a/src/Utilities/HtmlSanitiser.cs b/src/Utilities/HtmlSanitiser.cs index 68f5d23..7a3de3e 100644 --- a/src/Utilities/HtmlSanitiser.cs +++ b/src/Utilities/HtmlSanitiser.cs @@ -10,11 +10,11 @@ public static class HtmlSanitiser private const string OptionalEndTagInlineElements = "rp,rt"; private const string OptionalEndTagElements = OptionalEndTagInlineElements + "," + OptionalEndTagBlockElements; private const string BlockElements = OptionalEndTagBlockElements + ",address,article,aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,section,table,ul"; - private const string InlineElements = OptionalEndTagInlineElements + ",a,abbr,acronym,b,bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small,span,strike,strong,sub,sup,time,tt,u,var"; + private const string InlineElements = OptionalEndTagInlineElements + ",a,abbr,acronym,b,bdi,bdo,big,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small,span,strike,strong,sub,sup,time,tt,u,var"; private const string DefaulValidElements = VoidElements + "," + BlockElements + "," + InlineElements + "," + OptionalEndTagElements; private const string DefaulUriAttrs = "background,cite,href,longdesc,src,xlink:href"; private const string DefaulSrcsetAttrs = "srcset"; - private const string DefaultHtmlAttrs = "abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,valign,value,vspace,width"; + private const string DefaultHtmlAttrs = "abbr,align,class,alt,axis,bgcolor,border,cellpadding,cellspacing,clear,color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,valign,value,vspace,width"; private const string DefaulValidAttrs = DefaulUriAttrs + "," + DefaulSrcsetAttrs + "," + DefaultHtmlAttrs; private static readonly ISet<string> ValidElements = DefaulValidElements.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase); private static readonly ISet<string> ValidAttributes = DefaulValidAttrs.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase); @@ -25,6 +25,7 @@ public static class HtmlSanitiser Sanitize(element.ChildNodes[i], excludeSelectors); } + return element.InnerHtml; } @@ -40,8 +41,14 @@ public static class HtmlSanitiser if (node is IElement htmlElement) { if (excludeSelectors.HasValue()) { foreach (var selector in excludeSelectors.Split(',')) { + // Console.WriteLine(new { + // selector, + // tag = htmlElement.TagName, + // classes = JsonSerializer.Serialize(htmlElement.ClassList.ToArray()) + // }); + if (selector.StartsWith(".")) { - if (htmlElement.ClassList.Contains(excludeSelectors)) { + if (htmlElement.ClassList.Contains(selector.Replace(".", ""))) { Console.WriteLine("Removed: " + htmlElement.TagName + ", because of: " + selector); htmlElement.Remove(); continue; @@ -49,14 +56,14 @@ public static class HtmlSanitiser } if (selector.StartsWith("#")) { - if (htmlElement.Id == selector) { + if (htmlElement.Id == selector.Replace("#", "")) { Console.WriteLine("Removed: " + htmlElement.TagName + ", because of: " + selector); htmlElement.Remove(); continue; } } - if (htmlElement.TagName == selector) { + if (htmlElement.TagName == selector.ToUpper()) { Console.WriteLine("Removed: " + htmlElement.TagName + ", because of: " + selector); htmlElement.Remove(); } @@ -65,7 +72,6 @@ public static class HtmlSanitiser if (!ValidElements.Contains(htmlElement.TagName)) { htmlElement.Remove(); - return; } for (var i = htmlElement.Attributes.Length - 1; i >= 0; i--) { diff --git a/src/wwwroot/index.css b/src/wwwroot/index.css index e69de29..0296036 100644 --- a/src/wwwroot/index.css +++ b/src/wwwroot/index.css @@ -0,0 +1,56 @@ +body { + display: flex; + flex-direction: column; + min-height: 100vh; + padding: 5vh clamp(1rem, 5vw, 3rem) 1rem; + font-family: system-ui, sans-serif; + line-height: 1.5; + color: #222; + background: white; +} + +body > * { + --layout-spacing: max(8vh, 3rem); + --max-width: 70ch; + width: min(100%, var(--max-width)); + margin-left: auto; + margin-right: auto; +} + +main { + margin-top: var(--layout-spacing); +} + +footer { + margin-top: auto; + padding-top: var(--layout-spacing); +} + +footer p { + border-top: 1px solid #ccc; + padding-top: 0.25em; + font-size: 0.9rem; + color: #767676; +} + +:is(h1, h2, h3) { + line-height: 1.2; +} + +:is(h2, h3):not(:first-child) { + margin-top: 2em; +} + +article * + * { + margin-top: 1em; +} + +.quote-text { + padding: 5px 15px 0 0; + font-style: italic; + background: #e9e9e9; +} + +.quote-source { + padding-left: 20px; +}
\ No newline at end of file |
