diff options
| author | ivarlovlie <git@ivarlovlie.no> | 2022-11-15 06:20:30 +0100 |
|---|---|---|
| committer | ivarlovlie <git@ivarlovlie.no> | 2022-11-15 06:20:30 +0100 |
| commit | aa1e0e321ad64b72d80efcb7984acdf1cd3d6b7a (patch) | |
| tree | 4abce617ce12dea1ebe9f030d93ebb5f4407cad8 /src/Services | |
| parent | add94527050dc311c4ad117e25dd5e4517b3b887 (diff) | |
| download | lettnytt-aa1e0e321ad64b72d80efcb7984acdf1cd3d6b7a.tar.xz lettnytt-aa1e0e321ad64b72d80efcb7984acdf1cd3d6b7a.zip | |
refactor: Add some styling
Diffstat (limited to 'src/Services')
| -rw-r--r-- | src/Services/GrabberService.cs | 10 |
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/src/Services/GrabberService.cs b/src/Services/GrabberService.cs
index 814f0d3..5331126 100644
--- a/src/Services/GrabberService.cs
+++ b/src/Services/GrabberService.cs
@@ -1,5 +1,6 @@
 using System.Security.Cryptography;
 using System.Text;
+using AngleSharp.Dom;
 using AngleSharp.Html.Parser;
 using I2R.LightNews.Utilities;
 
@@ -35,7 +36,7 @@ public class GrabberService
 
         using var md5 = MD5.Create();
         var articleFilePrefix = "art-" + NrkPrefix + "-" + Convert.ToHexString(md5.ComputeHash(Encoding.UTF8.GetBytes(url)));
-        var source = await GrabSourceAsync(url, articleFilePrefix, true);
+        var source = await GrabSourceAsync(url, articleFilePrefix);
         var parser = new HtmlParser();
         var doc = await parser.ParseDocumentAsync(source.Content);
         var result = new NewsArticle() {
@@ -58,7 +59,12 @@ public class GrabberService
         DateTime.TryParse(doc.QuerySelector("time.dateModified")?.Attributes["datetime"]?.Value, out var modified);
         result.UpdatedAt = modified;
         result.PublishedAt = published;
-        result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".article-body").InnerHtml, "img,a,.video-reference,.image-reference,.reference");
+        if (doc.QuerySelector("kortstokk-app") != default) {
+            result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".dhks-cardSection").InnerHtml, ".dhks-background,.dhks-actions,.dhks-credits,.dhks-sticky-reset,.dhks-byline");
+        } else {
+            result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".article-body").InnerHtml, "a,.section-reference,.video-reference,.image-reference,.reference");
+        }
+
         return result;
     }
 
```
