From 1dca731f6f64c7b74da0ecdf3a807ad387c4281f Mon Sep 17 00:00:00 2001
From: ivarlovlie
Date: Tue, 15 Nov 2022 23:47:30 +0700
Subject: feat: Add .dockerignore
---
.dockerignore | 8 ++++++++
1 file changed, 8 insertions(+)
create mode 100644 .dockerignore
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..8bfeeca
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,8 @@
+**/AppData
+**/obj
+**/bin
+**/.idea
+**/Properties
+Dockerfile
+*.gitignore
+*.dockerignore
--
cgit v1.3
From a7e7d83057503262a9f2b56c4fb0c18e3f8ea15c Mon Sep 17 00:00:00 2001
From: ivarlovlie
Date: Wed, 16 Nov 2022 12:06:06 +0700
Subject: feat: Add memory cache
---
src/Pages/Read.cshtml | 45 +++++++------
src/Services/GrabberService.cs | 149 ++++++++++++++++++++++++-----------------
2 files changed, 114 insertions(+), 80 deletions(-)
diff --git a/src/Pages/Read.cshtml b/src/Pages/Read.cshtml
index cbbd69a..63543aa 100644
--- a/src/Pages/Read.cshtml
+++ b/src/Pages/Read.cshtml
@@ -9,26 +9,33 @@
@Model.Source.Title
@Model.Source.Subtitle
-
-
- @if (Model.Source.PublishedAt != default) {
- Publisert: @Model.Source.PublishedAt.ToString("dd-MM-yyyy hh:mm:ss")
- }
- @if (Model.Source.UpdatedAt != default) {
-
- Oppdatert: @Model.Source.UpdatedAt.ToString("dd-MM-yyyy hh:mm:ss")
- }
-
-
-
- @foreach (var author in Model.Source.Authors) {
- @author.Name: @author.Title
-
- }
-
-
@Html.Raw(Model.Source.Content)
-
\ No newline at end of file
+
+
+
\ No newline at end of file
diff --git a/src/Services/GrabberService.cs b/src/Services/GrabberService.cs
index f406d28..4a92517 100644
--- a/src/Services/GrabberService.cs
+++ b/src/Services/GrabberService.cs
@@ -2,12 +2,14 @@ using System.Security.Cryptography;
using System.Text;
using AngleSharp.Html.Parser;
using I2R.LightNews.Utilities;
+using Microsoft.Extensions.Caching.Memory;
namespace I2R.LightNews.Services;
public class GrabberService
{
private readonly ILogger _logger;
+ private readonly MemoryCache _memoryCache;
private readonly HttpClient _http;
private const string NrkPrefix = "nrkno";
private const int StaleTime = 1800;
@@ -16,88 +18,113 @@ public class GrabberService
HostPath = "AppData/__sitecache"
};
- public GrabberService(ILogger logger, HttpClient http) {
+ public GrabberService(ILogger logger, HttpClient http, MemoryCache memoryCache) {
_logger = logger;
_http = http;
+ _memoryCache = memoryCache;
}
- public async Task<NewsArticle> GrabNrkArticleAsync(string url) {
+ private bool IsSupportedNrkUrl(string url) {
var strippedUrl = url.Replace("https://", "")
.Replace("http://", "")
.Replace("www.", "");
- if (!strippedUrl.StartsWith("nrk.no")
- || strippedUrl.StartsWith("nrk.no/mat")
- || strippedUrl.StartsWith("nrk.no/tv")
- || strippedUrl.StartsWith("nrk.no/radio")
- || strippedUrl.StartsWith("nrk.no/xl")
- ) return default;
+ var ignored = new List<string>() {
+ "nrk.no/mat",
+ "nrk.no/radio",
+ "nrk.no/tv",
+ "nrk.no/xl"
+ };
+ return strippedUrl.StartsWith("nrk.no") && ignored.All(c => !strippedUrl.Contains(c));
+ }
+
+ public async Task<NewsArticle> GrabNrkArticleAsync(string url) {
+ if (!IsSupportedNrkUrl(url)) return default;
using var md5 = MD5.Create();
var articleFilePrefix = "art-" + NrkPrefix + "-" + Convert.ToHexString(md5.ComputeHash(Encoding.UTF8.GetBytes(url)));
- var source = await GrabSourceAsync(url, articleFilePrefix);
- var parser = new HtmlParser();
- var doc = await parser.ParseDocumentAsync(source.Content);
- var result = new NewsArticle() {
- CachedAt = source.CacheFileCreatedAt,
- Href = url,
- Title = doc.QuerySelector("h1.title")?.TextContent,
- Subtitle = doc.QuerySelector(".article-lead p")?.TextContent,
- Authors = new List<NewsArticle.Author>()
- };
-
- foreach (var authorNode in doc.QuerySelectorAll(".authors .author")) {
- var author = new NewsArticle.Author() {
- Name = authorNode.QuerySelector(".author__name")?.TextContent,
- Title = authorNode.QuerySelector(".author__role")?.TextContent
+ return await _memoryCache.GetOrCreateAsync(articleFilePrefix, async entry => {
+ entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(10);
+ var source = await GrabSourceAsync(url, articleFilePrefix);
+ var parser = new HtmlParser();
+ var doc = await parser.ParseDocumentAsync(source.Content);
+ var result = new NewsArticle() {
+ CachedAt = source.CacheFileCreatedAt,
+ Href = url,
+ Title = doc.QuerySelector("h1.title")?.TextContent,
+ Subtitle = doc.QuerySelector(".article-lead p")?.TextContent,
+ Authors = new List<NewsArticle.Author>()
};
- result.Authors.Add(author);
- }
- DateTime.TryParse(doc.QuerySelector("time.datePublished")?.Attributes["datetime"]?.Value, out var published);
- DateTime.TryParse(doc.QuerySelector("time.dateModified")?.Attributes["datetime"]?.Value, out var modified);
- result.UpdatedAt = modified;
- result.PublishedAt = published;
- if (doc.QuerySelector("kortstokk-app") != default) {
- result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".dhks-cardSection").InnerHtml, ".dhks-background,.dhks-actions,.dhks-credits,.dhks-sticky-reset,.dhks-byline");
- } else {
- result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".article-body").InnerHtml, "a,.section-reference,.widget,.article-body--updating,.video-reference,.image-reference,.reference");
- }
+ foreach (var authorNode in doc.QuerySelectorAll(".authors .author")) {
+ var author = new NewsArticle.Author() {
+ Name = authorNode.QuerySelector(".author__name")?.TextContent,
+ Title = authorNode.QuerySelector(".author__role")?.TextContent
+ };
+ result.Authors.Add(author);
+ }
+
+ DateTime.TryParse(doc.QuerySelector("time.datePublished")?.Attributes["datetime"]?.Value, out var published);
+ DateTime.TryParse(doc.QuerySelector("time.dateModified")?.Attributes["datetime"]?.Value, out var modified);
+
+ result.UpdatedAt = modified;
+ result.PublishedAt = published;
+
+ if (doc.QuerySelector("kortstokk-app") != default) {
+ var excludes = new List<string>() {
+ ".dhks-background",
+ ".dhks-actions",
+ ".dhks-credits",
+ ".dhks-sticky-reset",
+ ".dhks-byline"
+ };
+ result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".dhks-cardSection").InnerHtml, string.Join(',', excludes));
+ } else {
+ var excludes = new List<string>() {
+ ".compilation-reference",
+ ".section-reference",
+ ".widget",
+ ".image-reference",
+ ".video-reference",
+ ".article-body--updating",
+ ".reference"
+ };
+ result.Content = HtmlSanitiser.SanitizeHtmlFragment(doc.QuerySelector(".article-body").InnerHtml, string.Join(',', excludes));
+ }
- return result;
+ return result;
+ });
}
public async Task<NewsSource> GrabNrkAsync() {
- var source = await GrabSourceAsync("https://nrk.no", NrkPrefix);
- var parser = new HtmlParser();
- var doc = await parser.ParseDocumentAsync(source.Content);
- var result = new NewsSource() {
- Name = "nrk",
- Attribution = "Fra https://nrk.no",
- Created = source.CacheFileCreatedAt.DateTime,
- CanonicalUrl = doc.QuerySelector("link[rel='canonical']")?.Attributes["href"]?.Value ?? "uvisst",
- Articles = new List<NewsArticle>()
- };
-
- foreach (var articleAnchorNode in doc.QuerySelectorAll("main section a")) {
- var article = new NewsArticle {
- Href = articleAnchorNode.Attributes["href"]?.Value.Trim(),
- Title = articleAnchorNode.QuerySelector(".kur-room__title span")?.TextContent.Trim()
+ return await _memoryCache.GetOrCreateAsync(NrkPrefix, async entry => {
+ entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(31);
+ var source = await GrabSourceAsync("https://nrk.no", NrkPrefix);
+ var parser = new HtmlParser();
+ var doc = await parser.ParseDocumentAsync(source.Content);
+ var result = new NewsSource() {
+ Name = "nrk",
+ Attribution = "Fra https://nrk.no",
+ Created = source.CacheFileCreatedAt.DateTime,
+ CanonicalUrl = doc.QuerySelector("link[rel='canonical']")?.Attributes["href"]?.Value ?? "uvisst",
+ Articles = new List<NewsArticle>()
};
- if (
- article.Href.IsNullOrWhiteSpace()
- || article.Title.IsNullOrWhiteSpace()
- || (!article.Href?.StartsWith("https://www.nrk.no") ?? true)
- || (article.Href?.StartsWith("https://www.nrk.no/mat") ?? false)
- ) {
- continue;
- }
+ foreach (var articleAnchorNode in doc.QuerySelectorAll("main section a")) {
+ var article = new NewsArticle {
+ Href = articleAnchorNode.Attributes["href"]?.Value.Trim(),
+ Title = articleAnchorNode.QuerySelector(".kur-room__title span")?.TextContent.Trim()
+ };
- result.Articles.Add(article);
- }
+ if (article.Href.IsNullOrWhiteSpace() || article.Title.IsNullOrWhiteSpace() || !IsSupportedNrkUrl(article.Href)) {
+ continue;
+ }
+
+ result.Articles.Add(article);
+ }
- return result;
+ return result;
+ });
}
private class SourceResult
--
cgit v1.3