aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorivarlovlie <git@ivarlovlie.no>2022-11-15 05:05:05 +0100
committerivarlovlie <git@ivarlovlie.no>2022-11-15 05:05:05 +0100
commitadd94527050dc311c4ad117e25dd5e4517b3b887 (patch)
tree137306ea8c7c63b6287bd77c6deaf09e1319833e
downloadlettnytt-add94527050dc311c4ad117e25dd5e4517b3b887.tar.xz
lettnytt-add94527050dc311c4ad117e25dd5e4517b3b887.zip
feat: Initial commit
-rw-r--r--.gitignore478
-rw-r--r--README.md4
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore13
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/.name1
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml4
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml8
-rw-r--r--src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml6
-rw-r--r--src/I2R.LightNews.csproj14
-rw-r--r--src/Models/AppPath.cs24
-rw-r--r--src/Models/NewsArticle.cs20
-rw-r--r--src/Models/NewsSource.cs10
-rw-r--r--src/Pages/Index.cshtml19
-rw-r--r--src/Pages/Index.cshtml.cs30
-rw-r--r--src/Pages/Read.cshtml26
-rw-r--r--src/Pages/Read.cshtml.cs24
-rw-r--r--src/Pages/Shared/_Layout.cshtml20
-rw-r--r--src/Pages/_ViewImports.cshtml3
-rw-r--r--src/Pages/_ViewStart.cshtml3
-rw-r--r--src/Program.cs20
-rw-r--r--src/Properties/launchSettings.json22
-rw-r--r--src/Services/GrabberService.cs147
-rw-r--r--src/Utilities/HtmlSanitiser.cs83
-rw-r--r--src/wwwroot/index.css0
-rw-r--r--src/wwwroot/reset.css74
24 files changed, 1053 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b0962d6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,478 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Ww][Ii][Nn]32/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# Tye
+.tye/
+
+# ASP.NET Scaffolding
+ScaffoldingReadMe.txt
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.tlog
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Coverlet is a free, cross platform Code Coverage Tool
+coverage*.json
+coverage*.xml
+coverage*.info
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+*.vbp
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+*.dsw
+*.dsp
+
+# Visual Studio 6 technical files
+*.ncb
+*.aps
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# Visual Studio History (VSHistory) files
+.vshistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+# Fody - auto-generated XML schema
+FodyWeavers.xsd
+
+# VS Code files for those working on multiple tools
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
+
+# Windows Installer files from build outputs
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# JetBrains Rider
+*.sln.iml
+
+##
+## Visual studio for Mac
+##
+
+
+# globs
+Makefile.in
+*.userprefs
+*.usertasks
+config.make
+config.status
+aclocal.m4
+install-sh
+autom4te.cache/
+*.tar.gz
+tarballs/
+test-results/
+
+# Mac bundle stuff
+*.dmg
+*.app
+
+# content below from: https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# content below from: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+AppData/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1ac5363
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# Lettnytt
+
+En tjeneste som fjerner bråk fra nyhetene dine.
+
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore b/src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore
new file mode 100644
index 0000000..1c6e926
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/.gitignore
@@ -0,0 +1,13 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Rider ignored files
+/contentModel.xml
+/modules.xml
+/.idea.I2R.LightNews.iml
+/projectSettingsUpdater.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/.name b/src/.idea/.idea.I2R.LightNews.dir/.idea/.name
new file mode 100644
index 0000000..a1f668e
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/.name
@@ -0,0 +1 @@
+I2R.LightNews \ No newline at end of file
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml b/src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml
new file mode 100644
index 0000000..df87cf9
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/encodings.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="Encoding" addBOMForNewFiles="with BOM under Windows, with no BOM otherwise" />
+</project> \ No newline at end of file
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml b/src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml
new file mode 100644
index 0000000..7b08163
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/indexLayout.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="UserContentModel">
+ <attachedFolders />
+ <explicitIncludes />
+ <explicitExcludes />
+ </component>
+</project> \ No newline at end of file
diff --git a/src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml b/src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml
new file mode 100644
index 0000000..1d8c84d
--- /dev/null
+++ b/src/.idea/.idea.I2R.LightNews.dir/.idea/misc.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+ <component name="com.jetbrains.rider.android.RiderAndroidMiscFileCreationComponent">
+ <option name="ENSURE_MISC_FILE_EXISTS" value="true" />
+ </component>
+</project> \ No newline at end of file
diff --git a/src/I2R.LightNews.csproj b/src/I2R.LightNews.csproj
new file mode 100644
index 0000000..fd635d0
--- /dev/null
+++ b/src/I2R.LightNews.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk.Web">
+
+ <PropertyGroup>
+ <TargetFramework>net7.0</TargetFramework>
+ <ImplicitUsings>enable</ImplicitUsings>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <PackageReference Include="AngleSharp" Version="0.17.1" />
+ <PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
+ <PackageReference Include="IOL.Helpers" Version="3.1.0" />
+ </ItemGroup>
+
+</Project>
diff --git a/src/Models/AppPath.cs b/src/Models/AppPath.cs
new file mode 100644
index 0000000..2133f20
--- /dev/null
+++ b/src/Models/AppPath.cs
@@ -0,0 +1,24 @@
+namespace I2R.LightNews.Models;
+
+/// <summary>
+/// Holds a pair of base paths and builds per-file paths beneath each of them.
+/// </summary>
+public sealed record AppPath
+{
+    /// <summary>Base path that <see cref="GetHostPathForFilename"/> combines file names with.</summary>
+    public string HostPath { get; init; }
+
+    /// <summary>Base path that <see cref="GetWebPathForFilename"/> combines file names with.</summary>
+    public string WebPath { get; init; }
+
+    /// <summary>
+    /// Combines <see cref="HostPath"/> with <paramref name="filename"/>.
+    /// </summary>
+    /// <param name="filename">File name to place beneath <see cref="HostPath"/>.</param>
+    /// <param name="fallback">Returned unchanged when <paramref name="filename"/> is reported blank by <c>IsNullOrWhiteSpace()</c>.</param>
+    /// <returns>The combined path, or <paramref name="fallback"/>.</returns>
+    public string GetHostPathForFilename(string filename, string fallback = "")
+        => filename.IsNullOrWhiteSpace() ? fallback : Path.Combine(HostPath, filename);
+
+    /// <summary>
+    /// Combines <see cref="WebPath"/> with <paramref name="filename"/>.
+    /// </summary>
+    /// <param name="filename">File name to place beneath <see cref="WebPath"/>.</param>
+    /// <param name="fallback">Returned unchanged when <paramref name="filename"/> is reported blank by <c>IsNullOrWhiteSpace()</c>.</param>
+    /// <returns>The combined path, or <paramref name="fallback"/>.</returns>
+    /// <remarks>
+    /// NOTE(review): Path.Combine joins with the platform directory separator, so on Windows
+    /// the result contains a backslash - confirm that is acceptable for a web path.
+    /// </remarks>
+    public string GetWebPathForFilename(string filename, string fallback = "")
+        => filename.IsNullOrWhiteSpace() ? fallback : Path.Combine(WebPath, filename);
+} \ No newline at end of file
diff --git a/src/Models/NewsArticle.cs b/src/Models/NewsArticle.cs
new file mode 100644
index 0000000..957dee4
--- /dev/null
+++ b/src/Models/NewsArticle.cs
@@ -0,0 +1,20 @@
+namespace I2R.LightNews.Models;
+
+public class NewsArticle // One news article as filled in by GrabberService from scraped HTML.
+{
+ public string Title { get; set; } // Headline text taken from the scraped page.
+ public string Subtitle { get; set; } // Lead paragraph text; only set when a full article is grabbed.
+ public string Href { get; set; } // URL of the article at the original source.
+ public string Content { get; set; } // Sanitised article body HTML; Read.cshtml emits it unencoded via Html.Raw.
+ public List<Author> Authors { get; set; } // Bylines; GrabNrkArticleAsync always assigns a list, GrabNrkAsync leaves it null.
+ public DateTimeOffset CachedAt { get; set; } // Creation time of the cache file the article was parsed from.
+ public DateTime PublishedAt { get; set; } // Parsed with DateTime.TryParse; stays default(DateTime) when parsing fails.
+ public DateTime UpdatedAt { get; set; } // Parsed with DateTime.TryParse; stays default(DateTime) when parsing fails.
+
+ public class Author // A single byline entry of an article.
+ {
+ public string Name { get; set; } // Author name as shown on the page.
+ public string Contact { get; set; } // Not assigned anywhere in the visible code.
+ public string Title { get; set; } // Author role text as shown on the page.
+ }
+} \ No newline at end of file
diff --git a/src/Models/NewsSource.cs b/src/Models/NewsSource.cs
new file mode 100644
index 0000000..5eadad0
--- /dev/null
+++ b/src/Models/NewsSource.cs
@@ -0,0 +1,10 @@
+namespace I2R.LightNews.Models;
+
+public class NewsSource // Front page of one news site together with the article links found on it.
+{
+ public string Name { get; set; } // Short site key ("nrk"); used as page title and in /les/{site} links.
+ public string CanonicalUrl { get; set; } // href of the page's canonical link element, or "uvisst" when absent.
+ public string Attribution { get; set; } // Credit text printed in the Index page footer.
+ public DateTime Created { get; set; } // Creation time of the cache file this front page was parsed from.
+ public List<NewsArticle> Articles { get; set; } // Article links (Href and Title only) found on the front page.
+} \ No newline at end of file
diff --git a/src/Pages/Index.cshtml b/src/Pages/Index.cshtml
new file mode 100644
index 0000000..d7599bc
--- /dev/null
+++ b/src/Pages/Index.cshtml
@@ -0,0 +1,19 @@
+@page "{site?}"
+@model IndexModel
+@{
+    ViewData["Title"] = Model.Source.Name;
+
+    // Age of the cached front page in whole minutes. TimeSpan.Minutes is only the
+    // 0-59 minute component, so TotalMinutes is required for ages of an hour or more.
+    var cacheAgeMinutes = (int)DateTime.UtcNow.Subtract(Model.Source.Created).TotalMinutes;
+}
+
+@*
+    One section per article: the headline links to the reader page, "Kilde" links to the original.
+    - The two anchors are siblings because HTML does not allow an <a> inside another <a>.
+    - The article URL is escaped so that '&', '#' and '?' inside it stay part of the url parameter.
+    - The title is scraped plain text, so it is HTML-encoded by Razor instead of emitted raw.
+*@
+@foreach (var article in Model.Source.Articles) {
+    <section>
+        <h2>
+            <a href="/les/@(Model.Source.Name)?url=@(Uri.EscapeDataString(article.Href))">@article.Title</a>
+        </h2>
+        <a href="@article.Href" rel="noreferrer">Kilde</a>
+    </section>
+}
+<footer>
+    <small>
+        @Model.Source.Attribution &copy; @Model.Source.Name, @cacheAgeMinutes minutter siden
+    </small>
+</footer> \ No newline at end of file
diff --git a/src/Pages/Index.cshtml.cs b/src/Pages/Index.cshtml.cs
new file mode 100644
index 0000000..9305766
--- /dev/null
+++ b/src/Pages/Index.cshtml.cs
@@ -0,0 +1,30 @@
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+
+namespace I2R.LightNews.Pages;
+
+/// <summary>
+/// Page model for the front page: loads the article list of the requested news site.
+/// </summary>
+public class IndexModel : PageModel
+{
+    private readonly GrabberService _grabber;
+    private readonly ILogger<IndexModel> _logger;
+
+    public IndexModel(ILogger<IndexModel> logger, GrabberService grabber) {
+        (_logger, _grabber) = (logger, grabber);
+    }
+
+    /// <summary>The news source rendered by the page; assigned in <see cref="OnGet"/>.</summary>
+    public NewsSource Source { get; set; }
+
+    /// <summary>
+    /// Handles GET requests for the optional <c>{site}</c> route segment.
+    /// </summary>
+    /// <param name="site">Site key from the route; a blank value redirects to <c>/nrk</c>.</param>
+    /// <returns>A redirect when no site was given, otherwise the rendered page.</returns>
+    public async Task<ActionResult> OnGet(string site) {
+        if (site.IsNullOrWhiteSpace()) {
+            return Redirect("/nrk");
+        }
+
+        // Every site key, known or not, currently resolves to the NRK front page.
+        switch (site) {
+            case "nrk":
+            default:
+                Source = await _grabber.GrabNrkAsync();
+                break;
+        }
+
+        return Page();
+    }
+} \ No newline at end of file
diff --git a/src/Pages/Read.cshtml b/src/Pages/Read.cshtml
new file mode 100644
index 0000000..2aecc75
--- /dev/null
+++ b/src/Pages/Read.cshtml
@@ -0,0 +1,26 @@
+@page "/les/{site}"
+@model ReadModel
+@{
+    ViewData["Title"] = Model.Source.Title;
+}
+
+@* Article header: title and lead on the left, timestamps and bylines on the right. *@
+<div style="display: flex; justify-content: space-between">
+    <div>
+        <h1>@Model.Source.Title</h1>
+        <p>@Model.Source.Subtitle</p>
+    </div>
+    <div style="display: flex; flex-direction: row">
+        <div style="flex-direction:column">
+            @* A timestamp that could not be parsed is left at default(DateTime); hide it instead of printing year 0001. *@
+            @if (Model.Source.PublishedAt != default) {
+                <small>Publisert: @Model.Source.PublishedAt.ToString("F")</small><br/>
+            }
+            @if (Model.Source.UpdatedAt != default) {
+                <small>Oppdatert: @Model.Source.UpdatedAt.ToString("F")</small>
+            }
+        </div>
+        <div style="margin: 0 5px; height: 100%; border: 1px solid black"></div>
+        <div style="flex-direction:column">
+            @foreach (var author in Model.Source.Authors) {
+                @* The role is optional on the scraped page; leave out the separator when it is missing. *@
+                <small><em>@author.Name</em>@(string.IsNullOrWhiteSpace(author.Title) ? "" : ": " + author.Title)</small> <br/>
+            }
+        </div>
+    </div>
+</div>
+<hr>
+@* Content has already been passed through HtmlSanitiser by GrabberService, hence Html.Raw. *@
+@Html.Raw(Model.Source.Content) \ No newline at end of file
diff --git a/src/Pages/Read.cshtml.cs b/src/Pages/Read.cshtml.cs
new file mode 100644
index 0000000..16a1055
--- /dev/null
+++ b/src/Pages/Read.cshtml.cs
@@ -0,0 +1,24 @@
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+
+namespace I2R.LightNews.Pages;
+
+/// <summary>
+/// Page model for the reader view (<c>/les/{site}?url=...</c>): loads one article
+/// through <see cref="GrabberService"/> and exposes it to the page.
+/// </summary>
+public class ReadModel : PageModel
+{
+    private readonly GrabberService _grabber;
+
+    /// <summary>The article rendered by the page; assigned in <see cref="OnGet"/>.</summary>
+    public NewsArticle Source { get; set; }
+
+    public ReadModel(GrabberService grabber) {
+        _grabber = grabber;
+    }
+
+    /// <summary>
+    /// Handles GET requests for the reader view.
+    /// </summary>
+    /// <param name="site">Site key from the route; only <c>"nrk"</c> is handled.</param>
+    /// <param name="url">Address of the article to display, taken from the query string.</param>
+    /// <returns>
+    /// The rendered page, or a redirect to <c>/</c> when the url is missing,
+    /// the site is unknown, or the grabber returns no article.
+    /// </returns>
+    public async Task<ActionResult> OnGet([FromRoute] string site, [FromQuery] string url) {
+        // Without this guard a request lacking ?url= passes null to the grabber,
+        // which dereferences it and turns the request into a server error.
+        if (string.IsNullOrWhiteSpace(url)) return Redirect("/");
+
+        Source = site switch {
+            "nrk" => await _grabber.GrabNrkArticleAsync(url),
+            _ => default
+        };
+        if (Source == default) return Redirect("/");
+        return Page();
+    }
+} \ No newline at end of file
diff --git a/src/Pages/Shared/_Layout.cshtml b/src/Pages/Shared/_Layout.cshtml
new file mode 100644
index 0000000..bcdd658
--- /dev/null
+++ b/src/Pages/Shared/_Layout.cshtml
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html lang="nb">
+<head>
+ <meta charset="utf-8"/>
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+ <link rel="stylesheet" href="/reset.css" >
+ <link rel="stylesheet" href="/index.css" >
+ <title>@ViewData["Title"] - Lettnytt</title>
+</head>
+<body>
+<header>
+ <nav>
+ <a href="/nrk">NRK</a>
+ </nav>
+</header>
+<main>
+ @RenderBody()
+</main>
+</body>
+</html> \ No newline at end of file
diff --git a/src/Pages/_ViewImports.cshtml b/src/Pages/_ViewImports.cshtml
new file mode 100644
index 0000000..bc2d314
--- /dev/null
+++ b/src/Pages/_ViewImports.cshtml
@@ -0,0 +1,3 @@
+@using I2R.LightNews
+@namespace I2R.LightNews.Pages
+@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers
diff --git a/src/Pages/_ViewStart.cshtml b/src/Pages/_ViewStart.cshtml
new file mode 100644
index 0000000..a5f1004
--- /dev/null
+++ b/src/Pages/_ViewStart.cshtml
@@ -0,0 +1,3 @@
+@{
+ Layout = "_Layout";
+}
diff --git a/src/Program.cs b/src/Program.cs
new file mode 100644
index 0000000..1716be9
--- /dev/null
+++ b/src/Program.cs
@@ -0,0 +1,20 @@
+global using I2R.LightNews.Services;
+global using I2R.LightNews.Models;
+global using IOL.Helpers;
+using System.Globalization;
+
+// Application entry point: registers services, fixes the culture and wires the request pipeline.
+var builder = WebApplication.CreateBuilder(args);
+
+builder.Services.AddHttpClient();
+builder.Services.AddScoped<GrabberService>();
+builder.Services.AddRazorPages();
+
+var app = builder.Build();
+
+// Norwegian (bokmål) formatting, e.g. for the dates printed by the Read page.
+// CurrentCulture/CurrentUICulture only apply to the thread (and async flow) that sets them,
+// so the DefaultThread* properties are set as well to cover threads that serve requests.
+var norwegian = new CultureInfo("nb-no");
+CultureInfo.DefaultThreadCurrentCulture = norwegian;
+CultureInfo.DefaultThreadCurrentUICulture = norwegian;
+CultureInfo.CurrentCulture = norwegian;
+CultureInfo.CurrentUICulture = norwegian;
+app.UseStaticFiles();
+app.UseStatusCodePages();
+app.UseRouting();
+app.MapRazorPages();
+app.Run(); \ No newline at end of file
diff --git a/src/Properties/launchSettings.json b/src/Properties/launchSettings.json
new file mode 100644
index 0000000..88dc0d3
--- /dev/null
+++ b/src/Properties/launchSettings.json
@@ -0,0 +1,22 @@
+{
+ "profiles": {
+ "http": {
+ "commandName": "Project",
+ "dotnetRunMessages": true,
+ "launchBrowser": false,
+ "applicationUrl": "http://localhost:5088",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development"
+ }
+ },
+ "https": {
+ "commandName": "Project",
+ "dotnetRunMessages": true,
+ "launchBrowser": false,
+ "applicationUrl": "https://localhost:7037;http://localhost:5088",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development"
+ }
+ }
+ }
+}
diff --git a/src/Services/GrabberService.cs b/src/Services/GrabberService.cs
new file mode 100644
index 0000000..814f0d3
--- /dev/null
+++ b/src/Services/GrabberService.cs
@@ -0,0 +1,147 @@
+using System.Security.Cryptography;
+using System.Text;
+using AngleSharp.Html.Parser;
+using I2R.LightNews.Utilities;
+
+namespace I2R.LightNews.Services;
+
+/// <summary>
+/// Downloads pages from nrk.no, stores the raw HTML in a file cache on disk and
+/// turns it into <see cref="NewsSource"/> and <see cref="NewsArticle"/> models.
+/// </summary>
+public class GrabberService
+{
+    private const string NrkPrefix = "nrkno";
+
+    // Maximum age of a cache file, in seconds, before the page is fetched again (100800 s = 28 h).
+    // NOTE(review): the staleness check used to be annotated "30 minutes" (1800 s) - confirm which value is intended.
+    private const int StaleTime = 100800;
+
+    // Hosts that GrabNrkArticleAsync is allowed to fetch from.
+    private static readonly string[] AllowedNrkHosts = { "nrk.no", "www.nrk.no" };
+
+    // Path prefixes on nrk.no that are refused by GrabNrkArticleAsync.
+    private static readonly string[] ExcludedNrkPathPrefixes = { "/mat", "/tv", "/radio", "/xl" };
+
+    // Cache directory; the path is relative, so it resolves against the process working directory.
+    private static readonly AppPath CachePath = new() {
+        HostPath = "AppData/__sitecache"
+    };
+
+    private readonly ILogger<GrabberService> _logger;
+    private readonly HttpClient _http;
+
+    public GrabberService(ILogger<GrabberService> logger, HttpClient http) {
+        _logger = logger;
+        _http = http;
+    }
+
+    /// <summary>
+    /// Fetches a single nrk.no article and extracts title, lead, authors, timestamps and sanitised body.
+    /// </summary>
+    /// <param name="url">Absolute http(s) address of the article on nrk.no or www.nrk.no.</param>
+    /// <returns>
+    /// The parsed article, or <c>null</c> when the address is missing, is not an accepted nrk.no
+    /// article address, could not be downloaded, or the page has no <c>.article-body</c> element.
+    /// </returns>
+    public async Task<NewsArticle> GrabNrkArticleAsync(string url) {
+        if (!IsGrabbableNrkArticleUrl(url)) return default;
+
+        var urlHash = Convert.ToHexString(MD5.HashData(Encoding.UTF8.GetBytes(url)));
+        var articleFilePrefix = "art-" + NrkPrefix + "-" + urlHash;
+
+        // NOTE(review): forceRefresh is always true here, so article cache files are written
+        // on every request but never read back or removed - confirm this is intended.
+        SourceResult source;
+        try {
+            source = await GrabSourceAsync(url, articleFilePrefix, true);
+        } catch (HttpRequestException e) {
+            _logger.LogWarning(e, "Could not fetch article {Url}", url);
+            return default;
+        }
+
+        var parser = new HtmlParser();
+        var doc = await parser.ParseDocumentAsync(source.Content);
+
+        // Pages without an article body (section fronts, unexpected layouts) cannot be rendered.
+        var body = doc.QuerySelector(".article-body");
+        if (body == null) {
+            _logger.LogWarning("No .article-body element found in {Url}", url);
+            return default;
+        }
+
+        var result = new NewsArticle() {
+            CachedAt = source.CacheFileCreatedAt,
+            Href = url,
+            Title = doc.QuerySelector("h1.title")?.TextContent,
+            Subtitle = doc.QuerySelector(".article-lead p")?.TextContent,
+            Authors = new List<NewsArticle.Author>()
+        };
+
+        foreach (var authorNode in doc.QuerySelectorAll(".authors .author")) {
+            result.Authors.Add(new NewsArticle.Author() {
+                Name = authorNode.QuerySelector(".author__name")?.TextContent,
+                Title = authorNode.QuerySelector(".author__role")?.TextContent
+            });
+        }
+
+        // A failed parse leaves the value at default(DateTime).
+        DateTime.TryParse(doc.QuerySelector("time.datePublished")?.Attributes["datetime"]?.Value, out var published);
+        DateTime.TryParse(doc.QuerySelector("time.dateModified")?.Attributes["datetime"]?.Value, out var modified);
+        result.UpdatedAt = modified;
+        result.PublishedAt = published;
+        result.Content = HtmlSanitiser.SanitizeHtmlFragment(body.InnerHtml, "img,a,.video-reference,.image-reference,.reference");
+        return result;
+    }
+
+    /// <summary>
+    /// Fetches the nrk.no front page (from cache when a fresh copy exists) and lists the article links on it.
+    /// </summary>
+    /// <returns>The front page model with one entry per accepted article link.</returns>
+    /// <exception cref="HttpRequestException">The front page could not be downloaded.</exception>
+    public async Task<NewsSource> GrabNrkAsync() {
+        var source = await GrabSourceAsync("https://nrk.no", NrkPrefix);
+        var parser = new HtmlParser();
+        var doc = await parser.ParseDocumentAsync(source.Content);
+        var result = new NewsSource() {
+            Name = "nrk",
+            Attribution = "Fra https://nrk.no",
+            Created = source.CacheFileCreatedAt.DateTime,
+            CanonicalUrl = doc.QuerySelector("link[rel='canonical']")?.Attributes["href"]?.Value ?? "uvisst",
+            Articles = new List<NewsArticle>()
+        };
+
+        foreach (var articleAnchorNode in doc.QuerySelectorAll("main section a")) {
+            var href = articleAnchorNode.Attributes["href"]?.Value.Trim();
+            var title = articleAnchorNode.QuerySelector(".kur-room__title span")?.TextContent.Trim();
+
+            // Keep only titled links that point at www.nrk.no and are not in the /mat section.
+            if (string.IsNullOrWhiteSpace(href) || string.IsNullOrWhiteSpace(title)) continue;
+            if (!href.StartsWith("https://www.nrk.no", StringComparison.Ordinal)) continue;
+            if (href.StartsWith("https://www.nrk.no/mat", StringComparison.Ordinal)) continue;
+
+            result.Articles.Add(new NewsArticle {
+                Href = href,
+                Title = title
+            });
+        }
+
+        return result;
+    }
+
+    /// <summary>
+    /// Decides whether <paramref name="url"/> may be fetched as an NRK article.
+    /// The address is parsed instead of prefix-matched so that look-alike hosts such as
+    /// "nrk.no.example.com" or "nrk.no@example.com" are rejected before any request is made.
+    /// </summary>
+    private static bool IsGrabbableNrkArticleUrl(string url) {
+        if (string.IsNullOrWhiteSpace(url)) return false;
+        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) return false;
+        if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps) return false;
+        if (!AllowedNrkHosts.Contains(uri.Host, StringComparer.OrdinalIgnoreCase)) return false;
+        return !ExcludedNrkPathPrefixes.Any(prefix => uri.AbsolutePath.StartsWith(prefix, StringComparison.Ordinal));
+    }
+
+    /// <summary>Raw page content together with the cache file it was stored in.</summary>
+    private class SourceResult
+    {
+        public string CacheFileName { get; set; }
+        public string Content { get; set; }
+        public DateTimeOffset CacheFileCreatedAt { get; set; }
+    }
+
+    /// <summary>
+    /// Returns the HTML for <paramref name="url"/>, from the newest fresh cache file when allowed,
+    /// otherwise by downloading it and writing a new cache file.
+    /// </summary>
+    /// <param name="url">Address to download.</param>
+    /// <param name="prefix">Cache file name prefix identifying this page.</param>
+    /// <param name="forceRefresh">When true the cache is not consulted.</param>
+    /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
+    private async Task<SourceResult> GrabSourceAsync(string url, string prefix, bool forceRefresh = false) {
+        var cached = forceRefresh ? default : GetLatestCacheFile(prefix);
+        if (cached != default) {
+            _logger.LogInformation("Returned cached {0} file, filename: {1}", url, cached.CacheFileName);
+            cached.Content = await File.ReadAllTextAsync(CachePath.GetHostPathForFilename(cached.CacheFileName));
+            return cached;
+        }
+
+        using var sourceResponse = await _http.GetAsync(url);
+        // Fail before caching so that an error page is never stored and served as content.
+        sourceResponse.EnsureSuccessStatusCode();
+        var sourceContent = await sourceResponse.Content.ReadAsStringAsync();
+        var utcNow = DateTimeOffset.UtcNow;
+        var newCacheFileName = prefix + "-" + utcNow.ToUnixTimeSeconds() + ".html";
+        // The forceRefresh path never runs GetLatestCacheFile, so the directory may not exist yet.
+        Directory.CreateDirectory(CachePath.HostPath);
+        await File.WriteAllTextAsync(CachePath.GetHostPathForFilename(newCacheFileName), sourceContent);
+        _logger.LogInformation("Wrote new cache file for {0}, filename: {1}", url, newCacheFileName);
+        return new SourceResult() {
+            CacheFileName = newCacheFileName,
+            CacheFileCreatedAt = utcNow,
+            Content = sourceContent
+        };
+    }
+
+    /// <summary>
+    /// Finds the newest cache file whose name starts with <paramref name="prefix"/>.
+    /// </summary>
+    /// <returns>
+    /// File name and creation time of that file (Content is not loaded), or <c>null</c> when
+    /// there is no such file or it is older than <see cref="StaleTime"/> seconds.
+    /// </returns>
+    private SourceResult GetLatestCacheFile(string prefix) {
+        var cacheDirectoryInfo = new DirectoryInfo(CachePath.HostPath);
+        if (!cacheDirectoryInfo.Exists) {
+            cacheDirectoryInfo.Create();
+            return default;
+        }
+
+        // File names end in "-<unix seconds>.html", so the last name in sort order is the newest.
+        var mostRecentFileName = cacheDirectoryInfo.GetFiles()
+            .Select(c => c.Name)
+            .Where(name => name.StartsWith(prefix, StringComparison.Ordinal))
+            .OrderBy(name => name, StringComparer.Ordinal)
+            .LastOrDefault();
+        if (mostRecentFileName == null) return default;
+
+        // Everything after the last '-' that is a digit forms the epoch timestamp.
+        var mostRecentEpochString = new string(mostRecentFileName.Skip(mostRecentFileName.LastIndexOf('-')).Where(Char.IsDigit).ToArray());
+        // An unparsable timestamp yields 0, which the check below treats as stale.
+        long.TryParse(mostRecentEpochString, out var mostRecentEpochLong);
+        // More than StaleTime seconds since the last grab: treat the cache as missing.
+        if (mostRecentEpochLong + StaleTime < DateTimeOffset.UtcNow.ToUnixTimeSeconds()) return default;
+        return new SourceResult {
+            CacheFileName = mostRecentFileName,
+            CacheFileCreatedAt = DateTimeOffset.FromUnixTimeSeconds(mostRecentEpochLong)
+        };
+    }
+} \ No newline at end of file
diff --git a/src/Utilities/HtmlSanitiser.cs b/src/Utilities/HtmlSanitiser.cs
new file mode 100644
index 0000000..68f5d23
--- /dev/null
+++ b/src/Utilities/HtmlSanitiser.cs
@@ -0,0 +1,83 @@
+using AngleSharp.Dom;
+using AngleSharp.Html.Parser;
+
+namespace I2R.LightNews.Utilities;
+
+/// <summary>
+/// Whitelist-based HTML sanitiser: removes every element and attribute that is not explicitly
+/// allowed, plus any element matching a caller-supplied exclusion list.
+/// </summary>
+public static class HtmlSanitiser
+{
+    private const string VoidElements = "area,br,col,hr,img,wbr";
+    private const string OptionalEndTagBlockElements = "colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr";
+    private const string OptionalEndTagInlineElements = "rp,rt";
+    private const string OptionalEndTagElements = OptionalEndTagInlineElements + "," + OptionalEndTagBlockElements;
+    private const string BlockElements = OptionalEndTagBlockElements + ",address,article,aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,section,table,ul";
+    private const string InlineElements = OptionalEndTagInlineElements + ",a,abbr,acronym,b,bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small,span,strike,strong,sub,sup,time,tt,u,var";
+    private const string DefaulValidElements = VoidElements + "," + BlockElements + "," + InlineElements + "," + OptionalEndTagElements;
+    private const string DefaulUriAttrs = "background,cite,href,longdesc,src,xlink:href";
+    private const string DefaulSrcsetAttrs = "srcset";
+    private const string DefaultHtmlAttrs = "abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,valign,value,vspace,width";
+    private const string DefaulValidAttrs = DefaulUriAttrs + "," + DefaulSrcsetAttrs + "," + DefaultHtmlAttrs;
+    private static readonly ISet<string> ValidElements = DefaulValidElements.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase);
+    private static readonly ISet<string> ValidAttributes = DefaulValidAttrs.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase);
+    private static readonly ISet<string> UriAttributes = DefaulUriAttrs.Split(',').ToHashSet(StringComparer.OrdinalIgnoreCase);
+
+    /// <summary>
+    /// Sanitises an HTML fragment.
+    /// </summary>
+    /// <param name="html">Fragment markup; it is parsed inside a temporary wrapper element.</param>
+    /// <param name="excludeSelectors">
+    /// Optional comma-separated list of simple selectors whose matching elements are removed together
+    /// with their content. Supported forms: <c>tag</c>, <c>.class</c> and <c>#id</c>.
+    /// </param>
+    /// <returns>The serialised fragment after removal of disallowed elements and attributes.</returns>
+    public static string SanitizeHtmlFragment(string html, string excludeSelectors = default) {
+        var selectors = string.IsNullOrWhiteSpace(excludeSelectors)
+            ? Array.Empty<string>()
+            : excludeSelectors.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+
+        var element = ParseHtmlFragment(html);
+        // Walk backwards because Sanitize may remove the node it is given.
+        for (var i = element.ChildNodes.Length - 1; i >= 0; i--) {
+            Sanitize(element.ChildNodes[i], selectors);
+        }
+
+        return element.InnerHtml;
+    }
+
+    /// <summary>Parses <paramref name="content"/> inside a uniquely identified div and returns that div.</summary>
+    private static IElement ParseHtmlFragment(string content) {
+        var uniqueId = Guid.NewGuid().ToString("D");
+        var parser = new HtmlParser();
+        var document = parser.ParseDocument($"<div id='{uniqueId}'>{content}</div>");
+        var element = document.GetElementById(uniqueId);
+        return element;
+    }
+
+    /// <summary>
+    /// Recursively sanitises <paramref name="node"/>. An element that is excluded or not whitelisted
+    /// is removed with its whole subtree; otherwise its attributes are filtered and its children visited.
+    /// The exclusion list is passed down so that it applies at every depth, not only to top-level nodes.
+    /// </summary>
+    private static void Sanitize(INode node, string[] excludeSelectors) {
+        if (node is IElement htmlElement) {
+            if (IsExcluded(htmlElement, excludeSelectors) || !ValidElements.Contains(htmlElement.TagName)) {
+                htmlElement.Remove();
+                // Nothing below a removed element can reach the output.
+                return;
+            }
+
+            for (var i = htmlElement.Attributes.Length - 1; i >= 0; i--) {
+                var attribute = htmlElement.Attributes[i];
+                var disallowed = !ValidAttributes.Contains(attribute.Name)
+                                 || (UriAttributes.Contains(attribute.Name) && HasScriptScheme(attribute.Value));
+                if (disallowed) {
+                    htmlElement.RemoveAttribute(attribute.NamespaceUri, attribute.Name);
+                }
+            }
+        }
+
+        for (var i = node.ChildNodes.Length - 1; i >= 0; i--) {
+            Sanitize(node.ChildNodes[i], excludeSelectors);
+        }
+    }
+
+    /// <summary>
+    /// Tests an element against the simple selectors: ".name" matches a class, "#name" matches the id,
+    /// anything else is compared with the tag name ignoring case (TagName is upper-case for HTML elements).
+    /// </summary>
+    private static bool IsExcluded(IElement element, string[] selectors) {
+        foreach (var selector in selectors) {
+            if (selector.StartsWith('.')) {
+                if (element.ClassList.Contains(selector.Substring(1))) return true;
+            } else if (selector.StartsWith('#')) {
+                if (element.Id == selector.Substring(1)) return true;
+            } else if (string.Equals(element.TagName, selector, StringComparison.OrdinalIgnoreCase)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Reports whether a URI attribute value uses a script-executing scheme. Whitespace and control
+    /// characters are dropped first because browsers ignore them when resolving the scheme.
+    /// </summary>
+    private static bool HasScriptScheme(string value) {
+        if (string.IsNullOrEmpty(value)) return false;
+        var compact = new string(value.Where(c => c > ' ').ToArray());
+        return compact.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase)
+               || compact.StartsWith("vbscript:", StringComparison.OrdinalIgnoreCase);
+    }
+} \ No newline at end of file
diff --git a/src/wwwroot/index.css b/src/wwwroot/index.css
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/wwwroot/index.css
diff --git a/src/wwwroot/reset.css b/src/wwwroot/reset.css
new file mode 100644
index 0000000..ae5fad7
--- /dev/null
+++ b/src/wwwroot/reset.css
@@ -0,0 +1,74 @@
+/* Box sizing rules */
+*,
+*::before,
+*::after {
+ box-sizing: border-box;
+}
+
+/* Remove default margin */
+body,
+h1,
+h2,
+h3,
+h4,
+p,
+figure,
+blockquote,
+dl,
+dd {
+ margin: 0;
+}
+
+/* Remove list styles on ul, ol elements with a list role, which suggests default styling will be removed */
+ul[role="list"],
+ol[role="list"] {
+ list-style: none;
+}
+
+/* Set core root defaults */
+html:focus-within {
+ scroll-behavior: smooth;
+}
+
+/* Set core body defaults */
+body {
+ min-height: 100vh;
+ text-rendering: optimizeSpeed;
+ line-height: 1.5;
+}
+
+/* A elements that don't have a class get default styles */
+a:not([class]) {
+ text-decoration-skip-ink: auto;
+}
+
+/* Make images easier to work with */
+img,
+picture {
+ max-width: 100%;
+ display: block;
+}
+
+/* Inherit fonts for inputs and buttons */
+input,
+button,
+textarea,
+select {
+ font: inherit;
+}
+
+/* Remove all animations and transitions for people that prefer not to see them */
+@media (prefers-reduced-motion: reduce) {
+ html:focus-within {
+ scroll-behavior: auto;
+ }
+
+ *,
+ *::before,
+ *::after {
+ animation-duration: 0.01ms !important;
+ animation-iteration-count: 1 !important;
+ transition-duration: 0.01ms !important;
+ scroll-behavior: auto !important;
+ }
+}