Skip to content

Commit

Permalink
Implemented Word Cloud
Browse files Browse the repository at this point in the history
  • Loading branch information
mazumdes committed Feb 3, 2022
1 parent ee51c90 commit dc9ce87
Show file tree
Hide file tree
Showing 10 changed files with 316 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Library.Encyclopedia.DataAccess;
using Library.Encyclopedia.DataAccess.DataAccess;
using Library.Encyclopedia.DataAccess.QueryStatsAccess;
using Library.Encyclopedia.Entity.Interfaces;
using Library.Encyclopedia.Entity.Models;
using Library.Encyclopedia.Entity.Models.External;
Expand Down
68 changes: 68 additions & 0 deletions Library.Encyclopedia.API/Controllers/StatisticsController.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using Library.Encyclopedia.DataAccess;
using Library.Encyclopedia.DataAccess.QueryStatsAccess;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

namespace Library.Encyclopedia.API.Controllers
{
    /// <summary>
    /// Exposes word statistics (recommended words, popular search terms and the
    /// raw part-of-speech reference) produced by <see cref="CommonlyOccuringWordsAdapter"/>.
    /// </summary>
    [ApiController]
    [Route("[controller]")]
    public class StatisticsController : ControllerBase
    {
        private readonly ILogger<StatisticsController> _logger;
        private readonly CommonlyOccuringWordsAdapter commonlyOccuringWordsAdapter;

        /// <summary>
        /// Creates the controller and the adapter it delegates to.
        /// </summary>
        /// <param name="logger">Logger used to record failures before they are rethrown.</param>
        /// <param name="applicationDbContext">Database context handed to the adapter.</param>
        /// <param name="memoryCache">Memory cache handed to the adapter.</param>
        public StatisticsController(ILogger<StatisticsController> logger, IApplicationDbContext applicationDbContext, IMemoryCache memoryCache)
        {
            _logger = logger;
            this.commonlyOccuringWordsAdapter = new CommonlyOccuringWordsAdapter(applicationDbContext, memoryCache);
        }

        /// <summary>
        /// Returns the most frequently occurring words, as reported by the adapter.
        /// </summary>
        /// <param name="count">Maximum number of words to return.</param>
        /// <returns>200 OK with a word-to-occurrence-count map.</returns>
        [HttpGet("GetRecommendedWords")]
        public IActionResult GetCommonlyOccuringWordsStats(int count = 10)
        {
            return ExecuteLogged(() => commonlyOccuringWordsAdapter.GetCommonlyUsedWords(count));
        }

        /// <summary>
        /// Returns the most frequently searched queries, as reported by the adapter.
        /// </summary>
        /// <param name="count">Maximum number of queries to return.</param>
        /// <returns>200 OK with a query-to-search-count map.</returns>
        [HttpGet("GetPopularWords")]
        public IActionResult GetMostSearchedWordStats(int count = 10)
        {
            return ExecuteLogged(() => commonlyOccuringWordsAdapter.GetCommonlySearchedWords(count));
        }

        /// <summary>
        /// Returns the adapter's part-of-speech reference data.
        /// </summary>
        /// <returns>200 OK with a part-of-speech-name-to-words map.</returns>
        [HttpGet("GetNLPResult")]
        public IActionResult GetNLPResult()
        {
            return ExecuteLogged(() => commonlyOccuringWordsAdapter.GetNLPResult());
        }

        /// <summary>
        /// Runs <paramref name="query"/> and wraps its result in 200 OK. Any exception
        /// is logged and rethrown unchanged so the hosting pipeline still sees the failure.
        /// </summary>
        private IActionResult ExecuteLogged<T>(Func<T> query)
        {
            try
            {
                return Ok(query());
            }
            catch (Exception ex)
            {
                // Constant message template with a placeholder instead of an interpolated
                // string: the rendered text is the same, but the logger keeps the message
                // as a structured property and the template stays cacheable.
                _logger.LogError(ex, "an error has occured {Message}", ex.Message);
                throw;
            }
        }
    }
}
4 changes: 3 additions & 1 deletion Library.Encyclopedia.API/Startup.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Library.Encyclopedia.DataAccess;
using Library.Encyclopedia.DataAccess.FileAccess;
using Library.Encyclopedia.DataAccess.QueryStatsAccess;
using Library.Encyclopedia.Entity.Interfaces;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
Expand Down Expand Up @@ -64,7 +65,8 @@ public void ConfigureServices(IServiceCollection services)
});
}

services.AddScoped<IApplicationDbContext>(s => new ApplicationDbContext(Configuration.GetConnectionString("DefaultConnection")));
services.AddSingleton<IApplicationDbContext>(s => new ApplicationDbContext(Configuration.GetConnectionString("DefaultConnection")));
services.AddMemoryCache();

services.Configure<FormOptions>(x =>
{
Expand Down
11 changes: 10 additions & 1 deletion Library.Encyclopedia.DataAccess/ApplicationDbContext.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Library.Encyclopedia.Entity.Models;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Storage;
using System;
using System.Threading.Tasks;

Expand All @@ -16,16 +17,24 @@ public ApplicationDbContext(string connectionString)
/// <summary>
/// Configures the context to use MySQL with the stored connection string,
/// declaring the server version as 8.0.18.
/// </summary>
/// <param name="optionsBuilder">Options builder the MySQL provider is registered on.</param>
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
{
var serverVersion = new MySqlServerVersion(new Version(8, 0, 18));
// NOTE(review): UseMySql appears twice in this view. The call below has no provider
// options; the following one enables retry on failure. Confirm the first call is
// not a leftover of the previous revision.
optionsBuilder.UseMySql(connectionString, serverVersion);
optionsBuilder.UseMySql(connectionString, serverVersion, (mySqlOptions)=> {
// Turns on the provider's retry-on-failure behaviour with its default settings.
mySqlOptions.EnableRetryOnFailure();
});
}

public DbSet<Main> Main { get; set; }
public DbSet<Files> Files { get; set; }
public DbSet<Links> Links { get; set; }
public DbSet<QueryStats> QueryStats { get; set; }

/// <summary>
/// Persists pending changes by awaiting the base context's asynchronous save.
/// Declared with <c>new</c>, so it hides the inherited <c>SaveChanges</c> member.
/// </summary>
/// <returns>The value produced by the base <c>SaveChangesAsync</c> call.</returns>
public new async Task<int> SaveChanges() => await base.SaveChangesAsync();

/// <summary>
/// Starts a database transaction through the context's <c>Database</c> facade.
/// </summary>
/// <returns>The transaction handle returned by the facade.</returns>
public async Task<IDbContextTransaction> BeginTransactionAsync() =>
    await base.Database.BeginTransactionAsync();
}
}
11 changes: 9 additions & 2 deletions Library.Encyclopedia.DataAccess/DataAccess/MainDataAccess.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Library.Encyclopedia.Entity.Exceptions;
using Library.Encyclopedia.DataAccess.QueryStatsAccess;
using Library.Encyclopedia.Entity.Exceptions;
using Library.Encyclopedia.Entity.Interfaces;
using Library.Encyclopedia.Entity.Models;
using Library.Encyclopedia.Entity.Models.External;
Expand All @@ -10,6 +11,7 @@
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Web;

Expand All @@ -19,13 +21,15 @@ public class MainDataAccess : IMainDataAccess
{
private IApplicationDbContext _dbcontext;
private readonly IFilesAdapter filesAdapter;
private readonly QueryStatsAdapter queryStatsAdapter;
private string APP_BASE_URL;

/// <summary>
/// Wires up the data-access object: reads the application base URL from
/// configuration, stores the collaborators and creates the query-statistics adapter.
/// </summary>
/// <param name="dbcontext">Database context used by this class and by the query-statistics adapter.</param>
/// <param name="configuration">Configuration source providing the "App-Base-Url" section.</param>
/// <param name="filesAdapter">Adapter used for file operations.</param>
public MainDataAccess(IApplicationDbContext dbcontext, IConfiguration configuration, IFilesAdapter filesAdapter)
{
    // Configuration is read first, exactly as before, so evaluation order is unchanged.
    var baseUrlSection = configuration.GetSection("App-Base-Url");
    APP_BASE_URL = baseUrlSection.Value;

    _dbcontext = dbcontext;
    this.filesAdapter = filesAdapter;
    queryStatsAdapter = new QueryStatsAdapter(dbcontext);
}

#region GET
Expand Down Expand Up @@ -55,6 +59,9 @@ async Task<MainMinimizedExternalCollection> IMainDataAccess.GetAsync(string quer

MainMinimizedExternalCollection result = new MainMinimizedExternalCollection(data.MinimizeWithQuery(query, previewSize), total);

if (!string.IsNullOrEmpty(query) && !string.IsNullOrWhiteSpace(query) && total > 0 && query.Length >= 3)
this.queryStatsAdapter.AddQuery(query);

return result;
}

Expand Down Expand Up @@ -100,7 +107,7 @@ async Task<MainMinimizedExternalCollection> IMainDataAccess.GetByCategoryAsync(s
.ToListAsync();
else
data = await _dbcontext.Main.Where(s => s.Category == null)

.OrderByDescending(s => s.Title.ToLower())
.ThenByDescending(s => s.RawDescription.ToLower())
.Skip(offset)
Expand Down
3 changes: 3 additions & 0 deletions Library.Encyclopedia.DataAccess/IApplicationDbContext.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Library.Encyclopedia.Entity.Models;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Storage;
using System.Threading.Tasks;

namespace Library.Encyclopedia.DataAccess
Expand All @@ -9,6 +10,8 @@ public interface IApplicationDbContext
DbSet<Main> Main { get; set; }
DbSet<Files> Files { get; set; }
DbSet<Links> Links { get; set; }
DbSet<QueryStats> QueryStats { get; set; }
Task<int> SaveChanges();
Task<IDbContextTransaction> BeginTransactionAsync();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Catalyst" Version="1.0.25056" />
<PackageReference Include="Catalyst.Models.English" Version="1.0.24970" />
<PackageReference Include="Microsoft.AspNetCore.Http.Features" Version="5.0.12" />
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="5.0.12" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="5.0.12">
Expand All @@ -18,6 +20,7 @@
</PackageReference>
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="5.0.3" />
<PackageReference Include="SSH.NET" Version="2020.0.1" />
<PackageReference Include="System.Runtime.Caching" Version="6.0.0" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
using Catalyst;
using Catalyst.Models;
using Library.Encyclopedia.Entity.Models;
using Microsoft.Extensions.Caching.Memory;
using Mosaik.Core;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;

namespace Library.Encyclopedia.DataAccess.QueryStatsAccess
{
    /// <summary>
    /// Computes word statistics over the encyclopedia entries (title and raw
    /// description of every <c>Main</c> row) and exposes search-frequency data
    /// from <c>QueryStats</c>. The computed word statistics are kept in the
    /// supplied <see cref="IMemoryCache"/> with a one hour sliding expiration,
    /// so they survive across adapter instances.
    /// </summary>
    public class CommonlyOccuringWordsAdapter
    {
        /// <summary>Cache key of the word-to-occurrence-count map.</summary>
        private const string WordCountsCacheKey = "commonlyOccuringWords";

        /// <summary>Cache key of the part-of-speech-to-words reference map.</summary>
        private const string NlpReferenceCacheKey = "commonlyOccuringWords:nlpReference";

        /// <summary>Tokens shorter than this are not counted by the NLP parser.</summary>
        private const int MinimumWordLength = 3;

        /// <summary>Separators used by the plain-text (non-NLP) tokenizer.</summary>
        private static readonly string[] BasicSeparators = { ", ", ". ", "! ", "? ", ": ", "; ", " " };

        private readonly IApplicationDbContext applicationDbContext;
        private readonly IMemoryCache _cache;

        /// <summary>
        /// Creates the adapter.
        /// </summary>
        /// <param name="applicationDbContext">Context providing the <c>Main</c> and <c>QueryStats</c> sets.</param>
        /// <param name="cache">Cache in which computed statistics are stored.</param>
        public CommonlyOccuringWordsAdapter(IApplicationDbContext applicationDbContext, IMemoryCache cache)
        {
            this.applicationDbContext = applicationDbContext;
            this._cache = cache;
        }

        /// <summary>
        /// Insertion-ordered list of distinct words. Membership is tracked in a
        /// set so adding a word does not require scanning the whole list.
        /// </summary>
        private sealed class DistinctWords
        {
            private readonly HashSet<string> seen = new HashSet<string>();

            public List<string> Ordered { get; } = new List<string>();

            public void Add(string word)
            {
                if (seen.Add(word))
                {
                    Ordered.Add(word);
                }
            }
        }

        /// <summary>
        /// Loads every entry, runs the NLP parser over it and stores both result
        /// maps in the cache.
        /// </summary>
        /// <param name="nlpReference">Receives the part-of-speech-name-to-words map.</param>
        /// <returns>The word-to-occurrence-count map.</returns>
        private Dictionary<string, long> PopulateRecommendedWords(out Dictionary<string, List<string>> nlpReference)
        {
            // Per-run state lives in locals so a run never observes a previous run's data.
            var wordCounts = new Dictionary<string, long>();
            var wordsByPartOfSpeech = new Dictionary<PartOfSpeech, DistinctWords>();

            var allData = applicationDbContext.Main.ToList();

            // BasicParser(allData, wordCounts) is the plain-text alternative to the NLP parser.
            AdvancedParser(allData, wordCounts, wordsByPartOfSpeech);

            nlpReference = wordsByPartOfSpeech.ToDictionary(s => s.Key.ToString(), s => s.Value.Ordered);

            var cacheEntryOptions = new MemoryCacheEntryOptions()
                .SetSlidingExpiration(TimeSpan.FromHours(1));

            _cache.Set(WordCountsCacheKey, wordCounts, cacheEntryOptions);
            _cache.Set(NlpReferenceCacheKey, nlpReference, cacheEntryOptions);

            return wordCounts;
        }

        /// <summary>
        /// Runs the English Catalyst pipeline over the title and raw description
        /// of every entry, accumulating results into the supplied maps.
        /// </summary>
        private static void AdvancedParser(
            List<Main> allData,
            Dictionary<string, long> wordCounts,
            Dictionary<PartOfSpeech, DistinctWords> wordsByPartOfSpeech)
        {
            English.Register();
            Storage.Current = new DiskStorage("catalyst-models");
            var nlp = Pipeline.For(Language.English);

            foreach (var item in allData)
            {
                ProcessText(nlp, item.Title, wordCounts, wordsByPartOfSpeech);
                ProcessText(nlp, item.RawDescription, wordCounts, wordsByPartOfSpeech);
            }
        }

        /// <summary>
        /// Tags one piece of text and records its tokens: tokens of a counted part
        /// of speech with at least <see cref="MinimumWordLength"/> characters are
        /// counted, and every token is recorded (lower-cased) under its part of speech.
        /// </summary>
        private static void ProcessText(
            Pipeline nlp,
            string text,
            Dictionary<string, long> wordCounts,
            Dictionary<PartOfSpeech, DistinctWords> wordsByPartOfSpeech)
        {
            // Entries may have no title or description; there is nothing to tag then.
            if (string.IsNullOrWhiteSpace(text))
            {
                return;
            }

            var doc = new Document(text, Language.English);
            nlp.ProcessSingle(doc);

            foreach (var sentence in doc)
            {
                foreach (var token in sentence)
                {
                    string word = token.Value;

                    if (IsCountedPartOfSpeech(token.POS) && word.Length >= MinimumWordLength)
                    {
                        Increment(wordCounts, word);
                    }

                    if (!wordsByPartOfSpeech.TryGetValue(token.POS, out DistinctWords bucket))
                    {
                        bucket = new DistinctWords();
                        wordsByPartOfSpeech.Add(token.POS, bucket);
                    }

                    // Invariant lower-casing keeps the reference data independent of the
                    // server's current culture.
                    bucket.Add(word.ToLowerInvariant());
                }
            }
        }

        /// <summary>
        /// Tells whether tokens of the given part of speech contribute to the word
        /// counts: proper nouns, nouns, adjectives, adverbs and numerals do.
        /// </summary>
        private static bool IsCountedPartOfSpeech(PartOfSpeech partOfSpeech)
        {
            switch (partOfSpeech)
            {
                case PartOfSpeech.PROPN:
                case PartOfSpeech.NOUN:
                case PartOfSpeech.ADJ:
                case PartOfSpeech.ADV:
                case PartOfSpeech.NUM:
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>Adds one occurrence of <paramref name="word"/> using a single lookup.</summary>
        private static void Increment(Dictionary<string, long> wordCounts, string word)
        {
            wordCounts.TryGetValue(word, out long current);
            wordCounts[word] = current + 1;
        }

        /// <summary>
        /// Plain-text alternative to <see cref="AdvancedParser"/>: splits title and
        /// raw description on punctuation/whitespace and counts every word that
        /// contains at least one letter or digit.
        /// </summary>
        private static void BasicParser(List<Main> allData, Dictionary<string, long> wordCounts)
        {
            foreach (var item in allData)
            {
                CountPlainWords(item.Title, wordCounts);
                CountPlainWords(item.RawDescription, wordCounts);
            }
        }

        /// <summary>
        /// Replaces brackets with spaces, splits on <see cref="BasicSeparators"/> and
        /// counts each resulting word that contains a letter or digit.
        /// </summary>
        private static void CountPlainWords(string text, Dictionary<string, long> wordCounts)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                return;
            }

            string withoutBrackets = text.Replace('(', ' ')
                                         .Replace(')', ' ')
                                         .Replace('{', ' ')
                                         .Replace('}', ' ')
                                         .Replace('[', ' ')
                                         .Replace(']', ' ');

            foreach (var word in withoutBrackets.Split(BasicSeparators, StringSplitOptions.RemoveEmptyEntries))
            {
                // A word made only of punctuation/whitespace carries no information.
                if (word.Any(char.IsLetterOrDigit))
                {
                    Increment(wordCounts, word);
                }
            }
        }

        /// <summary>
        /// Returns the <paramref name="count"/> most frequent words, computing and
        /// caching the statistics first when they are not in the cache.
        /// </summary>
        /// <param name="count">Maximum number of words to return; zero or less yields an empty result.</param>
        /// <returns>A new word-to-occurrence-count map, most frequent first.</returns>
        public Dictionary<string, long> GetCommonlyUsedWords(int count = 0)
        {
            // Use the value obtained from this single lookup; reading the cache a second
            // time could return null if the entry expired in between.
            if (!_cache.TryGetValue(WordCountsCacheKey, out Dictionary<string, long> wordCounts) || wordCounts == null)
            {
                wordCounts = PopulateRecommendedWords(out _);
            }

            return wordCounts.OrderByDescending(s => s.Value)
                             .Take(count)
                             .ToDictionary(x => x.Key, x => x.Value);
        }

        /// <summary>
        /// Returns the <paramref name="count"/> most frequently searched queries
        /// from <c>QueryStats</c>.
        /// </summary>
        /// <param name="count">Maximum number of queries to return; zero or less yields an empty result.</param>
        /// <returns>A query-to-search-count map.</returns>
        public Dictionary<string, long> GetCommonlySearchedWords(int count = 0)
        {
            // Never hand a non-positive row limit to the database provider.
            if (count <= 0)
            {
                return new Dictionary<string, long>();
            }

            IQueryable<QueryStats> queryable = applicationDbContext.QueryStats.OrderByDescending(s => s.Count).Take(count);
            return queryable.ToDictionary(s => s.Query, s => s.Count);
        }

        /// <summary>
        /// Returns, for every part of speech seen by the NLP parser, the distinct
        /// lower-cased words tagged with it. The data is computed and cached on
        /// demand, so the result does not depend on which adapter instance ran the parser.
        /// </summary>
        /// <returns>A new part-of-speech-name-to-words map.</returns>
        public Dictionary<string, List<string>> GetNLPResult()
        {
            if (!_cache.TryGetValue(NlpReferenceCacheKey, out Dictionary<string, List<string>> nlpReference) || nlpReference == null)
            {
                PopulateRecommendedWords(out nlpReference);
            }

            // Hand out copies so callers cannot mutate the cached lists.
            return nlpReference.ToDictionary(s => s.Key, s => s.Value.ToList());
        }
    }
}
Loading

0 comments on commit dc9ce87

Please sign in to comment.