Skip to content

Commit

Permalink
fixed issues with cleaning data
Browse files Browse the repository at this point in the history
  • Loading branch information
mazumdes committed Jan 22, 2022
1 parent aad7d09 commit eeba115
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 50 deletions.
24 changes: 13 additions & 11 deletions Library.Encyclopedia.API/Controllers/EncylopediaController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Library.Encyclopedia.Entity.Models.External;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
Expand All @@ -28,29 +29,30 @@ public class EncylopediaController : ControllerBase
/// </summary>
/// <param name="logger"></param>
/// <param name="dbContext"></param>
/// <param name="configuration">Forwarded unchanged to the MainDataAccess constructor.</param>
public EncylopediaController(ILogger<EncylopediaController> logger, IApplicationDbContext dbContext)
public EncylopediaController(ILogger<EncylopediaController> logger, IApplicationDbContext dbContext, IConfiguration configuration)
{
_logger = logger;
// NOTE(review): diff view - the one-argument MainDataAccess call appears to be the removed line and the
// two-argument call its replacement (same for the two signatures above). The data-access object is
// constructed here directly rather than injected.
this.mainDataAccess = new MainDataAccess(dbContext);
this.mainDataAccess = new MainDataAccess(dbContext, configuration);
}

/// <summary>
/// Get all items based on search query
/// </summary>
/// <param name="query"></param>
/// <param name="offset"></param>
/// <param name="size"></param>
/// <param name="limit"></param>
/// <param name="asc"></param>
/// <returns></returns>
[HttpGet]
public async Task<IActionResult> Get(string query,
int offset = 0,
int size = 10,
int limit = 10,
int previewSize = 50,
bool asc = true)
{
try
{
var response = await mainDataAccess.GetAsync(query, offset, size, asc);
var response = await mainDataAccess.GetAsync(query, offset, limit, previewSize, asc);
if (response == null)
{
return StatusCode(204);
Expand All @@ -72,18 +74,18 @@ public async Task<IActionResult> Get(string query,
/// </summary>
/// <param name="query"></param>
/// <param name="offset"></param>
/// <param name="size"></param>
/// <param name="limit"></param>
/// <param name="asc"></param>
/// <returns></returns>
[HttpGet("category")]
public async Task<IActionResult> GetByCategory(string category,
int offset = 0,
int size = 10,
int limit = 10,
bool asc = true)
{
try
{
var response = await mainDataAccess.GetByCategoryAsync(category, offset, size, asc);
var response = await mainDataAccess.GetByCategoryAsync(category, offset, limit, asc);

if (response == null)
{
Expand All @@ -106,18 +108,18 @@ public async Task<IActionResult> GetByCategory(string category,
/// </summary>
/// <param name="query"></param>
/// <param name="offset"></param>
/// <param name="size"></param>
/// <param name="limit"></param>
/// <param name="asc"></param>
/// <returns></returns>
[HttpGet("alphabet")]
public async Task<IActionResult> GetByStartingAlphabet(char alphabet,
int offset = 0,
int size = 10,
int limit = 10,
bool asc = true)
{
try
{
var response = await mainDataAccess.GetByAlphabetAsync(alphabet, offset, size, asc);
var response = await mainDataAccess.GetByAlphabetAsync(alphabet, offset, limit, asc);

if (response == null)
{
Expand Down
1 change: 1 addition & 0 deletions Library.Encyclopedia.API/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"Microsoft.Hosting.Lifetime": "Information"
}
},
"App-Base-Url": "http://localhost:4200",
"AllowedHosts": "*",
"ConnectionStrings": {
"DefaultConnection": "Server=localhost;Database=Encyclopedia;User=root;Password=root"
Expand Down
121 changes: 90 additions & 31 deletions Library.Encyclopedia.DataAccess/DataAccess/MainDataAccess.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,56 @@
using Library.Encyclopedia.Entity.Models;
using Library.Encyclopedia.Entity.Models.External;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;

namespace Library.Encyclopedia.DataAccess.DataAccess
{
public class MainDataAccess : IMainDataAccess
{
private IApplicationDbContext _dbcontext;
// NOTE(review): diff view - the one-argument signature appears to be the removed line; the field and the
// two-argument signature below are its replacement.
public MainDataAccess(IApplicationDbContext dbcontext)
private string APP_BASE_URL;

// Stores the db context and reads the "App-Base-Url" configuration value once at construction.
public MainDataAccess(IApplicationDbContext dbcontext, IConfiguration configuration)
{
// Used by GetAsync(Guid) as the prefix of the href built for each link placeholder.
APP_BASE_URL = configuration.GetSection("App-Base-Url").Value;
_dbcontext = dbcontext;
}

#region GET
// Search: returns one page of Main entries whose description or title contains `query`
// (both sides lower-cased with ToLower), together with the total number of matching entries.
// NOTE(review): diff view - lines that use Description, Minimize() or the signature without previewSize
// appear to be the removed pre-commit lines, shown next to the lines that replace them.
async Task<MainMinimizedExternalCollection> IMainDataAccess.GetAsync(string query, int offset, int pagesize, bool ascending)
async Task<MainMinimizedExternalCollection> IMainDataAccess.GetAsync(string query, int offset, int pagesize, int previewSize, bool ascending)
{
query = query.ToLower();
var temp = _dbcontext.Main.Where(s => s.Description.ToLower().Contains(query) || s.Title.ToLower().Contains(query))
// random cleanup
//await CleanUpData();

// A null query becomes the empty string, and Contains("") is true for every non-null string.
query = query != null ? query.ToLower() : string.Empty;
var temp = _dbcontext.Main.Where(s => s.RawDescription.ToLower().Contains(query) || s.Title.ToLower().Contains(query))
// NOTE(review): Skip/Take run before the OrderBy/OrderByDescending applied further down, so the sort only
// reorders the rows of a page that was already selected without an explicit order. Ordering before
// paging is probably what was intended - confirm.
.Skip(offset)
.Take(pagesize);
.Take(pagesize)
.Include(s => s.Links);

IEnumerable<Main> data;
if (ascending)
data = await temp.OrderBy(s => s.Title)
.ThenBy(s => s.Description)
.ThenBy(s => s.RawDescription)
.ToListAsync();
else
data = await temp.OrderByDescending(s => s.Title)
.ThenByDescending(s => s.Description)
.ThenByDescending(s => s.RawDescription)
.ToListAsync();

// Same predicate as the page query but without Skip/Take: this is the overall match count, not the page size.
var total = await _dbcontext.Main.CountAsync(s => s.Description.ToLower().Contains(query) || s.Title.ToLower().Contains(query));
var total = await _dbcontext.Main.CountAsync(s => s.RawDescription.ToLower().Contains(query) || s.Title.ToLower().Contains(query));

// MinimizeWithQuery is defined outside this view; presumably it builds a preview of up to previewSize
// characters around the match - TODO confirm.
MainMinimizedExternalCollection result = new MainMinimizedExternalCollection(data.MinimizeWithQuery(query, previewSize), total);

MainMinimizedExternalCollection result = new MainMinimizedExternalCollection(data.Minimize(), total);

// random cleanup
await CleanUpData();

return result;
}
Expand All @@ -60,10 +68,10 @@ async Task<MainMinimizedExternalCollection> IMainDataAccess.GetByCategoryAsync(s
IEnumerable<Main> data;
if (ascending)
data = temp.OrderBy(s => s.Title)
.ThenBy(s => s.Description);
.ThenBy(s => s.RawDescription);
else
data = temp.OrderByDescending(s => s.Title)
.ThenByDescending(s => s.Description);
.ThenByDescending(s => s.RawDescription);

var total = rawData.Count(s => s.Category.ToLower().Split(',', StringSplitOptions.None).Contains(category));

Expand All @@ -82,11 +90,11 @@ async Task<MainMinimizedExternalCollection> IMainDataAccess.GetByAlphabetAsync(c
IEnumerable<Main> data;
if (ascending)
data = await temp.OrderBy(s => s.Title)
.ThenBy(s => s.Description)
.ThenBy(s => s.RawDescription)
.ToListAsync();
else
data = await temp.OrderByDescending(s => s.Title)
.ThenByDescending(s => s.Description)
.ThenByDescending(s => s.RawDescription)
.ToListAsync();

var total = await _dbcontext.Main.CountAsync(s => s.Title.ToLower().StartsWith(alph));
Expand All @@ -98,7 +106,20 @@ async Task<MainMinimizedExternalCollection> IMainDataAccess.GetByAlphabetAsync(c

// Loads one Main entry (with its Files and Links) by id and expands every $$$<reference id>$$$
// placeholder in RichDescription into an <a href="{APP_BASE_URL}/<reference id>"> element whose
// text is the Description of the link with that ReferenceId.
async Task<Main> IMainDataAccess.GetAsync(Guid id)
{
// NOTE(review): diff view - this direct return appears to be the removed pre-commit line.
return await _dbcontext.Main.Include(s => s.Files).Include(s => s.Links).FirstOrDefaultAsync(s => s.Id == id);
// NOTE(review): FirstOrDefaultAsync yields null when no entry has this id; item (and item.RichDescription,
// which CleanUpData treats as nullable) is dereferenced below without a null check.
Main item = await _dbcontext.Main.Include(s => s.Files).Include(s => s.Links).FirstOrDefaultAsync(s => s.Id == id);

// replace links in item
while (item.RichDescription.IndexOf("$$$") != -1)
{
// +3 steps over the opening "$$$" so startIndex points at the first character of the reference id.
var startIndex = item.RichDescription.IndexOf("$$$") + 3;
// Searched in the substring that starts at startIndex, so endIndex is the length of the reference id.
// NOTE(review): with no closing "$$$" this is -1 and the Substring call below throws.
var endIndex = item.RichDescription.Substring(startIndex).IndexOf("$$$");

var referenceid = item.RichDescription.Substring(startIndex, endIndex);

// Replace substitutes every occurrence of this placeholder at once.
// NOTE(review): FirstOrDefault(...) is dereferenced directly; a placeholder with no matching link throws.
// NOTE(review): this assigns to the loaded entity's RichDescription - if the context tracks it, a later
// SaveChanges could persist the expanded HTML; confirm that cannot happen.
item.RichDescription= item.RichDescription.Replace($"$$${referenceid}$$$", $"<a href=\"{APP_BASE_URL}/{referenceid}\">{item.Links.FirstOrDefault(s => s.ReferenceId.ToString() == referenceid).Description}</a>");
}

return item;
}
#endregion

Expand All @@ -124,7 +145,10 @@ public async Task UpdateAsync(Guid id, MainUpdateModel model)
if (entry != null)
{
entry.Title = model.Title;
entry.Description = model.Description;
entry.RichDescription = model.RichDescription;

// TODO : do conversion on raw description

entry.Category = model.Category;

_dbcontext.Main.Update(entry);
Expand Down Expand Up @@ -172,19 +196,18 @@ public async Task CleanUpData()
{
string searchStartString = "<a href=";
string searchEndString = "</a>";
var links = new List<Links>();
var allLinks = new List<Links>();
var allData = await _dbcontext.Main.ToListAsync();

//using StreamWriter file = new StreamWriter(@"C:\temp\temp.txt", append: true);
foreach (var item in allData)
{
if (item.Description != null)
if (item.RichDescription != null)
{
// Extract start indices
List<int> startIndexes = new List<int>();
for (int index = 0; ; index += searchStartString.Length)
{
index = item.Description.IndexOf(searchStartString, index);
index = item.RichDescription.IndexOf(searchStartString, index);
if (index != -1)
startIndexes.Add(index);
else break;
Expand All @@ -194,32 +217,32 @@ public async Task CleanUpData()
List<int> endIndexes = new List<int>();
for (int index = 0; ; index += searchEndString.Length)
{
index = item.Description.IndexOf(searchEndString, index);
index = item.RichDescription.IndexOf(searchEndString, index);
if (index != -1)
endIndexes.Add(index);
else break;
}

//
var newDesc = new string(item.Description);
var newDesc = new string(item.RichDescription);
var links = new List<Links>();

for (int i = 0; i < startIndexes.Count; i++)
{
string value = item.Description.Substring(startIndexes[i], endIndexes[i] - startIndexes[i] + searchEndString.Length);
string value = item.RichDescription.Substring(startIndexes[i], endIndexes[i] - startIndexes[i] + searchEndString.Length);
if (value.Contains("http://"))
{
var id = value.Substring(value.IndexOf("id=") + 3, 36);
var desc = value.Substring(value.IndexOf(">") + 1, value.IndexOf("<", value.IndexOf(">")) - value.IndexOf(">") - 1);

//await file.WriteLineAsync($"id = {id} | Description = {desc}");

newDesc = newDesc.Replace(value, $"$$${id}$$$");
//await file.WriteLineAsync($"new description = {newDesc}");

// add to links list
links.Add(new Links
{
Id = Guid.NewGuid(),
MainId = Guid.Parse(id),
MainId = item.Id,
ReferenceId = Guid.Parse(id),
Link = value,
Description = desc,
IsInternal = true
Expand All @@ -228,18 +251,54 @@ public async Task CleanUpData()
}

// update the description
item.Description = newDesc;
item.RichDescription = newDesc;
// update the links array
item.Links = links;
allLinks.AddRange(links);
}
}

// add to the links table
await _dbcontext.Links.AddRangeAsync(links);

await _dbcontext.Links.AddRangeAsync(allLinks);
// update the main data
_dbcontext.Main.UpdateRange(allData);
await _dbcontext.SaveChanges();

allData = await _dbcontext.Main.Include(s => s.Links).ToListAsync();

foreach (var item in allData)
{
var temp = new string(item.RichDescription);

if (!string.IsNullOrEmpty(temp))
{
// replace the links
if (item.Links != null && item.Links.Any())
{
foreach (var link in item.Links)
{
var identifier = $"$$${link.ReferenceId}$$$";
temp = temp.Replace(identifier, link.Description);
}
}

item.RawDescription = temp;

// replace the HTML tags
item.RawDescription = StripHTML(item.RawDescription, true);
}
}
// update the main data
_dbcontext.Main.UpdateRange(allData);
await _dbcontext.SaveChanges();
}

/// <summary>
/// Removes everything that looks like an HTML tag (a '&lt;', one or more non-'&gt;' characters, then '&gt;')
/// from the given text and optionally decodes HTML entities in what remains.
/// </summary>
/// <param name="HTMLText">Markup to strip. Null or empty input is returned unchanged.</param>
/// <param name="decode">When true, the stripped text is passed through HttpUtility.HtmlDecode.</param>
/// <returns>The text without tags, entity-decoded when <paramref name="decode"/> is true.</returns>
public static string StripHTML(string HTMLText, bool decode = true)
{
    // Nothing to strip; also avoids the ArgumentNullException Regex.Replace throws for null input.
    if (string.IsNullOrEmpty(HTMLText))
    {
        return HTMLText;
    }

    // The static Regex.Replace goes through the framework's pattern cache, so the expression is not
    // re-parsed on every call (CleanUpData calls this once per entry). The pattern contains no
    // letters, so no case-insensitivity option is needed.
    var stripped = Regex.Replace(HTMLText, "<[^>]+>", string.Empty);
    return decode ? HttpUtility.HtmlDecode(stripped) : stripped;
}
#endregion
}
}
2 changes: 1 addition & 1 deletion Library.Encyclopedia.Entity/Interfaces/IMainDataAccess.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace Library.Encyclopedia.Entity.Interfaces
public interface IMainDataAccess
{
#region GET
public Task<MainMinimizedExternalCollection> GetAsync(string query, int offset, int pagesize, bool ascending);
public Task<MainMinimizedExternalCollection> GetAsync(string query, int offset, int pagesize, int previewSize, bool ascending);
public Task<MainMinimizedExternalCollection> GetByCategoryAsync(string category, int offset, int pagesize, bool ascending);
public Task<MainMinimizedExternalCollection> GetByAlphabetAsync(char startingAlphabet, int offset, int pagesize, bool ascending);
public Task<Main> GetAsync(Guid id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ public class MainMinimizedExternal
{
public Guid Id { get; set; }
public string Title { get; set; }
public string Description { get; set; }
public string Category { get; set; }
public string Preview { get; set; }
}
public class MainMinimizedExternalCollection : QueryExternalModel<MainMinimizedExternal>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
/// <summary>
/// Values supplied when updating an existing Main entry (consumed by MainDataAccess.UpdateAsync).
/// </summary>
public class MainUpdateModel
{
public string Title { get; set; }
// NOTE(review): diff view - Description appears to be the removed property, replaced by the two below.
public string Description { get; set; }
// NOTE(review): UpdateAsync does not copy RawDescription to the entity yet (it carries a TODO for the conversion).
public string RawDescription { get; set; }
public string RichDescription { get; set; }
public string Category { get; set; }
}
}
2 changes: 2 additions & 0 deletions Library.Encyclopedia.Entity/Models/Links.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ public class Links
[Required]
public Guid MainId { get; set; }
[Required]
public Guid ReferenceId { get; set; }
[Required]
public string Link { get; set; }
public string Description { get; set; }
[Required]
Expand Down
Loading

0 comments on commit eeba115

Please sign in to comment.