-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from agailloty/multi-xpaths
Adding the ability to evaluate multiple xpath expressions
- Loading branch information
Showing
9 changed files
with
162 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using System.Collections.Generic; | ||
|
||
namespace XpathRunner; | ||
|
||
/// <summary>
/// Result container pairing a column name with the list of string rows collected for it.
/// </summary>
public class ResultModel | ||
{ | ||
// Name of the column; XpathService fills this with the XPath expression that was evaluated.
// NOTE(review): the CI annotation on the following line targets this declaration (file line 7) —
// presumably a nullable-reference warning because the property has no initializer; confirm.
public string ColumnName { get; set; } | ||
Check warning on line 7 in XpathRunner/ResultModel.cs
|
||
// Row values for the column; starts as an empty list, so it can be appended to right after construction.
public List<string> Rows { get; set; } = new(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,62 @@ | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using HtmlAgilityPack; | ||
|
||
namespace XpathRunner.Service; | ||
|
||
/// <summary>
/// Evaluates XPath expressions against HTML files loaded through HtmlAgilityPack's HtmlDocument.
/// </summary>
// NOTE(review): this listing is a rendered diff (see the "@@ -1,46 +1,62 @@" header), so lines from the
// previous ExtractHtmlContent overloads appear interleaved with the new code without +/- markers.
// The documentation added below describes ExtractMultipleHtmlContent and EvaluateMultipleXpaths only.
public class XpathService | ||
{ | ||
public IList<string> ExtractHtmlContent(string filepath, string xpath) | ||
/// <summary>
/// Loads every non-empty file path, evaluates every non-empty XPath expression against each loaded
/// document, and merges the results into one ResultModel per distinct expression.
/// </summary>
/// <param name="filepaths">Paths of the HTML files to load; null or empty entries are skipped.</param>
/// <param name="xpaths">XPath expressions to evaluate; null or empty entries are skipped.</param>
/// <returns>
/// One ResultModel per distinct expression, with rows concatenated across files in file order;
/// an empty list when no usable path or no usable expression remains after filtering.
/// </returns>
// NOTE(review): filepaths and xpaths are passed straight to .Where with no null check, so a null
// array throws instead of yielding an empty result.
public IList<ResultModel> ExtractMultipleHtmlContent(string[] filepaths, string[] xpaths) | ||
{ | ||
if (string.IsNullOrEmpty(filepath) || string.IsNullOrEmpty(xpath)) | ||
return new List<string>(); | ||
// keep only the paths and xpaths that are not empty | ||
var paths = filepaths.Where(path => !string.IsNullOrEmpty(path)).ToArray(); | ||
var xpathsList = xpaths.Where(xpath => !string.IsNullOrEmpty(xpath)).ToArray(); | ||
|
||
var content = new List<string>(); | ||
var doc = new HtmlDocument(); | ||
doc.Load(filepath); | ||
|
||
var results = doc.DocumentNode.SelectNodes(xpath); | ||
if (results == null) | ||
return content; | ||
|
||
foreach (var result in results) | ||
content.Add(result.InnerText.Trim()); | ||
return content; | ||
} | ||
|
||
public IList<string> ExtractHtmlContent(string[] filepaths, string xpath) | ||
{ | ||
if (filepaths == null || filepaths.Length == 0 || string.IsNullOrEmpty(xpath)) | ||
return new List<string>(); | ||
// if there are no paths or xpaths, return an empty list | ||
if (paths.Length == 0 || xpathsList.Length == 0) | ||
return new List<ResultModel>(); | ||
|
||
var content = new List<string>(); | ||
foreach (var filepath in filepaths) | ||
var results = new List<ResultModel>(); | ||
|
||
// Each file contributes one ResultModel per expression; they are all appended to the same list
// and merged by expression afterwards.
foreach (var filepath in paths) | ||
{ | ||
var doc = new HtmlDocument(); | ||
doc.Load(filepath); | ||
var content = EvaluateMultipleXpaths(doc, xpathsList); | ||
results.AddRange(content); | ||
} | ||
|
||
// Get unique xpath expressions | ||
var uniqueXpaths = results.Select(x => x.ColumnName).Distinct().ToList(); | ||
var uniqueResults = new List<ResultModel>(); | ||
foreach (var xpath in uniqueXpaths) | ||
{ | ||
// Flatten the rows of every per-file result that shares this expression into a single list.
var rows = results.Where(r => r.ColumnName == xpath).SelectMany(r => r.Rows).ToList(); | ||
uniqueResults.Add(new ResultModel { ColumnName = xpath, Rows = rows }); | ||
|
||
} | ||
|
||
return uniqueResults; | ||
} | ||
|
||
var results = doc.DocumentNode.SelectNodes(xpath); | ||
if (results == null) | ||
/// <summary>
/// Evaluates each XPath expression against an already loaded document.
/// </summary>
/// <param name="doc">Document whose DocumentNode is queried.</param>
/// <param name="xpaths">Expressions to evaluate, in the order they should appear in the result.</param>
/// <returns>
/// One ResultModel per expression, in input order. ColumnName is the expression itself and Rows holds
/// the trimmed InnerText of every node SelectNodes returned; Rows stays empty when SelectNodes returns null.
/// </returns>
public IList<ResultModel> EvaluateMultipleXpaths(HtmlDocument doc, IEnumerable<string> xpaths) | ||
{ | ||
var results = new List<ResultModel>(); | ||
foreach (var xpath in xpaths) | ||
{ | ||
var content = new ResultModel( ) { ColumnName = xpath }; | ||
var resultsNode = doc.DocumentNode.SelectNodes(xpath); | ||
// A null node list still produces an entry, so the expression appears in the output with no rows.
if (resultsNode == null) | ||
{ | ||
results.Add(content); | ||
continue; | ||
} | ||
|
||
foreach (var result in results) | ||
content.Add(result.InnerText.Trim()); | ||
foreach (var result in resultsNode) | ||
content.Rows.Add(result.InnerText.Trim()); | ||
results.Add(content); | ||
} | ||
return content; | ||
return results; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using CommunityToolkit.Mvvm.ComponentModel; | ||
|
||
namespace XpathRunner.ViewModels; | ||
|
||
/// <summary>
/// Observable item that wraps a single XPath expression string.
/// </summary>
public class XpathExpressionItem : ObservableObject | ||
{ | ||
// Backing field for XpathExpression.
// NOTE(review): the CI annotation on the following line targets this declaration (file line 7) —
// presumably a nullable-reference warning because the field has no initializer; confirm.
private string _xpathExpression; | ||
Check warning on line 7 in XpathRunner/ViewModels/XpathExpressionItem.cs
|
||
|
||
// The setter goes through SetProperty rather than assigning the field directly; SetProperty is
// presumably the ObservableObject helper that raises change notification — confirm against the toolkit docs.
public string XpathExpression | ||
{ | ||
get => _xpathExpression; | ||
set => SetProperty(ref _xpathExpression, value); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters