Skip to content
This repository was archived by the owner on Mar 9, 2021. It is now read-only.

Commit 7c3699a

Browse files
committed
Cache StreamWriter instances for text downloading
Stores StreamWriter instances in a Dictionary and reuses them for recurring text appends when downloading text posts. This prevents massive seek I/O in large blog downloads.
1 parent 8d452b5 commit 7c3699a

11 files changed

+137
-11
lines changed

src/TumblThree/SharedAssemblyInfo.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@
1212

1313
[assembly: ComVisible(false)]
1414
[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)]
15-
[assembly: AssemblyVersion("1.0.8.73")]
16-
[assembly: AssemblyFileVersion("1.0.8.73")]
15+
[assembly: AssemblyVersion("1.0.8.74")]
16+
[assembly: AssemblyFileVersion("1.0.8.74")]

src/TumblThree/TumblThree.Applications/Controllers/CrawlerController.cs

+17-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
namespace TumblThree.Applications.Controllers
1717
{
1818
[Export]
19-
internal class CrawlerController
19+
internal class CrawlerController : IDisposable
2020
{
2121
private readonly ICrawlerFactory crawlerFactory;
2222
private readonly ICrawlerService crawlerService;
@@ -178,6 +178,7 @@ private async Task RunCrawlerTasksAsync(CancellationToken ct, PauseToken pt)
178178

179179
ICrawler crawler = crawlerFactory.GetCrawler(blog, ct, pt, new Progress<DownloadProgress>());
180180
crawler.IsBlogOnlineAsync().Wait(4000);
181+
crawler.Dispose();
181182

182183
if (crawlerService.ActiveItems.Any(item =>
183184
item.Blog.Name.Equals(nextQueueItem.Blog.Name) &&
@@ -215,6 +216,7 @@ private async Task StartSiteSpecificDownloaderAsync(QueueListItem queueListItem,
215216

216217
ICrawler crawler = crawlerFactory.GetCrawler(blog, ct, pt, progress);
217218
await crawler.CrawlAsync();
219+
crawler.Dispose();
218220

219221
Monitor.Enter(lockObject);
220222
QueueOnDispatcher.CheckBeginInvokeOnUI(() => crawlerService.RemoveActiveItem(queueListItem));
@@ -233,5 +235,19 @@ private ProgressThrottler<DownloadProgress> SetupThrottledQueueListProgress(Queu
233235
var progressHandler = new Progress<DownloadProgress>(value => { queueListItem.Progress = value.Progress; });
234236
return new ProgressThrottler<DownloadProgress>(progressHandler, shellService.Settings.ProgressUpdateInterval);
235237
}
238+
239+
protected virtual void Dispose(bool disposing)
240+
{
241+
if (disposing)
242+
{
243+
crawlerCancellationTokenSource?.Dispose();
244+
}
245+
}
246+
247+
public void Dispose()
248+
{
249+
Dispose(true);
250+
GC.SuppressFinalize(this);
251+
}
236252
}
237253
}

src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs

+2
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ private async Task CheckStatusOfBlogsAsync(SemaphoreSlim semaphoreSlim, IBlog bl
374374
ICrawler crawler = crawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(),
375375
new Progress<DownloadProgress>());
376376
await crawler.IsBlogOnlineAsync();
377+
crawler.Dispose();
377378
}
378379
finally
379380
{
@@ -645,6 +646,7 @@ private async Task UpdateMetaInformationAsync(IBlog blog)
645646
new Progress<DownloadProgress>());
646647

647648
await crawler.UpdateMetaInformationAsync();
649+
crawler.Dispose();
648650
}
649651

650652
private IBlog CheckIfCrawlableBlog(string blogUrl)

src/TumblThree/TumblThree.Applications/Crawler/ICrawler.cs

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
using System.Threading.Tasks;
1+
using System;
2+
using System.Threading.Tasks;
23

34
namespace TumblThree.Applications.Crawler
45
{
5-
public interface ICrawler
6+
public interface ICrawler : IDisposable
67
{
78
Task CrawlAsync();
89

src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs

+15
Original file line numberDiff line numberDiff line change
@@ -700,5 +700,20 @@ private async Task AddExternalPhotoUrlToDownloadListAsync(Post post)
700700

701701
if (blog.DownloadCatBox) AddCatBoxUrl(searchableText, timestamp);
702702
}
703+
704+
protected virtual void Dispose(bool disposing)
705+
{
706+
if (disposing)
707+
{
708+
semaphoreSlim?.Dispose();
709+
downloader.Dispose();
710+
}
711+
}
712+
713+
public void Dispose()
714+
{
715+
Dispose(true);
716+
GC.SuppressFinalize(this);
717+
}
703718
}
704719
}

src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs

+15
Original file line numberDiff line numberDiff line change
@@ -666,5 +666,20 @@ private async Task AddExternalPhotoUrlToDownloadListAsync(Post post)
666666

667667
if (blog.DownloadCatBox) AddCatBoxUrl(searchableText, timestamp);
668668
}
669+
670+
protected virtual void Dispose(bool disposing)
671+
{
672+
if (disposing)
673+
{
674+
semaphoreSlim?.Dispose();
675+
downloader.Dispose();
676+
}
677+
}
678+
679+
public void Dispose()
680+
{
681+
Dispose(true);
682+
GC.SuppressFinalize(this);
683+
}
669684
}
670685
}

src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs

+15
Original file line numberDiff line numberDiff line change
@@ -255,5 +255,20 @@ private void AddVideoUrlToDownloadList(string document)
255255
if (blog.RegExVideos)
256256
AddGenericVideoUrl(document);
257257
}
258+
259+
protected virtual void Dispose(bool disposing)
260+
{
261+
if (disposing)
262+
{
263+
semaphoreSlim?.Dispose();
264+
downloader.Dispose();
265+
}
266+
}
267+
268+
public void Dispose()
269+
{
270+
Dispose(true);
271+
GC.SuppressFinalize(this);
272+
}
258273
}
259274
}

src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs

+15
Original file line numberDiff line numberDiff line change
@@ -206,5 +206,20 @@ private void AddVideoUrlToDownloadList(string document)
206206
if (blog.RegExVideos)
207207
AddGenericVideoUrl(document);
208208
}
209+
210+
protected virtual void Dispose(bool disposing)
211+
{
212+
if (disposing)
213+
{
214+
semaphoreSlim?.Dispose();
215+
downloader.Dispose();
216+
}
217+
}
218+
219+
public void Dispose()
220+
{
221+
Dispose(true);
222+
GC.SuppressFinalize(this);
223+
}
209224
}
210225
}

src/TumblThree/TumblThree.Applications/Crawler/TumblrTagSearchCrawler.cs

+15
Original file line numberDiff line numberDiff line change
@@ -247,5 +247,20 @@ private void AddVideoUrlToDownloadList(string document)
247247
if (blog.RegExVideos)
248248
AddGenericVideoUrl(document);
249249
}
250+
251+
protected virtual void Dispose(bool disposing)
252+
{
253+
if (disposing)
254+
{
255+
semaphoreSlim?.Dispose();
256+
downloader.Dispose();
257+
}
258+
}
259+
260+
public void Dispose()
261+
{
262+
Dispose(true);
263+
GC.SuppressFinalize(this);
264+
}
250265
}
251266
}

src/TumblThree/TumblThree.Applications/Downloader/AbstractDownloader.cs

+35-4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public abstract class AbstractDownloader : IDownloader
3434

3535
private SemaphoreSlim concurrentConnectionsSemaphore;
3636
private SemaphoreSlim concurrentVideoConnectionsSemaphore;
37+
private readonly Dictionary<string, StreamWriter> streamWriters = new Dictionary<string, StreamWriter>();
3738

3839
protected AbstractDownloader(IShellService shellService, IManagerService managerService, CancellationToken ct,
3940
PauseToken pt, IProgress<DownloadProgress> progress, IPostQueue<TumblrPost> postQueue, FileDownloader fileDownloader,
@@ -123,10 +124,8 @@ protected virtual bool AppendToTextFile(string fileLocation, string text)
123124
{
124125
lock (lockObjectDownload)
125126
{
126-
using (var sw = new StreamWriter(fileLocation, true))
127-
{
128-
sw.WriteLine(text);
129-
}
127+
StreamWriter sw = GetTextAppenderStreamWriter(fileLocation);
128+
sw.WriteLine(text);
130129
}
131130

132131
return true;
@@ -144,6 +143,18 @@ protected virtual bool AppendToTextFile(string fileLocation, string text)
144143
}
145144
}
146145

146+
private StreamWriter GetTextAppenderStreamWriter(string key)
147+
{
148+
if (streamWriters.ContainsKey(key))
149+
{
150+
return streamWriters[key];
151+
}
152+
StreamWriter sw = new StreamWriter(key, true);
153+
streamWriters.Add(key, sw);
154+
155+
return sw;
156+
}
157+
147158
public virtual async Task<bool> DownloadBlogAsync()
148159
{
149160
concurrentConnectionsSemaphore =
@@ -335,5 +346,25 @@ protected void CheckIfShouldPause()
335346
if (pt.IsPaused)
336347
pt.WaitWhilePausedWithResponseAsyc().Wait();
337348
}
349+
350+
protected virtual void Dispose(bool disposing)
351+
{
352+
if (disposing)
353+
{
354+
concurrentConnectionsSemaphore?.Dispose();
355+
concurrentVideoConnectionsSemaphore?.Dispose();
356+
357+
foreach (var sw in streamWriters.Values)
358+
{
359+
sw.Dispose();
360+
}
361+
}
362+
}
363+
364+
public void Dispose()
365+
{
366+
Dispose(true);
367+
GC.SuppressFinalize(this);
368+
}
338369
}
339370
}

src/TumblThree/TumblThree.Applications/Downloader/IDownloader.cs

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
using System.Threading.Tasks;
1+
using System;
2+
using System.Threading.Tasks;
23

34
namespace TumblThree.Applications.Downloader
45
{
5-
public interface IDownloader
6+
public interface IDownloader : IDisposable
67
{
78
Task<bool> DownloadBlogAsync();
89

0 commit comments

Comments
 (0)