Skip to content

Commit

Permalink
Merge pull request #99 from twingly/read-psl-data-as-utf8
Browse files Browse the repository at this point in the history
Make sure we always read PSL data as UTF-8
  • Loading branch information
dentarg authored Sep 19, 2016
2 parents 30c27a3 + 897bfd7 commit c3313a5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
6 changes: 4 additions & 2 deletions lib/twingly/public_suffix_list.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require "addressable/idna"
require "public_suffix"

module Twingly
Expand All @@ -7,8 +8,9 @@ class PublicSuffixList
private_constant :ACE_PREFIX

# Extend the PSL with ASCII form of all internationalized domain names
def self.with_punycoded_names
list_data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH)
def self.with_punycoded_names(encoding: Encoding::UTF_8)
list_path = PublicSuffix::List::DEFAULT_LIST_PATH
list_data = File.read(list_path, encoding: encoding)
list = PublicSuffix::List.parse(list_data, private_domains: false)

punycoded_names(list).each do |punycoded_name|
Expand Down
30 changes: 30 additions & 0 deletions spec/lib/twingly/public_suffix_list_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
require "spec_helper"

require "twingly/public_suffix_list"

# Specs for Twingly::PublicSuffixList.with_punycoded_names, exercising the
# `encoding:` keyword used when the public suffix list data is read.
describe Twingly::PublicSuffixList do
  describe ".with_punycoded_names" do
    # Default subject: contexts below supply `encoding` through `let`.
    subject { described_class.with_punycoded_names(encoding: encoding) }

    context "when the list data is read with the default encoding" do
      # Overrides the outer subject so that no `encoding:` argument is passed
      # and the method falls back to its own default.
      subject { described_class.with_punycoded_names }

      it { is_expected.to be_a(PublicSuffix::List) }
    end

    context "when the list data is read as UTF-8" do
      let(:encoding) { Encoding::UTF_8 }

      it { is_expected.to be_a(PublicSuffix::List) }
    end

    context "when the list data is read as US-ASCII" do
      let(:encoding) { Encoding::US_ASCII }

      # Reading the list with the wrong encoding is expected to surface as an
      # ArgumentError once the data is parsed.
      it "parsing the data will fail" do
        expect { subject }
          .to raise_error(ArgumentError, "invalid byte sequence in US-ASCII")
      end
    end
  end
end

0 comments on commit c3313a5

Please sign in to comment.