Add SLDs that act like TLDs

This change lets the designate administrator configure two files: one
listing the accepted TLDs (accepted_tlds_file) and one listing the SLDs
and other names that effectively act as TLDs (effective_tlds_file).

2 sample files are included to show the format required for these files.
The TLD file is from http://data.iana.org/TLD/tlds-alpha-by-domain.txt
Commented lines in this file begin with a #. There is one entry per line
and the entries are in the IDNA format.

The sample effective TLD file is from http://publicsuffix.org/list/
This file is in the UTF-8 format.  Lines beginning with // or ! are
ignored.  The wildcard character * (asterisk) may only be used to
wildcard the topmost level in a domain name.  All the entries are
converted to IDNA format.

Closes-Bug: #1249396

Change-Id: I9718489335e24cb24733d4c4fc2a966e490014d3
This commit is contained in:
Vinod Mangalpally 2013-11-19 08:53:53 -06:00
parent a5f6a1fb3d
commit 3db98ab1fd
8 changed files with 7879 additions and 72 deletions

View File

@ -15,45 +15,6 @@
# under the License.
from oslo.config import cfg
# NOTE(kiall): See http://data.iana.org/TLD/tlds-alpha-by-domain.txt
# Version 2013110700.
IANA_TLDS = [
'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq',
'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd',
'be', 'bf', 'bg', 'bh', 'bi', 'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs',
'bt', 'bv', 'bw', 'by', 'bz', 'ca', 'camera', 'cat', 'cc', 'cd', 'cf',
'cg', 'ch', 'ci', 'ck', 'cl', 'clothing', 'cm', 'cn', 'co', 'com', 'coop',
'cr', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do',
'dz', 'ec', 'edu', 'ee', 'eg', 'equipment', 'er', 'es', 'et', 'eu', 'fi',
'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh',
'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'guru',
'gw', 'gy', 'hk', 'hm', 'hn', 'holdings', 'hr', 'ht', 'hu', 'id', 'ie',
'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is', 'it', 'je',
'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr',
'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lighting', 'lk', 'lr', 'ls',
'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mil', 'mk',
'ml', 'mm', 'mn', 'mo', 'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu',
'museum', 'mv', 'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net',
'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org', 'pa',
'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'post', 'pr', 'pro', 'ps',
'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb', 'sc',
'sd', 'se', 'sg', 'sh', 'si', 'singles', 'sj', 'sk', 'sl', 'sm', 'sn',
'so', 'sr', 'st', 'su', 'sv', 'sx', 'sy', 'sz', 'tc', 'td', 'tel', 'tf',
'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'travel', 'tt',
'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've',
'ventures', 'vg', 'vi', 'vn', 'voyage', 'vu', 'wf', 'ws', 'xn--3e0b707e',
'xn--45brj9c', 'xn--80ao21a', 'xn--80asehdb', 'xn--80aswg', 'xn--90a3ac',
'xn--clchc0ea0b2g2a9gcd', 'xn--fiqs8s', 'xn--fiqz9s', 'xn--fpcrj9c3d',
'xn--fzc2c9e2c', 'xn--gecrj9c', 'xn--h2brj9c', 'xn--j1amh', 'xn--j6w193g',
'xn--kprw13d', 'xn--kpry57d', 'xn--l1acc', 'xn--lgbbat1ad8j',
'xn--mgb9awbf', 'xn--mgba3a4f16a', 'xn--mgbaam7a8h', 'xn--mgbayh7gpa',
'xn--mgbbh1a71e', 'xn--mgbc0a9azcg', 'xn--mgberp4a5d4ar', 'xn--mgbx4cd0ab',
'xn--ngbc5azd', 'xn--o3cw4h', 'xn--ogbpf8fl', 'xn--p1ai', 'xn--pgbs0dh',
'xn--s9brj9c', 'xn--unup4y', 'xn--wgbh1c', 'xn--wgbl6a',
'xn--xkc2al3hye2a', 'xn--xkc2dl3a5ee0h', 'xn--yfro4i67o', 'xn--ygbi2ammx',
'xxx', 'ye', 'yt', 'za', 'zm', 'zw'
]
cfg.CONF.register_group(cfg.OptGroup(
name='service:central', title="Configuration for Central Service"
))
@ -71,8 +32,10 @@ cfg.CONF.register_opts([
default=['\\.arpa\\.$', '\\.novalocal\\.$', '\\.localhost\\.$',
'\\.localdomain\\.$', '\\.local\\.$'],
help='DNS domain name blacklist'),
cfg.ListOpt('accepted-tld-list', default=IANA_TLDS,
help='Accepted TLDs'),
cfg.StrOpt('accepted-tlds-file', default='tlds-alpha-by-domain.txt',
help='Accepted TLDs'),
cfg.StrOpt('effective-tlds-file', default='effective_tld_names.dat',
help='Effective TLDs'),
cfg.IntOpt('max_domain_name_len', default=255,
help="Maximum domain name length"),
cfg.IntOpt('max_record_name_len', default=255,

View File

@ -0,0 +1,179 @@
# Copyright (c) 2013 Rackspace Hosting
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import codecs
import re
from designate import utils
from designate.openstack.common import log as logging
from oslo.config import cfg
LOG = logging.getLogger(__name__)
class EffectiveTld(object):
    """Accepted TLDs and effective TLDs loaded from configured files.

    Two data sets are kept:

    * ``accepted_tld_list`` - lower-cased TLDs read from the file named by
      the ``accepted_tlds_file`` option of the ``service:central`` group.
    * the effective TLDs (SLDs that act as TLDs, e.g. co.uk) read from the
      file named by the ``effective_tlds_file`` option.  Plain entries are
      kept in ``_effective_tld_dict``; wildcard entries are kept as
      compiled regular expressions in ``_effective_re_tld_list``.

    Both files are optional; when one is not found the matching data set
    is simply left empty.
    """

    def __init__(self, *args, **kwargs):
        self._load_accepted_tld_list()
        self._load_effective_tld_list()

    @staticmethod
    def _parse_accepted_tlds(lines):
        """Return the lower-cased TLDs found in *lines*.

        The input is expected to have one TLD per line, in the IDN format.
        Blank lines and lines whose first non-blank character is a # are
        skipped.

        :param lines: iterable of text lines (e.g. an open file)
        :returns: list of lower-cased TLD strings, in input order
        """
        tlds = []
        for line in lines:
            # Strip before testing so that blank lines and indented
            # comments never end up in the list.
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            tlds.append(line.lower())
        return tlds

    @staticmethod
    def _parse_effective_tlds(lines):
        """Parse public-suffix style *lines* into lookup structures.

        The format of the input is:
        1. Blank lines and lines beginning with a // or ! are ignored.
        2. The domain names are 1 per line; a line is only read up to the
           first whitespace.
        3. The wildcard character * (asterisk) may only be used to
           wildcard the topmost level in a domain name.
        The publicsuffix.org list has more rules and !'s are treated
        differently but this code ignores those until we find that we need
        to do otherwise.

        Entries with only 1 label are ignored as they are already present
        in the accepted TLD list.  All the entries are converted to IDN
        format and lower-cased.

        :param lines: iterable of unicode text lines
        :returns: a tuple ``(exact, max_exact_labels, wildcards,
                  max_wildcard_labels)`` where ``exact`` is a dictionary
                  keyed by the non-wildcard entries, ``wildcards`` is a
                  list of compiled regular expressions for the wildcard
                  entries and the two ``max_*`` values are the largest
                  label counts seen in each collection.
        """
        exact = {}
        max_exact_labels = 0
        wildcards = []
        max_wildcard_labels = 0

        for line in lines:
            line = line.strip()
            if not line or line.startswith(('//', '!')):
                continue

            # Anything after the first whitespace is not part of the rule.
            line = line.split()[0]

            labels_len = len(line.split('.'))
            # skip TLDs as they are already in the accepted TLD list
            if labels_len == 1:
                continue

            # Convert the entry to idna format.  Decoding back to text
            # keeps the entry a string on both Python 2 and Python 3.
            line = line.encode('idna').decode('ascii').lower()

            # Entries with wildcards become regular expressions.
            if line.startswith('*'):
                max_wildcard_labels = max(max_wildcard_labels, labels_len)
                # The ^ and $ match the whole term.  The [^.]+ matches
                # exactly one label (anything other than a ".").  The rest
                # of the entry is escaped so that "." only matches ".".
                wildcards.append(
                    re.compile('^[^.]+' + re.escape(line[1:]) + '$'))
                continue

            max_exact_labels = max(max_exact_labels, labels_len)
            # The rest of the entries go into a dictionary.
            exact[line] = 1

        return exact, max_exact_labels, wildcards, max_wildcard_labels

    def _load_accepted_tld_list(self):
        """
        This loads the accepted TLDs from a file to a list -
        accepted_tld_list.  The file is expected to have one TLD per line.
        TLDs need to be in the IDN format.  Comments in the file are lines
        beginning with a #
        The normal source for this file is
        http://data.iana.org/TLD/tlds-alpha-by-domain.txt
        """
        self.accepted_tld_list = []

        accepted_tld_files = utils.find_config(
            cfg.CONF['service:central'].accepted_tlds_file)

        # We do not require the accepted_tld_files to be present to be
        # compatible with stable/havana release.
        if not accepted_tld_files:
            LOG.info('Unable to determine appropriate accepted tlds file')
            return

        LOG.info('Using accepted_tld_file found at: %s',
                 accepted_tld_files[0])

        with open(accepted_tld_files[0]) as fh:
            self.accepted_tld_list = self._parse_accepted_tlds(fh)

        LOG.info("Entries in Accepted TLD List: %d",
                 len(self.accepted_tld_list))

    def _load_effective_tld_list(self):
        """
        This loads the effective TLDs from a file.  Effective TLDs are the
        SLDs that act as TLDs - e.g. co.uk.  The file is in UTF-8 format.
        The normal source for this file is at http://publicsuffix.org/list/

        All the effective TLDs without a wildcard are put into a
        dictionary - _effective_tld_dict.
        The entries with a wildcard are converted to a regular expression
        and put into a separate list - _effective_re_tld_list.
        The separation to a dictionary and a regular expression list is
        done to make it easier for searching.
        The maximum labels in the dictionary and list are tracked to short
        circuit checks later as needed.
        """
        self._effective_tld_dict = {}
        # _max_effective_tld_labels tracks the maximum labels in the
        # dictionary self._effective_tld_dict
        # This helps to determine if we need to search the dictionary while
        # creating a domain
        self._max_effective_tld_labels = 0
        # The list _effective_re_tld_list contains domains with a *
        self._effective_re_tld_list = []
        # _max_effective_re_tld_labels tracks the maximum labels in the
        # list self._effective_re_tld_list
        self._max_effective_re_tld_labels = 0

        effective_tld_files = utils.find_config(
            cfg.CONF['service:central'].effective_tlds_file)

        # We do not require the effective_tld_file to be present to be
        # compatible with stable/havana release.
        if not effective_tld_files:
            LOG.info('Unable to determine appropriate effective tlds file')
            return

        LOG.info('Using effective_tld_file found at: %s',
                 effective_tld_files[0])

        with codecs.open(effective_tld_files[0], "r", "utf-8") as fh:
            (self._effective_tld_dict,
             self._max_effective_tld_labels,
             self._effective_re_tld_list,
             self._max_effective_re_tld_labels) = (
                self._parse_effective_tlds(fh))

        LOG.info("Entries in Effective TLD List Dict: %d",
                 len(self._effective_tld_dict))
        LOG.info("Entries in Effective RE TLD List: %d",
                 len(self._effective_re_tld_list))

    def is_effective_tld(self, domain_name):
        """
        Returns True if the domain_name is the same as an effective TLD else
        returns False.

        Leading/trailing dots and letter case of domain_name are ignored.
        """
        stripped_domain_name = domain_name.strip('.').lower()
        label_count = len(stripped_domain_name.split('.'))

        # First search the dictionary; a name with more labels than any
        # entry cannot be in it.
        if (label_count <= self._max_effective_tld_labels and
                stripped_domain_name in self._effective_tld_dict):
            return True

        # Now search the list of regular expressions for effective TLDs.
        # re.search accepts compiled patterns as well as pattern strings.
        if label_count <= self._max_effective_re_tld_labels:
            return any(re.search(pattern, stripped_domain_name)
                       for pattern in self._effective_re_tld_list)

        return False

View File

@ -17,6 +17,7 @@
import re
import contextlib
from oslo.config import cfg
from designate.central import effectivetld
from designate.openstack.common import log as logging
from designate.openstack.common.rpc import service as rpc_service
from designate import backend
@ -66,6 +67,7 @@ class Service(rpc_service.Service):
# Get a quota manager instance
self.quota = quota.get_quota()
self.effective_tld = effectivetld.EffectiveTld()
def start(self):
self.backend.start()
@ -81,21 +83,6 @@ class Service(rpc_service.Service):
self.backend.stop()
@property
def accepted_tld_list(self):
# Only iterate the list once please..
if hasattr(self, '_accepted_tld_list'):
return self._accepted_tld_list
accepted_tld_list = cfg.CONF['service:central'].accepted_tld_list
if accepted_tld_list:
accepted_tld_list = [tld.lower() for tld in accepted_tld_list]
self._accepted_tld_list = accepted_tld_list
return accepted_tld_list
def _is_valid_domain_name(self, context, domain_name):
# Validate domain name length
if len(domain_name) > cfg.CONF['service:central'].max_domain_name_len:
@ -108,14 +95,22 @@ class Service(rpc_service.Service):
if len(domain_labels) <= 1:
raise exceptions.InvalidDomainName('More than one label is '
'required')
# Check the TLD for validity
if self.accepted_tld_list:
# We cannot use the effective TLD list as the publicsuffix.org list is
# missing some top level entries. At the time of coding, the following
# entries were missing
# arpa, au, bv, gb, gn, kp, lb, lr, sj, tp, tz, xn--80ao21a, xn--l1acc
# xn--mgbx4cd0ab
if self.effective_tld.accepted_tld_list:
domain_tld = domain_labels[-1].lower()
if domain_tld not in self.accepted_tld_list:
if domain_tld not in self.effective_tld.accepted_tld_list:
raise exceptions.InvalidTLD('Unknown or invalid TLD')
# Check if the domain_name is the same as an effective TLD.
if self.effective_tld.is_effective_tld(domain_name):
raise exceptions.DomainIsSameAsAnEffectiveTLD(
'Domain name cannot be the same as an effective TLD')
# Check domain name blacklist
if self._is_blacklisted_domain_name(context, domain_name):
# Some users are allowed bypass the blacklist.. Is this one?

View File

@ -81,6 +81,11 @@ class InvalidDomainName(Base):
error_type = 'invalid_domain_name'
class DomainIsSameAsAnEffectiveTLD(Base):
    """Error for a domain name that is the same as an effective TLD."""

    error_type = 'domain_is_same_as_an_effective_tld'
    error_code = 400
class InvalidTLD(Base):
error_code = 400
error_type = 'invalid_tld'

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Managed I.T.
#
# Author: Kiall Mac Innes <kiall@managedit.ie>
@ -34,7 +35,7 @@ class CentralServiceTest(CentralTestCase):
def test_is_valid_domain_name(self):
self.config(max_domain_name_len=10,
accepted_tld_list=['org'],
accepted_tlds_file='tlds-alpha-by-domain.txt.sample',
group='service:central')
context = self.get_context()
@ -317,7 +318,7 @@ class CentralServiceTest(CentralTestCase):
self.central_service.count_tenants(self.get_context())
# Domain Tests
def test_create_domain(self):
def _test_create_domain(self, values):
# Create a server
self.create_server()
@ -326,11 +327,6 @@ class CentralServiceTest(CentralTestCase):
context = self.get_admin_context()
values = dict(
name='example.com.',
email='info@example.com'
)
# Create a domain
domain = self.central_service.create_domain(context, values=values)
@ -357,6 +353,43 @@ class CentralServiceTest(CentralTestCase):
self.assertEqual(payload['name'], domain['name'])
self.assertEqual(payload['tenant_id'], domain['tenant_id'])
def test_create_domain_over_tld(self):
    """Create a domain directly under a plain TLD (com)."""
    # NOTE(review): unlike the sibling tests this name has no trailing
    # dot - confirm that is intended.
    domain_values = {
        'name': 'example.com',
        'email': 'info@example.com',
    }
    self._test_create_domain(domain_values)
def test_idn_create_domain_over_tld(self):
    """Create a domain directly under an IDN TLD."""
    # Test creation of a domain in 한국 (kr)
    domain_values = {
        'name': 'example.xn--3e0b707e.',
        'email': 'info@example.xn--3e0b707e',
    }
    self._test_create_domain(domain_values)
def test_create_domain_over_re_effective_tld(self):
    """Create a domain one label below co.uk."""
    domain_values = {
        'name': 'example.co.uk.',
        'email': 'info@example.co.uk',
    }
    self._test_create_domain(domain_values)
def test_create_domain_over_effective_tld(self):
    """Create a domain one label below com.ac."""
    domain_values = {
        'name': 'example.com.ac.',
        'email': 'info@example.com.ac',
    }
    self._test_create_domain(domain_values)
def test_idn_create_domain_over_effective_tld(self):
    """Create a domain one label below an IDN second-level name."""
    # Test creation of a domain in 公司.cn
    domain_values = {
        'name': 'example.xn--55qx5d.cn.',
        'email': 'info@example.xn--55qx5d.cn',
    }
    self._test_create_domain(domain_values)
def test_create_domain_over_quota(self):
self.config(quota_domains=1)
@ -450,10 +483,35 @@ class CentralServiceTest(CentralTestCase):
# Create a domain
self.central_service.create_domain(context, values=values)
def test_create_domain_invalid_tld_fail(self):
self.config(accepted_tld_list=['com'],
def _test_create_domain_fail(self, values, exception):
    """Assert that creating a domain from *values* raises *exception*.

    The sample accepted/effective TLD files are configured and reloaded
    before the domain is created.
    """
    self.config(
        accepted_tlds_file='tlds-alpha-by-domain.txt.sample',
        effective_tlds_file='effective_tld_names.dat.sample',
        group='service:central',
    )

    # The configuration values above are not yet overridden when the
    # initializer loads the accepted and effective tld lists, so both
    # lists are reloaded explicitly here to pick up the correct values.
    tld_lists = self.central_service.effective_tld
    tld_lists._load_accepted_tld_list()
    tld_lists._load_effective_tld_list()

    admin_context = self.get_admin_context()

    # Creating the invalid domain must raise the expected exception.
    with testtools.ExpectedException(exception):
        self.central_service.create_domain(admin_context, values=values)
def test_create_domain_invalid_tld_fail(self):
self.config(accepted_tlds_file='tlds-alpha-by-domain.txt.sample',
effective_tlds_file='effective_tld_names.dat.sample',
group='service:central')
# The above configuration values are not yet overridden at the time when
# the initializer is called to load the accepted and effective tld
# lists, so they are loaded again explicitly here to pick up the correct
# values.
self.central_service.effective_tld._load_accepted_tld_list()
self.central_service.effective_tld._load_effective_tld_list()
context = self.get_admin_context()
# Create a server
@ -468,7 +526,7 @@ class CentralServiceTest(CentralTestCase):
self.central_service.create_domain(context, values=values)
values = dict(
name='invalid.NeT.',
name='invalid.NeT1.',
email='info@invalid.com'
)
@ -476,6 +534,35 @@ class CentralServiceTest(CentralTestCase):
# Create an invalid domain
self.central_service.create_domain(context, values=values)
def test_create_domain_effective_tld_fail(self):
    """Creating a domain named co.ug must be rejected."""
    domain_values = {
        'name': 'co.ug',
        'email': 'info@invalid.com',
    }
    expected = exceptions.DomainIsSameAsAnEffectiveTLD
    self._test_create_domain_fail(domain_values, expected)
def test_idn_create_domain_effective_tld_fail(self):
    """Creating a domain named after an IDN effective TLD must be rejected."""
    # Test creation of the effective TLD - brønnøysund.no
    domain_values = {
        'name': 'xn--brnnysund-m8ac.no',
        'email': 'info@invalid.com',
    }
    expected = exceptions.DomainIsSameAsAnEffectiveTLD
    self._test_create_domain_fail(domain_values, expected)
def test_create_domain_re_effective_tld_fail(self):
    """Creating a domain named co.uk must be rejected."""
    # co.uk is in the regular expression list for effective_tlds
    domain_values = {
        'name': 'co.uk',
        'email': 'info@invalid.com',
    }
    expected = exceptions.DomainIsSameAsAnEffectiveTLD
    self._test_create_domain_fail(domain_values, expected)
def test_find_domains(self):
context = self.get_admin_context()

View File

@ -35,8 +35,15 @@ root_helper = sudo
# List of blacklist domain name regexes
#domain_name_blacklist = \.arpa\.$, \.novalocal\.$, \.localhost\.$, \.localdomain\.$, \.local\.$
# Accepted TLD list - http://data.iana.org/TLD/tlds-alpha-by-domain.txt
#accepted_tld_list = COM, NET, ORG, IE, UK, ...
# Accepted TLDs
# This is a local copy of the list at
# http://data.iana.org/TLD/tlds-alpha-by-domain.txt
#accepted_tlds_file = tlds-alpha-by-domain.txt
# Effective TLDs
# This is a local copy of the list at http://publicsuffix.org/list/
# This contains domain names that effectively act like TLDs e.g. co.uk or tx.us
#effective_tlds_file = effective_tld_names.dat
# Maximum domain name length
#max_domain_name_len = 255

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,338 @@
# The latest version of the file can be obtained from
# http://data.iana.org/TLD/tlds-alpha-by-domain.txt
# Commented lines in this file begin with a #.
# There is one entry per line and the entries are in the IDNA format.
#
# Version 2013111800, Last Updated Mon Nov 18 07:07:01 2013 UTC
AC
AD
AE
AERO
AF
AG
AI
AL
AM
AN
AO
AQ
AR
ARPA
AS
ASIA
AT
AU
AW
AX
AZ
BA
BB
BD
BE
BF
BG
BH
BI
BIKE
BIZ
BJ
BM
BN
BO
BR
BS
BT
BV
BW
BY
BZ
CA
CAMERA
CAT
CC
CD
CF
CG
CH
CI
CK
CL
CLOTHING
CM
CN
CO
COM
CONSTRUCTION
CONTRACTORS
COOP
CR
CU
CV
CW
CX
CY
CZ
DE
DJ
DK
DM
DO
DZ
EC
EDU
EE
EG
EQUIPMENT
ER
ES
ESTATE
ET
EU
FI
FJ
FK
FM
FO
FR
GA
GALLERY
GB
GD
GE
GF
GG
GH
GI
GL
GM
GN
GOV
GP
GQ
GR
GRAPHICS
GS
GT
GU
GURU
GW
GY
HK
HM
HN
HOLDINGS
HR
HT
HU
ID
IE
IL
IM
IN
INFO
INT
IO
IQ
IR
IS
IT
JE
JM
JO
JOBS
JP
KE
KG
KH
KI
KM
KN
KP
KR
KW
KY
KZ
LA
LAND
LB
LC
LI
LIGHTING
LK
LR
LS
LT
LU
LV
LY
MA
MC
MD
ME
MG
MH
MIL
MK
ML
MM
MN
MO
MOBI
MP
MQ
MR
MS
MT
MU
MUSEUM
MV
MW
MX
MY
MZ
NA
NAME
NC
NE
NET
NF
NG
NI
NL
NO
NP
NR
NU
NZ
OM
ORG
PA
PE
PF
PG
PH
PK
PL
PLUMBING
PM
PN
POST
PR
PRO
PS
PT
PW
PY
QA
RE
RO
RS
RU
RW
SA
SB
SC
SD
SE
SEXY
SG
SH
SI
SINGLES
SJ
SK
SL
SM
SN
SO
SR
ST
SU
SV
SX
SY
SZ
TATTOO
TC
TD
TECHNOLOGY
TEL
TF
TG
TH
TJ
TK
TL
TM
TN
TO
TP
TR
TRAVEL
TT
TV
TW
TZ
UA
UG
UK
US
UY
UZ
VA
VC
VE
VENTURES
VG
VI
VN
VOYAGE
VU
WF
WS
XN--3E0B707E
XN--45BRJ9C
XN--80AO21A
XN--80ASEHDB
XN--80ASWG
XN--90A3AC
XN--CLCHC0EA0B2G2A9GCD
XN--FIQS8S
XN--FIQZ9S
XN--FPCRJ9C3D
XN--FZC2C9E2C
XN--GECRJ9C
XN--H2BRJ9C
XN--J1AMH
XN--J6W193G
XN--KPRW13D
XN--KPRY57D
XN--L1ACC
XN--LGBBAT1AD8J
XN--MGB9AWBF
XN--MGBA3A4F16A
XN--MGBAAM7A8H
XN--MGBAYH7GPA
XN--MGBBH1A71E
XN--MGBC0A9AZCG
XN--MGBERP4A5D4AR
XN--MGBX4CD0AB
XN--NGBC5AZD
XN--O3CW4H
XN--OGBPF8FL
XN--P1AI
XN--PGBS0DH
XN--S9BRJ9C
XN--UNUP4Y
XN--WGBH1C
XN--WGBL6A
XN--XKC2AL3HYE2A
XN--XKC2DL3A5EE0H
XN--YFRO4I67O
XN--YGBI2AMMX
XXX
YE
YT
ZA
ZM
ZW