Make link URL matching smarter
Ignore-this: bff39c43073f3576e31bda52b7383e6b - Link URL matching no longer requires the URL to be at the beginning of the line - Includes test case and documentation fix. darcs-hash:20101227093449-82ea9-727739b99ac86256216350b90dc3e710ea181f51.gz
This commit is contained in:
parent
11dcb55c4b
commit
a7c62223ea
@ -169,21 +169,19 @@ adjust and have MeetBot re-process the logs later.
|
||||
release.
|
||||
|
||||
#link
|
||||
Add a link to the minutes. The URL must be the first thing on the
|
||||
line, separated by a space from the rest of the line, and it will be
|
||||
properly hyperlinked. This command is automatically detected if the line
|
||||
starts with http:, https:, mailto:, and some other common protocols
|
||||
defined in the ``UrlProtocols`` configuration variable. Examples::
|
||||
|
||||
Add a link to the minutes. The URL will be properly detected within
|
||||
the line in most cases - the URL can't contain spaces. This command
|
||||
is automatically detected if the line starts with http:, https:,
|
||||
mailto:, and some other common protocols defined in the
|
||||
``UrlProtocols`` configuration variable. Examples::
|
||||
|
||||
< MrBeige> #link http://wiki.debian.org/MeetBot/ is the main page
|
||||
< MrBeige> http://wiki.debian.org/MeetBot/ is the main page
|
||||
|
||||
Both of these two examples are equivalent, and will hyperlink
|
||||
properly. The first example below won't hyperlink properly, the
|
||||
second one won't be automatically detected::
|
||||
|
||||
< MrBeige> #link the main page is http://wiki.debian.org/MeetBot/
|
||||
< MrBeige> the main page is http://wiki.debian.org/MeetBot/
|
||||
so go there
|
||||
< MrBeige> the main page is http://wiki.debian.org/MeetBot/ so go
|
||||
there. (This will NOT be detected automatically)
|
||||
|
||||
|
||||
|
||||
|
@ -30,6 +30,7 @@
|
||||
###
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
|
||||
import writers
|
||||
@ -73,10 +74,9 @@ class _BaseItem(object):
|
||||
replacements[name] = getattr(self, name)
|
||||
replacements['nick'] = escapewith(replacements['nick'])
|
||||
replacements['link'] = self.logURL(M)
|
||||
if 'line' in replacements:
|
||||
replacements['line'] = escapewith(replacements['line'])
|
||||
if 'topic' in replacements:
|
||||
replacements['topic'] = escapewith(replacements['topic'])
|
||||
for key in ('line', 'prefix', 'suffix', 'topic'):
|
||||
if key in replacements:
|
||||
replacements[key] = escapewith(replacements[key])
|
||||
if 'url' in replacements:
|
||||
replacements['url_quoteescaped'] = \
|
||||
escapewith(self.url.replace('"', "%22"))
|
||||
@ -228,24 +228,40 @@ class Rejected(GenericItem):
|
||||
class Link(_BaseItem):
|
||||
itemtype = 'LINK'
|
||||
html_template = """<tr><td><a href='%(link)s#%(anchor)s'>%(time)s</a></td>
|
||||
<td>%(itemtype)s</td><td>%(nick)s</td><td>%(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s</td>
|
||||
<td>%(itemtype)s</td><td>%(nick)s</td><td>%(starthtml)s%(prefix)s<a href="%(url)s">%(url_readable)s</a>%(suffix)s%(endhtml)s</td>
|
||||
</tr>"""
|
||||
#html2_template = ("""<i>%(itemtype)s</i>: %(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s """
|
||||
# """(%(nick)s, <a href='%(link)s#%(anchor)s'>%(time)s</a>)""")
|
||||
#html2_template = ("""<i>%(itemtype)s</i>: %(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s """
|
||||
# """(<a href='%(link)s#%(anchor)s'>%(nick)s</a>, %(time)s)""")
|
||||
html2_template = ("""%(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s """
|
||||
html2_template = ("""%(starthtml)s%(prefix)s<a href="%(url)s">%(url_readable)s</a>%(suffix)s%(endhtml)s """
|
||||
"""<span class="details">"""
|
||||
"""(<a href='%(link)s#%(anchor)s'>%(nick)s</a>, """
|
||||
"""%(time)s)"""
|
||||
"""</span>""")
|
||||
rst_template = """*%(itemtype)s*: %(startrst)s%(url)s %(line)s%(endrst)s (%(rstref)s_)"""
|
||||
text_template = """%(itemtype)s: %(starttext)s%(url)s %(line)s%(endtext)s (%(nick)s, %(time)s)"""
|
||||
mw_template = """''%(itemtype)s:'' %(startmw)s%(url)s %(line)s%(endmw)s (%(nick)s, %(time)s)"""
|
||||
def __init__(self, nick, line, linenum, time_):
|
||||
rst_template = """*%(itemtype)s*: %(startrst)s%(prefix)s%(url)s%(suffix)s%(endrst)s (%(rstref)s_)"""
|
||||
text_template = """%(itemtype)s: %(starttext)s%(prefix)s%(url)s%(suffix)s%(endtext)s (%(nick)s, %(time)s)"""
|
||||
mw_template = """''%(itemtype)s:'' %(startmw)s%(prefix)s%(url)s%(suffix)s%(endmw)s (%(nick)s, %(time)s)"""
|
||||
def __init__(self, nick, line, linenum, time_, M):
|
||||
self.nick = nick ; self.linenum = linenum
|
||||
self.time = time.strftime("%H:%M:%S", time_)
|
||||
self.url, self.line = (line+' ').split(' ', 1)
|
||||
self.line = line
|
||||
|
||||
protocols = M.config.UrlProtocols
|
||||
protocols = '|'.join(re.escape(p) for p in protocols)
|
||||
protocols = '(?:'+protocols+')'
|
||||
# This is gross.
|
||||
# (.*?) - any prefix, non-greedy
|
||||
# (%s//[^\s]+ - protocol://... until the next space
|
||||
# (?<!\.|\)) - but the last character can NOT be . or )
|
||||
# (.*) - any suffix
|
||||
url_re = re.compile(r'(.*?)(%s//[^\s]+(?<!\.|\)))(.*)'%protocols)
|
||||
m = url_re.match(line)
|
||||
if m:
|
||||
self.prefix = m.group(1)
|
||||
self.url = m.group(2)
|
||||
self.suffix = m.group(3)
|
||||
else:
|
||||
# simple matching, the old way.
|
||||
self.url, self.suffix = (line+' ').split(' ', 1)
|
||||
self.suffix = ' '+self.suffix
|
||||
self.prefix = ''
|
||||
# URL-sanitization
|
||||
self.url_readable = self.url # readable line version
|
||||
self.url = self.url
|
||||
|
@ -448,7 +448,7 @@ class MeetingCommands(object):
|
||||
self.addnick(nick, lines=0)
|
||||
def do_link(self, **kwargs):
|
||||
"""Add informational item to the minutes."""
|
||||
m = items.Link(**kwargs)
|
||||
m = items.Link(M=self, **kwargs)
|
||||
self.additem(m)
|
||||
def do_commands(self, **kwargs):
|
||||
commands = [ "#"+x[3:] for x in dir(self) if x[:3]=="do_" ]
|
||||
|
@ -222,6 +222,32 @@ class MeetBotTest(unittest.TestCase):
|
||||
results, re.IGNORECASE), \
|
||||
"Nick full-word matching failed"
|
||||
|
||||
def test_urlMatching(self):
|
||||
"""Test properly detection of URLs in lines
|
||||
"""
|
||||
script = """
|
||||
20:13:50 <x> #startmeeting
|
||||
20:13:50 <x> #link prefix http://site1.com suffix
|
||||
20:13:50 <x> http://site2.com suffix
|
||||
20:13:50 <x> ftp://ftpsite1.com suffix
|
||||
20:13:50 <x> #link prefix ftp://ftpsite2.com suffix
|
||||
20:13:50 <x> irc://ircsite1.com suffix
|
||||
20:13:50 <x> mailto://a@mail.com suffix
|
||||
20:13:50 <x> #endmeeting
|
||||
"""
|
||||
M = process_meeting(script)
|
||||
results = M.save()['.html']
|
||||
assert re.search(r'prefix.*href.*http://site1.com.*suffix',
|
||||
results), "URL missing 1"
|
||||
assert re.search(r'href.*http://site2.com.*suffix',
|
||||
results), "URL missing 2"
|
||||
assert re.search(r'href.*ftp://ftpsite1.com.*suffix',
|
||||
results), "URL missing 3"
|
||||
assert re.search(r'prefix.*href.*ftp://ftpsite2.com.*suffix',
|
||||
results), "URL missing 4"
|
||||
assert re.search(r'href.*mailto://a@mail.com.*suffix',
|
||||
results), "URL missing 5"
|
||||
|
||||
def t_css(self):
|
||||
"""Runs all CSS-related tests.
|
||||
"""
|
||||
|
@ -64,7 +64,15 @@
|
||||
# links
|
||||
20:13:50 <MrBeige> #topic Links
|
||||
20:13:50 <Utahraptor> #link http://test<b>.zgib.net
|
||||
20:13:50 <Utahraptor> #link ftp://test<b>.zgib.net "
|
||||
20:13:50 <Utahraptor> #link mailto://a@bla"h.com
|
||||
20:13:50 <Utahraptor> #link http://test.zgib.net/&testpage
|
||||
20:13:50 <Utahraptor> #link prefix http://test.zgib.net/&testpage suffix
|
||||
20:13:50 <Utahraptor> #link prefix ftp://test.zg"ib.net/&testpage suffix
|
||||
20:13:50 <Utahraptor> #link prefix mailto://a@blah.com&testpage suffix
|
||||
20:13:50 <Utahraptor> #link prefix http://google.com/. suffix
|
||||
20:13:50 <Utahraptor> #link prefix (http://google.com/) suffix
|
||||
|
||||
|
||||
# accents
|
||||
20:13:50 <MrBeige> #topic Character sets
|
||||
|
Loading…
Reference in New Issue
Block a user