url_check_line () now recognizes all URLs

Previously it only recognized URLs with "://" after the scheme.
URLs without a scheme are purposely not recognized by this function.

Fixes #701
Closes #815
This commit is contained in:
Diogo Sousa 2013-10-21 04:25:36 +01:00 committed by TingPing
parent a25363f5c3
commit 6d3c176ff8

View File

@ -35,6 +35,7 @@ GTree *url_btree = NULL;
static gboolean regex_match (const GRegex *re, const char *word, static gboolean regex_match (const GRegex *re, const char *word,
int *start, int *end); int *start, int *end);
static const GRegex *re_url (void); static const GRegex *re_url (void);
static const GRegex *re_url_no_scheme (void);
static const GRegex *re_host (void); static const GRegex *re_host (void);
static const GRegex *re_host6 (void); static const GRegex *re_host6 (void);
static const GRegex *re_email (void); static const GRegex *re_email (void);
@ -294,7 +295,10 @@ match_email (const char *word, int *start, int *end)
static gboolean static gboolean
match_url (const char *word, int *start, int *end) match_url (const char *word, int *start, int *end)
{ {
return regex_match (re_url (), word, start, end); if (regex_match (re_url (), word, start, end))
return TRUE;
return regex_match (re_url_no_scheme (), word, start, end);
} }
static gboolean static gboolean
@ -372,7 +376,6 @@ url_check_line (char *buf, int len)
g_match_info_fetch_pos(gmi, 0, &start, &end); g_match_info_fetch_pos(gmi, 0, &start, &end);
while (end > start && (po[end - 1] == '\r' || po[end - 1] == '\n')) while (end > start && (po[end - 1] == '\r' || po[end - 1] == '\n'))
end--; end--;
if (g_strstr_len (po + start, end - start, "://"))
url_add(po + start, end - start); url_add(po + start, end - start);
g_match_info_next(gmi, NULL); g_match_info_next(gmi, NULL);
} }
@ -539,6 +542,18 @@ struct
{ NULL, "", 0} { NULL, "", 0}
}; };
/* Return the regex for a "schemeless" url of the form host[:port]/[path].
 * The regex is built by make_re () on first use and kept in a
 * function-local static, so later calls hand back the same pointer. */
static const GRegex *
re_url_no_scheme (void)
{
static GRegex *schemeless_re = NULL;

if (schemeless_re == NULL)
{
/* "host/path": a host, an optional port, a slash, an optional path */
schemeless_re = make_re ("(" HOST_URL OPT_PORT "/" "(" PATH ")?" ")");
}

return schemeless_re;
}
static const GRegex * static const GRegex *
re_url (void) re_url (void)
{ {
@ -551,12 +566,12 @@ re_url (void)
grist_gstr = g_string_new (NULL); grist_gstr = g_string_new (NULL);
/* Add regex "host/path", representing a "schemeless" url */
g_string_append (grist_gstr, "(" HOST_URL OPT_PORT "/" "(" PATH ")?" ")");
for (i = 0; uri[i].scheme; i++) for (i = 0; uri[i].scheme; i++)
{ {
g_string_append (grist_gstr, "|("); if (i)
g_string_append (grist_gstr, "|");
g_string_append (grist_gstr, "(");
g_string_append_printf (grist_gstr, "%s:", uri[i].scheme); g_string_append_printf (grist_gstr, "%s:", uri[i].scheme);
if (uri[i].flags & URI_AUTHORITY) if (uri[i].flags & URI_AUTHORITY)