From 8f1ea7cbb6cb365e4ffd75bdc2d901afcbfdf72f Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 26 Aug 2014 15:49:15 +0200
Subject: [PATCH] [empflix] Revert to XML parser

Don't rely on the XML being broken (if they fixed it, our code wouldn't work anymore).
Instead, use the transform function we already have :)

This partially reverts commit c7bee2a7254d31b7c478c0ac33bf23bdeba1c53c.
---
 youtube_dl/extractor/empflix.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py
index 3c2289e57..1c498d8c8 100644
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -3,6 +3,7 @@
 import re
 
 from .common import InfoExtractor
+from ..utils import fix_xml_ampersands
 
 
 class EmpflixIE(InfoExtractor):
@@ -35,20 +36,17 @@ def _real_extract(self, url):
             r'flashvars\.config = escape\("([^"]+)"',
             webpage, 'flashvars.config')
 
-        # XML is malformed
-        cfg_xml = self._download_webpage(
-            cfg_url, video_id, note='Downloading metadata')
+        cfg_xml = self._download_xml(
+            cfg_url, video_id, note='Downloading metadata',
+            transform_source=fix_xml_ampersands)
 
         formats = [
             {
-                'url': item[1],
-                'format_id': item[0],
-            } for item in re.findall(
-                r'<item>\s*<res>([^>]+)</res>\s*<videoLink>([^<]+)</videoLink>\s*</item>', cfg_xml)
+                'url': item.find('videoLink').text,
+                'format_id': item.find('res').text,
+            } for item in cfg_xml.findall('./quality/item')
         ]
-
-        thumbnail = self._html_search_regex(
-            r'<startThumb>([^<]+)</startThumb>', cfg_xml, 'thumbnail', fatal=False)
+        thumbnail = cfg_xml.find('./startThumb').text
 
         return {
             'id': video_id,