Skip to content

Commit a6bd9f4

Browse files
committed
[KleinanzeigenBridge] random improvements and fixes
Previous MR #4820 introduced a bug where the URI wasn't getting expanded. This is because it is obtained from a non-standard data-href attribute which defaultLinkTo() doesn't support. On top of that: - sanitize the HTML in the content - use the longer description found in the JSON - fix timestamp processing, including for the relative "Today" and "Yesterday" strings - move media to enclosures - be explicit about the elements chosen to augment the description - simplify the image URL processing
1 parent ced9e56 commit a6bd9f4

File tree

1 file changed

+49
-17
lines changed

1 file changed

+49
-17
lines changed

bridges/KleinanzeigenBridge.php

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ public function collectData()
9191
if ($this->queriedContext === 'By profile') {
9292
for ($i = 1; $i <= $this->getInput('pages'); $i++) {
9393
$html = getSimpleHTMLDOM($this->getURI() . '/s-bestandsliste.html?userId=' . $this->getInput('userid') . '&pageNum=' . $i . '&sortingField=SORTING_DATE');
94-
$html = defaultLinkTo($html, $this->getURI());
9594

9695
$foundItem = false;
9796
foreach ($html->find('article.aditem') as $element) {
@@ -120,7 +119,6 @@ public function collectData()
120119
]);
121120

122121
$html = getSimpleHTMLDOM($searchUrl);
123-
$html = defaultLinkTo($html, $this->getURI());
124122

125123
// end of list if returned page is not the expected one
126124
if ($html->find('.pagination-current', 0)->plaintext != $page) {
@@ -138,22 +136,56 @@ private function addItem($element)
138136
{
139137
$item = [];
140138

139+
$item['content'] = '';
140+
141+
$json = $element->find('.aditem-image > script', 0);
142+
if ($json) {
143+
$data = json_decode($json->innertext, true);
144+
$item['title'] = $data['title'];
145+
$item['content'] .= '<div><p>' . $data['description'] . '</div></p></br>';
146+
} else {
147+
$item['title'] = $element->find('h2', 0)->plaintext;
148+
$item['content'] .= $element->find('.aditem-main--middle--description');
149+
}
150+
151+
if ($element->find('.aditem-main--top', 0)) {
152+
$item['content'] .= $element->find('.aditem-main--top', 0);
153+
}
154+
155+
if ($element->find('.aditem-main--middle--price-shipping', 0)) {
156+
$item['content'] .= preg_replace(
157+
'#(<p\s+class="aditem-main--middle--price-shipping--old-price"[^>]*>.*?</p>)#si',
158+
'<s>$1</s>',
159+
$element->find('.aditem-main--middle--price-shipping', 0)
160+
);
161+
}
162+
163+
if ($element->find('.aditem-main--bottom', 0)) {
164+
$item['content'] .= $element->find('.aditem-main--bottom', 0);
165+
}
166+
167+
$item['content'] = sanitize($item['content']);
168+
141169
$item['uid'] = $element->getAttribute('data-adid');
142-
$item['uri'] = $element->getAttribute('data-href');
143-
144-
$item['title'] = $element->find('h2', 0)->plaintext;
145-
$item['timestamp'] = $element->find('div.aditem-main--top--right', 0)->plaintext;
146-
$imgUrl = str_replace(
147-
'rule=$_2.JPG',
148-
'rule=$_57.JPG',
149-
str_replace(
150-
'rule=$_35.JPG',
151-
'rule=$_57.JPG',
152-
$element->find('img', 0) ? $element->find('img', 0)->getAttribute('src') : ''
153-
)
154-
); //enhance img quality
155-
156-
$item['content'] = '<img src="' . $imgUrl . '"/>' . $element->find('div.aditem-main', 0)->outertext;
170+
$item['uri'] = urljoin($this->getURI(), $element->getAttribute('data-href'));
171+
172+
$dateString = trim($element->find('div.aditem-main--top--right', 0)->plaintext);
173+
if ($dateString) {
174+
$dateString = str_ireplace(
175+
['Gestern', 'Heute'],
176+
['yesterday', 'today'],
177+
$dateString
178+
);
179+
180+
$item['timestamp'] = strtotime($dateString);
181+
} else {
182+
$item['timestamp'] = time();
183+
}
184+
185+
if ($element->find('img', 0)) {
186+
//enhance img quality. Cannot use convertLazyLoading() here due to non-standard URI suffix in srcset.
187+
$item['enclosures'] = [preg_replace('/rule=\$_\d+\.AUTO/i', 'rule=$_57.AUTO', $element->find('img', 0)->getAttribute('src')) . '#.image'];
188+
};
157189

158190
$this->items[] = $item;
159191
}

0 commit comments

Comments
 (0)