philipp-zettl committed on
Commit
941ea0e
1 Parent(s): 5e38ce5

Update src/text.py

Browse files
Files changed (1) hide show
  1. src/text.py +2 -1
src/text.py CHANGED
@@ -100,9 +100,10 @@ def doctree_from_url(url, elem_class='div', class_name='article-body'):
100
  article = extract_article(url)
101
  # convert to MD to handle splitting better
102
  article_content = select_content(article.html, elem_class, class_name)
103
- requires_title = not any(filter(lambda x: x.startswith('# '), article_content.split('\n')))
104
 
105
  if requires_title:
 
106
  article_content = f"# {article.title}\n\n{article_content}"
107
  article_content = article_content.replace('\n\n', '\n').replace('#', '%%@@%%')
108
  # fix relative website links
 
100
  article = extract_article(url)
101
  # convert to MD to handle splitting better
102
  article_content = select_content(article.html, elem_class, class_name)
103
+ requires_title = not any(filter(lambda x: x.strip().startswith('# '), article_content.split('\n')))
104
 
105
  if requires_title:
106
+ print('Didn\'t find title, will add it manually...')
107
  article_content = f"# {article.title}\n\n{article_content}"
108
  article_content = article_content.replace('\n\n', '\n').replace('#', '%%@@%%')
109
  # fix relative website links