Skip to content

Commit

Permalink
Use lxml to help generate clean HTML
Browse files (browse the repository at this point in the history)
  • Loading branch information
davidmezzetti committed Apr 22, 2021
1 parent 9c95f96 commit 2c7365d
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions examples/search.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Search a paperai index.
Requires streamlit to be installed.
pip install streamlit
Requires streamlit and lxml to be installed.
pip install streamlit lxml
"""

import os
Expand All @@ -12,6 +12,8 @@
import pandas as pd
import streamlit as st

from lxml.html.clean import clean_html

from paperai.models import Models
from paperai.query import Query

Expand Down Expand Up @@ -56,12 +58,11 @@ def search(self, query, topn, threshold):

# Print each result, sorted by max score descending
for uid in sorted(documents, key=lambda k: sum([x[0] for x in documents[k]]), reverse=True):
cur.execute("SELECT Title, Published, Publication, Design, Size, Sample, Method, Entry, Id, Reference " +
cur.execute("SELECT Title, Published, Publication, Design, Size, Sample, Method, Entry, Id, Reference " +
"FROM articles WHERE id = ?", [uid])
article = cur.fetchone()

matches = "\n".join([text for _, text in documents[uid]])
matches = matches.replace("<", "&lt;").replace(">", "&gt;").replace("&", "&amp;")
matches = "<br/>".join([text for _, text in documents[uid]])

title = "<a target='_blank' href='%s'>%s</a>" % (article[9], article[0])

Expand Down Expand Up @@ -94,7 +95,8 @@ def run(self):
st.markdown("<p class='small-font'>%d results</p>" % len(df), unsafe_allow_html=True)

if not df.empty:
st.write(df[columns].to_html(escape=False, index=False), unsafe_allow_html=True)
html = df[columns].to_html(escape=False, index=False)
st.write(clean_html(html), unsafe_allow_html=True)

@st.cache(allow_output_mutation=True)
def create(path):
Expand All @@ -113,7 +115,7 @@ def create(path):

if len(sys.argv) <= 1 or not os.path.isdir(sys.argv[1]):
st.error("Path to embeddings index not present or invalid")
else:
else:
st.set_page_config(layout="wide")

# Create and run application
Expand Down

0 comments on commit 2c7365d

Please sign in to comment.