.jupyter
.local/share/jupyter
data
.gitignore
analysis.ipynb
Dockerfile
LICENSE
README.md
requirements.txt
slide.md
import pandas as pd import seaborn as sns from wordcloud import WordCloud import matplotlib.pyplot as plt from IPython.display import display, Markdown from collections import Counter from itertools import islice
# how-gen-ai-affects-researchers
#
# Load the article metadata table that the analysis steps in this file read.
# Columns used here: 'Manual Tags', 'Automatic Tags', 'Title',
# 'Abstract Note' and 'Publication Title'.
# The path is relative, so the working directory must contain `data/`.
# NOTE(review): the column names look like a Zotero CSV export -- confirm.
articles = pd.read_csv('data/articles.csv')
Most common tags
# Count how often each tag occurs across the manual and automatic tag columns,
# list the 50 most frequent ones, and draw a word cloud of all of them.

# Each cell holds several tags separated by ';'. Lower-casing makes the same
# tag written with different casing count as one.
man_tags = articles['Manual Tags'].dropna().str.lower()
auto_tags = articles['Automatic Tags'].dropna().str.lower()

# One list entry per individual tag, manual tags first, then automatic ones.
tags = (
    man_tags.str.split(';').explode().str.strip().to_list()
    + auto_tags.str.split(';').explode().str.strip().to_list()
)
# A trailing or doubled ';' leaves an empty string after strip(); without this
# filter it would be counted as a tag and show up in the list and the cloud.
tags = [tag for tag in tags if tag]
c = Counter(tags)

# Every item is written as "1."; Markdown renumbers ordered lists on render.
text = '## Top 50 common tags:\n\n' + ''.join(
    f"1. {tag} ({count})\n" for tag, count in c.most_common(50)
)
display(Markdown(text))

# Word sizes in the cloud are driven directly by the tag counts.
wc = WordCloud(scale=8, background_color="white").generate_from_frequencies(c)
plt.figure(figsize=(10,10))
plt.imshow(wc)
plt.axis("off")
plt.show()
Title
# Word cloud and top-50 phrase list for the article titles.

# Lower-case so that casing differences do not split the counts.
titles = articles['Title'].dropna().str.lower()

# WordCloud.generate(text) is process_text(text) followed by
# generate_from_frequencies(...). Doing the two steps explicitly lets the same
# frequency table feed both the image and the list, so the text is tokenised
# only once instead of a second time on a throwaway WordCloud.
tc = WordCloud(scale=8, background_color="white")
title_freqs = tc.process_text(" ".join(titles))
tc.generate_from_frequencies(title_freqs)

plt.figure(figsize=(10,10))
plt.imshow(tc)
plt.axis("off")
plt.show()

# The 50 most frequent phrases, highest count first; ties keep the order in
# which process_text produced them (same as a stable descending sort).
tc_words = dict(Counter(title_freqs).most_common(50))

# Every item is written as "1."; Markdown renumbers ordered lists on render.
top_title = "### Top 50 phrase in title\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in tc_words.items()
)
display(Markdown(top_title))
Abstract
# Word cloud and top-50 phrase list for the article abstracts.

# Lower-case so that casing differences do not split the counts.
abstracts = articles['Abstract Note'].dropna().str.lower()

# WordCloud.generate(text) is process_text(text) followed by
# generate_from_frequencies(...). Doing the two steps explicitly lets the same
# frequency table feed both the image and the list, so the (potentially long)
# abstract text is tokenised only once instead of twice.
ac = WordCloud(scale=8, background_color="white")
abstract_freqs = ac.process_text(" ".join(abstracts))
ac.generate_from_frequencies(abstract_freqs)

plt.figure(figsize=(10,10))
plt.imshow(ac)
plt.axis("off")
plt.show()

# The 50 most frequent phrases, highest count first; ties keep the order in
# which process_text produced them (same as a stable descending sort).
ac_words = dict(Counter(abstract_freqs).most_common(50))

# Every item is written as "1."; Markdown renumbers ordered lists on render.
top_abstract = "### Top 50 phrase in abstract\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in ac_words.items()
)
display(Markdown(top_abstract))
Journal
# Word cloud and top-50 phrase list for the publication (journal) titles.

# Lower-case so that casing differences do not split the counts.
pub = articles['Publication Title'].dropna().str.lower()

# WordCloud.generate(text) is process_text(text) followed by
# generate_from_frequencies(...). Doing the two steps explicitly lets the same
# frequency table feed both the image and the list, so the text is tokenised
# only once instead of a second time on a throwaway WordCloud.
pc = WordCloud(scale=8, background_color="white")
pub_freqs = pc.process_text(" ".join(pub))
pc.generate_from_frequencies(pub_freqs)

plt.figure(figsize=(10,10))
plt.imshow(pc)
plt.axis("off")
plt.show()

# The 50 most frequent phrases, highest count first; ties keep the order in
# which process_text produced them (same as a stable descending sort).
pc_words = dict(Counter(pub_freqs).most_common(50))

# Every item is written as "1."; Markdown renumbers ordered lists on render.
top_pub = "### Top 50 phrase in publication title\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in pc_words.items()
)
display(Markdown(top_pub))