Go to file
2024-12-04 17:04:08 +07:00
.jupyter export notebook 2024-12-04 17:04:08 +07:00
.local/share/jupyter export notebook 2024-12-04 17:04:08 +07:00
data export notebook 2024-12-04 17:04:08 +07:00
.gitignore add files 2024-12-04 16:52:06 +07:00
analysis.ipynb export notebook 2024-12-04 17:04:08 +07:00
Dockerfile add files 2024-12-04 16:52:06 +07:00
LICENSE Initial commit 2024-12-04 16:27:53 +07:00
README.md add files 2024-12-04 16:52:06 +07:00
requirements.txt add files 2024-12-04 16:52:06 +07:00
slide.md export notebook 2024-12-04 17:04:08 +07:00

import pandas as pd import seaborn as sns from wordcloud import WordCloud import matplotlib.pyplot as plt from IPython.display import display, Markdown from collections import Counter from itertools import islice

# Load the article table from the CSV under data/; the tag, title, abstract
# and publication sections of this notebook all read their columns from it.
# NOTE(review): the trailing "how-gen-ai-affects-researchers" text looks like a
# notebook/project title that was fused onto this line during export — confirm.
articles = pd.read_csv('data/articles.csv')  # how-gen-ai-affects-researchers

Most common tags

# Tags are stored as ';'-separated strings in two columns. Both columns are
# lowercased and pooled so manual and automatic tags are counted together.
man_tags = articles['Manual Tags'].dropna().str.lower()
auto_tags = articles['Automatic Tags'].dropna().str.lower()


def _split_tags(column):
    """Return the individual, whitespace-trimmed tags of a ';'-separated column.

    Empty fragments (produced by a trailing or doubled ';') are dropped so
    that the empty string is never counted as a tag.
    """
    pieces = column.str.split(';').explode().str.strip().to_list()
    return [piece for piece in pieces if piece]


tags = _split_tags(man_tags) + _split_tags(auto_tags)
c = Counter(tags)

# Ranked list of the 50 most frequent tags, rendered as a Markdown ordered
# list. most_common() yields (tag, count) pairs, highest count first.
text = '## Top 50 common tags:\n\n' + ''.join(
    f"1. {tag} ({count})\n" for tag, count in c.most_common(50)
)
display(Markdown(text))

# Word cloud sized by the same tag counts.
wc = WordCloud(scale=8, background_color="white").generate_from_frequencies(c)
plt.figure(figsize=(10,10))
plt.imshow(wc)
plt.axis("off")
plt.show()

Title

# Lowercased article titles, with missing values removed.
titles = articles['Title'].dropna().str.lower()

# Tokenise the joined titles once and reuse the counts for both the image and
# the ranked list. WordCloud.generate(text) is process_text(text) followed by
# generate_from_frequencies(...), so the cloud is drawn from these same counts.
tc = WordCloud(scale=8, background_color="white")
title_freqs = tc.process_text(" ".join(titles))
tc.generate_from_frequencies(title_freqs)
plt.figure(figsize=(10,10))
plt.imshow(tc)
plt.axis("off")
plt.show()

# Keep the 50 most frequent phrases, highest count first. sorted() is stable,
# so phrases with equal counts stay in the order process_text produced them.
tc_words = dict(
    sorted(title_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

top_title = "### Top 50 phrase in title\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in tc_words.items()
)
display(Markdown(top_title))


Abstract

# Lowercased article abstracts, with missing values removed.
abstracts = articles['Abstract Note'].dropna().str.lower()

# Tokenise the joined abstracts once and reuse the counts for both the image
# and the ranked list. WordCloud.generate(text) is process_text(text) followed
# by generate_from_frequencies(...), so the cloud is drawn from these counts.
ac = WordCloud(scale=8, background_color="white")
abstract_freqs = ac.process_text(" ".join(abstracts))
ac.generate_from_frequencies(abstract_freqs)
plt.figure(figsize=(10,10))
plt.imshow(ac)
plt.axis("off")
plt.show()

# Keep the 50 most frequent phrases, highest count first. sorted() is stable,
# so phrases with equal counts stay in the order process_text produced them.
ac_words = dict(
    sorted(abstract_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

top_abstract = "### Top 50 phrase in abstract\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in ac_words.items()
)
display(Markdown(top_abstract))


Journal

# Lowercased publication (journal) titles, with missing values removed.
pub = articles['Publication Title'].dropna().str.lower()

# Tokenise the joined publication titles once and reuse the counts for both
# the image and the ranked list. WordCloud.generate(text) is process_text(text)
# followed by generate_from_frequencies(...), so the cloud uses these counts.
pc = WordCloud(scale=8, background_color="white")
pub_freqs = pc.process_text(" ".join(pub))
pc.generate_from_frequencies(pub_freqs)
plt.figure(figsize=(10,10))
plt.imshow(pc)
plt.axis("off")
plt.show()

# Keep the 50 most frequent phrases, highest count first. sorted() is stable,
# so phrases with equal counts stay in the order process_text produced them.
pc_words = dict(
    sorted(pub_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

top_pub = "### Top 50 phrase in publication title\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in pc_words.items()
)
display(Markdown(top_pub))