how-gen-ai-affects-researchers/README.md
2024-12-04 16:52:06 +07:00

2.5 KiB

from collections import Counter
from itertools import islice

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import display, Markdown
from wordcloud import WordCloud

# Load the article export that the rest of this script analyses.
articles = pd.read_csv(filepath_or_buffer='data/articles.csv')

# how-gen-ai-affects-researchers

## Most common tags

# Lower-cased tag strings from each tag column; rows with no tags are dropped.
man_tags = articles['Manual Tags'].dropna().str.lower()
auto_tags = articles['Automatic Tags'].dropna().str.lower()

# Every cell holds ';'-separated tags: split, flatten and trim them, keeping
# the manual tags ahead of the automatic ones.
tags = [
    tag
    for column in (man_tags, auto_tags)
    for tag in column.str.split(';').explode().str.strip().to_list()
]
c = Counter(tags)

# Render the 50 most frequent tags as a Markdown numbered list
# ("1." on every item lets the Markdown renderer do the numbering).
text = '## Top 50 common tags:\n\n' + ''.join(
    f"1. {tag} ({count})\n" for tag, count in c.most_common(50)
)
display(Markdown(text))

# Word cloud in which each tag is sized by how often it occurs.
wc = WordCloud(scale=8, background_color="white").generate_from_frequencies(c)
plt.figure(figsize=(10, 10))
plt.imshow(wc)
plt.axis("off")
plt.show()

## Title

# Lower-cased article titles; rows without a title are dropped.
titles = articles['Title'].dropna().str.lower()

# One WordCloud instance serves both the image and the ranking below.
# generate(text) is process_text(text) followed by
# generate_from_frequencies(), so running the two steps explicitly draws the
# same cloud while tokenising the titles only once.
tc = WordCloud(scale=8, background_color="white")
title_freqs = tc.process_text(" ".join(titles))
tc.generate_from_frequencies(title_freqs)

plt.figure(figsize=(10, 10))
plt.imshow(tc)
plt.axis("off")
plt.show()

# The 50 most frequent phrases, highest count first. sorted() is stable, so
# phrases with equal counts keep the order process_text() returned them in.
tc_words = dict(
    sorted(title_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

# Markdown numbered list ("1." on every item lets the renderer number them).
top_title = "### Top 50 phrase in title\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in tc_words.items()
)
display(Markdown(top_title))


## Abstract

# Lower-cased abstracts; rows without an abstract are dropped.
abstracts = articles['Abstract Note'].dropna().str.lower()

# One WordCloud instance serves both the image and the ranking below.
# generate(text) is process_text(text) followed by
# generate_from_frequencies(), so running the two steps explicitly draws the
# same cloud while tokenising the abstracts only once.
ac = WordCloud(scale=8, background_color="white")
abstract_freqs = ac.process_text(" ".join(abstracts))
ac.generate_from_frequencies(abstract_freqs)

plt.figure(figsize=(10, 10))
plt.imshow(ac)
plt.axis("off")
plt.show()

# The 50 most frequent phrases, highest count first. sorted() is stable, so
# phrases with equal counts keep the order process_text() returned them in.
ac_words = dict(
    sorted(abstract_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

# Markdown numbered list ("1." on every item lets the renderer number them).
top_abstract = "### Top 50 phrase in abstract\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in ac_words.items()
)
display(Markdown(top_abstract))


## Journal

# Lower-cased publication (journal) titles; rows without one are dropped.
pub = articles['Publication Title'].dropna().str.lower()

# One WordCloud instance serves both the image and the ranking below.
# generate(text) is process_text(text) followed by
# generate_from_frequencies(), so running the two steps explicitly draws the
# same cloud while tokenising the publication titles only once.
pc = WordCloud(scale=8, background_color="white")
pub_freqs = pc.process_text(" ".join(pub))
pc.generate_from_frequencies(pub_freqs)

plt.figure(figsize=(10, 10))
plt.imshow(pc)
plt.axis("off")
plt.show()

# The 50 most frequent phrases, highest count first. sorted() is stable, so
# phrases with equal counts keep the order process_text() returned them in.
pc_words = dict(
    sorted(pub_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

# Markdown numbered list ("1." on every item lets the renderer number them).
top_pub = "### Top 50 phrase in publication title\n\n" + "".join(
    f"1. {word} ({freq})\n" for word, freq in pc_words.items()
)
display(Markdown(top_pub))