how-gen-ai-affects-researchers/README.md

import pandas as pd
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from collections import Counter
from itertools import islice

# Load data/articles.csv into the `articles` DataFrame. The sections below read
# its 'Manual Tags', 'Automatic Tags', 'Title', 'Abstract Note' and
# 'Publication Title' columns.
articles = pd.read_csv('data/articles.csv')

# how-gen-ai-affects-researchers

## Most common tags

# Both tag columns hold ';'-separated tag strings. astype(str) keeps the .str
# accessor usable even when a column is completely empty (pandas then reads it
# as float NaN, and .str would raise on the resulting non-string Series).
man_tags = articles['Manual Tags'].dropna().astype(str).str.lower()
auto_tags = articles['Automatic Tags'].dropna().astype(str).str.lower()

# Pool the tags of both columns: one entry per occurrence, lower-cased and
# stripped of surrounding whitespace. Empty fragments (produced by a trailing
# or doubled ';') are dropped so that '' is never counted as a tag.
tags = [
    tag
    for column in (man_tags, auto_tags)
    for tag in column.str.split(';').explode().str.strip()
    if tag
]
c = Counter(tags)

# most_common() yields (tag, count) pairs, highest count first.
# Every item is written as "1."; Markdown renderers renumber ordered lists,
# so the output still displays as 1, 2, 3, ...
text = '## Top 50 common tags:\n\n' + ''.join(
    f"1. {tag} ({count})\n" for tag, count in c.most_common(50)
)
display(Markdown(text))

# Draw the word cloud directly from the tag counts (no re-tokenising needed).
wc = WordCloud(scale=8, background_color="white").generate_from_frequencies(c)
plt.figure(figsize=(10, 10))
plt.imshow(wc)
plt.axis("off")
plt.show()

## Title

# Lower-cased titles of every article that has one.
titles = articles['Title'].dropna().str.lower()

# Join all titles into one space-separated text and build the cloud from it.
title_text = " ".join(titles)
tc = WordCloud(background_color="white", scale=8)
tc.generate(title_text)

# Show the cloud on a 10x10 figure with the axes hidden.
plt.figure(figsize=(10, 10))
plt.imshow(tc)
plt.axis("off")
plt.show()

---

top_title = "### Top 50 phrase in title\n\n"

# process_text() only tokenises and counts ({phrase: count}), so the rendering
# options used for the picture (scale, background colour) are not needed here.
title_freqs = WordCloud().process_text(" ".join(titles))

# Keep the 50 most frequent phrases, highest count first. most_common() leaves
# equal counts in their original order, and dict() preserves that ranking.
tc_words = dict(Counter(title_freqs).most_common(50))

# Every item is written as "1."; Markdown renderers renumber ordered lists,
# so the output still displays as 1, 2, 3, ...
top_title += "".join(f"1. {word} ({freq})\n" for word, freq in tc_words.items())

display(Markdown(top_title))

---

## Abstract

# Lower-cased abstracts of every article that has one.
abstracts = articles['Abstract Note'].dropna().str.lower()

# Join all abstracts into one space-separated text and build the cloud from it.
abstract_text = " ".join(abstracts)
ac = WordCloud(background_color="white", scale=8)
ac.generate(abstract_text)

# Show the cloud on a 10x10 figure with the axes hidden.
plt.figure(figsize=(10, 10))
plt.imshow(ac)
plt.axis("off")
plt.show()

---

top_abstract = "### Top 50 phrase in abstract\n\n"

# process_text() only tokenises and counts ({phrase: count}), so the rendering
# options used for the picture (scale, background colour) are not needed here.
abstract_freqs = WordCloud().process_text(" ".join(abstracts))

# Keep the 50 most frequent phrases, highest count first. most_common() leaves
# equal counts in their original order, and dict() preserves that ranking.
ac_words = dict(Counter(abstract_freqs).most_common(50))

# Every item is written as "1."; Markdown renderers renumber ordered lists,
# so the output still displays as 1, 2, 3, ...
top_abstract += "".join(f"1. {word} ({freq})\n" for word, freq in ac_words.items())

display(Markdown(top_abstract))

---

## Journal

# Lower-cased publication titles of every article that has one.
pub = articles['Publication Title'].dropna().str.lower()

# Join all publication titles into one space-separated text and build the
# cloud from it.
pub_text = " ".join(pub)
pc = WordCloud(background_color="white", scale=8)
pc.generate(pub_text)

# Show the cloud on a 10x10 figure with the axes hidden.
plt.figure(figsize=(10, 10))
plt.imshow(pc)
plt.axis("off")
plt.show()

---

top_pub = "### Top 50 phrase in publication title\n\n"

# process_text() only tokenises and counts ({phrase: count}), so the rendering
# options used for the picture (scale, background colour) are not needed here.
pub_freqs = WordCloud().process_text(" ".join(pub))

# Keep the 50 most frequent phrases, highest count first. most_common() leaves
# equal counts in their original order, and dict() preserves that ranking.
pc_words = dict(Counter(pub_freqs).most_common(50))

# Every item is written as "1."; Markdown renderers renumber ordered lists,
# so the output still displays as 1, 2, 3, ...
top_pub += "".join(f"1. {word} ({freq})\n" for word, freq in pc_words.items())

display(Markdown(top_pub))