In [1]:
import pandas as pd
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from collections import Counter
from itertools import islice
# Load the article metadata table that every later cell reads from.
# The path is relative, so the notebook must be run from the directory that contains `data/`.
# NOTE(review): the columns used below ('Manual Tags', 'Automatic Tags', 'Title',
# 'Abstract Note', 'Publication Title') look like a Zotero CSV export — confirm the data source.
articles = pd.read_csv('data/articles.csv')# how-gen-ai-affects-researchers
Most common tags¶
In [2]:
# Count how often each tag occurs across the manual and automatic tag columns,
# list the 50 most frequent tags, and draw a word cloud sized by tag frequency.

# Each cell holds several tags joined by ';'. Lower-casing first makes
# differently-cased spellings of the same tag count together.
man_tags = articles['Manual Tags'].dropna().str.lower()
auto_tags = articles['Automatic Tags'].dropna().str.lower()

# One list entry per tag occurrence, manual tags first, then automatic tags.
# Empty entries (left behind by a trailing or doubled ';') are dropped so that
# they are not counted, listed, or drawn as a tag.
tags = [
    tag
    for column in (man_tags, auto_tags)
    for tag in column.str.split(';').explode().str.strip().to_list()
    if tag
]
c = Counter(tags)

# most_common() yields (tag, count) pairs, most frequent first.
# Every item is written as "1." on purpose: Markdown renumbers ordered lists.
text = '## Top 50 common tags:\n\n' + ''.join(
    f"1. {tag} ({count})\n" for tag, count in c.most_common(50)
)
display(Markdown(text))

# The cloud uses the full counter, not only the 50 tags listed above.
wc = WordCloud(scale=8, background_color="white").generate_from_frequencies(c)
plt.figure(figsize=(10,10))
plt.imshow(wc)
plt.axis("off")
plt.show()
Top 50 common tags:¶
- artificial intelligence (535)
- large language models (151)
- machine learning (130)
- chatgpt (123)
- generative artificial intelligence (87)
- deep learning (76)
- natural language processing (69)
- generative ai (67)
- chatbots (59)
- large language model (47)
- language (46)
- ai (35)
- artificial intelligence (ai) (31)
- automation (28)
- education (28)
- ethics (28)
- language model (23)
- higher education (22)
- explainable artificial intelligence (22)
- algorithms (21)
- llm (21)
- decision making (20)
- neural networks (19)
- sustainability (19)
- chatbot (18)
- large language models (llms) (17)
- bibliometrics (17)
- software (16)
- healthcare (15)
- sentiment analysis (15)
- design (14)
- systematic review (14)
- accuracy (14)
- digital transformation (14)
- privacy (13)
- technology (13)
- gpt-4 (13)
- internet of things (13)
- bibliometric analysis (13)
- research methodology (12)
- research (12)
- sustainable development (12)
- datasets (12)
- big data (12)
- students (12)
- llms (12)
- natural language (11)
- learning (11)
- teachers (11)
- data mining (11)
In [4]:
## Title
# Rank the words and word pairs that WordCloud extracts from the article titles,
# list the 50 most frequent, and draw a word cloud from the same counts.
titles = articles['Title'].dropna().str.lower()
title_text = " ".join(titles)

# Tokenise once and reuse the counts for both the list and the image.
# WordCloud.generate() runs process_text() internally, so calling both on the
# same text (as before) did the most expensive step twice.
tc = WordCloud(scale=8, background_color="white")
title_freqs = tc.process_text(title_text)

# sorted() is stable, so entries with equal counts keep the order in which
# process_text() returned them.
tc_words = dict(
    sorted(title_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

# Every item is written as "1." on purpose: Markdown renumbers ordered lists.
top_title = "### Top 50 phrase in title\n\n"
top_title += "".join(f"1. {word} ({freq})\n" for word, freq in tc_words.items())
display(Markdown(top_title))

# The cloud is drawn from all counts, not only the 50 entries listed above.
tc.generate_from_frequencies(title_freqs)
plt.figure(figsize=(10,10))
plt.imshow(tc)
plt.axis("off")
plt.show()
Top 50 phrase in title¶
- artificial intelligence (456)
- based (152)
- language model (143)
- ai (137)
- large language (135)
- using (103)
- chatgpt (98)
- model (92)
- application (82)
- analysis (74)
- research (72)
- system (63)
- generative ai (63)
- generative artificial (62)
- study (59)
- education (47)
- learning (44)
- student (43)
- image (43)
- human (42)
- exploring (42)
- development (41)
- medical (39)
- data (39)
- approach (37)
- digital (37)
- framework (36)
- impact (36)
- detection (35)
- review (34)
- performance (34)
- llm (34)
- design (34)
- use (33)
- evaluation (33)
- management (32)
- perspective (31)
- assessment (30)
- potential (30)
- technology (30)
- future (30)
- prediction (30)
- generation (29)
- deep learning (29)
- role (28)
- challenge (28)
- tool (28)
- method (28)
- algorithm (27)
- machine learning (27)
In [5]:
# Rank the words and word pairs that WordCloud extracts from the article abstracts,
# list the 50 most frequent, and draw a word cloud from the same counts.
abstracts = articles['Abstract Note'].dropna().str.lower()
abstract_text = " ".join(abstracts)

# Tokenise once and reuse the counts for both the list and the image.
# WordCloud.generate() runs process_text() internally, so calling both on the
# same text (as before) did the most expensive step twice.
ac = WordCloud(scale=8, background_color="white")
abstract_freqs = ac.process_text(abstract_text)

# sorted() is stable, so entries with equal counts keep the order in which
# process_text() returned them.
ac_words = dict(
    sorted(abstract_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

# Every item is written as "1." on purpose: Markdown renumbers ordered lists.
top_abstract = "### Top 50 phrase in abstract\n\n"
top_abstract += "".join(f"1. {word} ({freq})\n" for word, freq in ac_words.items())
display(Markdown(top_abstract))

# The cloud is drawn from all counts, not only the 50 entries listed above.
ac.generate_from_frequencies(abstract_freqs)
plt.figure(figsize=(10,10))
plt.imshow(ac)
plt.axis("off")
plt.show()
Top 50 phrase in abstract¶
- artificial intelligence (1319)
- research (1197)
- ai (1145)
- model (903)
- data (787)
- study (652)
- chatgpt (592)
- using (559)
- use (550)
- method (549)
- based (546)
- llm (525)
- potential (504)
- system (465)
- application (441)
- language model (440)
- intelligence ai (439)
- human (422)
- result (419)
- performance (405)
- development (388)
- used (379)
- analysis (379)
- large language (361)
- student (357)
- challenge (346)
- dataset (337)
- tool (335)
- paper (330)
- provide (321)
- field (316)
- task (311)
- new (306)
- image (302)
- information (301)
- technology (299)
- accuracy (297)
- approach (287)
- generative ai (278)
- including (276)
- design (273)
- framework (266)
- impact (263)
- patient (256)
- process (246)
- work (240)
- education (238)
- article (234)
- enhance (234)
- two (232)
In [6]:
# Rank the words and word pairs that WordCloud extracts from the publication titles
# (journal / conference names), list the 50 most frequent, and draw a word cloud
# from the same counts.
pub = articles['Publication Title'].dropna().str.lower()
pub_text = " ".join(pub)

# Tokenise once and reuse the counts for both the list and the image.
# WordCloud.generate() runs process_text() internally, so calling both on the
# same text (as before) did the most expensive step twice.
pc = WordCloud(scale=8, background_color="white")
pub_freqs = pc.process_text(pub_text)

# sorted() is stable, so entries with equal counts keep the order in which
# process_text() returned them.
pc_words = dict(
    sorted(pub_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

# Every item is written as "1." on purpose: Markdown renumbers ordered lists.
top_pub = "### Top 50 phrase in publication title\n\n"
top_pub += "".join(f"1. {word} ({freq})\n" for word, freq in pc_words.items())
display(Markdown(top_pub))

# The cloud is drawn from all counts, not only the 50 entries listed above.
pc.generate_from_frequencies(pub_freqs)
plt.figure(figsize=(10,10))
plt.imshow(pc)
plt.axis("off")
plt.show()
Top 50 phrase in publication title¶
- journal (189)
- computer (129)
- international conference (124)
- artificial intelligence (71)
- heliyon (64)
- science (63)
- research (54)
- education artificial (50)
- international journal (48)
- engineering (47)
- application (44)
- education (32)
- medicine (31)
- machine learning (28)
- human behavior (25)
- information (23)
- knowledge based (23)
- information systems (22)
- energy (21)
- information management (21)
- intelligent information (21)
- informatics (20)
- technology (19)
- big data (19)
- 28th international (19)
- engineering systems (19)
- systems kes (19)
- open (17)
- conference (17)
- behavior artificial (16)
- artificial humans (16)
- health (15)
- privacy (15)
- 11th international (15)
- data analytics (15)
- iot security (15)
- ai (14)
- technologies (14)
- communication (14)
- computational (13)
- medical (12)
- sustainability (12)
- industry (12)
- iscience (12)
- management data (12)
- data insights (12)
- digital discovery (12)
- social science (12)
- current (11)
- review (11)