In [1]:
import pandas as pd
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from collections import Counter
from itertools import islice

# Load the article export into a DataFrame. The cells below read its
# 'Manual Tags', 'Automatic Tags', 'Title', 'Abstract Note' and
# 'Publication Title' columns. The path is relative to the working directory.
articles = pd.read_csv('data/articles.csv')  # how-gen-ai-affects-researchers

## Most common tags

In [2]:
# Count how often each tag occurs across the manual and automatic tag columns,
# list the 50 most frequent ones and draw a word cloud sized by tag frequency.
man_tags = articles['Manual Tags'].dropna().str.lower()
auto_tags = articles['Automatic Tags'].dropna().str.lower()

# Each cell holds several tags separated by ';'. Manual tags are processed
# before automatic ones so that ties in the ranking keep the original order.
# A trailing or doubled ';' yields an empty string after stripping; those are
# dropped so they are not counted (or drawn) as a tag.
tags = [
    tag
    for column in (man_tags, auto_tags)
    for tag in column.str.split(';').explode().str.strip()
    if tag
]
c = Counter(tags)

text = '## Top 50 common tags:\n\n'
# most_common() yields (tag, count) pairs, most frequent first.
for tag, count in c.most_common(50):
    # Every item is written as "1."; Markdown renumbers ordered lists itself.
    text += f"1. {tag} ({count})\n"
display(Markdown(text))

wc = WordCloud(scale=8, background_color="white").generate_from_frequencies(c)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(wc)
ax.axis("off")
plt.show()

Top 50 common tags:¶

  1. artificial intelligence (535)
  2. large language models (151)
  3. machine learning (130)
  4. chatgpt (123)
  5. generative artificial intelligence (87)
  6. deep learning (76)
  7. natural language processing (69)
  8. generative ai (67)
  9. chatbots (59)
  10. large language model (47)
  11. language (46)
  12. ai (35)
  13. artificial intelligence (ai) (31)
  14. automation (28)
  15. education (28)
  16. ethics (28)
  17. language model (23)
  18. higher education (22)
  19. explainable artificial intelligence (22)
  20. algorithms (21)
  21. llm (21)
  22. decision making (20)
  23. neural networks (19)
  24. sustainability (19)
  25. chatbot (18)
  26. large language models (llms) (17)
  27. bibliometrics (17)
  28. software (16)
  29. healthcare (15)
  30. sentiment analysis (15)
  31. design (14)
  32. systematic review (14)
  33. accuracy (14)
  34. digital transformation (14)
  35. privacy (13)
  36. technology (13)
  37. gpt-4 (13)
  38. internet of things (13)
  39. bibliometric analysis (13)
  40. research methodology (12)
  41. research (12)
  42. sustainable development (12)
  43. datasets (12)
  44. big data (12)
  45. students (12)
  46. llms (12)
  47. natural language (11)
  48. learning (11)
  49. teachers (11)
  50. data mining (11)
No description has been provided for this image
In [4]:
## Title
# Rank the 50 most frequent words/phrases in article titles and draw a word
# cloud from the same frequency table.
titles = articles['Title'].dropna().str.lower()
title_text = " ".join(titles)

top_title = "### Top 50 phrase in title\n\n"

# Tokenise the text once: process_text() returns the phrase -> count mapping,
# and generate_from_frequencies() below builds the image from that mapping.
# Calling generate() on the raw text would run process_text() a second time.
tc = WordCloud(scale=8, background_color="white")
title_freqs = tc.process_text(title_text)

# sorted() is stable, so phrases with equal counts keep their original order.
tc_words = dict(
    sorted(title_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

for word, freq in tc_words.items():
    # Every item is written as "1."; Markdown renumbers ordered lists itself.
    top_title += f"1. {word} ({freq})\n"

display(Markdown(top_title))

tc.generate_from_frequencies(title_freqs)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(tc)
ax.axis("off")
plt.show()

Top 50 phrase in title¶

  1. artificial intelligence (456)
  2. based (152)
  3. language model (143)
  4. ai (137)
  5. large language (135)
  6. using (103)
  7. chatgpt (98)
  8. model (92)
  9. application (82)
  10. analysis (74)
  11. research (72)
  12. system (63)
  13. generative ai (63)
  14. generative artificial (62)
  15. study (59)
  16. education (47)
  17. learning (44)
  18. student (43)
  19. image (43)
  20. human (42)
  21. exploring (42)
  22. development (41)
  23. medical (39)
  24. data (39)
  25. approach (37)
  26. digital (37)
  27. framework (36)
  28. impact (36)
  29. detection (35)
  30. review (34)
  31. performance (34)
  32. llm (34)
  33. design (34)
  34. use (33)
  35. evaluation (33)
  36. management (32)
  37. perspective (31)
  38. assessment (30)
  39. potential (30)
  40. technology (30)
  41. future (30)
  42. prediction (30)
  43. generation (29)
  44. deep learning (29)
  45. role (28)
  46. challenge (28)
  47. tool (28)
  48. method (28)
  49. algorithm (27)
  50. machine learning (27)
No description has been provided for this image
In [5]:
# Rank the 50 most frequent words/phrases in article abstracts and draw a word
# cloud from the same frequency table.
abstracts = articles['Abstract Note'].dropna().str.lower()
abstract_text = " ".join(abstracts)

top_abstract = "### Top 50 phrase in abstract\n\n"

# Tokenise the text once: process_text() returns the phrase -> count mapping,
# and generate_from_frequencies() below builds the image from that mapping.
# Calling generate() on the raw text would run process_text() a second time,
# which is the most expensive for this (largest) text column.
ac = WordCloud(scale=8, background_color="white")
abstract_freqs = ac.process_text(abstract_text)

# sorted() is stable, so phrases with equal counts keep their original order.
ac_words = dict(
    sorted(abstract_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

for word, freq in ac_words.items():
    # Every item is written as "1."; Markdown renumbers ordered lists itself.
    top_abstract += f"1. {word} ({freq})\n"

display(Markdown(top_abstract))

ac.generate_from_frequencies(abstract_freqs)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(ac)
ax.axis("off")
plt.show()

Top 50 phrase in abstract¶

  1. artificial intelligence (1319)
  2. research (1197)
  3. ai (1145)
  4. model (903)
  5. data (787)
  6. study (652)
  7. chatgpt (592)
  8. using (559)
  9. use (550)
  10. method (549)
  11. based (546)
  12. llm (525)
  13. potential (504)
  14. system (465)
  15. application (441)
  16. language model (440)
  17. intelligence ai (439)
  18. human (422)
  19. result (419)
  20. performance (405)
  21. development (388)
  22. used (379)
  23. analysis (379)
  24. large language (361)
  25. student (357)
  26. challenge (346)
  27. dataset (337)
  28. tool (335)
  29. paper (330)
  30. provide (321)
  31. field (316)
  32. task (311)
  33. new (306)
  34. image (302)
  35. information (301)
  36. technology (299)
  37. accuracy (297)
  38. approach (287)
  39. generative ai (278)
  40. including (276)
  41. design (273)
  42. framework (266)
  43. impact (263)
  44. patient (256)
  45. process (246)
  46. work (240)
  47. education (238)
  48. article (234)
  49. enhance (234)
  50. two (232)
No description has been provided for this image
In [6]:
# Rank the 50 most frequent words/phrases in publication titles and draw a word
# cloud from the same frequency table.
pub = articles['Publication Title'].dropna().str.lower()
pub_text = " ".join(pub)

top_pub = "### Top 50 phrase in publication title\n\n"

# Tokenise the text once: process_text() returns the phrase -> count mapping,
# and generate_from_frequencies() below builds the image from that mapping.
# Calling generate() on the raw text would run process_text() a second time.
pc = WordCloud(scale=8, background_color="white")
pub_freqs = pc.process_text(pub_text)

# sorted() is stable, so phrases with equal counts keep their original order.
pc_words = dict(
    sorted(pub_freqs.items(), key=lambda item: item[1], reverse=True)[:50]
)

for word, freq in pc_words.items():
    # Every item is written as "1."; Markdown renumbers ordered lists itself.
    top_pub += f"1. {word} ({freq})\n"

display(Markdown(top_pub))

pc.generate_from_frequencies(pub_freqs)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(pc)
ax.axis("off")
plt.show()

Top 50 phrase in publication title¶

  1. journal (189)
  2. computer (129)
  3. international conference (124)
  4. artificial intelligence (71)
  5. heliyon (64)
  6. science (63)
  7. research (54)
  8. education artificial (50)
  9. international journal (48)
  10. engineering (47)
  11. application (44)
  12. education (32)
  13. medicine (31)
  14. machine learning (28)
  15. human behavior (25)
  16. information (23)
  17. knowledge based (23)
  18. information systems (22)
  19. energy (21)
  20. information management (21)
  21. intelligent information (21)
  22. informatics (20)
  23. technology (19)
  24. big data (19)
  25. 28th international (19)
  26. engineering systems (19)
  27. systems kes (19)
  28. open (17)
  29. conference (17)
  30. behavior artificial (16)
  31. artificial humans (16)
  32. health (15)
  33. privacy (15)
  34. 11th international (15)
  35. data analytics (15)
  36. iot security (15)
  37. ai (14)
  38. technologies (14)
  39. communication (14)
  40. computational (13)
  41. medical (12)
  42. sustainability (12)
  43. industry (12)
  44. iscience (12)
  45. management data (12)
  46. data insights (12)
  47. digital discovery (12)
  48. social science (12)
  49. current (11)
  50. review (11)
No description has been provided for this image