Index of Journals
1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14.
1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14.Tags: Journal,Investment,Management,
1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17. 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36. 37.Tags: Journal,Investment,Management,
"""Extract phone numbers from exported WhatsApp chats and chart them by country.

FYI: Country: Code
Kenya: +254
Uganda: +256

Pipeline: read every chat export under ``CHAT_EXPORT_DIR``, drop unwanted
lines, pull out phone numbers with a set of format-specific regular
expressions, dump the unique numbers to a JSON text file, resolve each number
to a country and draw a pie chart of the per-country counts.
"""
import os
import re
import json
from time import time

# Third-party imports (pandas, matplotlib, phonenumbers, pycountry) are
# deferred into the functions that use them so the pure-text helpers below
# stay importable without the plotting / phone-number stack installed.

CHAT_EXPORT_DIR = "f1/WA_Africa_202112/"

# Lines mentioning this name are excluded from the scan.
EXCLUDED_NAME = "Ashish Jain"

# Lines carrying a group-invite link are set aside and not scanned for numbers.
INVITE_LINK_PREFIX = "https://chat.whatsapp.com/"

# NOTE(review): the original script stored this slice in a variable named
# ``top_10`` but kept nine counts; together with the "Others" bucket that gives
# ten labels when the counts are distinct. Confirm nine is the intended value.
TOP_COUNTRY_SLICES = 9

PIE_COLORS = [
    '#f47961', '#f0c419', '#255c61', '#78909c', '#6ad4cf',
    '#17aee8', '#5c6bc0', '#444b6e', '#ef4c60', '#744593',
    '#ee5691', '#9ccc65', '#708b75', '#d1cb65', '#0d8de1',
    '#a4554b', '#694f5d', '#45adb3', '#26a69a', '#bdc7cc',
]

# One pattern per observed number layout. Order matters only for the order of
# the returned matches. ``\s`` also matches the non-breaking space (\xa0).
PHONE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r"[+][0-9]{3}\s[0-9][0-9][0-9]\s[0-9]{6}",  # +123 123 123456
    r"[+][0-9][0-9][0-9]\s[0-9]{9}",  # +254 111222333
    r"[+][0-9][0-9][0-9]\s[0-9][0-9]\s[0-9][0-9][0-9]\s[0-9][0-9][0-9][0-9]",  # +258 12 345 6789
    r"[+][0-9][0-9]\s[0-9][0-9][0-9]\s[0-9]{7}",  # +92 123 1234567
    r"[+][0-9][0-9][0-9]\s[0-9][0-9][0-9]\s[0-9][0-9][0-9]\s[0-9][0-9][0-9][0-9]",  # +234 123 456 1234
    r"[+][0-9][0-9]\s[0-9][0-9][0-9][0-9]\s[0-9][0-9][0-9][0-9][0-9][0-9]",  # +44 1234 123456
    r"[0][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",  # 0123412345
    r"[+][0-9]{11}",  # +12345678901
    r"[+][0-9][0-9][0-9]\s[0-9][0-9][0-9][-][0-9][0-9][0-9][0-9][0-9][0-9]",  # +212 123-123456
    r"[+][0-9]\s[\(][0-9][0-9][0-9][\)]\s[0-9]{3}[-][0-9]{4}",  # +1 (123) 123-1234
    r"[+][0-9][0-9]\s[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",  # +31 12345671234
    r"[+][0-9]{2}\s[0-9]{5}\s[0-9]{5}",  # +91 12345 12345
    r"[+][0-9]{2}\s[0-9]{3}[-][0-9]{4}[-][0-9]{5}",  # +62 123-1234-12345
    r"[+][0-9][0-9]\s[0-9][0-9][0-9]\s[0-9][0-9][0-9]\s[0-9][0-9]\s[0-9][0-9]",  # +90 123 123 27 12
    r"[+][0-9]{2}\s[0-9]{4}\s[0-9]{3}\s[0-9]{3}",  # +91 1234 123 123
    r"[+][0-9]{2}\s[0-9]{2}\s[0-9]{3}\s[0-9]{4}",  # +27 65 123 1234
    r"[+][0-9]{5}[-][0-9]{7}",  # +12345-1234567
    r"[+][0-9]{3}\s[0-9]{4}\s[0-9]{4}",  # +968 1234 1234
    r"[+][0-9]{12}",  # +123451234512
    r"[+][0-9]{3}\s[0-9]{8}",  # +229 12345678
    r"[+][0-9]{2}\s[0-9]{3}\s[0-9]{3}\s[0-9]{3}",  # +40 123 123 123
    r"[+][0-9]{2}\s[0-9]{3}\s[0-9]{3}\s[0-9]{4}",  # +98 123 123 1234
    r"[+][0-9]{3}\s[0-9]{3}\s[0-9]{3}\s[0-9]{3}",  # +123 123 123 123
    # Non-capturing group: with a capturing group ``findall`` would return
    # only the last repeated fragment ("56 ") instead of the whole number.
    r"[+][0-9]{3}\s(?:[0-9]{2}\s){3}[0-9]{2}",  # +228 12 34 56 78
    r"[+][0-9]{3}\s[0-9]{2}\s[0-9]{6}",  # +232 30 123456
    r"[+][0-9]{3}\s[0-9]{3}\s[0-9]{2}\s[0-9]{2}\s[0-9]{2}",  # +265 123 45 67 89
    r"[+][0-9]{3}\s[0-9]{2}\s[0-9]{3}\s[0-9]{3}",  # +267 12 123 123
))


def read_chat_lines(root):
    """Return the lines of every file found under *root*, read as UTF-8.

    All directories visited by ``os.walk`` are read. A missing or empty
    *root* yields an empty list.
    """
    all_lines = []
    for path, _subdirs, files in os.walk(root):
        for file_name in files:
            with open(os.path.join(path, file_name), mode='r', encoding='utf-8') as chat_file:
                all_lines += chat_file.readlines()
    return all_lines


def split_chat_lines(lines):
    """Split *lines* into ``(message_lines, invite_lines)``.

    Lines containing ``EXCLUDED_NAME`` are dropped entirely. Of the rest,
    lines containing a WhatsApp group-invite link go to ``invite_lines`` and
    everything else goes to ``message_lines``.
    """
    message_lines = []
    invite_lines = []
    for line in lines:
        if EXCLUDED_NAME in line:
            continue
        if INVITE_LINK_PREFIX in line:
            invite_lines.append(line)
        else:
            message_lines.append(line)
    return message_lines, invite_lines


def find_phone_numbers(line):
    """Return every phone number matched in *line* by ``PHONE_PATTERNS``.

    Several patterns overlap (for example ``+254 11122233`` is what the
    eight-digit pattern sees inside ``+254 111222333``), so a match that is a
    proper prefix of another match from the same line is discarded as a
    truncation artefact. Identical matches from different patterns are kept;
    the caller de-duplicates globally.
    """
    matches = [
        match
        for pattern in PHONE_PATTERNS
        for match in pattern.findall(line)
    ]
    return [
        match
        for match in matches
        if not any(other != match and other.startswith(match) for other in matches)
    ]


def extract_phone_numbers(lines):
    """Return the phone numbers found in *lines*, in order, with duplicates."""
    phone_numbers = []
    for line in lines:
        phone_numbers += find_phone_numbers(line)
    return phone_numbers


def resolve_countries(numbers):
    """Return ``(number, country_name)`` pairs for the numbers that resolve.

    Non-breaking spaces are replaced by plain spaces first, and numbers that
    become identical after that replacement are only counted once. Numbers
    that cannot be parsed (e.g. local numbers without a ``+`` prefix) or whose
    region has no pycountry entry are skipped.
    """
    import phonenumbers
    import pycountry
    # Retained from the original script's import list; not used below.
    from phonenumbers.phonenumberutil import region_code_for_country_code  # noqa: F401
    from phonenumbers.phonenumberutil import region_code_for_number

    normalised = dict.fromkeys(number.replace("\xa0", " ") for number in numbers)

    resolved = []
    for number in normalised:
        try:
            parsed = phonenumbers.parse(number)
        except phonenumbers.NumberParseException:
            continue
        region = region_code_for_number(parsed)
        if region is None:
            continue
        try:
            country = pycountry.countries.get(alpha_2=region)
        except LookupError:
            country = None
        if country is None:
            continue
        resolved.append((number, country.name))
    return resolved


def get_cntry_label(in_row, top_counts):
    """Return the row's country name if its count is a top count, else 'Others'.

    *in_row* is any mapping with ``'phn'`` (the count) and ``'cntry'`` keys.
    Membership is tested on the count value, so countries tied with one of
    the top counts all keep their own label.
    """
    if in_row['phn'] in top_counts:
        return in_row['cntry']
    return 'Others'


def summarise_by_country(numbers):
    """Return a DataFrame of number counts indexed by country label.

    The index is ``cntry_lbl`` and the single column is ``phn``. Countries
    outside the ``TOP_COUNTRY_SLICES`` largest counts are merged into
    ``'Others'``.
    """
    import pandas as pd

    resolved = resolve_countries(numbers)
    df1 = pd.DataFrame({
        "phn": [number for number, _ in resolved],
        "cntry": [country for _, country in resolved],
    })
    df2 = df1.groupby('cntry').count().reset_index()

    top_counts = sorted(df2["phn"].tolist(), reverse=True)[:TOP_COUNTRY_SLICES]
    df2['cntry_lbl'] = [
        get_cntry_label(row, top_counts) for _, row in df2.iterrows()
    ]
    return df2[['cntry_lbl', 'phn']].groupby('cntry_lbl').sum()


def plot_pie(labels, sizes, title=""):
    """Show a pie chart of *sizes* labelled by *labels*, first slice exploded.

    Slice colours are taken from ``PIE_COLORS``.
    """
    import matplotlib.pyplot as plt

    colors = PIE_COLORS[0:len(labels)]
    # Pull only the first slice out of the pie.
    explode = [0.1 if position == 0 else 0 for position in range(len(labels))]

    plt.figure(num=None, figsize=(9, 7), dpi=80, facecolor='w', edgecolor='k')
    plt.pie(sizes, explode=explode, labels=labels, colors=colors,
            autopct='%1.1f%%', shadow=True, startangle=140)
    plt.title(title)
    plt.axis('equal')
    plt.show()


def main(chat_dir=CHAT_EXPORT_DIR):
    """Run the whole pipeline on the chat exports under *chat_dir*."""
    message_lines, _invite_lines = split_chat_lines(read_chat_lines(chat_dir))
    phone_numbers = extract_phone_numbers(message_lines)

    ts = str(time())
    print("Before:", len(phone_numbers))
    phone_numbers = sorted(set(phone_numbers))
    print("After:", len(phone_numbers))

    with open(file="phone_numbers_" + ts + ".txt", mode="w", encoding="utf-8") as out_file:
        out_file.write(json.dumps(phone_numbers))

    df3 = summarise_by_country(phone_numbers)
    if df3.empty:
        print("No phone numbers with a resolvable country were found.")
        return
    plot_pie(df3.index, df3.phn.values, 'Countries')


if __name__ == "__main__":
    main()

# Tags: Technology,Data Visualization,Machine Learning,