Sunday, December 19, 2021

Tenzin Tibetan Spa (2021-Dec-19)



Index of Journals
1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
Tags: Journal,Investment,Management,

Saturday, December 18, 2021

Packaged Food (2021-Dec-18)



Index of Journals
1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
15.
16.
17.
18.
19.
20.
21.
22.
23.
24.
25.
26.
27.
28.
29.
30.
31.
32.
33.
34.
35.
36.
37.
Tags: Journal,Investment,Management,

Friday, December 17, 2021

Pie Plot using Python for African Countries from WhatsApp Phone Numbers Dataset



FYI, example country calling codes:
Country: Code
Kenya: +254
Uganda: +256

import os
import re
import json
from time import time
import pandas as pd

import matplotlib.pyplot as plt

import phonenumbers
from phonenumbers.phonenumberutil import region_code_for_country_code
from phonenumbers.phonenumberutil import region_code_for_number
import pycountry


# Root folder that is scanned for chat export files.
p = "f1/WA_Africa_202112/"

# Collect every file below the root, stored as a path relative to `p`.
# Accumulating inside the walk (instead of overwriting on each iteration)
# keeps files from all visited directories, and leaves `files_list` as an
# empty list rather than undefined when the folder is missing or empty.
files_list = []
for path, subdirs, files in os.walk(p):
    for name in files:
        files_list.append(os.path.relpath(os.path.join(path, name), p))

# Concatenate the lines of all collected files (line endings are kept).
all_lines = []
for rel_path in files_list:
    with open(os.path.join(p, rel_path), mode='r', encoding='utf-8') as f:
        all_lines.extend(f)

# Keep only the lines that do not contain the text "Ashish Jain".
lines_2 = [line for line in all_lines if "Ashish Jain" not in line]

# Split what is left: lines carrying a WhatsApp group invite link go to
# `lines_wags`, every other line goes to `lines_3`.
lines_wags = [line for line in lines_2 if "https://chat.whatsapp.com/" in line]
lines_3 = [line for line in lines_2 if "https://chat.whatsapp.com/" not in line]

# Phone-number layouts searched for in each line; the trailing comment on
# every entry shows an example of the layout it targets.
_PHONE_FORMATS = (
    r"[+][0-9]{3}\s[0-9]{3}\s[0-9]{6}",  # +123 123 123456
    r"[+][0-9]{3}\s[0-9]{9}",  # +254 111222333
    r"[+][0-9]{3}\s[0-9]{2}\s[0-9]{3}\s[0-9]{4}",  # +258 12 345 6789
    r"[+][0-9]{2}\s[0-9]{3}\s[0-9]{7}",  # +92 123 1234567
    r"[+][0-9]{3}\s[0-9]{3}\s[0-9]{3}\s[0-9]{4}",  # +234 123 456 1234
    r"[+][0-9]{2}\s[0-9]{4}\s[0-9]{6}",  # +44 1234 123456
    r"[0][0-9]{9}",  # 0123412345
    r"[+][0-9]{11}",  # +12345678901
    r"[+][0-9]{3}\s[0-9]{3}[-][0-9]{6}",  # +212 123-123456
    r"[+][0-9]\s[\(][0-9]{3}[\)]\s[0-9]{3}[-][0-9]{4}",  # +1 (123) 123-1234
    r"[+][0-9]{2}\s[0-9]{11}",  # +31 12345671234
    r"[+][0-9]{2}\s[0-9]{5}\s[0-9]{5}",  # +91 12345 12345
    r"[+][0-9]{2}\s[0-9]{3}[-][0-9]{4}[-][0-9]{5}",  # +62 123-1234-12345
    r"[+][0-9]{2}\s[0-9]{3}\s[0-9]{3}\s[0-9]{2}\s[0-9]{2}",  # +90 123 123 27 12
    r"[+][0-9]{2}\s[0-9]{4}\s[0-9]{3}\s[0-9]{3}",  # +91 1234 123 123
    r"[+][0-9]{2}\s[0-9]{2}\s[0-9]{3}\s[0-9]{4}",  # +27 65 123 1234
    r"[+][0-9]{5}[-][0-9]{7}",  # +12345-1234567
    r"[+][0-9]{3}\s[0-9]{4}\s[0-9]{4}",  # +968 1234 1234
    r"[+][0-9]{12}",  # +123451234512
    r"[+][0-9]{3}\s[0-9]{8}",  # +229 12345678
    r"[+][0-9]{2}\s[0-9]{3}\s[0-9]{3}\s[0-9]{3}",  # +40 123 123 123
    r"[+][0-9]{2}\s[0-9]{3}\s[0-9]{3}\s[0-9]{4}",  # +98 123 123 1234
    r"[+][0-9]{3}\s[0-9]{3}\s[0-9]{3}\s[0-9]{3}",  # +123 123 123 123
    # Non-capturing group: with a capturing group re.findall would return
    # only the last repeated "NN " fragment instead of the whole number.
    r"[+][0-9]{3}\s(?:[0-9]{2}\s){3}[0-9]{2}",  # +228 12 34 56 78
    r"[+][0-9]{3}\s[0-9]{2}\s[0-9]{6}",  # +232 30 123456
    r"[+][0-9]{3}\s[0-9]{3}\s[0-9]{2}\s[0-9]{2}\s[0-9]{2}",  # +265 123 45 67 89
    r"[+][0-9]{3}\s[0-9]{2}\s[0-9]{3}\s[0-9]{3}",  # +267 12 123 123
)

# Compiled once, outside the per-line loop. The digit look-arounds stop a
# shorter layout from matching a slice of a longer digit run (for example
# the 11-digit layout matching the first 11 digits of a 12-digit number),
# which would otherwise add truncated, bogus numbers to the result.
_PHONE_REGEXES = [
    re.compile(r"(?<![0-9])(?:" + fmt + r")(?![0-9])") for fmt in _PHONE_FORMATS
]


def _find_phone_numbers(line):
    """Return all phone-number strings found in *line*, one per pattern match.

    Patterns are tried in the order of ``_PHONE_FORMATS``; a number that fits
    more than one layout is returned once per matching layout.
    """
    found = []
    for regex in _PHONE_REGEXES:
        found += regex.findall(line)
    return found


# `phone_numbers` collects every match; `lines_temp` holds, at the same
# index, the line each match was found in.
phone_numbers = []
lines_temp = []
for line in lines_3:
    matches = _find_phone_numbers(line)
    phone_numbers += matches
    lines_temp += [line] * len(matches)



# Current epoch time as text; used as the suffix of the output file name.
ts = str(time())

# Report the match count, collapse duplicates into a sorted list, report again.
print("Before:", len(phone_numbers))
phone_numbers = list(set(phone_numbers))
phone_numbers.sort()
print("After:", len(phone_numbers))

# Persist the de-duplicated numbers as a JSON array.
out_name = "phone_numbers_" + ts + ".txt"
with open(file=out_name, mode="w", encoding="utf-8") as out_file:
    json.dump(phone_numbers, out_file)

# Replace non-breaking spaces with plain spaces before parsing.
phone_numbers_2 = [number.replace("\xa0", " ") for number in phone_numbers]

# Parallel lists: a phone number and the name of the country it resolved to.
phn_num_list = []
cntry = []

for number in phone_numbers_2:
    try:
        # No default region is given, so numbers without a leading
        # "+<country code>" cannot be parsed and are skipped.
        pn = phonenumbers.parse(number)
    except phonenumbers.NumberParseException:
        continue

    region = region_code_for_number(pn)
    if region is None:
        # The number does not map to a known region.
        continue

    try:
        country = pycountry.countries.get(alpha_2=region)
    except LookupError:
        # Defensive: treat a lookup failure the same as "no such country".
        country = None
    if country is None:
        continue

    phn_num_list.append(number)
    cntry.append(country.name)

df1 = pd.DataFrame({
    "phn": phn_num_list,
    "cntry": cntry,
})

# One row per country name; the `phn` column holds the number count.
df2 = df1.groupby('cntry').count()


def plot_pie(labels, sizes, title=""):
    """Draw and display a pie chart with the first slice pulled out.

    Args:
        labels: Sequence of slice labels; its length sets the slice count.
        sizes: Sequence of slice values, in the same order as ``labels``.
        title: Optional chart title.

    The chart is rendered with ``plt.show()``; nothing is returned.
    """
    palette = [
        '#f47961', '#f0c419', '#255c61', '#78909c', '#6ad4cf',
        '#17aee8', '#5c6bc0', '#444b6e', '#ef4c60', '#744593',
        '#ee5691', '#9ccc65', '#708b75', '#d1cb65', '#0d8de1',
        '#a4554b', '#694f5d', '#45adb3', '#26a69a', '#bdc7cc',
    ]
    slice_count = len(labels)
    colors = palette[:slice_count]

    # Offset only the first slice. The list is built to the exact slice
    # count, so it stays valid for any number of labels (including none).
    explode = [0.1 if idx == 0 else 0 for idx in range(slice_count)]

    plt.figure(num=None, figsize=(9, 7), dpi=80, facecolor='w', edgecolor='k')
    plt.pie(sizes, explode=explode, labels=labels, colors=colors,
            autopct='%1.1f%%', shadow=True, startangle=140)
    plt.title(title)
    plt.axis('equal')
    plt.show()


# The nine largest per-country counts (fewer if there are fewer countries).
# NOTE(review): the name says 10 but the slice keeps 9 -- possibly so that
# nine countries plus "Others" give ten slices; confirm the intended cutoff.
top_10 = sorted(df2.phn.values, reverse=True)[:9]

df2.head()

# Turn the `cntry` index back into a regular column so rows can be labelled.
df2 = df2.reset_index()
df2.head()


def get_cntry_label(in_row):
    """Return the row's country name if its count is in ``top_10``, else 'Others'.

    Args:
        in_row: A row with a ``'phn'`` count and a ``'cntry'`` name.
    """
    if in_row['phn'] in top_10:
        return in_row['cntry']
    return 'Others'


df2['cntry_lbl'] = df2.apply(get_cntry_label, axis=1)

# Sum the counts per label. The frame is already restricted to the label and
# count columns, so a plain sum() is enough (the first positional argument
# of GroupBy.sum is `numeric_only`, not a column name).
df3 = df2[['cntry_lbl', 'phn']].groupby('cntry_lbl').sum()
df3
plot_pie(df3.index, df3.phn.values, 'Countries')
Tags: Technology,Data Visualization,Machine Learning,