chore: formatting improvements, parameters for sankey #3

Merged · 3 commits · Feb 22, 2024
6 changes: 0 additions & 6 deletions .github/workflows/main.yaml
@@ -24,12 +24,6 @@ jobs:
- name: Install dependencies
run: make install-dev

- name: Install nbstripout
run: pip install nbstripout

- name: Remove notebook outputs
run: nbstripout --install

- name: Run formatting
run: make format

16 changes: 16 additions & 0 deletions README.md
@@ -43,3 +43,19 @@ Analyze the behavior and performance of Moveo.AI virtual agents.
## Used for creating the graphs

https://plotly.com/python/

## Contributing

### To ensure that your commits [exclude any notebook outputs](https://gist.github.com/33eyes/431e3d432f73371509d176d0dfb95b6e) while contributing to the project, execute the following command within your terminal, in the project's root directory:

```bash
git config filter.strip-notebook-output.clean 'jupyter nbconvert --ClearOutputPreprocessor.enabled=True --to=notebook --stdin --stdout --log-level=ERROR'
```

### The project uses a Makefile.

To see all the available commands:

```bash
make help
```
1 change: 1 addition & 0 deletions notebooks/.gitattributes
@@ -0,0 +1 @@
*.ipynb filter=strip-notebook-output
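
This `.gitattributes` entry pairs with the `filter.strip-notebook-output.clean` command added to the README above, and together they replace the nbstripout steps dropped from the CI workflow. For intuition, here is a minimal Python sketch of what that clean filter does to a notebook, using nbconvert's `ClearOutputPreprocessor` (the same preprocessor the README command enables; the notebook path is only an example):

```python
import nbformat
from nbconvert.preprocessors import ClearOutputPreprocessor

# Read a notebook, clear all cell outputs and execution counts, write it back.
# This mirrors what the strip-notebook-output clean filter does on `git add`.
nb = nbformat.read("notebooks/dialog_flow_analysis.ipynb", as_version=4)
cleared, _ = ClearOutputPreprocessor().preprocess(nb, {})
nbformat.write(cleared, "notebooks/dialog_flow_analysis.ipynb")
```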
6,782 changes: 40 additions & 6,742 deletions notebooks/dialog_flow_analysis.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion notebooks/human_agent_analysis.ipynb
@@ -33,7 +33,7 @@
"metadata": {},
"outputs": [],
"source": [
"df = analyze_agents(\"Flows analysis - Agents Acc Issues BR_agents.csv\")\n",
"df = analyze_agents(\"acc_issues_GR&CY_agents.csv\")\n",
"df.head()"
]
},
27 changes: 18 additions & 9 deletions notebooks/utils/dialog_flow_analysis/common.py
@@ -123,7 +123,7 @@ def fetch_data_analytics_api(session_id: str, account_id: str, api_key: str) ->
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
logger.warning(
f"Received a 404 from the logs API for session {session_id}."
f"Received a 404 from the logs API for session {session_id}. "
"This is probably because the session does not exist"
)
return []
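
The trailing space added above matters because Python concatenates adjacent string literals verbatim, so without it the two parts of the log message run together. A small, self-contained illustration (generic Python, not code from this repository; the session id is a made-up value):

```python
session_id = "abc123"  # hypothetical value, for illustration only

without_space = (
    f"Received a 404 from the logs API for session {session_id}."
    "This is probably because the session does not exist"
)
with_space = (
    f"Received a 404 from the logs API for session {session_id}. "
    "This is probably because the session does not exist"
)

print(without_space)  # ...session abc123.This is probably...
print(with_space)     # ...session abc123. This is probably...
```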
@@ -211,7 +211,7 @@ def analyze_agents(content_csv_fname: str):
return df


- def analyze_flows(content_csv_fname: str):
+ def analyze_flows(content_csv_fname: str, min_transitions_displayed=3):
"""
Analyze flows from a CSV file containing session data.

@@ -280,21 +280,23 @@ def analyze_flows(content_csv_fname: str):
# Generate Sankey diagrams and histograms for visualization
create_sankey(
detractors,
title="Detractors Journey Flow"
title="Detractors Journey Flow "
f"(Ratings: {RATING_MIN}-{DETRACTORS_UPPER_LIMIT})",
fname=content_csv_fname,
+ top_k=min_transitions_displayed,
)
create_sankey(
promoters,
title=f"Promoters Journey Flow (Ratings: {PROMOTERS_LOWER_LIMIT}-{RATING_MAX})",
fname=content_csv_fname,
+ top_k=min_transitions_displayed,
)
create_sankey(
neutral,
title="Neutrals Journey Flow (Ratings:"
title="Neutrals Journey Flow (Ratings: "
f"{DETRACTORS_UPPER_LIMIT + 1}-{PROMOTERS_LOWER_LIMIT - 1})",
fname=content_csv_fname,
- top_k=3,
+ top_k=min_transitions_displayed,
)
# Generate the ratings histogram
plot_ratings_histogram(
@@ -329,7 +331,12 @@ def analyze_flows(content_csv_fname: str):


# NOTE: top_k should be adjusted to get better results depending on the amount of data.
- def create_sankey(df: pd.DataFrame, title: str, fname: str = None, top_k=10):
+ def create_sankey(
+     df: pd.DataFrame,
+     title: str,
+     fname: str = None,
+     top_k=3,
+ ):
"""
Create a Sankey diagram from a DataFrame.

@@ -339,7 +346,9 @@ def create_sankey(df: pd.DataFrame, title: str, fname: str = None, top_k=10):
Args:
df (pd.DataFrame): DataFrame containing flow data.
title (str): Title of the Sankey diagram.
- top_k (int, optional): Min number of transitions to include. Defaults to 10.
+ top_k (int, optional): Min number of transitions to include.
+     Defaults to 3. For a large amount of data,
+     increase it in order to get readable diagrams.

Returns:
None
@@ -366,7 +375,7 @@ def create_sankey(df: pd.DataFrame, title: str, fname: str = None, top_k=10):

# Filter transitions by count
transition_counts = Counter(
- {item: count for item, count in transition_counts.items() if count > top_k}
+ {item: count for item, count in transition_counts.items() if count >= top_k}
)

# Get unique states
@@ -383,7 +392,7 @@ def create_sankey(df: pd.DataFrame, title: str, fname: str = None, top_k=10):
values = []

for (source, target), count in transition_counts.items():
- if count > 10:
+ if count >= top_k:
sources.append(state_to_index[source])
targets.append(state_to_index[target])
values.append(count)
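Taken together, the changes above thread a new `min_transitions_displayed` argument from `analyze_flows` into `create_sankey`'s `top_k`, which now acts as an inclusive threshold (`count >= top_k`) on how often a transition must occur before it is drawn. A minimal usage sketch, assuming the helpers are imported from `utils.dialog_flow_analysis.common` as the notebook paths suggest (the CSV file names are illustrative):

```python
from utils.dialog_flow_analysis.common import analyze_flows, create_sankey

# Small export: the default threshold of 3 keeps rare transitions visible.
analyze_flows("acc_issues_GR&CY_flows.csv")

# Large export: raise the threshold so the Sankey diagrams stay readable.
analyze_flows("acc_issues_GR&CY_flows.csv", min_transitions_displayed=10)

# create_sankey can also be called directly on a prepared DataFrame, e.g.:
# create_sankey(df, title="Journey Flow", fname="flows.csv", top_k=5)
```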
2 changes: 1 addition & 1 deletion notebooks/utils/dialog_flow_analysis/zendesk.py
@@ -231,7 +231,7 @@ def split_agents_and_brain_tickets(csv_fname: str, brain_name: str):
len(df_virtual_assistant) + len(df_agents) + len(df_no_agent)
)
if total_rows_output_files == len(df):
- logger.info("Total rows in output files match total rows in input file.")
+ logger.debug("Total rows in output files match total rows in input file.")
else:
logger.warning(
"Total rows in output files do not match total rows in input file."
45 changes: 4 additions & 41 deletions notebooks/zendesk_utilities.ipynb
@@ -31,7 +31,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -79,51 +79,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"WARNING: Output file '/home/nikos/moveogithub/virtual-agent-analysis/data/Flows analysis - Agents Acc Issues BR_virtual_assistant.csv' already exists.Do you want to override it? (Y/n): Y\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Output file '/home/nikos/moveogithub/virtual-agent-analysis/data/Flows analysis - Agents Acc Issues BR_virtual_assistant.csv' will be overwritten.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Splitting tickets: 100%|██████████████████████████████████████████████████████████████████████| 10925/10925 [00:00<00:00, 20296.11it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Split and saved DataFrame to CSV files.\n",
"Total rows in output files match total rows in input file.\n"
]
}
],
"outputs": [],
"source": [
"split_agents_and_brain_tickets(\n",
" \"Flows analysis - Agents Acc Issues BR.csv\", \"BR Virtual Assistant\"\n",
" \"acc_issues_GR&CY.csv\", \"GR & CY & CZ & ON & NG Virtual Assistant\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {