// expeStats.js
  1. 'use strict'
  2. // import { experiments } from './experimentConfig'
  3. const config = require('./experimentConfig')
  4. const fs = require('fs-extra')
  5. const winston = require('winston')
  6. const execSync = require('child_process').execSync
  // Scenes whitelisted for the MatchExtractsWithReference experiment
  const { whitelist: scenes } = config.experiments.MatchExtractsWithReference.availableScenes
  // Logger used by this service. It is created at level `info`, serialises
  // entries as JSON and does not exit the process on handled exceptions.
  // Destinations, in order:
  //   1. logs/expeStats.log        - file transport without its own level
  //   2. logs/expeStats.error.log  - file transport restricted to `error`
  //   3. console                   - configured with level `debug`, also handles exceptions
  const mainLogFile = new winston.transports.File({ filename: 'logs/expeStats.log' })
  const errorLogFile = new winston.transports.File({
    filename: 'logs/expeStats.error.log',
    level: 'error'
  })
  const consoleOutput = new winston.transports.Console({
    level: 'debug',
    handleExceptions: true,
    format: winston.format.json()
  })
  const fileLogger = winston.createLogger({
    level: 'info',
    format: winston.format.json(),
    transports: [mainLogFile, errorLogFile, consoleOutput],
    exitOnError: false
  })
  /**
   * Parse the `;`-separated stats file into a scene -> user-count lookup.
   *
   * Each non-blank row is expected to hold the scene name in its first field
   * and the number of users who did that scene in its second field.
   *
   * @param {string} csvText - Raw content of the stats CSV.
   * @returns {Map<string, number>} Number of users per scene name.
   */
  const parseSceneStats = (csvText) => {
    const userCounts = new Map()
    // Accept CRLF as well as LF so a stray `\r` never becomes a phantom row
    for (const line of csvText.split(/\r?\n/)) {
      if (line.trim().length === 0) continue
      const [sceneName, rawCount] = line.split(';')
      const userCount = Number(rawCount)
      // Skip rows without a numeric count (e.g. a header line): a single NaN
      // would poison the total and turn every probability into NaN
      if (!Number.isFinite(userCount)) continue
      userCounts.set(sceneName, userCount)
    }
    return userCounts
  }

  /**
   * Turn per-scene user counts into a probability for every whitelisted scene.
   *
   * Scenes present in the stats are weighted by their share of all counted
   * users. Scenes absent from the stats receive a weight above the highest
   * known one, scaled by how few scenes are still unseen, so users are more
   * likely to be given them. All weights are then normalised to sum to 1.
   * When no usable weight exists (no whitelisted scene in the stats, or a
   * total of zero users) every scene gets the same probability.
   *
   * @param {string[]} sceneNames - Whitelisted scene names.
   * @param {Map<string, number>} userCounts - Number of users per scene name.
   * @returns {Object<string, number>} Probability per whitelisted scene,
   *   known scenes first, then unseen ones.
   */
  const computeSceneProbabilities = (sceneNames, userCounts) => {
    // A duplicated whitelist entry must not be counted twice when normalising
    const uniqueScenes = [...new Set(sceneNames)]
    const seenScenes = uniqueScenes.filter((name) => userCounts.has(name))
    const unseenScenes = uniqueScenes.filter((name) => !userCounts.has(name))

    // The total covers every scene of the stats file, whitelisted or not
    let totalUsers = 0
    for (const userCount of userCounts.values()) totalUsers += userCount

    const weights = new Map()
    for (const name of seenScenes) {
      weights.set(name, userCounts.get(name) / totalUsers)
    }

    if (unseenScenes.length > 0) {
      // Math.max() of nothing is -Infinity; that case is caught by the
      // usable-weights check below
      const highestSeenWeight = Math.max(...seenScenes.map((name) => weights.get(name)))
      const unseenWeight = (1 + (1 - (unseenScenes.length / uniqueScenes.length))) * highestSeenWeight
      for (const name of unseenScenes) {
        weights.set(name, unseenWeight)
      }
    }

    const orderedScenes = [...seenScenes, ...unseenScenes]
    const weightSum = orderedScenes.reduce((sum, name) => sum + weights.get(name), 0)
    const hasUsableWeights = Number.isFinite(weightSum) && weightSum > 0

    const probabilities = {}
    for (const name of orderedScenes) {
      probabilities[name] = hasUsableWeights
        ? weights.get(name) / weightSum
        : 1 / orderedScenes.length
    }
    return probabilities
  }

  /**
   * Refresh the per-scene probabilities of the MatchExtracts experiment.
   *
   * Runs `utils/extract_experiment.py`, then
   * `utils/extract_stats_freq_and_min_all.py` (both synchronously), reads the
   * resulting `results/match_extracts_stats.csv`, computes a probability for
   * every whitelisted scene and writes them to
   * `results/match_extracts_probs.json`.
   *
   * @param {boolean} [logToFile=false] - When true, progress messages are sent
   *   to `fileLogger`.
   * @returns {Promise<void>} Settles once the probabilities file is written;
   *   rejects if a script, the CSV read or the JSON write fails.
   */
  const setup = async (logToFile = false) => {
    if (logToFile) fileLogger.info({ log: 'Start extraction of data from mongo for `MatchExtractsExperiments`.', date: new Date() })
    execSync('python utils/extract_experiment.py', { encoding: 'utf-8' })

    if (logToFile) fileLogger.info({ log: 'Mongo extraction done', date: new Date() })
    execSync('python utils/extract_stats_freq_and_min_all.py --file results/experiments_results.json --output results/match_extracts_stats.csv', { encoding: 'utf-8' })

    if (logToFile) fileLogger.info({ log: 'Stats computation done, need to create probability for each scene', date: new Date() })

    // Read the extracted stats in order to compute probabilities
    const statsPath = 'results/match_extracts_stats.csv'
    const userCounts = parseSceneStats(fs.readFileSync(statsPath).toString())
    const probabilities = computeSceneProbabilities(scenes, userCounts)

    if (logToFile) fileLogger.info({ log: 'New probabilities extracted:' + JSON.stringify(probabilities, null, 3), date: new Date() })

    // Await the write so callers only continue once the file exists and so a
    // write failure rejects this promise instead of being lost
    await fs.writeFile('results/match_extracts_probs.json', JSON.stringify(probabilities, null, 3))
  }
  // Run the extraction once when this module is loaded. The returned promise
  // is given a rejection handler so a failing script or file operation is
  // reported through the logger instead of becoming an unhandled rejection.
  setup().catch((err) => {
    const reason = err instanceof Error ? err.message : String(err)
    fileLogger.error({ log: 'Extraction of scene probabilities failed: ' + reason, date: new Date() })
  })
  module.exports = { setup, expeStatsServiceLogger: fileLogger }